diff --git a/pyk/docs/pipeline.md b/pyk/docs/2026-04-27-pipeline.md similarity index 100% rename from pyk/docs/pipeline.md rename to pyk/docs/2026-04-27-pipeline.md diff --git a/pyk/docs/regression-triage.md b/pyk/docs/2026-04-27-regression-triage.md similarity index 100% rename from pyk/docs/regression-triage.md rename to pyk/docs/2026-04-27-regression-triage.md diff --git a/pyk/docs/2026-06-01-sortparam-kore-emission.md b/pyk/docs/2026-06-01-sortparam-kore-emission.md new file mode 100644 index 0000000000..2c473b76b5 --- /dev/null +++ b/pyk/docs/2026-06-01-sortparam-kore-emission.md @@ -0,0 +1,59 @@ +# Emitting the `#SortParam` sentinel to Kore + +## Status + +Not implemented. +`_ksort_to_kore` raises a clear error when it encounters the sentinel, rather than producing invalid Kore. +This document describes the full fix so it can be implemented and validated against real Kore output later. + +## Background + +Some ML-predicate productions have a result sort that cannot be inferred from their arguments. +The canonical example is `#Equals{S1, S2}(S1, S1)`: `S1` is determined by the arguments, but `S2` (the result sort) is context-dependent. +When `KDefinition.add_sort_params` fills in a partially-applied ML-predicate label, it can determine `S1` but not `S2`, so it fills `S2` with the sentinel sort `KSort('#SortParam')` (see `add_sort_params` in `src/pyk/kast/outer.py`). + +The Java frontend (`org.kframework.compile.AddSortInjections`) does the same thing, but with two differences that matter for Kore emission: + +- It generates a *fresh, uniquely named* sentinel per unresolved position: `#SortParam{Q0}`, `#SortParam{Q1}`, …. +- It collects those `Qn` names during the traversal and declares them as universally-quantified sort variables on the resulting axiom (surfaced in the `sortParams(...)` rule attribute, and as sort parameters on the Kore `axiom{...}`). + +## The problem in pyk + +`_ksort_to_kore` (in `src/pyk/konvert/_kast_to_kore.py`) is a context-free, term-level converter: it maps every `KSort` to `SortApp('Sort' + name)`. +For the sentinel this yields `SortApp('Sort#SortParam')`, which is doubly wrong: + +- `#` is not a legal Kore identifier character, so the `SortApp`/`Id` constructor raises `ValueError: Expected identifier`. +- Even if it were constructible, a bare sort application referencing a sort that exists in no definition is not what the sentinel means — it must be a *bound sort variable*. + +`krule_to_kore` builds an axiom by calling `kast_to_kore` several independent times (lhs body, rhs body, each constraint), and sets the axiom's sort variables (`Axiom.vars`) to `()` for ordinary rules or `(SortVar('R'),)` for functional rules. +There is no mechanism to (a) emit the sentinel as a sort variable, (b) give it a unique name, or (c) collect it back up and bind it on the axiom. + +## Reachability + +In the normal pyk flow this path is rarely hit. +ML predicates constructed through the prelude (`mlEquals`, `mlEqualsTrue`, `bool_to_ml_pred`, …) always pass explicit sort params (e.g. `KLabel('#Equals', arg_sort, sort)`), so they hit the early return in `add_sort_params` and never produce the sentinel. +The sentinel only arises for ML predicates that reach `add_sort_params` with *missing* sort params (e.g. frontend-loaded or hand-constructed terms) and whose result sort cannot be inferred. + +## Proposed fix + +The goal is to mirror the Java behaviour: each unresolved result sort becomes a uniquely-named, universally-quantified Kore sort variable on the enclosing axiom. + +1. **Unique sentinels.** + Replace the single bare `KSort('#SortParam')` with parametric sentinels `KSort('#SortParam', (KSort('Q0'),))`, `KSort('#SortParam', (KSort('Q1'),))`, …. + This removes the current "at most one unbound param" restriction (the `NotImplementedError` guard in `add_sort_params`) and lets distinct unresolved positions stay distinct. + Generating fresh names requires a counter; since `add_sort_params` runs per `kast_to_kore` call, the counter must be threaded from `krule_to_kore` (or `add_sort_params` must accept a starting index and return the names it used) so that names are unique *across* the several `kast_to_kore` calls that make up one axiom. + +2. **Emit sentinels as sort variables.** + Teach `_ksort_to_kore` (or the `_kapply_to_pattern` sort-emission path) to map `#SortParam{Qn}` to `SortVar('Qn')` instead of a `SortApp`. + +3. **Bind them on the axiom.** + `krule_to_kore` collects every `SortVar` emitted from a sentinel across all of its `kast_to_kore` calls and adds them to `Axiom.vars` (alongside `R` for functional rules), and to the `sortParams(...)` attribute, matching the Java output. + +## Validation + +This change alters Kore axiom output and must be validated end-to-end, not just at the unit level: + +- The konvert integration tests (`src/tests/integration/konvert/`), which require the K toolchain. +- The pyk regression suite (`pyk/regression-new/`), comparing emitted Kore against the Java backend for a definition that actually exercises an uninferable ML-predicate result sort. + +A minimal repro to build first: a rule (loaded from a compiled definition, not constructed via the prelude) containing an ML predicate whose result sort is not determined by its arguments, asserting the emitted axiom binds a sort variable rather than referencing `Sort#SortParam`. diff --git a/pyk/src/pyk/kast/inner.py b/pyk/src/pyk/kast/inner.py index 806fdaeb3d..87b27ae2ef 100644 --- a/pyk/src/pyk/kast/inner.py +++ b/pyk/src/pyk/kast/inner.py @@ -51,6 +51,10 @@ def let(self, *, name: str | None = None, params: Iterable[KSort] | None = None) params = params if params is not None else self.params return KSort(name=name, params=params) + def contains(self, sort: KSort) -> bool: + """Return whether ``sort`` appears anywhere in this sort's parameter tree (including itself).""" + return self == sort or any(param.contains(sort) for param in self.params) + @final @dataclass(frozen=True) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index a0c8539a16..2d373f7470 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -17,6 +17,7 @@ from .att import EMPTY_ATT, Atts, Format, KAst, KAtt, WithKAtt from .inner import ( KApply, + KAs, KInner, KLabel, KRewrite, @@ -30,7 +31,7 @@ top_down, ) from .kast import kast_term -from .prelude.k import K_ITEM, K +from .prelude.k import K_ITEM, SORT_PARAM_SENTINEL, K from .rewrite import indexed_rewrite if TYPE_CHECKING: @@ -1000,6 +1001,40 @@ def let(self, *, require: str | None = None) -> KRequire: return KRequire(require=require) +def _match_sort_params( + parametric: KSort, + actual: KSort, + params: frozenset[KSort], + subsorts_fn: Callable[[KSort], frozenset[KSort]] | None = None, +) -> dict[KSort, list[KSort]]: + """Match ``parametric`` sort against ``actual``, collecting candidate bindings per sort param. + + Three matching strategies, mirroring Java ``AddSortInjections.match()``: + + 1. Direct: ``parametric`` is itself a sort param — bind it to ``actual``. + 2. Structural: same constructor head — recurse on sub-params. + 3. Subsort-aware: iterate subsorts ``s ≤ actual`` with same head as ``parametric``, + collecting additional candidates for LUB resolution. + """ + if parametric in params: + return {parametric: [actual]} + if parametric.name == actual.name and len(parametric.params) == len(actual.params): + result: dict[KSort, list[KSort]] = {} + for p_sub, a_sub in zip(parametric.params, actual.params, strict=True): + for k, vs in _match_sort_params(p_sub, a_sub, params, subsorts_fn).items(): + result.setdefault(k, []).extend(vs) + return result + if parametric.params and subsorts_fn is not None: + result = {} + for s in subsorts_fn(actual): + if s.name == parametric.name and len(s.params) == len(parametric.params): + for p_sub, a_sub in zip(parametric.params, s.params, strict=True): + for k, vs in _match_sort_params(p_sub, a_sub, params).items(): + result.setdefault(k, []).extend(vs) + return result + return {} + + @final @dataclass(frozen=True) class KDefinition(KOuter, WithKAtt, Iterable[KFlatModule]): @@ -1346,6 +1381,8 @@ def sort(self, kast: KInner) -> KSort | None: match kast: case KToken(_, sort) | KVariable(_, sort): return sort + case KAs(alias=KVariable(sort=sort)): + return sort case KRewrite(lhs, rhs): lhs_sort = self.sort(lhs) rhs_sort = self.sort(rhs) @@ -1355,8 +1392,11 @@ def sort(self, kast: KInner) -> KSort | None: case KSequence(_): return KSort('K') case KApply(label, _): - sort, _ = self.resolve_sorts(label) - return sort + try: + sort, _ = self.resolve_sorts(label) + return sort + except (KeyError, ValueError): + return None case _: return None @@ -1373,7 +1413,13 @@ def resolve_sorts(self, label: KLabel) -> tuple[KSort, tuple[KSort, ...]]: sorts = dict(zip(prod.params, label.params, strict=True)) def resolve(sort: KSort) -> KSort: - return sorts.get(sort, sort) + # Direct match: sort IS one of the sort parameters. + if sort in sorts: + return sorts[sort] + # Recursive substitution: sort params may appear nested (e.g. MInt{Width} → MInt{8}). + if sort.params: + return KSort(sort.name, tuple(resolve(p) for p in sort.params)) + return sort return resolve(prod.sort), tuple(resolve(sort) for sort in prod.argument_sorts) @@ -1389,6 +1435,34 @@ def least_common_supersort(self, sort1: KSort, sort2: KSort) -> KSort | None: # vice versa. In that case there may be more than one LCS. return None + def least_upper_bound(self, sorts: Iterable[KSort]) -> KSort | None: + """Compute the least upper bound of a set of sorts in the subsort lattice. + + Returns the unique minimal common supersort, or `None` when none exists or the bound is + ambiguous (several incomparable minimal upper bounds). Unlike `least_common_supersort`, + this examines the whole lattice rather than only the pairwise subsort relation, so it is + order-independent and resolves siblings that share a common supersort (e.g. the LUB of + `Int` and `Float` is `Number` when both are subsorts of `Number`). Mirrors the set-based + `AddSortInjections.lub` in the Java frontend. + """ + sort_set = set(sorts) + if not sort_set: + return None + if len(sort_set) == 1: + return single(sort_set) + + def is_supersort(upper: KSort, lower: KSort) -> bool: + return upper == lower or lower in self.subsorts(upper) + + # Every proper supersort of some sort appears as a key in the subsort table; the sorts + # themselves are included to cover a candidate that is already an upper bound of the rest. + universe = set(self.subsort_table.keys()) | sort_set + upper_bounds = {u for u in universe if all(is_supersort(u, s) for s in sort_set)} + minimal = {u for u in upper_bounds if not any(o != u and is_supersort(u, o) for o in upper_bounds)} + if len(minimal) == 1: + return single(minimal) + return None + def greatest_common_subsort(self, sort1: KSort, sort2: KSort) -> KSort | None: """Compute the greatest-lower-bound of two sorts in the sort lattice using very simple approach, returning `None` on failure (not necessarily meaning there isn't a glb).""" if sort1 == sort2: @@ -1499,34 +1573,140 @@ def transform( return Subst(subst)(new_term) - # Best-effort addition of sort parameters to klabels, context insensitive - def add_sort_params(self, kast: KInner) -> KInner: - """Return a given term with the sort parameters on the `KLabel` filled in (which may be missing because of how the frontend works), best effort.""" - - def _add_sort_params(_k: KInner) -> KInner: - if type(_k) is KApply: - prod = self.symbols[_k.label.name] - if len(_k.label.params) == 0 and len(prod.params) > 0: - sort_dict: dict[KSort, KSort] = {} - for psort, asort in zip(prod.argument_sorts, map(self.sort, _k.args), strict=True): - if asort is None: - _LOGGER.warning( - f'Failed to add sort parameter, unable to determine sort for argument in production: {(prod, psort, asort)}' - ) - return _k - if psort in prod.params: - if psort in sort_dict and sort_dict[psort] != asort: - _LOGGER.warning( - f'Failed to add sort parameter, sort mismatch between different occurances of sort parameter: {(prod, psort, sort_dict[psort], asort)}' - ) - return _k - elif psort not in sort_dict: - sort_dict[psort] = asort - if all(p in sort_dict for p in prod.params): - return _k.let(label=KLabel(_k.label.name, [sort_dict[p] for p in prod.params])) - return _k + # Best-effort addition of sort parameters to klabels + def add_sort_params(self, kast: KInner, sort: KSort | None = None) -> KInner: + """Return ``kast`` with missing sort parameters filled in on every parametric ``KLabel``, best effort. + + The frontend often leaves the sort parameters off a parametric ``KLabel``. This fills + them with a single recursive sweep that threads an *expected* sort downwards — so a + parametric **return** sort is taken from its context — and resolves the remaining + parameters from the argument sorts coming back up, mirroring ``AddSortInjections`` in the + Java frontend. + + ``sort`` is the expected sort of the whole term (e.g. ``GeneratedTopCell`` for a rule + body). When omitted, a fresh sort variable is used at the top, so the algorithm still + runs but a parametric return sort that cannot be pinned from context becomes a fresh + ``#SortParam{Qn}`` sort variable. + + Each parameter is either *argument-bound* (occurs in some argument sort, resolved + bottom-up) or *return-only* (resolved top-down from the expected sort). ``#SortParam`` + sentinels act as low-priority placeholders, so a real sort always wins when one is + available. The implementation is one interleaved sweep (``_go``); a future split into + explicit passes can reuse the same per-node logic. + + It is asserted that every parametric return sort lives on the "spine" of the term: a + return-only parameter is only allowed to stay free when its expected sort is itself free + — a concrete expected sort it cannot satisfy means the term is ill-formed. + """ + counter = [0] + + def _fresh() -> KSort: + param = KSort(SORT_PARAM_SENTINEL.name, (KSort(f'Q{counter[0]}'),)) + counter[0] += 1 + return param + + def _is_sentinel(s: KSort | None) -> bool: + return s is not None and s.name == SORT_PARAM_SENTINEL.name + + def _subst_sort(subst: dict[KSort, KSort], s: KSort) -> KSort: + if s in subst: + return subst[s] + if s.params: + return KSort(s.name, tuple(_subst_sort(subst, p) for p in s.params)) + return s + + def _lub(candidates: list[KSort]) -> KSort | None: + # Sentinels are placeholders; a real sort always wins (as Java's lub filters out + # #SortParam). Only when every candidate is a sentinel do we keep one. + real = [c for c in candidates if not _is_sentinel(c)] + if real: + return self.least_upper_bound(real) + return candidates[0] if candidates else None + + def _go(term: KInner, expected: KSort) -> KInner: + if isinstance(term, KRewrite): + return KRewrite(_go(term.lhs, expected), _go(term.rhs, expected)) + if isinstance(term, KAs): + return KAs(_go(term.pattern, expected), _go(term.alias, expected)) + if isinstance(term, KSequence): + return KSequence([_go(item, SORT_PARAM_SENTINEL) for item in term.items]) + if not isinstance(term, KApply): + return term + + prod = self.symbols.get(term.label.name) + if prod is None: + return term + + # Already-applied or non-parametric: thread the expected sort into the children using + # the known bindings, then recurse. + if len(prod.params) == 0 or len(term.label.params) == len(prod.params): + subst = dict(zip(prod.params, term.label.params, strict=True)) if prod.params else {} + child_expected = [_subst_sort(subst, asort) for asort in prod.argument_sorts] + return term.let(args=[_go(arg, e) for arg, e in zip(term.args, child_expected, strict=True)]) + + params = list(prod.params) + param_set = frozenset(params) + + # Mirror Java's `fresh`: the parameter that IS the whole result sort takes the + # expected sort (so it propagates into the children, sharing one sort variable down + # the spine); the rest take a bare sentinel placeholder. Used to type the children. + fresh_subst = {p: (expected if p == prod.sort else SORT_PARAM_SENTINEL) for p in params} + child_expected = [_subst_sort(fresh_subst, asort) for asort in prod.argument_sorts] + new_args = [_go(arg, e) for arg, e in zip(term.args, child_expected, strict=True)] + child_sorts = [self.sort(arg) for arg in new_args] + + # Bottom-up: argument-bound parameters, from the actual child sorts. + candidates: dict[KSort, list[KSort]] = {} + for psort, asort in zip(prod.argument_sorts, child_sorts, strict=True): + if asort is None or _is_sentinel(asort): + continue + for k, vs in _match_sort_params(psort, asort, param_set, self.subsorts).items(): + candidates.setdefault(k, []).extend(vs) + + # Top-down: return-only parameters, from the expected sort (only when it is concrete). + return_only = frozenset( + p + for p in params + if prod.sort.contains(p) and not any(asort.contains(p) for asort in prod.argument_sorts) + ) + if return_only and not _is_sentinel(expected): + for k, vs in _match_sort_params(prod.sort, expected, return_only).items(): + candidates.setdefault(k, []).extend(vs) + + bindings: dict[KSort, KSort] = {} + unsortable: list[KSort] = [] + for p in params: + value = _lub(candidates.get(p, []) + [fresh_subst[p]]) + if value is None: + _LOGGER.warning(f'Failed to add sort parameter, conflicting sorts for sort parameter: {(prod, p)}') + return term.let(args=new_args) + if not _is_sentinel(value) or value.params: + # A concrete sort, or an already-named sort variable propagated from the + # expected sort (e.g. one shared down the spine) — keep it as is. + bindings[p] = value + elif any(asort.contains(p) for asort in prod.argument_sorts): + # Bare sentinel for an argument-bound parameter: the argument was unsortable. + unsortable.append(p) + else: + # Bare sentinel for a return-only or phantom parameter: a fresh, free sort + # variable. A return sort is only allowed to stay free on the spine — i.e. + # when its own expected sort is itself free. + assert not prod.sort.contains(p) or _is_sentinel(expected), ( + f'parametric return sort {p.name} of {term.label.name!r} occurs off the spine, ' + f'in a position expecting concrete sort {expected.name}' + ) + bindings[p] = _fresh() + + if unsortable: + _LOGGER.warning( + f'Failed to add sort parameter, unable to determine sort for argument: {(prod, unsortable)}' + ) + return term.let(args=new_args) + + label = KLabel(term.label.name, [bindings[p] for p in params]) + return term.let(label=label, args=new_args) - return bottom_up(_add_sort_params, kast) + return _go(kast, sort if sort is not None else _fresh()) def add_cell_map_items(self, kast: KInner) -> KInner: """Wrap cell-map items in the syntactical wrapper that the frontend generates for them. diff --git a/pyk/src/pyk/kast/prelude/k.py b/pyk/src/pyk/kast/prelude/k.py index 42324339e8..dbd4e1f8d3 100644 --- a/pyk/src/pyk/kast/prelude/k.py +++ b/pyk/src/pyk/kast/prelude/k.py @@ -14,6 +14,12 @@ K_ITEM: Final = KSort('KItem') GENERATED_TOP_CELL: Final = KSort('GeneratedTopCell') +# Sentinel sort marking an ML-predicate result sort that could not be inferred from arguments +# (introduced by KDefinition.add_sort_params). The family also covers uniquely-named variants +# such as `#SortParam{Q0}` once those are generated. This sort cannot yet be emitted to Kore; +# see pyk/docs/2026-06-01-sortparam-kore-emission.md. +SORT_PARAM_SENTINEL: Final = KSort('#SortParam') + DOTS: Final = KToken('...', K) diff --git a/pyk/src/pyk/konvert/_kast_to_kore.py b/pyk/src/pyk/konvert/_kast_to_kore.py index 1f667e2752..f95de07a66 100644 --- a/pyk/src/pyk/konvert/_kast_to_kore.py +++ b/pyk/src/pyk/konvert/_kast_to_kore.py @@ -9,7 +9,7 @@ from ..kast.manip import bool_to_ml_pred, extract_lhs, extract_rhs, flatten_label, ml_pred_to_bool, var_occurrences from ..kast.outer import KRule from ..kast.prelude.bytes import BYTES, pretty_bytes_str -from ..kast.prelude.k import K_ITEM, K, inj +from ..kast.prelude.k import K_ITEM, SORT_PARAM_SENTINEL, K, inj from ..kast.prelude.kbool import BOOL, TRUE, andBool from ..kast.prelude.ml import mlAnd from ..kast.prelude.string import STRING, pretty_string @@ -356,6 +356,18 @@ def _kimport_to_kore(kimport: KImport) -> Import: def _ksort_to_kore(ksort: KSort) -> SortApp: + # Emitting the #SortParam sentinel correctly requires a universally-quantified sort variable + # bound at the axiom level (a `sortParams({Q0})` attribute, as Java's AddSortInjections does); + # this term-level converter has no way to introduce or bind one. Fail clearly rather than + # building the invalid Kore identifier `Sort#SortParam` ('#' is not a legal identifier char). + # `startswith` covers the whole sentinel family (e.g. a future `#SortParam{Q0}`). The full + # design is in pyk/docs/2026-06-01-sortparam-kore-emission.md. + if ksort.name.startswith(SORT_PARAM_SENTINEL.name): + raise ValueError( + f'Cannot emit the {SORT_PARAM_SENTINEL.name} sentinel sort to Kore: it marks an ML-predicate ' + 'result sort that add_sort_params could not infer and that must become an axiom-level ' + 'sort variable. This is not yet implemented; see pyk/docs/2026-06-01-sortparam-kore-emission.md.' + ) return SortApp('Sort' + ksort.name) diff --git a/pyk/src/tests/unit/kast/test_definition.py b/pyk/src/tests/unit/kast/test_definition.py index 1b6f987be0..fd8c3c01ca 100644 --- a/pyk/src/tests/unit/kast/test_definition.py +++ b/pyk/src/tests/unit/kast/test_definition.py @@ -1,14 +1,24 @@ from __future__ import annotations +import logging from typing import TYPE_CHECKING import pytest from pyk.kast.att import Atts, KAtt -from pyk.kast.inner import KApply, KSort, KVariable -from pyk.kast.outer import KDefinition, KFlatModule, KNonTerminal, KProduction, KTerminal +from pyk.kast.inner import KApply, KAs, KLabel, KSequence, KSort, KToken, KVariable +from pyk.kast.outer import ( + KDefinition, + KFlatModule, + KNonTerminal, + KProduction, + KTerminal, + _match_sort_params, +) +from pyk.kast.prelude.k import GENERATED_TOP_CELL if TYPE_CHECKING: + from collections.abc import Callable from typing import Final from pyk.kast.inner import KInner @@ -17,6 +27,18 @@ # --------------------------------------------------------------------------- # Minimal test definition # +# bar: syntax N ::= bar(N) -- result sort is the param directly +# foo: syntax MInt{N} ::= foo(MInt{N}) -- result/arg sorts nest the param +# #Equals: syntax S2 ::= #Equals{S1,S2}(S1, S1) -- ML pred, result sort context-dependent +# #Ceil: syntax S2 ::= #Ceil{S1,S2}(S1) -- ML pred, S2 is the free result sort +# #And: syntax {S} S ::= #And{S}(S, S) -- homogeneous ML connective (S arg-bound) +# pair: syntax Pair{S1,S2} ::= pair(S1) -- user label; S2 is return-only +# +# Subsorts: +# syntax Int ::= MInt{Int} -- MInt{Int} <: Int (enables subsort-aware matching) +# syntax Number ::= Int -- Int <: Number +# syntax Number ::= Float -- Float <: Number (Int, Float incomparable; LUB is Number) +# # Cell map fragment: # AccountCellMap ::= AccountCellMap AccountCellMap [cellCollection, element(AccountCellMapItem), wrapElement()] # AccountCellMap ::= AccountCellMapItem(Int, AccountCell) @@ -25,9 +47,91 @@ # --------------------------------------------------------------------------- INT: Final = KSort('Int') +BOOL: Final = KSort('Bool') +FLOAT: Final = KSort('Float') +NUMBER: Final = KSort('Number') +N: Final = KSort('N') +S: Final = KSort('S') +S1: Final = KSort('S1') +S2: Final = KSort('S2') +S3: Final = KSort('S3') +MINT_N: Final = KSort('MInt', (N,)) +MINT_INT: Final = KSort('MInt', (INT,)) +MINT_BOOL: Final = KSort('MInt', (BOOL,)) ACCOUNT_CELL_MAP: Final = KSort('AccountCellMap') ACCOUNT_CELL: Final = KSort('AccountCell') +_BAR_PROD: Final = KProduction( + sort=N, + items=[KTerminal('bar'), KTerminal('('), KNonTerminal(N), KTerminal(')')], + params=[N], + klabel='bar', +) + +_FOO_PROD: Final = KProduction( + sort=MINT_N, + items=[KTerminal('foo'), KTerminal('('), KNonTerminal(MINT_N), KTerminal(')')], + params=[N], + klabel='foo', +) + +_EQUALS_PROD: Final = KProduction( + sort=S2, + items=[KNonTerminal(S1), KNonTerminal(S1)], + params=[S1, S2], + klabel='#Equals', +) + +# Hypothetical 3-param #Equals. S1 is inferred from arguments; S2 (result) and S3 (occurring +# nowhere) are independent free sorts, so add_sort_params gives each a distinct fresh sort var. +_EQUALS3_PROD: Final = KProduction( + sort=S2, + items=[KNonTerminal(S1), KNonTerminal(S1)], + params=[S1, S2, S3], + klabel='#Equals', +) + +# User-defined label whose S2 occurs only in the result sort (not in any argument). With no +# expected sort it becomes a fresh sort variable; with a matching expected sort it resolves. +_PAIR_PROD: Final = KProduction( + sort=KSort('Pair', (S1, S2)), + items=[KTerminal('pair'), KTerminal('('), KNonTerminal(S1), KTerminal(')')], + params=[S1, S2], + klabel='pair', +) + +# syntax Int ::= MInt{Int} — subsort declaration: MInt{Int} <: Int +# Enables the subsort-aware matching path (Java AddSortInjections.match step 3). +_MINT_INT_SUBSORT: Final = KProduction(sort=INT, items=[KNonTerminal(MINT_INT)]) + +# Subsort lattice with two incomparable subsorts sharing a common supersort: +# syntax Number ::= Int and syntax Number ::= Float +# LUB(Int, Float) = Number, exercising set-based (not pairwise-chain) lub computation. +_NUMBER_INT_SUBSORT: Final = KProduction(sort=NUMBER, items=[KNonTerminal(INT)]) +_NUMBER_FLOAT_SUBSORT: Final = KProduction(sort=NUMBER, items=[KNonTerminal(FLOAT)]) + +# #Ceil: syntax S2 ::= #Ceil{S1,S2}(S1) -- ML pred, like #Equals (S2 is the free result sort) +_CEIL_PROD: Final = KProduction( + sort=S2, + items=[KNonTerminal(S1)], + params=[S1, S2], + klabel='#Ceil', +) + +# #And: syntax S ::= #And{S}(S, S) -- homogeneous ML connective (S is arg-bound and the result) +_AND_PROD: Final = KProduction( + sort=S, + items=[KNonTerminal(S), KNonTerminal(S)], + params=[S], + klabel='#And', +) + + +def _sp(n: int) -> KSort: + """The n-th fresh #SortParam sort variable (`#SortParam{Qn}`) the algorithm allocates.""" + return KSort('#SortParam', (KSort(f'Q{n}'),)) + + _ACCT_MAP_CONCAT: Final = KProduction( sort=ACCOUNT_CELL_MAP, items=[KNonTerminal(ACCOUNT_CELL_MAP), KNonTerminal(ACCOUNT_CELL_MAP)], @@ -69,9 +173,337 @@ DEFN: Final = KDefinition( 'TEST', - [KFlatModule('TEST', [_ACCT_MAP_CONCAT, _ACCT_MAP_ITEM, _ACCOUNT_CELL, _GET_ENTRY])], + [ + KFlatModule( + 'TEST', + [ + _BAR_PROD, + _FOO_PROD, + _EQUALS_PROD, + _CEIL_PROD, + _AND_PROD, + _MINT_INT_SUBSORT, + _NUMBER_INT_SUBSORT, + _NUMBER_FLOAT_SUBSORT, + _ACCT_MAP_CONCAT, + _ACCT_MAP_ITEM, + _ACCOUNT_CELL, + _GET_ENTRY, + ], + ) + ], +) + +# Definition used only to verify the multi-unbound-param guard in add_sort_params. +DEFN3: Final = KDefinition('TEST3', [KFlatModule('TEST3', [_EQUALS3_PROD])]) + +# Definition used only to verify the unresolvable-user-label warning path. +DEFN_PAIR: Final = KDefinition('TEST_PAIR', [KFlatModule('TEST_PAIR', [_PAIR_PROD])]) + + +# --------------------------------------------------------------------------- +# KDefinition.sort +# --------------------------------------------------------------------------- + +SORT_DATA: Final = ( + # Basic leaf terms + ('ktoken', KToken('42', INT), INT), + ('kvariable_with_sort', KVariable('X', sort=INT), INT), + ('ksequence', KSequence([]), KSort('K')), + # KApply: result sort substituted directly from param + ('kapply_direct_result', KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]), INT), + # KApply: result sort nests the param (MInt{N} with N→Int → MInt{Int}) + ('kapply_nested_result', KApply(KLabel('foo', [INT]), [KVariable('X', sort=MINT_INT)]), MINT_INT), + # KApply with unfilled sort params: sort() returns None rather than raising + ('kapply_unfilled_params', KApply(KLabel('foo'), [KVariable('X', sort=MINT_INT)]), None), + # KApply with unknown label: KeyError from symbols lookup → None + ('kapply_unknown_label', KApply(KLabel('nonexistent'), []), None), + # KAs: sort of the alias variable + ('kas_sorted_alias', KAs(KVariable('X', sort=MINT_INT), KVariable('Y', sort=MINT_INT)), MINT_INT), + # KAs whose alias has no sort annotation: returns None + ('kas_unsorted_alias', KAs(KVariable('X', sort=MINT_INT), KVariable('Y')), None), ) + +@pytest.mark.parametrize( + 'test_id,term,expected', + SORT_DATA, + ids=[test_id for test_id, *_ in SORT_DATA], +) +def test_sort(test_id: str, term: KInner, expected: KSort | None) -> None: + assert DEFN.sort(term) == expected + + +# --------------------------------------------------------------------------- +# KDefinition.resolve_sorts +# --------------------------------------------------------------------------- + +RESOLVE_SORTS_DATA: Final = ( + # Direct substitution: result sort IS the param (N → Int) + ('direct_bar', KLabel('bar', [INT]), INT, (INT,)), + # Recursive substitution: result/arg sort nests the param (MInt{N} with N → Int → MInt{Int}) + ('nested_foo', KLabel('foo', [INT]), MINT_INT, (MINT_INT,)), +) + + +@pytest.mark.parametrize( + 'test_id,label,expected_result,expected_args', + RESOLVE_SORTS_DATA, + ids=[test_id for test_id, *_ in RESOLVE_SORTS_DATA], +) +def test_resolve_sorts(test_id: str, label: KLabel, expected_result: KSort, expected_args: tuple[KSort, ...]) -> None: + result, args = DEFN.resolve_sorts(label) + assert result == expected_result + assert args == expected_args + + +# --------------------------------------------------------------------------- +# KDefinition.add_sort_params +# --------------------------------------------------------------------------- + +ADD_SORT_PARAMS_DATA: Final = ( + # Label already has params filled: leave unchanged + ( + 'already_filled', + KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]), + KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]), + ), + # Direct sort param: psort IS the param (N ~ Int → N=Int) + ( + 'direct_param', + KApply(KLabel('bar'), [KVariable('X', sort=INT)]), + KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]), + ), + # Nested sort param: psort = MInt{N}, asort = MInt{Int} → N=Int via unification + ( + 'nested_param', + KApply(KLabel('foo'), [KVariable('X', sort=MINT_INT)]), + KApply(KLabel('foo', [INT]), [KVariable('X', sort=MINT_INT)]), + ), + # ML pred, no expected sort: S1 inferred from args, S2 (result sort) becomes a fresh sort var. + ( + 'ml_pred_sentinel', + KApply('#Equals', [KVariable('X', sort=INT), KVariable('Y', sort=INT)]), + KApply(KLabel('#Equals', [INT, _sp(0)]), [KVariable('X', sort=INT), KVariable('Y', sort=INT)]), + ), + # Unsortable argument (no sort annotation): cannot fill params, term returned unchanged + ( + 'unsortable_arg_unchanged', + KApply(KLabel('foo'), [KVariable('X')]), + KApply(KLabel('foo'), [KVariable('X')]), + ), + # Subsort-aware: arg sort is Int, but MInt{Int} <: Int in DEFN, so N=Int via subsort match + # (this case would fail with structural-only unification since Int ≠ MInt{N}). + # Note: X keeps sort Int even though the resolved production argument sort is MInt{Int}. + # add_sort_params only fills KLabel sort params; it does not check or modify variable sorts. + ( + 'subsort_aware', + KApply(KLabel('foo'), [KVariable('X', sort=INT)]), + KApply(KLabel('foo', [INT]), [KVariable('X', sort=INT)]), + ), +) + + +@pytest.mark.parametrize( + 'test_id,term,expected', + ADD_SORT_PARAMS_DATA, + ids=[test_id for test_id, *_ in ADD_SORT_PARAMS_DATA], +) +def test_add_sort_params(test_id: str, term: KInner, expected: KInner) -> None: + assert DEFN.add_sort_params(term) == expected + + +def test_add_sort_params_multi_unbound_distinct_sentinels() -> None: + # #Equals with 3 sort params and no expected sort: S1 is inferred from the arguments; S2 (the + # result sort) and S3 (a parameter occurring nowhere) are independent free sorts and so get + # distinct fresh sort variables — the single-sentinel scheme could not distinguish them. + term = KApply('#Equals', [KVariable('X', sort=INT), KVariable('Y', sort=INT)]) + result = DEFN3.add_sort_params(term) + assert isinstance(result, KApply) + assert result.label.params == (INT, _sp(0), _sp(1)) + + +def test_add_sort_params_user_label_free_return_sort() -> None: + # pair{S1,S2}(S1): S1 is bound from the argument; S2 occurs only in the result sort and has + # no determining context, so it becomes a fresh sort variable (best-effort). + term = KApply(KLabel('pair'), [KVariable('X', sort=INT)]) + result = DEFN_PAIR.add_sort_params(term) + assert isinstance(result, KApply) + assert result.label.name == 'pair' + assert result.label.params[0] == INT + assert result.label.params[1].name == '#SortParam' + + +def test_add_sort_params_conflicting_ml_pred_args_warns(caplog: pytest.LogCaptureFixture) -> None: + # #Equals(X:Int, Y:Bool) has a genuine sort mismatch in its shared S1 argument positions (Int + # and Bool have no common supersort). A sort *conflict* must be reported as a warning with + # the label left unfilled (best-effort), not papered over with a sort variable. + term = KApply('#Equals', [KVariable('X', sort=INT), KVariable('Y', sort=BOOL)]) + with caplog.at_level(logging.WARNING): + result = DEFN.add_sort_params(term) + assert result == term + + +# --------------------------------------------------------------------------- +# KDefinition.add_sort_params with an expected sort (top-down resolution) +# --------------------------------------------------------------------------- + +_EQ_ARGS: Final = [KVariable('X', sort=INT), KVariable('Y', sort=INT)] +_CEIL_ARGS: Final = [KVariable('Z', sort=INT)] + +ADD_SORT_PARAMS_EXPECTED_DATA: Final = ( + # ML pred with a concrete expected sort: the free result sort resolves to it (no sort var). + ( + 'ml_pred_expected', + KApply('#Equals', _EQ_ARGS), + GENERATED_TOP_CELL, + KApply(KLabel('#Equals', [INT, GENERATED_TOP_CELL]), _EQ_ARGS), + ), + # #Ceil similarly resolves its result sort from the expected sort. + ( + 'ceil_expected', + KApply('#Ceil', _CEIL_ARGS), + GENERATED_TOP_CELL, + KApply(KLabel('#Ceil', [INT, GENERATED_TOP_CELL]), _CEIL_ARGS), + ), + # An argument-bound parameter is taken from the argument, never from the expected sort: + # N occurs in foo's argument sort MInt{N}, so the conflicting expected sort MInt{Bool} is + # ignored and N resolves to Int (the expected-sort match only applies to return-only params). + ( + 'expected_ignored_for_argbound', + KApply(KLabel('foo'), [KVariable('X', sort=MINT_INT)]), + MINT_BOOL, + KApply(KLabel('foo', [INT]), [KVariable('X', sort=MINT_INT)]), + ), + # Spine: the expected sort threads down through the #And connective so every conjunct's + # result sort resolves to the same concrete sort. + ( + 'spine_concrete', + KApply('#And', [KApply('#Equals', _EQ_ARGS), KApply('#Ceil', _CEIL_ARGS)]), + GENERATED_TOP_CELL, + KApply( + KLabel('#And', [GENERATED_TOP_CELL]), + [ + KApply(KLabel('#Equals', [INT, GENERATED_TOP_CELL]), _EQ_ARGS), + KApply(KLabel('#Ceil', [INT, GENERATED_TOP_CELL]), _CEIL_ARGS), + ], + ), + ), +) + + +@pytest.mark.parametrize( + 'test_id,term,sort,expected', + ADD_SORT_PARAMS_EXPECTED_DATA, + ids=[test_id for test_id, *_ in ADD_SORT_PARAMS_EXPECTED_DATA], +) +def test_add_sort_params_expected(test_id: str, term: KInner, sort: KSort, expected: KInner) -> None: + assert DEFN.add_sort_params(term, sort) == expected + + +def test_add_sort_params_spine_shares_one_sort_variable() -> None: + # No expected sort: a fresh sort variable is synthesized at the top and threads down the + # #And spine, so every conjunct's free result sort is the *same* variable (Q0). + term = KApply('#And', [KApply('#Equals', _EQ_ARGS), KApply('#Ceil', _CEIL_ARGS)]) + expected = KApply( + KLabel('#And', [_sp(0)]), + [ + KApply(KLabel('#Equals', [INT, _sp(0)]), _EQ_ARGS), + KApply(KLabel('#Ceil', [INT, _sp(0)]), _CEIL_ARGS), + ], + ) + assert DEFN.add_sort_params(term) == expected + + +def test_add_sort_params_expected_pair_resolves_return_sort() -> None: + # A user parametric symbol's return-only param resolves from a concrete expected sort. + term = KApply(KLabel('pair'), [KVariable('X', sort=INT)]) + expected = KApply(KLabel('pair', [INT, BOOL]), [KVariable('X', sort=INT)]) + assert DEFN_PAIR.add_sort_params(term, KSort('Pair', (INT, BOOL))) == expected + + +def test_add_sort_params_spine_violation_asserts() -> None: + # A parametric return sort in a position expecting a concrete sort it cannot satisfy (pair, + # whose result Pair{S1,S2} cannot match GeneratedTopCell) is off the spine — an ill-formed + # term that the spine assertion must reject. + term = KApply(KLabel('pair'), [KVariable('X', sort=INT)]) + with pytest.raises(AssertionError, match='off the spine'): + DEFN_PAIR.add_sort_params(term, GENERATED_TOP_CELL) + + +# --------------------------------------------------------------------------- +# KDefinition.least_upper_bound +# --------------------------------------------------------------------------- + +LEAST_UPPER_BOUND_DATA: Final[tuple[tuple[str, tuple[KSort, ...], KSort | None], ...]] = ( + ('singleton', (INT,), INT), + # Comparable: Int <: Number, so the bound is the larger sort. + ('comparable', (INT, NUMBER), NUMBER), + # Siblings with a common supersort: Int and Float are incomparable but both <: Number. + # A pairwise-chain lub would fail here; the set-based one resolves to Number. + ('siblings_common_supersort', (INT, FLOAT), NUMBER), + # Order independence: the same candidate set resolves to Number regardless of order — a + # pairwise fold would succeed for one order and fail for the other. + ('order_independent_a', (INT, NUMBER, FLOAT), NUMBER), + ('order_independent_b', (INT, FLOAT, NUMBER), NUMBER), + # No common supersort: Bool shares no supersort with Int. + ('no_common_supersort', (INT, BOOL), None), +) + + +@pytest.mark.parametrize( + 'test_id,sorts,expected', + LEAST_UPPER_BOUND_DATA, + ids=[test_id for test_id, *_ in LEAST_UPPER_BOUND_DATA], +) +def test_least_upper_bound(test_id: str, sorts: tuple[KSort, ...], expected: KSort | None) -> None: + assert DEFN.least_upper_bound(sorts) == expected + + +# --------------------------------------------------------------------------- +# _match_sort_params (module-level helper) +# --------------------------------------------------------------------------- +# +# Directly tests the three matching strategies described in the docstring. + + +MATCH_SORT_PARAMS_DATA: Final[ + tuple[ + tuple[ + str, KSort, KSort, frozenset[KSort], Callable[[KSort], frozenset[KSort]] | None, dict[KSort, list[KSort]] + ], + ..., + ] +] = ( + # Case 1 – direct: parametric IS a sort param + ('direct', N, INT, frozenset({N}), None, {N: [INT]}), + # Case 2 – structural: same head, recurse on sub-params + ('structural', MINT_N, MINT_INT, frozenset({N}), None, {N: [INT]}), + # Case 2 fails (different heads), no subsorts_fn → empty + ('structural_no_match_no_subsorts', MINT_N, INT, frozenset({N}), None, {}), + # Case 3 – subsort-aware: MInt{N} vs Int; DEFN.subsorts yields MInt{Int} → N=Int + ('subsort_aware', MINT_N, INT, frozenset({N}), DEFN.subsorts, {N: [INT]}), + # No match in any case + ('no_match', INT, BOOL, frozenset({N}), None, {}), +) + + +@pytest.mark.parametrize( + 'test_id,parametric,actual,params,subsorts_fn,expected', + MATCH_SORT_PARAMS_DATA, + ids=[test_id for test_id, *_ in MATCH_SORT_PARAMS_DATA], +) +def test_match_sort_params( + test_id: str, + parametric: KSort, + actual: KSort, + params: frozenset[KSort], + subsorts_fn: Callable[[KSort], frozenset[KSort]] | None, + expected: dict[KSort, list[KSort]], +) -> None: + assert _match_sort_params(parametric, actual, params, subsorts_fn) == expected + + # --------------------------------------------------------------------------- # KDefinition.add_cell_map_items # --------------------------------------------------------------------------- diff --git a/pyk/src/tests/unit/kast/test_inner.py b/pyk/src/tests/unit/kast/test_inner.py index d34bc5fd34..6bba1c070b 100644 --- a/pyk/src/tests/unit/kast/test_inner.py +++ b/pyk/src/tests/unit/kast/test_inner.py @@ -5,7 +5,7 @@ import pytest -from pyk.kast.inner import KVariable, flatten_label, keep_vars_sorted +from pyk.kast.inner import KSort, KVariable, flatten_label, keep_vars_sorted from ..utils import a, f, g, x, y, z @@ -69,3 +69,23 @@ def test_keep_vars_sorted(occurrences: dict[str, list[KVariable]], expected: dic # Then assert actual == expected + + +_INT: Final = KSort('Int') +_N: Final = KSort('N') +_MINT_N: Final = KSort('MInt', (_N,)) +_MINT_INT: Final = KSort('MInt', (_INT,)) + +KSORT_CONTAINS_DATA: Final[tuple[tuple[str, KSort, KSort, bool], ...]] = ( + ('self', _N, _N, True), + ('nested_one_level', _MINT_N, _N, True), + ('nested_two_levels', KSort('Foo', (_MINT_N,)), _N, True), + ('concrete_does_not_contain_param', _MINT_INT, _N, False), + ('unrelated', _INT, _N, False), +) + + +@pytest.mark.parametrize('test_id,sort,other,expected', KSORT_CONTAINS_DATA, ids=[d[0] for d in KSORT_CONTAINS_DATA]) +def test_ksort_contains(test_id: str, sort: KSort, other: KSort, expected: bool) -> None: + # When/Then + assert sort.contains(other) == expected diff --git a/pyk/src/tests/unit/test_konvert.py b/pyk/src/tests/unit/test_konvert.py index 7e070e3f6c..d1afb3f6b8 100644 --- a/pyk/src/tests/unit/test_konvert.py +++ b/pyk/src/tests/unit/test_konvert.py @@ -5,7 +5,11 @@ import pytest +from pyk.kast.inner import KSort +from pyk.kast.prelude.k import SORT_PARAM_SENTINEL from pyk.konvert import munge, unmunge +from pyk.konvert._kast_to_kore import _ksort_to_kore +from pyk.kore.syntax import SortApp if TYPE_CHECKING: from collections.abc import Iterator @@ -49,3 +53,23 @@ def test_unmunge(symbol: str, expected: str) -> None: # Then assert actual == expected + + +def test_ksort_to_kore_ordinary_sort() -> None: + assert _ksort_to_kore(KSort('Int')) == SortApp('SortInt') + + +@pytest.mark.parametrize( + 'sentinel', + ( + SORT_PARAM_SENTINEL, # bare sentinel + KSort(SORT_PARAM_SENTINEL.name, (KSort('Q0'),)), # uniquely-named family member + ), + ids=['bare', 'parametric'], +) +def test_ksort_to_kore_rejects_sortparam_sentinel(sentinel: KSort) -> None: + # The #SortParam sentinel family cannot yet be emitted to Kore (it needs an axiom-level sort + # variable); _ksort_to_kore must fail with a clear, actionable error rather than building the + # invalid identifier `Sort#SortParam`. See pyk/docs/2026-06-01-sortparam-kore-emission.md. + with pytest.raises(ValueError, match=SORT_PARAM_SENTINEL.name): + _ksort_to_kore(sentinel)