From 2d302c655ed1456335cd46cf7755920ecb26c6a8 Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Sat, 25 Apr 2026 01:30:31 +0000 Subject: [PATCH 01/21] test_definition: add unit tests for KDefinition sort(), resolve_sorts(), add_sort_params() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjacent tests (already pass): sort for KVariable, KApply with direct result sort, resolve_sorts direct substitution, add_sort_params with already-filled or direct params. New-feature tests (fail at HEAD, committed with the fixes that make them pass): sort for unfilled KApply (should return None not raise), sort for nested result sort (MInt{N} → MInt{Int}), sort for KAs, resolve_sorts nested substitution, add_sort_params with nested param (MInt{N} ~ MInt{Int} → N=Int), add_sort_params ML pred sentinel. Co-Authored-By: Claude Sonnet 4.6 --- pyk/src/tests/unit/kast/test_definition.py | 170 +++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 pyk/src/tests/unit/kast/test_definition.py diff --git a/pyk/src/tests/unit/kast/test_definition.py b/pyk/src/tests/unit/kast/test_definition.py new file mode 100644 index 0000000000..87c0317561 --- /dev/null +++ b/pyk/src/tests/unit/kast/test_definition.py @@ -0,0 +1,170 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +from pyk.kast.inner import KApply, KAs, KLabel, KSort, KVariable +from pyk.kast.outer import KDefinition, KFlatModule, KNonTerminal, KProduction, KTerminal + +if TYPE_CHECKING: + from typing import Final + + +# --------------------------------------------------------------------------- +# Minimal test definition +# +# bar: syntax N ::= bar(N) [function] -- result sort is the param directly +# foo: syntax MInt{N} ::= foo(MInt{N}) [function] -- result/arg sorts nest the param +# #Equals: syntax S2 ::= #Equals{S1,S2}(S1, S1) -- ML pred with context-dependent result sort +# --------------------------------------------------------------------------- + +INT: Final = KSort('Int') +N: Final = KSort('N') +S1: Final = KSort('S1') +S2: Final = KSort('S2') +MINT_N: Final = KSort('MInt', (N,)) +MINT_INT: Final = KSort('MInt', (INT,)) +SORT_PARAM: Final = KSort('#SortParam') + +_BAR_PROD: Final = KProduction( + sort=N, + items=[KTerminal('bar'), KTerminal('('), KNonTerminal(N), KTerminal(')')], + params=[N], + klabel='bar', +) + +_FOO_PROD: Final = KProduction( + sort=MINT_N, + items=[KTerminal('foo'), KTerminal('('), KNonTerminal(MINT_N), KTerminal(')')], + params=[N], + klabel='foo', +) + +_EQUALS_PROD: Final = KProduction( + sort=S2, + items=[KNonTerminal(S1), KNonTerminal(S1)], + params=[S1, S2], + klabel='#Equals', +) + +DEFN: Final = KDefinition( + 'TEST', + [KFlatModule('TEST', [_BAR_PROD, _FOO_PROD, _EQUALS_PROD])], +) + + +# --------------------------------------------------------------------------- +# KDefinition.sort — adjacent tests (pass at HEAD) +# --------------------------------------------------------------------------- + + +def test_sort_kvariable() -> None: + """sort() returns the explicit sort annotation on a KVariable.""" + assert DEFN.sort(KVariable('X', sort=INT)) == INT + + +def test_sort_kapply_direct_result() -> None: + """sort() for an application whose result sort is the param directly (bar{Int}).""" + term = KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]) + assert DEFN.sort(term) == INT + + +# --------------------------------------------------------------------------- +# KDefinition.resolve_sorts — adjacent tests (pass at HEAD) +# --------------------------------------------------------------------------- + + +def test_resolve_sorts_direct() -> None: + """resolve_sorts handles direct param substitution N → Int.""" + result_sort, arg_sorts = DEFN.resolve_sorts(KLabel('bar', [INT])) + assert result_sort == INT + assert arg_sorts == (INT,) + + +# --------------------------------------------------------------------------- +# KDefinition.add_sort_params — adjacent tests (pass at HEAD) +# --------------------------------------------------------------------------- + + +def test_add_sort_params_already_filled() -> None: + """add_sort_params leaves a label alone when its params are already filled.""" + term = KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]) + assert DEFN.add_sort_params(term) == term + + +def test_add_sort_params_direct_param() -> None: + """add_sort_params fills a direct sort param by inspecting the argument sort.""" + term = KApply(KLabel('bar'), [KVariable('X', sort=INT)]) + expected = KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]) + assert DEFN.add_sort_params(term) == expected + + +# --------------------------------------------------------------------------- +# KDefinition.sort — new-feature tests (fail at HEAD before the fix) +# --------------------------------------------------------------------------- + + +def test_sort_kapply_unfilled_params_returns_none() -> None: + """sort() returns None (not raises) when the KApply label has unfilled sort params.""" + # When label has no params but the production requires them, old code raises ValueError + # from resolve_sorts(); new code catches it and returns None instead. + term = KApply(KLabel('foo'), [KVariable('X', sort=MINT_INT)]) + assert DEFN.sort(term) is None + + +def test_sort_kapply_nested_result_sort() -> None: + """sort() resolves a result sort that nests the sort param (MInt{N} → MInt{Int}).""" + # Old code: resolve_sorts returns MInt{N} because sorts.get(MInt{N}, MInt{N}) leaves + # the nested param unsubstituted. New code recurses into the param tuple. + term = KApply(KLabel('foo', [INT]), [KVariable('X', sort=MINT_INT)]) + assert DEFN.sort(term) == MINT_INT + + +def test_sort_kas() -> None: + """sort() returns the sort of the alias variable in a KAs pattern.""" + # Old code has no KAs case and falls through to case _: return None. + alias = KVariable('Y', sort=MINT_INT) + term = KAs(pattern=KVariable('X', sort=MINT_INT), alias=alias) + assert DEFN.sort(term) == MINT_INT + + +# --------------------------------------------------------------------------- +# KDefinition.resolve_sorts — new-feature tests (fail at HEAD before the fix) +# --------------------------------------------------------------------------- + + +def test_resolve_sorts_nested() -> None: + """resolve_sorts recursively substitutes params nested inside compound sorts.""" + # foo has param N, result sort MInt{N}, arg sort MInt{N}. + # With N → Int, both should resolve to MInt{Int}. + # Old code: sorts.get(MInt{N}, MInt{N}) → MInt{N} unchanged (N is the key, not MInt{N}). + result_sort, arg_sorts = DEFN.resolve_sorts(KLabel('foo', [INT])) + assert result_sort == MINT_INT + assert arg_sorts == (MINT_INT,) + + +# --------------------------------------------------------------------------- +# KDefinition.add_sort_params — new-feature tests (fail at HEAD before the fix) +# --------------------------------------------------------------------------- + + +def test_add_sort_params_nested_param() -> None: + """add_sort_params fills a param that appears nested inside the argument sort.""" + # foo has param N and arg sort MInt{N}. Given arg KVariable('X', sort=MInt{Int}), + # the unifier extracts N → Int from the match MInt{N} ~ MInt{Int}. + # Old code only handled the case psort IS the param (direct); it left nested cases unfilled. + term = KApply(KLabel('foo'), [KVariable('X', sort=MINT_INT)]) + expected = KApply(KLabel('foo', [INT]), [KVariable('X', sort=MINT_INT)]) + assert DEFN.add_sort_params(term) == expected + + +def test_add_sort_params_ml_pred_sentinel() -> None: + """add_sort_params fills the context-dependent result sort of an ML predicate with a sentinel.""" + # #Equals has params [S1, S2] where S2 is the axiom result sort (context-dependent). + # Given args of sort Int, S1 → Int is inferable but S2 is not — so S2 gets the + # sentinel KSort('#SortParam') so that krule_to_kore can introduce sort variable Q0. + # Old code: no ML pred special case, returns the term unchanged (no params filled). + term = KApply('#Equals', [KVariable('X', sort=INT), KVariable('Y', sort=INT)]) + expected = KApply(KLabel('#Equals', [INT, SORT_PARAM]), [KVariable('X', sort=INT), KVariable('Y', sort=INT)]) + assert DEFN.add_sort_params(term) == expected From 2c635f21c27e95eaa0900b87980a00ae88851d7d Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Sat, 25 Apr 2026 02:43:10 +0000 Subject: [PATCH 02/21] test_definition: convert to pytest parametrization, add extra test cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three parametrized test functions (test_sort, test_resolve_sorts, test_add_sort_params) replace the eleven individual test functions. Each has a *_DATA tuple at module scope so new cases can be added by appending one tuple element. New cases added: - test_sort: ktoken, ksequence, kapply_unknown_label (KeyError → None), kas_unsorted_alias (alias with no sort annotation → None) - test_add_sort_params: unsortable_arg_unchanged (arg with no sort annotation prevents param filling — returns term unchanged) Co-Authored-By: Claude Sonnet 4.6 --- pyk/src/tests/unit/kast/test_definition.py | 185 ++++++++++----------- 1 file changed, 87 insertions(+), 98 deletions(-) diff --git a/pyk/src/tests/unit/kast/test_definition.py b/pyk/src/tests/unit/kast/test_definition.py index 87c0317561..dcae7e46e3 100644 --- a/pyk/src/tests/unit/kast/test_definition.py +++ b/pyk/src/tests/unit/kast/test_definition.py @@ -4,19 +4,21 @@ import pytest -from pyk.kast.inner import KApply, KAs, KLabel, KSort, KVariable +from pyk.kast.inner import KApply, KAs, KLabel, KSequence, KSort, KToken, KVariable from pyk.kast.outer import KDefinition, KFlatModule, KNonTerminal, KProduction, KTerminal if TYPE_CHECKING: from typing import Final + from pyk.kast.inner import KInner + # --------------------------------------------------------------------------- # Minimal test definition # -# bar: syntax N ::= bar(N) [function] -- result sort is the param directly -# foo: syntax MInt{N} ::= foo(MInt{N}) [function] -- result/arg sorts nest the param -# #Equals: syntax S2 ::= #Equals{S1,S2}(S1, S1) -- ML pred with context-dependent result sort +# bar: syntax N ::= bar(N) -- result sort is the param directly +# foo: syntax MInt{N} ::= foo(MInt{N}) -- result/arg sorts nest the param +# #Equals: syntax S2 ::= #Equals{S1,S2}(S1, S1) -- ML pred, result sort context-dependent # --------------------------------------------------------------------------- INT: Final = KSort('Int') @@ -55,116 +57,103 @@ # --------------------------------------------------------------------------- -# KDefinition.sort — adjacent tests (pass at HEAD) -# --------------------------------------------------------------------------- - - -def test_sort_kvariable() -> None: - """sort() returns the explicit sort annotation on a KVariable.""" - assert DEFN.sort(KVariable('X', sort=INT)) == INT - - -def test_sort_kapply_direct_result() -> None: - """sort() for an application whose result sort is the param directly (bar{Int}).""" - term = KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]) - assert DEFN.sort(term) == INT - - -# --------------------------------------------------------------------------- -# KDefinition.resolve_sorts — adjacent tests (pass at HEAD) +# KDefinition.sort # --------------------------------------------------------------------------- - -def test_resolve_sorts_direct() -> None: - """resolve_sorts handles direct param substitution N → Int.""" - result_sort, arg_sorts = DEFN.resolve_sorts(KLabel('bar', [INT])) - assert result_sort == INT - assert arg_sorts == (INT,) - - -# --------------------------------------------------------------------------- -# KDefinition.add_sort_params — adjacent tests (pass at HEAD) -# --------------------------------------------------------------------------- - - -def test_add_sort_params_already_filled() -> None: - """add_sort_params leaves a label alone when its params are already filled.""" - term = KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]) - assert DEFN.add_sort_params(term) == term +SORT_DATA: Final = ( + # Basic leaf terms + ('ktoken', KToken('42', INT), INT), + ('kvariable_with_sort', KVariable('X', sort=INT), INT), + ('ksequence', KSequence([]), KSort('K')), + # KApply: result sort substituted directly from param + ('kapply_direct_result', KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]), INT), + # KApply: result sort nests the param (MInt{N} with N→Int → MInt{Int}) + ('kapply_nested_result', KApply(KLabel('foo', [INT]), [KVariable('X', sort=MINT_INT)]), MINT_INT), + # KApply with unfilled sort params: sort() returns None rather than raising + ('kapply_unfilled_params', KApply(KLabel('foo'), [KVariable('X', sort=MINT_INT)]), None), + # KApply with unknown label: KeyError from symbols lookup → None + ('kapply_unknown_label', KApply(KLabel('nonexistent'), []), None), + # KAs: sort of the alias variable + ('kas_sorted_alias', KAs(KVariable('X', sort=MINT_INT), KVariable('Y', sort=MINT_INT)), MINT_INT), + # KAs whose alias has no sort annotation: returns None + ('kas_unsorted_alias', KAs(KVariable('X', sort=MINT_INT), KVariable('Y')), None), +) -def test_add_sort_params_direct_param() -> None: - """add_sort_params fills a direct sort param by inspecting the argument sort.""" - term = KApply(KLabel('bar'), [KVariable('X', sort=INT)]) - expected = KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]) - assert DEFN.add_sort_params(term) == expected +@pytest.mark.parametrize( + 'test_id,term,expected', + SORT_DATA, + ids=[test_id for test_id, *_ in SORT_DATA], +) +def test_sort(test_id: str, term: KInner, expected: KSort | None) -> None: + assert DEFN.sort(term) == expected # --------------------------------------------------------------------------- -# KDefinition.sort — new-feature tests (fail at HEAD before the fix) +# KDefinition.resolve_sorts # --------------------------------------------------------------------------- - -def test_sort_kapply_unfilled_params_returns_none() -> None: - """sort() returns None (not raises) when the KApply label has unfilled sort params.""" - # When label has no params but the production requires them, old code raises ValueError - # from resolve_sorts(); new code catches it and returns None instead. - term = KApply(KLabel('foo'), [KVariable('X', sort=MINT_INT)]) - assert DEFN.sort(term) is None - - -def test_sort_kapply_nested_result_sort() -> None: - """sort() resolves a result sort that nests the sort param (MInt{N} → MInt{Int}).""" - # Old code: resolve_sorts returns MInt{N} because sorts.get(MInt{N}, MInt{N}) leaves - # the nested param unsubstituted. New code recurses into the param tuple. - term = KApply(KLabel('foo', [INT]), [KVariable('X', sort=MINT_INT)]) - assert DEFN.sort(term) == MINT_INT - - -def test_sort_kas() -> None: - """sort() returns the sort of the alias variable in a KAs pattern.""" - # Old code has no KAs case and falls through to case _: return None. - alias = KVariable('Y', sort=MINT_INT) - term = KAs(pattern=KVariable('X', sort=MINT_INT), alias=alias) - assert DEFN.sort(term) == MINT_INT - - -# --------------------------------------------------------------------------- -# KDefinition.resolve_sorts — new-feature tests (fail at HEAD before the fix) -# --------------------------------------------------------------------------- +RESOLVE_SORTS_DATA: Final = ( + # Direct substitution: result sort IS the param (N → Int) + ('direct_bar', KLabel('bar', [INT]), INT, (INT,)), + # Recursive substitution: result/arg sort nests the param (MInt{N} with N → Int → MInt{Int}) + ('nested_foo', KLabel('foo', [INT]), MINT_INT, (MINT_INT,)), +) -def test_resolve_sorts_nested() -> None: - """resolve_sorts recursively substitutes params nested inside compound sorts.""" - # foo has param N, result sort MInt{N}, arg sort MInt{N}. - # With N → Int, both should resolve to MInt{Int}. - # Old code: sorts.get(MInt{N}, MInt{N}) → MInt{N} unchanged (N is the key, not MInt{N}). - result_sort, arg_sorts = DEFN.resolve_sorts(KLabel('foo', [INT])) - assert result_sort == MINT_INT - assert arg_sorts == (MINT_INT,) +@pytest.mark.parametrize( + 'test_id,label,expected_result,expected_args', + RESOLVE_SORTS_DATA, + ids=[test_id for test_id, *_ in RESOLVE_SORTS_DATA], +) +def test_resolve_sorts(test_id: str, label: KLabel, expected_result: KSort, expected_args: tuple[KSort, ...]) -> None: + result, args = DEFN.resolve_sorts(label) + assert result == expected_result + assert args == expected_args # --------------------------------------------------------------------------- -# KDefinition.add_sort_params — new-feature tests (fail at HEAD before the fix) +# KDefinition.add_sort_params # --------------------------------------------------------------------------- - -def test_add_sort_params_nested_param() -> None: - """add_sort_params fills a param that appears nested inside the argument sort.""" - # foo has param N and arg sort MInt{N}. Given arg KVariable('X', sort=MInt{Int}), - # the unifier extracts N → Int from the match MInt{N} ~ MInt{Int}. - # Old code only handled the case psort IS the param (direct); it left nested cases unfilled. - term = KApply(KLabel('foo'), [KVariable('X', sort=MINT_INT)]) - expected = KApply(KLabel('foo', [INT]), [KVariable('X', sort=MINT_INT)]) - assert DEFN.add_sort_params(term) == expected +ADD_SORT_PARAMS_DATA: Final = ( + # Label already has params filled: leave unchanged + ( + 'already_filled', + KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]), + KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]), + ), + # Direct sort param: psort IS the param (N ~ Int → N=Int) + ( + 'direct_param', + KApply(KLabel('bar'), [KVariable('X', sort=INT)]), + KApply(KLabel('bar', [INT]), [KVariable('X', sort=INT)]), + ), + # Nested sort param: psort = MInt{N}, asort = MInt{Int} → N=Int via unification + ( + 'nested_param', + KApply(KLabel('foo'), [KVariable('X', sort=MINT_INT)]), + KApply(KLabel('foo', [INT]), [KVariable('X', sort=MINT_INT)]), + ), + # ML pred: S1 inferred from args, S2 (result sort) filled with #SortParam sentinel + ( + 'ml_pred_sentinel', + KApply('#Equals', [KVariable('X', sort=INT), KVariable('Y', sort=INT)]), + KApply(KLabel('#Equals', [INT, SORT_PARAM]), [KVariable('X', sort=INT), KVariable('Y', sort=INT)]), + ), + # Unsortable argument (no sort annotation): cannot fill params, term returned unchanged + ( + 'unsortable_arg_unchanged', + KApply(KLabel('foo'), [KVariable('X')]), + KApply(KLabel('foo'), [KVariable('X')]), + ), +) -def test_add_sort_params_ml_pred_sentinel() -> None: - """add_sort_params fills the context-dependent result sort of an ML predicate with a sentinel.""" - # #Equals has params [S1, S2] where S2 is the axiom result sort (context-dependent). - # Given args of sort Int, S1 → Int is inferable but S2 is not — so S2 gets the - # sentinel KSort('#SortParam') so that krule_to_kore can introduce sort variable Q0. - # Old code: no ML pred special case, returns the term unchanged (no params filled). - term = KApply('#Equals', [KVariable('X', sort=INT), KVariable('Y', sort=INT)]) - expected = KApply(KLabel('#Equals', [INT, SORT_PARAM]), [KVariable('X', sort=INT), KVariable('Y', sort=INT)]) +@pytest.mark.parametrize( + 'test_id,term,expected', + ADD_SORT_PARAMS_DATA, + ids=[test_id for test_id, *_ in ADD_SORT_PARAMS_DATA], +) +def test_add_sort_params(test_id: str, term: KInner, expected: KInner) -> None: assert DEFN.add_sort_params(term) == expected From bcc2dfc4062d14881c3b12bd8a16215c9922bf2d Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Sat, 25 Apr 2026 03:16:46 +0000 Subject: [PATCH 03/21] test_definition: add unit tests for add_cell_map_items guard Tests both the positive path (wrap when parent production expects the cell map sort) and the negative path (no wrap when parent expects the individual cell element sort) introduced in 78bfdab101. The cell-map productions are merged into the existing single DEFN to keep the fixture count low. Co-Authored-By: Claude Sonnet 4.6 --- pyk/src/tests/unit/kast/test_definition.py | 93 +++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/pyk/src/tests/unit/kast/test_definition.py b/pyk/src/tests/unit/kast/test_definition.py index dcae7e46e3..da397e3916 100644 --- a/pyk/src/tests/unit/kast/test_definition.py +++ b/pyk/src/tests/unit/kast/test_definition.py @@ -4,6 +4,7 @@ import pytest +from pyk.kast.att import Atts, KAtt from pyk.kast.inner import KApply, KAs, KLabel, KSequence, KSort, KToken, KVariable from pyk.kast.outer import KDefinition, KFlatModule, KNonTerminal, KProduction, KTerminal @@ -19,6 +20,12 @@ # bar: syntax N ::= bar(N) -- result sort is the param directly # foo: syntax MInt{N} ::= foo(MInt{N}) -- result/arg sorts nest the param # #Equals: syntax S2 ::= #Equals{S1,S2}(S1, S1) -- ML pred, result sort context-dependent +# +# Cell map fragment: +# AccountCellMap ::= AccountCellMap AccountCellMap [cellCollection, element(AccountCellMapItem), wrapElement()] +# AccountCellMap ::= AccountCellMapItem(Int, AccountCell) +# AccountCell ::= (Int, Int) +# AccountCell ::= getEntry(AccountCell) -- takes element sort, NOT map sort # --------------------------------------------------------------------------- INT: Final = KSort('Int') @@ -28,6 +35,8 @@ MINT_N: Final = KSort('MInt', (N,)) MINT_INT: Final = KSort('MInt', (INT,)) SORT_PARAM: Final = KSort('#SortParam') +ACCOUNT_CELL_MAP: Final = KSort('AccountCellMap') +ACCOUNT_CELL: Final = KSort('AccountCell') _BAR_PROD: Final = KProduction( sort=N, @@ -50,9 +59,52 @@ klabel='#Equals', ) +_ACCT_MAP_CONCAT: Final = KProduction( + sort=ACCOUNT_CELL_MAP, + items=[KNonTerminal(ACCOUNT_CELL_MAP), KNonTerminal(ACCOUNT_CELL_MAP)], + klabel='_AccountCellMap_', + att=KAtt(entries=[Atts.CELL_COLLECTION(None), Atts.ELEMENT('AccountCellMapItem'), Atts.WRAP_ELEMENT('')]), +) + +_ACCT_MAP_ITEM: Final = KProduction( + sort=ACCOUNT_CELL_MAP, + items=[ + KTerminal('AccountCellMapItem'), + KTerminal('('), + KNonTerminal(INT), + KTerminal(','), + KNonTerminal(ACCOUNT_CELL), + KTerminal(')'), + ], + klabel='AccountCellMapItem', +) + +_ACCOUNT_CELL: Final = KProduction( + sort=ACCOUNT_CELL, + items=[ + KTerminal(''), + KTerminal('('), + KNonTerminal(INT), + KTerminal(','), + KNonTerminal(INT), + KTerminal(')'), + ], + klabel='', +) + +_GET_ENTRY: Final = KProduction( + sort=ACCOUNT_CELL, + items=[KTerminal('getEntry'), KTerminal('('), KNonTerminal(ACCOUNT_CELL), KTerminal(')')], + klabel='getEntry', +) + DEFN: Final = KDefinition( 'TEST', - [KFlatModule('TEST', [_BAR_PROD, _FOO_PROD, _EQUALS_PROD])], + [ + KFlatModule( + 'TEST', [_BAR_PROD, _FOO_PROD, _EQUALS_PROD, _ACCT_MAP_CONCAT, _ACCT_MAP_ITEM, _ACCOUNT_CELL, _GET_ENTRY] + ) + ], ) @@ -157,3 +209,42 @@ def test_resolve_sorts(test_id: str, label: KLabel, expected_result: KSort, expe ) def test_add_sort_params(test_id: str, term: KInner, expected: KInner) -> None: assert DEFN.add_sort_params(term) == expected + + +# --------------------------------------------------------------------------- +# KDefinition.add_cell_map_items +# --------------------------------------------------------------------------- + +_ACCT_1: Final = KApply('', [KVariable('X', sort=INT), KVariable('Y', sort=INT)]) +_ACCT_2: Final = KApply('', [KVariable('A', sort=INT), KVariable('B', sort=INT)]) + +ADD_CELL_MAP_ITEMS_DATA: Final = ( + # Parent expects AccountCellMap (the map sort) — children are wrapped in AccountCellMapItem. + ( + 'wraps_when_parent_expects_cell_map_sort', + KApply('_AccountCellMap_', [_ACCT_1, _ACCT_2]), + KApply( + '_AccountCellMap_', + [ + KApply('AccountCellMapItem', [KVariable('X', sort=INT), _ACCT_1]), + KApply('AccountCellMapItem', [KVariable('A', sort=INT), _ACCT_2]), + ], + ), + ), + # Parent expects AccountCell (the element sort) — the child must NOT be wrapped. + # Before the guard fix, _wrap_elements would incorrectly wrap here too. + ( + 'no_wrap_when_parent_expects_cell_element_sort', + KApply('getEntry', [_ACCT_1]), + KApply('getEntry', [_ACCT_1]), + ), +) + + +@pytest.mark.parametrize( + 'test_id,term,expected', + ADD_CELL_MAP_ITEMS_DATA, + ids=[test_id for test_id, *_ in ADD_CELL_MAP_ITEMS_DATA], +) +def test_add_cell_map_items(test_id: str, term: KInner, expected: KInner) -> None: + assert DEFN.add_cell_map_items(term) == expected From e8671f148cc1854ed622d45e082a3a2edc6b02b5 Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Sat, 25 Apr 2026 01:33:05 +0000 Subject: [PATCH 04/21] outer: sort() handles KAs and unfilled KApply; resolve_sorts() recursive substitution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sort() gains a KAs case that returns the alias variable's sort, and the KApply case is now wrapped in try/except so unfilled sort params (e.g. when label.params=[] but the production requires params) return None instead of raising ValueError. resolve_sorts() now recurses into nested compound sorts, enabling N → Int to also substitute inside MInt{N} → MInt{Int}. The old code used sorts.get(sort, sort) which only handled the case where sort IS a parameter, not nested inside one. Makes test_definition tests pass: test_sort_kas, test_sort_kapply_unfilled_params_returns_none, test_sort_kapply_nested_result_sort, test_resolve_sorts_nested. Co-Authored-By: Claude Sonnet 4.6 --- pyk/src/pyk/kast/outer.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index 774a004e5c..a051a580e5 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -17,6 +17,7 @@ from .att import EMPTY_ATT, Atts, Format, KAst, KAtt, WithKAtt from .inner import ( KApply, + KAs, KInner, KLabel, KRewrite, @@ -1327,6 +1328,8 @@ def sort(self, kast: KInner) -> KSort | None: match kast: case KToken(_, sort) | KVariable(_, sort): return sort + case KAs(alias=KVariable(sort=sort)): + return sort case KRewrite(lhs, rhs): lhs_sort = self.sort(lhs) rhs_sort = self.sort(rhs) @@ -1336,8 +1339,11 @@ def sort(self, kast: KInner) -> KSort | None: case KSequence(_): return KSort('K') case KApply(label, _): - sort, _ = self.resolve_sorts(label) - return sort + try: + sort, _ = self.resolve_sorts(label) + return sort + except (KeyError, ValueError): + return None case _: return None @@ -1354,7 +1360,13 @@ def resolve_sorts(self, label: KLabel) -> tuple[KSort, tuple[KSort, ...]]: sorts = dict(zip(prod.params, label.params, strict=True)) def resolve(sort: KSort) -> KSort: - return sorts.get(sort, sort) + # Direct match: sort IS one of the sort parameters. + if sort in sorts: + return sorts[sort] + # Recursive substitution: sort params may appear nested (e.g. MInt{Width} → MInt{8}). + if sort.params: + return KSort(sort.name, tuple(resolve(p) for p in sort.params)) + return sort return resolve(prod.sort), tuple(resolve(sort) for sort in prod.argument_sorts) From f6a136f5704161cff837fd2365acc58babedac36 Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Sat, 25 Apr 2026 01:34:04 +0000 Subject: [PATCH 05/21] outer: add_sort_params() handles nested sort params and ML pred result sort sentinel The old implementation only extracted sort-param bindings when the production argument sort was exactly a sort parameter (psort in prod.params). This missed cases like MInt{Width} ~ MInt{8} where the parameter is nested. New helper _unify_sort_params recursively matches a parametric sort against an actual sort, extracting {Width: 8} from MInt{Width} ~ MInt{8}. _merge_binding handles conflicts and LUB computation when the same parameter appears in multiple argument positions. ML predicates (#Equals, #Ceil, #Floor, #In) have a context-dependent result sort that cannot be inferred from arguments alone. When Sort1 (the operand sort) is determined but Sort2 (the axiom result sort) is not, Sort2 is filled with the sentinel KSort('#SortParam'). Downstream code (_ksort_to_kore) maps this sentinel to SortVar('Q0') to emit the correct axiom{R,Q0} universally-quantified form. Makes test_definition tests pass: test_add_sort_params_nested_param, test_add_sort_params_ml_pred_sentinel. Co-Authored-By: Claude Sonnet 4.6 --- pyk/src/pyk/kast/outer.py | 72 +++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 7 deletions(-) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index a051a580e5..ba8fc327f1 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1495,28 +1495,86 @@ def transform( # Best-effort addition of sort parameters to klabels, context insensitive def add_sort_params(self, kast: KInner) -> KInner: """Return a given term with the sort parameters on the `KLabel` filled in (which may be missing because of how the frontend works), best effort.""" + # ML predicate labels whose result sort (Sort2) is context-dependent and not inferable + # from the arguments alone. When Sort1 can be determined but Sort2 cannot, we fill Sort2 + # with the sentinel KSort('#SortParam') so that downstream Kore emission can introduce a + # universally-quantified sort variable (Q0) in the axiom. + _ML_PRED_RESULT_SORT_PARAM = KSort('#SortParam') # noqa: N806 + _ML_PRED_LABELS = frozenset({'#Equals', '#Ceil', '#Floor', '#In'}) # noqa: N806 + + def _unify_sort_params(parametric: KSort, actual: KSort, params: frozenset[KSort]) -> dict[KSort, KSort]: + """Match parametric sort against actual, extracting bindings for sort params. + + Handles both direct (parametric IS a sort param) and nested + (parametric = MInt{Width}, actual = MInt{8}) cases. + Returns empty dict when no bindings could be extracted (no match). + """ + if parametric in params: + return {parametric: actual} + if parametric.name != actual.name or len(parametric.params) != len(actual.params): + return {} + result: dict[KSort, KSort] = {} + for p_sub, a_sub in zip(parametric.params, actual.params, strict=True): + sub_bindings = _unify_sort_params(p_sub, a_sub, params) + for k, v in sub_bindings.items(): + if k in result and result[k] != v: + return {} # Conflicting bindings + result[k] = v + return result + + def _merge_binding(sort_dict: dict[KSort, KSort], k: KSort, v: KSort) -> bool: + """Merge one binding into sort_dict in place. Returns False on irreconcilable conflict.""" + if k in sort_dict: + existing = sort_dict[k] + if existing == _ML_PRED_RESULT_SORT_PARAM: + sort_dict[k] = v # Concrete sort overrides sentinel. + elif existing != v: + lub = self.least_common_supersort(existing, v) + if lub is None: + _LOGGER.warning(f'Failed to add sort parameter, sort mismatch: {(k, existing, v)}') + return False + sort_dict[k] = lub + else: + sort_dict[k] = v + return True def _add_sort_params(_k: KInner) -> KInner: if type(_k) is KApply: prod = self.symbols[_k.label.name] if len(_k.label.params) == 0 and len(prod.params) > 0: + param_set = frozenset(prod.params) sort_dict: dict[KSort, KSort] = {} for psort, asort in zip(prod.argument_sorts, map(self.sort, _k.args), strict=True): + if asort == _ML_PRED_RESULT_SORT_PARAM: + # #SortParam is the sentinel for an ML pred result sort that cannot be + # inferred bottom-up (e.g. #Equals result sort depends on outer context). + # It propagates upward into ML connectives (#And, #Or, #Not) as a + # placeholder for the axiom sort variable Q0, but a concrete sort takes + # precedence when one is available. + bindings = _unify_sort_params(psort, asort, param_set) + for k, v in bindings.items(): + if k not in sort_dict: # sentinel fills only empty slots + sort_dict[k] = v + continue if asort is None: _LOGGER.warning( f'Failed to add sort parameter, unable to determine sort for argument in production: {(prod, psort, asort)}' ) return _k - if psort in prod.params: - if psort in sort_dict and sort_dict[psort] != asort: - _LOGGER.warning( - f'Failed to add sort parameter, sort mismatch between different occurances of sort parameter: {(prod, psort, sort_dict[psort], asort)}' - ) + # Unify psort with asort to extract bindings for sort params. + # Handles both direct (psort=Width) and nested (psort=MInt{Width}) cases. + bindings = _unify_sort_params(psort, asort, param_set) + for k, v in bindings.items(): + if not _merge_binding(sort_dict, k, v): return _k - elif psort not in sort_dict: - sort_dict[psort] = asort if all(p in sort_dict for p in prod.params): return _k.let(label=KLabel(_k.label.name, [sort_dict[p] for p in prod.params])) + # ML predicates have a context-dependent result sort (Sort2) that cannot be + # inferred from arguments. Fill it with the sentinel so that krule_to_kore can + # introduce a universally-quantified sort variable for the axiom. + if _k.label.name in _ML_PRED_LABELS: + filled = {p: sort_dict.get(p, _ML_PRED_RESULT_SORT_PARAM) for p in prod.params} + return _k.let(label=KLabel(_k.label.name, [filled[p] for p in prod.params])) return _k return bottom_up(_add_sort_params, kast) From f4a76480241648b0068eaca1397145874010c615 Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Sat, 25 Apr 2026 01:35:23 +0000 Subject: [PATCH 06/21] outer: add_cell_map_items wraps only when parent expects cell map sort The old _wrap_elements unconditionally wrapped any KApply matching a cell label, even when the parent production expected the individual cell sort (not the map sort). This caused double-wrapping in productions like EntryCellMapKey((...)) where the argument sort is EntryCell, not EntryCellMap. The new implementation inspects the parent production's expected argument sorts and only wraps when the expected sort matches the cell map sort (e.g. EntryCellMap). Co-Authored-By: Claude Sonnet 4.6 --- pyk/src/pyk/kast/outer.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index ba8fc327f1..ee42a19512 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1585,15 +1585,35 @@ def add_cell_map_items(self, kast: KInner) -> KInner: # syntax AccountCellMap [cellCollection, hook(MAP.Map)] # syntax AccountCellMap ::= AccountCellMap AccountCellMap [assoc, avoid, cellCollection, comm, element(AccountCellMapItem), function, hook(MAP.concat), unit(.AccountCellMap), wrapElement()] - cell_wrappers = {} + # Maps cell label -> (element_constructor, cell_map_sort). + # Wrapping is correct only when the parent production expects the cell MAP sort (e.g. + # EntryCellMap), not when it expects the individual cell element sort (e.g. EntryCell). + # For example, EntryCellMapKey((...)) takes EntryCell — the must NOT be + # wrapped, whereas _EntryCellMap_((...), ...) expects EntryCellMap — wrapping is needed. + cell_wrappers: dict[str, tuple[str, KSort]] = {} for ccp in self.cell_collection_productions: if Atts.ELEMENT in ccp.att and Atts.WRAP_ELEMENT in ccp.att: - cell_wrappers[ccp.att[Atts.WRAP_ELEMENT]] = ccp.att[Atts.ELEMENT] + cell_label = ccp.att[Atts.WRAP_ELEMENT] + element_ctor = ccp.att[Atts.ELEMENT] + if element_ctor in self.symbols: + cell_wrappers[cell_label] = (element_ctor, self.symbols[element_ctor].sort) def _wrap_elements(_k: KInner) -> KInner: - if type(_k) is KApply and _k.label.name in cell_wrappers: - return KApply(cell_wrappers[_k.label.name], [_k.args[0], _k]) - return _k + if not isinstance(_k, KApply) or _k.label.name not in self.symbols: + return _k + prod = self.symbols[_k.label.name] + arg_sorts = prod.argument_sorts + if not arg_sorts or len(arg_sorts) != _k.arity: + return _k + new_args: list[KInner] = list(_k.args) + changed = False + for i, (arg_sort, arg) in enumerate(zip(arg_sorts, _k.args, strict=True)): + if isinstance(arg, KApply) and arg.label.name in cell_wrappers: + element_ctor, cell_map_sort = cell_wrappers[arg.label.name] + if arg_sort == cell_map_sort: + new_args[i] = KApply(element_ctor, [arg.args[0], arg]) + changed = True + return _k.let(args=new_args) if changed else _k # To ensure we don't get duplicate wrappers. _kast = self.remove_cell_map_items(kast) From 1182496b29209722e31d1dd3fa0fac5acd614e6c Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Tue, 28 Apr 2026 23:11:15 +0000 Subject: [PATCH 07/21] outer: assert at most one unbound sort param when using ML pred sentinel The single KSort('#SortParam') sentinel is only unambiguous when exactly one sort parameter is unresolvable bottom-up. Add an assertion that documents this invariant and crashes loudly if it is ever violated, with a message pointing toward the Java-style unique fresh-param fix. Co-Authored-By: Claude Sonnet 4.6 --- pyk/src/pyk/kast/outer.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index ee42a19512..563eec0c52 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1573,6 +1573,18 @@ def _add_sort_params(_k: KInner) -> KInner: # inferred from arguments. Fill it with the sentinel so that krule_to_kore can # introduce a universally-quantified sort variable for the axiom. if _k.label.name in _ML_PRED_LABELS: + unbound = [p for p in prod.params if p not in sort_dict] + # The single sentinel KSort('#SortParam') is only unambiguous when at most + # one parameter is unresolvable bottom-up. All current ML predicates + # (#Equals, #Ceil, #Floor, #In) have exactly two sort params {Sort1, + # Sort2}: Sort1 is always determined by the arguments, Sort2 (the result + # sort) is the one remaining unbound param. If this assertion ever fires, + # the sentinel scheme needs to be replaced with unique fresh params, as + # Java does with #SortParam{Q0}, #SortParam{Q1}, .... + assert len(unbound) <= 1, ( + f'Expected at most one unbound sort parameter for {_k.label.name!r}, ' + f'got {len(unbound)}: {unbound}' + ) filled = {p: sort_dict.get(p, _ML_PRED_RESULT_SORT_PARAM) for p in prod.params} return _k.let(label=KLabel(_k.label.name, [filled[p] for p in prod.params])) return _k From a53e36280345235f6d7ee5dd27e3c8d66287e5ef Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Fri, 8 May 2026 20:43:23 +0000 Subject: [PATCH 08/21] outer, test_definition: replace assert with NotImplementedError for multi-unbound ML pred sort params The single KSort('#SortParam') sentinel can only represent one unbound sort parameter at a time. If an ML predicate ever had two or more unbound params simultaneously the old assert (disabled by -O) would silently pass or raise a bare AssertionError with no guidance. Replace the assert with NotImplementedError that names the fix: generate unique fresh sentinels analogous to Java's #SortParam{Q0}, #SortParam{Q1}, etc. Add a unit test with a hypothetical 3-sort-param #Equals production (two unbound params) that verifies the error is raised. Co-Authored-By: Claude Sonnet 4.6 --- pyk/src/pyk/kast/outer.py | 20 +++++++++++++------- pyk/src/tests/unit/kast/test_definition.py | 22 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index 563eec0c52..e06bf90b59 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1578,13 +1578,19 @@ def _add_sort_params(_k: KInner) -> KInner: # one parameter is unresolvable bottom-up. All current ML predicates # (#Equals, #Ceil, #Floor, #In) have exactly two sort params {Sort1, # Sort2}: Sort1 is always determined by the arguments, Sort2 (the result - # sort) is the one remaining unbound param. If this assertion ever fires, - # the sentinel scheme needs to be replaced with unique fresh params, as - # Java does with #SortParam{Q0}, #SortParam{Q1}, .... - assert len(unbound) <= 1, ( - f'Expected at most one unbound sort parameter for {_k.label.name!r}, ' - f'got {len(unbound)}: {unbound}' - ) + # sort) is the one remaining unbound param. If more than one param is + # unbound, the sentinel scheme must be replaced with unique fresh params + # (e.g. KSort('#SortParam', (KSort('Q0'),)), KSort('#SortParam', (KSort('Q1'),)), ...) + # analogously to how Java's AddSortInjections generates #SortParam{Q0}, + # #SortParam{Q1}, etc. _ksort_to_kore would also need updating to emit + # these as sort variables rather than sort applications. + if len(unbound) > 1: + raise NotImplementedError( + f'ML predicate {_k.label.name!r} has {len(unbound)} unbound sort parameters ' + f'({unbound}); the single-sentinel scheme only handles at most one. ' + f'Implement unique fresh sentinels analogous to Java #SortParam{{Q0}}, ' + f'#SortParam{{Q1}}, ... and update _ksort_to_kore to emit them as sort variables.' + ) filled = {p: sort_dict.get(p, _ML_PRED_RESULT_SORT_PARAM) for p in prod.params} return _k.let(label=KLabel(_k.label.name, [filled[p] for p in prod.params])) return _k diff --git a/pyk/src/tests/unit/kast/test_definition.py b/pyk/src/tests/unit/kast/test_definition.py index da397e3916..6be46f92f8 100644 --- a/pyk/src/tests/unit/kast/test_definition.py +++ b/pyk/src/tests/unit/kast/test_definition.py @@ -32,6 +32,7 @@ N: Final = KSort('N') S1: Final = KSort('S1') S2: Final = KSort('S2') +S3: Final = KSort('S3') MINT_N: Final = KSort('MInt', (N,)) MINT_INT: Final = KSort('MInt', (INT,)) SORT_PARAM: Final = KSort('#SortParam') @@ -59,6 +60,16 @@ klabel='#Equals', ) +# Hypothetical 3-param #Equals to test the multi-unbound-param guard. +# S1 is inferred from arguments; S2 and S3 are both unbound, which the single-sentinel +# scheme cannot handle — add_sort_params must raise NotImplementedError. +_EQUALS3_PROD: Final = KProduction( + sort=S2, + items=[KNonTerminal(S1), KNonTerminal(S1)], + params=[S1, S2, S3], + klabel='#Equals', +) + _ACCT_MAP_CONCAT: Final = KProduction( sort=ACCOUNT_CELL_MAP, items=[KNonTerminal(ACCOUNT_CELL_MAP), KNonTerminal(ACCOUNT_CELL_MAP)], @@ -107,6 +118,9 @@ ], ) +# Definition used only to verify the multi-unbound-param guard in add_sort_params. +DEFN3: Final = KDefinition('TEST3', [KFlatModule('TEST3', [_EQUALS3_PROD])]) + # --------------------------------------------------------------------------- # KDefinition.sort @@ -211,6 +225,14 @@ def test_add_sort_params(test_id: str, term: KInner, expected: KInner) -> None: assert DEFN.add_sort_params(term) == expected +def test_add_sort_params_multi_unbound_raises() -> None: + # #Equals with 3 sort params: S1 is inferred from arguments, S2 and S3 are both unbound. + # The single-sentinel scheme cannot distinguish them, so NotImplementedError must be raised. + term = KApply('#Equals', [KVariable('X', sort=INT), KVariable('Y', sort=INT)]) + with pytest.raises(NotImplementedError, match='2 unbound sort parameters'): + DEFN3.add_sort_params(term) + + # --------------------------------------------------------------------------- # KDefinition.add_cell_map_items # --------------------------------------------------------------------------- From c9a42102a66cc832da8d9b6d529ad9368957875e Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Fri, 8 May 2026 20:54:49 +0000 Subject: [PATCH 09/21] outer, test_definition: warn when user-defined label has unresolvable sort params After the argument loop, if some sort params remain unbound and the label is not an ML predicate, add_sort_params previously fell through silently. Add a _LOGGER.warning that names the label and the unresolved params, matching the existing warning style for the asort-is-None case. Add test_add_sort_params_user_label_unresolvable_warns using a pair(S1,S2) production where S2 does not appear in any argument sort, verifying the warning fires and the term is returned unchanged. Co-Authored-By: Claude Sonnet 4.6 --- pyk/src/pyk/kast/outer.py | 4 ++++ pyk/src/tests/unit/kast/test_definition.py | 24 ++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index e06bf90b59..bd47545028 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1593,6 +1593,10 @@ def _add_sort_params(_k: KInner) -> KInner: ) filled = {p: sort_dict.get(p, _ML_PRED_RESULT_SORT_PARAM) for p in prod.params} return _k.let(label=KLabel(_k.label.name, [filled[p] for p in prod.params])) + unbound = [p for p in prod.params if p not in sort_dict] + _LOGGER.warning( + f'Failed to add sort parameter, could not infer sort params from arguments: {(prod, unbound)}' + ) return _k return bottom_up(_add_sort_params, kast) diff --git a/pyk/src/tests/unit/kast/test_definition.py b/pyk/src/tests/unit/kast/test_definition.py index 6be46f92f8..b0ae216f79 100644 --- a/pyk/src/tests/unit/kast/test_definition.py +++ b/pyk/src/tests/unit/kast/test_definition.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from typing import TYPE_CHECKING import pytest @@ -70,6 +71,16 @@ klabel='#Equals', ) +# User-defined label where S2 does not appear in any argument sort, so it remains +# unbound after argument processing. add_sort_params must emit a warning and +# return the term unchanged (best-effort). +_PAIR_PROD: Final = KProduction( + sort=KSort('Pair', (S1, S2)), + items=[KTerminal('pair'), KTerminal('('), KNonTerminal(S1), KTerminal(')')], + params=[S1, S2], + klabel='pair', +) + _ACCT_MAP_CONCAT: Final = KProduction( sort=ACCOUNT_CELL_MAP, items=[KNonTerminal(ACCOUNT_CELL_MAP), KNonTerminal(ACCOUNT_CELL_MAP)], @@ -121,6 +132,9 @@ # Definition used only to verify the multi-unbound-param guard in add_sort_params. DEFN3: Final = KDefinition('TEST3', [KFlatModule('TEST3', [_EQUALS3_PROD])]) +# Definition used only to verify the unresolvable-user-label warning path. +DEFN_PAIR: Final = KDefinition('TEST_PAIR', [KFlatModule('TEST_PAIR', [_PAIR_PROD])]) + # --------------------------------------------------------------------------- # KDefinition.sort @@ -233,6 +247,16 @@ def test_add_sort_params_multi_unbound_raises() -> None: DEFN3.add_sort_params(term) +def test_add_sort_params_user_label_unresolvable_warns(caplog: pytest.LogCaptureFixture) -> None: + # pair(S1, S2) has S2 absent from arguments — S2 is unbound after inference. + # add_sort_params emits a warning and returns the term unchanged (best-effort). + term = KApply(KLabel('pair'), [KVariable('X', sort=INT)]) + with caplog.at_level(logging.WARNING): + result = DEFN_PAIR.add_sort_params(term) + assert result == term + assert any('could not infer sort params' in record.message for record in caplog.records) + + # --------------------------------------------------------------------------- # KDefinition.add_cell_map_items # --------------------------------------------------------------------------- From 5ff5f458ada9865762867b4287a964e99dc262b4 Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Mon, 11 May 2026 19:55:47 +0000 Subject: [PATCH 10/21] outer, test_definition: port Java AddSortInjections direct sort param inference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract _sort_contains and _match_sort_params as module-level pure functions, implementing the same three-strategy matching algorithm as Java's AddSortInjections.match(): direct sort param binding, structural unification, and subsort-aware matching (iterate s ≤ actual with same head as parametric). Add KDefinition.infer_sort_params() mirroring substituteProd(), including the matchExpected path for binding result-sort-only params from a known expected sort. Simplify add_sort_params() to delegate to infer_sort_params(), keeping only the ML predicate sentinel policy inline. Expand test_definition.py with: test_infer_sort_params (direct, nested, subsort-aware, matchExpected, unbound cases), test_match_sort_params (all three strategies in isolation), test_sort_contains, and a subsort_aware case in the existing test_add_sort_params table. Co-Authored-By: Claude Sonnet 4.6 --- pyk/src/pyk/kast/outer.py | 247 +++++++++++++-------- pyk/src/tests/unit/kast/test_definition.py | 161 +++++++++++++- 2 files changed, 312 insertions(+), 96 deletions(-) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index bd47545028..6feb954b45 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1001,6 +1001,45 @@ def let(self, *, require: str | None = None) -> KRequire: return KRequire(require=require) +def _sort_contains(sort: KSort, param: KSort) -> bool: + """Return whether ``param`` appears anywhere in the sort tree of ``sort``.""" + return sort == param or any(_sort_contains(p, param) for p in sort.params) + + +def _match_sort_params( + parametric: KSort, + actual: KSort, + params: frozenset[KSort], + subsorts_fn: Callable[[KSort], frozenset[KSort]] | None = None, +) -> dict[KSort, list[KSort]]: + """Match ``parametric`` sort against ``actual``, collecting candidate bindings per sort param. + + Three matching strategies, mirroring Java ``AddSortInjections.match()``: + + 1. Direct: ``parametric`` is itself a sort param — bind it to ``actual``. + 2. Structural: same constructor head — recurse on sub-params. + 3. Subsort-aware: iterate subsorts ``s ≤ actual`` with same head as ``parametric``, + collecting additional candidates for LUB resolution. + """ + if parametric in params: + return {parametric: [actual]} + if parametric.name == actual.name and len(parametric.params) == len(actual.params): + result: dict[KSort, list[KSort]] = {} + for p_sub, a_sub in zip(parametric.params, actual.params, strict=True): + for k, vs in _match_sort_params(p_sub, a_sub, params, subsorts_fn).items(): + result.setdefault(k, []).extend(vs) + return result + if parametric.params and subsorts_fn is not None: + result = {} + for s in subsorts_fn(actual): + if s.name == parametric.name and len(s.params) == len(parametric.params): + for p_sub, a_sub in zip(parametric.params, s.params, strict=True): + for k, vs in _match_sort_params(p_sub, a_sub, params).items(): + result.setdefault(k, []).extend(vs) + return result + return {} + + @final @dataclass(frozen=True) class KDefinition(KOuter, WithKAtt, Iterable[KFlatModule]): @@ -1492,6 +1531,61 @@ def transform( return Subst(subst)(new_term) + def infer_sort_params( + self, + prod: KProduction, + actual_sorts: tuple[KSort | None, ...], + expected_sort: KSort | None = None, + ) -> dict[KSort, KSort]: + """Infer sort parameter bindings for a parametric production application. + + Returns a (possibly partial) mapping from sort params to concrete sorts; + unbound parameters are absent from the result. + Mirrors ``AddSortInjections.substituteProd()`` in the Java frontend. + + ``actual_sorts`` must have the same length as ``prod.argument_sorts``. + ``None`` entries are skipped (unsortable arguments). + If ``expected_sort`` is given, parameters that appear only in the result sort + (not in any argument sort) are also inferred from it — this is the + ``matchExpected`` path in the Java algorithm. + """ + params = frozenset(prod.params) + candidates: dict[KSort, list[KSort]] = {} + + for psort, asort in zip(prod.argument_sorts, actual_sorts, strict=True): + if asort is None: + continue + for k, vs in _match_sort_params(psort, asort, params, self.subsorts).items(): + candidates.setdefault(k, []).extend(vs) + + if expected_sort is not None: + unbound_result_params = frozenset( + p + for p in params + if _sort_contains(prod.sort, p) + and not any(_sort_contains(asort, p) for asort in actual_sorts if asort is not None) + ) + if unbound_result_params: + for k, vs in _match_sort_params(prod.sort, expected_sort, unbound_result_params).items(): + candidates.setdefault(k, []).extend(vs) + + result: dict[KSort, KSort] = {} + for p in prod.params: + if p not in candidates: + continue + lub: KSort = candidates[p][0] + for s in candidates[p][1:]: + if lub == s: + continue + new_lub = self.least_common_supersort(lub, s) + if new_lub is None: + break + lub = new_lub + else: + result[p] = lub + + return result + # Best-effort addition of sort parameters to klabels, context insensitive def add_sort_params(self, kast: KInner) -> KInner: """Return a given term with the sort parameters on the `KLabel` filled in (which may be missing because of how the frontend works), best effort.""" @@ -1502,101 +1596,72 @@ def add_sort_params(self, kast: KInner) -> KInner: _ML_PRED_RESULT_SORT_PARAM = KSort('#SortParam') # noqa: N806 _ML_PRED_LABELS = frozenset({'#Equals', '#Ceil', '#Floor', '#In'}) # noqa: N806 - def _unify_sort_params(parametric: KSort, actual: KSort, params: frozenset[KSort]) -> dict[KSort, KSort]: - """Match parametric sort against actual, extracting bindings for sort params. - - Handles both direct (parametric IS a sort param) and nested - (parametric = MInt{Width}, actual = MInt{8}) cases. - Returns empty dict when no bindings could be extracted (no match). - """ - if parametric in params: - return {parametric: actual} - if parametric.name != actual.name or len(parametric.params) != len(actual.params): - return {} - result: dict[KSort, KSort] = {} - for p_sub, a_sub in zip(parametric.params, actual.params, strict=True): - sub_bindings = _unify_sort_params(p_sub, a_sub, params) - for k, v in sub_bindings.items(): - if k in result and result[k] != v: - return {} # Conflicting bindings - result[k] = v - return result + def _add_sort_params(_k: KInner) -> KInner: + if type(_k) is not KApply: + return _k + prod = self.symbols[_k.label.name] + if len(_k.label.params) != 0 or len(prod.params) == 0: + return _k - def _merge_binding(sort_dict: dict[KSort, KSort], k: KSort, v: KSort) -> bool: - """Merge one binding into sort_dict in place. Returns False on irreconcilable conflict.""" - if k in sort_dict: - existing = sort_dict[k] - if existing == _ML_PRED_RESULT_SORT_PARAM: - sort_dict[k] = v # Concrete sort overrides sentinel. - elif existing != v: - lub = self.least_common_supersort(existing, v) - if lub is None: - _LOGGER.warning(f'Failed to add sort parameter, sort mismatch: {(k, existing, v)}') - return False - sort_dict[k] = lub - else: - sort_dict[k] = v - return True + actual_sorts = tuple(map(self.sort, _k.args)) + param_set = frozenset(prod.params) - def _add_sort_params(_k: KInner) -> KInner: - if type(_k) is KApply: - prod = self.symbols[_k.label.name] - if len(_k.label.params) == 0 and len(prod.params) > 0: - param_set = frozenset(prod.params) - sort_dict: dict[KSort, KSort] = {} - for psort, asort in zip(prod.argument_sorts, map(self.sort, _k.args), strict=True): - if asort == _ML_PRED_RESULT_SORT_PARAM: - # #SortParam is the sentinel for an ML pred result sort that cannot be - # inferred bottom-up (e.g. #Equals result sort depends on outer context). - # It propagates upward into ML connectives (#And, #Or, #Not) as a - # placeholder for the axiom sort variable Q0, but a concrete sort takes - # precedence when one is available. - bindings = _unify_sort_params(psort, asort, param_set) - for k, v in bindings.items(): - if k not in sort_dict: # sentinel fills only empty slots - sort_dict[k] = v - continue - if asort is None: - _LOGGER.warning( - f'Failed to add sort parameter, unable to determine sort for argument in production: {(prod, psort, asort)}' - ) - return _k - # Unify psort with asort to extract bindings for sort params. - # Handles both direct (psort=Width) and nested (psort=MInt{Width}) cases. - bindings = _unify_sort_params(psort, asort, param_set) - for k, v in bindings.items(): - if not _merge_binding(sort_dict, k, v): - return _k - if all(p in sort_dict for p in prod.params): - return _k.let(label=KLabel(_k.label.name, [sort_dict[p] for p in prod.params])) - # ML predicates have a context-dependent result sort (Sort2) that cannot be - # inferred from arguments. Fill it with the sentinel so that krule_to_kore can - # introduce a universally-quantified sort variable for the axiom. - if _k.label.name in _ML_PRED_LABELS: - unbound = [p for p in prod.params if p not in sort_dict] - # The single sentinel KSort('#SortParam') is only unambiguous when at most - # one parameter is unresolvable bottom-up. All current ML predicates - # (#Equals, #Ceil, #Floor, #In) have exactly two sort params {Sort1, - # Sort2}: Sort1 is always determined by the arguments, Sort2 (the result - # sort) is the one remaining unbound param. If more than one param is - # unbound, the sentinel scheme must be replaced with unique fresh params - # (e.g. KSort('#SortParam', (KSort('Q0'),)), KSort('#SortParam', (KSort('Q1'),)), ...) - # analogously to how Java's AddSortInjections generates #SortParam{Q0}, - # #SortParam{Q1}, etc. _ksort_to_kore would also need updating to emit - # these as sort variables rather than sort applications. - if len(unbound) > 1: - raise NotImplementedError( - f'ML predicate {_k.label.name!r} has {len(unbound)} unbound sort parameters ' - f'({unbound}); the single-sentinel scheme only handles at most one. ' - f'Implement unique fresh sentinels analogous to Java #SortParam{{Q0}}, ' - f'#SortParam{{Q1}}, ... and update _ksort_to_kore to emit them as sort variables.' - ) - filled = {p: sort_dict.get(p, _ML_PRED_RESULT_SORT_PARAM) for p in prod.params} - return _k.let(label=KLabel(_k.label.name, [filled[p] for p in prod.params])) - unbound = [p for p in prod.params if p not in sort_dict] + # Separate sentinel args from real args; bail out on genuinely unsortable ones. + # Sentinels (#SortParam) propagate from nested ML preds and are handled below. + inference_sorts: list[KSort | None] = [] + for psort, asort in zip(prod.argument_sorts, actual_sorts, strict=True): + if asort == _ML_PRED_RESULT_SORT_PARAM: + inference_sorts.append(None) # skip in inference, propagate as sentinel below + elif asort is None: _LOGGER.warning( - f'Failed to add sort parameter, could not infer sort params from arguments: {(prod, unbound)}' + f'Failed to add sort parameter, unable to determine sort for argument in production: {(prod, psort, asort)}' ) + return _k + else: + inference_sorts.append(asort) + + bindings = self.infer_sort_params(prod, tuple(inference_sorts)) + + # Sentinel propagation: if an arg carried the #SortParam sentinel (from a nested ML + # pred) and inference left that arg's param slot empty, fill it with the sentinel. + # Only direct-param positions (psort IS a param) propagate the sentinel; nested cases + # (psort = MInt{S}) do not, matching the current Java behaviour. + for psort, asort in zip(prod.argument_sorts, actual_sorts, strict=True): + if asort == _ML_PRED_RESULT_SORT_PARAM and psort in param_set and psort not in bindings: + bindings[psort] = _ML_PRED_RESULT_SORT_PARAM + + if all(p in bindings for p in prod.params): + return _k.let(label=KLabel(_k.label.name, [bindings[p] for p in prod.params])) + + # ML predicates have a context-dependent result sort (Sort2) that cannot be + # inferred from arguments. Fill it with the sentinel so that krule_to_kore can + # introduce a universally-quantified sort variable for the axiom. + if _k.label.name in _ML_PRED_LABELS: + unbound = [p for p in prod.params if p not in bindings] + # The single sentinel KSort('#SortParam') is only unambiguous when at most + # one parameter is unresolvable bottom-up. All current ML predicates + # (#Equals, #Ceil, #Floor, #In) have exactly two sort params {Sort1, + # Sort2}: Sort1 is always determined by the arguments, Sort2 (the result + # sort) is the one remaining unbound param. If more than one param is + # unbound, the sentinel scheme must be replaced with unique fresh params + # (e.g. KSort('#SortParam', (KSort('Q0'),)), KSort('#SortParam', (KSort('Q1'),)), ...) + # analogously to how Java's AddSortInjections generates #SortParam{Q0}, + # #SortParam{Q1}, etc. _ksort_to_kore would also need updating to emit + # these as sort variables rather than sort applications. + if len(unbound) > 1: + raise NotImplementedError( + f'ML predicate {_k.label.name!r} has {len(unbound)} unbound sort parameters ' + f'({unbound}); the single-sentinel scheme only handles at most one. ' + f'Implement unique fresh sentinels analogous to Java #SortParam{{Q0}}, ' + f'#SortParam{{Q1}}, ... and update _ksort_to_kore to emit them as sort variables.' + ) + filled = {p: bindings.get(p, _ML_PRED_RESULT_SORT_PARAM) for p in prod.params} + return _k.let(label=KLabel(_k.label.name, [filled[p] for p in prod.params])) + + unbound = [p for p in prod.params if p not in bindings] + _LOGGER.warning( + f'Failed to add sort parameter, could not infer sort params from arguments: {(prod, unbound)}' + ) return _k return bottom_up(_add_sort_params, kast) diff --git a/pyk/src/tests/unit/kast/test_definition.py b/pyk/src/tests/unit/kast/test_definition.py index b0ae216f79..6868303c76 100644 --- a/pyk/src/tests/unit/kast/test_definition.py +++ b/pyk/src/tests/unit/kast/test_definition.py @@ -7,9 +7,18 @@ from pyk.kast.att import Atts, KAtt from pyk.kast.inner import KApply, KAs, KLabel, KSequence, KSort, KToken, KVariable -from pyk.kast.outer import KDefinition, KFlatModule, KNonTerminal, KProduction, KTerminal +from pyk.kast.outer import ( + KDefinition, + KFlatModule, + KNonTerminal, + KProduction, + KTerminal, + _match_sort_params, + _sort_contains, +) if TYPE_CHECKING: + from collections.abc import Callable from typing import Final from pyk.kast.inner import KInner @@ -18,9 +27,12 @@ # --------------------------------------------------------------------------- # Minimal test definition # -# bar: syntax N ::= bar(N) -- result sort is the param directly -# foo: syntax MInt{N} ::= foo(MInt{N}) -- result/arg sorts nest the param -# #Equals: syntax S2 ::= #Equals{S1,S2}(S1, S1) -- ML pred, result sort context-dependent +# bar: syntax N ::= bar(N) -- result sort is the param directly +# foo: syntax MInt{N} ::= foo(MInt{N}) -- result/arg sorts nest the param +# baz: syntax MInt{N} ::= baz() -- no args; param bound only from expected sort +# #Equals: syntax S2 ::= #Equals{S1,S2}(S1, S1) -- ML pred, result sort context-dependent +# +# Subsort: syntax Int ::= MInt{Int} -- MInt{Int} <: Int (enables subsort-aware matching) # # Cell map fragment: # AccountCellMap ::= AccountCellMap AccountCellMap [cellCollection, element(AccountCellMapItem), wrapElement()] @@ -81,6 +93,18 @@ klabel='pair', ) +# syntax MInt{N} ::= baz() — no argument sorts; param N only bound via expected_sort +_BAZ_PROD: Final = KProduction( + sort=MINT_N, + items=[KTerminal('baz'), KTerminal('('), KTerminal(')')], + params=[N], + klabel='baz', +) + +# syntax Int ::= MInt{Int} — subsort declaration: MInt{Int} <: Int +# Enables the subsort-aware matching path (Java AddSortInjections.match step 3). +_MINT_INT_SUBSORT: Final = KProduction(sort=INT, items=[KNonTerminal(MINT_INT)]) + _ACCT_MAP_CONCAT: Final = KProduction( sort=ACCOUNT_CELL_MAP, items=[KNonTerminal(ACCOUNT_CELL_MAP), KNonTerminal(ACCOUNT_CELL_MAP)], @@ -124,7 +148,18 @@ 'TEST', [ KFlatModule( - 'TEST', [_BAR_PROD, _FOO_PROD, _EQUALS_PROD, _ACCT_MAP_CONCAT, _ACCT_MAP_ITEM, _ACCOUNT_CELL, _GET_ENTRY] + 'TEST', + [ + _BAR_PROD, + _FOO_PROD, + _BAZ_PROD, + _EQUALS_PROD, + _MINT_INT_SUBSORT, + _ACCT_MAP_CONCAT, + _ACCT_MAP_ITEM, + _ACCOUNT_CELL, + _GET_ENTRY, + ], ) ], ) @@ -227,6 +262,13 @@ def test_resolve_sorts(test_id: str, label: KLabel, expected_result: KSort, expe KApply(KLabel('foo'), [KVariable('X')]), KApply(KLabel('foo'), [KVariable('X')]), ), + # Subsort-aware: arg sort is Int, but MInt{Int} <: Int in DEFN, so N=Int via subsort match + # (this case would fail with structural-only unification since Int ≠ MInt{N}) + ( + 'subsort_aware', + KApply(KLabel('foo'), [KVariable('X', sort=INT)]), + KApply(KLabel('foo', [INT]), [KVariable('X', sort=INT)]), + ), ) @@ -257,6 +299,115 @@ def test_add_sort_params_user_label_unresolvable_warns(caplog: pytest.LogCapture assert any('could not infer sort params' in record.message for record in caplog.records) +# --------------------------------------------------------------------------- +# KDefinition.infer_sort_params +# --------------------------------------------------------------------------- +# +# Tests the public method directly (not through add_sort_params), mirroring the +# Java AddSortInjections.substituteProd() test scenarios derived from the algorithm. + +INFER_SORT_PARAMS_DATA: Final[ + tuple[tuple[str, KProduction, tuple[KSort | None, ...], KSort | None, dict[KSort, KSort]], ...] +] = ( + # Direct param: psort IS the param (N → Int) + ('direct_param', _BAR_PROD, (INT,), None, {N: INT}), + # Nested param: psort = MInt{N}, asort = MInt{Int} → N=Int via structural match + ('nested_param', _FOO_PROD, (MINT_INT,), None, {N: INT}), + # Subsort-aware: arg sort is Int, MInt{Int} <: Int in DEFN → N=Int via subsort iteration + # (structural match fails: MInt{N} ≠ Int; subsort check finds MInt{Int} ≤ Int) + ('subsort_aware', _FOO_PROD, (INT,), None, {N: INT}), + # matchExpected: baz() has no arg sorts; N is bound from the expected_sort MInt{Int} + ('expected_sort', _BAZ_PROD, (), MINT_INT, {N: INT}), + # None arg is skipped; no bindings → empty result + ('unbound_absent', _BAR_PROD, (None,), None, {}), +) + + +@pytest.mark.parametrize( + 'test_id,prod,actual_sorts,expected_sort,expected_bindings', + INFER_SORT_PARAMS_DATA, + ids=[test_id for test_id, *_ in INFER_SORT_PARAMS_DATA], +) +def test_infer_sort_params( + test_id: str, + prod: KProduction, + actual_sorts: tuple[KSort | None, ...], + expected_sort: KSort | None, + expected_bindings: dict[KSort, KSort], +) -> None: + assert DEFN.infer_sort_params(prod, actual_sorts, expected_sort) == expected_bindings + + +# --------------------------------------------------------------------------- +# _match_sort_params (module-level helper) +# --------------------------------------------------------------------------- +# +# Directly tests the three matching strategies described in the docstring. + + +def _subsorts_fn(s: KSort) -> frozenset[KSort]: + return frozenset({MINT_INT}) if s == INT else frozenset() + + +MATCH_SORT_PARAMS_DATA: Final[ + tuple[ + tuple[ + str, KSort, KSort, frozenset[KSort], Callable[[KSort], frozenset[KSort]] | None, dict[KSort, list[KSort]] + ], + ..., + ] +] = ( + # Case 1 – direct: parametric IS a sort param + ('direct', N, INT, frozenset({N}), None, {N: [INT]}), + # Case 2 – structural: same head, recurse on sub-params + ('structural', MINT_N, MINT_INT, frozenset({N}), None, {N: [INT]}), + # Case 2 fails (different heads), no subsorts_fn → empty + ('structural_no_match_no_subsorts', MINT_N, INT, frozenset({N}), None, {}), + # Case 3 – subsort-aware: MInt{N} vs Int; subsorts_fn yields MInt{Int} → N=Int + ('subsort_aware', MINT_N, INT, frozenset({N}), _subsorts_fn, {N: [INT]}), + # No match in any case + ('no_match', INT, KSort('Bool'), frozenset({N}), None, {}), +) + + +@pytest.mark.parametrize( + 'test_id,parametric,actual,params,subsorts_fn,expected', + MATCH_SORT_PARAMS_DATA, + ids=[test_id for test_id, *_ in MATCH_SORT_PARAMS_DATA], +) +def test_match_sort_params( + test_id: str, + parametric: KSort, + actual: KSort, + params: frozenset[KSort], + subsorts_fn: Callable[[KSort], frozenset[KSort]] | None, + expected: dict[KSort, list[KSort]], +) -> None: + assert _match_sort_params(parametric, actual, params, subsorts_fn) == expected + + +# --------------------------------------------------------------------------- +# _sort_contains (module-level helper) +# --------------------------------------------------------------------------- + +SORT_CONTAINS_DATA: Final = ( + ('param_itself', N, N, True), + ('nested_one_level', MINT_N, N, True), + ('nested_two_levels', KSort('Foo', (MINT_N,)), N, True), + ('concrete_not_param', MINT_INT, N, False), + ('unrelated', INT, N, False), +) + + +@pytest.mark.parametrize( + 'test_id,sort,param,expected', + SORT_CONTAINS_DATA, + ids=[test_id for test_id, *_ in SORT_CONTAINS_DATA], +) +def test_sort_contains(test_id: str, sort: KSort, param: KSort, expected: bool) -> None: + assert _sort_contains(sort, param) == expected + + # --------------------------------------------------------------------------- # KDefinition.add_cell_map_items # --------------------------------------------------------------------------- From 69a35ff861691dee9f5ac7e01bd00ece9864220d Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Wed, 27 May 2026 17:51:03 +0000 Subject: [PATCH 11/21] outer, test_definition: infer_sort_params returns (bindings, inferred_sort) tuple MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return a tuple from infer_sort_params so callers can tell, without inspecting prod.params separately, whether inference produced a fully-determined result sort: - bindings: dict[KSort, KSort | None] — None value signals a detected conflict (candidates found but no common supersort), absent key means no candidates - inferred_sort: KSort | None — None when any sort param in prod.sort is unbound or conflicted; concrete sort otherwise (always concrete for non-parametric prods) Add module-level _resolve_sort_partial helper that substitutes successful bindings into a sort and returns None for unbound params. Update add_sort_params to unpack the tuple (discards the inferred sort; filters None bindings before the existing downstream logic, which is unchanged). Test improvements: - replace hardcoded _subsorts_fn with DEFN.subsorts directly - add conflicting_args and expected_sort_mismatch negative cases - add no_params_trivial case to distinguish "no params needed" from "params exist but no candidates" — both were previously indistinguishable as {} - note in subsort_aware add_sort_params test that variable sorts are not modified Co-Authored-By: Claude Sonnet 4.6 --- pyk/src/pyk/kast/outer.py | 45 ++++++++++++++++--- pyk/src/tests/unit/kast/test_definition.py | 52 +++++++++++++--------- 2 files changed, 70 insertions(+), 27 deletions(-) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index f22194d104..f787013098 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1040,6 +1040,23 @@ def _match_sort_params( return {} +def _resolve_sort_partial( + sort: KSort, + bindings: dict[KSort, KSort], + params: frozenset[KSort], +) -> KSort | None: + """Substitute ``bindings`` into ``sort``, returning ``None`` if any param in + ``params`` appears in ``sort`` but is not present in ``bindings``.""" + if sort in params: + return bindings.get(sort) + if sort.params: + resolved = [_resolve_sort_partial(p, bindings, params) for p in sort.params] + if any(r is None for r in resolved): + return None + return KSort(sort.name, tuple(resolved)) # type: ignore[arg-type] + return sort + + @final @dataclass(frozen=True) class KDefinition(KOuter, WithKAtt, Iterable[KFlatModule]): @@ -1555,11 +1572,23 @@ def infer_sort_params( prod: KProduction, actual_sorts: tuple[KSort | None, ...], expected_sort: KSort | None = None, - ) -> dict[KSort, KSort]: + ) -> tuple[dict[KSort, KSort | None], KSort | None]: """Infer sort parameter bindings for a parametric production application. - Returns a (possibly partial) mapping from sort params to concrete sorts; - unbound parameters are absent from the result. + Returns ``(bindings, inferred_sort)`` where: + + ``bindings`` maps each sort param to one of three states: + - key present, value non-``None``: parameter successfully bound to that sort. + - key present, value ``None``: candidates were found but their LUB could not be + computed (conflicting sorts with no common supersort). + - key absent: no candidates were collected (unsortable / no structural match). + + ``inferred_sort`` is the production's result sort after substituting the + successful bindings. It is ``None`` if any sort param appearing in the result + sort is unbound or conflicted — including the case where ``prod`` is parametric + but no candidates were found. For non-parametric productions it is always the + concrete result sort. + Mirrors ``AddSortInjections.substituteProd()`` in the Java frontend. ``actual_sorts`` must have the same length as ``prod.argument_sorts``. @@ -1588,7 +1617,7 @@ def infer_sort_params( for k, vs in _match_sort_params(prod.sort, expected_sort, unbound_result_params).items(): candidates.setdefault(k, []).extend(vs) - result: dict[KSort, KSort] = {} + result: dict[KSort, KSort | None] = {} for p in prod.params: if p not in candidates: continue @@ -1598,12 +1627,15 @@ def infer_sort_params( continue new_lub = self.least_common_supersort(lub, s) if new_lub is None: + result[p] = None # conflict: candidates found but no common supersort break lub = new_lub else: result[p] = lub - return result + successful: dict[KSort, KSort] = {k: v for k, v in result.items() if v is not None} + inferred_sort = _resolve_sort_partial(prod.sort, successful, params) + return result, inferred_sort # Best-effort addition of sort parameters to klabels, context insensitive def add_sort_params(self, kast: KInner) -> KInner: @@ -1639,7 +1671,8 @@ def _add_sort_params(_k: KInner) -> KInner: else: inference_sorts.append(asort) - bindings = self.infer_sort_params(prod, tuple(inference_sorts)) + bindings_raw, _ = self.infer_sort_params(prod, tuple(inference_sorts)) + bindings: dict[KSort, KSort] = {k: v for k, v in bindings_raw.items() if v is not None} # Sentinel propagation: if an arg carried the #SortParam sentinel (from a nested ML # pred) and inference left that arg's param slot empty, fill it with the sentinel. diff --git a/pyk/src/tests/unit/kast/test_definition.py b/pyk/src/tests/unit/kast/test_definition.py index 6868303c76..3944b77561 100644 --- a/pyk/src/tests/unit/kast/test_definition.py +++ b/pyk/src/tests/unit/kast/test_definition.py @@ -263,7 +263,9 @@ def test_resolve_sorts(test_id: str, label: KLabel, expected_result: KSort, expe KApply(KLabel('foo'), [KVariable('X')]), ), # Subsort-aware: arg sort is Int, but MInt{Int} <: Int in DEFN, so N=Int via subsort match - # (this case would fail with structural-only unification since Int ≠ MInt{N}) + # (this case would fail with structural-only unification since Int ≠ MInt{N}). + # Note: X keeps sort Int even though the resolved production argument sort is MInt{Int}. + # add_sort_params only fills KLabel sort params; it does not check or modify variable sorts. ( 'subsort_aware', KApply(KLabel('foo'), [KVariable('X', sort=INT)]), @@ -307,24 +309,33 @@ def test_add_sort_params_user_label_unresolvable_warns(caplog: pytest.LogCapture # Java AddSortInjections.substituteProd() test scenarios derived from the algorithm. INFER_SORT_PARAMS_DATA: Final[ - tuple[tuple[str, KProduction, tuple[KSort | None, ...], KSort | None, dict[KSort, KSort]], ...] + tuple[ + tuple[str, KProduction, tuple[KSort | None, ...], KSort | None, dict[KSort, KSort | None], KSort | None], + ..., + ] ] = ( - # Direct param: psort IS the param (N → Int) - ('direct_param', _BAR_PROD, (INT,), None, {N: INT}), - # Nested param: psort = MInt{N}, asort = MInt{Int} → N=Int via structural match - ('nested_param', _FOO_PROD, (MINT_INT,), None, {N: INT}), + # Direct param: psort IS the param (N → Int); prod.sort=N → inferred=Int + ('direct_param', _BAR_PROD, (INT,), None, {N: INT}, INT), + # Nested param: psort = MInt{N}, asort = MInt{Int} → N=Int; prod.sort=MInt{N} → inferred=MInt{Int} + ('nested_param', _FOO_PROD, (MINT_INT,), None, {N: INT}, MINT_INT), # Subsort-aware: arg sort is Int, MInt{Int} <: Int in DEFN → N=Int via subsort iteration - # (structural match fails: MInt{N} ≠ Int; subsort check finds MInt{Int} ≤ Int) - ('subsort_aware', _FOO_PROD, (INT,), None, {N: INT}), - # matchExpected: baz() has no arg sorts; N is bound from the expected_sort MInt{Int} - ('expected_sort', _BAZ_PROD, (), MINT_INT, {N: INT}), - # None arg is skipped; no bindings → empty result - ('unbound_absent', _BAR_PROD, (None,), None, {}), + ('subsort_aware', _FOO_PROD, (INT,), None, {N: INT}, MINT_INT), + # matchExpected: baz() has no arg sorts; N bound from expected_sort MInt{Int} → inferred=MInt{Int} + ('expected_sort', _BAZ_PROD, (), MINT_INT, {N: INT}, MINT_INT), + # None arg skipped; N has no candidates → inferred=None (parametric prod, N unbound) + ('unbound_absent', _BAR_PROD, (None,), None, {}, None), + # Conflicting args: S1→[Int,Bool] LUB fails → S1:None; S2 no candidates; prod.sort=S2 → inferred=None + ('conflicting_args', _EQUALS_PROD, (INT, KSort('Bool')), None, {S1: None}, None), + # expected_sort head mismatch: MInt{N} vs Int → no structural match → N absent → inferred=None + ('expected_sort_mismatch', _BAZ_PROD, (), INT, {}, None), + # Non-parametric: no params to bind, result sort is always concrete → inferred=AccountCell ≠ None + # This distinguishes "trivially complete (no params)" from "params exist but no candidates". + ('no_params_trivial', _ACCOUNT_CELL, (INT, INT), None, {}, ACCOUNT_CELL), ) @pytest.mark.parametrize( - 'test_id,prod,actual_sorts,expected_sort,expected_bindings', + 'test_id,prod,actual_sorts,expected_sort,expected_bindings,expected_inferred_sort', INFER_SORT_PARAMS_DATA, ids=[test_id for test_id, *_ in INFER_SORT_PARAMS_DATA], ) @@ -333,9 +344,12 @@ def test_infer_sort_params( prod: KProduction, actual_sorts: tuple[KSort | None, ...], expected_sort: KSort | None, - expected_bindings: dict[KSort, KSort], + expected_bindings: dict[KSort, KSort | None], + expected_inferred_sort: KSort | None, ) -> None: - assert DEFN.infer_sort_params(prod, actual_sorts, expected_sort) == expected_bindings + bindings, inferred_sort = DEFN.infer_sort_params(prod, actual_sorts, expected_sort) + assert bindings == expected_bindings + assert inferred_sort == expected_inferred_sort # --------------------------------------------------------------------------- @@ -345,10 +359,6 @@ def test_infer_sort_params( # Directly tests the three matching strategies described in the docstring. -def _subsorts_fn(s: KSort) -> frozenset[KSort]: - return frozenset({MINT_INT}) if s == INT else frozenset() - - MATCH_SORT_PARAMS_DATA: Final[ tuple[ tuple[ @@ -363,8 +373,8 @@ def _subsorts_fn(s: KSort) -> frozenset[KSort]: ('structural', MINT_N, MINT_INT, frozenset({N}), None, {N: [INT]}), # Case 2 fails (different heads), no subsorts_fn → empty ('structural_no_match_no_subsorts', MINT_N, INT, frozenset({N}), None, {}), - # Case 3 – subsort-aware: MInt{N} vs Int; subsorts_fn yields MInt{Int} → N=Int - ('subsort_aware', MINT_N, INT, frozenset({N}), _subsorts_fn, {N: [INT]}), + # Case 3 – subsort-aware: MInt{N} vs Int; DEFN.subsorts yields MInt{Int} → N=Int + ('subsort_aware', MINT_N, INT, frozenset({N}), DEFN.subsorts, {N: [INT]}), # No match in any case ('no_match', INT, KSort('Bool'), frozenset({N}), None, {}), ) From 0c936a77eebd8abadf82f8cb452da8b25a37cbe9 Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Wed, 27 May 2026 17:52:12 +0000 Subject: [PATCH 12/21] outer: fix _resolve_sort_partial docstring for pydocstyle Co-Authored-By: Claude Sonnet 4.6 --- pyk/src/pyk/kast/outer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index f787013098..aaa8084c9c 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1045,8 +1045,10 @@ def _resolve_sort_partial( bindings: dict[KSort, KSort], params: frozenset[KSort], ) -> KSort | None: - """Substitute ``bindings`` into ``sort``, returning ``None`` if any param in - ``params`` appears in ``sort`` but is not present in ``bindings``.""" + """Substitute ``bindings`` into ``sort``, returning ``None`` if any param is unbound. + + A param in ``params`` is considered unbound if it does not appear in ``bindings``. + """ if sort in params: return bindings.get(sort) if sort.params: From de7cd278defc362450e98edc1d49564fe437d757 Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Mon, 1 Jun 2026 18:53:15 +0000 Subject: [PATCH 13/21] test_definition: add regression tests for sort-inference bugs (infer_sort_params, add_sort_params) Fold the bug-demonstration harness into the permanent unit suite. These cases encode the desired (Java AddSortInjections-equivalent) behaviour and currently FAIL; subsequent commits fix each issue and turn them green. - expected_sort_param_in_arg: matchExpected must skip params already bound by an argument sort (the membership check currently uses concrete actual sorts). - lub_common_supersort + lub_order_independent: candidate LUB must be set-based and order-independent (the pairwise least_common_supersort fold is neither). - conflicting_ml_pred_args: a genuine sort mismatch on an ML predicate must warn and return unchanged, not raise NotImplementedError about the sentinel scheme. Adds f2/f3 productions and a Number ::= Int | Float subsort lattice to the shared test definition; all pre-existing cases remain green. Co-Authored-By: Claude Opus 4.8 --- pyk/src/tests/unit/kast/test_definition.py | 87 +++++++++++++++++++++- 1 file changed, 84 insertions(+), 3 deletions(-) diff --git a/pyk/src/tests/unit/kast/test_definition.py b/pyk/src/tests/unit/kast/test_definition.py index 3944b77561..ec09e065c5 100644 --- a/pyk/src/tests/unit/kast/test_definition.py +++ b/pyk/src/tests/unit/kast/test_definition.py @@ -31,8 +31,13 @@ # foo: syntax MInt{N} ::= foo(MInt{N}) -- result/arg sorts nest the param # baz: syntax MInt{N} ::= baz() -- no args; param bound only from expected sort # #Equals: syntax S2 ::= #Equals{S1,S2}(S1, S1) -- ML pred, result sort context-dependent +# f2: syntax {S} S ::= f2(S, S) -- one param bound from two argument positions +# f3: syntax {S} S ::= f3(S, S, S) -- one param bound from three argument positions # -# Subsort: syntax Int ::= MInt{Int} -- MInt{Int} <: Int (enables subsort-aware matching) +# Subsorts: +# syntax Int ::= MInt{Int} -- MInt{Int} <: Int (enables subsort-aware matching) +# syntax Number ::= Int -- Int <: Number +# syntax Number ::= Float -- Float <: Number (Int, Float incomparable; LUB is Number) # # Cell map fragment: # AccountCellMap ::= AccountCellMap AccountCellMap [cellCollection, element(AccountCellMapItem), wrapElement()] @@ -42,12 +47,17 @@ # --------------------------------------------------------------------------- INT: Final = KSort('Int') +BOOL: Final = KSort('Bool') +FLOAT: Final = KSort('Float') +NUMBER: Final = KSort('Number') N: Final = KSort('N') +S: Final = KSort('S') S1: Final = KSort('S1') S2: Final = KSort('S2') S3: Final = KSort('S3') MINT_N: Final = KSort('MInt', (N,)) MINT_INT: Final = KSort('MInt', (INT,)) +MINT_BOOL: Final = KSort('MInt', (BOOL,)) SORT_PARAM: Final = KSort('#SortParam') ACCOUNT_CELL_MAP: Final = KSort('AccountCellMap') ACCOUNT_CELL: Final = KSort('AccountCell') @@ -105,6 +115,37 @@ # Enables the subsort-aware matching path (Java AddSortInjections.match step 3). _MINT_INT_SUBSORT: Final = KProduction(sort=INT, items=[KNonTerminal(MINT_INT)]) +# syntax {S} S ::= f2(S, S) — one sort param bound from two argument positions. +_F2_PROD: Final = KProduction( + sort=S, + items=[KTerminal('f2'), KTerminal('('), KNonTerminal(S), KTerminal(','), KNonTerminal(S), KTerminal(')')], + params=[S], + klabel='f2', +) + +# syntax {S} S ::= f3(S, S, S) — one sort param bound from three argument positions. +_F3_PROD: Final = KProduction( + sort=S, + items=[ + KTerminal('f3'), + KTerminal('('), + KNonTerminal(S), + KTerminal(','), + KNonTerminal(S), + KTerminal(','), + KNonTerminal(S), + KTerminal(')'), + ], + params=[S], + klabel='f3', +) + +# Subsort lattice with two incomparable subsorts sharing a common supersort: +# syntax Number ::= Int and syntax Number ::= Float +# LUB(Int, Float) = Number, exercising set-based (not pairwise-chain) lub computation. +_NUMBER_INT_SUBSORT: Final = KProduction(sort=NUMBER, items=[KNonTerminal(INT)]) +_NUMBER_FLOAT_SUBSORT: Final = KProduction(sort=NUMBER, items=[KNonTerminal(FLOAT)]) + _ACCT_MAP_CONCAT: Final = KProduction( sort=ACCOUNT_CELL_MAP, items=[KNonTerminal(ACCOUNT_CELL_MAP), KNonTerminal(ACCOUNT_CELL_MAP)], @@ -154,7 +195,11 @@ _FOO_PROD, _BAZ_PROD, _EQUALS_PROD, + _F2_PROD, + _F3_PROD, _MINT_INT_SUBSORT, + _NUMBER_INT_SUBSORT, + _NUMBER_FLOAT_SUBSORT, _ACCT_MAP_CONCAT, _ACCT_MAP_ITEM, _ACCOUNT_CELL, @@ -301,6 +346,19 @@ def test_add_sort_params_user_label_unresolvable_warns(caplog: pytest.LogCapture assert any('could not infer sort params' in record.message for record in caplog.records) +def test_add_sort_params_conflicting_ml_pred_args_warns(caplog: pytest.LogCaptureFixture) -> None: + # Issue #4: #Equals(X:Int, Y:Bool) has a genuine sort mismatch in its shared S1 argument + # positions (Int and Bool have no common supersort). This is distinct from the legitimate + # multi-unbound-param case (test_add_sort_params_multi_unbound_raises), where every param is + # either bound or has no candidates at all. A sort *conflict* must be reported as a warning + # with the term returned unchanged — NOT raised as a NotImplementedError that misattributes + # the failure to the single-sentinel scheme. + term = KApply('#Equals', [KVariable('X', sort=INT), KVariable('Y', sort=BOOL)]) + with caplog.at_level(logging.WARNING): + result = DEFN.add_sort_params(term) + assert result == term + + # --------------------------------------------------------------------------- # KDefinition.infer_sort_params # --------------------------------------------------------------------------- @@ -325,12 +383,23 @@ def test_add_sort_params_user_label_unresolvable_warns(caplog: pytest.LogCapture # None arg skipped; N has no candidates → inferred=None (parametric prod, N unbound) ('unbound_absent', _BAR_PROD, (None,), None, {}, None), # Conflicting args: S1→[Int,Bool] LUB fails → S1:None; S2 no candidates; prod.sort=S2 → inferred=None - ('conflicting_args', _EQUALS_PROD, (INT, KSort('Bool')), None, {S1: None}, None), + ('conflicting_args', _EQUALS_PROD, (INT, BOOL), None, {S1: None}, None), # expected_sort head mismatch: MInt{N} vs Int → no structural match → N absent → inferred=None ('expected_sort_mismatch', _BAZ_PROD, (), INT, {}, None), # Non-parametric: no params to bind, result sort is always concrete → inferred=AccountCell ≠ None # This distinguishes "trivially complete (no params)" from "params exist but no candidates". ('no_params_trivial', _ACCOUNT_CELL, (INT, INT), None, {}, ACCOUNT_CELL), + # ----- regression cases for known sort-inference bugs (fixed in follow-up commits) ----- + # Issue #2 (matchExpected): N occurs in foo's declared argument sort MInt{N}, so it must be + # bound from the argument (Int) and the expected sort MInt{Bool} must be ignored — exactly + # as Java's matchExpected skips params present in a nonterminal sort. The buggy version + # checks the *concrete* actual sorts (which never contain the param N), wrongly also matches + # against the expected sort, and reports a conflict {N: None} from lub(Int, Bool). + ('expected_sort_param_in_arg', _FOO_PROD, (MINT_INT,), MINT_BOOL, {N: INT}, MINT_INT), + # Issue #3 (lub weakness): S gets candidates {Int, Float}; both are subsorts of Number with + # no subsort relation between them, so the LUB is Number. The buggy pairwise fold only + # succeeds when one candidate subsorts the other and reports a conflict {S: None}. + ('lub_common_supersort', _F2_PROD, (INT, FLOAT), None, {S: NUMBER}, NUMBER), ) @@ -352,6 +421,18 @@ def test_infer_sort_params( assert inferred_sort == expected_inferred_sort +def test_infer_sort_params_lub_order_independent() -> None: + # Issue #3: the candidate LUB must not depend on the order candidates were collected. + # f3 binds S from three positions; the candidate set {Int, Number, Float} must resolve to + # Number regardless of argument order. The buggy pairwise fold succeeds for + # (Int, Number, Float) — it reaches Number before encountering Float — but fails for + # (Int, Float, Number) because lub(Int, Float) has no pairwise chain. + bindings_a, _ = DEFN.infer_sort_params(_F3_PROD, (INT, NUMBER, FLOAT)) + bindings_b, _ = DEFN.infer_sort_params(_F3_PROD, (INT, FLOAT, NUMBER)) + assert bindings_a == {S: NUMBER} + assert bindings_b == {S: NUMBER} + + # --------------------------------------------------------------------------- # _match_sort_params (module-level helper) # --------------------------------------------------------------------------- @@ -376,7 +457,7 @@ def test_infer_sort_params( # Case 3 – subsort-aware: MInt{N} vs Int; DEFN.subsorts yields MInt{Int} → N=Int ('subsort_aware', MINT_N, INT, frozenset({N}), DEFN.subsorts, {N: [INT]}), # No match in any case - ('no_match', INT, KSort('Bool'), frozenset({N}), None, {}), + ('no_match', INT, BOOL, frozenset({N}), None, {}), ) From b2820786149b43bc34d4b5426ae12609436d249e Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Mon, 1 Jun 2026 18:54:49 +0000 Subject: [PATCH 14/21] outer: report ML-predicate sort conflicts in add_sort_params instead of raising MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit infer_sort_params maps a param to None when its candidate sorts have no common supersort (a genuine sort mismatch), versus omitting params that have no candidates at all. add_sort_params previously discarded that distinction: a conflicted param looked simply "unbound", so a clash on an ML predicate's argument sort raised NotImplementedError blaming the single-sentinel scheme. Detect conflicts explicitly and warn + return the term unchanged (best-effort), matching the pre-existing behaviour for irreconcilable sorts. The legitimate multi-unbound-result-param guard is unaffected — it only fires when params have no candidates, not when they conflict. Co-Authored-By: Claude Opus 4.8 --- pyk/src/pyk/kast/outer.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index aaa8084c9c..634bff289c 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1676,6 +1676,17 @@ def _add_sort_params(_k: KInner) -> KInner: bindings_raw, _ = self.infer_sort_params(prod, tuple(inference_sorts)) bindings: dict[KSort, KSort] = {k: v for k, v in bindings_raw.items() if v is not None} + # A param mapped to None had candidate sorts that could not be unified (no common + # supersort): a genuine sort mismatch, not a missing binding. This is distinct from + # a param with no candidates at all (the result-sort case handled by the sentinel + # below), so report it and bail rather than papering over it with a sentinel. + conflicts = [p for p, v in bindings_raw.items() if v is None] + if conflicts: + _LOGGER.warning( + f'Failed to add sort parameter, conflicting sorts for sort parameter(s): {(prod, conflicts)}' + ) + return _k + # Sentinel propagation: if an arg carried the #SortParam sentinel (from a nested ML # pred) and inference left that arg's param slot empty, fill it with the sentinel. # Only direct-param positions (psort IS a param) propagate the sentinel; nested cases From 26bbeb4b481e8bdb247151523d77561ff5e86ac2 Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Mon, 1 Jun 2026 18:55:03 +0000 Subject: [PATCH 15/21] outer: fix infer_sort_params matchExpected to test declared argument sorts The matchExpected path infers a sort param from the expected result sort only when that param appears in the result but in no argument sort. The membership check compared the concrete actual_sorts against the param, which can never contain a sort variable, so the condition was always satisfied: params already bound by an argument were wrongly re-matched against the expected sort, adding a spurious candidate that could turn a clean binding into a conflict. Test the production's declared (parametric) argument_sorts instead, mirroring Java's `nt.sort().contains(param)`. Co-Authored-By: Claude Opus 4.8 --- pyk/src/pyk/kast/outer.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index 634bff289c..39cb737799 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1609,11 +1609,15 @@ def infer_sort_params( candidates.setdefault(k, []).extend(vs) if expected_sort is not None: + # Only params that occur in the result sort but in NO declared argument sort are + # inferred from the expected sort (Java matchExpected, AddSortInjections.java:435). + # The membership test must use the production's *declared* (parametric) argument + # sorts — those are what can contain a param `p`; the concrete `actual_sorts` never + # do, so testing them would wrongly treat arg-bound params as expected-only. unbound_result_params = frozenset( p for p in params - if _sort_contains(prod.sort, p) - and not any(_sort_contains(asort, p) for asort in actual_sorts if asort is not None) + if _sort_contains(prod.sort, p) and not any(_sort_contains(psort, p) for psort in prod.argument_sorts) ) if unbound_result_params: for k, vs in _match_sort_params(prod.sort, expected_sort, unbound_result_params).items(): From 4c59f1b510142a9fc51221d3d5af5b208421dbff Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Mon, 1 Jun 2026 19:00:30 +0000 Subject: [PATCH 16/21] outer: add set-based least_upper_bound, use it for infer_sort_params resolution The candidate-resolution step folded sort candidates pairwise through least_common_supersort, which only succeeds when one sort is a subsort of the other. That made the result order-dependent and reported a conflict for sibling candidates that share a common supersort (e.g. Int and Float under Number). Add KDefinition.least_upper_bound, which computes the unique minimal common supersort over the whole subsort lattice (or None when none exists or the bound is ambiguous), mirroring Java AddSortInjections.lub. infer_sort_params now resolves each param's candidate set through it, so inference is order-independent and resolves siblings with a shared supersort. Co-Authored-By: Claude Opus 4.8 --- pyk/src/pyk/kast/outer.py | 48 +++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index 39cb737799..338733fcc2 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1459,6 +1459,34 @@ def least_common_supersort(self, sort1: KSort, sort2: KSort) -> KSort | None: # vice versa. In that case there may be more than one LCS. return None + def least_upper_bound(self, sorts: Iterable[KSort]) -> KSort | None: + """Compute the least upper bound of a set of sorts in the subsort lattice. + + Returns the unique minimal common supersort, or `None` when none exists or the bound is + ambiguous (several incomparable minimal upper bounds). Unlike `least_common_supersort`, + this examines the whole lattice rather than only the pairwise subsort relation, so it is + order-independent and resolves siblings that share a common supersort (e.g. the LUB of + `Int` and `Float` is `Number` when both are subsorts of `Number`). Mirrors the set-based + `AddSortInjections.lub` in the Java frontend. + """ + sort_set = set(sorts) + if not sort_set: + return None + if len(sort_set) == 1: + return single(sort_set) + + def is_supersort(upper: KSort, lower: KSort) -> bool: + return upper == lower or lower in self.subsorts(upper) + + # Every proper supersort of some sort appears as a key in the subsort table; the sorts + # themselves are included to cover a candidate that is already an upper bound of the rest. + universe = set(self.subsort_table.keys()) | sort_set + upper_bounds = {u for u in universe if all(is_supersort(u, s) for s in sort_set)} + minimal = {u for u in upper_bounds if not any(o != u and is_supersort(u, o) for o in upper_bounds)} + if len(minimal) == 1: + return single(minimal) + return None + def greatest_common_subsort(self, sort1: KSort, sort2: KSort) -> KSort | None: """Compute the greatest-lower-bound of two sorts in the sort lattice using very simple approach, returning `None` on failure (not necessarily meaning there isn't a glb).""" if sort1 == sort2: @@ -1623,21 +1651,11 @@ def infer_sort_params( for k, vs in _match_sort_params(prod.sort, expected_sort, unbound_result_params).items(): candidates.setdefault(k, []).extend(vs) - result: dict[KSort, KSort | None] = {} - for p in prod.params: - if p not in candidates: - continue - lub: KSort = candidates[p][0] - for s in candidates[p][1:]: - if lub == s: - continue - new_lub = self.least_common_supersort(lub, s) - if new_lub is None: - result[p] = None # conflict: candidates found but no common supersort - break - lub = new_lub - else: - result[p] = lub + # Resolve each param to the least upper bound of its candidates. A param with candidates + # but no computable LUB maps to None (conflict); a param with no candidates is omitted. + result: dict[KSort, KSort | None] = { + p: self.least_upper_bound(candidates[p]) for p in prod.params if p in candidates + } successful: dict[KSort, KSort] = {k: v for k, v in result.items() if v is not None} inferred_sort = _resolve_sort_partial(prod.sort, successful, params) From 78c766ff4a3819bb4bcf7f674004a2363acc8d54 Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Mon, 1 Jun 2026 19:40:16 +0000 Subject: [PATCH 17/21] prelude/k, konvert/_kast_to_kore, outer: fail clearly on #SortParam sentinel; document full fix add_sort_params fills an uninferable ML-predicate result sort with the #SortParam sentinel sort, but _ksort_to_kore had no handling for it: it built SortApp('Sort#SortParam'), which crashes the Kore Id constructor ('#' is not a legal identifier char). Emitting it correctly needs a universally-quantified sort variable bound at the axiom level (Java's sortParams scheme), which is a larger change requiring integration-level validation. Until that lands, reject the sentinel in _ksort_to_kore with a clear, actionable error instead of a cryptic crash, and document the full fix. - prelude/k: define the shared SORT_PARAM_SENTINEL sort, used by both the inference site (add_sort_params) and the emission site (_ksort_to_kore). - konvert/_kast_to_kore: _ksort_to_kore raises a descriptive ValueError for any member of the sentinel family (name startswith match), pointing at the design doc. - test_konvert: cover the ordinary-sort path and sentinel rejection (bare and uniquely-named forms). - docs/2026-06-01-sortparam-kore-emission.md: design for the full sentinel -> sort-variable fix. - outer: source the sentinel from prelude/k; note the Kore emission step is not yet implemented. Co-Authored-By: Claude Opus 4.8 --- .../2026-06-01-sortparam-kore-emission.md | 59 +++++++++++++++++++ pyk/src/pyk/kast/outer.py | 8 ++- pyk/src/pyk/kast/prelude/k.py | 6 ++ pyk/src/pyk/konvert/_kast_to_kore.py | 14 ++++- pyk/src/tests/unit/test_konvert.py | 24 ++++++++ 5 files changed, 107 insertions(+), 4 deletions(-) create mode 100644 pyk/docs/2026-06-01-sortparam-kore-emission.md diff --git a/pyk/docs/2026-06-01-sortparam-kore-emission.md b/pyk/docs/2026-06-01-sortparam-kore-emission.md new file mode 100644 index 0000000000..2c473b76b5 --- /dev/null +++ b/pyk/docs/2026-06-01-sortparam-kore-emission.md @@ -0,0 +1,59 @@ +# Emitting the `#SortParam` sentinel to Kore + +## Status + +Not implemented. +`_ksort_to_kore` raises a clear error when it encounters the sentinel, rather than producing invalid Kore. +This document describes the full fix so it can be implemented and validated against real Kore output later. + +## Background + +Some ML-predicate productions have a result sort that cannot be inferred from their arguments. +The canonical example is `#Equals{S1, S2}(S1, S1)`: `S1` is determined by the arguments, but `S2` (the result sort) is context-dependent. +When `KDefinition.add_sort_params` fills in a partially-applied ML-predicate label, it can determine `S1` but not `S2`, so it fills `S2` with the sentinel sort `KSort('#SortParam')` (see `add_sort_params` in `src/pyk/kast/outer.py`). + +The Java frontend (`org.kframework.compile.AddSortInjections`) does the same thing, but with two differences that matter for Kore emission: + +- It generates a *fresh, uniquely named* sentinel per unresolved position: `#SortParam{Q0}`, `#SortParam{Q1}`, …. +- It collects those `Qn` names during the traversal and declares them as universally-quantified sort variables on the resulting axiom (surfaced in the `sortParams(...)` rule attribute, and as sort parameters on the Kore `axiom{...}`). + +## The problem in pyk + +`_ksort_to_kore` (in `src/pyk/konvert/_kast_to_kore.py`) is a context-free, term-level converter: it maps every `KSort` to `SortApp('Sort' + name)`. +For the sentinel this yields `SortApp('Sort#SortParam')`, which is doubly wrong: + +- `#` is not a legal Kore identifier character, so the `SortApp`/`Id` constructor raises `ValueError: Expected identifier`. +- Even if it were constructible, a bare sort application referencing a sort that exists in no definition is not what the sentinel means — it must be a *bound sort variable*. + +`krule_to_kore` builds an axiom by calling `kast_to_kore` several independent times (lhs body, rhs body, each constraint), and sets the axiom's sort variables (`Axiom.vars`) to `()` for ordinary rules or `(SortVar('R'),)` for functional rules. +There is no mechanism to (a) emit the sentinel as a sort variable, (b) give it a unique name, or (c) collect it back up and bind it on the axiom. + +## Reachability + +In the normal pyk flow this path is rarely hit. +ML predicates constructed through the prelude (`mlEquals`, `mlEqualsTrue`, `bool_to_ml_pred`, …) always pass explicit sort params (e.g. `KLabel('#Equals', arg_sort, sort)`), so they hit the early return in `add_sort_params` and never produce the sentinel. +The sentinel only arises for ML predicates that reach `add_sort_params` with *missing* sort params (e.g. frontend-loaded or hand-constructed terms) and whose result sort cannot be inferred. + +## Proposed fix + +The goal is to mirror the Java behaviour: each unresolved result sort becomes a uniquely-named, universally-quantified Kore sort variable on the enclosing axiom. + +1. **Unique sentinels.** + Replace the single bare `KSort('#SortParam')` with parametric sentinels `KSort('#SortParam', (KSort('Q0'),))`, `KSort('#SortParam', (KSort('Q1'),))`, …. + This removes the current "at most one unbound param" restriction (the `NotImplementedError` guard in `add_sort_params`) and lets distinct unresolved positions stay distinct. + Generating fresh names requires a counter; since `add_sort_params` runs per `kast_to_kore` call, the counter must be threaded from `krule_to_kore` (or `add_sort_params` must accept a starting index and return the names it used) so that names are unique *across* the several `kast_to_kore` calls that make up one axiom. + +2. **Emit sentinels as sort variables.** + Teach `_ksort_to_kore` (or the `_kapply_to_pattern` sort-emission path) to map `#SortParam{Qn}` to `SortVar('Qn')` instead of a `SortApp`. + +3. **Bind them on the axiom.** + `krule_to_kore` collects every `SortVar` emitted from a sentinel across all of its `kast_to_kore` calls and adds them to `Axiom.vars` (alongside `R` for functional rules), and to the `sortParams(...)` attribute, matching the Java output. + +## Validation + +This change alters Kore axiom output and must be validated end-to-end, not just at the unit level: + +- The konvert integration tests (`src/tests/integration/konvert/`), which require the K toolchain. +- The pyk regression suite (`pyk/regression-new/`), comparing emitted Kore against the Java backend for a definition that actually exercises an uninferable ML-predicate result sort. + +A minimal repro to build first: a rule (loaded from a compiled definition, not constructed via the prelude) containing an ML predicate whose result sort is not determined by its arguments, asserting the emitted axiom binds a sort variable rather than referencing `Sort#SortParam`. diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index 338733fcc2..38bc0abbdd 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -31,7 +31,7 @@ top_down, ) from .kast import kast_term -from .prelude.k import K_ITEM, K +from .prelude.k import K_ITEM, SORT_PARAM_SENTINEL, K from .rewrite import indexed_rewrite if TYPE_CHECKING: @@ -1667,8 +1667,10 @@ def add_sort_params(self, kast: KInner) -> KInner: # ML predicate labels whose result sort (Sort2) is context-dependent and not inferable # from the arguments alone. When Sort1 can be determined but Sort2 cannot, we fill Sort2 # with the sentinel KSort('#SortParam') so that downstream Kore emission can introduce a - # universally-quantified sort variable (Q0) in the axiom. - _ML_PRED_RESULT_SORT_PARAM = KSort('#SortParam') # noqa: N806 + # universally-quantified sort variable (Q0) in the axiom. NOTE: that Kore emission step + # is not yet implemented — _ksort_to_kore rejects the sentinel with a clear error. See + # pyk/docs/2026-06-01-sortparam-kore-emission.md for the design of the full fix. + _ML_PRED_RESULT_SORT_PARAM = SORT_PARAM_SENTINEL # noqa: N806 _ML_PRED_LABELS = frozenset({'#Equals', '#Ceil', '#Floor', '#In'}) # noqa: N806 def _add_sort_params(_k: KInner) -> KInner: diff --git a/pyk/src/pyk/kast/prelude/k.py b/pyk/src/pyk/kast/prelude/k.py index 42324339e8..dbd4e1f8d3 100644 --- a/pyk/src/pyk/kast/prelude/k.py +++ b/pyk/src/pyk/kast/prelude/k.py @@ -14,6 +14,12 @@ K_ITEM: Final = KSort('KItem') GENERATED_TOP_CELL: Final = KSort('GeneratedTopCell') +# Sentinel sort marking an ML-predicate result sort that could not be inferred from arguments +# (introduced by KDefinition.add_sort_params). The family also covers uniquely-named variants +# such as `#SortParam{Q0}` once those are generated. This sort cannot yet be emitted to Kore; +# see pyk/docs/2026-06-01-sortparam-kore-emission.md. +SORT_PARAM_SENTINEL: Final = KSort('#SortParam') + DOTS: Final = KToken('...', K) diff --git a/pyk/src/pyk/konvert/_kast_to_kore.py b/pyk/src/pyk/konvert/_kast_to_kore.py index 1f667e2752..f95de07a66 100644 --- a/pyk/src/pyk/konvert/_kast_to_kore.py +++ b/pyk/src/pyk/konvert/_kast_to_kore.py @@ -9,7 +9,7 @@ from ..kast.manip import bool_to_ml_pred, extract_lhs, extract_rhs, flatten_label, ml_pred_to_bool, var_occurrences from ..kast.outer import KRule from ..kast.prelude.bytes import BYTES, pretty_bytes_str -from ..kast.prelude.k import K_ITEM, K, inj +from ..kast.prelude.k import K_ITEM, SORT_PARAM_SENTINEL, K, inj from ..kast.prelude.kbool import BOOL, TRUE, andBool from ..kast.prelude.ml import mlAnd from ..kast.prelude.string import STRING, pretty_string @@ -356,6 +356,18 @@ def _kimport_to_kore(kimport: KImport) -> Import: def _ksort_to_kore(ksort: KSort) -> SortApp: + # Emitting the #SortParam sentinel correctly requires a universally-quantified sort variable + # bound at the axiom level (a `sortParams({Q0})` attribute, as Java's AddSortInjections does); + # this term-level converter has no way to introduce or bind one. Fail clearly rather than + # building the invalid Kore identifier `Sort#SortParam` ('#' is not a legal identifier char). + # `startswith` covers the whole sentinel family (e.g. a future `#SortParam{Q0}`). The full + # design is in pyk/docs/2026-06-01-sortparam-kore-emission.md. + if ksort.name.startswith(SORT_PARAM_SENTINEL.name): + raise ValueError( + f'Cannot emit the {SORT_PARAM_SENTINEL.name} sentinel sort to Kore: it marks an ML-predicate ' + 'result sort that add_sort_params could not infer and that must become an axiom-level ' + 'sort variable. This is not yet implemented; see pyk/docs/2026-06-01-sortparam-kore-emission.md.' + ) return SortApp('Sort' + ksort.name) diff --git a/pyk/src/tests/unit/test_konvert.py b/pyk/src/tests/unit/test_konvert.py index 7e070e3f6c..d1afb3f6b8 100644 --- a/pyk/src/tests/unit/test_konvert.py +++ b/pyk/src/tests/unit/test_konvert.py @@ -5,7 +5,11 @@ import pytest +from pyk.kast.inner import KSort +from pyk.kast.prelude.k import SORT_PARAM_SENTINEL from pyk.konvert import munge, unmunge +from pyk.konvert._kast_to_kore import _ksort_to_kore +from pyk.kore.syntax import SortApp if TYPE_CHECKING: from collections.abc import Iterator @@ -49,3 +53,23 @@ def test_unmunge(symbol: str, expected: str) -> None: # Then assert actual == expected + + +def test_ksort_to_kore_ordinary_sort() -> None: + assert _ksort_to_kore(KSort('Int')) == SortApp('SortInt') + + +@pytest.mark.parametrize( + 'sentinel', + ( + SORT_PARAM_SENTINEL, # bare sentinel + KSort(SORT_PARAM_SENTINEL.name, (KSort('Q0'),)), # uniquely-named family member + ), + ids=['bare', 'parametric'], +) +def test_ksort_to_kore_rejects_sortparam_sentinel(sentinel: KSort) -> None: + # The #SortParam sentinel family cannot yet be emitted to Kore (it needs an axiom-level sort + # variable); _ksort_to_kore must fail with a clear, actionable error rather than building the + # invalid identifier `Sort#SortParam`. See pyk/docs/2026-06-01-sortparam-kore-emission.md. + with pytest.raises(ValueError, match=SORT_PARAM_SENTINEL.name): + _ksort_to_kore(sentinel) From 0cae6261fd84ad1ac1b71326487b9d4461b603ea Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Tue, 2 Jun 2026 02:15:32 +0000 Subject: [PATCH 18/21] pyk/docs: date-prefix pipeline, regression-triage filenames for chronological sorting Prefix existing pyk/docs markdown with their yyyy-mm-dd add date so the directory sorts chronologically, matching the convention adopted for new design docs. Both files were added 2026-04-27; pure renames with no content or reference changes (neither is wired into the Sphinx toctree or linked elsewhere). Co-Authored-By: Claude Opus 4.8 --- pyk/docs/{pipeline.md => 2026-04-27-pipeline.md} | 0 .../{regression-triage.md => 2026-04-27-regression-triage.md} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename pyk/docs/{pipeline.md => 2026-04-27-pipeline.md} (100%) rename pyk/docs/{regression-triage.md => 2026-04-27-regression-triage.md} (100%) diff --git a/pyk/docs/pipeline.md b/pyk/docs/2026-04-27-pipeline.md similarity index 100% rename from pyk/docs/pipeline.md rename to pyk/docs/2026-04-27-pipeline.md diff --git a/pyk/docs/regression-triage.md b/pyk/docs/2026-04-27-regression-triage.md similarity index 100% rename from pyk/docs/regression-triage.md rename to pyk/docs/2026-04-27-regression-triage.md From a35b570771e903bceb9f57e7401884e06238b13a Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Tue, 2 Jun 2026 02:19:53 +0000 Subject: [PATCH 19/21] inner, outer, test_{inner,definition}: replace _sort_contains helper with KSort.contains method The _sort_contains module-level helper in outer was a pure structural query on a sort. Make it a KSort.contains method instead: it reads cleanly at the call site (prod.sort.contains(p)) and mirrors Java's Sort.contains used by the AddSortInjections algorithm this code ports. The lone caller in infer_sort_params is updated, and the unit test moves to test_inner alongside KSort. Co-Authored-By: Claude Opus 4.8 --- pyk/src/pyk/kast/inner.py | 4 ++++ pyk/src/pyk/kast/outer.py | 7 +------ pyk/src/tests/unit/kast/test_definition.py | 23 ---------------------- pyk/src/tests/unit/kast/test_inner.py | 22 ++++++++++++++++++++- 4 files changed, 26 insertions(+), 30 deletions(-) diff --git a/pyk/src/pyk/kast/inner.py b/pyk/src/pyk/kast/inner.py index 806fdaeb3d..87b27ae2ef 100644 --- a/pyk/src/pyk/kast/inner.py +++ b/pyk/src/pyk/kast/inner.py @@ -51,6 +51,10 @@ def let(self, *, name: str | None = None, params: Iterable[KSort] | None = None) params = params if params is not None else self.params return KSort(name=name, params=params) + def contains(self, sort: KSort) -> bool: + """Return whether ``sort`` appears anywhere in this sort's parameter tree (including itself).""" + return self == sort or any(param.contains(sort) for param in self.params) + @final @dataclass(frozen=True) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index 38bc0abbdd..02d5b1deb6 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1001,11 +1001,6 @@ def let(self, *, require: str | None = None) -> KRequire: return KRequire(require=require) -def _sort_contains(sort: KSort, param: KSort) -> bool: - """Return whether ``param`` appears anywhere in the sort tree of ``sort``.""" - return sort == param or any(_sort_contains(p, param) for p in sort.params) - - def _match_sort_params( parametric: KSort, actual: KSort, @@ -1645,7 +1640,7 @@ def infer_sort_params( unbound_result_params = frozenset( p for p in params - if _sort_contains(prod.sort, p) and not any(_sort_contains(psort, p) for psort in prod.argument_sorts) + if prod.sort.contains(p) and not any(psort.contains(p) for psort in prod.argument_sorts) ) if unbound_result_params: for k, vs in _match_sort_params(prod.sort, expected_sort, unbound_result_params).items(): diff --git a/pyk/src/tests/unit/kast/test_definition.py b/pyk/src/tests/unit/kast/test_definition.py index ec09e065c5..3fc1a6c7ad 100644 --- a/pyk/src/tests/unit/kast/test_definition.py +++ b/pyk/src/tests/unit/kast/test_definition.py @@ -14,7 +14,6 @@ KProduction, KTerminal, _match_sort_params, - _sort_contains, ) if TYPE_CHECKING: @@ -477,28 +476,6 @@ def test_match_sort_params( assert _match_sort_params(parametric, actual, params, subsorts_fn) == expected -# --------------------------------------------------------------------------- -# _sort_contains (module-level helper) -# --------------------------------------------------------------------------- - -SORT_CONTAINS_DATA: Final = ( - ('param_itself', N, N, True), - ('nested_one_level', MINT_N, N, True), - ('nested_two_levels', KSort('Foo', (MINT_N,)), N, True), - ('concrete_not_param', MINT_INT, N, False), - ('unrelated', INT, N, False), -) - - -@pytest.mark.parametrize( - 'test_id,sort,param,expected', - SORT_CONTAINS_DATA, - ids=[test_id for test_id, *_ in SORT_CONTAINS_DATA], -) -def test_sort_contains(test_id: str, sort: KSort, param: KSort, expected: bool) -> None: - assert _sort_contains(sort, param) == expected - - # --------------------------------------------------------------------------- # KDefinition.add_cell_map_items # --------------------------------------------------------------------------- diff --git a/pyk/src/tests/unit/kast/test_inner.py b/pyk/src/tests/unit/kast/test_inner.py index d34bc5fd34..6bba1c070b 100644 --- a/pyk/src/tests/unit/kast/test_inner.py +++ b/pyk/src/tests/unit/kast/test_inner.py @@ -5,7 +5,7 @@ import pytest -from pyk.kast.inner import KVariable, flatten_label, keep_vars_sorted +from pyk.kast.inner import KSort, KVariable, flatten_label, keep_vars_sorted from ..utils import a, f, g, x, y, z @@ -69,3 +69,23 @@ def test_keep_vars_sorted(occurrences: dict[str, list[KVariable]], expected: dic # Then assert actual == expected + + +_INT: Final = KSort('Int') +_N: Final = KSort('N') +_MINT_N: Final = KSort('MInt', (_N,)) +_MINT_INT: Final = KSort('MInt', (_INT,)) + +KSORT_CONTAINS_DATA: Final[tuple[tuple[str, KSort, KSort, bool], ...]] = ( + ('self', _N, _N, True), + ('nested_one_level', _MINT_N, _N, True), + ('nested_two_levels', KSort('Foo', (_MINT_N,)), _N, True), + ('concrete_does_not_contain_param', _MINT_INT, _N, False), + ('unrelated', _INT, _N, False), +) + + +@pytest.mark.parametrize('test_id,sort,other,expected', KSORT_CONTAINS_DATA, ids=[d[0] for d in KSORT_CONTAINS_DATA]) +def test_ksort_contains(test_id: str, sort: KSort, other: KSort, expected: bool) -> None: + # When/Then + assert sort.contains(other) == expected From 0c28054a210777fbe4ec46efa4091bba181fc03f Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Tue, 2 Jun 2026 04:22:52 +0000 Subject: [PATCH 20/21] outer, test_definition: rewrite add_sort_params as top-down/bottom-up sweep with fresh sort vars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the hardcoded ML-predicate special-casing and single #SortParam sentinel with a structural, context-aware algorithm mirroring Java's AddSortInjections. add_sort_params now takes an optional expected `sort` and runs one recursive sweep that threads the expected sort downwards (so a parametric *return* sort is taken from its context) and resolves the remaining parameters from the argument sorts coming back up. Each parameter is either argument-bound (bottom-up) or return-only (top-down); #SortParam sentinels are low-priority placeholders that a real sort always beats. A parameter left genuinely free becomes a unique fresh sort variable (#SortParam{Q0}, {Q1}, ...), and one such variable is shared down a constraint spine. It is asserted that a parametric return sort only stays free on the spine — a concrete expected sort it cannot satisfy is an ill-formed term. This removes the _ML_PRED_LABELS list, the single-sentinel limitation, and the NotImplementedError guard. kast_to_kore is left calling add_sort_params without an expected sort for now (opt-in), so pipeline emission is unchanged pending integration validation; _ksort_to_kore still rejects every #SortParam{Qn}. Co-Authored-By: Claude Opus 4.8 --- pyk/src/pyk/kast/outer.py | 213 ++++++++++++--------- pyk/src/tests/unit/kast/test_definition.py | 154 ++++++++++++--- 2 files changed, 254 insertions(+), 113 deletions(-) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index 02d5b1deb6..138d3f590d 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1656,99 +1656,140 @@ def infer_sort_params( inferred_sort = _resolve_sort_partial(prod.sort, successful, params) return result, inferred_sort - # Best-effort addition of sort parameters to klabels, context insensitive - def add_sort_params(self, kast: KInner) -> KInner: - """Return a given term with the sort parameters on the `KLabel` filled in (which may be missing because of how the frontend works), best effort.""" - # ML predicate labels whose result sort (Sort2) is context-dependent and not inferable - # from the arguments alone. When Sort1 can be determined but Sort2 cannot, we fill Sort2 - # with the sentinel KSort('#SortParam') so that downstream Kore emission can introduce a - # universally-quantified sort variable (Q0) in the axiom. NOTE: that Kore emission step - # is not yet implemented — _ksort_to_kore rejects the sentinel with a clear error. See - # pyk/docs/2026-06-01-sortparam-kore-emission.md for the design of the full fix. - _ML_PRED_RESULT_SORT_PARAM = SORT_PARAM_SENTINEL # noqa: N806 - _ML_PRED_LABELS = frozenset({'#Equals', '#Ceil', '#Floor', '#In'}) # noqa: N806 - - def _add_sort_params(_k: KInner) -> KInner: - if type(_k) is not KApply: - return _k - prod = self.symbols[_k.label.name] - if len(_k.label.params) != 0 or len(prod.params) == 0: - return _k + # Best-effort addition of sort parameters to klabels + def add_sort_params(self, kast: KInner, sort: KSort | None = None) -> KInner: + """Return ``kast`` with missing sort parameters filled in on every parametric ``KLabel``, best effort. + + The frontend often leaves the sort parameters off a parametric ``KLabel``. This fills + them with a single recursive sweep that threads an *expected* sort downwards — so a + parametric **return** sort is taken from its context — and resolves the remaining + parameters from the argument sorts coming back up, mirroring ``AddSortInjections`` in the + Java frontend. + + ``sort`` is the expected sort of the whole term (e.g. ``GeneratedTopCell`` for a rule + body). When omitted, a fresh sort variable is used at the top, so the algorithm still + runs but a parametric return sort that cannot be pinned from context becomes a fresh + ``#SortParam{Qn}`` sort variable. + + Each parameter is either *argument-bound* (occurs in some argument sort, resolved + bottom-up) or *return-only* (resolved top-down from the expected sort). ``#SortParam`` + sentinels act as low-priority placeholders, so a real sort always wins when one is + available. The implementation is one interleaved sweep (``_go``); a future split into + explicit passes can reuse the same per-node logic. + + It is asserted that every parametric return sort lives on the "spine" of the term: a + return-only parameter is only allowed to stay free when its expected sort is itself free + — a concrete expected sort it cannot satisfy means the term is ill-formed. + """ + counter = [0] + + def _fresh() -> KSort: + param = KSort(SORT_PARAM_SENTINEL.name, (KSort(f'Q{counter[0]}'),)) + counter[0] += 1 + return param + + def _is_sentinel(s: KSort | None) -> bool: + return s is not None and s.name == SORT_PARAM_SENTINEL.name + + def _subst_sort(subst: dict[KSort, KSort], s: KSort) -> KSort: + if s in subst: + return subst[s] + if s.params: + return KSort(s.name, tuple(_subst_sort(subst, p) for p in s.params)) + return s + + def _lub(candidates: list[KSort]) -> KSort | None: + # Sentinels are placeholders; a real sort always wins (as Java's lub filters out + # #SortParam). Only when every candidate is a sentinel do we keep one. + real = [c for c in candidates if not _is_sentinel(c)] + if real: + return self.least_upper_bound(real) + return candidates[0] if candidates else None + + def _go(term: KInner, expected: KSort) -> KInner: + if isinstance(term, KRewrite): + return KRewrite(_go(term.lhs, expected), _go(term.rhs, expected)) + if isinstance(term, KAs): + return KAs(_go(term.pattern, expected), _go(term.alias, expected)) + if isinstance(term, KSequence): + return KSequence([_go(item, SORT_PARAM_SENTINEL) for item in term.items]) + if not isinstance(term, KApply): + return term + + prod = self.symbols.get(term.label.name) + if prod is None: + return term + + # Already-applied or non-parametric: thread the expected sort into the children using + # the known bindings, then recurse. + if len(prod.params) == 0 or len(term.label.params) == len(prod.params): + subst = dict(zip(prod.params, term.label.params, strict=True)) if prod.params else {} + child_expected = [_subst_sort(subst, asort) for asort in prod.argument_sorts] + return term.let(args=[_go(arg, e) for arg, e in zip(term.args, child_expected, strict=True)]) + + params = list(prod.params) + param_set = frozenset(params) + + # Mirror Java's `fresh`: the parameter that IS the whole result sort takes the + # expected sort (so it propagates into the children, sharing one sort variable down + # the spine); the rest take a bare sentinel placeholder. Used to type the children. + fresh_subst = {p: (expected if p == prod.sort else SORT_PARAM_SENTINEL) for p in params} + child_expected = [_subst_sort(fresh_subst, asort) for asort in prod.argument_sorts] + new_args = [_go(arg, e) for arg, e in zip(term.args, child_expected, strict=True)] + child_sorts = [self.sort(arg) for arg in new_args] + + # Bottom-up: argument-bound parameters, from the actual child sorts. + candidates: dict[KSort, list[KSort]] = {} + for psort, asort in zip(prod.argument_sorts, child_sorts, strict=True): + if asort is None or _is_sentinel(asort): + continue + for k, vs in _match_sort_params(psort, asort, param_set, self.subsorts).items(): + candidates.setdefault(k, []).extend(vs) - actual_sorts = tuple(map(self.sort, _k.args)) - param_set = frozenset(prod.params) - - # Separate sentinel args from real args; bail out on genuinely unsortable ones. - # Sentinels (#SortParam) propagate from nested ML preds and are handled below. - inference_sorts: list[KSort | None] = [] - for psort, asort in zip(prod.argument_sorts, actual_sorts, strict=True): - if asort == _ML_PRED_RESULT_SORT_PARAM: - inference_sorts.append(None) # skip in inference, propagate as sentinel below - elif asort is None: - _LOGGER.warning( - f'Failed to add sort parameter, unable to determine sort for argument in production: {(prod, psort, asort)}' - ) - return _k - else: - inference_sorts.append(asort) + # Top-down: return-only parameters, from the expected sort (only when it is concrete). + return_only = frozenset( + p + for p in params + if prod.sort.contains(p) and not any(asort.contains(p) for asort in prod.argument_sorts) + ) + if return_only and not _is_sentinel(expected): + for k, vs in _match_sort_params(prod.sort, expected, return_only).items(): + candidates.setdefault(k, []).extend(vs) - bindings_raw, _ = self.infer_sort_params(prod, tuple(inference_sorts)) - bindings: dict[KSort, KSort] = {k: v for k, v in bindings_raw.items() if v is not None} + bindings: dict[KSort, KSort] = {} + unsortable: list[KSort] = [] + for p in params: + value = _lub(candidates.get(p, []) + [fresh_subst[p]]) + if value is None: + _LOGGER.warning(f'Failed to add sort parameter, conflicting sorts for sort parameter: {(prod, p)}') + return term.let(args=new_args) + if not _is_sentinel(value) or value.params: + # A concrete sort, or an already-named sort variable propagated from the + # expected sort (e.g. one shared down the spine) — keep it as is. + bindings[p] = value + elif any(asort.contains(p) for asort in prod.argument_sorts): + # Bare sentinel for an argument-bound parameter: the argument was unsortable. + unsortable.append(p) + else: + # Bare sentinel for a return-only or phantom parameter: a fresh, free sort + # variable. A return sort is only allowed to stay free on the spine — i.e. + # when its own expected sort is itself free. + assert not prod.sort.contains(p) or _is_sentinel(expected), ( + f'parametric return sort {p.name} of {term.label.name!r} occurs off the spine, ' + f'in a position expecting concrete sort {expected.name}' + ) + bindings[p] = _fresh() - # A param mapped to None had candidate sorts that could not be unified (no common - # supersort): a genuine sort mismatch, not a missing binding. This is distinct from - # a param with no candidates at all (the result-sort case handled by the sentinel - # below), so report it and bail rather than papering over it with a sentinel. - conflicts = [p for p, v in bindings_raw.items() if v is None] - if conflicts: + if unsortable: _LOGGER.warning( - f'Failed to add sort parameter, conflicting sorts for sort parameter(s): {(prod, conflicts)}' + f'Failed to add sort parameter, unable to determine sort for argument: {(prod, unsortable)}' ) - return _k + return term.let(args=new_args) - # Sentinel propagation: if an arg carried the #SortParam sentinel (from a nested ML - # pred) and inference left that arg's param slot empty, fill it with the sentinel. - # Only direct-param positions (psort IS a param) propagate the sentinel; nested cases - # (psort = MInt{S}) do not, matching the current Java behaviour. - for psort, asort in zip(prod.argument_sorts, actual_sorts, strict=True): - if asort == _ML_PRED_RESULT_SORT_PARAM and psort in param_set and psort not in bindings: - bindings[psort] = _ML_PRED_RESULT_SORT_PARAM - - if all(p in bindings for p in prod.params): - return _k.let(label=KLabel(_k.label.name, [bindings[p] for p in prod.params])) - - # ML predicates have a context-dependent result sort (Sort2) that cannot be - # inferred from arguments. Fill it with the sentinel so that krule_to_kore can - # introduce a universally-quantified sort variable for the axiom. - if _k.label.name in _ML_PRED_LABELS: - unbound = [p for p in prod.params if p not in bindings] - # The single sentinel KSort('#SortParam') is only unambiguous when at most - # one parameter is unresolvable bottom-up. All current ML predicates - # (#Equals, #Ceil, #Floor, #In) have exactly two sort params {Sort1, - # Sort2}: Sort1 is always determined by the arguments, Sort2 (the result - # sort) is the one remaining unbound param. If more than one param is - # unbound, the sentinel scheme must be replaced with unique fresh params - # (e.g. KSort('#SortParam', (KSort('Q0'),)), KSort('#SortParam', (KSort('Q1'),)), ...) - # analogously to how Java's AddSortInjections generates #SortParam{Q0}, - # #SortParam{Q1}, etc. _ksort_to_kore would also need updating to emit - # these as sort variables rather than sort applications. - if len(unbound) > 1: - raise NotImplementedError( - f'ML predicate {_k.label.name!r} has {len(unbound)} unbound sort parameters ' - f'({unbound}); the single-sentinel scheme only handles at most one. ' - f'Implement unique fresh sentinels analogous to Java #SortParam{{Q0}}, ' - f'#SortParam{{Q1}}, ... and update _ksort_to_kore to emit them as sort variables.' - ) - filled = {p: bindings.get(p, _ML_PRED_RESULT_SORT_PARAM) for p in prod.params} - return _k.let(label=KLabel(_k.label.name, [filled[p] for p in prod.params])) - - unbound = [p for p in prod.params if p not in bindings] - _LOGGER.warning( - f'Failed to add sort parameter, could not infer sort params from arguments: {(prod, unbound)}' - ) - return _k + label = KLabel(term.label.name, [bindings[p] for p in params]) + return term.let(label=label, args=new_args) - return bottom_up(_add_sort_params, kast) + return _go(kast, sort if sort is not None else _fresh()) def add_cell_map_items(self, kast: KInner) -> KInner: """Wrap cell-map items in the syntactical wrapper that the frontend generates for them. diff --git a/pyk/src/tests/unit/kast/test_definition.py b/pyk/src/tests/unit/kast/test_definition.py index 3fc1a6c7ad..33d4480464 100644 --- a/pyk/src/tests/unit/kast/test_definition.py +++ b/pyk/src/tests/unit/kast/test_definition.py @@ -15,6 +15,7 @@ KTerminal, _match_sort_params, ) +from pyk.kast.prelude.k import GENERATED_TOP_CELL if TYPE_CHECKING: from collections.abc import Callable @@ -57,7 +58,6 @@ MINT_N: Final = KSort('MInt', (N,)) MINT_INT: Final = KSort('MInt', (INT,)) MINT_BOOL: Final = KSort('MInt', (BOOL,)) -SORT_PARAM: Final = KSort('#SortParam') ACCOUNT_CELL_MAP: Final = KSort('AccountCellMap') ACCOUNT_CELL: Final = KSort('AccountCell') @@ -82,9 +82,8 @@ klabel='#Equals', ) -# Hypothetical 3-param #Equals to test the multi-unbound-param guard. -# S1 is inferred from arguments; S2 and S3 are both unbound, which the single-sentinel -# scheme cannot handle — add_sort_params must raise NotImplementedError. +# Hypothetical 3-param #Equals. S1 is inferred from arguments; S2 (result) and S3 (occurring +# nowhere) are independent free sorts, so add_sort_params gives each a distinct fresh sort var. _EQUALS3_PROD: Final = KProduction( sort=S2, items=[KNonTerminal(S1), KNonTerminal(S1)], @@ -92,9 +91,8 @@ klabel='#Equals', ) -# User-defined label where S2 does not appear in any argument sort, so it remains -# unbound after argument processing. add_sort_params must emit a warning and -# return the term unchanged (best-effort). +# User-defined label whose S2 occurs only in the result sort (not in any argument). With no +# expected sort it becomes a fresh sort variable; with a matching expected sort it resolves. _PAIR_PROD: Final = KProduction( sort=KSort('Pair', (S1, S2)), items=[KTerminal('pair'), KTerminal('('), KNonTerminal(S1), KTerminal(')')], @@ -145,6 +143,28 @@ _NUMBER_INT_SUBSORT: Final = KProduction(sort=NUMBER, items=[KNonTerminal(INT)]) _NUMBER_FLOAT_SUBSORT: Final = KProduction(sort=NUMBER, items=[KNonTerminal(FLOAT)]) +# #Ceil: syntax S2 ::= #Ceil{S1,S2}(S1) -- ML pred, like #Equals (S2 is the free result sort) +_CEIL_PROD: Final = KProduction( + sort=S2, + items=[KNonTerminal(S1)], + params=[S1, S2], + klabel='#Ceil', +) + +# #And: syntax S ::= #And{S}(S, S) -- homogeneous ML connective (S is arg-bound and the result) +_AND_PROD: Final = KProduction( + sort=S, + items=[KNonTerminal(S), KNonTerminal(S)], + params=[S], + klabel='#And', +) + + +def _sp(n: int) -> KSort: + """The n-th fresh #SortParam sort variable (`#SortParam{Qn}`) the algorithm allocates.""" + return KSort('#SortParam', (KSort(f'Q{n}'),)) + + _ACCT_MAP_CONCAT: Final = KProduction( sort=ACCOUNT_CELL_MAP, items=[KNonTerminal(ACCOUNT_CELL_MAP), KNonTerminal(ACCOUNT_CELL_MAP)], @@ -194,6 +214,8 @@ _FOO_PROD, _BAZ_PROD, _EQUALS_PROD, + _CEIL_PROD, + _AND_PROD, _F2_PROD, _F3_PROD, _MINT_INT_SUBSORT, @@ -294,11 +316,11 @@ def test_resolve_sorts(test_id: str, label: KLabel, expected_result: KSort, expe KApply(KLabel('foo'), [KVariable('X', sort=MINT_INT)]), KApply(KLabel('foo', [INT]), [KVariable('X', sort=MINT_INT)]), ), - # ML pred: S1 inferred from args, S2 (result sort) filled with #SortParam sentinel + # ML pred, no expected sort: S1 inferred from args, S2 (result sort) becomes a fresh sort var. ( 'ml_pred_sentinel', KApply('#Equals', [KVariable('X', sort=INT), KVariable('Y', sort=INT)]), - KApply(KLabel('#Equals', [INT, SORT_PARAM]), [KVariable('X', sort=INT), KVariable('Y', sort=INT)]), + KApply(KLabel('#Equals', [INT, _sp(0)]), [KVariable('X', sort=INT), KVariable('Y', sort=INT)]), ), # Unsortable argument (no sort annotation): cannot fill params, term returned unchanged ( @@ -327,37 +349,115 @@ def test_add_sort_params(test_id: str, term: KInner, expected: KInner) -> None: assert DEFN.add_sort_params(term) == expected -def test_add_sort_params_multi_unbound_raises() -> None: - # #Equals with 3 sort params: S1 is inferred from arguments, S2 and S3 are both unbound. - # The single-sentinel scheme cannot distinguish them, so NotImplementedError must be raised. +def test_add_sort_params_multi_unbound_distinct_sentinels() -> None: + # #Equals with 3 sort params and no expected sort: S1 is inferred from the arguments; S2 (the + # result sort) and S3 (a parameter occurring nowhere) are independent free sorts and so get + # distinct fresh sort variables — the single-sentinel scheme could not distinguish them. term = KApply('#Equals', [KVariable('X', sort=INT), KVariable('Y', sort=INT)]) - with pytest.raises(NotImplementedError, match='2 unbound sort parameters'): - DEFN3.add_sort_params(term) + result = DEFN3.add_sort_params(term) + assert isinstance(result, KApply) + assert result.label.params == (INT, _sp(0), _sp(1)) -def test_add_sort_params_user_label_unresolvable_warns(caplog: pytest.LogCaptureFixture) -> None: - # pair(S1, S2) has S2 absent from arguments — S2 is unbound after inference. - # add_sort_params emits a warning and returns the term unchanged (best-effort). +def test_add_sort_params_user_label_free_return_sort() -> None: + # pair{S1,S2}(S1): S1 is bound from the argument; S2 occurs only in the result sort and has + # no determining context, so it becomes a fresh sort variable (best-effort). term = KApply(KLabel('pair'), [KVariable('X', sort=INT)]) - with caplog.at_level(logging.WARNING): - result = DEFN_PAIR.add_sort_params(term) - assert result == term - assert any('could not infer sort params' in record.message for record in caplog.records) + result = DEFN_PAIR.add_sort_params(term) + assert isinstance(result, KApply) + assert result.label.name == 'pair' + assert result.label.params[0] == INT + assert result.label.params[1].name == '#SortParam' def test_add_sort_params_conflicting_ml_pred_args_warns(caplog: pytest.LogCaptureFixture) -> None: - # Issue #4: #Equals(X:Int, Y:Bool) has a genuine sort mismatch in its shared S1 argument - # positions (Int and Bool have no common supersort). This is distinct from the legitimate - # multi-unbound-param case (test_add_sort_params_multi_unbound_raises), where every param is - # either bound or has no candidates at all. A sort *conflict* must be reported as a warning - # with the term returned unchanged — NOT raised as a NotImplementedError that misattributes - # the failure to the single-sentinel scheme. + # #Equals(X:Int, Y:Bool) has a genuine sort mismatch in its shared S1 argument positions (Int + # and Bool have no common supersort). A sort *conflict* must be reported as a warning with + # the label left unfilled (best-effort), not papered over with a sort variable. term = KApply('#Equals', [KVariable('X', sort=INT), KVariable('Y', sort=BOOL)]) with caplog.at_level(logging.WARNING): result = DEFN.add_sort_params(term) assert result == term +# --------------------------------------------------------------------------- +# KDefinition.add_sort_params with an expected sort (top-down resolution) +# --------------------------------------------------------------------------- + +_EQ_ARGS: Final = [KVariable('X', sort=INT), KVariable('Y', sort=INT)] +_CEIL_ARGS: Final = [KVariable('Z', sort=INT)] + +ADD_SORT_PARAMS_EXPECTED_DATA: Final = ( + # ML pred with a concrete expected sort: the free result sort resolves to it (no sort var). + ( + 'ml_pred_expected', + KApply('#Equals', _EQ_ARGS), + GENERATED_TOP_CELL, + KApply(KLabel('#Equals', [INT, GENERATED_TOP_CELL]), _EQ_ARGS), + ), + # #Ceil similarly resolves its result sort from the expected sort. + ( + 'ceil_expected', + KApply('#Ceil', _CEIL_ARGS), + GENERATED_TOP_CELL, + KApply(KLabel('#Ceil', [INT, GENERATED_TOP_CELL]), _CEIL_ARGS), + ), + # Spine: the expected sort threads down through the #And connective so every conjunct's + # result sort resolves to the same concrete sort. + ( + 'spine_concrete', + KApply('#And', [KApply('#Equals', _EQ_ARGS), KApply('#Ceil', _CEIL_ARGS)]), + GENERATED_TOP_CELL, + KApply( + KLabel('#And', [GENERATED_TOP_CELL]), + [ + KApply(KLabel('#Equals', [INT, GENERATED_TOP_CELL]), _EQ_ARGS), + KApply(KLabel('#Ceil', [INT, GENERATED_TOP_CELL]), _CEIL_ARGS), + ], + ), + ), +) + + +@pytest.mark.parametrize( + 'test_id,term,sort,expected', + ADD_SORT_PARAMS_EXPECTED_DATA, + ids=[test_id for test_id, *_ in ADD_SORT_PARAMS_EXPECTED_DATA], +) +def test_add_sort_params_expected(test_id: str, term: KInner, sort: KSort, expected: KInner) -> None: + assert DEFN.add_sort_params(term, sort) == expected + + +def test_add_sort_params_spine_shares_one_sort_variable() -> None: + # No expected sort: a fresh sort variable is synthesized at the top and threads down the + # #And spine, so every conjunct's free result sort is the *same* variable (Q0). + term = KApply('#And', [KApply('#Equals', _EQ_ARGS), KApply('#Ceil', _CEIL_ARGS)]) + expected = KApply( + KLabel('#And', [_sp(0)]), + [ + KApply(KLabel('#Equals', [INT, _sp(0)]), _EQ_ARGS), + KApply(KLabel('#Ceil', [INT, _sp(0)]), _CEIL_ARGS), + ], + ) + assert DEFN.add_sort_params(term) == expected + + +def test_add_sort_params_expected_pair_resolves_return_sort() -> None: + # A user parametric symbol's return-only param resolves from a concrete expected sort. + term = KApply(KLabel('pair'), [KVariable('X', sort=INT)]) + expected = KApply(KLabel('pair', [INT, BOOL]), [KVariable('X', sort=INT)]) + assert DEFN_PAIR.add_sort_params(term, KSort('Pair', (INT, BOOL))) == expected + + +def test_add_sort_params_spine_violation_asserts() -> None: + # A parametric return sort in a position expecting a concrete sort it cannot satisfy (pair, + # whose result Pair{S1,S2} cannot match GeneratedTopCell) is off the spine — an ill-formed + # term that the spine assertion must reject. + term = KApply(KLabel('pair'), [KVariable('X', sort=INT)]) + with pytest.raises(AssertionError, match='off the spine'): + DEFN_PAIR.add_sort_params(term, GENERATED_TOP_CELL) + + # --------------------------------------------------------------------------- # KDefinition.infer_sort_params # --------------------------------------------------------------------------- From 5411b3f01389defd740b1ed959d636946978817f Mon Sep 17 00:00:00 2001 From: Everett Hildenbrandt Date: Tue, 2 Jun 2026 19:30:30 +0000 Subject: [PATCH 21/21] outer, test_definition: drop dead infer_sort_params/_resolve_sort_partial; test least_upper_bound directly The add_sort_params rewrite inlined per-node inference into its recursive sweep, leaving infer_sort_params (and its only helper _resolve_sort_partial) with no callers. Remove both so the module presents a single sort-inference algorithm. Replace the infer_sort_params tests with a direct least_upper_bound test (the set-based, order-independent LUB those tests had been exercising) and add an add_sort_params case asserting an argument-bound parameter ignores a conflicting expected sort. Drop the now-unused baz/f2/f3 fixtures. Co-Authored-By: Claude Opus 4.8 --- pyk/src/pyk/kast/outer.py | 83 ------------- pyk/src/tests/unit/kast/test_definition.py | 134 +++++---------------- 2 files changed, 31 insertions(+), 186 deletions(-) diff --git a/pyk/src/pyk/kast/outer.py b/pyk/src/pyk/kast/outer.py index 138d3f590d..2d373f7470 100644 --- a/pyk/src/pyk/kast/outer.py +++ b/pyk/src/pyk/kast/outer.py @@ -1035,25 +1035,6 @@ def _match_sort_params( return {} -def _resolve_sort_partial( - sort: KSort, - bindings: dict[KSort, KSort], - params: frozenset[KSort], -) -> KSort | None: - """Substitute ``bindings`` into ``sort``, returning ``None`` if any param is unbound. - - A param in ``params`` is considered unbound if it does not appear in ``bindings``. - """ - if sort in params: - return bindings.get(sort) - if sort.params: - resolved = [_resolve_sort_partial(p, bindings, params) for p in sort.params] - if any(r is None for r in resolved): - return None - return KSort(sort.name, tuple(resolved)) # type: ignore[arg-type] - return sort - - @final @dataclass(frozen=True) class KDefinition(KOuter, WithKAtt, Iterable[KFlatModule]): @@ -1592,70 +1573,6 @@ def transform( return Subst(subst)(new_term) - def infer_sort_params( - self, - prod: KProduction, - actual_sorts: tuple[KSort | None, ...], - expected_sort: KSort | None = None, - ) -> tuple[dict[KSort, KSort | None], KSort | None]: - """Infer sort parameter bindings for a parametric production application. - - Returns ``(bindings, inferred_sort)`` where: - - ``bindings`` maps each sort param to one of three states: - - key present, value non-``None``: parameter successfully bound to that sort. - - key present, value ``None``: candidates were found but their LUB could not be - computed (conflicting sorts with no common supersort). - - key absent: no candidates were collected (unsortable / no structural match). - - ``inferred_sort`` is the production's result sort after substituting the - successful bindings. It is ``None`` if any sort param appearing in the result - sort is unbound or conflicted — including the case where ``prod`` is parametric - but no candidates were found. For non-parametric productions it is always the - concrete result sort. - - Mirrors ``AddSortInjections.substituteProd()`` in the Java frontend. - - ``actual_sorts`` must have the same length as ``prod.argument_sorts``. - ``None`` entries are skipped (unsortable arguments). - If ``expected_sort`` is given, parameters that appear only in the result sort - (not in any argument sort) are also inferred from it — this is the - ``matchExpected`` path in the Java algorithm. - """ - params = frozenset(prod.params) - candidates: dict[KSort, list[KSort]] = {} - - for psort, asort in zip(prod.argument_sorts, actual_sorts, strict=True): - if asort is None: - continue - for k, vs in _match_sort_params(psort, asort, params, self.subsorts).items(): - candidates.setdefault(k, []).extend(vs) - - if expected_sort is not None: - # Only params that occur in the result sort but in NO declared argument sort are - # inferred from the expected sort (Java matchExpected, AddSortInjections.java:435). - # The membership test must use the production's *declared* (parametric) argument - # sorts — those are what can contain a param `p`; the concrete `actual_sorts` never - # do, so testing them would wrongly treat arg-bound params as expected-only. - unbound_result_params = frozenset( - p - for p in params - if prod.sort.contains(p) and not any(psort.contains(p) for psort in prod.argument_sorts) - ) - if unbound_result_params: - for k, vs in _match_sort_params(prod.sort, expected_sort, unbound_result_params).items(): - candidates.setdefault(k, []).extend(vs) - - # Resolve each param to the least upper bound of its candidates. A param with candidates - # but no computable LUB maps to None (conflict); a param with no candidates is omitted. - result: dict[KSort, KSort | None] = { - p: self.least_upper_bound(candidates[p]) for p in prod.params if p in candidates - } - - successful: dict[KSort, KSort] = {k: v for k, v in result.items() if v is not None} - inferred_sort = _resolve_sort_partial(prod.sort, successful, params) - return result, inferred_sort - # Best-effort addition of sort parameters to klabels def add_sort_params(self, kast: KInner, sort: KSort | None = None) -> KInner: """Return ``kast`` with missing sort parameters filled in on every parametric ``KLabel``, best effort. diff --git a/pyk/src/tests/unit/kast/test_definition.py b/pyk/src/tests/unit/kast/test_definition.py index 33d4480464..fd8c3c01ca 100644 --- a/pyk/src/tests/unit/kast/test_definition.py +++ b/pyk/src/tests/unit/kast/test_definition.py @@ -29,10 +29,10 @@ # # bar: syntax N ::= bar(N) -- result sort is the param directly # foo: syntax MInt{N} ::= foo(MInt{N}) -- result/arg sorts nest the param -# baz: syntax MInt{N} ::= baz() -- no args; param bound only from expected sort # #Equals: syntax S2 ::= #Equals{S1,S2}(S1, S1) -- ML pred, result sort context-dependent -# f2: syntax {S} S ::= f2(S, S) -- one param bound from two argument positions -# f3: syntax {S} S ::= f3(S, S, S) -- one param bound from three argument positions +# #Ceil: syntax S2 ::= #Ceil{S1,S2}(S1) -- ML pred, S2 is the free result sort +# #And: syntax {S} S ::= #And{S}(S, S) -- homogeneous ML connective (S arg-bound) +# pair: syntax Pair{S1,S2} ::= pair(S1) -- user label; S2 is return-only # # Subsorts: # syntax Int ::= MInt{Int} -- MInt{Int} <: Int (enables subsort-aware matching) @@ -100,43 +100,10 @@ klabel='pair', ) -# syntax MInt{N} ::= baz() — no argument sorts; param N only bound via expected_sort -_BAZ_PROD: Final = KProduction( - sort=MINT_N, - items=[KTerminal('baz'), KTerminal('('), KTerminal(')')], - params=[N], - klabel='baz', -) - # syntax Int ::= MInt{Int} — subsort declaration: MInt{Int} <: Int # Enables the subsort-aware matching path (Java AddSortInjections.match step 3). _MINT_INT_SUBSORT: Final = KProduction(sort=INT, items=[KNonTerminal(MINT_INT)]) -# syntax {S} S ::= f2(S, S) — one sort param bound from two argument positions. -_F2_PROD: Final = KProduction( - sort=S, - items=[KTerminal('f2'), KTerminal('('), KNonTerminal(S), KTerminal(','), KNonTerminal(S), KTerminal(')')], - params=[S], - klabel='f2', -) - -# syntax {S} S ::= f3(S, S, S) — one sort param bound from three argument positions. -_F3_PROD: Final = KProduction( - sort=S, - items=[ - KTerminal('f3'), - KTerminal('('), - KNonTerminal(S), - KTerminal(','), - KNonTerminal(S), - KTerminal(','), - KNonTerminal(S), - KTerminal(')'), - ], - params=[S], - klabel='f3', -) - # Subsort lattice with two incomparable subsorts sharing a common supersort: # syntax Number ::= Int and syntax Number ::= Float # LUB(Int, Float) = Number, exercising set-based (not pairwise-chain) lub computation. @@ -212,12 +179,9 @@ def _sp(n: int) -> KSort: [ _BAR_PROD, _FOO_PROD, - _BAZ_PROD, _EQUALS_PROD, _CEIL_PROD, _AND_PROD, - _F2_PROD, - _F3_PROD, _MINT_INT_SUBSORT, _NUMBER_INT_SUBSORT, _NUMBER_FLOAT_SUBSORT, @@ -402,6 +366,15 @@ def test_add_sort_params_conflicting_ml_pred_args_warns(caplog: pytest.LogCaptur GENERATED_TOP_CELL, KApply(KLabel('#Ceil', [INT, GENERATED_TOP_CELL]), _CEIL_ARGS), ), + # An argument-bound parameter is taken from the argument, never from the expected sort: + # N occurs in foo's argument sort MInt{N}, so the conflicting expected sort MInt{Bool} is + # ignored and N resolves to Int (the expected-sort match only applies to return-only params). + ( + 'expected_ignored_for_argbound', + KApply(KLabel('foo'), [KVariable('X', sort=MINT_INT)]), + MINT_BOOL, + KApply(KLabel('foo', [INT]), [KVariable('X', sort=MINT_INT)]), + ), # Spine: the expected sort threads down through the #And connective so every conjunct's # result sort resolves to the same concrete sort. ( @@ -459,77 +432,32 @@ def test_add_sort_params_spine_violation_asserts() -> None: # --------------------------------------------------------------------------- -# KDefinition.infer_sort_params +# KDefinition.least_upper_bound # --------------------------------------------------------------------------- -# -# Tests the public method directly (not through add_sort_params), mirroring the -# Java AddSortInjections.substituteProd() test scenarios derived from the algorithm. -INFER_SORT_PARAMS_DATA: Final[ - tuple[ - tuple[str, KProduction, tuple[KSort | None, ...], KSort | None, dict[KSort, KSort | None], KSort | None], - ..., - ] -] = ( - # Direct param: psort IS the param (N → Int); prod.sort=N → inferred=Int - ('direct_param', _BAR_PROD, (INT,), None, {N: INT}, INT), - # Nested param: psort = MInt{N}, asort = MInt{Int} → N=Int; prod.sort=MInt{N} → inferred=MInt{Int} - ('nested_param', _FOO_PROD, (MINT_INT,), None, {N: INT}, MINT_INT), - # Subsort-aware: arg sort is Int, MInt{Int} <: Int in DEFN → N=Int via subsort iteration - ('subsort_aware', _FOO_PROD, (INT,), None, {N: INT}, MINT_INT), - # matchExpected: baz() has no arg sorts; N bound from expected_sort MInt{Int} → inferred=MInt{Int} - ('expected_sort', _BAZ_PROD, (), MINT_INT, {N: INT}, MINT_INT), - # None arg skipped; N has no candidates → inferred=None (parametric prod, N unbound) - ('unbound_absent', _BAR_PROD, (None,), None, {}, None), - # Conflicting args: S1→[Int,Bool] LUB fails → S1:None; S2 no candidates; prod.sort=S2 → inferred=None - ('conflicting_args', _EQUALS_PROD, (INT, BOOL), None, {S1: None}, None), - # expected_sort head mismatch: MInt{N} vs Int → no structural match → N absent → inferred=None - ('expected_sort_mismatch', _BAZ_PROD, (), INT, {}, None), - # Non-parametric: no params to bind, result sort is always concrete → inferred=AccountCell ≠ None - # This distinguishes "trivially complete (no params)" from "params exist but no candidates". - ('no_params_trivial', _ACCOUNT_CELL, (INT, INT), None, {}, ACCOUNT_CELL), - # ----- regression cases for known sort-inference bugs (fixed in follow-up commits) ----- - # Issue #2 (matchExpected): N occurs in foo's declared argument sort MInt{N}, so it must be - # bound from the argument (Int) and the expected sort MInt{Bool} must be ignored — exactly - # as Java's matchExpected skips params present in a nonterminal sort. The buggy version - # checks the *concrete* actual sorts (which never contain the param N), wrongly also matches - # against the expected sort, and reports a conflict {N: None} from lub(Int, Bool). - ('expected_sort_param_in_arg', _FOO_PROD, (MINT_INT,), MINT_BOOL, {N: INT}, MINT_INT), - # Issue #3 (lub weakness): S gets candidates {Int, Float}; both are subsorts of Number with - # no subsort relation between them, so the LUB is Number. The buggy pairwise fold only - # succeeds when one candidate subsorts the other and reports a conflict {S: None}. - ('lub_common_supersort', _F2_PROD, (INT, FLOAT), None, {S: NUMBER}, NUMBER), +LEAST_UPPER_BOUND_DATA: Final[tuple[tuple[str, tuple[KSort, ...], KSort | None], ...]] = ( + ('singleton', (INT,), INT), + # Comparable: Int <: Number, so the bound is the larger sort. + ('comparable', (INT, NUMBER), NUMBER), + # Siblings with a common supersort: Int and Float are incomparable but both <: Number. + # A pairwise-chain lub would fail here; the set-based one resolves to Number. + ('siblings_common_supersort', (INT, FLOAT), NUMBER), + # Order independence: the same candidate set resolves to Number regardless of order — a + # pairwise fold would succeed for one order and fail for the other. + ('order_independent_a', (INT, NUMBER, FLOAT), NUMBER), + ('order_independent_b', (INT, FLOAT, NUMBER), NUMBER), + # No common supersort: Bool shares no supersort with Int. + ('no_common_supersort', (INT, BOOL), None), ) @pytest.mark.parametrize( - 'test_id,prod,actual_sorts,expected_sort,expected_bindings,expected_inferred_sort', - INFER_SORT_PARAMS_DATA, - ids=[test_id for test_id, *_ in INFER_SORT_PARAMS_DATA], + 'test_id,sorts,expected', + LEAST_UPPER_BOUND_DATA, + ids=[test_id for test_id, *_ in LEAST_UPPER_BOUND_DATA], ) -def test_infer_sort_params( - test_id: str, - prod: KProduction, - actual_sorts: tuple[KSort | None, ...], - expected_sort: KSort | None, - expected_bindings: dict[KSort, KSort | None], - expected_inferred_sort: KSort | None, -) -> None: - bindings, inferred_sort = DEFN.infer_sort_params(prod, actual_sorts, expected_sort) - assert bindings == expected_bindings - assert inferred_sort == expected_inferred_sort - - -def test_infer_sort_params_lub_order_independent() -> None: - # Issue #3: the candidate LUB must not depend on the order candidates were collected. - # f3 binds S from three positions; the candidate set {Int, Number, Float} must resolve to - # Number regardless of argument order. The buggy pairwise fold succeeds for - # (Int, Number, Float) — it reaches Number before encountering Float — but fails for - # (Int, Float, Number) because lub(Int, Float) has no pairwise chain. - bindings_a, _ = DEFN.infer_sort_params(_F3_PROD, (INT, NUMBER, FLOAT)) - bindings_b, _ = DEFN.infer_sort_params(_F3_PROD, (INT, FLOAT, NUMBER)) - assert bindings_a == {S: NUMBER} - assert bindings_b == {S: NUMBER} +def test_least_upper_bound(test_id: str, sorts: tuple[KSort, ...], expected: KSort | None) -> None: + assert DEFN.least_upper_bound(sorts) == expected # ---------------------------------------------------------------------------