Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.PHONY: validate test scan-local-persistence validate-local-agents validate-local-agent-templates
.PHONY: validate test scan-local-persistence validate-local-agents validate-local-agent-templates validate-reasoning-cli

validate: test scan-local-persistence validate-local-agents validate-local-agent-templates
validate: test scan-local-persistence validate-local-agents validate-local-agent-templates validate-reasoning-cli
@test -f README.md
@test -f AGENTS.md
@test -f .github/copilot-instructions.md
Expand All @@ -20,3 +20,9 @@ validate-local-agents:

validate-local-agent-templates:
@python3 scripts/validate_local_agent_templates.py .

# Smoke-test every `sourceosctl reasoning` subcommand against the deterministic
# fixture. Stdout is discarded, so only each command's exit status decides
# whether the target passes.
validate-reasoning-cli:
	@python3 bin/sourceosctl reasoning validate tests/fixtures/reasoning/deterministic >/dev/null
	@python3 bin/sourceosctl reasoning inspect tests/fixtures/reasoning/deterministic >/dev/null
	@python3 bin/sourceosctl reasoning replay-plan tests/fixtures/reasoning/deterministic >/dev/null
	@python3 bin/sourceosctl reasoning events tests/fixtures/reasoning/deterministic >/dev/null
5 changes: 5 additions & 0 deletions bin/sourceosctl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ if len(sys.argv) > 1 and sys.argv[1] == "policy":

sys.exit(policy_main(sys.argv[2:]))

# Route `sourceosctl reasoning ...` to the reasoning command module. The import
# is deferred until this subcommand is actually selected, and the remaining
# arguments (everything after "reasoning") are handed to reasoning_main, whose
# return value becomes the process exit status.
if len(sys.argv) > 1 and sys.argv[1] == "reasoning":
    from sourceosctl.commands.reasoning import reasoning_main

    sys.exit(reasoning_main(sys.argv[2:]))

if len(sys.argv) > 1 and sys.argv[1] == "network":
from sourceosctl.commands.network import network_main

Expand Down
181 changes: 181 additions & 0 deletions sourceosctl/commands/reasoning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"""Read-only Superconscious / SourceOS reasoning artifact helpers."""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict, List


# File names that must all be present in a reasoning run directory.
REQUIRED_CANONICAL = [
    "reasoning-events.sourceos.jsonl",
    "reasoning-run.sourceos.json",
    "reasoning-receipt.json",
    "reasoning-replay-plan.json",
    "reasoning-benchmark.json",
]
# Accepted values for the replay plan's "replayClass" field.
VALID_REPLAY_CLASSES = {"exact", "best-effort", "evidence-only", "non-replayable-side-effect"}


def _print_json(payload: Dict[str, Any]) -> int:
    """Print *payload* to stdout as indented, key-sorted JSON and return 0."""
    print(json.dumps(payload, indent=2, sort_keys=True))
    return 0


def _load_json(path: Path) -> Dict[str, Any]:
    """Parse the UTF-8 JSON document stored at *path*.

    The parsed value is returned as-is; nothing here checks that the top-level
    value is actually an object. Raises OSError when the file cannot be opened
    and ValueError (json.JSONDecodeError / UnicodeDecodeError) when it cannot
    be decoded.
    """
    with path.open("r", encoding="utf-8") as handle:
        return json.load(handle)


def _load_jsonl(path: Path) -> List[Dict[str, Any]]:
    """Parse a UTF-8 JSON Lines file, skipping blank lines.

    Raises the same exceptions as ``_load_json``. Each non-blank line is parsed
    independently and returned in file order.
    """
    with path.open("r", encoding="utf-8") as handle:
        return [json.loads(line) for line in handle if line.strip()]


def validate_run_dir(run_dir: Path) -> Dict[str, Any]:
    """Validate the canonical reasoning artifacts found in *run_dir*.

    The check fails closed: every problem becomes an entry in the report's
    ``errors`` list instead of an exception, so callers always get a
    ``ReasoningValidation`` report back.

    Args:
        run_dir: Directory expected to contain every ``REQUIRED_CANONICAL`` file.

    Returns:
        A dict with ``type``, ``result`` ("pass" or "fail"), ``runDir`` and
        ``errors``. When all artifacts could be read it also carries ``runId``,
        ``status``, ``eventCount``, ``replayClass``, ``benchmarkSuite``,
        ``benchmarkPassed``, ``safeTraceMode`` and ``rawPrivateReasoning``.
    """
    run_dir = run_dir.resolve()
    errors: List[str] = []

    def _short_failure() -> Dict[str, Any]:
        # Report shape used when the artifacts themselves cannot be examined.
        return {"type": "ReasoningValidation", "result": "fail", "runDir": str(run_dir), "errors": errors}

    # is_file() rather than exists(): a directory that merely carries the
    # artifact's name must not count as the artifact.
    for artifact in REQUIRED_CANONICAL:
        if not (run_dir / artifact).is_file():
            errors.append(f"missing canonical artifact: {artifact}")
    if errors:
        return _short_failure()

    def _read_object(name: str) -> Dict[str, Any]:
        # Load one JSON artifact, turning I/O or decode failures and
        # non-object payloads into validation errors.
        try:
            payload = _load_json(run_dir / name)
        except (OSError, ValueError) as exc:
            errors.append(f"unreadable canonical artifact: {name} ({type(exc).__name__})")
            return {}
        if not isinstance(payload, dict):
            errors.append(f"{name} must contain a JSON object")
            return {}
        return payload

    try:
        events = _load_jsonl(run_dir / "reasoning-events.sourceos.jsonl")
    except (OSError, ValueError) as exc:
        errors.append(f"unreadable canonical artifact: reasoning-events.sourceos.jsonl ({type(exc).__name__})")
        events = []
    reasoning_run = _read_object("reasoning-run.sourceos.json")
    receipt = _read_object("reasoning-receipt.json")
    replay = _read_object("reasoning-replay-plan.json")
    benchmark = _read_object("reasoning-benchmark.json")
    if errors:
        # Field-level checks on placeholder payloads would only add noise.
        return _short_failure()

    run_id = reasoning_run.get("id")
    # Without this check a run lacking "id" yields run_id=None, and artifacts
    # that also lack "runRef" would compare equal and pass silently.
    if not isinstance(run_id, str) or not run_id:
        errors.append("reasoning-run.sourceos.json id must be a non-empty string")
    if reasoning_run.get("type") != "ReasoningRun":
        errors.append("reasoning-run.sourceos.json type must be ReasoningRun")

    safe_trace = reasoning_run.get("safeTrace")
    if not isinstance(safe_trace, dict):
        # A null or non-object safeTrace fails both checks below.
        safe_trace = {}
    if safe_trace.get("mode") != "operational-trace-only":
        errors.append("safe trace mode must be operational-trace-only")
    if safe_trace.get("rawPrivateReasoning") != "not-collected":
        errors.append("raw private reasoning must be not-collected")

    # index counts non-blank lines, because _load_jsonl drops blank ones.
    for index, event in enumerate(events, start=1):
        if not isinstance(event, dict):
            errors.append(f"event line {index} must be a JSON object")
            continue
        if event.get("type") != "ReasoningEvent":
            errors.append(f"event line {index} type must be ReasoningEvent")
        if event.get("runRef") != run_id:
            errors.append(f"event line {index} runRef mismatch")
        if event.get("traceLevel") == "denied":
            errors.append(f"event line {index} must not emit denied trace content")

    if receipt.get("type") != "ReasoningReceipt" or receipt.get("runRef") != run_id:
        errors.append("reasoning receipt mismatch")
    if replay.get("type") != "ReasoningReplayPlan" or replay.get("runRef") != run_id:
        errors.append("reasoning replay plan mismatch")
    if replay.get("replayClass") not in VALID_REPLAY_CLASSES:
        errors.append("invalid replay class")
    if benchmark.get("type") != "ReasoningBenchmark" or benchmark.get("runRef") != run_id:
        errors.append("reasoning benchmark mismatch")
    if benchmark.get("passed") is not True:
        errors.append("reasoning benchmark must pass")

    return {
        "type": "ReasoningValidation",
        "result": "pass" if not errors else "fail",
        "runDir": str(run_dir),
        "runId": run_id,
        "status": reasoning_run.get("status"),
        "eventCount": len(events),
        "replayClass": replay.get("replayClass"),
        "benchmarkSuite": benchmark.get("suite"),
        "benchmarkPassed": benchmark.get("passed"),
        "safeTraceMode": safe_trace.get("mode"),
        "rawPrivateReasoning": safe_trace.get("rawPrivateReasoning"),
        "errors": errors,
    }


def validate_cmd(args) -> int:
    """Handle ``reasoning validate``: print the validation report as JSON.

    Returns 0 when the report's result is "pass" and 1 otherwise.
    """
    outcome = validate_run_dir(Path(args.run_dir))
    _print_json(outcome)
    if outcome["result"] == "pass":
        return 0
    return 1


def inspect_cmd(args) -> int:
    """Handle ``reasoning inspect``: print a summary of a reasoning run.

    The run directory is validated first. If validation fails and
    ``--allow-invalid`` was not given, the validation report is printed and 1
    is returned. Otherwise a ``ReasoningInspection`` document is printed and 0
    is returned.

    With ``--allow-invalid`` the summary is best-effort: an artifact that is
    missing, unreadable, or not of the expected JSON shape contributes empty
    values instead of aborting the command with a traceback.
    """
    run_dir = Path(args.run_dir).resolve()
    report = validate_run_dir(run_dir)
    if report["result"] != "pass" and not args.allow_invalid:
        _print_json(report)
        return 1

    def _load_or(loader, name, fallback):
        # Tolerate artifacts that cannot be read or have the wrong top-level
        # type, so an explicitly allowed invalid run can still be inspected.
        try:
            loaded = loader(run_dir / name)
        except (OSError, ValueError):
            return fallback
        return loaded if isinstance(loaded, type(fallback)) else fallback

    reasoning_run = _load_or(_load_json, "reasoning-run.sourceos.json", {})
    replay = _load_or(_load_json, "reasoning-replay-plan.json", {})
    benchmark = _load_or(_load_json, "reasoning-benchmark.json", {})
    # Lines that are not JSON objects have no fields to show in the timeline.
    events = [
        event
        for event in _load_or(_load_jsonl, "reasoning-events.sourceos.jsonl", [])
        if isinstance(event, dict)
    ]

    return _print_json(
        {
            "type": "ReasoningInspection",
            "runId": reasoning_run.get("id"),
            "status": reasoning_run.get("status"),
            "task": reasoning_run.get("task"),
            "agentRef": reasoning_run.get("agentRef"),
            "workspaceRef": reasoning_run.get("workspaceRef"),
            "safeTrace": reasoning_run.get("safeTrace"),
            "replayClass": replay.get("replayClass"),
            "benchmark": {
                "suite": benchmark.get("suite"),
                "passed": benchmark.get("passed"),
                "assertions": benchmark.get("assertions", []),
            },
            "eventTimeline": [
                {
                    "id": event.get("id"),
                    "eventType": event.get("eventType"),
                    "summary": event.get("summary"),
                    "traceLevel": event.get("traceLevel"),
                    "trustLevel": event.get("trustLevel"),
                }
                for event in events
            ],
        }
    )


def replay_plan_cmd(args) -> int:
    """Handle ``reasoning replay-plan``: print the run's replay plan as JSON.

    The plan file is loaded and printed without validation; returns 0.
    """
    plan_path = Path(args.run_dir).resolve() / "reasoning-replay-plan.json"
    plan = _load_json(plan_path)
    return _print_json(plan)


def events_cmd(args) -> int:
    """Handle ``reasoning events``: print every event of the run as JSON.

    The events are wrapped in a ``ReasoningEvents`` envelope that also records
    the resolved run directory; returns 0.
    """
    root = Path(args.run_dir).resolve()
    loaded = _load_jsonl(root / "reasoning-events.sourceos.jsonl")
    envelope = {
        "type": "ReasoningEvents",
        "runDir": str(root),
        "events": loaded,
    }
    return _print_json(envelope)


def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for ``sourceosctl reasoning``.

    A subcommand is required. Each one takes a ``run_dir`` positional and
    stores its handler in ``func``; ``inspect`` additionally accepts
    ``--allow-invalid``.
    """
    root = argparse.ArgumentParser(
        prog="sourceosctl reasoning",
        description="Inspect and validate SourceOS reasoning artifacts",
    )
    commands = root.add_subparsers(dest="reasoning_command", required=True)

    # (name, help text, handler), registered in this order.
    specs = (
        ("validate", "Validate a Superconscious/SourceOS reasoning run directory", validate_cmd),
        ("inspect", "Inspect a reasoning run directory", inspect_cmd),
        ("replay-plan", "Print the reasoning replay plan", replay_plan_cmd),
        ("events", "Print reasoning events", events_cmd),
    )
    for name, help_text, handler in specs:
        child = commands.add_parser(name, help=help_text)
        child.add_argument("run_dir")
        if name == "inspect":
            child.add_argument("--allow-invalid", action="store_true", default=False)
        child.set_defaults(func=handler)

    return root


def reasoning_main(argv: list[str] | None = None) -> int:
    """Entry point for ``sourceosctl reasoning``.

    Parses *argv* (argparse falls back to ``sys.argv[1:]`` when it is None),
    runs the selected subcommand handler and returns its result, mapping any
    falsy result to 0.
    """
    namespace = build_parser().parse_args(argv)
    exit_code = namespace.func(namespace)
    return exit_code or 0
21 changes: 21 additions & 0 deletions tests/fixtures/reasoning/deterministic/reasoning-benchmark.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"id": "urn:srcos:reasoning-benchmark:sourceosctl-fixture",
"type": "ReasoningBenchmark",
"specVersion": "2.0.0",
"runRef": "urn:srcos:reasoning-run:sourceosctl-fixture",
"suite": "m1-deterministic-smoke",
"passed": true,
"assertions": [
{
"name": "run-completed",
"passed": true,
"summary": "The reasoning run reached completed status."
},
{
"name": "safe-trace-only",
"passed": true,
"summary": "The reasoning run emitted safe operational trace metadata only."
}
],
"capturedAt": "2026-05-05T00:00:01Z"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"capturedAt":"2026-05-05T00:00:00Z","eventType":"reasoning.run.created","id":"urn:srcos:reasoning-event:sourceosctl-fixture-created","runRef":"urn:srcos:reasoning-run:sourceosctl-fixture","summary":"Created deterministic Superconscious reasoning run.","traceLevel":"public-safe","trustLevel":"trusted-control-input","type":"ReasoningEvent","specVersion":"2.0.0"}
{"capturedAt":"2026-05-05T00:00:01Z","eventType":"reasoning.run.completed","id":"urn:srcos:reasoning-event:sourceosctl-fixture-completed","runRef":"urn:srcos:reasoning-run:sourceosctl-fixture","summary":"Completed deterministic Superconscious reasoning run.","traceLevel":"public-safe","trustLevel":"trusted-control-input","type":"ReasoningEvent","specVersion":"2.0.0"}
17 changes: 17 additions & 0 deletions tests/fixtures/reasoning/deterministic/reasoning-receipt.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"id": "urn:srcos:receipt:reasoning:sourceosctl-fixture",
"type": "ReasoningReceipt",
"specVersion": "2.0.0",
"runRef": "urn:srcos:reasoning-run:sourceosctl-fixture",
"taskRef": "urn:srcos:reasoning-task:sourceosctl-fixture",
"status": "completed",
"traceHash": "sha256:sourceosctl-fixture-trace",
"coordination": {
"policy": "allowed-safe-deterministic-mode",
"modelRoute": "deterministic-stub-route",
"memory": "proposal-only",
"approval": "not-required"
},
"replayClass": "exact",
"capturedAt": "2026-05-05T00:00:01Z"
}
22 changes: 22 additions & 0 deletions tests/fixtures/reasoning/deterministic/reasoning-replay-plan.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"id": "urn:srcos:reasoning-replay-plan:sourceosctl-fixture",
"type": "ReasoningReplayPlan",
"specVersion": "2.0.0",
"runRef": "urn:srcos:reasoning-run:sourceosctl-fixture",
"replayClass": "exact",
"inputs": {
"taskRef": "urn:srcos:reasoning-task:sourceosctl-fixture",
"taskHash": "sha256:sourceosctl-fixture-task",
"mode": "deterministic-local"
},
"constraints": {
"network": "denied",
"modelCalls": "denied",
"hostState": "unchanged"
},
"stepRefs": [
"urn:srcos:reasoning-event:sourceosctl-fixture-created",
"urn:srcos:reasoning-event:sourceosctl-fixture-completed"
],
"capturedAt": "2026-05-05T00:00:01Z"
}
32 changes: 32 additions & 0 deletions tests/fixtures/reasoning/deterministic/reasoning-run.sourceos.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"id": "urn:srcos:reasoning-run:sourceosctl-fixture",
"type": "ReasoningRun",
"specVersion": "2.0.0",
"status": "completed",
"task": {
"id": "urn:srcos:reasoning-task:sourceosctl-fixture",
"title": "sourceosctl reasoning fixture",
"objectiveHash": "sha256:sourceosctl-fixture-objective"
},
"agentRef": "urn:socioprophet:agent:superconscious-demo",
"workspaceRef": "urn:socioprophet:workspace:superconscious-m1",
"safeTrace": {
"mode": "operational-trace-only",
"rawPrivateReasoning": "not-collected",
"eventCount": 2
},
"eventRefs": [
"urn:srcos:reasoning-event:sourceosctl-fixture-created",
"urn:srcos:reasoning-event:sourceosctl-fixture-completed"
],
"artifactRefs": [
"reasoning-events.sourceos.jsonl",
"reasoning-run.sourceos.json",
"reasoning-receipt.json",
"reasoning-replay-plan.json",
"reasoning-benchmark.json"
],
"adapterRecords": [],
"startedAt": "2026-05-05T00:00:00Z",
"completedAt": "2026-05-05T00:00:01Z"
}
70 changes: 70 additions & 0 deletions tests/test_reasoning_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Unit tests for sourceosctl reasoning commands."""

import json
import os
import pathlib
import shutil
import sys
import tempfile
import unittest

# Repository root: the parent of the directory that contains this test file.
_REPO_ROOT = pathlib.Path(__file__).parent.parent
# Put the repository root first on sys.path so the `sourceosctl` import below
# resolves against this checkout.
sys.path.insert(0, str(_REPO_ROOT))

from sourceosctl.commands import reasoning


# Directory holding the deterministic reasoning-run fixture used by every test.
FIXTURE = _REPO_ROOT / "tests" / "fixtures" / "reasoning" / "deterministic"


class TestReasoningCommands(unittest.TestCase):
    """Exercise the ``sourceosctl reasoning`` commands against the fixture run."""

    def _scratch_fixture(self) -> pathlib.Path:
        """Copy every fixture file into a temp dir that is removed after the test."""
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        destination = pathlib.Path(scratch.name)
        for source in FIXTURE.iterdir():
            shutil.copy(source, destination / source.name)
        return destination

    def test_reasoning_validate_passes_for_fixture(self):
        exit_code = reasoning.reasoning_main(["validate", str(FIXTURE)])
        self.assertEqual(exit_code, 0)

    def test_reasoning_inspect_passes_for_fixture(self):
        exit_code = reasoning.reasoning_main(["inspect", str(FIXTURE)])
        self.assertEqual(exit_code, 0)

    def test_reasoning_replay_plan_passes_for_fixture(self):
        exit_code = reasoning.reasoning_main(["replay-plan", str(FIXTURE)])
        self.assertEqual(exit_code, 0)

    def test_reasoning_events_passes_for_fixture(self):
        exit_code = reasoning.reasoning_main(["events", str(FIXTURE)])
        self.assertEqual(exit_code, 0)

    def test_validate_run_dir_returns_structured_pass(self):
        report = reasoning.validate_run_dir(FIXTURE)
        expected = {
            "result": "pass",
            "runId": "urn:srcos:reasoning-run:sourceosctl-fixture",
            "replayClass": "exact",
            "rawPrivateReasoning": "not-collected",
        }
        for key in ("result", "runId", "replayClass"):
            self.assertEqual(report[key], expected[key])
        self.assertTrue(report["benchmarkPassed"])
        self.assertEqual(report["rawPrivateReasoning"], expected["rawPrivateReasoning"])

    def test_reasoning_validate_fails_closed_when_benchmark_missing(self):
        run_dir = self._scratch_fixture()
        (run_dir / "reasoning-benchmark.json").unlink()

        report = reasoning.validate_run_dir(run_dir)

        self.assertEqual(report["result"], "fail")
        self.assertIn("missing canonical artifact: reasoning-benchmark.json", report["errors"])

    def test_reasoning_validate_fails_on_raw_private_reasoning(self):
        run_dir = self._scratch_fixture()
        run_file = run_dir / "reasoning-run.sourceos.json"
        document = json.loads(run_file.read_text(encoding="utf-8"))
        document["safeTrace"]["rawPrivateReasoning"] = "present"
        run_file.write_text(json.dumps(document), encoding="utf-8")

        report = reasoning.validate_run_dir(run_dir)

        self.assertEqual(report["result"], "fail")
        self.assertIn("raw private reasoning must be not-collected", report["errors"])


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading