diff --git a/src/orc/cli_commands/verify.py b/src/orc/cli_commands/verify.py index 270838f..17be062 100644 --- a/src/orc/cli_commands/verify.py +++ b/src/orc/cli_commands/verify.py @@ -45,6 +45,12 @@ default=None, help="Route mode by domain hint (e.g. 'financial', 'clinical', 'legal')", ) +@click.option( + "--mode", + default=None, + type=click.Choice(["evidence", "judgment", "binary", "decomposed", "arithmetic", "tiered"]), + help="Verify mode (overrides --domain routing)", +) @click.option("--yes", "-y", is_flag=True, help="Skip the confirmation prompt for batch verify") @click.option("--json", "as_json", is_flag=True, help="Emit raw JSON instead of formatted output") def verify_command( @@ -55,6 +61,7 @@ def verify_command( from_file: str | None, from_url: str | None, domain: str | None, + mode: str | None, yes: bool, as_json: bool, ) -> None: @@ -75,12 +82,13 @@ def verify_command( model=model, k=k, domain=domain, + mode=mode, yes=yes, as_json=as_json, ) return - _verify_one(ws, claim=claim, model=model, k=k, domain=domain, as_json=as_json) + _verify_one(ws, claim=claim, model=model, k=k, domain=domain, mode=mode, as_json=as_json) def _verify_one( @@ -90,6 +98,7 @@ def _verify_one( model: str | None, k: int | None, domain: str | None, + mode: str | None, as_json: bool, ) -> None: spec = directives.get("research") @@ -101,6 +110,8 @@ def _verify_one( kwargs["k"] = k if domain is not None: kwargs["domain"] = domain + if mode is not None: + kwargs["mode"] = mode with open_run(ws, directive="research", skill="verify_claim", inputs=dict(kwargs)) as run: run.record_effective_kwargs(kwargs) @@ -124,6 +135,7 @@ def _verify_from_document( model: str | None, k: int | None, domain: str | None, + mode: str | None, yes: bool, as_json: bool, ) -> None: @@ -176,6 +188,8 @@ def _verify_from_document( kwargs["k"] = k if domain is not None: kwargs["domain"] = domain + if mode is not None: + kwargs["mode"] = mode with open_run(ws, directive="research", skill="verify_claim", inputs=dict(kwargs)) as run: run.record_effective_kwargs(kwargs) try: diff --git a/tests/unit/test_verify_cli.py b/tests/unit/test_verify_cli.py new file mode 100644 index 0000000..3d552a9 --- /dev/null +++ b/tests/unit/test_verify_cli.py @@ -0,0 +1,36 @@ +"""`orc verify` CLI: the --mode flag routes to the verify mode.""" + +from __future__ import annotations + +from click.testing import CliRunner + +from orc.cli import main +from orc.ingest.pipeline import ingest as do_ingest +from orc.llm import client as client_module +from orc.storage import workspace as ws_module +from tests._fake_llm import FakeAnthropic, FakeContentBlock, FakeResponse + + +def _binary(*, faithful: bool, confidence: float) -> FakeResponse: + return FakeResponse(content=[FakeContentBlock( + type="tool_use", name="record_binary_verdict", + input={"faithful": faithful, "confidence": confidence, "reasoning": "r"})]) + + +def test_verify_mode_flag_routes_to_binary(orc_home, tmp_path, monkeypatch) -> None: + ws = ws_module.create("demo") + corpus = tmp_path / "corpus" + corpus.mkdir() + (corpus / "doc.md").write_text("# Doc\n\nThe sky is blue on a clear day.\n") + do_ingest(ws, str(corpus)) + fake = FakeAnthropic(responses=[_binary(faithful=True, confidence=0.9)]) + monkeypatch.setattr(client_module, "_client", fake) + monkeypatch.setattr(client_module, "_factory", None) + + res = CliRunner().invoke( + main, ["verify", "The sky is blue", "-w", "demo", "--mode", "binary", "--json"] + ) + assert res.exit_code == 0, res.output + assert "No such option" not in res.output + import json + assert json.loads(res.output)["label"] in {"supported", "not_found"}