diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 25466445..7981e5fe 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -46,6 +46,18 @@ jobs: python -m pip install --upgrade pip pip install -r requirements.txt + - name: Run make init (full CodeQL install) + if: matrix.python-version == '3.14' + env: + GITHUB_TOKEN: ${{ github.token }} + run: make init + + - name: Run make init (skip CodeQL install) + if: matrix.python-version != '3.14' + env: + CODEQL_SKIP_INSTALL: 1 + run: make init + - name: Run pytest with coverage id: pytest run: | diff --git a/.gitignore b/.gitignore index 50a050ba..47faff15 100644 --- a/.gitignore +++ b/.gitignore @@ -220,6 +220,13 @@ __marimo__/ tmp/* !tmp/.gitkeep +# CodeQL managed install and cache +.tools/ +.cache/codeql/ + +# CodeQL runtime artifacts +itemdb/codeql/* + # CodeCome runtime artifacts itemdb/index.md diff --git a/.project/codeql-integration-plan.md b/.project/codeql-integration-plan.md new file mode 100644 index 00000000..f0f18bfd --- /dev/null +++ b/.project/codeql-integration-plan.md @@ -0,0 +1,1106 @@ +# CodeQL Integration Plan + +Status: Implementation complete +Branch: `wip/codeql-integration-plan` +Scope: install, config, pack resolver, runner, SARIF normalization, risk import, pipeline orchestration. + +## Goals + +Integrate CodeQL into CodeCome as a first-class static-analysis capability used by the normal workflow, not as a manual side tool. 
+ +The integration should: + +- run automatically during Phase 1 unless explicitly disabled; +- run after the model has produced an initial target/build/language profile; +- enrich `itemdb/notes/file-risk-index.yml` and related reconnaissance notes; +- feed Phase 2 candidate hypothesis generation; +- inject per-file CodeQL context into `make sweep`; +- support official, GitHub Security Lab, Trail of Bits, coding standards, and local CodeCome query packs; +- keep the implementation simple and maintainable; +- avoid over-engineering such as external phase-definition YAMLs or a generic workflow engine. + +## Non-goals + +- Do not make CodeQL a replacement for model reasoning, counter-analysis, or validation. +- Do not confirm findings solely because CodeQL reported an alert. +- Do not require CodeQL for every target or make failures fatal by default. +- Do not introduce a `config/` directory just for CodeQL. +- Do not add declarative YAML phase orchestration. +- Do not keep the old raw `opencode run` bypass inside official phase targets. + +## Key design decisions + +### 1. Use `templates/codeql-packs.yml` + +Use a small, easy-to-maintain catalog at: + +```text +./templates/codeql-packs.yml +``` + +This avoids adding a new `config/` directory and keeps the pack mapping close to other CodeCome templates/schemas. + +The catalog should be a simple mapping from CodeQL language id to pack profile names and package references. + +### 2. Use `tools/codeql.py` as the dedicated CodeQL CLI + +Prefer: + +```bash +tools/codeql.py install +tools/codeql.py check +tools/codeql.py run --plan itemdb/notes/codeql-plan.yml +tools/codeql.py normalize +tools/codeql.py import-risk +tools/codeql.py create-candidates +tools/codeql.py context --file src/path/file.ext +tools/codeql.py check-artifacts +``` + +rather than: + +```bash +tools/codecome.py codeql ... +``` + +Rationale: + +- `tools/codecome.py` is currently a small workspace helper for `check`, `status`, and `next-id`. 
+- CodeQL will have enough subcommands and internal logic to deserve a focused CLI wrapper. +- The harness can call `tools/codeql.py` directly without bloating `tools/codecome.py`. +- This does not prevent a future CLI consolidation if/when `tools/codecome.py` becomes the single public entrypoint. + +Implementation shape: + +```text +tools/codeql.py # thin argparse CLI + +tools/codeql/ + __init__.py + config.py # env/config resolution + install.py # managed CodeQL CLI install + packs.py # templates/codeql-packs.yml resolver + runner.py # database create/analyze orchestration + sarif.py # SARIF loading/extraction helpers + normalize.py # SARIF -> normalized alerts + risk.py # normalized alerts -> file-risk-index enrichment + candidates.py # normalized alerts -> candidate findings/briefing + context.py # per-file sweep context + artifacts.py # manifest/check-artifact helpers +``` + +### 3. `run-agent.py` remains the CodeCome harness + +`run-agent.py` is not just a phase runner. It is the CodeCome harness used for phases and chat mode. + +For this integration, extend the existing harness directly. Do not introduce a YAML workflow definition or a generic step engine. + +Phase orchestration should be explicit Python code, for example: + +```python +def run_phase_1(args: Args) -> int: + run_gate("1") + + run_agent_step( + phase="1a", + label="Target Profile", + agent="recon", + prompt_file="prompts/phase-1a-profile.md", + ) + run_gate("1a") + + run_codeql_phase_1() + run_codeql_artifact_gate() + + run_agent_step( + phase="1b", + label="CodeQL-assisted Reconnaissance", + agent="recon", + prompt_file="prompts/phase-1b-codeql-recon.md", + ) + run_gate("1b") + + run_agent_step( + phase="1c", + label="Sandbox Bootstrap", + agent="recon", + prompt_file="prompts/phase-1c-sandbox.md", + ) + run_gate("1c") + + return 0 +``` + +Chat mode should continue to use the existing chat path and should not be forced into phase semantics. + +### 4. 
Remove `CODECOME_USE_WRAPPER` + +Remove the raw `opencode run` bypass from official phase targets immediately. + +Official phases must always pass through the CodeCome harness because the harness is now responsible for: + +- subphase orchestration; +- CodeQL execution; +- deterministic gates; +- candidate briefing/precreation; +- prompt enrichment; +- run logs and artifacts; +- future deterministic tooling. + +If a raw debug path is useful, add an explicit non-workflow target such as: + +```bash +make opencode-raw AGENT=auditor PROMPT_FILE=prompts/foo.md +``` + +but do not keep raw mode as an alternative implementation of `make phase-*`. + +## Updated Phase 1 flow + +Use clear subphase names: + +```text +Phase 1a — Target profile +Phase 1b — CodeQL-assisted reconnaissance +Phase 1c — Sandbox bootstrap +``` + +CodeQL runs between Phase 1a and Phase 1b. + +```text +make phase-1 + -> tools/run-agent.py --phase 1 + + 1. gate-check phase 1 + + 2. model: Phase 1a target profile + outputs: + itemdb/notes/target-profile.md + itemdb/notes/build-model.md + itemdb/notes/codeql-plan.yml + + 3. gate-check phase 1a + verifies: + - required 1a outputs exist + - codeql-plan.yml is valid YAML + - codeql-plan.yml has the required fields + - no accidental findings were created + + 4. deterministic CodeQL step + command: + tools/codeql.py run --plan itemdb/notes/codeql-plan.yml + outputs: + itemdb/codeql/run-manifest.yml + itemdb/codeql/selected-query-packs.yml + itemdb/codeql/sarif/*.sarif + itemdb/codeql/normalized/alerts.yml + itemdb/codeql/normalized/file-signals.yml + itemdb/codeql/codeql-summary.md + + 5. CodeQL artifact gate + verifies: + - skipped/soft-failed/running outcome is recorded clearly + - normalized artifacts exist when analysis succeeded + - run-manifest.yml exists even on skip/failure + + 6. 
model: Phase 1b CodeQL-assisted reconnaissance + reads: + - 1a notes + - CodeQL artifacts + outputs: + itemdb/notes/attack-surface.md + itemdb/notes/execution-model.md + itemdb/notes/trust-boundaries.md + itemdb/notes/data-flow.md + itemdb/notes/validation-model.md + itemdb/notes/interesting-files.md + itemdb/notes/file-risk-index.yml + itemdb/notes/security-assumptions.md + + 7. gate-check phase 1b + verifies: + - required recon notes exist + - file-risk-index.yml is valid + - scores are 1..5 + - paths are workspace-relative and under src/ + - no template placeholder entries remain + - no accidental findings were created + + 8. model: Phase 1c sandbox bootstrap + outputs: + sandbox/ + itemdb/notes/sandbox-plan.md + + 9. gate-check phase 1c + verifies: + - sandbox status/provenance + - sandbox validation result + - final frontmatter/checks +``` + +## Phase 1a prompt + +Create: + +```text +prompts/phase-1a-profile.md +``` + +Responsibilities: + +- broad source tree mapping; +- language/framework detection; +- build model detection; +- primary/secondary target identification; +- preliminary attack-surface hints; +- generate `itemdb/notes/codeql-plan.yml`; +- do not create vulnerability findings; +- do not bootstrap sandbox; +- do not run CodeQL manually. + +Required outputs: + +```text +itemdb/notes/target-profile.md +itemdb/notes/build-model.md +itemdb/notes/codeql-plan.yml +``` + +## Phase 1b prompt + +Create: + +```text +prompts/phase-1b-codeql-recon.md +``` + +Responsibilities: + +- read the Phase 1a outputs; +- read CodeQL artifacts if present; +- treat CodeQL results as reconnaissance evidence, not proof of vulnerability; +- complete the Phase 1 reconnaissance notes; +- enrich `file-risk-index.yml` with CodeQL file signals; +- prepare Phase 2 and sweep focus. 
+ +Required outputs: + +```text +itemdb/notes/attack-surface.md +itemdb/notes/execution-model.md +itemdb/notes/trust-boundaries.md +itemdb/notes/data-flow.md +itemdb/notes/validation-model.md +itemdb/notes/interesting-files.md +itemdb/notes/file-risk-index.yml +itemdb/notes/security-assumptions.md +``` + +## Phase 1c prompt + +Create: + +```text +prompts/phase-1c-sandbox.md +``` + +This should contain the sandbox bootstrap portion currently embedded in `prompts/phase-1-recon.md`. + +Responsibilities: + +- inspect current sandbox state; +- select/apply/adapt a sandbox template; +- author missing helper scripts; +- run sandbox validation; +- write `itemdb/notes/sandbox-plan.md`; +- leave `sandbox/` ready for Phase 2/4/5 where possible. + +## `codeql-plan.yml` template + +Add: + +```text +templates/codeql-plan.yml +``` + +Example: + +```yaml +schema_version: 1 +generated_by: "phase-1a-profile" + +source_path: "./src" +recommended: true + +languages: + - id: "python" + confidence: "HIGH" + build_mode: "none" + build_command: null + packs: + - "official" + - "github-security-lab" + + - id: "javascript-typescript" + confidence: "MEDIUM" + build_mode: "none" + build_command: null + packs: + - "official" + +exclude: + - "src/**/tests/**" + - "src/**/fixtures/**" + - "src/**/vendor/**" + - "src/**/node_modules/**" + +notes: + - "Primary target appears to be a Python API service." +``` + +C/C++ example: + +```yaml +schema_version: 1 +generated_by: "phase-1a-profile" + +source_path: "./src" +recommended: true + +languages: + - id: "c-cpp" + confidence: "HIGH" + build_mode: "manual" + build_command: "make -C src" + packs: + - "official" + - "github-security-lab" + - "trailofbits" + - "coding-standards" + +exclude: + - "src/**/tests/**" + - "src/**/vendor/**" +``` + +Allowed pack profile names: + +```text +official +github-security-lab +trailofbits +coding-standards +local +``` + +The model chooses profiles, not exact package names. 
The harness resolves profiles via `templates/codeql-packs.yml`. + +## `templates/codeql-packs.yml` + +Add: + +```text +templates/codeql-packs.yml +``` + +Keep it intentionally simple: + +```yaml +schema_version: 1 + +packs: + python: + official: + - "codeql/python-queries" + github-security-lab: + - "githubsecuritylab/codeql-python-queries" + local: + - "./queries/codeql/python" + + javascript-typescript: + official: + - "codeql/javascript-queries" + github-security-lab: + - "githubsecuritylab/codeql-javascript-queries" + local: + - "./queries/codeql/javascript" + + c-cpp: + official: + - "codeql/cpp-queries" + github-security-lab: + - "githubsecuritylab/codeql-cpp-queries" + trailofbits: + - "trailofbits/cpp-queries" + coding-standards: + - "codeql/coding-standards-cpp" + local: + - "./queries/codeql/cpp" + + go: + official: + - "codeql/go-queries" + github-security-lab: + - "githubsecuritylab/codeql-go-queries" + trailofbits: + - "trailofbits/go-queries" + local: + - "./queries/codeql/go" + + csharp: + official: + - "codeql/csharp-queries" + github-security-lab: + - "githubsecuritylab/codeql-csharp-queries" + local: + - "./queries/codeql/csharp" + + java-kotlin: + official: + - "codeql/java-queries" + github-security-lab: + - "githubsecuritylab/codeql-java-queries" + local: + - "./queries/codeql/java" + +candidate_policy: + official: + allow_precreate: true + github-security-lab: + allow_precreate: true + trailofbits: + allow_precreate: true + coding-standards: + allow_precreate: false + local: + allow_precreate: true +``` + +Notes: + +- Some package names may require verification during implementation with `codeql pack download` / `codeql resolve packs`. +- Missing/unavailable packs should be recorded as warnings in `run-manifest.yml`, not crash the phase under soft fail policy. +- `coding-standards` packs should enrich risk and sweep context by default, but should not precreate findings unless explicitly allowed later. 
+ +## CodeQL installation and `make init` + +Rename `make venv` to `make init`, keeping `venv` as an alias. + +```makefile +.PHONY: init venv venv-check + +init: + @python3 -m venv .venv + @$(PYTHON) -m pip install --upgrade pip + @$(PYTHON) -m pip install --no-input -r requirements.txt + @if [ "$$CODEQL" != "0" ] && [ "$$CODEQL_SKIP_INSTALL" != "1" ]; then \ + $(PYTHON) tools/codeql.py install; \ + fi + +venv: init +``` + +Install location: + +```text +.tools/codeql/X.Y.Z/ +.tools/codeql/current -> X.Y.Z +.cache/codeql/ +``` + +Update `.gitignore`: + +```text +.tools/ +.cache/ +``` + +Environment controls: + +```bash +CODEQL=0 make init +CODEQL_SKIP_INSTALL=1 make init +CODEQL_VERSION=X.Y.Z make init +CODEQL_FORCE_INSTALL=1 make init +``` + +## CodeQL runtime controls + +Supported escape hatches: + +```bash +CODEQL=0 make phase-1 +CODEQL_SKIP=1 make phase-1 +CODEQL_FAIL_POLICY=hard make phase-1 +CODEQL_PACKS=0 make phase-1 +CODEQL_COMMUNITY_PACKS=0 make phase-1 +CODEQL_CANDIDATES=off make phase-2 +CODEQL_CANDIDATES=briefing make phase-2 +CODEQL_CANDIDATES=precreate make phase-2 +``` + +Resolution priority: + +```text +environment variables > codecome.yml > defaults +``` + +Default policy: + +```text +CodeQL enabled: yes +Failure policy: soft +Candidate mode: precreate +Community packs: enabled +``` + +## `codecome.yml` additions + +Keep this compact; do not embed the full pack map in `codecome.yml`. 
+ +```yaml +static_analysis: + codeql: + enabled: true + fail_policy: "soft" + pack_catalog: "./templates/codeql-packs.yml" + + install: + managed: true + version: "latest" + path: ".tools/codeql/current/codeql" + + output_dir: "./itemdb/codeql" + database_dir: "./itemdb/codeql/databases" + cache_dir: "./.cache/codeql" + + phase_1: + enabled: true + + phase_2: + enabled: true + candidate_mode: "precreate" + max_candidates: 10 + + sweep: + enabled: true + inject_context: true +``` + +## CodeQL artifacts + +Use this layout: + +```text +itemdb/codeql/ + run-manifest.yml + selected-query-packs.yml + codeql-summary.md + + databases/ + python/ + c-cpp/ + + sarif/ + python.official.sarif + python.github-security-lab.sarif + cpp.trailofbits.sarif + cpp.coding-standards.sarif + + normalized/ + alerts.yml + file-signals.yml + candidate-findings.yml +``` + +`run-manifest.yml` should always exist after a CodeQL step, even when CodeQL was skipped or soft-failed. + +Example: + +```yaml +schema_version: 1 +phase: "phase-1" +status: "completed" # completed | skipped | soft-failed | failed +codeql_enabled: true +codeql_version: "2.x.y" +started_at: "YYYY-MM-DDTHH:MM:SSZ" +finished_at: "YYYY-MM-DDTHH:MM:SSZ" +plan_file: "itemdb/notes/codeql-plan.yml" +pack_catalog: "templates/codeql-packs.yml" +languages: + - "python" +warnings: [] +failures: [] +``` + +## SARIF normalization + +Do not expose raw SARIF directly to model prompts. Normalize it first. + +`itemdb/codeql/normalized/alerts.yml`: + +```yaml +schema_version: 1 +generated_by: "codeql-normalize" +codeql_version: "2.x.y" +target: "codecome-target" + +alerts: + - id: "CQ-0001" + fingerprint: "..." 
+ language: "python" + pack_profile: "github-security-lab" + pack: "githubsecuritylab/codeql-python-queries" + rule_id: "py/path-injection" + rule_name: "Uncontrolled data used in path expression" + severity: "warning" + security_severity: "7.5" + precision: "high" + kind: "path-problem" + primary_location: + path: "src/api/upload.py" + start_line: 88 + end_line: 88 + flow: + source: + path: "src/api/routes.py" + line: 42 + label: "request file name" + sink: + path: "src/api/upload.py" + line: 88 + label: "filesystem write" + steps: + - path: "src/api/routes.py" + line: 42 + message: "..." + - path: "src/api/upload.py" + line: 88 + message: "..." + mapped: + category: "Path traversal" + suggested_validation_methods: + - "static_proof" + - "http_exploit" +``` + +`file-signals.yml`: + +```yaml +schema_version: 1 +files: + - path: "src/api/upload.py" + codeql_score_boost: 2 + suggested_sweep: true + alerts: + total: 3 + path_problems: 1 + high_precision: 1 + rules: + - "py/path-injection" +``` + +## File risk enrichment + +`tools/codeql.py import-risk` should enrich `itemdb/notes/file-risk-index.yml`. + +Rules: + +- Preserve existing entries and model-authored reasons. +- Do not duplicate file entries. +- Cap scores at 5. +- Explain every score boost in `reasons`. +- Add an optional `external_signals.codeql` block. + +Example: + +```yaml +- path: "src/api/upload.py" + score: 5 + confidence: "HIGH" + target_area: "file upload API" + reasons: + - "Handles attacker-controlled multipart upload data." + - "CodeQL signal: py/path-injection reports user-controlled path reaching filesystem sink." 
+ sources: + - "HTTP multipart filename" + sinks: + - "filesystem write" + trust_boundaries: + - "remote client -> server filesystem" + suggested_vulnerability_classes: + - "Path traversal" + - "File upload vulnerabilities" + suggested_skills: + - "web-security" + suggested_validation_methods: + - "static_proof" + - "http_exploit" + external_signals: + codeql: + alerts: 3 + path_problems: 1 + highest_precision: "high" + rules: + - "py/path-injection" +``` + +## Phase 2 candidate handling + +Before the Phase 2 model invocation, the harness should call: + +```bash +tools/codeql.py create-candidates +``` + +Inputs: + +```text +itemdb/codeql/normalized/alerts.yml +itemdb/codeql/normalized/file-signals.yml +itemdb/notes/file-risk-index.yml +itemdb/findings/**/CC-*.md +``` + +Outputs: + +```text +itemdb/codeql/normalized/candidate-findings.yml +itemdb/notes/codeql-candidate-findings.md +``` + +Candidate modes: + +```text +off -> do nothing +briefing -> write candidate briefing files only +precreate -> create filtered PENDING findings before model runs +``` + +Default: `precreate`. + +Precreate only when: + +- candidate is not under ignored/test/vendor/generated paths; +- a CodeCome category can be inferred; +- affected files are concrete; +- there is a plausible sink or security decision; +- the candidate is from an allowed pack profile; +- max candidate limit is not exceeded. + +Phase 2 prompt must require candidate disposition. + +Add to `prompts/phase-2-audit.md`: + +```md +## CodeQL candidate handling + +If `itemdb/notes/codeql-candidate-findings.md` or +`itemdb/codeql/normalized/candidate-findings.yml` exists, you must +account for each candidate. + +For each candidate, choose one: + +- create or complete a PENDING finding, +- merge it into an existing finding, +- defer it to `make sweep` with a concrete file target, +- reject it as non-security-relevant or out of scope. 
+ +Write the decision table to: + + itemdb/notes/codeql-candidate-disposition.md +``` + +Add a Phase 2 gate: + +- if candidate findings exist, `itemdb/notes/codeql-candidate-disposition.md` must exist; +- each candidate id should appear in the disposition table; +- created findings must pass frontmatter validation. + +## Sweep context injection + +`tools/run-sweep.py` should request per-file CodeQL context before writing the temporary sweep prompt. + +Command: + +```bash +tools/codeql.py context --file src/path/file.ext +``` + +If context exists, inject a section like: + +```md +## CodeQL context for this file + +Relevant alerts: + +- `CQ-0001` / `py/path-injection` + - pack: `githubsecuritylab/codeql-python-queries` + - source: `src/api/routes.py:42` + - sink: `src/api/upload.py:88` + - summary: user-controlled path reaches filesystem write + +Treat this as a static-analysis hint, not proof. Verify attacker control, +reachability, sanitizers, authorization, and impact before creating a finding. +``` + +Add `SWEEP_ARGS` support to the Makefile: + +```makefile +sweep: venv-check + @if [ -n "$(FILE)" ]; then \ + $(PYTHON) tools/run-sweep.py --file "$(FILE)" $(SWEEP_ARGS); \ + else \ + $(PYTHON) tools/run-sweep.py $(SWEEP_ARGS); \ + fi +``` + +## Makefile changes + +### Remove raw wrapper mode + +Remove all `CODECOME_USE_WRAPPER` branches from phase targets. + +Phase targets become: + +```makefile +phase-1: venv-check + @$(PYTHON) tools/run-agent.py --phase 1 + +phase-2: venv-check + @$(PYTHON) tools/run-agent.py --phase 2 + +phase-3: venv-check + @$(PYTHON) tools/run-agent.py --phase 3 + +phase-4: venv-check + @test -n "$(FINDING)" || (...) + @$(PYTHON) tools/run-agent.py --phase 4 --finding "$(FINDING)" + +phase-5: venv-check + @test -n "$(FINDING)" || (...) 
+ @$(PYTHON) tools/run-agent.py --phase 5 --finding "$(FINDING)" + +phase-6: venv-check + @$(PYTHON) tools/run-agent.py --phase 6 +``` + +### Optional raw debug target + +```makefile +opencode-raw: + @test -n "$(AGENT)" || (echo "AGENT is required" && exit 1) + @test -n "$(PROMPT_FILE)" || (echo "PROMPT_FILE is required" && exit 1) + @opencode run --agent "$(AGENT)" "$$(cat "$(PROMPT_FILE)")" +``` + +## Gates + +Extend `tools/gate-check.py` with subphase gates. + +### `gate-check.py 1a` + +Checks: + +- `itemdb/notes/target-profile.md` exists; +- `itemdb/notes/build-model.md` exists; +- `itemdb/notes/codeql-plan.yml` exists; +- `codeql-plan.yml` is valid YAML; +- if `recommended: true`, at least one language entry exists; +- each language entry has `id`, `confidence`, `build_mode`, `packs`; +- no new findings were created during 1a. + +### `gate-check.py 1b` + +Checks: + +- all required recon notes exist; +- `itemdb/notes/file-risk-index.yml` exists; +- YAML is valid; +- `schema_version` is present; +- `files` is a list; +- all file paths are workspace-relative; +- all scores are integers 1..5; +- template placeholder entry is gone; +- no new findings were created during 1b. + +### `gate-check.py 1c` + +Checks: + +- `itemdb/notes/sandbox-plan.md` exists; +- sandbox status/provenance exists or clear halt protocol exists; +- sandbox validation was attempted or static-only/nested-virt justification exists; +- frontmatter check passes. + +### CodeQL artifact gate + +Can live in `tools/codeql.py check-artifacts` rather than `gate-check.py`. + +Checks: + +- `run-manifest.yml` exists after a CodeQL step; +- manifest status is one of `completed`, `skipped`, `soft-failed`, `failed`; +- if completed, normalized outputs exist; +- if skipped/soft-failed, reason is recorded; +- no raw exception trace is left as the only diagnostic. 
+ +## Candidate finding frontmatter + +If precreate mode is used, generated findings should include the normal finding frontmatter plus optional origin/static-analysis metadata if the current frontmatter checker allows it. + +Preferred fields if allowed: + +```yaml +origin: + - "codeql" + +static_analysis: + codeql: + alerts: + - "CQ-0001" + rules: + - "py/path-injection" + packs: + - "githubsecuritylab/codeql-python-queries" + sarif: + - "itemdb/codeql/sarif/python.github-security-lab.sarif" +``` + +If the frontmatter checker rejects extra fields, place this information in the finding body under: + +```md +# Static-analysis evidence +``` + +Do not weaken the frontmatter gate to accept arbitrary fields without a deliberate schema update. + +## Testing plan + +Add fixtures: + +```text +tests/fixtures/codeql/ + sarif-path-problem.json + sarif-local-problem.json + sarif-multiple-packs.json + file-risk-index.base.yml + codeql-plan.python.yml + codeql-plan.cpp.yml + codeql-packs.yml +``` + +Add tests: + +```text +tests/test_codeql_packs.py +tests/test_codeql_normalize.py +tests/test_codeql_risk.py +tests/test_codeql_candidates.py +tests/test_codeql_context.py +tests/test_phase1_subphase_gates.py +``` + +Required cases: + +- pack catalog resolves requested profiles by language; +- unavailable pack profile is reported clearly; +- SARIF path-problem extracts source/sink/steps; +- local SARIF problem without flow is normalized without crashing; +- file risk enrichment preserves existing entries; +- file risk enrichment caps score at 5; +- ignored paths do not create candidates; +- coding-standards alerts enrich risk but do not precreate candidates by default; +- context lookup returns alerts where file is primary or related location; +- Phase 1a gate rejects missing/invalid `codeql-plan.yml`; +- Phase 1b gate rejects placeholder file-risk-index entries. 
+ +## Implementation PR sequence + +### PR 1 — Harness simplification and init rename + +- Remove `CODECOME_USE_WRAPPER` branches from Makefile. +- Make all `phase-*` targets call `tools/run-agent.py`. +- Add optional `opencode-raw` debug target. +- Rename `make venv` to `make init`. +- Keep `venv: init` alias. +- Update help text. + +### PR 2 — Split Phase 1 into 1a/1b/1c + +- Add prompts: + - `prompts/phase-1a-profile.md` + - `prompts/phase-1b-codeql-recon.md` + - `prompts/phase-1c-sandbox.md` +- Add `templates/codeql-plan.yml`. +- Extend `run-agent.py` with explicit Phase 1 orchestration. +- Add `gate-check.py 1a`, `1b`, `1c`. + +### PR 3 — CodeQL CLI and install/check + +- Add `tools/codeql.py`. +- Add `tools/codeql/` modules. +- Implement `install` and `check`. +- Install CodeQL into `.tools/codeql/`. +- Add `.tools/` and `.cache/` to `.gitignore`. +- Respect `CODEQL=0` and `CODEQL_SKIP_INSTALL=1`. + +### PR 4 — Pack catalog and resolver + +- Add `templates/codeql-packs.yml`. +- Implement pack resolver. +- Support profiles: + - `official` + - `github-security-lab` + - `trailofbits` + - `coding-standards` + - `local` +- Write `selected-query-packs.yml`. +- Validate pack catalog schema. + +### PR 5 — CodeQL run and SARIF normalization + +- Implement `tools/codeql.py run`. +- Read `itemdb/notes/codeql-plan.yml`. +- Create databases per language. +- Analyze with selected packs. +- Normalize SARIF. +- Write: + - `run-manifest.yml` + - `alerts.yml` + - `file-signals.yml` + - `codeql-summary.md` +- Implement soft/hard fail policy. + +### PR 6 — Phase 1 CodeQL integration + +- Call CodeQL between Phase 1a and Phase 1b. +- Add CodeQL artifact gate. +- Ensure Phase 1b prompt reads CodeQL artifacts. +- Enrich file-risk-index from CodeQL signals. + +### PR 7 — Phase 2 candidates + +- Implement `tools/codeql.py create-candidates`. +- Generate `candidate-findings.yml`. +- Generate `codeql-candidate-findings.md`. +- Support `off`, `briefing`, and `precreate` modes. 
+- Update Phase 2 prompt with candidate disposition requirement. +- Add gate for candidate disposition. + +### PR 8 — Sweep context + +- Implement `tools/codeql.py context --file`. +- Inject context into `tools/run-sweep.py` prompts. +- Add `SWEEP_ARGS` to Makefile. +- Update sweep prompt with CodeQL context rules. + +## Review checklist before implementation + +- Confirm `tools/codeql.py` vs `tools/codecome.py codeql` decision. +- Confirm exact CodeQL install source/version policy. +- Verify package names in `templates/codeql-packs.yml`. +- Confirm default `CODEQL_CANDIDATES` mode: `precreate`. +- Confirm whether finding frontmatter schema should accept `origin` / `static_analysis`. +- Confirm whether `coding-standards` should ever precreate findings by default. +- Confirm whether Phase 1c sandbox prompt should be copied from current `phase-1-recon.md` or rewritten tighter. diff --git a/Makefile b/Makefile index 2b7c1260..e0c65e0a 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ # Copyright (C) 2025-2026 Pablo Ruiz García # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later -.PHONY: help venv venv-check check status next-id frontmatter tests test-parity itemdb-reset index report +.PHONY: help init venv env-check check status next-id frontmatter tests test-parity itemdb-reset codeql-clean index report .PHONY: findings findings-create findings-move findings-evidence findings-package -.PHONY: phase-1 phase-2 phase-3 phase-4 phase-5 phase-6 validate-all exploit-all +.PHONY: phase-1 phase-2 phase-3 phase-4 phase-5 phase-6 validate-all exploit-all opencode-raw .PHONY: sandbox-setup sandbox-check sandbox-up sandbox-down sandbox-shell sandbox-logs sandbox-clean sandbox-reset sandbox-build sandbox-test .PHONY: sandbox-list sandbox-inspect sandbox-detect sandbox-bootstrap sandbox-validate sandbox-regenerate sandbox-status show-model @@ -12,25 +12,19 @@ export PATH := $(CURDIR)/.venv/bin:$(PATH) export PROMPT_EXTRA export PROMPT_EXTRA_FILE -CHAT ?= 0 -ifeq 
($(CHAT),1) -WRAPPER_ARGS += --chat -endif - -# Env vars injected into opencode serve (wrapper mode) and opencode run (raw mode) -CODECOME_OPENCODE_ENV_EXPORT := OPENCODE_ENABLE_EXA=1 - # Pass --thinking to raw opencode run when CODECOME_THINKING=1 OPENCODE_THINKING_FLAG := $(if $(filter 1,$(CODECOME_THINKING)),--thinking,) ifndef NO_COLOR RED := \033[31m +GREEN := \033[32m YELLOW := \033[33m CYAN := \033[36m BOLD := \033[1m RESET := \033[0m else RED := +GREEN := YELLOW := CYAN := BOLD := @@ -44,7 +38,7 @@ help: @printf "\n" @printf " $(BOLD)$(CYAN)Workflow phases:$(RESET)\n" @printf "\n" - @printf " $(BOLD)make venv$(RESET) Create/update repo-local virtualenv\n" + @printf " $(BOLD)make init$(RESET) Create/update repo-local virtualenv\n" @printf " $(BOLD)make phase-1$(RESET) Run reconnaissance\n" @printf " $(BOLD)make phase-2$(RESET) Run hypothesis generation\n" @printf " $(BOLD)make phase-3$(RESET) Run counter-analysis\n" @@ -60,11 +54,9 @@ help: @printf " $(BOLD)make sweep$(RESET) Run deep sweep on top-scoring files\n" @printf " $(BOLD)make sweep FILE=\"src/foo.*\"$(RESET) Run deep sweep on specific file(s)\n" @printf "\n" - @printf " $(BOLD)$(CYAN)Wrapper controls:$(RESET)\n" + @printf " $(BOLD)$(CYAN)Phase controls:$(RESET)\n" @printf "\n" - @printf " $(BOLD)CODECOME_USE_WRAPPER=0$(RESET) Bypass styled wrapper and use raw opencode run\n" - @printf " $(BOLD)CODECOME_THINKING=1$(RESET) Show model reasoning/thinking blocks in output\n" - @printf " $(BOLD)OPENCODE_ARGS='...'$(RESET) Extra flags for opencode run (forwarded directly when CODECOME_USE_WRAPPER=0; in wrapper mode only --model, --variant and --thinking are used)\n" + @printf " $(BOLD)CODECOME_THINKING=1$(RESET) Show model reasoning/thinking blocks in output\n" @printf " $(BOLD)CODECOME_MODEL=$(RESET) Pin the model per phase (e.g. anthropic/claude-opus-4-7)\n" @printf " $(BOLD)CODECOME_MODEL_VARIANT=$(RESET) Pin the model variant (e.g. 
high, max)\n" @printf " $(BOLD)PROMPT_EXTRA=\"...\"$(RESET) Append extra instructions to phase prompt\n" @@ -73,6 +65,13 @@ help: @printf " $(BOLD)make show-model$(RESET) Print the model resolution table for an agent\n" @printf " $(BOLD)make show-model AGENT=auditor$(RESET)\n" @printf "\n" + @printf " $(BOLD)$(CYAN)Raw debug (non-workflow):$(RESET)\n" + @printf "\n" + @printf " $(BOLD)make opencode-raw$(RESET) Run opencode directly (bypasses harness)\n" + @printf " $(BOLD)AGENT=$(RESET) Required. Agent to run (e.g. auditor)\n" + @printf " $(BOLD)PROMPT_FILE=path$(RESET) Required. Prompt file to send\n" + @printf " $(BOLD)CODECOME_THINKING=1$(RESET) Show reasoning/thinking blocks\n" + @printf "\n" @printf " $(BOLD)$(CYAN)Workspace tools:$(RESET)\n" @printf "\n" @printf " $(BOLD)make check$(RESET) Validate workspace structure and config\n" @@ -81,6 +80,7 @@ help: @printf " $(BOLD)make frontmatter$(RESET) Validate finding frontmatter\n" @printf " $(BOLD)make tests$(RESET) Run dev test suite + frontmatter gate\n" @printf " $(BOLD)make itemdb-reset$(RESET) Remove local audit artifacts and recreate .gitkeep files\n" + @printf " $(BOLD)make codeql-clean$(RESET) Remove generated CodeQL artifacts and cache\n" @printf " $(BOLD)make index$(RESET) Regenerate itemdb/index.md\n" @printf " $(BOLD)make report$(RESET) Regenerate itemdb/reports/report.md (local, no AI)\n" @printf "\n" @@ -106,7 +106,7 @@ help: @printf " $(BOLD)make sandbox-build$(RESET) Build the target inside the sandbox\n" @printf " $(BOLD)make sandbox-test$(RESET) Test the target inside the sandbox\n" @printf "\n" - @printf " $(BOLD)$(CYAN)Sandbox bootstrap (Phase 1b):$(RESET)\n" + @printf " $(BOLD)$(CYAN)Sandbox bootstrap (Phase 1c):$(RESET)\n" @printf "\n" @printf " $(BOLD)make sandbox-list$(RESET) List curated example sandboxes\n" @printf " $(BOLD)make sandbox-inspect ID=python$(RESET) Inspect one example\n" @@ -127,88 +127,99 @@ help: # Python environment # 
--------------------------------------------------------------------------- -venv: - @python3 -m venv .venv - @$(PYTHON) -m pip install --upgrade pip - @$(PYTHON) -m pip install --no-input -r requirements.txt +init: + @printf "\n$(BOLD)$(CYAN)==> [1/4] Creating Python virtual environment$(RESET)\n" + @python3 -m venv .venv || { printf "$(BOLD)$(RED)[FAIL]$(RESET) Could not create .venv\n"; exit 1; } + @printf "$(BOLD)$(GREEN)[OK]$(RESET) Virtual environment ready at .venv\n\n" + @printf "$(BOLD)$(CYAN)==> [2/4] Upgrading pip$(RESET)\n" + @$(PYTHON) -m pip install --upgrade pip || { printf "$(BOLD)$(RED)[FAIL]$(RESET) pip upgrade failed\n"; exit 1; } + @printf "$(BOLD)$(GREEN)[OK]$(RESET) pip upgraded\n\n" + @printf "$(BOLD)$(CYAN)==> [3/4] Installing Python requirements$(RESET)\n" + @$(PYTHON) -m pip install --no-input -r requirements.txt || { printf "$(BOLD)$(RED)[FAIL]$(RESET) requirements install failed\n"; exit 1; } + @printf "$(BOLD)$(GREEN)[OK]$(RESET) Python dependencies installed\n\n" + @printf "$(BOLD)$(CYAN)==> [4/4] Installing managed CodeQL CLI$(RESET)\n" + @rm -f .tools/codeql/.disabled + @if [ "$$CODEQL" != "0" ] && [ "$$CODEQL_SKIP_INSTALL" != "1" ]; then \ + if $(PYTHON) -c "import yaml,sys; cfg=yaml.safe_load(open('codecome.yml')); sys.exit(0 if cfg.get('audit',{}).get('static_analysis',{}).get('codeql',{}).get('enabled',True) else 1)" 2>/dev/null; then \ + $(PYTHON) tools/codeql.py install || { printf "$(BOLD)$(RED)[FAIL]$(RESET) managed CodeQL install failed\n"; exit 1; }; \ + printf "$(BOLD)$(GREEN)[OK]$(RESET) Managed CodeQL CLI ready\n"; \ + else \ + mkdir -p .tools/codeql && touch .tools/codeql/.disabled; \ + printf "$(BOLD)$(YELLOW)[SKIP]$(RESET) CodeQL disabled in codecome.yml\n"; \ + fi; \ + else \ + mkdir -p .tools/codeql && touch .tools/codeql/.disabled; \ + printf "$(BOLD)$(YELLOW)[SKIP]$(RESET) Managed CodeQL install skipped (CODEQL=0 or CODEQL_SKIP_INSTALL=1)\n"; \ + fi + @printf "\n$(BOLD)$(GREEN)Setup complete.$(RESET)\n" -venv-check: - @test -x "$(PYTHON)" || (printf 
"\n$(BOLD)$(RED)[FAIL]$(RESET) Missing repo virtualenv at .venv\n\nRun:\n\n make venv\n\n" && exit 1) - @$(PYTHON) -c "import yaml, rich" >/dev/null 2>&1 || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) .venv is missing required Python packages\n\nRun:\n\n make venv\n\nIf you updated requirements, rerun the same command to resync .venv.\n\n" && exit 1) +venv: init + +env-check: + @test -x "$(PYTHON)" || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) Missing repo virtualenv at .venv\n\nRun:\n\n make init\n\n" && exit 1) + @$(PYTHON) -c "import yaml, rich" >/dev/null 2>&1 || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) .venv is missing required Python packages\n\nRun:\n\n make init\n\nIf you updated requirements, rerun the same command to resync .venv.\n\n" && exit 1) + @if [ ! -f .tools/codeql/.disabled ]; then \ + test -x .tools/codeql/current/codeql || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) CodeQL is enabled but the managed binary is missing.\n\nRun:\n\n make init\n\nOr to explicitly disable CodeQL:\n\n CODEQL=0 make init\n\n" && exit 1); \ + fi # --------------------------------------------------------------------------- # Workflow phases # --------------------------------------------------------------------------- -phase-1: venv-check +phase-1: env-check @$(PYTHON) tools/gate-check.py 1 - @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ - $(CODECOME_OPENCODE_ENV_EXPORT) opencode run --agent recon $(OPENCODE_THINKING_FLAG) "$$(cat prompts/phase-1-recon.md)"; \ - else \ - $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 1 --label "Target Reconnaissance + Sandbox Bootstrap" --agent recon --prompt-file prompts/phase-1-recon.md; \ - fi + @$(PYTHON) tools/run-agent.py --phase 1 --label "Phase 1: Reconnaissance" --agent recon -phase-2: venv-check +phase-2: env-check @$(PYTHON) tools/gate-check.py 2 @$(PYTHON) tools/sandbox-bootstrap.py status --gate || ( \ printf "\n$(BOLD)$(YELLOW)[BLOCK]$(RESET) Phase 2 sandbox gate failed.\n" ; \ printf "Run: make sandbox-status\n" ; \ printf "Or 
override (not recommended): CODECOME_ALLOW_NO_SANDBOX=1 make phase-2\n\n" ; \ exit 1 ) - @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ - $(CODECOME_OPENCODE_ENV_EXPORT) opencode run --agent auditor $(OPENCODE_THINKING_FLAG) "$$(cat prompts/phase-2-audit.md)"; \ - else \ - $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 2 --label "Hypothesis Generation" --agent auditor --prompt-file prompts/phase-2-audit.md; \ - fi + @$(PYTHON) tools/run-agent.py --phase 2 --label "Hypothesis Generation" --agent auditor --prompt-file prompts/phase-2-audit.md -phase-3: venv-check +phase-3: env-check @$(PYTHON) tools/gate-check.py 3 - @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ - $(CODECOME_OPENCODE_ENV_EXPORT) opencode run --agent reviewer $(OPENCODE_THINKING_FLAG) "$$(cat prompts/phase-3-review.md)"; \ - else \ - $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 3 --label "Counter-analysis" --agent reviewer --prompt-file prompts/phase-3-review.md; \ - fi + @$(PYTHON) tools/run-agent.py --phase 3 --label "Counter-analysis" --agent reviewer --prompt-file prompts/phase-3-review.md -phase-4: venv-check +phase-4: env-check @test -n "$(FINDING)" || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) Missing required FINDING argument for Phase 4 (Validation).\n\nSpecify which finding you want to validate:\n\n $(BOLD)make phase-4 FINDING=CC-0001$(RESET)\n\nTo list available pending findings: $(BOLD)make findings STATUS=PENDING$(RESET)\n\n" && exit 1) @$(PYTHON) tools/gate-check.py 4 $(FINDING) - @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ - $(CODECOME_OPENCODE_ENV_EXPORT) opencode run --agent validator $(OPENCODE_THINKING_FLAG) "$$(sed 's#FINDING_PATH_OR_ID#$(FINDING)#g' prompts/phase-4-validate.md)"; \ - else \ - $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 4 --label "Validation" --agent validator --prompt-file prompts/phase-4-validate.md --finding "$(FINDING)"; \ - fi + @$(PYTHON) tools/run-agent.py --phase 4 --label "Validation" --agent validator --prompt-file 
prompts/phase-4-validate.md --finding "$(FINDING)" -phase-5: venv-check +phase-5: env-check @test -n "$(FINDING)" || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) Missing required FINDING argument for Phase 5 (Exploitation).\n\nSpecify which finding you want to exploit:\n\n $(BOLD)make phase-5 FINDING=CC-0001$(RESET)\n\nTo list available confirmed findings: $(BOLD)make findings STATUS=CONFIRMED$(RESET)\n\n" && exit 1) @$(PYTHON) tools/gate-check.py 5 $(FINDING) - @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ - $(CODECOME_OPENCODE_ENV_EXPORT) opencode run --agent exploiter $(OPENCODE_THINKING_FLAG) "$$(sed 's#FINDING_PATH_OR_ID#$(FINDING)#g' prompts/phase-5-exploit.md)"; \ - else \ - $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 5 --label "Exploit Development" --agent exploiter --prompt-file prompts/phase-5-exploit.md --finding "$(FINDING)"; \ - fi + @$(PYTHON) tools/run-agent.py --phase 5 --label "Exploit Development" --agent exploiter --prompt-file prompts/phase-5-exploit.md --finding "$(FINDING)" -phase-6: venv-check +phase-6: env-check @$(PYTHON) tools/gate-check.py 6 - @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ - $(CODECOME_OPENCODE_ENV_EXPORT) opencode run --agent reporter $(OPENCODE_THINKING_FLAG) "$$(cat prompts/phase-6-report.md)"; \ - else \ - $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 6 --label "Reporting" --agent reporter --prompt-file prompts/phase-6-report.md; \ - fi + @$(PYTHON) tools/run-agent.py --phase 6 --label "Reporting" --agent reporter --prompt-file prompts/phase-6-report.md -chat: venv-check +chat: env-check @$(PYTHON) tools/run-agent.py --chat --label "Interactive Chat" --agent $(or $(AGENT),chat) --prompt-file prompts/chat-initial.md $(if $(DEBUG),--debug,) -list-risk-files: venv-check +list-risk-files: env-check @$(PYTHON) tools/list-risk-files.py -sweep: venv-check +sweep: env-check @if [ -n "$(FILE)" ]; then \ $(PYTHON) tools/run-sweep.py --file "$(FILE)"; \ else \ $(PYTHON) tools/run-sweep.py; \ fi -validate-all: 
venv-check +# --------------------------------------------------------------------------- +# Raw opencode debug target (non-workflow) +# --------------------------------------------------------------------------- + +opencode-raw: + @test -n "$(AGENT)" || (echo "AGENT is required. Usage: make opencode-raw AGENT=auditor PROMPT_FILE=prompts/foo.md" && exit 1) + @test -r "$(PROMPT_FILE)" || (echo "PROMPT_FILE must be a readable file. Usage: make opencode-raw AGENT=auditor PROMPT_FILE=prompts/foo.md" && exit 1) + @opencode run --agent "$(AGENT)" $(OPENCODE_THINKING_FLAG) "$$(cat "$(PROMPT_FILE)")" + +validate-all: env-check @ids=$$($(PYTHON) tools/list-findings.py --status PENDING --format ids 2>/dev/null); \ if [ -z "$$ids" ]; then \ echo "No PENDING findings to validate."; \ @@ -221,7 +232,7 @@ validate-all: venv-check $(MAKE) phase-4 FINDING=$$f; \ done -exploit-all: venv-check +exploit-all: env-check @ids=$$($(PYTHON) tools/list-findings.py --status CONFIRMED --eligible-for-exploit --format ids 2>/dev/null); \ if [ -z "$$ids" ]; then \ echo "No eligible CONFIRMED findings to exploit."; \ @@ -238,26 +249,26 @@ exploit-all: venv-check # Workspace tools # --------------------------------------------------------------------------- -check: venv-check +check: env-check $(PYTHON) tools/codecome.py check -status: venv-check +status: env-check $(PYTHON) tools/codecome.py status -next-id: venv-check +next-id: env-check $(PYTHON) tools/codecome.py next-id -frontmatter: venv-check +frontmatter: env-check $(PYTHON) tools/check-frontmatter.py -tests: venv-check +tests: env-check $(PYTHON) -m pytest -q tests $(PYTHON) tools/check-frontmatter.py -test-parity: venv-check +test-parity: env-check $(PYTHON) -m pytest tests/test_mock_llm_parity.py -v -itemdb-reset: venv-check +itemdb-reset: env-check rm -f itemdb/notes/*.md rm -rf itemdb/evidence/CC-* rm -f itemdb/reports/*.md @@ -280,27 +291,32 @@ itemdb-reset: venv-check touch tmp/.gitkeep $(PYTHON) tools/render-index.py -index: 
venv-check +codeql-clean: + rm -rf itemdb/codeql + rm -rf .cache/codeql + rm -rf src/_codeql_detected_source_root + +index: env-check $(PYTHON) tools/render-index.py -report: venv-check +report: env-check $(PYTHON) tools/render-report.py -findings: venv-check +findings: env-check ifdef STATUS $(PYTHON) tools/list-findings.py --status $(STATUS) else $(PYTHON) tools/list-findings.py endif -findings-create: venv-check +findings-create: env-check @test -n "$(strip $(TITLE))" || (printf "TITLE is required. Usage: make findings-create TITLE=\"Short descriptive title\" [ARGS='...']\n" && exit 2) $(PYTHON) tools/create-finding.py "$(TITLE)" $(ARGS) -findings-move: venv-check +findings-move: env-check $(PYTHON) tools/move-finding.py $(FINDING) $(STATUS) -findings-evidence: venv-check +findings-evidence: env-check $(PYTHON) tools/create-evidence.py $(FINDING) findings-package: @@ -311,7 +327,7 @@ findings-package: # Sandbox # --------------------------------------------------------------------------- -SANDBOX_SCRIPT_HINT := "No sandbox helper script found. Run 'make phase-1' (sub-stage 1b) to bootstrap sandbox/ from templates/sandboxes/, or place the helper script under sandbox/scripts/ manually." +SANDBOX_SCRIPT_HINT := "No sandbox helper script found. Run 'make phase-1' (sub-stage 1c) to bootstrap sandbox/ from templates/sandboxes/, or place the helper script under sandbox/scripts/ manually." 
sandbox-setup: @if [ -x sandbox/scripts/setup.sh ]; then \ @@ -360,35 +376,35 @@ sandbox-test: ./sandbox/scripts/test.sh # --------------------------------------------------------------------------- -# Sandbox bootstrap (Phase 1b) +# Sandbox bootstrap (Phase 1c) # --------------------------------------------------------------------------- -sandbox-list: venv-check +sandbox-list: env-check @$(PYTHON) tools/sandbox-bootstrap.py list -sandbox-inspect: venv-check +sandbox-inspect: env-check @test -n "$(ID)" || (echo "Usage: make sandbox-inspect ID=" && exit 1) @$(PYTHON) tools/sandbox-bootstrap.py inspect $(ID) -sandbox-detect: venv-check +sandbox-detect: env-check @$(PYTHON) tools/sandbox-bootstrap.py detect -sandbox-bootstrap: venv-check +sandbox-bootstrap: env-check @test -n "$(ID)" || (echo "Usage: make sandbox-bootstrap ID=" && exit 1) @$(PYTHON) tools/sandbox-bootstrap.py apply $(ID) $(BOOTSTRAP_ARGS) -sandbox-validate: venv-check +sandbox-validate: env-check @$(PYTHON) tools/sandbox-bootstrap.py validate $(BOOTSTRAP_ARGS) -sandbox-regenerate: venv-check +sandbox-regenerate: env-check @$(PYTHON) tools/sandbox-bootstrap.py regenerate $(BOOTSTRAP_ARGS) -sandbox-status: venv-check +sandbox-status: env-check @$(PYTHON) tools/sandbox-bootstrap.py status # Print the model that would be picked for a given AGENT (default: recon). # Usage: # make show-model # make show-model AGENT=auditor -show-model: venv-check +show-model: env-check @$(PYTHON) tools/run-agent.py --show-model --agent $(or $(AGENT),recon) diff --git a/README.md b/README.md index 36081c20..ff8c16ad 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,8 @@ CodeCome runs on top of [OpenCode](https://opencode.ai), an open-source AI codin 3. **Python 3.10+** — needed for workspace tooling (`make venv` creates a local virtualenv). 4. **GNU Make** — drives the workflow. 5. **Docker** — required for the sandboxed validation environment. -6. **Optional: exploit recording tools** — for Phase 5 visual evidence: +6. 
**Optional: CodeQL CLI** — for static analysis integration. Managed install via `make init`, or set `CODEQL_SKIP_INSTALL=1` (or `CODEQL=0`) to skip. +7. **Optional: exploit recording tools** — for Phase 5 visual evidence: - `asciinema` — terminal recordings. - `agg` — renders `.cast` files to GIFs (CodeCome falls back to a Docker container if missing). - `ffmpeg` and `xvfb` (or `xvfb-run`) — for GUI/browser exploits. @@ -121,6 +122,7 @@ A few things to know up front about `src/`: When you're ready: make venv # set up the local Python virtualenv + make init # install CodeQL CLI (optional, skip with CODEQL_SKIP_INSTALL=1) make check # sanity-check the workspace make phase-1 # recon + sandbox bootstrap make phase-2 # generate candidate findings @@ -135,7 +137,7 @@ There are convenience targets too — `make validate-all`, `make exploit-all`, ` Six phases. Each one is a `make` target. Each one writes to disk. -1. **Recon (`make phase-1`)** — agent reads `src/`, infers the target type, languages, build model, attack surface, and writes notes under `itemdb/notes/`. Also bootstraps a Docker sandbox suited to the stack. +1. **Recon (`make phase-1`)** — runs as three subphases: (1a) target profiling and CodeQL plan generation, (1b) CodeQL-assisted reconnaissance using static analysis signals, and (1c) sandbox bootstrap. Writes notes under `itemdb/notes/` including a file-risk-index informed by CodeQL findings. 2. **Hypothesis (`make phase-2`)** — agent writes candidate findings under `itemdb/findings/PENDING/`. Each one points at specific code, sources, sinks, and a trust boundary. 3. **Counter-analysis (`make phase-3`)** — a reviewer pass tries to disprove or deduplicate findings. Weak ones move to `REJECTED/`, repeats to `DUPLICATE/`. 4. **Validation (`make phase-4 FINDING=CC-XXXX`)** — one finding at a time, in the sandbox. Build the target, write a small PoC, capture evidence, decide CONFIRMED or REJECTED. @@ -159,6 +161,16 @@ stateDiagram-v2 Phases 1–3 are batch operations. 
Phases 4 and 5 are run **per finding** — that's intentional. One finding at a time keeps evidence traceable and lets you mix model choices, prompt overrides, and rerun loops without polluting the audit. +## CodeQL integration + +CodeCome integrates GitHub's [CodeQL](https://codeql.github.com/) as an optional first-class static-analysis capability during Phase 1. + +- **Managed install** — `make init` (or `tools/codeql.py install`) downloads and manages the CodeQL CLI bundle under `.tools/codeql/`. +- **Automatic language detection** — Phase 1a generates `itemdb/notes/codeql-plan.yml` with detected languages and build modes. +- **SARIF normalization** — raw CodeQL results are normalized into `file-signals.yml`, which feeds into the `file-risk-index.yml` written by Phase 1b recon. +- **Configuration** — controlled via `codecome.yml` under `audit.static_analysis.codeql` (enable/disable, pack selection, fail policy, timeouts). +- **Opt-out** — run `make init` with `CODEQL=0` (or `CODEQL_SKIP_INSTALL=1`), or set `enabled: false` in config, to skip CodeQL entirely. + ## Who is this for? - **Solo security researchers** who want LLM help on source-code audits but refuse to trust an opaque chat session. 
diff --git a/codecome.yml b/codecome.yml index a665eeac..59841b1b 100644 --- a/codecome.yml +++ b/codecome.yml @@ -85,6 +85,34 @@ audit: - "Race conditions" - "Privilege escalation" + static_analysis: + codeql: + enabled: true + fail_policy: "soft" + + pack_catalog: "./templates/codeql-packs.yml" + + install: + managed: true + version: "latest" + path: ".tools/codeql/current/codeql" + + output_dir: "./itemdb/codeql" + database_dir: "./itemdb/codeql/databases" + cache_dir: "./.cache/codeql" + + phase_1: + enabled: true + + phase_2: + enabled: true + candidate_mode: "precreate" + max_candidates: 10 + + sweep: + enabled: true + inject_context: true + environment: type: "sandbox" path: "./sandbox" @@ -191,4 +219,3 @@ agents: # model: "anthropic/claude-opus-4-7" # reporter: # model: "anthropic/claude-opus-4-7" - diff --git a/prompts/phase-1-codeql-repair.md b/prompts/phase-1-codeql-repair.md new file mode 100644 index 00000000..a31fc614 --- /dev/null +++ b/prompts/phase-1-codeql-repair.md @@ -0,0 +1,81 @@ +# CodeCome Phase 1: CodeQL Build Repair + +You are performing a narrow repair step after Phase 1a generated a CodeQL plan and the CodeQL database creation step failed. + +Your task is to make the smallest durable change needed so CodeQL can create a database on the next run. + +## Required Reading + +Read these files if they exist: + +- `AGENTS.md` +- `itemdb/notes/target-profile.md` +- `itemdb/notes/build-model.md` +- `itemdb/notes/codeql-plan.yml` +- `itemdb/codeql/run-manifest.yml` +- `itemdb/codeql/codeql-summary.md` + +Also inspect relevant CodeQL database logs under: + +- `itemdb/codeql/databases/**/log/*.log` + +Focus on the last useful `[build-stderr]`, `[build-stdout]`, `ERROR`, and `Exception caught` lines. + +## Goal + +Repair `itemdb/notes/codeql-plan.yml` so the next CodeQL run can create databases. + +For C/C++, Go, and Swift, do not use `build_mode: none`. Use only `manual` or `autobuild` as supported by the CodeQL integration. 
+ +If autobuild failed because no supported root build system was detected, prefer `build_mode: manual` with a concrete `build_command`. + +## Allowed Writes + +You may write only: + +- `itemdb/notes/codeql-plan.yml` +- helper scripts under `tmp/` +- helper scripts under `sandbox/` +- a short run summary under `runs/` if useful + +Do not write helper scripts under `tools/`. + +Do not write helper scripts under `itemdb/`. + +Do not modify files under `src/`. + +Do not modify project orchestration or configuration files. + +If the manual command is simple enough, put it directly in `build_command` instead of creating a helper script. + +## Build Command Rules + +- CodeQL runs the manual `build_command` from the analysis unit source path. +- CodeQL does not run `build_command` from the workspace root or from the helper script directory. +- CodeQL tokenizes `build_command` as argv; it does not execute it as a shell script. +- Do not put shell control syntax in `build_command`: no `&&`, `||`, `;`, pipes, comments, multi-line commands, or `bash -c` / `sh -c` snippets. +- Good direct commands: `make`, `make -C challenge`, `gcc main.c -o app`. +- If more than one command is needed, create a helper script under workspace-relative `tmp/` and set `build_command` to invoke it from the analysis unit source path, for example `bash ../../tmp/codeql-build.sh`. +- Prefer commands that are deterministic and non-interactive. +- Prefer commands that avoid modifying `src/` when possible. +- If existing target build files naturally write object files or binaries into `src/`, document that limitation in the `notes` field. +- Use workspace-relative helper script paths that work from the CodeQL source path. +- Never use absolute `/tmp/` paths. Use workspace-relative `tmp/` paths for scratch/build output. +- Do not embed this workspace's absolute path in `build_command`; prefer paths relative to the analysis unit source path. 
+- If a helper script changes directory, it must change to the analysis unit source path or to a path explicitly derived from that execution model, not blindly to the helper script directory. +- Keep the plan schema and existing pack selections intact unless a minimal change requires otherwise. + +## Output Requirements + +Make the repair directly in files. At the end, summarize: + +- why the previous CodeQL build failed, +- what changed in `itemdb/notes/codeql-plan.yml`, +- any helper script created, +- the exact manual build command CodeQL will run next. + +Before ending, validate that `itemdb/notes/codeql-plan.yml` is valid and follows CodeCome rules by running: + + rtk python3 tools/codecome.py check-codeql-plan + +If validation fails, repair only the reported issue before summarizing. diff --git a/prompts/phase-1a-profile.md b/prompts/phase-1a-profile.md new file mode 100644 index 00000000..00074606 --- /dev/null +++ b/prompts/phase-1a-profile.md @@ -0,0 +1,116 @@ +# CodeCome Phase 1a: Target Profile + +You are performing CodeCome **Phase 1a** — the first sub-stage of Phase 1. + +This sub-stage is scoped to: broad source tree mapping, language/framework detection, build model identification, and CodeQL plan generation. Do not produce full reconnaissance notes, file-risk-index, or sandbox artifacts here. Those are handled by Phase 1b and 1c. + +## Required reading + +Read the following files (all paths are relative to the project/workspace root): + +- `AGENTS.md` +- `codecome.yml` +- `templates/target-recon.md` +- `templates/codeql-plan.yml` +- `.opencode/agents/recon.md` +- `.opencode/skills/source-recon/SKILL.md` + +Do not load target-specific security skills or vulnerability-family skills during Phase 1a. Keep the scope structural. 
+ +## Target + +Analyze the source tree under: + + ./src + +## Required outputs + +Create these files under `itemdb/notes/`: + +- `target-profile.md` +- `build-model.md` +- `codeql-plan.yml` + +### `target-profile.md` + +Document: + +- **Target type**: web application, CLI tool, library, service, firmware, IaC, mobile app, desktop app, benchmark corpus, or mixed repository. +- **Primary languages and frameworks**: detected language, version indicators, major frameworks. +- **Secondary languages**: tooling, scripting, configuration DSLs. +- **Repository structure**: top-level layout, key directories, monorepo vs single-project. +- **Primary target component**: the main application, service, or library. If multiple, identify the primary and note secondary surfaces as optional follow-up. + +Do not yet produce detailed attack surface, trust boundary, data flow, or validation notes. Those are Phase 1b. + +### `build-model.md` + +Document: + +- **Build system**: Make, CMake, Maven, Gradle, npm, pip, Cargo, Go modules, etc. +- **Build commands**: how to compile/build the target from source. +- **Dependencies**: package manager files, vendored dependencies, external dependencies. +- **Build prerequisites**: toolchain versions, system packages, Docker images. +- **Whether the target can be built** within the workspace. Be honest about blockers. + +### `codeql-plan.yml` + +Create `itemdb/notes/codeql-plan.yml` by filling in the template from `templates/codeql-plan.yml`. + +Rules: + +- Discover analysis units under `./src`. An analysis unit is a coherent project/component with one source root and one or more languages/stacks, such as an API service, frontend app, native library, CLI, package, firmware tree, or benchmark corpus. +- Use stable, lowercase `analysis_units[].id` values such as `api`, `frontend`, `native-lib`, or `root`. These IDs are discovered here; users do not define them in `codecome.yml`. 
+- Set `analysis_units[].path` to the real source path under `./src` for that unit. Do not use CodeQL-generated helper paths such as `_codeql_detected_source_root`. +- Use one `analysis_units` entry for a single-project repository and multiple entries for monorepos or mixed stacks. +- Only include languages you have detected with **HIGH** or **MEDIUM** confidence. +- For each language in each analysis unit, select the appropriate pack profiles: + - `official` — always include for languages with CodeQL support. + - `github-security-lab` — include for security-focused audits. + - `trailofbits` — include for C/C++ and Go targets. + - `coding-standards` — include for C/C++ targets where coding standards queries apply. + - `local` — include if custom queries exist under `queries/codeql//`. +- Set `build_mode` according to CodeQL language support: + - `none`: python, javascript-typescript, ruby, csharp, java-kotlin. + - `manual` or `autobuild`: c-cpp, go, csharp, java-kotlin, swift. +- Do not set `build_mode: none` for C/C++, Go, or Swift. +- Use `manual` only when you identified a concrete build command for that analysis unit. +- Use `autobuild` only as an explicit choice when build files exist but the exact command is uncertain. +- Fill in `build_command` when `build_mode` is `manual`. +- Estimate `db_create_timeout` (seconds) for each language when `build_mode` is `manual` or `autobuild`: + - For `none` mode leave it unset; harness default is 600s. + - Estimate based on approximate source file count, build complexity, and whether compilation is involved. + - Rule of thumb: ~300s for small projects, ~600s for medium, ~1200-1800s for large C/C++ corpora. + - Round up to be safe; CodeQL extraction adds significant overhead per compiled file. +- Estimate `analyze_timeout` (seconds) per profile if query packs are known to be heavy (e.g. security suites on large codebases); otherwise omit to use harness default. 
+- Set `recommended: false` if you cannot confidently profile any language. +- Add relevant `notes` explaining your language choices and any uncertainties. +- Update `exclude` patterns to match the target's test, fixture, vendor, and generated code directories if different from the defaults. + +## Important rules + +- Do not assume the target is a web application. +- Do not modify files under `src/`. +- Do not generate vulnerability findings. +- Do not produce full reconnaissance notes (attack-surface, trust-boundaries, etc.) — those are Phase 1b. +- Do not bootstrap the sandbox — that is Phase 1c. +- Do not run CodeQL manually. The harness runs it after this sub-stage. +- Be explicit about uncertainty. +- Prefer useful notes over exhaustive dumps. +- Focus on what later sub-stages need. + +## Final response + +At the end, summarize: + +- Target type and primary language(s) +- Build system and buildability assessment +- Languages selected for CodeQL analysis and their confidence levels +- Files created: `target-profile.md`, `build-model.md`, `codeql-plan.yml` +- Key uncertainties or blockers + +Before ending, validate that `itemdb/notes/codeql-plan.yml` is valid and follows CodeCome rules by running: + + rtk python3 tools/codecome.py check-codeql-plan + +If validation fails, repair only the reported issue before summarizing. diff --git a/prompts/phase-1b-codeql-recon.md b/prompts/phase-1b-codeql-recon.md new file mode 100644 index 00000000..7301d391 --- /dev/null +++ b/prompts/phase-1b-codeql-recon.md @@ -0,0 +1,202 @@ +# CodeCome Phase 1b: CodeQL-assisted Reconnaissance + +You are performing CodeCome **Phase 1b** — the second sub-stage of Phase 1. + +This sub-stage produces the detailed reconnaissance notes. Phase 1a already created the target profile, build model, and CodeQL plan. If CodeQL analysis has completed (it may not have — treat it as optional), you now have normalized CodeQL artifacts to incorporate as reconnaissance evidence. 
+ +## Required reading + +Read the following files (all paths are relative to the project/workspace root): + +- `AGENTS.md` +- `codecome.yml` +- `templates/target-recon.md` +- `templates/file-risk-index.yml` +- `.opencode/agents/recon.md` +- `.opencode/skills/source-recon/SKILL.md` + +Also read the Phase 1a outputs: + +- `itemdb/notes/target-profile.md` +- `itemdb/notes/build-model.md` +- `itemdb/notes/codeql-plan.yml` + +## CodeQL artifacts (conditional) + +If CodeQL analysis was performed, the following artifacts may exist. Treat them as reconnaissance evidence, not proof of vulnerability: + +- `itemdb/codeql/run-manifest.yml` — CodeQL run outcome and metadata. +- `itemdb/codeql/normalized/alerts.yml` — Normalized CodeQL alerts with source/sink/flow. +- `itemdb/codeql/normalized/file-signals.yml` — Per-file CodeQL signal scores. +- `itemdb/codeql/codeql-summary.md` — Human-readable CodeQL summary. + +If these files exist: + +1. Read them and extract relevant signals. +2. Use alert data to enrich your understanding of potential sources, sinks, and trust-boundary crossings. +3. Use file-signals to prioritize files for the file-risk-index. +4. Do not treat CodeQL alerts as confirmed vulnerabilities. They are static-analysis hints. + +If these files do not exist, proceed with reconnaissance based on source analysis alone. Phase 1b must complete regardless of CodeQL availability. + +## Target + +Analyze the source tree under: + + ./src + +## Required outputs + +Create these files under `itemdb/notes/`: + +- `attack-surface.md` +- `execution-model.md` +- `trust-boundaries.md` +- `data-flow.md` +- `validation-model.md` +- `interesting-files.md` +- `file-risk-index.yml` +- `security-assumptions.md` + +### `attack-surface.md` + +Document: + +- **Network-facing attack surfaces**: HTTP endpoints, RPC services, WebSocket handlers, TCP/UDP listeners, message queue consumers. 
+- **Local attack surfaces**: CLI argument parsing, config file loading, environment variable consumption, file I/O, IPC. +- **API surface**: routes, controllers, handlers, middleware, GraphQL schemas, gRPC service definitions. +- **Input vectors**: query parameters, request bodies, file uploads, headers, cookies, WebSocket frames, serialized objects. +- **Output vectors**: response bodies, rendered templates, log emissions, file writes. + +### `execution-model.md` + +Document: + +- **Runtime environment**: interpreter, JVM, CLR, native binary, container, serverless. +- **Process model**: single-process, multi-process, worker pool, event loop, thread pool. +- **Startup and lifecycle**: initialization, configuration loading, connection pooling, shutdown. +- **Concurrency model**: async/await, threads, multiprocessing, greenlets, coroutines. + +### `trust-boundaries.md` + +Document: + +- **Network boundary**: remote client ↔ server. +- **Process boundary**: separate processes or containers. +- **User boundary**: authenticated vs unauthenticated, role-based. +- **Data boundary**: tenant isolation, database per tenant, shared database. +- **Component boundary**: plugin system, library interfaces, IPC channels. + +### `data-flow.md` + +Document key data flows from entry points to dangerous sinks: + +- Source (entry point) → transformation/validation → sink (filesystem, DB, network, command execution). +- For each flow, note whether input is attacker-controlled, partially controlled, or trusted. +- Flag missing or weak validation points. + +### `validation-model.md` + +Document: + +- How the target is tested (unit, integration, E2E, fuzzing). +- Whether a sandbox runtime is achievable. +- Recommended validation methods for each vulnerability class identified in `attack-surface.md`. +- Whether static-only or nested-virt validation models apply (requires explicit justification). 
+ +### `interesting-files.md` + +List files that warrant deeper Phase 2 or sweep attention: + +- Files containing authentication/authorization logic. +- Files with dangerous sink usage (exec, eval, SQL construction, file I/O, crypto). +- Files handling deserialization, parsing, or format conversion. +- Files at trust boundaries. +- Files with high CodeQL alert density (if CodeQL artifacts exist). +- Configuration files affecting security behavior. + +### `file-risk-index.yml` + +Create `itemdb/notes/file-risk-index.yml` using the schema in `templates/file-risk-index.yml`. + +This is a structured, machine-readable companion to `interesting-files.md`. It is consumed by optional file-scoped Phase 2 sweeps. + +Score files from 1 to 5 using the scoring scale in the template: + +- `1`: low security interest, +- `2`: weak or indirect security relevance, +- `3`: moderate security interest, +- `4`: high security interest, +- `5`: very high security interest. + +Prioritize files that contain or strongly influence: + +- attacker-controlled or externally influenced input, +- trust-boundary crossings, +- authentication or authorization decisions, +- dangerous sinks, +- parsers and decoders, +- file upload or archive handling, +- cryptographic or secret-handling logic, +- privilege boundaries, +- tenant/account/resource isolation, +- network-facing protocol handlers, +- sandbox, policy, or permission enforcement. + +For each high-risk file, include concrete reasons, likely entry points, sources, sinks, trust boundaries, suggested vulnerability classes, suggested skills, and suggested validation methods when inferable. + +If CodeQL file signals exist (`itemdb/codeql/normalized/file-signals.yml`), incorporate them: +- Add `external_signals.codeql` blocks to file entries with CodeQL alerts. +- Boost scores where CodeQL reports high-precision alerts, but cap at 5. +- Explain every CodeQL-driven score boost in the `reasons` field. + +Do not include every source file. 
Prefer a concise ranked set that Phase 2 can act on. + +### `security-assumptions.md` + +Document: + +- Assumptions the codebase appears to make about its environment, inputs, and callers. +- Implicit trust relationships (e.g., "this internal API assumes the caller is already authorized"). +- Cryptographic assumptions. +- Assumptions about input validation performed by upstream components. + +## Additional reconnaissance + +Recursively scan `src/` for high-signal documentation such as `README*`, `SECURITY*`, `THREAT_MODEL*`, `CONTRIBUTING*`, `docs/`, and similar. Also inspect `CHANGELOG*`, `HISTORY*`, and `NEWS*`, but prefer top-level or component-relevant files. + +If the repository has dozens of changelog/history/news files, do not process them exhaustively. Summarize the pattern, prioritize files near the primary target or security-relevant components, and record that scope decision. + +Review external public context for prior security advisories, CVE references, historical security fixes, release notes, and recurring bug classes affecting this project or closely related upstream components. Prefer project advisories, GitHub Security Advisories, NVD/CVE entries, issue trackers, release notes, and distribution advisories. + +Use external context only as reconnaissance input: distill affected components, historical bug patterns, trust boundaries, and fixed attack surfaces into the notes. Do not treat external claims as proof that the current source tree is affected; verify everything against `src/` before creating findings. + +Distill declared threat model, past CVEs, trust boundaries, and third-party components into the relevant notes; treat author claims as input to verify, not facts. + +## Important rules + +- Do not assume the target is a web application. +- Do not assume the target can be built. +- Do not assume the target can be executed. +- Do not modify files under `src/`. +- Do not generate low-confidence vulnerability findings during reconnaissance. 
+- Do not rely only on filenames, comments, or labels. +- Be explicit about uncertainty. +- Prefer useful notes over exhaustive dumps. +- Focus on what later phases need. +- Do not let any target-specific skill narrow the target model before broad mapping is complete. +- Do not ask the user to choose Phase 2 scope when a reasonable default can be inferred. Pick the primary target from repository evidence, document secondary surfaces as optional follow-up, and continue. +- Do not phrase optional preferences as "User input requested". Use "Optional follow-up" unless Phase 1 is blocked. +- Reading `.env` files is allowed only in two places during reconnaissance: target inputs under `src/**` and CodeCome-generated sandbox metadata in `sandbox/.env`. Avoid unrelated `.env` files elsewhere in the workspace. + +## Final response + +At the end, summarize: + +- Target type (from Phase 1a), +- Most important attack surfaces identified, +- Recommended Phase 2 focus, +- Highest-risk files from `file-risk-index.yml`, +- CodeQL signals incorporated (if any), +- Files created in this sub-stage, +- Key limitations and uncertainties. diff --git a/prompts/phase-1c-sandbox.md b/prompts/phase-1c-sandbox.md new file mode 100644 index 00000000..077f3780 --- /dev/null +++ b/prompts/phase-1c-sandbox.md @@ -0,0 +1,125 @@ +# CodeCome Phase 1c: Sandbox Bootstrap + +You are performing CodeCome **Phase 1c** — the third and final sub-stage of Phase 1. + +This sub-stage bootstraps the sandbox environment. Phase 1a produced the target profile and build model. Phase 1b produced the full reconnaissance notes. Your job is to leave `sandbox/` in a state where Phase 2 can run. 
+ +## Required reading + +Read the following files (all paths are relative to the project/workspace root): + +- `AGENTS.md` +- `codecome.yml` +- `.opencode/agents/recon.md` +- `.opencode/skills/sandbox-bootstrap/SKILL.md` +- `itemdb/notes/target-profile.md` +- `itemdb/notes/build-model.md` + +## Required output + +- `itemdb/notes/sandbox-plan.md` + +## Workflow + +1. Inspect current sandbox state: + + make sandbox-status + +2. Inspect target runtime artifacts under `src/`. At minimum consider: + + src/Dockerfile + src/docker-compose.yml + src/docker-compose.yaml + src/compose.yml + src/compose.yaml + src/Makefile + src/scripts/ + src/README* + src/INSTALL* + src/CONTRIBUTING* + src/RUN* + src/docs/ + + Decide what to honor. Document the decision in `sandbox-plan.md`. + +3. Detect candidates: + + make sandbox-detect + +4. Inspect the chosen example: + + make sandbox-inspect ID=<example-id> + +5. Apply the example: + + BOOTSTRAP_ARGS='--var KEY1=VAL1 --var KEY2=VAL2' \ + make sandbox-bootstrap ID=<example-id> + + Or, for a preview without writing: + + BOOTSTRAP_ARGS='--dry-run --var KEY=VAL' \ + make sandbox-bootstrap ID=<example-id> + + `apply` refuses to overwrite a user-managed `sandbox/` (one without `CODECOME-GENERATED.md`). If the user has accepted the loss, re-run with `--force` and the prior content is moved to `sandbox/.backup-<timestamp>/`. + +5b. Implement the required sandbox capabilities. + + Templates are seeds, not finished sandboxes. Each `templates/sandboxes/<example-id>/` ships only `Dockerfile`, `docker-compose.yml`, a starter `build.sh`, and a starter `test.sh`.
After `apply`, you must leave `sandbox/` with working mechanisms for: + + sandbox setup + sandbox start + sandbox sanity + target build + target test + sandbox stop + + Prefer helper scripts under `sandbox/scripts/` such as: + + setup.sh up.sh check.sh build.sh test.sh + + Add operational helpers when they make sense for the target: + + down.sh shell.sh logs.sh clean.sh reset.sh + + Prefer a realistic runtime environment when it is reasonably derivable from the repository. For web apps, APIs, and other services, Phase 1c should attempt to start the real application stack, not just compile it. If the target appears to need a database, cache, queue, reverse proxy, migrations, seed data, or health checks, include those when the source tree or docs make them inferable. + + Do not stop at a toolchain-only or build-only sandbox when later Phase 4 or Phase 5 validation would realistically require a running application. If full runtime is not feasible, document the closest achievable runtime model and the blocker in `itemdb/notes/sandbox-plan.md`. + + Adapt `build.sh` and `test.sh` to the actual project layout (the source may be nested under `src/<project>/`, not directly under `src/`). Author additional scripts when they help the target (sanitizer build, fuzzing harness, debugger attach, target-specific reset, etc.). Make every script executable. Document any extras in `itemdb/notes/sandbox-plan.md` under "Extra scripts authored". + + Do not record any validation tier as `skipped` because the required capability is missing. Either implement the helper and run the tier, or accept the `failed` outcome the validator emits. + + Do not replace authoring a script with an in-chat manual spot-check. Manual checks do not survive future runs. + + See `.opencode/skills/sandbox-bootstrap/SKILL.md` for authoring conventions and the sandbox capability contract. + +6.
Validate: + + make sandbox-validate + + Use `BOOTSTRAP_ARGS='--keep-going'` to run all tiers even after a failure, or `--scripts-only` / `--docker-only` to constrain which mode is used. + + `validate` appends a "Validation run <timestamp>" table to `sandbox/CODECOME-GENERATED.md` and returns JSON with `--format json`. Capture per-tier outcomes (passed / failed / skipped, exit code, last 50 lines of stderr) into the validation matrix in `sandbox-plan.md`. A missing required capability makes the tier `failed`; that means you still need to complete step 5b. + +7. If validation fails, attempt automatic remediations within the retry budget (`CODECOME_BOOTSTRAP_MAX_RETRIES`, default 3). Each attempt must be logged in `sandbox-plan.md`. When the budget is exhausted, write the halt protocol in `sandbox-plan.md` and stop Phase 1c. + +8. Special validation models: + + - `static-only`: requires explicit justification in `sandbox-plan.md`. + - `nested-virt`: requires explicit justification and arch declaration. + +## Important rules + +- Do not modify files under `src/`. +- Do not overwrite a `sandbox/` that lacks `CODECOME-GENERATED.md`. If the sandbox already works, move on; if it needs replacement, halt with the halt protocol and inform the user to re-run with `--force` (which moves the prior content to `sandbox/.backup-<timestamp>/`). +- Do not generate vulnerability findings. + +## Final response + +At the end, summarize: + +- Chosen sandbox example and `validation_model`, +- Validation outcome (`passed`, `passed-with-warnings`, `halted`), +- `itemdb/notes/sandbox-plan.md` created, +- Key limitations, +- Halt requirements if sandbox bootstrap is blocked. diff --git a/templates/codeql-packs.yml b/templates/codeql-packs.yml new file mode 100644 index 00000000..a9e8885a --- /dev/null +++ b/templates/codeql-packs.yml @@ -0,0 +1,70 @@ +# CodeQL pack catalog used to resolve model-selected pack profiles.
+ +schema_version: 1 + +packs: + python: + official: + - "codeql/python-queries" + github-security-lab: + - "githubsecuritylab/codeql-python-queries" + local: + - "./queries/codeql/python" + + javascript-typescript: + official: + - "codeql/javascript-queries" + github-security-lab: + - "githubsecuritylab/codeql-javascript-queries" + local: + - "./queries/codeql/javascript" + + c-cpp: + official: + - "codeql/cpp-queries" + github-security-lab: + - "githubsecuritylab/codeql-cpp-queries" + trailofbits: + - "trailofbits/cpp-queries" + coding-standards: + - "codeql/coding-standards-cpp" + local: + - "./queries/codeql/cpp" + + go: + official: + - "codeql/go-queries" + github-security-lab: + - "githubsecuritylab/codeql-go-queries" + trailofbits: + - "trailofbits/go-queries" + local: + - "./queries/codeql/go" + + csharp: + official: + - "codeql/csharp-queries" + github-security-lab: + - "githubsecuritylab/codeql-csharp-queries" + local: + - "./queries/codeql/csharp" + + java-kotlin: + official: + - "codeql/java-queries" + github-security-lab: + - "githubsecuritylab/codeql-java-queries" + local: + - "./queries/codeql/java" + +candidate_policy: + official: + allow_precreate: true + github-security-lab: + allow_precreate: true + trailofbits: + allow_precreate: true + coding-standards: + allow_precreate: false + local: + allow_precreate: true diff --git a/templates/codeql-plan.yml b/templates/codeql-plan.yml new file mode 100644 index 00000000..7be3fc0d --- /dev/null +++ b/templates/codeql-plan.yml @@ -0,0 +1,63 @@ +# CodeQL analysis plan generated by Phase 1a target profiling. +# The model fills in language entries based on source tree analysis. +# Consumed by CodeQL run orchestration (tools/codeql/runner.py). 
+ +schema_version: 1 +generated_by: "phase-1a-profile" + +source_path: "./src" +recommended: true + +analysis_units: [] +# Example analysis units discovered under ./src: +# +# analysis_units: +# - id: "api" +# path: "./src/api" +# kind: "service" +# primary: true +# languages: +# - id: "python" +# confidence: "HIGH" +# build_mode: "none" +# build_command: null +# packs: +# - "official" +# - "github-security-lab" +# +# - id: "native-lib" +# path: "./src/native" +# kind: "library" +# primary: false +# languages: +# - id: "c-cpp" +# confidence: "HIGH" +# build_mode: "manual" +# build_command: "make -C src/native" +# db_create_timeout: 1800 # optional: seconds, model-estimated from source size +# analyze_timeout: 900 # optional: seconds, per query-profile run +# packs: +# - "official" +# - "github-security-lab" +# - "trailofbits" +# - "coding-standards" +# +# Allowed language IDs: python, javascript-typescript, ruby, c-cpp, go, csharp, java-kotlin, swift +# Allowed confidence values: HIGH, MEDIUM, LOW +# Allowed build_mode values by language: +# none: python, javascript-typescript, ruby, csharp, java-kotlin +# manual/autobuild: c-cpp, go, csharp, java-kotlin, swift +# Allowed pack profile names: official, github-security-lab, trailofbits, coding-standards, local +# +# Optional per-language timeout overrides (seconds): +# - db_create_timeout: max seconds for 'codeql database create' (default 600) +# - analyze_timeout: max seconds for 'codeql database analyze' per profile (default 600) +# Use CODEQL_DB_CREATE_TIMEOUT / CODEQL_ANALYZE_TIMEOUT env vars to override at runtime. 
def _ensure_codecome_package():
    """Make sure ``codecome`` resolves to the package directory, not ``codecome.py``.

    Other tests (e.g. test_codecome.py) load ``codecome.py`` as a plain
    module. While that entry sits in ``sys.modules``, ``codecome.phase_1``
    cannot be reached as a submodule. A cached entry without a truthy
    ``__path__`` is treated as that plain module and evicted so the next
    import can pick up the package instead.
    """
    if "codecome" not in sys.modules:
        return
    cached = sys.modules["codecome"]
    if getattr(cached, "__path__", None):
        # A truthy __path__ marks a package: leave it in place.
        return
    del sys.modules["codecome"]
def test_check_codeql_artifacts_failed_soft_policy_returns_0(tmp_path: Path, capsys) -> None:
    """A failed manifest under the soft fail policy must not fail the check (rc 0)."""
    config = _config(tmp_path, enabled=True, fail_policy="soft")
    output_dir = config.abs_output_dir
    output_dir.mkdir(parents=True)
    manifest = {
        "status": "failed",
        "codeql_enabled": True,
        "fail_policy": "soft",
        "failures": ["boom"],
    }
    manifest_path = output_dir / "run-manifest.yml"
    manifest_path.write_text(yaml.safe_dump(manifest), encoding="utf-8")

    # ``codecome`` must resolve to the package before phase_1 can be imported.
    _ensure_codecome_package()
    import codecome.phase_1 as p1

    # patch.object restores HAVE_RICH on exit, even if the check raises.
    with patch.object(p1, "HAVE_RICH", False), \
            patch("codeql.config.resolve_config", return_value=config):
        rc = p1._check_codeql_artifacts(None)

    captured = capsys.readouterr()
    assert rc == 0
    assert "fail_policy is soft" in captured.out
def test_codeql_repair_needed_for_autobuild_database_failure(tmp_path: Path) -> None:
    """A soft-failed database create for an autobuild language must request repair."""
    _ensure_codecome_package()
    from codecome.phase_1 import _codeql_repair_needed

    # Manifest recording the failed ``database create`` step.
    manifest = {
        "status": "soft-failed",
        "failures": ["Database create failed for c-cpp:\nNo supported build system detected."],
    }
    output_dir = tmp_path / "itemdb" / "codeql"
    output_dir.mkdir(parents=True)
    (output_dir / "run-manifest.yml").write_text(yaml.safe_dump(manifest), encoding="utf-8")

    # Plan with a single c-cpp unit that relies on autobuild.
    language = {"id": "c-cpp", "build_mode": "autobuild", "packs": ["official"]}
    unit = {"id": "native", "path": "./src/native", "languages": [language]}
    plan = {"schema_version": 1, "analysis_units": [unit]}
    plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml"
    plan_path.parent.mkdir(parents=True)
    plan_path.write_text(yaml.safe_dump(plan), encoding="utf-8")

    assert _codeql_repair_needed(output_dir, plan_path) is True
def test_phase_1_pipeline_structure() -> None:
    """With every step stubbed to succeed, run_phase_1 returns 0 after three sub-phases and one CodeQL run."""
    _ensure_codecome_package()
    import codecome.phase_1 as p1

    # HAVE_RICH is patched alongside the collaborators; every patch is
    # undone when the ``with`` block exits.
    with patch.object(p1, "HAVE_RICH", False), \
            patch.object(p1, "count_findings_snapshot", return_value={}), \
            patch.object(p1, "_run_subphase", return_value=0) as subphase_mock, \
            patch.object(p1, "check_phase_1a", return_value=0), \
            patch.object(p1, "check_phase_1b", return_value=0), \
            patch.object(p1, "check_phase_1c", return_value=0), \
            patch.object(p1, "_run_codeql", return_value=None) as codeql_mock, \
            patch.object(p1, "_run_codeql_repair_if_needed", return_value=0), \
            patch.object(p1, "_check_codeql_artifacts", return_value=0):
        exit_code = p1.run_phase_1(object(), None, None, object(), "http://127.0.0.1")

    assert exit_code == 0
    assert codeql_mock.call_count == 1
    assert subphase_mock.call_count == 3
def test_run_single_attempt_uses_explicit_transcript_phase(mock_args, mock_console, monkeypatch):
    """``transcript_phase`` must reach ``Transcript.for_phase`` unchanged, with no finding."""
    stubs = {
        "create_session": lambda *a, **kw: "new_session",
        "send_prompt_to_session": lambda *a, **kw: None,
        "_consume_events": lambda *a, **kw: RunResult(),
    }
    for attr, stub in stubs.items():
        monkeypatch.setattr(runner, attr, stub)

    seen = {}
    transcript = MagicMock(spec=Transcript)
    transcript.path = Path("fake-1a.jsonl")

    def record_for_phase(cls, phase, finding):
        # Remember what the runner asked for, then hand back the fake.
        seen.update(phase=phase, finding=finding)
        return transcript

    monkeypatch.setattr(Transcript, "for_phase", classmethod(record_for_phase))

    code, session_id, _result, _transcript_path = runner._run_single_attempt(
        mock_args, mock_console, "do work", "model", "var",
        "http://base", "auth", "dir", lambda *a: None,
        transcript_phase="1a",
    )

    assert code == 0
    assert session_id == "new_session"
    assert seen == {"phase": "1a", "finding": None}
"dir", lambda *a: None, emit_fatal_error_fn=fake_fatal @@ -116,7 +145,7 @@ def test_run_single_attempt_existing_session(mock_args, mock_console, monkeypatc fake_transcript.path = Path("fake.jsonl") monkeypatch.setattr(Transcript, "for_phase", classmethod(lambda cls, p, f: fake_transcript)) - code, session_id, res, path = runner._run_single_attempt( + code, session_id, _res, _path = runner._run_single_attempt( mock_args, mock_console, "do work", "model", "var", "http://base", "auth", "dir", lambda *a: None, existing_session_id="existing_123" @@ -125,3 +154,64 @@ def test_run_single_attempt_existing_session(mock_args, mock_console, monkeypatc assert code == 0 assert session_id == "existing_123" assert len(created) == 0 + + +def test_run_single_attempt_records_prompt_timeout(mock_args, mock_console, monkeypatch): + monkeypatch.setattr(runner, "create_session", lambda *a, **kw: "new_session") + monkeypatch.setattr(runner, "_consume_events", lambda *a, **kw: RunResult()) + + def fake_send(*_a, **_kw): + raise TimeoutError("timed out") + + monkeypatch.setattr(runner, "send_prompt_to_session", fake_send) + + events = [] + fake_transcript = MagicMock(spec=Transcript) + fake_transcript.path = Path("fake.jsonl") + fake_transcript.write_event.side_effect = events.append + monkeypatch.setattr(Transcript, "for_phase", classmethod(lambda cls, p, f: fake_transcript)) + + fatal_errors = [] + code, session_id, _res, _path = runner._run_single_attempt( + mock_args, mock_console, "do work", "model", "var", + "http://base", "auth", "dir", lambda *a: None, + emit_fatal_error_fn=lambda _console, _title, msg: fatal_errors.append(msg), + ) + + assert code == 1 + assert session_id == "" + assert fatal_errors == ["timed out"] + event_types = [event["type"] for event in events] + assert "codecome.prompt.send_started" in event_types + assert "codecome.prompt.send_failed" in event_types + assert "codecome.attempt.failed" in event_types + failed = next(event for event in events if event["type"] 
def test_existing_session_busy_guard_blocks_resume_prompt(mock_args, mock_console, monkeypatch):
    """A busy existing session must fail the attempt without sending the resume prompt.

    The session status is stubbed to ``"busy"`` and
    ``CODECOME_RESUME_IDLE_TIMEOUT`` is set to ``"0"``. Creating a session,
    consuming events or sending a prompt each fail the test immediately.
    """
    def forbid(message):
        # Build a stub that fails the test as soon as it is called.
        return lambda *a, **kw: pytest.fail(message)

    monkeypatch.setattr(runner, "create_session", forbid("should not create session"))
    monkeypatch.setattr(runner, "_consume_events", forbid("should not consume events"))
    monkeypatch.setattr(runner, "send_prompt_to_session", forbid("should not send prompt"))
    monkeypatch.setattr(runner, "get_session_status", lambda *a, **kw: "busy")
    monkeypatch.setenv("CODECOME_RESUME_IDLE_TIMEOUT", "0")

    recorded = []
    transcript = MagicMock(spec=Transcript)
    transcript.path = Path("fake.jsonl")
    transcript.write_event.side_effect = recorded.append
    monkeypatch.setattr(Transcript, "for_phase", classmethod(lambda cls, p, f: transcript))

    code, session_id, _result, _transcript_path = runner._run_single_attempt(
        mock_args, mock_console, "resume", "model", "var",
        "http://base", "auth", "dir", lambda *a: None,
        existing_session_id="existing_123",
        emit_fatal_error_fn=lambda *_a: None,
    )

    assert code == 1
    assert session_id == "existing_123"
    seen_types = {event["type"] for event in recorded}
    assert "codecome.resume.blocked_busy" in seen_types
    assert "codecome.prompt.send_started" not in seen_types
"run-manifest.yml").write_text( + yaml.safe_dump(manifest, sort_keys=False), encoding="utf-8" + ) + + +def test_missing_manifest(tmp_path: Path) -> None: + status, warnings = check_artifacts(tmp_path / "nonexistent") + assert status == "missing" + assert len(warnings) == 1 + assert "not found" in warnings[0] + + +def test_completed_all_present(tmp_path: Path) -> None: + out = tmp_path / "codeql" + _write_manifest(out, {"status": "completed", "failures": []}) + normalized = out / "normalized" + normalized.mkdir() + (normalized / "alerts.yml").write_text("alerts: []\n") + (normalized / "file-signals.yml").write_text("files: []\n") + + status, warnings = check_artifacts(out) + assert status == "completed" + assert warnings == [] + + +def test_completed_missing_normalized(tmp_path: Path) -> None: + out = tmp_path / "codeql" + _write_manifest(out, {"status": "completed", "languages": ["python"], "failures": []}) + + status, warnings = check_artifacts(out) + assert status == "completed" + assert len(warnings) == 2 + assert any("alerts.yml" in w for w in warnings) + assert any("file-signals.yml" in w for w in warnings) + + +def test_skipped(tmp_path: Path) -> None: + out = tmp_path / "codeql" + _write_manifest(out, {"status": "skipped", "failures": []}) + + status, warnings = check_artifacts(out) + assert status == "skipped" + assert warnings == [] + + +def test_soft_failed_with_failures(tmp_path: Path) -> None: + out = tmp_path / "codeql" + _write_manifest(out, {"status": "soft-failed", "failures": ["db create timed out"]}) + + status, warnings = check_artifacts(out) + assert status == "soft-failed" + assert "db create timed out" in warnings + + +def test_failed(tmp_path: Path) -> None: + out = tmp_path / "codeql" + _write_manifest(out, {"status": "failed", "failures": ["binary not found"]}) + + status, warnings = check_artifacts(out) + assert status == "failed" + assert "binary not found" in warnings + + +def test_invalid_status(tmp_path: Path) -> None: + out = tmp_path 
def test_completed_empty_languages_skips_normalized_check(tmp_path: Path) -> None:
    """A completed run with an empty ``languages`` list yields no missing-artifact warnings."""
    output_dir = tmp_path / "codeql"
    manifest = {"status": "completed", "languages": [], "failures": []}
    _write_manifest(output_dir, manifest)

    status, warnings = check_artifacts(output_dir)

    assert status == "completed"
    assert warnings == []
None + + +def test_resolve_config_falls_back_on_invalid_max_candidates(monkeypatch) -> None: + monkeypatch.delenv("CODEQL", raising=False) + monkeypatch.delenv("CODEQL_SKIP", raising=False) + monkeypatch.setenv("CODEQL_MAX_CANDIDATES", "not-a-number") + + config = config_module.resolve_config() + assert config.max_candidates == config_module.DEFAULTS["max_candidates"] diff --git a/tests/test_codeql_import_risk.py b/tests/test_codeql_import_risk.py new file mode 100644 index 00000000..4e42b48b --- /dev/null +++ b/tests/test_codeql_import_risk.py @@ -0,0 +1,145 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from codeql.import_risk import import_risk + + +def _write_yaml(path: Path, data: dict) -> None: + import yaml + path.write_text(yaml.safe_dump(data, sort_keys=False), encoding="utf-8") + + +def test_import_risk_no_signals_file(tmp_path: Path) -> None: + risk_path = tmp_path / "risk.yml" + risk_path.write_text("files: []\n") + status, warnings = import_risk(tmp_path / "missing.yml", risk_path) + assert status is None + assert any("not found" in w for w in warnings) + + +def test_import_risk_no_risk_index(tmp_path: Path) -> None: + signals_path = tmp_path / "signals.yml" + _write_yaml(signals_path, {"files": []}) + status, warnings = import_risk(signals_path, tmp_path / "missing.yml") + assert status == "skipped" + assert any("not found" in w for w in warnings) + + +def test_import_risk_adds_new_entry(tmp_path: Path) -> None: + risk_path = tmp_path / "risk.yml" + _write_yaml( + risk_path, + { + "schema_version": 1, + "files": [{"path": "src/existing.py", "score": 3, "reasons": ["old"]}], + }, + ) + + signals_path = tmp_path / "signals.yml" + _write_yaml( + signals_path, + { + "schema_version": 1, + "files": [ + { + "path": "src/new.py", + "codeql_score_boost": 2, + "alerts": {"total": 2, "path_problems": 1, "high_precision": 1}, + "rules": 
["py/injection"], + } + ], + }, + ) + + status, warnings = import_risk(signals_path, risk_path) + assert status is None + assert len(warnings) == 0 + + import yaml + risk = yaml.safe_load(risk_path.read_text()) + files = risk["files"] + assert len(files) == 2 + new_entry = [f for f in files if f["path"] == "src/new.py"][0] + assert new_entry["score"] == 2 + assert new_entry["external_signals"]["codeql"]["alerts"] == 2 + assert new_entry["external_signals"]["codeql"]["rules"] == ["py/injection"] + + +def test_import_risk_updates_existing_entry(tmp_path: Path) -> None: + risk_path = tmp_path / "risk.yml" + _write_yaml( + risk_path, + { + "schema_version": 1, + "files": [ + { + "path": "src/upload.py", + "score": 3, + "reasons": ["manual review"], + } + ], + }, + ) + + signals_path = tmp_path / "signals.yml" + _write_yaml( + signals_path, + { + "schema_version": 1, + "files": [ + { + "path": "src/upload.py", + "codeql_score_boost": 2, + "alerts": {"total": 3, "path_problems": 2, "high_precision": 1}, + "rules": ["py/path-injection", "py/xss"], + } + ], + }, + ) + + status, _ = import_risk(signals_path, risk_path) + assert status is None + + import yaml + risk = yaml.safe_load(risk_path.read_text()) + files = risk["files"] + assert len(files) == 1 + entry = files[0] + assert entry["score"] == 5 # capped at 5 + assert "manual review" in entry["reasons"] + assert entry["external_signals"]["codeql"]["alerts"] == 3 + assert entry["external_signals"]["codeql"]["rules"] == ["py/path-injection", "py/xss"] + + +def test_import_risk_caps_score(tmp_path: Path) -> None: + risk_path = tmp_path / "risk.yml" + _write_yaml( + risk_path, + { + "schema_version": 1, + "files": [{"path": "src/x.py", "score": 4, "reasons": []}], + }, + ) + + signals_path = tmp_path / "signals.yml" + _write_yaml( + signals_path, + { + "schema_version": 1, + "files": [ + {"path": "src/x.py", "codeql_score_boost": 5, "alerts": {}} + ], + }, + ) + + status, _ = import_risk(signals_path, risk_path) + assert 
status is None + + import yaml + risk = yaml.safe_load(risk_path.read_text()) + assert risk["files"][0]["score"] == 5 diff --git a/tests/test_codeql_install.py b/tests/test_codeql_install.py new file mode 100644 index 00000000..97222692 --- /dev/null +++ b/tests/test_codeql_install.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import sys +import zipfile +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from codeql.install import _codeql_binary, _extract + + +def test_extract_strips_leading_codeql_prefix(tmp_path: Path) -> None: + zip_path = tmp_path / "codeql-test.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("codeql/", "") + zf.writestr("codeql/codeql", "#!/bin/sh\necho codeql\n") + zf.writestr("codeql/codeql.cmd", "@echo off\r\n") + zf.writestr("codeql/cpp/extractor.txt", "cpp") + zf.writestr("codeql/LICENSE.md", "license") + + dest_dir = tmp_path / "install" + _extract(zip_path, dest_dir) + + assert (dest_dir / "codeql").is_file() + assert (dest_dir / "codeql.cmd").is_file() + assert (dest_dir / "cpp" / "extractor.txt").read_text(encoding="utf-8") == "cpp" + assert (dest_dir / "LICENSE.md").read_text(encoding="utf-8") == "license" + assert not (dest_dir / "codeql" / "codeql").exists() + assert _codeql_binary(dest_dir) == dest_dir / "codeql" + + +def test_extract_rejects_path_traversal(tmp_path: Path) -> None: + zip_path = tmp_path / "codeql-traversal.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("codeql/../../escape.txt", "boom") + + dest_dir = tmp_path / "install" + try: + _extract(zip_path, dest_dir) + except RuntimeError as exc: + assert "outside target dir" in str(exc) + else: + raise AssertionError("expected traversal-protection RuntimeError") + + +def test_codeql_binary_supports_legacy_nested_layout(tmp_path: Path) -> None: + legacy = tmp_path / "legacy" / "codeql" + legacy.mkdir(parents=True) + binary = legacy / "codeql" + 
binary.write_text("#!/bin/sh\n", encoding="utf-8") + + assert _codeql_binary(tmp_path / "legacy") == binary diff --git a/tests/test_codeql_normalize.py b/tests/test_codeql_normalize.py new file mode 100644 index 00000000..6f49b4c4 --- /dev/null +++ b/tests/test_codeql_normalize.py @@ -0,0 +1,325 @@ +from __future__ import annotations + +import json +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from codeql.normalize import ( + _build_file_signals, + _extract_flow, + _extract_location, + _map_category, + _normalize_severity, + normalize_all, +) + + +def _minimal_sarif(results: list[dict]) -> dict: + return { + "version": "2.1.0", + "$schema": "https://json.schemastore.org/sarif-2.1.0.json", + "runs": [ + { + "tool": { + "driver": { + "name": "CodeQL", + "rules": [ + { + "id": "py/path-injection", + "name": "Uncontrolled data used in path expression", + "properties": { + "precision": "high", + "security-severity": "7.5", + "problem.severity": "error", + }, + } + ], + } + }, + "results": results, + } + ], + } + + +def _simple_result( + rule_id: str, + uri: str, + line: int = 42, + kind: str | None = "path-problem", + severity: str = "warning", + fingerprint: str = "abc123", +) -> dict: + return { + "ruleId": rule_id, + "ruleIndex": 0, + "kind": kind, + "level": severity, + "message": {"text": "Test message"}, + "locations": [ + { + "physicalLocation": { + "artifactLocation": {"uri": uri}, + "region": {"startLine": line, "endLine": line}, + } + } + ], + "partialFingerprints": {"primaryLocationLineHash": fingerprint}, + } + + +def test_normalize_all_empty_sarif_dir(tmp_path: Path) -> None: + sarif_dir = tmp_path / "sarif" + sarif_dir.mkdir() + out_dir = tmp_path / "normalized" + + resolved = {"analysis_units": []} + alerts_path, signals_path = normalize_all( + sarif_dir, out_dir, resolved, "2.21.0", tmp_path, + ) + assert alerts_path.is_file() + assert signals_path.is_file() + + import yaml 
+ alerts = yaml.safe_load(alerts_path.read_text()) + assert alerts["alerts"] == [] + + +def test_normalize_one_sarif(tmp_path: Path) -> None: + sarif_dir = tmp_path / "sarif" + sarif_dir.mkdir() + sarif_file = sarif_dir / "api.python.official.sarif" + sarif_file.write_text( + json.dumps( + _minimal_sarif( + [ + _simple_result("py/path-injection", "src/upload.py", 88), + ] + ) + ), + encoding="utf-8", + ) + out_dir = tmp_path / "normalized" + + resolved = {"analysis_units": [{"id": "api", "languages": [{"id": "python", "profiles": ["official"]}]}]} + alerts_path, signals_path = normalize_all( + sarif_dir, out_dir, resolved, "2.21.0", tmp_path, + ) + + import yaml + alerts = yaml.safe_load(alerts_path.read_text()) + assert len(alerts["alerts"]) == 1 + a = alerts["alerts"][0] + assert a["id"] == "CQ-0001" + assert a["analysis_unit_id"] == "api" + assert a["language"] == "python" + assert a["pack_profile"] == "official" + assert a["rule_id"] == "py/path-injection" + assert a["primary_location"]["path"] == "src/upload.py" + assert a["primary_location"]["start_line"] == 88 + assert a["mapped"]["category"] == "Path traversal" + + signals = yaml.safe_load(signals_path.read_text()) + assert len(signals["files"]) == 1 + assert signals["files"][0]["path"] == "src/upload.py" + assert signals["files"][0]["rules"] == ["py/path-injection"] + + +def test_normalize_ignores_non_matching_filenames(tmp_path: Path) -> None: + sarif_dir = tmp_path / "sarif" + sarif_dir.mkdir() + (sarif_dir / "not-a-match.json").write_text("{}") + (sarif_dir / "single.sarif").write_text(json.dumps(_minimal_sarif([]))) + out_dir = tmp_path / "normalized" + + resolved = {"analysis_units": []} + alerts_path, _ = normalize_all( + sarif_dir, out_dir, resolved, "2.21.0", tmp_path, + ) + + import yaml + alerts = yaml.safe_load(alerts_path.read_text()) + assert alerts["alerts"] == [] + + +def test_normalize_handles_invalid_json(tmp_path: Path) -> None: + sarif_dir = tmp_path / "sarif" + sarif_dir.mkdir() + 
(sarif_dir / "api.python.bad.sarif").write_text("not json", encoding="utf-8") + out_dir = tmp_path / "normalized" + + resolved = {"analysis_units": []} + alerts_path, _ = normalize_all( + sarif_dir, out_dir, resolved, "2.21.0", tmp_path, + ) + + import yaml + alerts = yaml.safe_load(alerts_path.read_text()) + assert alerts["alerts"] == [] + + +def test_extract_location() -> None: + result = { + "locations": [ + { + "physicalLocation": { + "artifactLocation": {"uri": "src/x.py"}, + "region": {"startLine": 42, "endLine": 44}, + } + } + ] + } + loc = _extract_location(result) + assert loc is not None + assert loc["path"] == "src/x.py" + assert loc["start_line"] == 42 + assert loc["end_line"] == 44 + + +def test_extract_location_empty() -> None: + assert _extract_location({"locations": []}) is None + assert _extract_location({}) is None + + +def test_extract_flow_with_code_flows() -> None: + result = { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "physicalLocation": { + "artifactLocation": {"uri": "src/a.py"}, + "region": {"startLine": 10}, + } + }, + "message": {"text": "source"}, + }, + { + "location": { + "physicalLocation": { + "artifactLocation": {"uri": "src/b.py"}, + "region": {"startLine": 20}, + } + }, + "message": {"text": "mid"}, + }, + { + "location": { + "physicalLocation": { + "artifactLocation": {"uri": "src/c.py"}, + "region": {"startLine": 30}, + } + }, + "message": {"text": "sink"}, + }, + ] + } + ] + } + ] + } + flow = _extract_flow(result, Path(".")) + assert flow is not None + assert flow["source"]["path"] == "src/a.py" + assert flow["source"]["line"] == 10 + assert flow["sink"]["path"] == "src/c.py" + assert flow["sink"]["line"] == 30 + assert len(flow["steps"]) == 1 + assert flow["steps"][0]["path"] == "src/b.py" + + +def test_extract_flow_single_step_no_steps() -> None: + """Two-location flow yields source+sink but no intermediate steps.""" + result = { + "codeFlows": [ + { + "threadFlows": [ + { + 
"locations": [ + { + "location": { + "physicalLocation": { + "artifactLocation": {"uri": "src/x.py"}, + "region": {"startLine": 1}, + } + }, + "message": {"text": "s"}, + }, + { + "location": { + "physicalLocation": { + "artifactLocation": {"uri": "src/x.py"}, + "region": {"startLine": 99}, + } + }, + "message": {"text": "k"}, + }, + ] + } + ] + } + ] + } + flow = _extract_flow(result, Path(".")) + assert flow is not None + assert flow["steps"] == [] + + +def test_extract_flow_no_code_flows() -> None: + assert _extract_flow({}, Path(".")) is None + + +def test_build_file_signals() -> None: + alerts = [ + { + "id": "CQ-0001", + "rule_id": "py/injection", + "kind": "path-problem", + "precision": "high", + "primary_location": {"path": "src/a.py", "start_line": 10, "end_line": 10}, + }, + { + "id": "CQ-0002", + "rule_id": "py/injection", + "kind": "path-problem", + "precision": "high", + "primary_location": {"path": "src/a.py", "start_line": 20, "end_line": 20}, + }, + { + "id": "CQ-0003", + "rule_id": "py/xss", + "kind": "problem", + "precision": "medium", + "primary_location": {"path": "src/b.py", "start_line": 5, "end_line": 5}, + }, + ] + signals = _build_file_signals(alerts) + assert len(signals) == 2 + a = [s for s in signals if s["path"] == "src/a.py"][0] + assert a["alerts"]["total"] == 2 + assert a["alerts"]["path_problems"] == 2 + assert a["alerts"]["high_precision"] == 2 + assert a["suggested_sweep"] is True + assert a["codeql_score_boost"] >= 4 + + +def test_map_category() -> None: + assert _map_category("py/path-injection", {}) == "Path traversal" + assert _map_category("java/sql-injection", {}) == "SQL injection" + assert _map_category("js/nosql-injection", {}) == "NoSQL injection" + assert _map_category("js/xss", {}) == "Cross-site scripting" + assert _map_category("unknown-rule", {}) == "unknown-rule" + + +def test_normalize_severity() -> None: + assert _normalize_severity("error") == "error" + assert _normalize_severity("warning") == "warning" + assert 
_normalize_severity("note") == "note" + assert _normalize_severity("none") == "info" + assert _normalize_severity("unknown") == "warning" diff --git a/tests/test_codeql_packs.py b/tests/test_codeql_packs.py new file mode 100644 index 00000000..c2b0a78a --- /dev/null +++ b/tests/test_codeql_packs.py @@ -0,0 +1,300 @@ +from __future__ import annotations + +import json +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from codeql.packs import PackResolverError, load_codeql_plan, load_pack_catalog, resolve_pack_profiles, resolve_plan_packs, _resolve_profile_packs + + +def _write_catalog(path: Path) -> None: + path.write_text( + ( + "schema_version: 1\n" + "packs:\n" + " python:\n" + " official:\n" + " - codeql/python-queries\n" + " github-security-lab:\n" + " - githubsecuritylab/codeql-python-queries\n" + " local:\n" + " - ./queries/codeql/python\n" + " c-cpp:\n" + " official:\n" + " - codeql/cpp-queries\n" + " trailofbits:\n" + " - trailofbits/cpp-queries\n" + " coding-standards:\n" + " - codeql/coding-standards-cpp\n" + "candidate_policy:\n" + " official:\n" + " allow_precreate: true\n" + " coding-standards:\n" + " allow_precreate: false\n" + ), + encoding="utf-8", + ) + + +def _write_plan(path: Path) -> None: + path.write_text( + ( + "schema_version: 1\n" + "analysis_units:\n" + " - id: root\n" + " path: ./src\n" + " languages:\n" + " - id: python\n" + " packs:\n" + " - official\n" + " - github-security-lab\n" + " - id: c-cpp\n" + " packs:\n" + " - official\n" + " - coding-standards\n" + ), + encoding="utf-8", + ) + + +def test_load_pack_catalog_validates_schema(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + + catalog = load_pack_catalog(catalog_path) + assert catalog["schema_version"] == 1 + assert catalog["packs"]["python"]["official"] == ["codeql/python-queries"] + + +def 
test_resolve_pack_profiles_preserves_order_and_dedupes(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + catalog = load_pack_catalog(catalog_path) + catalog["packs"]["python"]["dup"] = ["codeql/python-queries"] + + resolved = resolve_pack_profiles("python", ["official", "dup", "github-security-lab"], catalog) + assert resolved == ["codeql/python-queries", "githubsecuritylab/codeql-python-queries"] + + +def test_resolve_pack_profiles_rejects_unknown_language(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + catalog = load_pack_catalog(catalog_path) + + try: + resolve_pack_profiles("ruby", ["official"], catalog) + except PackResolverError as exc: + assert "Unsupported CodeQL language id" in str(exc) + else: + raise AssertionError("expected PackResolverError") + + +def test_resolve_pack_profiles_rejects_unknown_profile(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + catalog = load_pack_catalog(catalog_path) + + try: + resolve_pack_profiles("python", ["trailofbits"], catalog) + except PackResolverError as exc: + assert "Unknown CodeQL pack profile" in str(exc) + else: + raise AssertionError("expected PackResolverError") + + +def test_resolve_plan_packs_includes_profile_packs(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + plan_path = tmp_path / "plan.yml" + _write_catalog(catalog_path) + _write_plan(plan_path) + + catalog = load_pack_catalog(catalog_path) + plan = load_codeql_plan(plan_path) + resolved = resolve_plan_packs(plan, catalog) + + languages = resolved["analysis_units"][0]["languages"] + assert languages[0]["packs"] == [ + "codeql/python-queries", + "githubsecuritylab/codeql-python-queries", + ] + # profile_packs maps each profile to its individual packs (no dedup across profiles) + assert languages[0]["profile_packs"] == { + "official": ["codeql/python-queries"], + "github-security-lab": 
["githubsecuritylab/codeql-python-queries"], + } + assert languages[1]["candidate_policy"]["coding-standards"]["allow_precreate"] is False + + +def test_resolve_profile_packs_rejects_unknown_profile() -> None: + catalog = { + "schema_version": 1, + "packs": { + "python": { + "official": ["codeql/python-queries"], + } + }, + } + try: + _resolve_profile_packs("python", ["trailofbits"], catalog) + except PackResolverError as exc: + assert "Unknown CodeQL pack profile" in str(exc) + else: + raise AssertionError("expected PackResolverError") + + +def test_resolve_profile_packs_rejects_unknown_language() -> None: + catalog = { + "schema_version": 1, + "packs": {}, + } + try: + _resolve_profile_packs("ruby", ["official"], catalog) + except PackResolverError as exc: + assert "Unsupported CodeQL language id" in str(exc) + else: + raise AssertionError("expected PackResolverError") + + +def test_resolve_plan_packs_candidate_policy(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + plan_path = tmp_path / "plan.yml" + _write_catalog(catalog_path) + _write_plan(plan_path) + + catalog = load_pack_catalog(catalog_path) + plan = load_codeql_plan(plan_path) + resolved = resolve_plan_packs(plan, catalog) + + languages = resolved["analysis_units"][0]["languages"] + assert languages[0]["packs"] == [ + "codeql/python-queries", + "githubsecuritylab/codeql-python-queries", + ] + assert languages[1]["candidate_policy"]["coding-standards"]["allow_precreate"] is False + + +def test_load_codeql_plan_rejects_invalid_language_entry(tmp_path: Path) -> None: + plan_path = tmp_path / "bad-plan.yml" + plan_path.write_text("analysis_units:\n - nope\n", encoding="utf-8") + + try: + load_codeql_plan(plan_path) + except PackResolverError as exc: + assert "non-mapping analysis unit" in str(exc) + else: + raise AssertionError("expected PackResolverError") + + +def test_load_codeql_plan_allows_non_recommended_unit_without_languages(tmp_path: Path) -> None: + plan_path = tmp_path / 
"plan.yml" + plan_path.write_text( + ( + "schema_version: 1\n" + "analysis_units:\n" + " - id: api\n" + " path: ./src/api\n" + " languages:\n" + " - id: python\n" + " packs:\n" + " - official\n" + " - id: gilroy\n" + " path: ./src/gilroy\n" + " recommended: false\n" + ), + encoding="utf-8", + ) + + plan = load_codeql_plan(plan_path) + + assert plan["analysis_units"][1]["id"] == "gilroy" + + +def test_resolve_plan_packs_skip_unsupported(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + catalog = load_pack_catalog(catalog_path) + + plan = { + "schema_version": 1, + "analysis_units": [ + { + "id": "gilroy", + "path": "./src", + "languages": [ + {"id": "elixir", "packs": ["official"]}, + {"id": "python", "packs": ["official"]}, + ], + }, + ], + } + + resolved = resolve_plan_packs(plan, catalog, skip_unsupported=True) + + languages = resolved["analysis_units"][0]["languages"] + assert len(languages) == 1 + assert languages[0]["id"] == "python" + warnings = resolved.get("warnings", []) + assert len(warnings) == 1 + assert "elixir" in warnings[0] + + +def test_resolve_plan_packs_skip_unsupported_raises_by_default(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + catalog = load_pack_catalog(catalog_path) + + plan = { + "schema_version": 1, + "analysis_units": [ + { + "id": "gilroy", + "path": "./src", + "languages": [ + {"id": "elixir", "packs": ["official"]}, + ], + }, + ], + } + + try: + resolve_plan_packs(plan, catalog) + except PackResolverError as exc: + assert "Unsupported CodeQL language id" in str(exc) + else: + raise AssertionError("expected PackResolverError with skip_unsupported=False") + + +def test_resolve_plan_packs_skips_non_recommended_units(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + catalog = load_pack_catalog(catalog_path) + + plan = { + "schema_version": 1, + "analysis_units": [ + { + "id": "api", + 
"path": "./src/api", + "languages": [ + {"id": "python", "packs": ["official"]}, + ], + }, + { + "id": "gilroy", + "path": "./src/gilroy", + "recommended": False, + }, + ], + } + + resolved = resolve_plan_packs(plan, catalog, skip_unsupported=True) + + assert [unit["id"] for unit in resolved["analysis_units"]] == ["api"] + warnings = resolved.get("warnings", []) + assert len(warnings) == 1 + assert "gilroy" in warnings[0] + assert "recommended=false" in warnings[0] diff --git a/tests/test_codeql_pipeline.py b/tests/test_codeql_pipeline.py new file mode 100644 index 00000000..a736b8d2 --- /dev/null +++ b/tests/test_codeql_pipeline.py @@ -0,0 +1,254 @@ +from __future__ import annotations + +import sys +from pathlib import Path +from unittest.mock import patch, MagicMock + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +import yaml + +from codeql.config import CodeQLConfig +from codeql.pipeline import record_skipped_run + + +def _make_config(tmp_path: Path) -> CodeQLConfig: + """Create a minimal CodeQLConfig pointing at tmp_path.""" + output_dir = tmp_path / "itemdb" / "codeql" + output_dir.mkdir(parents=True, exist_ok=True) + return CodeQLConfig( + enabled=True, + phase_1_enabled=True, + install_path=".tools/codeql/current/codeql", + pack_catalog="codeql-pack-catalog.yml", + output_dir="itemdb/codeql", + database_dir="itemdb/codeql/databases", + fail_policy="soft", + abs_output_dir=output_dir, + abs_install_path=tmp_path / ".tools" / "codeql" / "current" / "codeql", + abs_pack_catalog=tmp_path / "codeql-pack-catalog.yml", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + abs_cache_dir=tmp_path / ".cache" / "codeql", + ) + + +def test_pipeline_skipped_no_plan(tmp_path: Path) -> None: + """When run_codeql returns skipped, pipeline returns manifest without calling normalize.""" + config = _make_config(tmp_path) + + skipped_manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": "skipped", + 
"codeql_enabled": True, + "codeql_version": "2.18.0", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": "2025-01-01T00:00:01Z", + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": "codeql-pack-catalog.yml", + "fail_policy": "soft", + "languages": [], + "warnings": [], + "failures": ["codeql-plan.yml not found"], + } + + with patch("codeql.runner.run_codeql", return_value=skipped_manifest) as mock_run, \ + patch("codeql.normalize.normalize_all") as mock_normalize, \ + patch("codeql.pipeline.ROOT", tmp_path): + from codeql.pipeline import run_full_pipeline + + result = run_full_pipeline(config) + + assert result["status"] == "skipped" + mock_run.assert_called_once_with(config, progress=None) + mock_normalize.assert_not_called() + + +def test_pipeline_emits_progress(tmp_path: Path) -> None: + config = _make_config(tmp_path) + messages: list[str] = [] + + manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": "skipped", + "codeql_enabled": True, + "codeql_version": "2.18.0", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": "2025-01-01T00:00:01Z", + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": "codeql-pack-catalog.yml", + "fail_policy": "soft", + "analysis_units": [], + "languages": [], + "warnings": [], + "failures": ["codeql-plan.yml not found"], + } + + with patch("codeql.runner.run_codeql", return_value=manifest) as mock_run, \ + patch("codeql.pipeline.ROOT", tmp_path): + from codeql.pipeline import run_full_pipeline + + result = run_full_pipeline(config, progress=messages.append) + + assert result["status"] == "skipped" + mock_run.assert_called_once() + assert mock_run.call_args.args == (config,) + assert mock_run.call_args.kwargs["progress"] is not None + assert "CodeQL: manifest written" in messages + assert "CodeQL: summary written" in messages + + +def test_pipeline_completed_writes_manifest(tmp_path: Path) -> None: + """When run_codeql returns completed, manifest file is written.""" + config = 
_make_config(tmp_path) + + completed_manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": "completed", + "codeql_enabled": True, + "codeql_version": "2.18.0", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": "2025-01-01T00:01:00Z", + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": "codeql-pack-catalog.yml", + "fail_policy": "soft", + "languages": ["python"], + "warnings": [], + "failures": [], + } + + with patch("codeql.runner.run_codeql", return_value=completed_manifest), \ + patch("codeql.normalize.normalize_all") as mock_normalize, \ + patch("codeql.pipeline.ROOT", tmp_path): + from codeql.pipeline import run_full_pipeline + + result = run_full_pipeline(config) + + assert result["status"] == "completed" + manifest_path = config.abs_output_dir / "run-manifest.yml" + assert manifest_path.is_file() + data = yaml.safe_load(manifest_path.read_text()) + assert data["status"] == "completed" + + +def test_pipeline_soft_failed_continues(tmp_path: Path) -> None: + """When run_codeql returns soft-failed, pipeline returns without raising.""" + config = _make_config(tmp_path) + + soft_failed_manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": "soft-failed", + "codeql_enabled": True, + "codeql_version": "2.18.0", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": "2025-01-01T00:00:30Z", + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": "codeql-pack-catalog.yml", + "fail_policy": "soft", + "languages": ["python"], + "warnings": ["analyze timed out"], + "failures": [], + } + + with patch("codeql.runner.run_codeql", return_value=soft_failed_manifest), \ + patch("codeql.pipeline.ROOT", tmp_path): + from codeql.pipeline import run_full_pipeline + + result = run_full_pipeline(config) + + assert result["status"] == "soft-failed" + # Should not raise + + +def test_pipeline_normalize_failure_marks_failed_for_hard_policy(tmp_path: Path) -> None: + config = _make_config(tmp_path) + config.fail_policy = "hard" 
+ (config.abs_output_dir / "selected-query-packs.yml").write_text( + "schema_version: 1\nanalysis_units: []\n", + encoding="utf-8", + ) + sarif_dir = config.abs_output_dir / "sarif" + sarif_dir.mkdir(parents=True) + (sarif_dir / "root.python.official.sarif").write_text("{}", encoding="utf-8") + + manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": "completed", + "codeql_enabled": True, + "codeql_version": "2.18.0", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": "2025-01-01T00:01:00Z", + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": "codeql-pack-catalog.yml", + "fail_policy": "hard", + "languages": ["root:python"], + "warnings": [], + "failures": [], + } + + with patch("codeql.runner.run_codeql", return_value=manifest), \ + patch("codeql.normalize.normalize_all", side_effect=RuntimeError("bad sarif")), \ + patch("codeql.pipeline.ROOT", tmp_path): + from codeql.pipeline import run_full_pipeline + + result = run_full_pipeline(config) + + assert result["status"] == "failed" + assert "SARIF normalization failed: bad sarif" in result["warnings"] + data = yaml.safe_load((config.abs_output_dir / "run-manifest.yml").read_text()) + assert data["status"] == "failed" + + +def test_pipeline_normalize_failure_marks_soft_failed_for_soft_policy(tmp_path: Path) -> None: + config = _make_config(tmp_path) + (config.abs_output_dir / "selected-query-packs.yml").write_text( + "schema_version: 1\nanalysis_units: []\n", + encoding="utf-8", + ) + sarif_dir = config.abs_output_dir / "sarif" + sarif_dir.mkdir(parents=True) + (sarif_dir / "root.python.official.sarif").write_text("{}", encoding="utf-8") + + manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": "completed", + "codeql_enabled": True, + "codeql_version": "2.18.0", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": "2025-01-01T00:01:00Z", + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": "codeql-pack-catalog.yml", + "fail_policy": "soft", + 
"languages": ["root:python"], + "warnings": [], + "failures": [], + } + + with patch("codeql.runner.run_codeql", return_value=manifest), \ + patch("codeql.normalize.normalize_all", side_effect=RuntimeError("bad sarif")), \ + patch("codeql.pipeline.ROOT", tmp_path): + from codeql.pipeline import run_full_pipeline + + result = run_full_pipeline(config) + + assert result["status"] == "soft-failed" + assert "SARIF normalization failed: bad sarif" in result["warnings"] + + +def test_record_skipped_run_writes_manifest_and_summary(tmp_path: Path) -> None: + config = _make_config(tmp_path) + config.enabled = False + + manifest = record_skipped_run(config, "CodeQL disabled for Phase 1") + + assert manifest["status"] == "skipped" + assert manifest["codeql_enabled"] is False + assert manifest["skip_reason"] == "CodeQL disabled for Phase 1" + assert (config.abs_output_dir / "run-manifest.yml").is_file() + assert (config.abs_output_dir / "codeql-summary.md").is_file() diff --git a/tests/test_codeql_runner.py b/tests/test_codeql_runner.py new file mode 100644 index 00000000..6e6de134 --- /dev/null +++ b/tests/test_codeql_runner.py @@ -0,0 +1,688 @@ +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path +from unittest.mock import MagicMock, patch + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from codeql.config import CodeQLConfig +from codeql.runner import ( + _create_database, + _ensure_query_packs_available, + _lookup_build, + _lookup_timeout, + _manifest, + _run_analyze, + run_codeql, + write_manifest, +) + + +def test_manifest_completed() -> None: + config = CodeQLConfig(enabled=True, fail_policy="soft") + result = _manifest( + "completed", + "2025-01-01T00:00:00Z", + config, + ["2.20.0"], + [], + languages=["python"], + ) + assert result["schema_version"] == 1 + assert result["status"] == "completed" + assert result["codeql_version"] == "2.20.0" + assert result["languages"] == ["python"] + 
assert result["failures"] == [] + assert result["warnings"] == [] + assert "finished_at" in result + assert "started_at" in result + + +def test_manifest_failed_with_failures() -> None: + config = CodeQLConfig(enabled=True, fail_policy="hard") + result = _manifest( + "failed", + "2025-01-01T00:00:00Z", + config, + ["2.20.0"], + ["warn1"], + failures=["fail1", "fail2"], + ) + assert result["status"] == "failed" + assert result["fail_policy"] == "hard" + assert result["failures"] == ["fail1", "fail2"] + assert result["warnings"] == ["warn1"] + + +def test_manifest_skipped_with_failures() -> None: + config = CodeQLConfig(enabled=False) + result = _manifest( + "skipped", + "2025-01-01T00:00:00Z", + config, + [], + [], + failures=["no plan"], + ) + assert result["status"] == "skipped" + assert result["codeql_enabled"] is False + assert result["failures"] == ["no plan"] + + +def test_manifest_defaults() -> None: + config = CodeQLConfig(enabled=True) + result = _manifest( + "completed", + "2025-01-01T00:00:00Z", + config, + [], + [], + ) + assert result["languages"] == [] + assert result["failures"] == [] + assert result["warnings"] == [] + + +def test_write_manifest(tmp_path: Path) -> None: + config = CodeQLConfig(enabled=True) + manifest = _manifest( + "completed", + "2025-01-01T00:00:00Z", + config, + ["2.21.0"], + [], + languages=["python", "c-cpp"], + ) + out_dir = tmp_path / "codeql" + path = write_manifest(manifest, out_dir) + assert path == out_dir / "run-manifest.yml" + assert path.is_file() + + import yaml + data = yaml.safe_load(path.read_text()) + assert data["status"] == "completed" + assert data["languages"] == ["python", "c-cpp"] + + +def test_lookup_build_match() -> None: + languages = [ + {"id": "python", "build_mode": "none", "build_command": None}, + {"id": "c-cpp", "build_mode": "manual", "build_command": "make -C src"}, + ] + mode, cmd = _lookup_build("c-cpp", languages) + assert mode == "manual" + assert cmd == "make -C src" + + +def 
test_lookup_build_fallback() -> None: + languages: list = [] + mode, cmd = _lookup_build("python", languages) + assert mode == "none" + assert cmd is None + + +def test_lookup_build_no_match_within_plan() -> None: + languages = [{"id": "go", "build_mode": "autobuild"}] + mode, cmd = _lookup_build("python", languages) + assert mode == "none" + assert cmd is None + + +def test_create_database_creates_parent_dir(tmp_path: Path) -> None: + db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "c-cpp" + mock_process = MagicMock() + mock_process.returncode = 0 + mock_process.wait.return_value = 0 + mock_process.stderr = [] + + with patch("codeql.runner.subprocess.Popen", return_value=mock_process) as mock_popen: + ok, msg = _create_database( + tmp_path / "codeql", + "c-cpp", + "./src", + db_dir, + "none", + None, + [], + ) + + assert ok is True + assert msg == "" + assert db_dir.parent.is_dir() + assert mock_popen.call_args.args[0][3] == str(db_dir) + assert "--build-mode=none" in mock_popen.call_args.args[0] + assert mock_popen.call_args.kwargs["stdout"] == subprocess.DEVNULL + + +def test_create_database_uses_workspace_common_cache(tmp_path: Path) -> None: + db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "python" + cache_dir = tmp_path / ".cache" / "codeql" + mock_process = MagicMock() + mock_process.returncode = 0 + mock_process.wait.return_value = 0 + mock_process.stderr = [] + + with patch("codeql.runner.subprocess.Popen", return_value=mock_process) as mock_popen: + ok, msg = _create_database( + tmp_path / "codeql", + "python", + "./src", + db_dir, + "none", + None, + [], + cache_dir, + ) + + assert ok is True + assert msg == "" + assert f"--common-caches={cache_dir}" in mock_popen.call_args.args[0] + assert cache_dir.is_dir() + + +def test_run_analyze_uses_workspace_common_cache(tmp_path: Path) -> None: + db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "root" / "python" + sarif_path = tmp_path / "itemdb" / "codeql" / "sarif" / 
"root.python.official.sarif" + cache_dir = tmp_path / ".cache" / "codeql" + mock_process = MagicMock() + mock_process.returncode = 0 + mock_process.wait.return_value = 0 + mock_process.stderr = [] + + with patch("codeql.runner.subprocess.Popen", return_value=mock_process) as mock_popen: + ok, msg = _run_analyze( + tmp_path / "codeql", + db_dir, + ["codeql/python-queries"], + sarif_path, + cache_dir, + ) + + assert ok is True + assert msg == "" + cmd = mock_popen.call_args.args[0] + assert f"--common-caches={cache_dir}" in cmd + assert cmd[-1] == "codeql/python-queries" + assert cache_dir.is_dir() + + +def test_query_pack_resolution_uses_workspace_common_cache(tmp_path: Path) -> None: + binary = tmp_path / "codeql" + cache_dir = tmp_path / ".cache" / "codeql" + config = CodeQLConfig(enabled=True, fail_policy="soft", abs_cache_dir=cache_dir) + commands: list[list[str]] = [] + + def fake_run_quiet(cmd, timeout): + commands.append(cmd) + return True, "" + + with patch("codeql.runner._run_quiet", side_effect=fake_run_quiet): + ok, msg = _ensure_query_packs_available(binary, ["codeql/python-queries"], "official", config) + + assert ok is True + assert msg == "" + assert commands == [[ + str(binary), + "resolve", + "queries", + "--format=json", + f"--common-caches={cache_dir}", + "--", + "codeql/python-queries", + ]] + assert cache_dir.is_dir() + + +def test_query_pack_download_uses_workspace_common_cache(tmp_path: Path) -> None: + binary = tmp_path / "codeql" + cache_dir = tmp_path / ".cache" / "codeql" + config = CodeQLConfig(enabled=True, fail_policy="soft", abs_cache_dir=cache_dir) + commands: list[list[str]] = [] + + def fake_run_quiet(cmd, timeout): + commands.append(cmd) + return (False, "pack missing") if len(commands) == 1 else (True, "") + + with patch("codeql.runner._run_quiet", side_effect=fake_run_quiet): + ok, msg = _ensure_query_packs_available(binary, ["codeql/python-queries"], "official", config) + + assert ok is True + assert msg == "" + assert 
commands[1] == [ + str(binary), + "pack", + "download", + f"--common-caches={cache_dir}", + "--", + "codeql/python-queries", + ] + assert commands[2] == commands[0] + + +def test_create_database_manual_build_mode_and_command(tmp_path: Path) -> None: + db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "root" / "c-cpp" + mock_process = MagicMock() + mock_process.returncode = 0 + mock_process.wait.return_value = 0 + mock_process.stderr = [] + + with patch("codeql.runner.subprocess.Popen", return_value=mock_process) as mock_popen: + ok, msg = _create_database( + tmp_path / "codeql", + "c-cpp", + "./src/native", + db_dir, + "manual", + "make -C src/native", + [], + ) + + assert ok is True + assert msg == "" + cmd = mock_popen.call_args.args[0] + assert "--build-mode=manual" in cmd + assert "-c" in cmd + assert "make -C src/native" in cmd + + +def test_run_codeql_database_failure_honors_soft_policy(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\n", encoding="utf-8") + + catalog = tmp_path / "templates" / "codeql-packs.yml" + catalog.parent.mkdir(parents=True) + catalog.write_text("schema_version: 1\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + + resolved = { + "analysis_units": [ + { + "id": "root", + "path": "./src", + "languages": [ + { + "id": "c-cpp", + "profiles": ["official"], + "profile_packs": {"official": ["codeql/cpp-queries"]}, + } + ], + } + ] + } + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + 
patch("codeql.runner.load_pack_catalog", return_value={}), \ + patch("codeql.runner.load_codeql_plan", return_value={"analysis_units": [{"id": "root", "path": "./src", "languages": [{"id": "c-cpp", "build_mode": "autobuild", "build_command": None}]}]}), \ + patch("codeql.runner.resolve_plan_packs", return_value=resolved), \ + patch("codeql.runner._create_database", return_value=(False, "db create failed")): + manifest = run_codeql(config) + + assert manifest["status"] == "soft-failed" + assert manifest["fail_policy"] == "soft" + assert manifest["failures"] == ["db create failed"] + assert manifest["analysis_units"] == ["root"] + assert manifest["languages"] == ["root:c-cpp"] + + +def test_lookup_timeout_plan_takes_priority() -> None: + languages = [ + {"id": "c-cpp", "db_create_timeout": 1800, "analyze_timeout": 900}, + ] + assert _lookup_timeout("db_create_timeout", "c-cpp", languages, 600) == 1800 + assert _lookup_timeout("analyze_timeout", "c-cpp", languages, 600) == 900 + + +def test_lookup_timeout_falls_back_to_default() -> None: + languages = [{"id": "c-cpp"}] + assert _lookup_timeout("db_create_timeout", "c-cpp", languages, 600) == 600 + assert _lookup_timeout("analyze_timeout", "c-cpp", [], 600) == 600 + + +def test_create_database_streams_stderr_to_progress(tmp_path: Path) -> None: + db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "n" / "c-cpp" + mock_process = MagicMock() + mock_process.returncode = 0 + mock_process.wait.return_value = 0 + mock_process.stderr = ["extracting file\n", "compiling done\n"] + + messages: list[str] = [] + + with patch("codeql.runner.subprocess.Popen", return_value=mock_process): + ok, msg = _create_database( + tmp_path / "codeql", "c-cpp", "./src", db_dir, + "none", None, [], progress=messages.append, + ) + + assert ok is True + assert "CodeQL: extracting file" in messages + assert "CodeQL: compiling done" in messages + + +def test_run_codeql_empty_languages_returns_skipped(tmp_path: Path) -> None: + binary = tmp_path / 
".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\nanalysis_units: []\n", encoding="utf-8") + + catalog = tmp_path / "templates" / "codeql-packs.yml" + catalog.parent.mkdir(parents=True) + catalog.write_text("schema_version: 1\npacks:\n python:\n official:\n - codeql/python-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"): + manifest = run_codeql(config) + + assert manifest["status"] == "skipped" + assert manifest["languages"] == [] + assert any("No languages resolved" in f for f in manifest["failures"]) + + +def test_run_codeql_pack_resolver_error_soft_policy(tmp_path: Path) -> None: + from codeql.packs import PackResolverError + + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\n", encoding="utf-8") + + catalog = tmp_path / "templates" / "codeql-packs.yml" + catalog.parent.mkdir(parents=True) + catalog.write_text("schema_version: 1\npacks:\n python:\n official:\n - codeql/python-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + + with patch("codeql.runner.ROOT", 
tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + patch("codeql.runner.load_pack_catalog", return_value={}), \ + patch("codeql.runner.load_codeql_plan", side_effect=PackResolverError("boom")): + manifest = run_codeql(config) + + assert manifest["status"] == "soft-failed" + assert manifest["fail_policy"] == "soft" + + +def test_run_codeql_skips_unsupported_languages_soft_policy(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\n", encoding="utf-8") + + catalog = tmp_path / "templates" / "codeql-packs.yml" + catalog.parent.mkdir(parents=True) + catalog.write_text("schema_version: 1\npacks:\n python:\n official:\n - codeql/python-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + + resolved = { + "warnings": ["Skipping unsupported CodeQL language 'elixir' in analysis unit 'gilroy'"], + "analysis_units": [], + } + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + patch("codeql.runner.load_pack_catalog", return_value={"packs": {"python": {"official": ["codeql/python-queries"]}}}), \ + patch("codeql.runner.load_codeql_plan", return_value={"analysis_units": [{"id": "gilroy", "path": "./src", "languages": [{"id": "elixir", "packs": ["official"]}]}]}), \ + patch("codeql.runner.resolve_plan_packs", return_value=resolved): + manifest = run_codeql(config) + + assert manifest["status"] == "skipped" + assert "elixir" in manifest["warnings"][0] + + +def 
test_run_codeql_downloads_and_skips_unavailable_optional_profile_under_soft_policy(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\nanalysis_units: []\n", encoding="utf-8") + catalog_path = tmp_path / "templates" / "codeql-packs.yml" + catalog_path.parent.mkdir(parents=True) + catalog_path.write_text("schema_version: 1\npacks:\n c-cpp:\n official:\n - codeql/cpp-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog_path, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + resolved = { + "analysis_units": [ + { + "id": "root", + "path": "./src", + "languages": [ + { + "id": "c-cpp", + "profiles": ["official", "github-security-lab"], + "profile_packs": { + "official": ["codeql/cpp-queries"], + "github-security-lab": ["githubsecuritylab/codeql-cpp-queries"], + }, + } + ], + } + ] + } + + def fake_run_quiet(cmd, timeout): + joined = " ".join(cmd) + if "githubsecuritylab/codeql-cpp-queries" in joined: + return False, "pack missing" + return True, "" + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + patch("codeql.runner.load_pack_catalog", return_value={}), \ + patch("codeql.runner.load_codeql_plan", return_value={"analysis_units": [{"id": "root", "path": "./src", "languages": [{"id": "c-cpp", "build_mode": "autobuild", "packs": ["official", "github-security-lab"]}]}]}), \ + patch("codeql.runner.resolve_plan_packs", return_value=resolved), \ + patch("codeql.runner._create_database", return_value=(True, "")), \ + patch("codeql.runner._run_analyze", return_value=(True, "")) as 
analyze, \ + patch("codeql.runner._run_quiet", side_effect=fake_run_quiet): + manifest = run_codeql(config) + + assert manifest["status"] == "completed" + assert any("githubsecuritylab/codeql-cpp-queries" in warning for warning in manifest["warnings"]) + assert analyze.call_count == 1 + assert analyze.call_args.args[2] == ["codeql/cpp-queries"] + + +def test_run_codeql_fails_unavailable_official_profile_under_soft_policy(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\nanalysis_units: []\n", encoding="utf-8") + catalog_path = tmp_path / "templates" / "codeql-packs.yml" + catalog_path.parent.mkdir(parents=True) + catalog_path.write_text("schema_version: 1\npacks:\n c-cpp:\n official:\n - codeql/cpp-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog_path, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + resolved = { + "analysis_units": [ + { + "id": "root", + "path": "./src", + "languages": [ + { + "id": "c-cpp", + "profiles": ["official"], + "profile_packs": {"official": ["codeql/cpp-queries"]}, + } + ], + } + ] + } + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + patch("codeql.runner.load_pack_catalog", return_value={}), \ + patch("codeql.runner.load_codeql_plan", return_value={"analysis_units": [{"id": "root", "path": "./src", "languages": [{"id": "c-cpp", "build_mode": "autobuild", "packs": ["official"]}]}]}), \ + patch("codeql.runner.resolve_plan_packs", return_value=resolved), \ + patch("codeql.runner._create_database", return_value=(True, "")), \ + 
patch("codeql.runner._run_analyze") as analyze, \ + patch("codeql.runner._run_quiet", return_value=(False, "pack missing")): + manifest = run_codeql(config) + + assert manifest["status"] == "soft-failed" + assert "required official profile" in manifest["failures"][0] + analyze.assert_not_called() + + +def test_run_codeql_fails_unavailable_optional_profile_under_hard_policy(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\nanalysis_units: []\n", encoding="utf-8") + catalog_path = tmp_path / "templates" / "codeql-packs.yml" + catalog_path.parent.mkdir(parents=True) + catalog_path.write_text("schema_version: 1\npacks:\n c-cpp:\n github-security-lab:\n - githubsecuritylab/codeql-cpp-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="hard", + abs_install_path=binary, + abs_pack_catalog=catalog_path, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + resolved = { + "analysis_units": [ + { + "id": "root", + "path": "./src", + "languages": [ + { + "id": "c-cpp", + "profiles": ["github-security-lab"], + "profile_packs": {"github-security-lab": ["githubsecuritylab/codeql-cpp-queries"]}, + } + ], + } + ] + } + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + patch("codeql.runner.load_pack_catalog", return_value={}), \ + patch("codeql.runner.load_codeql_plan", return_value={"analysis_units": [{"id": "root", "path": "./src", "languages": [{"id": "c-cpp", "build_mode": "autobuild", "packs": ["github-security-lab"]}]}]}), \ + patch("codeql.runner.resolve_plan_packs", return_value=resolved), \ + patch("codeql.runner._create_database", 
return_value=(True, "")), \ + patch("codeql.runner._run_analyze") as analyze, \ + patch("codeql.runner._run_quiet", return_value=(False, "pack missing")): + manifest = run_codeql(config) + + assert manifest["status"] == "failed" + assert "optional profile 'github-security-lab'" in manifest["failures"][0] + analyze.assert_not_called() + + +def test_run_codeql_soft_fails_when_all_profiles_are_skipped(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\nanalysis_units: []\n", encoding="utf-8") + catalog_path = tmp_path / "templates" / "codeql-packs.yml" + catalog_path.parent.mkdir(parents=True) + catalog_path.write_text("schema_version: 1\npacks:\n c-cpp:\n github-security-lab:\n - githubsecuritylab/codeql-cpp-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog_path, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + resolved = { + "analysis_units": [ + { + "id": "root", + "path": "./src", + "languages": [ + { + "id": "c-cpp", + "profiles": ["github-security-lab"], + "profile_packs": {"github-security-lab": ["githubsecuritylab/codeql-cpp-queries"]}, + } + ], + } + ] + } + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + patch("codeql.runner.load_pack_catalog", return_value={}), \ + patch("codeql.runner.load_codeql_plan", return_value={"analysis_units": [{"id": "root", "path": "./src", "languages": [{"id": "c-cpp", "build_mode": "autobuild", "packs": ["github-security-lab"]}]}]}), \ + patch("codeql.runner.resolve_plan_packs", return_value=resolved), \ + 
patch("codeql.runner._create_database", return_value=(True, "")), \ + patch("codeql.runner._run_analyze") as analyze, \ + patch("codeql.runner._run_quiet", return_value=(False, "pack missing")): + manifest = run_codeql(config) + + assert manifest["status"] == "soft-failed" + assert "No CodeQL query profiles ran successfully" in manifest["failures"][0] + analyze.assert_not_called() diff --git a/tests/test_event_recording.py b/tests/test_event_recording.py index c39502d2..a4f85251 100644 --- a/tests/test_event_recording.py +++ b/tests/test_event_recording.py @@ -8,6 +8,7 @@ from codecome.recording import EventRecorder from codecome.transcript import Transcript +import codecome.transcript as transcript_mod @pytest.fixture @@ -57,4 +58,23 @@ def test_record_always_forwards_all_events(mock_transcript): for event_type in ("reasoning", "text", "message.updated", "tool_use"): mock_transcript.write_event.reset_mock() recorder.record({"type": event_type}) - mock_transcript.write_event.assert_called_once() \ No newline at end of file + mock_transcript.write_event.assert_called_once() + + +def test_phase_transcript_does_not_truncate_existing_file(tmp_path, monkeypatch): + monkeypatch.setattr(transcript_mod, "ROOT", tmp_path) + transcript_mod._ATTEMPT_COUNTER.clear() + + existing = tmp_path / "tmp" / "last-phase-1c-no-finding-attempt-1.jsonl" + existing.parent.mkdir(parents=True) + existing.write_text("keep me\n", encoding="utf-8") + + transcript = Transcript.for_phase("1c", None) + try: + transcript.write_event({"type": "test"}) + finally: + transcript.close() + + assert existing.read_text(encoding="utf-8") == "keep me\n" + assert transcript.path != existing + assert transcript.path.name.startswith("last-phase-1c-no-finding-attempt-1-") diff --git a/tests/test_events_loops.py b/tests/test_events_loops.py index 28513c36..36d9cf4f 100644 --- a/tests/test_events_loops.py +++ b/tests/test_events_loops.py @@ -62,7 +62,8 @@ def stop(self): def render_fn(console, phase, label, 
event): rendered.append((phase, label, event)) - result = loop.run(render_fn) + raw_events = [] + result = loop.run(render_fn, raw_events.append) assert isinstance(result, RunResult) assert result.any_step_finish_seen is True @@ -70,6 +71,7 @@ def render_fn(console, phase, label, event): assert result.last_finish_reason == "stop" assert result.last_finish_tokens == {"output": 3} assert rendered[-1][2]["properties"]["status"]["type"] == "idle" + assert raw_events == events def test_chat_event_loop_recovery_sync_emits_synced_events(monkeypatch): @@ -96,13 +98,15 @@ def stop(self): monkeypatch.setattr(loop, "_sync_session_messages", lambda: [synced]) rendered = [] + raw_events = [] def render_fn(console, phase, label, event): rendered.append(event) - loop._consumer_worker(render_fn) + loop._consumer_worker(render_fn, raw_events.append) assert synced in rendered assert any(event.get("type") == "session.status" and event.get("properties", {}).get("status", {}).get("type") == "idle" for event in rendered) assert loop.get_state(timeout=0.1)[0] == ChatState.BUSY assert loop.get_state(timeout=0.1)[0] == ChatState.IDLE + assert raw_events == events diff --git a/tests/test_gate_check.py b/tests/test_gate_check.py index 2e715345..4ab99826 100644 --- a/tests/test_gate_check.py +++ b/tests/test_gate_check.py @@ -1,98 +1,129 @@ from __future__ import annotations -from conftest import load_tool_module +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from phases import gates as gates_module def test_has_meaningful_evidence_detects_template_only_readme(tmp_path): - module = load_tool_module("gate_check", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path evidence_dir = tmp_path / "itemdb" / "evidence" / "CC-0001" evidence_dir.mkdir(parents=True) readme = evidence_dir / "README.md" readme.write_text("Briefly summarize what this evidence 
proves or disproves.", encoding="utf-8") - assert module.has_meaningful_evidence("CC-0001") is False + try: + assert gates_module.has_meaningful_evidence("CC-0001") is False + finally: + gates_module.ROOT = original_root def test_has_meaningful_evidence_detects_non_readme_artifact(tmp_path): - module = load_tool_module("gate_check_artifact", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path evidence_dir = tmp_path / "itemdb" / "evidence" / "CC-0002" evidence_dir.mkdir(parents=True) (evidence_dir / "output.txt").write_text("proof", encoding="utf-8") - assert module.has_meaningful_evidence("CC-0002") is True + try: + assert gates_module.has_meaningful_evidence("CC-0002") is True + finally: + gates_module.ROOT = original_root def test_find_finding_exact_match_bare_cc_xxxx(tmp_path): - module = load_tool_module("gate_check_exact", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path pending = tmp_path / "itemdb" / "findings" / "PENDING" pending.mkdir(parents=True) (pending / "CC-0003.md").write_text("---\nid: CC-0003\n---\n", encoding="utf-8") - result = module.find_finding("CC-0003") - assert result is not None - assert result.name == "CC-0003.md" + try: + result = gates_module.find_finding("CC-0003") + assert result is not None + assert result.name == "CC-0003.md" + finally: + gates_module.ROOT = original_root def test_find_finding_slug_match(tmp_path): - module = load_tool_module("gate_check_slug", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path pending = tmp_path / "itemdb" / "findings" / "PENDING" pending.mkdir(parents=True) (pending / "CC-0003-some-finding.md").write_text("---\nid: CC-0003\n---\n", encoding="utf-8") - result = module.find_finding("CC-0003") - assert result is not None - assert result.name == "CC-0003-some-finding.md" + try: + result = 
gates_module.find_finding("CC-0003") + assert result is not None + assert result.name == "CC-0003-some-finding.md" + finally: + gates_module.ROOT = original_root def test_find_finding_exact_wins_over_slug(tmp_path): - module = load_tool_module("gate_check_priority", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path pending = tmp_path / "itemdb" / "findings" / "PENDING" pending.mkdir(parents=True) (pending / "CC-0003.md").write_text("---\nid: CC-0003\n---\n", encoding="utf-8") (pending / "CC-0003-other-finding.md").write_text("---\nid: CC-0003\n---\n", encoding="utf-8") - result = module.find_finding("CC-0003") - assert result is not None - assert result.name == "CC-0003.md" + try: + result = gates_module.find_finding("CC-0003") + assert result is not None + assert result.name == "CC-0003.md" + finally: + gates_module.ROOT = original_root def test_find_finding_returns_none_for_missing(tmp_path): - module = load_tool_module("gate_check_missing", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path - result = module.find_finding("CC-9999") - assert result is None + try: + result = gates_module.find_finding("CC-9999") + assert result is None + finally: + gates_module.ROOT = original_root def test_gate_phase_4_accepts_bare_id(tmp_path, monkeypatch): - module = load_tool_module("gate_check_phase4", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path pending = tmp_path / "itemdb" / "findings" / "PENDING" pending.mkdir(parents=True) (pending / "CC-0003.md").write_text("---\nid: CC-0003\n---\n", encoding="utf-8") - exit_code = module.gate_phase_4("CC-0003") - assert exit_code == 0 + try: + exit_code = gates_module.gate_phase_4("CC-0003") + assert exit_code == 0 + finally: + gates_module.ROOT = original_root def test_gate_phase_4_rejects_wrong_status(tmp_path, monkeypatch): - module = 
load_tool_module("gate_check_phase4_wrong", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path confirmed = tmp_path / "itemdb" / "findings" / "CONFIRMED" confirmed.mkdir(parents=True) (confirmed / "CC-0003.md").write_text("---\nid: CC-0003\n---\n", encoding="utf-8") - exit_code = module.gate_phase_4("CC-0003") - assert exit_code == 1 + try: + exit_code = gates_module.gate_phase_4("CC-0003") + assert exit_code == 1 + finally: + gates_module.ROOT = original_root diff --git a/tests/test_phase_1_codeql_plan_repair.py b/tests/test_phase_1_codeql_plan_repair.py new file mode 100644 index 00000000..e325102c --- /dev/null +++ b/tests/test_phase_1_codeql_plan_repair.py @@ -0,0 +1,397 @@ +from __future__ import annotations + +import sys +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import patch + +import yaml + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from events.phase_loop import RunResult + + +def _write_invalid_plan(root: Path) -> None: + plan = root / "itemdb" / "notes" / "codeql-plan.yml" + plan.parent.mkdir(parents=True, exist_ok=True) + plan.write_text( + "schema_version: 1\n" + "analysis_units:\n" + " - id: native\n" + " path: ./src/native\n" + " languages:\n" + " - id: c-cpp\n" + " packs:\n" + " - official\n" + "- outdented-note\n", + encoding="utf-8", + ) + + +def _write_valid_plan(root: Path) -> None: + plan = root / "itemdb" / "notes" / "codeql-plan.yml" + plan.parent.mkdir(parents=True, exist_ok=True) + plan.write_text( + "schema_version: 1\n" + "analysis_units:\n" + " - id: native\n" + " path: ./src/native\n" + " languages:\n" + " - id: c-cpp\n" + " packs:\n" + " - official\n" + "notes:\n" + " - repaired\n", + encoding="utf-8", + ) + + +def _runtime_config() -> SimpleNamespace: + return SimpleNamespace( + model="test-model", + variant=None, + thinking_on=False, + model_source="test", + variant_source="test", + 
thinking_source="test", + ) + + +def _runner() -> SimpleNamespace: + return SimpleNamespace(info=SimpleNamespace(password="")) + + +def _ok_result() -> RunResult: + return RunResult(any_step_finish_seen=True, step_finish_count=1, last_finish_reason="stop") + + +def _write_manual_plan(root: Path, build_command: str) -> None: + plan = root / "itemdb" / "notes" / "codeql-plan.yml" + plan.parent.mkdir(parents=True, exist_ok=True) + plan.write_text( + yaml.safe_dump( + { + "schema_version": 1, + "analysis_units": [ + { + "id": "native", + "path": "./src/native", + "languages": [ + { + "id": "c-cpp", + "build_mode": "manual", + "build_command": build_command, + "packs": ["official"], + } + ], + } + ], + }, + sort_keys=False, + ), + encoding="utf-8", + ) + + +def test_subphase_resumes_same_session_to_repair_invalid_codeql_plan(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + transcript = tmp_path / "transcript.jsonl" + calls: list[tuple[str, str | None]] = [] + + def fake_run_single_attempt(_args, _console, prompt, *_rest, existing_session_id=None, **_kwargs): + calls.append((prompt, existing_session_id)) + if len(calls) == 1: + _write_invalid_plan(tmp_path) + return 0, "sess-1", _ok_result(), transcript + assert existing_session_id == "sess-1" + assert "itemdb/notes/codeql-plan.yml" in prompt + assert "Validation errors:" in prompt + _write_valid_plan(tmp_path) + return 0, "sess-1", _ok_result(), transcript + + saved_rich = p1.HAVE_RICH + p1.HAVE_RICH = False + try: + with patch.object(p1, "ROOT", tmp_path), \ + patch.object(p1, "load_prompt", return_value="initial prompt"), \ + patch.object(p1, "resolve_runtime_config", return_value=_runtime_config()), \ + patch.object(p1, "configure_rendering"), \ + patch.object(p1, "_run_single_attempt", side_effect=fake_run_single_attempt), \ + patch("findings.checks_entry.run_frontmatter_validation", return_value=(0, "")): + rc = p1._run_subphase( + args=object(), + console=None, + rendering_ctx=None, + runner=_runner(), 
+ base_url="http://127.0.0.1", + phase_id="1a", + label="Target Profile", + agent="recon", + prompt_file="prompts/phase-1a-profile.md", + ) + finally: + p1.HAVE_RICH = saved_rich + + assert rc == 0 + assert len(calls) == 2 + assert calls[1][1] == "sess-1" + + +def test_subphase_fails_after_codeql_plan_auto_repair_retries_exhausted(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + transcript = tmp_path / "transcript.jsonl" + + def fake_run_single_attempt(*_args, **_kwargs): + _write_invalid_plan(tmp_path) + return 0, "sess-1", _ok_result(), transcript + + saved_rich = p1.HAVE_RICH + p1.HAVE_RICH = False + try: + with patch.object(p1, "ROOT", tmp_path), \ + patch.object(p1, "load_prompt", return_value="initial prompt"), \ + patch.object(p1, "resolve_runtime_config", return_value=_runtime_config()), \ + patch.object(p1, "configure_rendering"), \ + patch.object(p1, "_run_single_attempt", side_effect=fake_run_single_attempt) as run_attempt, \ + patch("findings.checks_entry.run_frontmatter_validation", return_value=(0, "")): + rc = p1._run_subphase( + args=object(), + console=None, + rendering_ctx=None, + runner=_runner(), + base_url="http://127.0.0.1", + phase_id="1-codeql-repair", + label="CodeQL Build Repair", + agent="recon", + prompt_file="prompts/phase-1-codeql-repair.md", + ) + finally: + p1.HAVE_RICH = saved_rich + + assert rc == 2 + assert run_attempt.call_count == 3 + + +def test_codeql_plan_validation_rejects_absolute_tmp_in_build_command(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + _write_manual_plan(tmp_path, "bash -c 'mkdir -p /tmp/codeql-build'") + + with patch.object(p1, "ROOT", tmp_path): + rc, output = p1._validate_codeql_plan_for_repair() + + assert rc == 1 + assert "absolute /tmp/" in output + + +def test_codeql_plan_validation_rejects_shell_operators_in_build_command(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + _write_manual_plan(tmp_path, "mkdir -p out && gcc main.c -o out/app") + + with patch.object(p1, 
"ROOT", tmp_path): + rc, output = p1._validate_codeql_plan_for_repair() + + assert rc == 1 + assert "shell operator" in output + assert "helper script" in output + + +def test_codeql_plan_validation_rejects_multiline_and_comments_in_build_command(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + _write_manual_plan(tmp_path, "# build\nmkdir -p out\ngcc main.c -o out/app") + + with patch.object(p1, "ROOT", tmp_path): + rc, output = p1._validate_codeql_plan_for_repair() + + assert rc == 1 + assert "multi-line" in output + assert "shell comments" in output + + +def test_codeql_plan_validation_rejects_bash_c_build_command(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + _write_manual_plan(tmp_path, "bash -c 'mkdir -p out && gcc main.c -o out/app'") + + with patch.object(p1, "ROOT", tmp_path): + rc, output = p1._validate_codeql_plan_for_repair() + + assert rc == 1 + assert "bash -c" in output + + +def test_codeql_plan_validation_checks_helper_from_analysis_root(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + analysis_root = tmp_path / "src" / "native" + helper = tmp_path / "tmp" / "codeql-build.sh" + analysis_root.mkdir(parents=True) + helper.parent.mkdir(parents=True) + helper.write_text("#!/usr/bin/env bash\necho ok\n", encoding="utf-8") + _write_manual_plan(tmp_path, "bash ../../tmp/codeql-build.sh") + + with patch.object(p1, "ROOT", tmp_path): + rc, output = p1._validate_codeql_plan_for_repair() + + assert rc == 0, output + + +def test_codeql_plan_validation_rejects_missing_helper_from_analysis_root(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + (tmp_path / "src" / "native").mkdir(parents=True) + _write_manual_plan(tmp_path, "bash tmp/codeql-build.sh") + + with patch.object(p1, "ROOT", tmp_path): + rc, output = p1._validate_codeql_plan_for_repair() + + assert rc == 1 + assert "referenced helper script does not exist from analysis root" in output + + +def 
def test_codeql_repair_loop_does_not_block_after_retries_exhausted(tmp_path: Path, monkeypatch) -> None:
    """The repair loop returns 0 even when CodeQL never recovers.

    The manifest starts as ``soft-failed`` and every faked CodeQL rerun
    writes ``soft-failed`` again, with ``CODEQL_REPAIR_RETRIES`` set to 1.
    """
    import codecome.phase_1 as p1

    output_dir = tmp_path / "itemdb" / "codeql"
    output_dir.mkdir(parents=True)
    manifest_path = output_dir / "run-manifest.yml"

    def write_soft_failure(detail: str) -> None:
        # Record a soft failure whose message ends with *detail*.
        manifest_path.write_text(
            yaml.safe_dump(
                {"status": "soft-failed", "failures": [f"Database create failed for c-cpp:\n{detail}"]}
            ),
            encoding="utf-8",
        )

    write_soft_failure("autobuild failed")
    _write_manual_plan(tmp_path, "make")
    config = SimpleNamespace(abs_output_dir=output_dir)

    def fake_subphase(**_kwargs):
        return p1._SubphaseOutcome(0, "repair-session", tmp_path / "repair.jsonl")

    def fake_run_codeql(_console):
        write_soft_failure("manual failed")
        return None

    # monkeypatch undoes both overrides at teardown, so no manual
    # save/restore of HAVE_RICH is needed.
    monkeypatch.setenv("CODEQL_REPAIR_RETRIES", "1")
    monkeypatch.setattr(p1, "HAVE_RICH", False)

    with patch.object(p1, "ROOT", tmp_path), \
            patch("codeql.config.resolve_config", return_value=config), \
            patch.object(p1, "_run_subphase", side_effect=fake_subphase), \
            patch.object(p1, "_run_codeql", side_effect=fake_run_codeql):
        rc = p1._run_codeql_repair_if_needed(
            args=object(),
            console=None,
            rendering_ctx=None,
            runner=_runner(),
            base_url="http://127.0.0.1",
        )

    assert rc == 0
return_value="prompt"), \ + patch.object(p1, "resolve_runtime_config", return_value=_runtime_config()), \ + patch.object(p1, "configure_rendering", return_value=None), \ + patch.object(p1, "_run_single_attempt", side_effect=fake_run_single_attempt), \ + patch.object(p1, "check_phase_graceful_completion", return_value=True), \ + patch("findings.checks_entry.run_frontmatter_validation", return_value=(0, "")): + rc = p1._run_subphase( + args=args, + console=None, + rendering_ctx=None, + runner=_runner(), + base_url="http://127.0.0.1", + phase_id="1c", + label="Sandbox", + agent="recon", + prompt_file="prompts/phase-1c-sandbox.md", + ) + finally: + p1.HAVE_RICH = saved_rich + + assert rc == 0 + assert len(calls) == 1 diff --git a/tests/test_phase_1_gates.py b/tests/test_phase_1_gates.py new file mode 100644 index 00000000..7273d63b --- /dev/null +++ b/tests/test_phase_1_gates.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +import sys +from pathlib import Path +from unittest.mock import patch + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from phases.phase_1_gates import _emit + + +def test_emit_plain_fallback_prints_formatted_text(capsys) -> None: + _emit(None, "ok", "plain gate output") + + out = capsys.readouterr().out + assert "plain gate output" in out + + +def test_unsupported_language_soft_policy_warns_not_fails(tmp_path: Path, capsys) -> None: + notes = tmp_path / "itemdb" / "notes" + notes.mkdir(parents=True) + (notes / "target-profile.md").write_text("profile", encoding="utf-8") + (notes / "build-model.md").write_text("model", encoding="utf-8") + (notes / "codeql-plan.yml").write_text( + "schema_version: 1\n" + "recommended: true\n" + "analysis_units:\n" + " - id: gilroy\n" + " path: ./src\n" + " languages:\n" + " - id: elixir\n" + " packs:\n" + " - official\n", + encoding="utf-8", + ) + + (tmp_path / "src").mkdir() + + mock_config = type("cfg", (), {"fail_policy": "soft", "enabled": True})() + + from 
phases.phase_1_gates import check_phase_1a + + with patch("phases.phase_1_gates.ROOT", tmp_path), \ + patch("phases.phase_1_gates._resolve_codeql_config", return_value=mock_config): + rc = check_phase_1a() + + out = capsys.readouterr().out + assert rc == 0 + assert "will be skipped" in out + + +def test_unsupported_language_hard_policy_fails(tmp_path: Path, capsys) -> None: + notes = tmp_path / "itemdb" / "notes" + notes.mkdir(parents=True) + (notes / "target-profile.md").write_text("profile", encoding="utf-8") + (notes / "build-model.md").write_text("model", encoding="utf-8") + (notes / "codeql-plan.yml").write_text( + "schema_version: 1\n" + "recommended: true\n" + "analysis_units:\n" + " - id: gilroy\n" + " path: ./src\n" + " languages:\n" + " - id: elixir\n" + " packs:\n" + " - official\n", + encoding="utf-8", + ) + + (tmp_path / "src").mkdir() + + mock_config = type("cfg", (), {"fail_policy": "hard", "enabled": True})() + + from phases.phase_1_gates import check_phase_1a + + with patch("phases.phase_1_gates.ROOT", tmp_path), \ + patch("phases.phase_1_gates._resolve_codeql_config", return_value=mock_config): + rc = check_phase_1a() + + out = capsys.readouterr().out + assert rc == 1 + assert "unsupported CodeQL language 'elixir'" in out + + +def test_non_recommended_unit_without_languages_is_skipped(tmp_path: Path, capsys) -> None: + notes = tmp_path / "itemdb" / "notes" + notes.mkdir(parents=True) + (notes / "target-profile.md").write_text("profile", encoding="utf-8") + (notes / "build-model.md").write_text("model", encoding="utf-8") + (notes / "codeql-plan.yml").write_text( + "schema_version: 1\n" + "recommended: true\n" + "analysis_units:\n" + " - id: api\n" + " path: ./src/api\n" + " languages:\n" + " - id: python\n" + " confidence: HIGH\n" + " build_mode: none\n" + " packs:\n" + " - official\n" + " - id: gilroy\n" + " path: ./src/gilroy\n" + " recommended: false\n", + encoding="utf-8", + ) + + (tmp_path / "src" / "api").mkdir(parents=True) + (tmp_path / 
"src" / "gilroy").mkdir(parents=True) + + mock_config = type("cfg", (), {"fail_policy": "hard", "enabled": True})() + + from phases.phase_1_gates import check_phase_1a + + with patch("phases.phase_1_gates.ROOT", tmp_path), \ + patch("phases.phase_1_gates._resolve_codeql_config", return_value=mock_config): + rc = check_phase_1a() + + out = capsys.readouterr().out + assert rc == 0 + assert "not recommended for CodeQL" in out diff --git a/tests/test_phases_completion.py b/tests/test_phases_completion.py index b70aed95..8c1247ff 100644 --- a/tests/test_phases_completion.py +++ b/tests/test_phases_completion.py @@ -101,6 +101,37 @@ def test_phase1_check_patches_notes_root_and_sandbox_plan(self, tmp_path): try: result = completion_mod.check_phase_graceful_completion("1", None, fake_time) assert result is True, "Phase 1 should succeed when all artifacts exist under patched NOTES_ROOT" + result = completion_mod.check_phase_graceful_completion("1c", None, fake_time) + assert result is True, "Phase 1c should use the same artifact gate as Phase 1" + finally: + completion_mod.NOTES_ROOT = orig_notes_root + completion_mod.SANDBOX_PLAN_PATH = orig_sandbox_plan + completion_mod.ROOT = orig_root + + def test_phase1c_accepts_fresh_sandbox_state_with_existing_notes(self, tmp_path): + import phases.completion as completion_mod + + orig_notes_root = completion_mod.NOTES_ROOT + orig_sandbox_plan = completion_mod.SANDBOX_PLAN_PATH + orig_root = completion_mod.ROOT + + completion_mod.NOTES_ROOT = tmp_path / "notes" + completion_mod.SANDBOX_PLAN_PATH = completion_mod.NOTES_ROOT / "sandbox-plan.md" + completion_mod.ROOT = tmp_path / "codecome_workspace" + + for name in completion_mod._PHASE1_REQUIRED_ARTIFACT_NAMES: + artifact = completion_mod.NOTES_ROOT / name + artifact.parent.mkdir(parents=True, exist_ok=True) + artifact.write_text("", encoding="utf-8") + + run_start = time.time() + sandbox_generated = completion_mod.ROOT / "sandbox" / "CODECOME-GENERATED.md" + 
sandbox_generated.parent.mkdir(parents=True) + + try: + assert completion_mod.check_phase_graceful_completion("1", None, run_start) is False + sandbox_generated.write_text("validated", encoding="utf-8") + assert completion_mod.check_phase_graceful_completion("1c", None, run_start) is True finally: completion_mod.NOTES_ROOT = orig_notes_root completion_mod.SANDBOX_PLAN_PATH = orig_sandbox_plan diff --git a/tests/test_rendering_events.py b/tests/test_rendering_events.py index 56ed586b..527b9d3d 100644 --- a/tests/test_rendering_events.py +++ b/tests/test_rendering_events.py @@ -99,14 +99,16 @@ def test_renders_text_rich(self): r = TextEventRenderer(_ctx("rich")) assert r.render({"part": {"text": "Hello world"}}) is True - def test_skips_empty_text(self): + def test_skips_empty_text(self, capsys): r = TextEventRenderer(_ctx("plain")) - assert r.render({"part": {"text": ""}}) is False - assert r.render({"part": {"text": " \n\t "}}) is False + assert r.render({"part": {"text": ""}}) is True + assert r.render({"part": {"text": " \n\t "}}) is True + assert capsys.readouterr().out == "" - def test_skips_missing_text(self): + def test_skips_missing_text(self, capsys): r = TextEventRenderer(_ctx("plain")) - assert r.render({"part": {}}) is False + assert r.render({"part": {}}) is True + assert capsys.readouterr().out == "" # --------------------------------------------------------------------------- diff --git a/tests/test_sandbox_bootstrap.py b/tests/test_sandbox_bootstrap.py index 2799941e..7425f006 100644 --- a/tests/test_sandbox_bootstrap.py +++ b/tests/test_sandbox_bootstrap.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +from types import SimpleNamespace from conftest import ROOT, load_tool_module @@ -80,6 +81,59 @@ def test_opencode_json_allows_src_and_sandbox_env_reads(): assert read_rules["sandbox/.env"] == "allow" +def test_sandbox_status_is_pending_before_phase_1c(tmp_path, monkeypatch, capsys): + module = 
load_tool_module("sandbox_bootstrap_pending_status", "tools/sandbox-bootstrap.py") + root = tmp_path + sandbox_root = root / "sandbox" + notes_root = root / "itemdb" / "notes" + sandbox_root.mkdir(parents=True) + notes_root.mkdir(parents=True) + (sandbox_root / ".gitkeep").write_text("", encoding="utf-8") + + monkeypatch.setattr(module, "ROOT", root) + monkeypatch.setattr(module, "SANDBOX_ROOT", sandbox_root) + monkeypatch.setattr(module, "NOTES_ROOT", notes_root) + monkeypatch.setattr(module, "PROVENANCE_FILE", sandbox_root / "CODECOME-GENERATED.md") + + assert module.classify_sandbox_state() == "pending" + + rc = module.cmd_status(SimpleNamespace(format="text", gate=False)) + out = capsys.readouterr().out + + assert rc == 0 + assert "state:" in out + assert "pending" in out + assert "sandbox bootstrap pending; run make phase-1" in out + assert "setup pending" in out + + +def test_sandbox_status_is_missing_after_phase_1c_without_sandbox(tmp_path, monkeypatch, capsys): + module = load_tool_module("sandbox_bootstrap_missing_status", "tools/sandbox-bootstrap.py") + root = tmp_path + sandbox_root = root / "sandbox" + notes_root = root / "itemdb" / "notes" + sandbox_root.mkdir(parents=True) + notes_root.mkdir(parents=True) + (sandbox_root / ".gitkeep").write_text("", encoding="utf-8") + (notes_root / "sandbox-plan.md").write_text("# Sandbox Plan\n", encoding="utf-8") + + monkeypatch.setattr(module, "ROOT", root) + monkeypatch.setattr(module, "SANDBOX_ROOT", sandbox_root) + monkeypatch.setattr(module, "NOTES_ROOT", notes_root) + monkeypatch.setattr(module, "PROVENANCE_FILE", sandbox_root / "CODECOME-GENERATED.md") + + assert module.classify_sandbox_state() == "missing" + + rc = module.cmd_status(SimpleNamespace(format="text", gate=False)) + out = capsys.readouterr().out + + assert rc == 0 + assert "state:" in out + assert "missing" in out + assert "sandbox is missing" in out + assert "setup missing" in out + + def 
test_detect_signals_prefers_erlang_otp_for_rebar_targets(tmp_path, monkeypatch): module = load_tool_module("sandbox_bootstrap_erlang_detect", "tools/sandbox-bootstrap.py") diff --git a/tests/test_session.py b/tests/test_session.py index 18ececa5..40d0dc77 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +from io import BytesIO from unittest.mock import MagicMock, patch import pytest @@ -167,10 +168,24 @@ def test_send_prompt_http_error_raises(self, mock_urlopen): 500, "Internal Server Error", {}, - None, + BytesIO(b"server says no"), ) - with pytest.raises(RuntimeError, match="Failed to send prompt: HTTP 500"): + with pytest.raises(RuntimeError, match="Failed to send prompt: HTTP 500: server says no"): module.send_prompt_to_session( "http://localhost:8080", "sess-1", "hello", "recon", None, None, None, None ) + + @patch("urllib.request.urlopen") + def test_get_session_status_busy(self, mock_urlopen): + module = _load_session_module() + mock_resp = MagicMock() + mock_resp.__enter__.return_value = mock_resp + mock_resp.read.return_value = json.dumps({"status": {"type": "busy"}}).encode("utf-8") + mock_urlopen.return_value = mock_resp + + status = module.get_session_status("http://localhost:8080", "sess-1", None, None) + + assert status == "busy" + req = mock_urlopen.call_args[0][0] + assert req.full_url == "http://localhost:8080/session/sess-1" diff --git a/tools/AGENTS.md b/tools/AGENTS.md index 8018f8fb..a5d5f9e7 100644 --- a/tools/AGENTS.md +++ b/tools/AGENTS.md @@ -213,3 +213,19 @@ Avoid circular imports. When two packages need each other, prefer callable injec - Event loops are tested with deterministic event generators — not live OpenCode servers. - CLI and wrapper compatibility is verified with `--help` and `--show-model` smoke tests. - Thin wrappers must remain thin — their only responsibility is delegation. + +### 12. 
No subprocess for internal module communication + +CodeCome tools must **never** shell out via `subprocess` to invoke other CodeCome Python scripts. Instead, import the target module's functions directly: + +```python +# BAD — subprocess call to another CodeCome script +result = subprocess.run([sys.executable, "tools/sandbox-bootstrap.py", "status", "--format", "json"], ...) + +# GOOD — direct import (use importlib for hyphenated module names) +import importlib +sb = importlib.import_module("sandbox-bootstrap") +provenance = sb.read_provenance() +``` + +Subprocess is acceptable only for invoking **external** tools (codeql, docker, git, asciinema, etc.) that are not part of the CodeCome Python codebase. diff --git a/tools/chat/app.py b/tools/chat/app.py index cf8190c4..467e9501 100644 --- a/tools/chat/app.py +++ b/tools/chat/app.py @@ -7,7 +7,7 @@ Provides: - TextualConsoleProxy: RichLog bridge for background-thread console output. - ChatApp / QuitScreen: module-level type hints (real classes set after try/except). - - _chat_render_and_log / _chat_update_modeline_info: standalone helpers, + - _chat_render / _chat_update_modeline_info: standalone helpers, callable without Textual (for testing parity). - _QuitScreen: quit confirmation modal. - _ChatApp: the Textual App. @@ -105,13 +105,15 @@ def _write(self, renderable): # launching a real TUI. # --------------------------------------------------------------------------- -def _chat_render_and_log(self, console, phase, label, event): +def _chat_render(self, console, phase, label, event): """Standalone version of _ChatApp._render_and_log. See the docstring on the class for the full contract. + Raw event recording is handled separately by the chat event loop; + this function only drives rendering and UI updates. 
def _phase_1_notes_exist() -> bool:
    """Return True when both Phase 1 profile notes exist as regular files.

    Checks ``target-profile.md`` and ``build-model.md`` under
    ``ROOT/itemdb/notes``.
    """
    notes_dir = ROOT / "itemdb" / "notes"
    required_notes = ("target-profile.md", "build-model.md")
    return all((notes_dir / note).is_file() for note in required_notes)
REQUIRED_NOTES_1B + + notes_dir = ROOT / "itemdb" / "notes" + evidence_root = ROOT / "itemdb" / "evidence" + counts = count_findings() + rows: list[tuple[str, str, str]] = [] + + # Phase 1a + has_1a = all( + (notes_dir / name).is_file() + for name in ("target-profile.md", "build-model.md", "codeql-plan.yml") + ) + rows.append(("Phase 1a", "ok" if has_1a else "info", "completed" if has_1a else "not run")) + + # CodeQL + manifest_path = ROOT / "itemdb" / "codeql" / "run-manifest.yml" + if manifest_path.is_file(): + try: + manifest = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) + status = manifest.get("status", "unknown") if isinstance(manifest, dict) else "unknown" + except Exception: + status = "unknown" + level = "ok" if status == "completed" else "warn" if status == "soft-failed" else "info" + rows.append(("CodeQL", level, status)) + else: + rows.append(("CodeQL", "info", "not run")) + + # Phase 1b + missing_1b = [n for n in REQUIRED_NOTES_1B if not (notes_dir / n).is_file()] + if not missing_1b: + rows.append(("Phase 1b", "ok", "completed")) + elif len(missing_1b) < len(REQUIRED_NOTES_1B): + rows.append(("Phase 1b", "warn", f"{len(missing_1b)} of {len(REQUIRED_NOTES_1B)} notes missing")) + else: + rows.append(("Phase 1b", "info", "not run")) + + # Phase 1c + has_1c = (notes_dir / "sandbox-plan.md").is_file() + rows.append(("Phase 1c", "ok" if has_1c else "info", "completed" if has_1c else "not run")) + + # Phase 2 + pending = counts["PENDING"] + rows.append(("Phase 2", "ok" if pending else "info", f"{pending} PENDING findings" if pending else "not run")) + + # Phase 3 + reviewed = counts["CONFIRMED"] + counts["EXPLOITED"] + counts["REJECTED"] + counts["DUPLICATE"] + rows.append(("Phase 3", "ok" if reviewed else "info", f"{reviewed} reviewed" if reviewed else "not run")) + + # Phase 4 + confirmed = counts["CONFIRMED"] + counts["EXPLOITED"] + rows.append(("Phase 4", "ok" if confirmed else "info", f"{confirmed} confirmed" if confirmed else "not run")) + 
+ # Phase 5 + exploited = counts["EXPLOITED"] + rows.append(("Phase 5", "ok" if exploited else "info", f"{exploited} exploited" if exploited else "not run")) + + # Phase 6 + has_report = (ROOT / "itemdb" / "reports" / "report.md").is_file() + rows.append(("Phase 6", "ok" if has_report else "info", "completed" if has_report else "not run")) + + print() + print(C.header("Phase progress:")) + label_width = max(len(label) for label, _, _ in rows) + for label, level, detail in rows: + prefix = " " + label.ljust(label_width) + if level == "ok": + print(C.ok(f"{prefix} {detail}")) + elif level == "warn": + print(C.warn(f"{prefix} {detail}")) + else: + print(C.info(f"{prefix} {detail}")) + + +def check_codeql_status() -> int: + """Check CodeQL configuration and last recorded artifact state.""" + print() + print(C.header("CodeQL:")) + # TODO: move CodeQL check logic to tools/codecome/checks.py (see GH issue) + try: + from codeql.config import resolve_config + from codeql.artifacts import check_artifacts + from codeql.packs import load_codeql_plan + except ImportError as exc: + print(C.warn(f"CodeQL checks unavailable: {exc}")) + return 0 + + config = resolve_config() + manifest_path = config.abs_output_dir / "run-manifest.yml" + manifest = None + + if manifest_path.is_file() and yaml is not None: + try: + loaded = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) + manifest = loaded if isinstance(loaded, dict) else None + except (OSError, yaml.YAMLError, UnicodeDecodeError): + manifest = None + + current_state = "enabled" if config.enabled else "disabled" + print(C.ok(f"current config: CodeQL {current_state}")) + + if manifest and manifest.get("status") == "skipped" and manifest.get("codeql_enabled") is False: + reason = manifest.get("skip_reason") or "CodeQL disabled during recorded run" + print(C.ok(f"last phase-1 CodeQL state: skipped ({reason})")) + print(C.info("No CodeQL artifacts are required for that recorded run.")) + return 0 + + if not config.enabled: + 
print(C.ok("CodeQL disabled for current invocation; artifact checks skipped.")) + return 0 + + exit_code = 0 + if config.phase_1_enabled: + print(C.ok("phase-1 integration: enabled")) + else: + print(C.ok("phase-1 integration: disabled; artifact checks skipped.")) + return 0 + + if config.abs_install_path.is_file(): + print(C.ok(f"binary: {config.abs_install_path.relative_to(ROOT) if config.abs_install_path.is_relative_to(ROOT) else config.abs_install_path}")) + else: + print(C.fail(f"binary missing: {config.abs_install_path}")) + exit_code = 1 + + if config.abs_pack_catalog.is_file(): + print(C.ok(f"pack catalog: {config.abs_pack_catalog.relative_to(ROOT) if config.abs_pack_catalog.is_relative_to(ROOT) else config.abs_pack_catalog}")) + else: + print(C.fail(f"pack catalog missing: {config.abs_pack_catalog}")) + exit_code = 1 + + plan_path = ROOT / "itemdb" / "notes" / "codeql-plan.yml" + if plan_path.is_file(): + try: + load_codeql_plan(plan_path) + print(C.ok("plan: itemdb/notes/codeql-plan.yml")) + except Exception as exc: + print(C.fail(f"plan invalid: {exc}")) + exit_code = 1 + elif _phase_1_notes_exist(): + print(C.warn("plan missing after Phase 1 notes exist: itemdb/notes/codeql-plan.yml")) + else: + print(C.info("Phase 1 has not produced a CodeQL plan yet; no artifacts expected.")) + return exit_code + + artifact_status, warnings = check_artifacts(config.abs_output_dir) + if artifact_status == "missing": + if _phase_1_notes_exist(): + print(C.warn("artifacts: missing run-manifest.yml; run make phase-1 to refresh CodeQL state.")) + else: + print(C.info("artifacts: not present yet; Phase 1 has not run.")) + elif artifact_status == "completed" and not warnings: + print(C.ok("artifacts: completed")) + elif artifact_status == "soft-failed": + print(C.warn("artifacts: soft-failed")) + for warning in warnings: + print(C.warn(f" {warning}")) + if (manifest or {}).get("fail_policy", config.fail_policy) == "hard": + exit_code = 1 + elif artifact_status == "skipped": 
+ print(C.ok("artifacts: skipped")) + for warning in warnings: + print(C.info(f" {warning}")) + else: + formatter = C.fail if artifact_status in {"failed", "unknown"} else C.warn + print(formatter(f"artifacts: {artifact_status}")) + for warning in warnings: + print(formatter(f" {warning}")) + if artifact_status in {"completed", "failed", "unknown"}: + exit_code = 1 + + return exit_code + + +def check_sandbox_status() -> None: + """Print sandbox state, gate result, and capability summary.""" + import importlib + + try: + sb = importlib.import_module("sandbox-bootstrap") + except Exception: + print() + print(C.header("Sandbox:")) + print(C.warn("sandbox-bootstrap module unavailable")) + return + + print() + print(C.header("Sandbox:")) + + provenance = sb.read_provenance() + last_validation = sb._last_validation_outcome() + allow_no_sandbox = bool(os.environ.get("CODECOME_ALLOW_NO_SANDBOX")) + sandbox_state = sb.classify_sandbox_state() + + # Gate logic (mirrors cmd_status) + if allow_no_sandbox: + gate_pass = True + gate_reason = "override (CODECOME_ALLOW_NO_SANDBOX=1)" + elif sandbox_state == "pending": + gate_pass = False + gate_reason = "sandbox bootstrap pending; run make phase-1" + elif sandbox_state == "missing": + gate_pass = False + gate_reason = "sandbox is missing" + elif sandbox_state == "generated" and last_validation == "failed": + gate_pass = False + gate_reason = "last validation failed" + elif sandbox_state == "generated" and last_validation == "skipped": + gate_pass = False + gate_reason = "last validation has no real outcomes (all tiers skipped)" + else: + gate_pass = True + if sandbox_state == "user-managed": + gate_reason = "sandbox is user-managed (validation not enforced)" + elif last_validation is None: + gate_reason = "no validation run on record" + elif last_validation == "passed": + gate_reason = "last validation passed" + elif last_validation == "mixed": + gate_reason = "last validation passed (some tiers skipped)" + else: + gate_reason = 
f"last validation: {last_validation}" + + # Print summary + state_detail = sandbox_state + if sandbox_state == "generated" and provenance: + state_detail = "generated (provenance present)" + print(f" {C.DIM}state:{C.RESET} {state_detail}") + print(f" {C.DIM}last validation:{C.RESET} {last_validation or '-'}") + if gate_pass: + print(C.ok(f" Phase 2 gate: pass ({gate_reason})")) + else: + print(C.warn(f" Phase 2 gate: block ({gate_reason})")) + + # Capabilities + capability_status = sb._capability_status() + print(f" {C.DIM}capabilities:{C.RESET}") + for name in ("setup", "start", "check", "build", "test", "stop", "shell", "logs", "clean", "reset"): + status = capability_status[name] + satisfied = status.get("satisfied", False) + missing_label = "pending" if sandbox_state == "pending" else "missing" + state_str = C.ok("ok") if satisfied else C.warn(missing_label) + print(f" {name:<8} {state_str} {status['path']}") + + def command_check(_: argparse.Namespace) -> int: missing = [] @@ -375,15 +625,21 @@ def command_check(_: argparse.Namespace) -> int: if not has_source: print(C.warn("src/ is empty — place your target source code there before running phase-1.")) + check_phase_progress() + check_exit = check_codeql_status() + check_sandbox_status() + + print() + # Warn (do not fail) about missing optional recording tools used by Phase 5. recording_warnings = check_recording_tools() if recording_warnings: - print() - print(C.header("Optional recording tools (used by phase-5 exploit demonstrations):")) + print(C.header("Recording tools:")) for message in recording_warnings: print(C.warn(message)) else: - print(C.ok("Optional recording tools available (asciinema, agg, ffmpeg, Xvfb).")) + print(C.header("Recording tools:")) + print(C.ok("all tools available (asciinema, agg, ffmpeg, Xvfb).")) # Probe only the current helper invocation context; phase-5 may later run # from a different shell, container, or PTY wrapper. 
def command_check_codeql_plan(_: argparse.Namespace) -> int:
    """Run the Phase 1 CodeQL plan validator as a CLI subcommand.

    Prints the validator's output when it is non-empty and returns the
    validator's return code unchanged. The parsed arguments are unused.
    """
    # Imported lazily, as in the original, so other subcommands do not pay
    # for loading the Phase 1 module.
    from codecome.phase_1 import _validate_codeql_plan_for_repair as validate_plan

    exit_code, report = validate_plan()
    if report:
        print(report)
    return exit_code
+ required = ["phase", "label", "agent"] + if str(args.phase) != "1": + required.append("prompt_file") + missing = [n for n in required if getattr(args, n) is None] if missing: parser.error( "the following arguments are required when not using --show-model or --chat: " diff --git a/tools/codecome/harness.py b/tools/codecome/harness.py index 1cd4633e..e44f2edf 100644 --- a/tools/codecome/harness.py +++ b/tools/codecome/harness.py @@ -15,8 +15,6 @@ import dataclasses import os import signal -import subprocess -import sys import time from pathlib import Path from typing import Any, Optional @@ -59,6 +57,37 @@ def run_phase_mode(args: argparse.Namespace) -> int: if _overrides: _rendering_ctx.settings = dataclasses.replace(_rendering_ctx.settings, **_overrides) + # ── Phase 1: subphase orchestration with own server lifecycle ── + if str(args.phase) == "1": + os.environ["_CODECOME_INSIDE_HARNESS"] = "1" + _p1_runner = ServerRunner() + try: + _p1_server_info = _p1_runner.start(hostname="127.0.0.1", log_level=args.log_level) + except ServerRunnerError as exc: + _emit_fatal_error(console, "Server Error", str(exc)) + return 1 + + def _p1_forward_signal(signum: int, _frame: Any) -> None: + info = _p1_runner.info + if info is not None: + try: + os.killpg(info.pid, signum) + except ProcessLookupError: + pass + signal.signal(signum, signal.SIG_DFL) + os.kill(os.getpid(), signum) + + _p1_prev_sigint = signal.signal(signal.SIGINT, _p1_forward_signal) + _p1_prev_sigterm = signal.signal(signal.SIGTERM, _p1_forward_signal) + try: + from codecome.phase_1 import run_phase_1 as _run_phase_1 + return _run_phase_1(args, console, _rendering_ctx, _p1_runner, _p1_server_info.base_url) + finally: + signal.signal(signal.SIGINT, _p1_prev_sigint) + signal.signal(signal.SIGTERM, _p1_prev_sigterm) + _p1_runner.stop() + + # ── Phases 2-6 below this point ── prompt_file = ROOT / args.prompt_file prompt = load_prompt(prompt_file, args.finding, phase=args.phase) rc = resolve_runtime_config(args.agent) 
@@ -223,15 +252,11 @@ def _forward_signal(signum: int, _frame: Any) -> None: returncode = 2 if returncode == 0: - validation_result = subprocess.run( - [sys.executable, "tools/check-frontmatter.py"], - cwd=ROOT, - capture_output=True, - text=True - ) - if validation_result.returncode != 0: + from findings.checks_entry import run_frontmatter_validation + + validation_rc, validation_output = run_frontmatter_validation() + if validation_rc != 0: max_frontmatter_retries = 2 - validation_output = (validation_result.stderr or validation_result.stdout).strip() or "(no validator output)" if frontmatter_retry_count < max_frontmatter_retries: frontmatter_retry_count += 1 msg = ( diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py new file mode 100644 index 00000000..1a0fb47c --- /dev/null +++ b/tools/codecome/phase_1.py @@ -0,0 +1,969 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Phase 1 subphase orchestration. + +Runs Phase 1 as three subphases (1a / 1b / 1c) with gates and CodeQL +analysis between 1a and 1b. The opencode server is started once and +reused across all three subphase sessions. 
+""" + +from __future__ import annotations + +import hashlib +import os +import re +import shlex +import subprocess +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from opencode.serve import ServerRunner, ServerRunnerError + +from codecome.console import build_console, _emit_fatal_error +from codecome.config import ROOT, resolve_color_mode, load_prompt, resolve_runtime_config +from codecome.runner import _run_single_attempt +from phases.phase_1_gates import ( + check_phase_1a, + check_phase_1b, + check_phase_1c, + count_findings_snapshot, +) +from rendering.dispatch import HAVE_RICH, _get_rendering_ctx, configure_rendering, render_event +from rendering.events import ( + _FINISH_TERMINAL_OK, + _FINISH_MID_TURN, + _FINISH_FAILURE, + _reset_subagent_state, +) +from phases.completion import ( + check_phase_graceful_completion, + build_phase_resume_prompt, + build_frontmatter_resume_prompt, + build_codeql_plan_resume_prompt, + build_codeql_build_failure_resume_prompt, +) + + +@dataclass(frozen=True) +class _SubphaseOutcome: + returncode: int + session_id: str + transcript_path: Path +# --------------------------------------------------------------------------- +# CodeQL analysis (between 1a gate and 1b) +# --------------------------------------------------------------------------- + +def _run_codeql(console: Any) -> None: + """Run full CodeQL pipeline and report results. + + This function always succeeds (returns None). Pass/fail enforcement + is handled separately by ``_check_codeql_artifacts``. + """ + from codeql.config import resolve_config as _resolve_codeql_config + + config = _resolve_codeql_config() + + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(title="CodeQL", style="cyan")) + else: + import _colors as C + print(C.header("CodeQL")) + + if not config.enabled: + msg = "CodeQL disabled — skipping." 
+ from codeql.pipeline import record_skipped_run + record_skipped_run(config, "CodeQL disabled for Phase 1") + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="yellow")) + else: + import _colors as C + print(C.warn(msg)) + return + + if not config.phase_1_enabled: + msg = "CodeQL phase 1 disabled — skipping." + from codeql.pipeline import record_skipped_run + record_skipped_run(config, "CodeQL phase 1 disabled") + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="yellow")) + else: + import _colors as C + print(C.warn(msg)) + return + + if HAVE_RICH: + from rich.text import Text + console.print(Text("Running CodeQL analysis…", style="dim")) + else: + print("Running CodeQL analysis…") + + from codeql.pipeline import run_full_pipeline + + def progress(message: str) -> None: + if HAVE_RICH: + from rich.text import Text + console.print(Text(message, style="dim")) + else: + print(message, flush=True) + + try: + manifest = run_full_pipeline(config, progress=progress) + except Exception as exc: + msg = f"CodeQL: FAILED — {exc}" + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold red")) + else: + import _colors as C + print(C.fail(msg)) + return + + status = manifest["status"] + warnings = manifest.get("warnings", []) + failures = manifest.get("failures", []) + + if status == "completed": + msg = f"CodeQL: analysis completed ({len(manifest.get('languages', []))} language(s))" + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="green")) + else: + import _colors as C + print(C.ok(msg)) + elif status == "skipped": + reason = failures[0] if failures else "no plan" + msg = f"CodeQL: skipped — {reason}" + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="yellow")) + else: + import _colors as C + print(C.warn(msg)) + elif status == "soft-failed": + msg = "CodeQL: soft-failed — continuing" + if HAVE_RICH: + from rich.text import Text + 
def _check_codeql_artifacts(console: Any) -> int:
    """Gate Phase 1b on the CodeQL artifacts found in the output directory.

    Returns 1 only when the artifact status is ``"failed"`` and the resolved
    config has ``fail_policy == "hard"``. Every other outcome is reported and
    returns 0. When CodeQL or its Phase 1 integration is disabled, nothing is
    checked and 0 is returned.
    """
    from codeql.config import resolve_config as _resolve_codeql_config
    from codeql.artifacts import check_artifacts

    def emit(text: str, rich_style: str, plain_kind: str) -> None:
        # One message, two back ends: Rich when available, otherwise the
        # ANSI helper named by ``plain_kind``. Imports stay lazy so only the
        # back end in use is loaded.
        if HAVE_RICH:
            from rich.text import Text
            console.print(Text(text, style=rich_style))
        else:
            import _colors as C
            print(getattr(C, plain_kind)(text))

    config = _resolve_codeql_config()
    if not (config.enabled and config.phase_1_enabled):
        return 0

    status, warnings = check_artifacts(config.abs_output_dir)
    for warning in warnings:
        emit(f" WARN: {warning}", "yellow", "warn")

    failed = status == "failed"
    if config.fail_policy == "hard" and failed:
        emit("CodeQL artifact gate: FAILED — blocking Phase 1b", "bold red", "fail")
        return 1

    if failed:
        # fail_policy is soft here, so the failure is only a warning.
        emit(
            "CodeQL artifact gate: execution crashed but fail_policy is soft — continuing",
            "yellow",
            "warn",
        )

    completed = status == "completed"
    emit(
        f"CodeQL artifact gate: {status}",
        "green" if completed else "yellow",
        "ok" if completed else "info",
    )
    return 0


def _load_codeql_yaml(path: Path) -> dict[str, Any]:
    """Best-effort load of a CodeQL YAML artifact as a mapping.

    Returns ``{}`` when ``path`` is not a regular file or when importing or
    running the loader raises.
    """
    loaded: dict[str, Any] = {}
    if path.is_file():
        try:
            from codeql.packs import load_yaml_mapping

            loaded = load_yaml_mapping(path, what=path.name)
        except Exception:
            # Deliberate best-effort: callers treat an unreadable artifact
            # the same as a missing one.
            loaded = {}
    return loaded


def _validate_codeql_plan_for_repair() -> tuple[int, str]:
    """Validate ``itemdb/notes/codeql-plan.yml`` and return ``(rc, output)``.

    A missing plan is not an error and yields ``(0, "")``. A plan that cannot
    be loaded yields ``(1, <reason>)``. Otherwise every non-empty string
    ``build_command`` is checked, and any findings are returned as a bulleted
    report with return code 1.
    """
    plan_file = ROOT / "itemdb" / "notes" / "codeql-plan.yml"
    if not plan_file.exists():
        return 0, ""

    try:
        from codeql.packs import load_codeql_plan

        plan = load_codeql_plan(plan_file)
    except Exception as exc:
        return 1, f"itemdb/notes/codeql-plan.yml is invalid: {exc}"

    problems: list[str] = []
    for unit in plan.get("analysis_units", []):
        if not isinstance(unit, dict):
            continue
        languages = unit.get("languages", [])
        if not isinstance(languages, list):
            continue
        unit_id = str(unit.get("id", ""))
        unit_path = unit.get("path")
        # Build commands are resolved from the unit's own directory when the
        # plan gives one, otherwise from the workspace root.
        analysis_root = ROOT / unit_path if isinstance(unit_path, str) else ROOT
        for language in languages:
            if not isinstance(language, dict):
                continue
            build_command = language.get("build_command")
            if not (isinstance(build_command, str) and build_command.strip()):
                continue
            language_id = str(language.get("id", ""))
            context = f"analysis unit {unit_id!r} language {language_id!r}"
            problems.extend(_validate_codeql_build_command(build_command, analysis_root, context))

    if not problems:
        return 0, ""
    bullet_list = "\n".join(f"- {problem}" for problem in problems)
    return 1, "itemdb/notes/codeql-plan.yml failed CodeQL build-command validation:\n" + bullet_list
def _validate_codeql_build_command_shape(build_command: str, context: str) -> list[str]:
    """Reject shell-script constructs because CodeQL tokenizes build_command as argv.

    Args:
        build_command: The manual build command taken from the CodeQL plan.
        context: Prefix identifying the analysis unit/language in messages.

    Returns:
        A list of error messages; empty when the command is a single plain
        invocation. At most one shell-operator error is reported per command.
    """
    errors: list[str] = []
    if "\n" in build_command:
        errors.append(
            f"{context}: build_command is multi-line; CodeQL tokenizes build_command instead of running it as a shell script. "
            "Move multi-step logic into a helper script under tmp/ and invoke it with a single command such as `bash ../../tmp/codeql-build.sh`."
        )
    if re.search(r"(^|\s)#", build_command):
        errors.append(
            f"{context}: build_command contains shell comments; CodeQL passes comments as literal argv tokens. "
            "Move comments and multi-step logic into a helper script under tmp/."
        )
    # Longer operators come first: only the first hit is reported, and "|" is
    # a substring of "||", so the old order could never name "||".
    for operator in ("&&", "||", ";", "|"):
        if operator in build_command:
            errors.append(
                f"{context}: build_command contains shell operator {operator!r}; CodeQL tokenizes build_command, it is not shell-interpreted. "
                "Use a helper script under tmp/ for compound commands."
            )
            break
    try:
        tokens = shlex.split(build_command)
    except ValueError:
        # Unparseable quoting is reported by the caller; keep what we have.
        return errors
    if len(tokens) >= 3 and tokens[0] in {"bash", "sh"} and tokens[1] == "-c":
        errors.append(
            f"{context}: build_command uses `{tokens[0]} -c`; CodeQL command tokenization makes nested shell snippets fragile. "
            "Write the snippet to a helper script under tmp/ and invoke that script instead."
        )
    return errors


def _contains_absolute_tmp(text: str) -> bool:
    """Return whether text contains an absolute /tmp path, not a relative tmp/ component.

    ``/tmp`` counts when it starts the text or follows whitespace, a quote or
    ``=``, and is not continued by another path-name character. This covers
    ``/tmp/x``, a bare ``/tmp`` at the end of the text, and ``/tmp`` followed
    by whitespace, a quote or a shell operator. Names such as ``/tmpfiles``
    and relative components such as ``src/tmp/`` do not match.
    """
    # The negative lookahead replaces the old ``(/|$)`` terminator, which
    # missed ``/tmp`` followed by whitespace, quotes or mid-text newlines.
    return re.search(r"(?:^|[\s\"'=])/tmp(?![\w.\-])", text) is not None


def _subphase_should_validate_codeql_plan(phase_id: str) -> bool:
    """Return whether a subphase is responsible for producing/editing codeql-plan.yml."""
    return phase_id in {"1a", "1-codeql-repair"}


def _codeql_repair_needed(output_dir: Path, plan_path: Path) -> bool:
    """Return whether a failed CodeQL run should get a model repair attempt.

    True only when all of the following hold:

    * the run manifest status is ``soft-failed`` or ``failed``;
    * one of its failures mentions ``Database create failed``;
    * the plan has at least one language whose ``build_mode`` is
      ``autobuild`` or ``manual``.
    """
    manifest = _load_codeql_yaml(output_dir / "run-manifest.yml")
    status = manifest.get("status")
    if status not in {"soft-failed", "failed"}:
        return False

    failures = manifest.get("failures", [])
    if not isinstance(failures, list):
        return False
    if not any("Database create failed" in str(failure) for failure in failures):
        return False

    plan = _load_codeql_yaml(plan_path)
    for unit in plan.get("analysis_units", []) if isinstance(plan.get("analysis_units"), list) else []:
        languages = unit.get("languages", []) if isinstance(unit, dict) else []
        if not isinstance(languages, list):
            continue
        for language in languages:
            if isinstance(language, dict) and language.get("build_mode") in {"autobuild", "manual"}:
                return True
    return False


def _latest_codeql_database_log(output_dir: Path) -> Path | None:
    """Return the most recently modified database-create log, or None if there is none."""
    logs = [p for p in output_dir.glob("databases/**/log/database-create-*.log") if p.is_file()]
    if not logs:
        return None
    return max(logs, key=lambda p: p.stat().st_mtime)
def _file_digest(path: Path) -> str | None:
    """Return the SHA-256 hex digest of a file, or None when it cannot be read."""
    try:
        return hashlib.sha256(path.read_bytes()).hexdigest()
    except OSError:
        return None


def _run_codeql_repair_if_needed(
    *,
    args: Any,
    console: Any,
    rendering_ctx: Any,
    runner: ServerRunner,
    base_url: str,
) -> int:
    """Ask the model to repair CodeQL build instructions and rerun CodeQL.

    Flow:
        1. Phase 1a produces ``itemdb/notes/codeql-plan.yml`` and CodeQL is
           run once before this function is called.
        2. If that run failed at database creation, up to
           ``CODEQL_REPAIR_RETRIES`` (default 2) repair sessions are run.
           Each one reuses the previous repair session and, from the second
           attempt on, is prompted with the latest failure context.
        3. After each successful repair session CodeQL is rerun, and the
           loop stops as soon as no repair is needed any more.
        4. A repair session that ends with a non-zero code stops the loop
           early, since retrying would loop over the same broken state.

    Returns:
        130 when a repair session was interrupted by the user, otherwise 0.
        A database that is still broken does not fail this function; the
        caller's artifact gate enforces the hard fail policy afterwards.

    A ``CODEQL_REPAIR_RETRIES`` value that is not an integer is ignored with
    a warning and the default is used. A value of zero or less disables
    repair entirely.
    """
    from codeql.config import resolve_config as _resolve_codeql_config

    def warn(text: str, rich_style: str) -> None:
        # Every message in this function is a warning; only the Rich style
        # differs between call sites.
        if HAVE_RICH:
            from rich.text import Text
            console.print(Text(text, style=rich_style))
        else:
            import _colors as C
            print(C.warn(text))

    raw_budget = os.environ.get("CODEQL_REPAIR_RETRIES", "2")
    try:
        max_retries = int(raw_budget)
    except ValueError:
        # A typo in the environment must not abort Phase 1.
        warn(f"Ignoring invalid CODEQL_REPAIR_RETRIES={raw_budget!r}; using the default of 2.", "yellow")
        max_retries = 2
    if max_retries <= 0:
        return 0

    config = _resolve_codeql_config()
    plan_path = ROOT / "itemdb" / "notes" / "codeql-plan.yml"
    if not _codeql_repair_needed(config.abs_output_dir, plan_path):
        return 0

    warn("CodeQL database creation failed; asking the model to repair build instructions.", "bold yellow")

    plan_digest = _file_digest(plan_path)
    repair_session_id: str | None = None
    repair_prompt: str | None = None
    attempts_made = 0
    for attempt in range(1, max_retries + 1):
        attempts_made = attempt
        outcome = _run_subphase(
            args=args,
            console=console,
            rendering_ctx=rendering_ctx,
            runner=runner,
            base_url=base_url,
            phase_id="1-codeql-repair",
            label=f"CodeQL Build Repair ({attempt}/{max_retries})",
            agent="recon",
            prompt_file="prompts/phase-1-codeql-repair.md",
            existing_session_id=repair_session_id,
            initial_prompt=repair_prompt,
            return_outcome=True,
        )
        assert isinstance(outcome, _SubphaseOutcome)
        repair_session_id = outcome.session_id or repair_session_id

        if outcome.returncode == 130:
            return 130  # Honor user interrupt immediately

        if outcome.returncode != 0:
            # The agent exhausted its internal validation retries or failed
            # fatally. Continuing would loop over the same broken state, so
            # leave the repair loop and let the artifact gate decide.
            break

        next_plan_digest = _file_digest(plan_path)
        if next_plan_digest == plan_digest:
            warn("CodeQL repair completed but did not change itemdb/notes/codeql-plan.yml.", "yellow")
        plan_digest = next_plan_digest

        # Rerun even when the plan is unchanged; the manifest written by this
        # run is what the next check inspects.
        _run_codeql(console)
        if not _codeql_repair_needed(config.abs_output_dir, plan_path):
            return 0

        repair_prompt = build_codeql_build_failure_resume_prompt(
            _codeql_repair_failure_context(config.abs_output_dir)
        )

    if _codeql_repair_needed(config.abs_output_dir, plan_path):
        # Report the attempts actually made, which is lower than the budget
        # when the loop stopped early.
        warn(
            f"CodeQL database creation still fails after {attempts_made} repair attempt(s); continuing to artifact gate.",
            "bold yellow",
        )

    return 0
None: + sources_tail = ( + f"(model source: {rc.model_source}, variant source: {rc.variant_source}, " + f"thinking source: {rc.thinking_source})" + ) + else: + sources_tail = f"(model source: {rc.model_source}, thinking source: {rc.thinking_source})" + + main_line = " ".join(parts) + " " + sources_tail + + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(title=f"Phase {phase_id}: {label}", style="bold cyan")) + console.print(Text(main_line, style="dim")) + if finding: + console.print(Text(f"finding={finding}", style="dim")) + else: + import _colors as C + print(C.header(f"Phase {phase_id}: {label}")) + print(C.info(main_line)) + if finding: + print(C.info(f"finding={finding}")) + + iteration_retry_count = 0 + frontmatter_retry_count = 0 + codeql_plan_retry_count = 0 + attempt_number = 0 + last_session_id: str = existing_session_id or "" + last_finish_reason: str | None = None + last_finish_tokens: dict[str, Any] = {} + last_permission_error: str | None = None + any_step_finish_seen = False + step_finish_count = 0 + transcript_path: Path = Path() + finish_warning: str | None = None + subphase_start_time = time.time() + + password = runner.info.password if runner.info else "" + + # --- Retry loop (mirrors harness.run_phase_mode) --- + while True: + attempt_number += 1 + _reset_subagent_state() + finish_warning = None + + returncode, session_id, run_result, transcript_path = _run_single_attempt( + args, console, prompt, model, variant, base_url, + password, str(ROOT), + render_event_fn=render_event, + emit_fatal_error_fn=_emit_fatal_error, + existing_session_id=last_session_id or None, + transcript_phase=phase_id, + phase_override=phase_id, + label_override=label, + ) + + if returncode != 0: + break + + last_session_id = session_id + last_finish_reason = run_result.last_finish_reason + last_finish_tokens = run_result.last_finish_tokens + last_permission_error = run_result.last_permission_error + any_step_finish_seen = 
run_result.any_step_finish_seen + step_finish_count = run_result.step_finish_count + + if not any_step_finish_seen: + finish_warning = ( + "CodeCome observed no step_finish events in the JSON stream, so the model/provider did not emit a " + "completion signal. Treating the run as incomplete." + ) + elif last_finish_reason is None: + finish_warning = ( + "CodeCome observed a step_finish event without a finish reason, so the model/provider completion " + "state is ambiguous. Treating the run as incomplete." + ) + elif last_finish_reason in _FINISH_FAILURE: + finish_warning = ( + f"CodeCome observed finish reason '{last_finish_reason}', which means the model/provider stopped " + "before completing the subphase. Treating the run as incomplete rather than as a CodeCome logic error." + ) + elif last_finish_reason in _FINISH_MID_TURN: + if last_permission_error: + finish_warning = ( + f"{last_permission_error}; CodeCome observed the model/provider stop mid-turn with finish " + f"reason '{last_finish_reason}', so the subphase did not reach a final completion signal." + ) + else: + finish_warning = ( + f"CodeCome observed the model/provider stop mid-turn with finish reason '{last_finish_reason}' " + f"after {step_finish_count} completed loops, without a terminal completion signal. Treating the " + "subphase as incomplete because the model/provider cut off the response." + ) + elif last_finish_reason not in _FINISH_TERMINAL_OK: + finish_warning = ( + f"CodeCome observed an unrecognised model/provider finish reason '{last_finish_reason}'. Treating " + "the run as incomplete rather than assuming success." 
+ ) + + if finish_warning is not None: + if ( + (not any_step_finish_seen or last_finish_reason in _FINISH_MID_TURN) + and last_permission_error is None + and check_phase_graceful_completion(phase_id, finding, subphase_start_time) + ): + msg = ( + f"CodeCome observed an incomplete model/provider completion signal for Phase {phase_id} after " + f"{step_finish_count} completed loops, but the required durable artifacts were already written. " + "Treating the subphase as complete." + ) + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold green")) + else: + import _colors as C + print(C.ok(msg)) + finish_warning = None + last_finish_reason = "graceful_forgiveness" + else: + returncode = 2 + + if returncode == 0: + if _subphase_should_validate_codeql_plan(phase_id): + validation_rc, validation_output = _validate_codeql_plan_for_repair() + if validation_rc != 0: + max_codeql_plan_retries = 2 + if codeql_plan_retry_count < max_codeql_plan_retries: + codeql_plan_retry_count += 1 + msg = ( + "\n[Auto-Correction] The model completed a turn, but itemdb/notes/codeql-plan.yml " + "failed local CodeQL plan validation. CodeCome will resume the same session and ask " + f"for a minimal YAML/plan repair (retry {codeql_plan_retry_count}/{max_codeql_plan_retries})." + ) + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold yellow")) + else: + import _colors as C + print(C.warn(msg)) + if last_session_id and last_session_id != "id": + prompt = build_codeql_plan_resume_prompt(validation_output) + continue + else: + returncode = 2 + finish_warning = ( + "The model output failed CodeQL plan validation, and CodeCome could not determine " + "a session ID to resume for repair. Treating the subphase as incomplete so the " + "validator output can be reported back with the saved transcript." 
+ ) + else: + returncode = 2 + finish_warning = ( + f"itemdb/notes/codeql-plan.yml still fails validation after {max_codeql_plan_retries} " + "auto-repair attempts. Treating the subphase as incomplete so the validation errors " + "can be reported back." + ) + msg = f"\n[Warning] CodeQL plan validation errors persist after {max_codeql_plan_retries} auto-retries." + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold red")) + else: + import _colors as C + print(C.fail(msg)) + print(validation_output) + break + + from findings.checks_entry import run_frontmatter_validation + + validation_rc, validation_output = run_frontmatter_validation() + if validation_rc != 0: + max_frontmatter_retries = 2 + if frontmatter_retry_count < max_frontmatter_retries: + frontmatter_retry_count += 1 + msg = ( + "\n[Auto-Correction] The model completed a turn, but its output failed local frontmatter " + f"validation. CodeCome will resume the same session and ask for a minimal repair " + f"(retry {frontmatter_retry_count}/{max_frontmatter_retries})." + ) + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold yellow")) + else: + import _colors as C + print(C.warn(msg)) + if last_session_id and last_session_id != "id": + prompt = build_frontmatter_resume_prompt(phase_id, finding, validation_output) + continue + else: + returncode = 2 + finish_warning = ( + "The model output failed local frontmatter validation, and CodeCome could not determine a " + "session ID to resume for repair. Treating the subphase as incomplete so the validator output " + "can be reported back with the saved transcript." + ) + else: + returncode = 2 + finish_warning = ( + f"The model output still fails local frontmatter validation after {max_frontmatter_retries} " + "auto-repair attempts. Treating the subphase as incomplete so the validation errors can be reported back." 
+ ) + msg = f"\n[Warning] Frontmatter errors persist after {max_frontmatter_retries} auto-retries." + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold red")) + else: + import _colors as C + print(C.fail(msg)) + print(validation_output) + break + break + + if returncode == 2 and last_finish_reason in _FINISH_MID_TURN: + import os + max_iteration_retries = int(os.environ.get("CODECOME_MAX_ITERATION_RETRIES", "1")) + if iteration_retry_count < max_iteration_retries: + iteration_retry_count += 1 + msg = ( + "\n[Auto-Resume] CodeCome observed a mid-turn model/provider cutoff and will resume the same " + f"session once to let the model finish the interrupted work (retry {iteration_retry_count}/{max_iteration_retries})." + ) + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold yellow")) + else: + import _colors as C + print(C.warn(msg)) + if last_session_id and last_session_id != "id": + prompt = build_phase_resume_prompt( + phase_id, finding, last_finish_reason, step_finish_count, + ) + continue + else: + finish_warning = ( + "CodeCome correctly detected that the model/provider stopped mid-turn, but it could not determine " + "a session ID for automatic continuation. Treating the subphase as incomplete." 
+ ) + if HAVE_RICH: + from rich.text import Text + console.print(Text("Could not determine session ID to resume.", style="red")) + else: + import _colors as C + print(C.fail("Could not determine session ID to resume.")) + break + + break + # --- end retry loop --- + + # Report subphase outcome + if returncode == 0: + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(style="green")) + console.print(Text(f"{'OK' if not HAVE_RICH else ''}Phase {phase_id} completed successfully", style="green")) + console.print(Text( + f" finish reason: {last_finish_reason!r} " + f"transcript: {transcript_path.relative_to(ROOT) if transcript_path.name else 'N/A'}", + style="dim", + )) + else: + import _colors as C + print(C.ok(f"Phase {phase_id} completed successfully")) + print(f" finish reason: {last_finish_reason!r} transcript: {transcript_path.relative_to(ROOT) if transcript_path.name else 'N/A'}") + elif returncode == 130: + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(style="yellow")) + console.print(Text(f"Phase {phase_id} interrupted", style="yellow")) + else: + import _colors as C + print(C.warn(f"Phase {phase_id} interrupted")) + else: + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(style="red")) + console.print(Text( + f"Phase {phase_id} did not complete cleanly (exit code {returncode})", + style="red", + )) + if finish_warning: + console.print(Text(f" reason: {finish_warning}", style="red")) + console.print(Text(f" transcript: {transcript_path.relative_to(ROOT) if transcript_path.name else 'N/A'}", style="dim")) + else: + import _colors as C + print(C.fail(f"Phase {phase_id} did not complete cleanly (exit code {returncode})")) + if finish_warning: + print(C.fail(f" reason: {finish_warning}")) + print(f" finish reason: {last_finish_reason!r} transcript: {transcript_path.relative_to(ROOT) if transcript_path.name else 'N/A'}") + + if 
return_outcome: + return _SubphaseOutcome(returncode=returncode, session_id=last_session_id, transcript_path=transcript_path) + return returncode + + +# --------------------------------------------------------------------------- +# Phase 1 orchestration +# --------------------------------------------------------------------------- + +def run_phase_1( + args: Any, + console: Any, + rendering_ctx: Any, + runner: ServerRunner, + base_url: str, +) -> int: + """Orchestrate Phase 1 subphases 1a → 1b → 1c with gates.""" + # ---- Phase 1a: Target Profile ---- + findings_snapshot_1a = count_findings_snapshot() + rc = _run_subphase( + args=args, + console=console, + rendering_ctx=rendering_ctx, + runner=runner, + base_url=base_url, + phase_id="1a", + label="Target Profile", + agent="recon", + prompt_file="prompts/phase-1a-profile.md", + ) + if rc != 0: + return rc + + gate_rc = check_phase_1a(console, findings_snapshot=findings_snapshot_1a) + if gate_rc != 0: + return gate_rc + + # ---- CodeQL analysis ---- + _run_codeql(console) + rc = _run_codeql_repair_if_needed( + args=args, + console=console, + rendering_ctx=rendering_ctx, + runner=runner, + base_url=base_url, + ) + if rc != 0: + return rc + rc = _check_codeql_artifacts(console) + if rc != 0: + return rc + + # Snapshot findings immediately before 1b so the warning scope matches 1b. 
+ findings_snapshot = count_findings_snapshot() + + # ---- Phase 1b: CodeQL-assisted Reconnaissance ---- + rc = _run_subphase( + args=args, + console=console, + rendering_ctx=rendering_ctx, + runner=runner, + base_url=base_url, + phase_id="1b", + label="CodeQL-assisted Reconnaissance", + agent="recon", + prompt_file="prompts/phase-1b-codeql-recon.md", + ) + if rc != 0: + return rc + + gate_rc = check_phase_1b(console, findings_snapshot=findings_snapshot) + if gate_rc != 0: + return gate_rc + + # ---- Phase 1c: Sandbox Bootstrap ---- + rc = _run_subphase( + args=args, + console=console, + rendering_ctx=rendering_ctx, + runner=runner, + base_url=base_url, + phase_id="1c", + label="Sandbox Bootstrap", + agent="recon", + prompt_file="prompts/phase-1c-sandbox.md", + ) + if rc != 0: + return rc + + gate_rc = check_phase_1c(console) + if gate_rc != 0: + return gate_rc + + # ---- Phase 1 complete ---- + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(style="bold green")) + console.print(Text("Phase 1 complete — all subphases passed.", style="bold green")) + else: + import _colors as C + print() + print(C.ok("Phase 1 complete — all subphases passed.")) + + return 0 diff --git a/tools/codecome/runner.py b/tools/codecome/runner.py index 7f1de33c..821d002c 100644 --- a/tools/codecome/runner.py +++ b/tools/codecome/runner.py @@ -9,6 +9,7 @@ import os import sys import threading +import time from pathlib import Path from typing import Any, Callable @@ -17,7 +18,7 @@ import _colors as C from events.phase_loop import PhaseEventLoop, RunResult from codecome.config import ROOT -from codecome.session import create_session, send_prompt_to_session +from codecome.session import create_session, get_session_status, send_prompt_to_session from codecome.transcript import Transcript from codecome.recording import EventRecorder @@ -33,6 +34,7 @@ def _consume_events( auth_token: str | None, workspace_dir: str | None, render_event_fn: Callable[..., 
None], + event_loop_box: dict[str, Any] | None = None, ) -> RunResult: event_loop = PhaseEventLoop( base_url=base_url, @@ -43,14 +45,47 @@ def _consume_events( auth_token=auth_token, workspace_dir=workspace_dir, ) + if event_loop_box is not None: + event_loop_box["loop"] = event_loop recorder = EventRecorder(transcript, debug=args.debug) def _handle_event(console_: Any, phase_: str, label_: str, event: dict[str, Any]) -> None: - recorder.record(event) render_event_fn(console_, phase_, label_, event) - return event_loop.run(_handle_event) + return event_loop.run(_handle_event, recorder.record) + + +def _record_codecome_event(transcript: Transcript, event_type: str, **properties: Any) -> None: + transcript.write_event({ + "type": event_type, + "timestamp": int(time.time() * 1000), + "properties": properties, + }) + + +def _wait_for_resume_idle( + base_url: str, + session_id: str, + auth_token: str | None, + workspace_dir: str | None, + transcript: Transcript, +) -> None: + timeout_s = float(os.environ.get("CODECOME_RESUME_IDLE_TIMEOUT", "15")) + poll_s = float(os.environ.get("CODECOME_RESUME_IDLE_POLL", "1")) + deadline = time.monotonic() + max(timeout_s, 0.0) + + while True: + status = get_session_status(base_url, session_id, auth_token, workspace_dir) + if status != "busy": + if status is not None: + _record_codecome_event(transcript, "codecome.resume.status", sessionID=session_id, status=status) + return + + _record_codecome_event(transcript, "codecome.resume.blocked_busy", sessionID=session_id, status=status) + if time.monotonic() >= deadline: + raise RuntimeError(f"session {session_id} is still busy; refusing to send resume prompt") + time.sleep(max(poll_s, 0.1)) def _run_single_attempt( @@ -65,37 +100,59 @@ def _run_single_attempt( render_event_fn: Callable[..., None], emit_fatal_error_fn: Callable[..., None] | None = None, existing_session_id: str | None = None, + transcript_phase: str | None = None, + phase_override: str | None = None, + label_override: str | 
None = None, ) -> tuple[int, str, RunResult, Path]: transcript: Transcript try: - transcript = Transcript.for_phase(str(args.phase), args.finding) + transcript = Transcript.for_phase(transcript_phase or str(args.phase), args.finding) except OSError as exc: finding_tag = (args.finding or "no-finding").replace("/", "_") transcript = Transcript.null() - transcript.path = ROOT / "tmp" / f"last-phase-{args.phase}-{finding_tag}-attempt-N.jsonl" + transcript.path = ROOT / "tmp" / f"last-phase-{transcript_phase or args.phase}-{finding_tag}-attempt-N.jsonl" try: console.print("warning: could not open transcript ", transcript.path, ": ", exc) except AttributeError: print(C.warn(f"warning: could not open transcript {transcript.path}: {exc}")) try: + _record_codecome_event( + transcript, + "codecome.attempt.started", + phase=transcript_phase or str(args.phase), + label=str(args.label), + existingSession=bool(existing_session_id), + ) if existing_session_id: session_id = existing_session_id + _wait_for_resume_idle(base_url, session_id, auth_token, workspace_dir, transcript) else: session_id = create_session(base_url, str(args.phase), args.agent, model, auth_token, workspace_dir) + _record_codecome_event( + transcript, + "codecome.session.ready", + sessionID=session_id, + existingSession=bool(existing_session_id), + ) + run_result_box: dict[str, Any] = {} consume_error_box: dict[str, Exception] = {} + event_loop_box: dict[str, Any] = {} def _consume() -> None: try: run_result_box["result"] = _consume_events( base_url, session_id, console, - str(args.phase), str(args.label), args, + phase_override or str(args.phase), + label_override or str(args.label), + args, transcript, auth_token, workspace_dir, render_event_fn=render_event_fn, + event_loop_box=event_loop_box, ) except Exception as exc: # noqa: BLE001 consume_error_box["error"] = exc @@ -103,15 +160,51 @@ def _consume() -> None: consumer = threading.Thread(target=_consume, name=f"codecome-events-{session_id}", daemon=True) 
consumer.start() - send_prompt_to_session(base_url, session_id, prompt, args.agent, model, variant, auth_token, workspace_dir) + _record_codecome_event(transcript, "codecome.prompt.send_started", sessionID=session_id) + try: + send_prompt_to_session(base_url, session_id, prompt, args.agent, model, variant, auth_token, workspace_dir) + except Exception as exc: + _record_codecome_event( + transcript, + "codecome.prompt.send_failed", + sessionID=session_id, + errorType=type(exc).__name__, + message=str(exc), + ) + loop = event_loop_box.get("loop") + if loop is not None: + try: + loop.stop() + except Exception: + pass + consumer.join(timeout=5.0) + if consumer.is_alive(): + _record_codecome_event(transcript, "codecome.event_loop.stop_timeout", sessionID=session_id) + raise + _record_codecome_event(transcript, "codecome.prompt.send_completed", sessionID=session_id) consumer.join() if "error" in consume_error_box: - raise consume_error_box["error"] + exc = consume_error_box["error"] + _record_codecome_event( + transcript, + "codecome.event_loop.failed", + sessionID=session_id, + errorType=type(exc).__name__, + message=str(exc), + ) + raise exc run_result = run_result_box.get("result") if not isinstance(run_result, RunResult): raise RuntimeError("Event loop ended without a RunResult") except Exception as exc: + _record_codecome_event( + transcript, + "codecome.attempt.failed", + errorType=type(exc).__name__, + message=str(exc), + existingSession=bool(existing_session_id), + ) if emit_fatal_error_fn: emit_fatal_error_fn(console, "Server Error", str(exc)) else: diff --git a/tools/codecome/session.py b/tools/codecome/session.py index d456abad..ad2ca5b0 100644 --- a/tools/codecome/session.py +++ b/tools/codecome/session.py @@ -63,7 +63,42 @@ def send_prompt_to_session( with urllib.request.urlopen(req, timeout=30.0) as resp: pass # 204 expected except urllib.error.HTTPError as exc: - raise RuntimeError(f"Failed to send prompt: HTTP {exc.code}") from exc + body = "" + try: + 
body = exc.read().decode("utf-8", errors="replace").strip() + except Exception: + body = "" + detail = f"Failed to send prompt: HTTP {exc.code}" + if body: + detail = f"{detail}: {body}" + raise RuntimeError(detail) from exc + + +def get_session_status( + base_url: str, + session_id: str, + auth_token: str | None, + workspace_dir: str | None, +) -> str | None: + """Best-effort lookup of an opencode session status type.""" + req = urllib.request.Request( + f"{base_url}/session/{session_id}", + headers=_get_headers(auth_token, workspace_dir), + method="GET", + ) + try: + with urllib.request.urlopen(req, timeout=5.0) as resp: + data = json.loads(resp.read().decode("utf-8")) + except Exception: + return None + + status = data.get("status") if isinstance(data, dict) else None + if isinstance(status, dict): + status_type = status.get("type") + return status_type if isinstance(status_type, str) else None + if isinstance(status, str): + return status + return None def create_session( diff --git a/tools/codecome/transcript.py b/tools/codecome/transcript.py index 1ae0af13..a3316e58 100644 --- a/tools/codecome/transcript.py +++ b/tools/codecome/transcript.py @@ -30,6 +30,22 @@ def _transcript_dir() -> Path: return d +def _unique_transcript_path(path: Path) -> Path: + """Return a transcript path that will not truncate an existing file.""" + if not path.exists(): + return path + + stamp = time.strftime("%Y%m%d-%H%M%S") + pid = os.getpid() + stem = path.stem + suffix = path.suffix + for n in range(1, 1000): + candidate = path.with_name(f"{stem}-{stamp}-pid{pid}-{n}{suffix}") + if not candidate.exists(): + return candidate + raise OSError(f"could not allocate unique transcript path for {path}") + + class Transcript: """JSONL event transcript — handles open, write, close. 
@@ -54,8 +70,8 @@ def for_phase(cls, phase: str, finding: str | None) -> Transcript: counter = _ATTEMPT_COUNTER.get(key, 1) _ATTEMPT_COUNTER[key] = counter + 1 - path = _transcript_dir() / f"last-phase-{phase}-{finding_tag}-attempt-{counter}.jsonl" - return cls(path, path.open("w", encoding="utf-8")) + path = _unique_transcript_path(_transcript_dir() / f"last-phase-{phase}-{finding_tag}-attempt-{counter}.jsonl") + return cls(path, path.open("x", encoding="utf-8", buffering=1)) @classmethod def for_chat(cls) -> Transcript: diff --git a/tools/codeql.py b/tools/codeql.py new file mode 100644 index 00000000..593a7375 --- /dev/null +++ b/tools/codeql.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL CLI wrapper for CodeCome. + +Usage:: + + tools/codeql.py install + tools/codeql.py check + tools/codeql.py resolve-packs + tools/codeql.py run + tools/codeql.py import-risk + tools/codeql.py check-artifacts +""" + +from __future__ import annotations + +import argparse +import json +import subprocess +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent)) + +from codeql.config import ROOT, resolve_config +from codeql.packs import PackResolverError, dump_yaml, load_codeql_plan, load_pack_catalog, resolve_plan_packs + + +def _cmd_install() -> int: + """Install the managed CodeQL CLI.""" + from codeql.install import install + return install() + + +def _cmd_check() -> int: + """Check that CodeQL CLI is available and working.""" + config = resolve_config() + + if not config.enabled: + print("CodeQL is disabled (CODEQL=0 or CODEQL_SKIP=1).") + return 0 + + binary_path = config.abs_install_path + + if not binary_path.is_file(): + print(f"FAIL: CodeQL binary not found at {binary_path}") + print("Run 'tools/codeql.py install' to install the managed CodeQL CLI.") + return 1 + + try: + result = subprocess.run( + 
[str(binary_path), "--version"], + capture_output=True, + text=True, + timeout=30, + ) + if result.returncode != 0: + print(f"FAIL: codeql --version failed: {result.stderr}") + return 1 + version_line = result.stdout.strip().split("\n")[0] + print(f"CodeQL CLI: {version_line}") + except Exception as exc: + print(f"FAIL: {exc}") + return 1 + + print("Checking pack resolution …") + try: + result = subprocess.run( + [str(binary_path), "resolve", "qlpacks", f"--common-caches={config.abs_cache_dir}"], + capture_output=True, + text=True, + timeout=60, + ) + if result.returncode != 0: + print(f"WARN: codeql resolve qlpacks failed: {result.stderr}") + else: + print("Pack resolution OK.") + except Exception as exc: + print(f"WARN: pack resolution check failed: {exc}") + + print("CodeQL CLI check passed.") + return 0 + + +def _cmd_resolve_packs(args: argparse.Namespace) -> int: + """Resolve CodeQL plan pack profiles to concrete pack references.""" + config = resolve_config() + + plan_path = ROOT / args.plan if not Path(args.plan).is_absolute() else Path(args.plan) + catalog_path = config.abs_pack_catalog + output_path = ROOT / args.output if not Path(args.output).is_absolute() else Path(args.output) + + try: + catalog = load_pack_catalog(catalog_path) + plan = load_codeql_plan(plan_path) + resolved = resolve_plan_packs(plan, catalog) + except PackResolverError as exc: + print(f"FAIL: {exc}") + return 1 + + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(dump_yaml(resolved), encoding="utf-8") + + if args.format == "json": + print(json.dumps(resolved, indent=2)) + else: + print(f"Resolved CodeQL packs written to {output_path.relative_to(ROOT) if output_path.is_relative_to(ROOT) else output_path}") + for unit in resolved["analysis_units"]: + print(f"- {unit['id']} ({unit['path']})") + for language in unit["languages"]: + print(f" {language['id']}: {', '.join(language['profiles'])}") + for pack in language["packs"]: + print(f" {pack}") + return 0 + 
+ +def _cmd_run() -> int: + """Run CodeQL analysis: create databases, analyze, normalize SARIF.""" + config = resolve_config() + + if not config.enabled: + print("CodeQL is disabled (CODEQL=0 or CODEQL_SKIP=1). Skipping run.") + from codeql.pipeline import record_skipped_run + record_skipped_run(config, "CodeQL disabled for this run") + return 0 + + binary_path = config.abs_install_path + if not binary_path.is_file(): + print(f"FAIL: CodeQL binary not found at {binary_path}") + print("Run 'tools/codeql.py install' to install the managed CodeQL CLI.") + return 1 + + from codeql.pipeline import run_full_pipeline + + def progress(message: str) -> None: + print(message, flush=True) + + manifest = run_full_pipeline(config, progress=progress) + + status = manifest["status"] + print(f"CodeQL run: {status}") + + if manifest.get("warnings"): + for w in manifest["warnings"]: + print(f" WARN: {w}") + if manifest.get("failures"): + for f in manifest["failures"]: + print(f" FAIL: {f}") + + output_dir = config.abs_output_dir + normalized_dir = output_dir / "normalized" + alerts_path = normalized_dir / "alerts.yml" + signals_path = normalized_dir / "file-signals.yml" + + if alerts_path.is_file(): + print(f"Normalized alerts: {alerts_path.relative_to(ROOT) if alerts_path.is_relative_to(ROOT) else alerts_path}") + if signals_path.is_file(): + print(f"File signals: {signals_path.relative_to(ROOT) if signals_path.is_relative_to(ROOT) else signals_path}") + + summary_path = output_dir / "codeql-summary.md" + if summary_path.is_file(): + print(f"Summary: {summary_path.relative_to(ROOT) if summary_path.is_relative_to(ROOT) else summary_path}") + + if status == "failed": + return 1 + return 0 + + +def _cmd_import_risk() -> int: + """Import CodeQL file signals into file-risk-index.yml.""" + config = resolve_config() + if not config.enabled: + print("CodeQL is disabled — skipping risk import.") + return 0 + + from codeql.import_risk import import_risk + + signals_path = 
config.abs_output_dir / "normalized" / "file-signals.yml" + risk_path = ROOT / "itemdb/notes/file-risk-index.yml" + + status, warnings = import_risk(signals_path, risk_path) + for w in warnings: + print(f"WARN: {w}") + if status == "skipped": + print("Risk import skipped — no risk index to enrich.") + return 0 + + print(f"File risk index enriched from {signals_path.relative_to(ROOT) if signals_path.is_relative_to(ROOT) else signals_path}") + return 0 + + +def _cmd_check_artifacts() -> int: + """Check CodeQL artifact state after a run.""" + config = resolve_config() + from codeql.artifacts import check_artifacts + + status, warnings = check_artifacts(config.abs_output_dir) + for w in warnings: + print(f" WARN: {w}") + print(f"CodeQL artifacts: {status}") + return 0 if status in ("completed", "skipped", "soft-failed") else 1 + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="CodeQL CLI wrapper for CodeCome.", + ) + sub = parser.add_subparsers(dest="command", required=True) + + sub.add_parser("install", help="Install the managed CodeQL CLI.") + sub.add_parser("check", help="Verify the CodeQL CLI is installed and working.") + sub.add_parser("run", help="Run CodeQL analysis (create DBs, analyze, normalize SARIF).") + sub.add_parser("import-risk", help="Import CodeQL file signals into file-risk-index.yml.") + sub.add_parser("check-artifacts", help="Check CodeQL artifact state after a run.") + resolve = sub.add_parser("resolve-packs", help="Resolve plan pack profiles to concrete pack references.") + resolve.add_argument("--plan", default="itemdb/notes/codeql-plan.yml", help="Path to codeql-plan.yml") + resolve.add_argument( + "--output", + default="itemdb/codeql/selected-query-packs.yml", + help="Path to write resolved pack selections", + ) + resolve.add_argument("--format", choices=["text", "json"], default="text", help="Output format") + + return parser + + +def main() -> int: + parser = build_parser() + args = 
parser.parse_args() + + if args.command == "install": + return _cmd_install() + elif args.command == "check": + return _cmd_check() + elif args.command == "resolve-packs": + return _cmd_resolve_packs(args) + elif args.command == "run": + return _cmd_run() + elif args.command == "import-risk": + return _cmd_import_risk() + elif args.command == "check-artifacts": + return _cmd_check_artifacts() + + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tools/codeql/__init__.py b/tools/codeql/__init__.py new file mode 100644 index 00000000..61f74eac --- /dev/null +++ b/tools/codeql/__init__.py @@ -0,0 +1 @@ +# CodeQL integration package. diff --git a/tools/codeql/artifacts.py b/tools/codeql/artifacts.py new file mode 100644 index 00000000..e5b1ec92 --- /dev/null +++ b/tools/codeql/artifacts.py @@ -0,0 +1,57 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL artifact gate: validate post-run artifacts exist and are consistent.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +VALID_STATUSES = frozenset({"completed", "skipped", "soft-failed", "failed"}) + + +def check_artifacts(output_dir: Path) -> tuple[str, list[str]]: + """Check CodeQL artifact state after a run. + + Returns (status_string, warnings). 
+ + status_string values: + "missing" — run-manifest.yml does not exist + "completed" — analysis ran; normalized outputs expected + "skipped" — CodeQL was disabled or no plan existed + "soft-failed" — analysis failed but phase may continue + "failed" — hard failure + "unknown" — unrecognized status value in manifest + """ + manifest_path = output_dir / "run-manifest.yml" + if not manifest_path.is_file(): + return ("missing", [f"run-manifest.yml not found at {manifest_path}"]) + + try: + from codeql.packs import load_yaml_mapping + + manifest = load_yaml_mapping(manifest_path, what="run manifest") + except Exception as exc: + return ("unknown", [f"run-manifest.yml is not valid YAML: {exc}"]) + + status = manifest.get("status", "") + if status not in VALID_STATUSES: + return ("unknown", [f"unrecognized status {status!r} in run-manifest.yml"]) + + warnings: list[str] = [] + + # Propagate recorded failures as warnings for the gate consumer. + failures = manifest.get("failures", []) + if isinstance(failures, list): + warnings.extend(failures) + + # For completed runs, verify normalized outputs exist (only if languages were analyzed). 
+ languages = manifest.get("languages") or manifest.get("language_ids", []) + if status == "completed" and languages: + normalized_dir = output_dir / "normalized" + for expected in ("alerts.yml", "file-signals.yml"): + if not (normalized_dir / expected).is_file(): + warnings.append(f"expected normalized output missing: {expected}") + + return (status, warnings) diff --git a/tools/codeql/capabilities.py b/tools/codeql/capabilities.py new file mode 100644 index 00000000..f0604459 --- /dev/null +++ b/tools/codeql/capabilities.py @@ -0,0 +1,28 @@ +# Copyright (C) 2025-2026 Pablo Ruiz Garcia +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL language capability metadata.""" + +from __future__ import annotations + + +BUILD_MODES_BY_LANGUAGE: dict[str, set[str]] = { + "python": {"none"}, + "javascript-typescript": {"none"}, + "ruby": {"none"}, + "c-cpp": {"manual", "autobuild"}, + "go": {"manual", "autobuild"}, + "csharp": {"none", "manual", "autobuild"}, + "java-kotlin": {"none", "manual", "autobuild"}, + "swift": {"manual", "autobuild"}, +} + + +def supported_build_modes(language_id: str) -> set[str]: + """Return supported CodeQL build modes for *language_id*.""" + return set(BUILD_MODES_BY_LANGUAGE.get(language_id, set())) + + +def is_supported_language(language_id: str) -> bool: + """Return whether *language_id* is known to this CodeQL integration.""" + return language_id in BUILD_MODES_BY_LANGUAGE diff --git a/tools/codeql/config.py b/tools/codeql/config.py new file mode 100644 index 00000000..f9ab8960 --- /dev/null +++ b/tools/codeql/config.py @@ -0,0 +1,260 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL configuration resolution. + +Priority: environment variables > codecome.yml > hard-coded defaults. 
+""" + +from __future__ import annotations + +import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +try: + import yaml +except ImportError: # pragma: no cover + yaml = None # type: ignore[assignment] + + +# --------------------------------------------------------------------------- +# Resolve the workspace root. When imported from the tools/codeql/ package, +# three levels above __file__ gives the repo root. +# --------------------------------------------------------------------------- +ROOT = Path(__file__).resolve().parents[2] + + +# --------------------------------------------------------------------------- +# Defaults (lowest priority) +# --------------------------------------------------------------------------- + +DEFAULTS: dict[str, Any] = { + "enabled": True, + "fail_policy": "soft", + "pack_catalog": "./templates/codeql-packs.yml", + "install_managed": True, + "install_version": "latest", + "install_path": ".tools/codeql/current/codeql", + "output_dir": "./itemdb/codeql", + "database_dir": "./itemdb/codeql/databases", + "cache_dir": "./.cache/codeql", + "phase_1_enabled": True, + "phase_2_enabled": True, + "candidate_mode": "precreate", + "max_candidates": 10, + "sweep_enabled": True, + "sweep_inject_context": True, + "db_create_timeout": 600, + "analyze_timeout": 600, +} + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _load_codecome_yml() -> dict[str, Any] | None: + """Load codecome.yml and return the configured CodeQL block.""" + if yaml is None: + return None + path = ROOT / "codecome.yml" + if not path.is_file(): + return None + try: + data = yaml.safe_load(path.read_text(encoding="utf-8")) + except (yaml.YAMLError, OSError, UnicodeDecodeError): + return None + if not isinstance(data, dict): + return None + audit = data.get("audit") + if not isinstance(audit, dict): + 
return None + sa = audit.get("static_analysis") + if not isinstance(sa, dict): + return None + cq = sa.get("codeql") + return cq if isinstance(cq, dict) else None + + +def _bool_env(name: str) -> bool | None: + """Return a tri-state bool from an env var (0/false/no → False, 1/true/yes → True).""" + raw = os.environ.get(name) + if raw is None: + return None + return raw.strip().lower() in ("1", "true", "yes") + + +def _str_env(name: str) -> str | None: + raw = os.environ.get(name) + return raw.strip() if raw else None + + +def _safe_int(value: Any, default: int) -> int: + try: + return int(value) + except (TypeError, ValueError): + return default + + +# --------------------------------------------------------------------------- +# Resolution +# --------------------------------------------------------------------------- + +@dataclass +class CodeQLConfig: + """Resolved CodeQL configuration.""" + + enabled: bool = True + fail_policy: str = "soft" + + pack_catalog: str = "./templates/codeql-packs.yml" + + install_managed: bool = True + install_version: str = "latest" + install_path: str = ".tools/codeql/current/codeql" + + output_dir: str = "./itemdb/codeql" + database_dir: str = "./itemdb/codeql/databases" + cache_dir: str = "./.cache/codeql" + + phase_1_enabled: bool = True + phase_2_enabled: bool = True + candidate_mode: str = "precreate" + max_candidates: int = 10 + + sweep_enabled: bool = True + sweep_inject_context: bool = True + + db_create_timeout: int = 600 + analyze_timeout: int = 600 + + # Absolute paths (resolved from ROOT) + abs_pack_catalog: Path = field(default_factory=Path) + abs_install_path: Path = field(default_factory=Path) + abs_output_dir: Path = field(default_factory=Path) + abs_database_dir: Path = field(default_factory=Path) + abs_cache_dir: Path = field(default_factory=Path) + + +def resolve_config() -> CodeQLConfig: + """Resolve the CodeQL configuration. + + Priority: env vars > codecome.yml > defaults. 
+ """ + yml = _load_codecome_yml() or {} + + def _get(key: str, default: Any, env: str | None = None, coerce: Any = None) -> Any: + """Pick the highest-priority value.""" + # 1. Environment variable + if env is not None: + raw = os.environ.get(env) + if raw is not None and raw.strip() != "": + if coerce is bool: + return raw.strip().lower() in ("1", "true", "yes") + if coerce is int: + try: + return int(raw) + except ValueError: + pass + return raw.strip() + + # 2. codecome.yml + m_key = key.replace("install_", "install.").replace("phase_1_", "phase_1.").replace("phase_2_", "phase_2.").replace("sweep_", "sweep.") + # Try nested lookup + parts = m_key.split(".") + node: Any = yml + for p in parts: + if isinstance(node, dict) and p in node: + node = node[p] + else: + node = None + break + if node is not None: + return node + + # 3. Defaults + return default + + # Top-level overrides + enabled = _bool_env("CODEQL") + if enabled is not None: + # CODEQL=0 → disabled, CODEQL=1 → enabled + pass + else: + enabled = _get("enabled", DEFAULTS["enabled"], coerce=bool) + + # Also check CODEQL_SKIP + skip = _bool_env("CODEQL_SKIP") + if skip is True: + enabled = False + + fail_policy = _str_env("CODEQL_FAIL_POLICY") or _get("fail_policy", DEFAULTS["fail_policy"]) + + # Install settings + install_managed = _get("install_managed", DEFAULTS["install_managed"], + env="CODEQL_MANAGED_INSTALL", coerce=bool) + install_version = _str_env("CODEQL_VERSION") or _get("install_version", DEFAULTS["install_version"]) + install_path = _get("install_path", DEFAULTS["install_path"]) + + # Paths + pack_catalog = _get("pack_catalog", DEFAULTS["pack_catalog"]) + output_dir = _get("output_dir", DEFAULTS["output_dir"]) + database_dir = _get("database_dir", DEFAULTS["database_dir"]) + cache_dir = _get("cache_dir", DEFAULTS["cache_dir"]) + + # Phase settings + phase_1_enabled = _get("phase_1_enabled", DEFAULTS["phase_1_enabled"], + env="CODEQL_PHASE_1", coerce=bool) + phase_2_enabled = 
_get("phase_2_enabled", DEFAULTS["phase_2_enabled"], + env="CODEQL_PHASE_2", coerce=bool) + candidate_mode = _str_env("CODEQL_CANDIDATES") or _get("candidate_mode", DEFAULTS["candidate_mode"]) + max_candidates_raw = _str_env("CODEQL_MAX_CANDIDATES") + if max_candidates_raw is None: + max_candidates = _safe_int(_get("max_candidates", DEFAULTS["max_candidates"]), DEFAULTS["max_candidates"]) + else: + max_candidates = _safe_int(max_candidates_raw, DEFAULTS["max_candidates"]) + + # Sweep settings + sweep_enabled = _get("sweep_enabled", DEFAULTS["sweep_enabled"], + env="CODEQL_SWEEP", coerce=bool) + sweep_inject_context = _get("sweep_inject_context", DEFAULTS["sweep_inject_context"], + coerce=bool) + + # Timeout settings + db_create_timeout = _safe_int( + _str_env("CODEQL_DB_CREATE_TIMEOUT") + or _get("db_create_timeout", DEFAULTS["db_create_timeout"]), + DEFAULTS["db_create_timeout"], + ) + analyze_timeout = _safe_int( + _str_env("CODEQL_ANALYZE_TIMEOUT") + or _get("analyze_timeout", DEFAULTS["analyze_timeout"]), + DEFAULTS["analyze_timeout"], + ) + + return CodeQLConfig( + enabled=enabled, + fail_policy=fail_policy, + pack_catalog=pack_catalog, + install_managed=install_managed, + install_version=install_version, + install_path=install_path, + output_dir=output_dir, + database_dir=database_dir, + cache_dir=cache_dir, + phase_1_enabled=phase_1_enabled, + phase_2_enabled=phase_2_enabled, + candidate_mode=candidate_mode, + max_candidates=max_candidates, + sweep_enabled=sweep_enabled, + sweep_inject_context=sweep_inject_context, + db_create_timeout=db_create_timeout, + analyze_timeout=analyze_timeout, + abs_pack_catalog=(ROOT / pack_catalog).resolve(), + abs_install_path=(ROOT / install_path).resolve(), + abs_output_dir=(ROOT / output_dir).resolve(), + abs_database_dir=(ROOT / database_dir).resolve(), + abs_cache_dir=(ROOT / cache_dir).resolve(), + ) diff --git a/tools/codeql/import_risk.py b/tools/codeql/import_risk.py new file mode 100644 index 00000000..a6f3e131 --- 
/dev/null +++ b/tools/codeql/import_risk.py @@ -0,0 +1,135 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""Enrich file-risk-index.yml from CodeQL file-signals.yml.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from codeql.packs import PackResolverError, load_yaml_mapping, dump_yaml + + +def import_risk(file_signals_path: Path, risk_index_path: Path) -> tuple[str | None, list[str]]: + """Enrich the file-risk-index with CodeQL signals. + + Returns (status or None, warnings). + + - Preserves existing entries and model-authored reasons. + - Does not duplicate file entries. + - Caps scores at 5. + - Adds ``codeql_score_boost`` and ``external_signals.codeql`` block. + + If the risk index does not exist, no-op with a warning. + """ + warnings: list[str] = [] + + if not file_signals_path.is_file(): + return None, [f"file-signals.yml not found at {file_signals_path}"] + + if not risk_index_path.is_file(): + return "skipped", [f"file-risk-index.yml not found at {risk_index_path}"] + + try: + signals = load_yaml_mapping(file_signals_path, what="CodeQL file signals") + except PackResolverError as exc: + return None, [str(exc)] + + try: + risk_index = load_yaml_mapping(risk_index_path, what="file risk index") + except PackResolverError as exc: + return None, [str(exc)] + + risks = risk_index.get("files") + if not isinstance(risks, list): + return None, ["file-risk-index.yml missing 'files' list"] + + signal_files = signals.get("files", []) + if not isinstance(signal_files, list): + return "skipped", ["file-signals.yml has no files"] + + existing_paths = {entry.get("path", "") for entry in risks if isinstance(entry, dict)} + modified = False + + for signal in signal_files: + if not isinstance(signal, dict): + continue + file_path = signal.get("path", "") + if not file_path: + continue + if file_path in existing_paths: + _update_existing_entry(risks, 
file_path, signal) + modified = True + else: + _add_new_entry(risks, file_path, signal) + existing_paths.add(file_path) + modified = True + + if modified: + risk_index["files"] = risks + risk_index_path.write_text(dump_yaml(risk_index), encoding="utf-8") + + return None, warnings + + +def _update_existing_entry(entries: list[dict[str, Any]], file_path: str, signal: dict[str, Any]) -> None: + """Enrich an existing file-risk-index entry with CodeQL signals.""" + for entry in entries: + if entry.get("path") != file_path: + continue + + boost = signal.get("codeql_score_boost", 0) + if isinstance(boost, (int, float)): + current = entry.get("score", 1) + current = int(current) if isinstance(current, (int, float)) else 1 + new_score = min(5, current + int(boost)) + if new_score > current: + entry["score"] = new_score + reasons = entry.setdefault("reasons", []) + if isinstance(reasons, list): + reasons.append(f"CodeQL static analysis signal increased score by +{new_score - current}.") + + codeql_alerts = signal.get("alerts", {}) + rules = signal.get("rules", []) + if isinstance(codeql_alerts, dict): + entry.setdefault("external_signals", {}) + entry["external_signals"]["codeql"] = { + "alerts": codeql_alerts.get("total", 0), + "path_problems": codeql_alerts.get("path_problems", 0), + "highest_precision": "high" if codeql_alerts.get("high_precision", 0) > 0 else "medium", + "rules": rules if isinstance(rules, list) else [], + } + + return + + +def _add_new_entry(entries: list[dict[str, Any]], file_path: str, signal: dict[str, Any]) -> None: + """Append a new file-risk-index entry from CodeQL signals.""" + boost = signal.get("codeql_score_boost", 1) + codeql_alerts = signal.get("alerts", {}) + rules = signal.get("rules", []) + + entry: dict[str, Any] = { + "path": file_path, + "score": min(5, int(boost) if isinstance(boost, (int, float)) else 1), + "confidence": "MEDIUM", + "target_area": "", + "reasons": ["CodeQL static analysis signal."], + "sources": [], + "sinks": [], + 
"trust_boundaries": [], + "suggested_vulnerability_classes": [], + "suggested_skills": [], + "suggested_validation_methods": [], + "external_signals": { + "codeql": { + "alerts": codeql_alerts.get("total", 0) if isinstance(codeql_alerts, dict) else 0, + "path_problems": codeql_alerts.get("path_problems", 0) if isinstance(codeql_alerts, dict) else 0, + "highest_precision": "high" if (isinstance(codeql_alerts, dict) and codeql_alerts.get("high_precision", 0) > 0) else "medium", + "rules": rules if isinstance(rules, list) else [], + } + }, + } + + entries.append(entry) diff --git a/tools/codeql/install.py b/tools/codeql/install.py new file mode 100644 index 00000000..94605077 --- /dev/null +++ b/tools/codeql/install.py @@ -0,0 +1,341 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""Managed CodeQL CLI installation. + +Downloads the CodeQL CLI bundle from GitHub Releases, extracts it to a +versioned directory under ``.tools/codeql/``, and maintains a ``current`` +symlink pointing to the active version. 
+""" + +from __future__ import annotations + +import os +import platform +import re +import shutil +import stat +import subprocess +import sys +import tempfile +from pathlib import Path +from typing import Optional +from urllib.request import urlopen, Request + +from codeql.config import resolve_config, CodeQLConfig, ROOT + + +GITHUB_API_RELEASES = "https://api.github.com/repos/github/codeql-cli-binaries/releases" + +_VERSION_RE = re.compile(r"^v?\d+\.\d+\.\d+$") + + +def _validate_version(version: str) -> bool: + """Return True if *version* is a safe semver-like string (no path traversal).""" + return bool(_VERSION_RE.match(version)) + + +def _github_headers() -> dict[str, str]: + """Return GitHub API headers, using a token when available.""" + headers = { + "Accept": "application/vnd.github+json", + "User-Agent": "CodeCome-CodeQL-Installer/1.0", + "X-GitHub-Api-Version": "2022-11-28", + } + token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") + if token: + headers["Authorization"] = f"Bearer {token}" + return headers + + +# --------------------------------------------------------------------------- +# Platform detection +# --------------------------------------------------------------------------- + +def _detect_platform() -> str: + system = platform.system().lower() + machine = platform.machine().lower() + if system == "darwin": + return "osx64" + if system == "linux": + return "linux64" + if system == "windows": + return "win64" + raise RuntimeError(f"Unsupported platform: system={system} machine={machine}") + + +def _bundle_suffix(plat: str) -> str: + """Return the asset name suffix for a given platform.""" + return f"{plat}.zip" + + +# --------------------------------------------------------------------------- +# Release discovery +# --------------------------------------------------------------------------- + +def _fetch_latest_version() -> str: + """Fetch the latest CodeQL CLI version tag from the GitHub API.""" + import json + + url = 
def _download(url: str, dest: Path) -> None:
    """Download *url* to *dest*, streaming the response body to disk.

    Raises:
        RuntimeError: if the request or the write fails. A partially written
            *dest* is removed first, and the original error is chained as
            the cause.
    """
    print(f"Downloading {url} …")
    # Release assets are public and are served via a redirect to a separate
    # download host. urllib re-sends request headers when it follows a
    # redirect, so the API bearer token must not be attached here: it is not
    # needed and would be disclosed to that host.
    headers = {
        name: value
        for name, value in _github_headers().items()
        if name.lower() != "authorization"
    }
    headers["Accept"] = "application/octet-stream"
    req = Request(url, headers=headers)
    try:
        with urlopen(req, timeout=300) as resp, open(dest, "wb") as out:
            shutil.copyfileobj(resp, out, 64 * 1024)
    except Exception as exc:
        # Deliberately broad: whatever went wrong (network, TLS, disk), the
        # caller only needs one error type and no half-written archive.
        if dest.exists():
            dest.unlink()
        raise RuntimeError(f"Download failed: {exc}") from exc
+ + GitHub's CodeQL bundles contain a single top-level ``codeql/`` directory. + We strip that prefix during extraction so the launcher ends up at + ``dest_dir/codeql`` and the rest of the bundle contents sit directly under + the version directory. + """ + import zipfile + + prefix = "codeql/" + dest_root = dest_dir.resolve() + dest_dir.mkdir(parents=True, exist_ok=True) + print(f"Extracting to {dest_dir} …") + with zipfile.ZipFile(zip_path, "r") as zf: + for info in zf.infolist(): + if not info.filename.startswith(prefix): + raise RuntimeError( + f"Unexpected CodeQL bundle layout: {info.filename!r} does not start with {prefix!r}" + ) + + relative_name = info.filename[len(prefix):] + if not relative_name: + continue + + target = (dest_dir / relative_name).resolve() + if target != dest_root and dest_root not in target.parents: + raise RuntimeError(f"Refusing to extract CodeQL bundle member outside target dir: {info.filename!r}") + if info.is_dir(): + target.mkdir(parents=True, exist_ok=True) + continue + + target.parent.mkdir(parents=True, exist_ok=True) + with zf.open(info, "r") as src, target.open("wb") as dst: + shutil.copyfileobj(src, dst) + + mode = (info.external_attr >> 16) & 0o777 + if mode: + target.chmod(mode) + + launcher = dest_dir / "codeql" + if launcher.is_file(): + launcher.chmod(launcher.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) + + +# --------------------------------------------------------------------------- +# Install +# --------------------------------------------------------------------------- + +def _codeql_binary(base_dir: Path) -> Path: + """Return the path to the codeql executable inside an extracted bundle. + + New installs place the binary at ``base_dir/codeql``. Keep a temporary + fallback for older nested local installs. 
def install(config: Optional[CodeQLConfig] = None) -> int:
    """Install (or reinstall) the managed CodeQL CLI.

    The install is skipped (returning 0) when CodeQL is disabled, when
    ``CODEQL_SKIP_INSTALL=1`` is set, or when managed installs are turned off
    in the config. An already-present version is reused unless
    ``CODEQL_FORCE_INSTALL=1`` is set.

    Args:
        config: resolved configuration; resolved on demand when omitted.

    Returns:
        0 on success, 1 on failure. Release lookup, download and extraction
        errors are reported on stderr and turned into a return code of 1.
    """
    import zipfile  # local: only needed to recognise a corrupt download

    if config is None:
        config = resolve_config()

    if not config.enabled:
        print("CodeQL is disabled (CODEQL=0 or CODEQL_SKIP=1). Skipping install.")
        return 0

    if os.environ.get("CODEQL_SKIP_INSTALL") == "1":
        print("CODEQL_SKIP_INSTALL=1 — skipping managed install.")
        return 0

    if not config.install_managed:
        print("Managed install disabled in config. Skipping.")
        return 0

    # --- Determine version ---
    version = config.install_version
    if version == "latest":
        print("Determining latest CodeQL CLI version …")
        try:
            version = _fetch_latest_version()
        except RuntimeError as exc:
            print(f"ERROR: {exc}", file=sys.stderr)
            return 1
        print(f"Latest version: {version}")

    if not _validate_version(version):
        print(f"ERROR: invalid version '{version}' — must be semver-like (e.g. 2.25.5)", file=sys.stderr)
        return 1

    # Normalize: drop the optional leading 'v' to avoid a double 'v' in URLs/paths.
    if version.startswith("v"):
        version = version[1:]

    # --- Determine target directories ---
    tools_dir = ROOT / ".tools" / "codeql"
    version_dir = tools_dir / version
    current_link = tools_dir / "current"
    binary_path = _codeql_binary(version_dir)

    # Reuse an existing install unless a reinstall was forced.
    force = os.environ.get("CODEQL_FORCE_INSTALL") == "1"
    if not force and binary_path.is_file():
        print(f"CodeQL CLI v{version} already installed at {version_dir}")
        # Ensure the 'current' symlink points to this version
        _ensure_symlink(version_dir, current_link)
        return _verify(binary_path)

    # --- Locate the bundle ---
    try:
        plat = _detect_platform()
    except RuntimeError as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1
    print(f"Platform: {plat}")

    try:
        assets = _fetch_release_assets(version)
    except RuntimeError as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1

    url = _find_download_url(assets, plat)
    if url is None:
        print(f"ERROR: No CodeQL CLI bundle found for platform '{plat}' in release v{version}",
              file=sys.stderr)
        print("Available assets:", file=sys.stderr)
        for a in assets:
            print(f"  - {a.get('name', '?')}", file=sys.stderr)
        return 1

    # --- Download and extract ---
    tmp_root = ROOT / "tmp"
    tmp_root.mkdir(parents=True, exist_ok=True)
    tmp_dir = Path(tempfile.mkdtemp(prefix="codecome-codeql-", dir=tmp_root))
    extracting = False
    try:
        zip_path = tmp_dir / f"codeql-{version}-{plat}.zip"
        _download(url, zip_path)

        # From here on version_dir is being rebuilt. The existing install is
        # only touched once the new archive is safely on disk.
        extracting = True
        if version_dir.exists():
            shutil.rmtree(version_dir)

        _extract(zip_path, version_dir)
    except (RuntimeError, OSError, zipfile.BadZipFile) as exc:
        if extracting:
            # Do not leave a half-extracted tree behind: a later run would
            # find the launcher and treat the version as installed.
            shutil.rmtree(version_dir, ignore_errors=True)
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)

    # Recompute the binary path after extraction — a legacy nested install
    # may have been replaced with the new flat layout during this run.
    binary_path = _codeql_binary(version_dir)

    # --- Create current symlink ---
    _ensure_symlink(version_dir, current_link)

    # --- Verify ---
    return _verify(binary_path)
+ + Returns (alerts_path, file_signals_path). + """ + alerts: list[dict[str, Any]] = [] + alert_counter = 0 + + for sarif_file in sorted(sarif_dir.glob("*.sarif")): + stem = sarif_file.stem # e.g. "api.python.official" + parts = stem.split(".", 2) + if len(parts) != 3: + continue + analysis_unit_id, language_id, profile = parts + + new_alerts = _parse_sarif(sarif_file, analysis_unit_id, language_id, profile, alert_counter, source_root) + alert_counter += len(new_alerts) + alerts.extend(new_alerts) + + file_signals = _build_file_signals(alerts) + + output_dir.mkdir(parents=True, exist_ok=True) + + alerts_path = output_dir / "alerts.yml" + file_signals_path = output_dir / "file-signals.yml" + + from codeql.packs import dump_yaml + + alerts_path.write_text( + dump_yaml( + { + "schema_version": 1, + "generated_by": "codeql-normalize", + "codeql_version": codeql_version, + "target": "codecome-target", + "alerts": alerts, + } + ), + encoding="utf-8", + ) + + file_signals_path.write_text( + dump_yaml( + { + "schema_version": 1, + "generated_by": "codeql-normalize", + "codeql_version": codeql_version, + "files": file_signals, + } + ), + encoding="utf-8", + ) + + return alerts_path, file_signals_path + + +def _parse_sarif( + path: Path, + analysis_unit_id: str, + language_id: str, + pack_profile: str, + start_index: int, + source_root: Path, +) -> list[dict[str, Any]]: + """Parse one SARIF file and return a list of normalized alert dicts.""" + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + return [] + + alerts: list[dict[str, Any]] = [] + runs = data.get("runs", []) + if not isinstance(runs, list): + return alerts + + for run in runs: + if not isinstance(run, dict): + continue + results = run.get("results", []) + if not isinstance(results, list): + continue + rules_lookup = _build_rules_lookup(run) + + for result in results: + if not isinstance(result, dict): + continue + alert = _normalize_one_result( + result, 
rules_lookup, analysis_unit_id, language_id, pack_profile, + start_index + len(alerts) + 1, source_root, + ) + if alert: + alerts.append(alert) + + return alerts + + +def _build_rules_lookup(run: dict[str, Any]) -> dict[str, dict[str, Any]]: + """Build {ruleId: {name, precision, ...}} from tool.driver.rules.""" + driver = run.get("tool", {}).get("driver", {}) + if not isinstance(driver, dict): + return {} + rules = driver.get("rules", []) + if not isinstance(rules, list): + return {} + lookup: dict[str, dict[str, Any]] = {} + for rule in rules: + if not isinstance(rule, dict): + continue + rid = rule.get("id") + if rid: + props = rule.get("properties", {}) + lookup[rid] = { + "name": rule.get("name", rid), + "precision": _coerce_str(props.get("precision")) or _coerce_str(rule.get("precision")), + "security_severity": _coerce_str(props.get("security-severity")) or _coerce_str(rule.get("security-severity")), + "severity": _coerce_str(props.get("problem.severity")), + } + return lookup + + +def _normalize_one_result( + result: dict[str, Any], + rules_lookup: dict[str, dict[str, Any]], + analysis_unit_id: str, + language_id: str, + pack_profile: str, + index: int, + source_root: Path, +) -> dict[str, Any] | None: + """Normalize a single SARIF result into a CodeCome alert dict.""" + rule_id = result.get("ruleId") or result.get("ruleIndex") + if rule_id is None: + return None + + rule_meta = rules_lookup.get(str(rule_id), {}) + + primary_location = _extract_location(result) + if primary_location is None: + return None + + severity = result.get("level") if result.get("level") else "warning" + + fingerprints = result.get("partialFingerprints") or {} + fingerprint = fingerprints.get("primaryLocationLineHash", "") + + flow = _extract_flow(result, source_root) + + return { + "id": f"CQ-{index:04d}", + "fingerprint": fingerprint, + "analysis_unit_id": analysis_unit_id, + "language": language_id, + "pack_profile": pack_profile, + "pack": _first_pack(result, rules_lookup), + 
"rule_id": str(rule_id), + "rule_name": rule_meta.get("name", str(rule_id)), + "severity": _normalize_severity(severity), + "security_severity": rule_meta.get("security_severity"), + "precision": rule_meta.get("precision"), + "kind": result.get("kind"), + "primary_location": primary_location, + "flow": flow, + "mapped": { + "category": _map_category(str(rule_id), result), + "suggested_validation_methods": _suggested_validation_methods(str(rule_id)), + }, + } + + +def _extract_location(result: dict[str, Any]) -> dict[str, Any] | None: + """Extract the primary_location from the first result location.""" + locations = result.get("locations", []) + if not isinstance(locations, list) or not locations: + return None + first = locations[0] + if not isinstance(first, dict): + return None + pl = first.get("physicalLocation", {}) + if not isinstance(pl, dict): + return None + artifact = pl.get("artifactLocation", {}) + if not isinstance(artifact, dict): + return None + uri = artifact.get("uri", "") + if not uri: + return None + region = pl.get("region", {}) + if not isinstance(region, dict): + return {"path": uri, "start_line": 1, "end_line": 1} + start_line = region.get("startLine", 1) + return { + "path": uri, + "start_line": start_line, + "end_line": region.get("endLine", start_line), + } + + +def _extract_flow(result: dict[str, Any], source_root: Path) -> dict[str, Any] | None: + """Extract source/sink/steps from codeFlows.""" + code_flows = result.get("codeFlows", []) + if not isinstance(code_flows, list) or not code_flows: + return None + + first_flow = code_flows[0] + if not isinstance(first_flow, dict): + return None + + thread_flows = first_flow.get("threadFlows", []) + if not isinstance(thread_flows, list) or not thread_flows: + return None + + locations = thread_flows[0].get("locations", []) + if not isinstance(locations, list) or not locations: + return None + + def _loc_to_entry(loc: dict[str, Any]) -> dict[str, Any] | None: + loc_obj = loc.get("location", {}) + 
if not isinstance(loc_obj, dict): + return None + pl = loc_obj.get("physicalLocation", {}) + if not isinstance(pl, dict): + return None + artifact = pl.get("artifactLocation", {}) + if not isinstance(artifact, dict): + return None + uri = artifact.get("uri", "") + region = pl.get("region", {}) + start_line = region.get("startLine", 1) if isinstance(region, dict) else 1 + message = loc.get("message", {}) + text = message.get("text", "") if isinstance(message, dict) else "" + return {"path": uri, "line": start_line, "message": text} + + entries = [] + for loc in locations: + if isinstance(loc, dict): + entry = _loc_to_entry(loc) + if entry: + entries.append(entry) + + if len(entries) < 2: + return None + + source = {"path": entries[0]["path"], "line": entries[0]["line"], "label": entries[0]["message"]} + sink = {"path": entries[-1]["path"], "line": entries[-1]["line"], "label": entries[-1]["message"]} + steps = [] + for entry in entries[1:-1]: + steps.append({"path": entry["path"], "line": entry["line"], "message": entry["message"]}) + + return {"source": source, "sink": sink, "steps": steps} + + +def _build_file_signals(alerts: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Aggregate alerts into per-file signals.""" + groups: dict[str, dict[str, Any]] = {} + + for alert in alerts: + path = alert.get("primary_location", {}).get("path", "") + if not path: + continue + if path not in groups: + groups[path] = { + "path": path, + "codeql_score_boost": 0, + "suggested_sweep": False, + "alerts": {"total": 0, "path_problems": 0, "high_precision": 0}, + "rules": [], + } + grp = groups[path] + grp["alerts"]["total"] += 1 + if alert.get("kind") == "path-problem": + grp["alerts"]["path_problems"] += 1 + if alert.get("precision") == "high": + grp["alerts"]["high_precision"] += 1 + rule_id = alert.get("rule_id", "") + if rule_id and rule_id not in grp["rules"]: + grp["rules"].append(rule_id) + + for grp in groups.values(): + total = grp["alerts"]["total"] + path_problems = 
grp["alerts"]["path_problems"] + high_prec = grp["alerts"]["high_precision"] + boost = min(5, max(1, total + path_problems)) + if high_prec >= 2: + boost = min(5, boost + 1) + grp["codeql_score_boost"] = boost + grp["suggested_sweep"] = total >= 2 + + return sorted(groups.values(), key=lambda g: g["path"]) + + +def _map_category(rule_id: str, result: dict[str, Any]) -> str: + """Map a CodeQL rule ID to a vulnerability category.""" + mapping = { + "path-injection": "Path traversal", + "command-line-injection": "Command injection", + "code-injection": "Code injection", + "nosql-injection": "NoSQL injection", + "sql-injection": "SQL injection", + "xss": "Cross-site scripting", + "hardcoded-credentials": "Hardcoded credentials", + "incomplete-url-substring-sanitization": "URL redirection", + "uncontrolled-deserialization": "Insecure deserialization", + "open-redirect": "Open redirect", + "information-exposure": "Information exposure", + "cleartext-transmission": "Cleartext transmission", + "codeql": "", # catch-all + } + for suffix, category in mapping.items(): + if rule_id.endswith(suffix): + return category + # For CWE-prefixed rules or other unknown forms + if "/" in rule_id: + last = rule_id.rsplit("/", 1)[-1].replace("-", " ").title() + return last + return rule_id + + +def _suggested_validation_methods(rule_id: str) -> list[str]: + """Suggest validation methods based on rule type.""" + if "sql" in rule_id or "nosql" in rule_id: + return ["static_proof", "database_evidence"] + if "injection" in rule_id: + return ["static_proof", "runtime_reproduction"] + if "xss" in rule_id or "cross-site" in rule_id.lower(): + return ["http_exploit"] + return ["static_proof"] + + +def _normalize_severity(level: str) -> str: + """Normalize SARIF severity levels.""" + mapping = {"error": "error", "warning": "warning", "note": "note", "none": "info"} + return mapping.get(level, "warning") + + +def _first_pack(result: dict[str, Any], rules_lookup: dict[str, dict[str, Any]]) -> str: + 
"""Guess a pack reference from the result, fall back to rule metadata.""" + for loc in result.get("relatedLocations", []) or []: + if isinstance(loc, dict): + try: + pr = loc.get("physicalLocation", {}).get("artifactLocation", {}).get("uri", "") + if pr and "codeql/" in pr: + return pr + except Exception: + pass + return "" + + +def _coerce_str(value: Any) -> str | None: + if value is None: + return None + return str(value) + + diff --git a/tools/codeql/packs.py b/tools/codeql/packs.py new file mode 100644 index 00000000..02c0419f --- /dev/null +++ b/tools/codeql/packs.py @@ -0,0 +1,234 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL pack catalog loading and plan resolution.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +try: + import yaml +except ImportError: # pragma: no cover + yaml = None # type: ignore[assignment] + + +class PackResolverError(RuntimeError): + """Raised when the pack catalog or plan is invalid.""" + + +def _require_yaml() -> None: + if yaml is None: + raise PackResolverError("PyYAML is required to load CodeQL pack catalogs and plans.") + + +def load_yaml_mapping(path: Path, *, what: str) -> dict[str, Any]: + _require_yaml() + try: + data = yaml.safe_load(path.read_text(encoding="utf-8")) + except (yaml.YAMLError, OSError, UnicodeDecodeError) as exc: + raise PackResolverError(f"Failed to read {what} at {path}: {exc}") from exc + if not isinstance(data, dict): + raise PackResolverError(f"{what} at {path} must be a YAML mapping.") + return data + + +def load_pack_catalog(path: Path) -> dict[str, Any]: + """Load and validate the CodeQL pack catalog.""" + data = load_yaml_mapping(path, what="CodeQL pack catalog") + + if data.get("schema_version") != 1: + raise PackResolverError(f"CodeQL pack catalog at {path} must have schema_version: 1.") + + packs = data.get("packs") + if not isinstance(packs, dict) or not packs: + raise 
PackResolverError(f"CodeQL pack catalog at {path} must define a non-empty 'packs' mapping.") + + for language_id, profiles in packs.items(): + if not isinstance(language_id, str) or not language_id: + raise PackResolverError(f"CodeQL pack catalog at {path} contains an invalid language id: {language_id!r}.") + if not isinstance(profiles, dict) or not profiles: + raise PackResolverError( + f"CodeQL pack catalog at {path} must define non-empty profiles for language {language_id!r}." + ) + for profile_name, refs in profiles.items(): + if not isinstance(profile_name, str) or not profile_name: + raise PackResolverError( + f"CodeQL pack catalog at {path} contains an invalid profile name for {language_id!r}." + ) + if not isinstance(refs, list) or not all(isinstance(ref, str) and ref for ref in refs): + raise PackResolverError( + f"CodeQL pack catalog at {path} must define {language_id!r}/{profile_name!r} as a list of pack references." + ) + + candidate_policy = data.get("candidate_policy") + if candidate_policy is not None: + if not isinstance(candidate_policy, dict): + raise PackResolverError(f"CodeQL pack catalog at {path} has a non-mapping 'candidate_policy' section.") + for profile_name, policy in candidate_policy.items(): + if not isinstance(policy, dict): + raise PackResolverError( + f"CodeQL pack catalog at {path} has a non-mapping candidate policy for profile {profile_name!r}." + ) + allow_precreate = policy.get("allow_precreate") + if not isinstance(allow_precreate, bool): + raise PackResolverError( + f"CodeQL pack catalog at {path} must define candidate_policy.{profile_name}.allow_precreate as a boolean." 
+ ) + + return data + + +def load_codeql_plan(path: Path) -> dict[str, Any]: + """Load and validate a CodeQL plan file.""" + data = load_yaml_mapping(path, what="CodeQL plan") + + units = data.get("analysis_units") + if not isinstance(units, list): + raise PackResolverError(f"CodeQL plan at {path} must define 'analysis_units' as a list.") + + for i, unit in enumerate(units): + if not isinstance(unit, dict): + raise PackResolverError(f"CodeQL plan at {path} has non-mapping analysis unit at index {i}.") + unit_id = unit.get("id") + if not isinstance(unit_id, str) or not unit_id: + raise PackResolverError(f"CodeQL plan at {path} has analysis unit {i} without a valid 'id'.") + unit_path = unit.get("path") + if not isinstance(unit_path, str) or not unit_path: + raise PackResolverError(f"CodeQL plan at {path} has analysis unit {unit_id!r} without a valid 'path'.") + languages = unit.get("languages") + if unit.get("recommended") is False and (languages is None or languages == []): + continue + if not isinstance(languages, list) or not languages: + raise PackResolverError(f"CodeQL plan at {path} must define analysis unit {unit_id!r} languages as a non-empty list.") + for j, entry in enumerate(languages): + if not isinstance(entry, dict): + raise PackResolverError(f"CodeQL plan at {path} has non-mapping language entry {j} in analysis unit {unit_id!r}.") + language_id = entry.get("id") + if not isinstance(language_id, str) or not language_id: + raise PackResolverError(f"CodeQL plan at {path} has language entry {j} in analysis unit {unit_id!r} without a valid 'id'.") + profiles = entry.get("packs") + if not isinstance(profiles, list) or not all(isinstance(p, str) and p for p in profiles): + raise PackResolverError( + f"CodeQL plan at {path} must define language {language_id!r} packs as a list of profile names." 
def resolve_pack_profiles(language_id: str, profiles: list[str], catalog: dict[str, Any]) -> list[str]:
    """Expand profile names for one language into concrete pack references.

    References are returned in first-seen order with duplicates across
    profiles removed.

    Raises:
        PackResolverError: if the catalog has no profile mapping for
            *language_id*, or a requested profile is not a list there.
    """
    by_profile = catalog["packs"].get(language_id)
    if not isinstance(by_profile, dict):
        raise PackResolverError(f"Unsupported CodeQL language id: {language_id!r}.")

    # A dict used as an insertion-ordered set: keys are the pack references.
    ordered: dict[str, None] = {}
    for profile_name in profiles:
        refs = by_profile.get(profile_name)
        if not isinstance(refs, list):
            raise PackResolverError(
                f"Unknown CodeQL pack profile {profile_name!r} for language {language_id!r}."
            )
        for ref in refs:
            ordered.setdefault(ref)
    return list(ordered)
def resolve_plan_packs(plan: dict[str, Any], catalog: dict[str, Any], skip_unsupported: bool = False) -> dict[str, Any]:
    """Turn every language entry of a CodeQL plan into concrete pack references.

    Analysis units marked ``recommended: false`` are dropped with a warning.
    A language id missing from the catalog raises ``PackResolverError``
    unless *skip_unsupported* is true, in which case that language is
    dropped with a warning instead. Warnings, if any, are returned under the
    ``warnings`` key of the result.
    """
    notes: list[str] = []
    resolved_units: list[dict[str, Any]] = []

    for unit in plan.get("analysis_units", []):
        if unit.get("recommended") is False:
            notes.append(f"Skipping analysis unit '{unit['id']}' because recommended=false")
            continue

        resolved_languages: list[dict[str, Any]] = []
        for entry in unit.get("languages", []):
            language_id = entry["id"]
            profiles = list(entry.get("packs", []))

            if language_id not in catalog.get("packs", {}):
                if not skip_unsupported:
                    raise PackResolverError(f"Unsupported CodeQL language id: {language_id!r}.")
                notes.append(
                    f"Skipping unsupported CodeQL language '{language_id}' in analysis unit '{unit['id']}'"
                )
                continue

            # Resolve in a fixed order so the first error raised is stable.
            merged_packs = resolve_pack_profiles(language_id, profiles, catalog)
            packs_by_profile = _resolve_profile_packs(language_id, profiles, catalog)
            policy = {
                profile: {"allow_precreate": allow_precreate(profile, catalog)}
                for profile in profiles
            }
            resolved_languages.append(
                {
                    "id": language_id,
                    "profiles": profiles,
                    "packs": merged_packs,
                    "profile_packs": packs_by_profile,
                    "candidate_policy": policy,
                }
            )

        resolved_units.append(
            {
                "id": unit["id"],
                "path": unit["path"],
                "kind": unit.get("kind"),
                "primary": unit.get("primary", False),
                "languages": resolved_languages,
            }
        )

    resolved: dict[str, Any] = {
        "schema_version": 1,
        "generated_by": "codeql-pack-resolver",
        "analysis_units": resolved_units,
    }
    if notes:
        resolved["warnings"] = notes
    return resolved
--git a/tools/codeql/pipeline.py b/tools/codeql/pipeline.py new file mode 100644 index 00000000..1ef412c8 --- /dev/null +++ b/tools/codeql/pipeline.py @@ -0,0 +1,110 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL full pipeline: run analysis, normalize SARIF, import risk, write summary.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any, Callable +from datetime import datetime, timezone + +from codeql.config import ROOT, CodeQLConfig + + +def record_skipped_run(config: CodeQLConfig, reason: str) -> dict[str, Any]: + """Write a skipped CodeQL manifest and summary for a deliberate skip.""" + from codeql.runner import _manifest, write_manifest, write_summary + + started_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + manifest = _manifest( + "skipped", + started_at, + config, + [], + [], + failures=[reason], + skip_reason=reason, + ) + output_dir = config.abs_output_dir + normalized_dir = output_dir / "normalized" + write_manifest(manifest, output_dir) + write_summary(manifest, normalized_dir, output_dir) + return manifest + + +def run_full_pipeline(config: CodeQLConfig, progress: Callable[[str], None] | None = None) -> dict[str, Any]: + """Run the complete CodeQL analysis pipeline. + + Steps (all internal, no printing): + 1. run_codeql(config) -> manifest + 2. write_manifest(manifest, output_dir) + 3. normalize_all(sarif_dir, ...) -> alerts.yml, file-signals.yml (if SARIF exist) + 4. import_risk(signals_path, risk_path) + 5. write_summary(manifest, normalized_dir, output_dir) + + Returns the manifest dict (with extra keys for artifact paths). 
+ """ + from codeql.runner import run_codeql, write_manifest, write_summary + from codeql.normalize import normalize_all + from codeql.import_risk import import_risk + from codeql.packs import load_yaml_mapping + + output_dir = config.abs_output_dir + output_dir.mkdir(parents=True, exist_ok=True) + + # Step 1: run analysis + manifest = run_codeql(config, progress=progress) + + # Step 2: write manifest + write_manifest(manifest, output_dir) + _progress(progress, "CodeQL: manifest written") + + status = manifest["status"] + normalized_dir = output_dir / "normalized" + resolved_path = output_dir / "selected-query-packs.yml" + + # Step 3: normalize SARIF (completed or soft-failed, with SARIF files present) + normalized_ok = False + if status in ("completed", "soft-failed") and resolved_path.is_file(): + sarif_dir = output_dir / "sarif" + if list(sarif_dir.glob("*.sarif")): + try: + resolved = load_yaml_mapping(resolved_path, what="resolved packs") + normalize_all( + sarif_dir, normalized_dir, resolved, + manifest.get("codeql_version", "unknown"), ROOT, + ) + normalized_ok = True + _progress(progress, "CodeQL: normalized SARIF artifacts") + except Exception as exc: + manifest.setdefault("warnings", []).append( + f"SARIF normalization failed: {exc}" + ) + manifest["status"] = "failed" if config.fail_policy == "hard" else "soft-failed" + + # Step 4: import risk (only if normalization succeeded — avoid importing stale signals) + signals_path = normalized_dir / "file-signals.yml" + risk_path = ROOT / "itemdb/notes/file-risk-index.yml" + if normalized_ok and signals_path.is_file(): + try: + import_risk(signals_path, risk_path) + _progress(progress, "CodeQL: imported file risk signals") + except Exception as exc: + manifest.setdefault("warnings", []).append( + f"Risk import failed: {exc}" + ) + + # Re-write manifest so any warnings appended above are on disk. 
+ write_manifest(manifest, output_dir) + + # Step 5: write summary + write_summary(manifest, normalized_dir, output_dir) + _progress(progress, "CodeQL: summary written") + + return manifest + + +def _progress(progress: Callable[[str], None] | None, message: str) -> None: + if progress is not None: + progress(message) diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py new file mode 100644 index 00000000..c4e54f06 --- /dev/null +++ b/tools/codeql/runner.py @@ -0,0 +1,543 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL runner: database create, analyze, and run manifest.""" + +from __future__ import annotations + +import subprocess +import sys +import tempfile +import threading +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Callable + +from codeql.capabilities import supported_build_modes +from codeql.config import ROOT, CodeQLConfig +from codeql.packs import PackResolverError, dump_yaml, load_codeql_plan, load_pack_catalog, resolve_plan_packs + + +def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = None) -> dict[str, Any]: + """Run CodeQL analysis for every language in the plan. + + Returns the run manifest as a dict. 
+ """ + now_utc = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + binary_path = config.abs_install_path + if not binary_path.is_file(): + if config.fail_policy == "hard": + return _manifest("failed", now_utc, config, [], [], failures=[f"CodeQL binary not found at {binary_path}"]) + else: + return _manifest("soft-failed", now_utc, config, [], [], failures=[f"CodeQL binary not found at {binary_path}"]) + + version = _get_codeql_version(binary_path) + _progress(progress, f"CodeQL: using {version}") + + plan_path = ROOT / "itemdb/notes/codeql-plan.yml" + if not plan_path.is_file(): + return _manifest("skipped", now_utc, config, [version], [], failures=["codeql-plan.yml not found"]) + + catalog_path = config.abs_pack_catalog + if not catalog_path.is_file(): + return _manifest("skipped", now_utc, config, [version], [], failures=[f"Pack catalog not found at {catalog_path}"]) + + try: + _progress(progress, f"CodeQL: loading plan {_rel(plan_path)}") + catalog = load_pack_catalog(catalog_path) + plan = load_codeql_plan(plan_path) + skip_unsupported = config.fail_policy == "soft" + resolved = resolve_plan_packs(plan, catalog, skip_unsupported=skip_unsupported) + except PackResolverError as exc: + return _manifest(_tool_failure_status(config), now_utc, config, [version], [], failures=[str(exc)]) + + resolved_path = config.abs_output_dir / "selected-query-packs.yml" + resolved_path.parent.mkdir(parents=True, exist_ok=True) + resolved_path.write_text(dump_yaml(resolved), encoding="utf-8") + _progress(progress, f"CodeQL: resolved packs for {len(resolved['analysis_units'])} analysis unit(s)") + + exclude_patterns = plan.get("exclude", []) + + warnings: list[str] = list(resolved.get("warnings", [])) + failures: list[str] = [] + language_ids: list[str] = [] + analysis_units: list[str] = [] + analyzed_profiles = 0 + + for unit_entry in resolved["analysis_units"]: + unit_id = unit_entry["id"] + source_path = unit_entry["path"] + analysis_units.append(unit_id) + plan_unit 
= _lookup_unit(unit_id, plan.get("analysis_units", [])) + + for lang_entry in unit_entry["languages"]: + language_id = lang_entry["id"] + profiles = lang_entry.get("profiles", []) + profile_packs = lang_entry.get("profile_packs", {}) + language_ids.append(f"{unit_id}:{language_id}") + + build_mode, build_command = _lookup_build(language_id, plan_unit.get("languages", [])) + plan_languages = plan_unit.get("languages", []) + db_timeout = _lookup_timeout("db_create_timeout", language_id, plan_languages, config.db_create_timeout) + analyze_timeout = _lookup_timeout("analyze_timeout", language_id, plan_languages, config.analyze_timeout) + + supported_modes = supported_build_modes(language_id) + if build_mode not in supported_modes: + failures.append( + f"Unsupported build_mode '{build_mode}' for {language_id} in analysis unit {unit_id}. " + f"Allowed: {', '.join(sorted(supported_modes))}" + ) + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) + + db_dir = config.abs_database_dir / unit_id / language_id + sarif_dir = config.abs_output_dir / "sarif" + sarif_dir.mkdir(parents=True, exist_ok=True) + + _progress(progress, f"CodeQL: creating database {unit_id}:{language_id} ({build_mode})") + ok, msg = _create_database( + binary_path, + language_id, + source_path, + db_dir, + build_mode, + build_command, + exclude_patterns, + config.abs_cache_dir, + timeout=db_timeout, + progress=progress, + ) + if not ok: + failures.append(msg) + if config.fail_policy == "soft": + _progress(progress, f"CodeQL: {msg}") + continue + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) + _progress(progress, f"CodeQL: database ready {unit_id}:{language_id}") + + for profile in profiles: + packs = profile_packs.get(profile, []) + if not packs: + continue + ok, msg = _ensure_query_packs_available(binary_path, packs, profile, config, progress) + if not 
ok: + if config.fail_policy == "soft" and profile != "official": + warnings.append(msg) + _progress(progress, f"CodeQL: {msg}") + continue + failures.append(msg) + if config.fail_policy == "soft": + _progress(progress, f"CodeQL: {msg}") + continue + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) + + sarif_path = sarif_dir / f"{unit_id}.{language_id}.{profile}.sarif" + _progress(progress, f"CodeQL: analyzing {unit_id}:{language_id} profile {profile}") + ok, msg = _run_analyze( + binary_path, + db_dir, + packs, + sarif_path, + config.abs_cache_dir, + timeout=analyze_timeout, + progress=progress, + ) + if not ok: + if config.fail_policy == "soft" and profile != "official": + warnings.append(msg) + _progress(progress, f"CodeQL: {msg}") + continue + failures.append(msg) + if config.fail_policy == "soft": + _progress(progress, f"CodeQL: {msg}") + continue + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) + analyzed_profiles += 1 + _progress(progress, f"CodeQL: SARIF written {_rel(sarif_path)}") + + if failures: + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) + + if not language_ids: + return _manifest("skipped", now_utc, config, [version], warnings, + failures=["No languages resolved from analysis plan."], + languages=language_ids, analysis_units=analysis_units) + + if analyzed_profiles == 0: + failures.append("No CodeQL query profiles ran successfully.") + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) + + return _manifest("completed", now_utc, config, [version], warnings, failures, language_ids, analysis_units) + + +def _tool_failure_status(config: CodeQLConfig) -> str: + return "failed" if config.fail_policy == "hard" else "soft-failed" + + +def 
_progress(progress: Callable[[str], None] | None, message: str) -> None: + if progress is not None: + progress(message) + + +def _lookup_unit(unit_id: str, plan_units: list[dict]) -> dict: + """Return the plan analysis unit with *unit_id*.""" + for unit in plan_units: + if unit.get("id") == unit_id: + return unit + return {} + + +def _lookup_build(language_id: str, plan_languages: list[dict]) -> tuple[str, str | None]: + """Return (build_mode, build_command) for a language entry.""" + for pl in plan_languages: + if pl.get("id") == language_id: + mode = pl.get("build_mode", "none") + cmd = pl.get("build_command") + return mode if isinstance(mode, str) and mode else "none", cmd if isinstance(cmd, str) and cmd else None + return "none", None + + +def _lookup_timeout(field: str, language_id: str, plan_languages: list[dict], default: int) -> int: + """Return a per-language timeout, falling back to *default*.""" + for pl in plan_languages: + if pl.get("id") == language_id: + value = pl.get(field) + if isinstance(value, (int, float)) and value > 0: + return int(value) + return default + + +def _get_codeql_version(binary: Path) -> str: + try: + result = subprocess.run( + [str(binary), "--version"], + capture_output=True, text=True, timeout=30, + ) + line = result.stdout.strip().split("\n")[0] + return line.removeprefix("CodeQL version ") + except Exception: + return "unknown" + + +def _create_database( + binary: Path, + language_id: str, + source_path: str, + db_dir: Path, + build_mode: str, + build_command: str | None, + exclude_patterns: list[str], + cache_dir: Path | None = None, + timeout: int = 600, + progress: Callable[[str], None] | None = None, +) -> tuple[bool, str]: + """Create a CodeQL database. 
def _create_database(
    binary: Path,
    language_id: str,
    source_path: str,
    db_dir: Path,
    build_mode: str,
    build_command: str | None,
    exclude_patterns: list[str],
    cache_dir: Path | None = None,
    timeout: int = 600,
    progress: Callable[[str], None] | None = None,
) -> tuple[bool, str]:
    """Create a CodeQL database. Returns (success, message).

    *source_path* is resolved against the workspace ROOT and any existing
    database at *db_dir* is overwritten. When *exclude_patterns* is non-empty
    they are passed as ``paths-ignore`` through a temporary code-scanning
    config under ``<ROOT>/tmp``, which is removed again before returning.

    ``build_mode == "manual"`` requires *build_command*; without it the
    function returns a failure without invoking CodeQL.
    """
    db_dir.parent.mkdir(parents=True, exist_ok=True)

    cmd = [
        str(binary), "database", "create",
        str(db_dir),
        "-l", language_id,
        "-s", str(ROOT / source_path),
        "--overwrite",
        "--no-run-unnecessary-builds",
    ]
    _add_common_caches(cmd, cache_dir)

    if build_mode == "none":
        cmd += ["--build-mode=none"]
    elif build_mode == "manual":
        if not build_command:
            return False, f"build_mode is 'manual' for {language_id} but no build_command provided in the plan"
        cmd += ["--build-mode=manual", "-c", build_command]
    elif build_mode == "autobuild":
        cmd += ["--build-mode=autobuild"]

    temp_dir: Path | None = None
    try:
        # The temp directory is created inside the try so that it is removed
        # even when writing the config file fails.
        if exclude_patterns:
            import yaml as _yaml
            workspace_tmp = ROOT / "tmp"
            workspace_tmp.mkdir(parents=True, exist_ok=True)
            temp_dir = Path(tempfile.mkdtemp(prefix="codeql-codescanning-", dir=str(workspace_tmp)))
            temp_config = temp_dir / "codescanning-config.yml"
            config_content = {"paths-ignore": exclude_patterns}
            temp_config.write_text(_yaml.dump(config_content, default_flow_style=False), encoding="utf-8")
            cmd += ["--codescanning-config=" + str(temp_config)]

        return _run_with_progress(cmd, f"Database create timed out for {language_id} after {timeout}s",
                                  f"Database create failed for {language_id}", timeout, progress)
    finally:
        if temp_dir is not None:
            import shutil as _shutil
            _shutil.rmtree(temp_dir, ignore_errors=True)


def _run_analyze(
    binary: Path,
    db_dir: Path,
    packs: list[str],
    sarif_path: Path,
    cache_dir: Path | None = None,
    timeout: int = 600,
    progress: Callable[[str], None] | None = None,
) -> tuple[bool, str]:
    """Run codeql database analyze. Returns (success, message).

    *packs* are appended to the command line as the queries to run and the
    result is written to *sarif_path* as SARIF without embedded query help.
    Messages identify the database by the last component of *db_dir*.
    """
    cmd = [
        str(binary), "database", "analyze",
        str(db_dir),
        "--format=sarif-latest",
        f"--output={sarif_path}",
        "--sarif-include-query-help=never",
    ]
    _add_common_caches(cmd, cache_dir)
    cmd += packs

    return _run_with_progress(cmd, f"Analyze timed out for {db_dir.name} after {timeout}s",
                              f"Analyze failed for {db_dir.name}", timeout, progress)
Returns (success, message).""" + cmd = [ + str(binary), "database", "analyze", + str(db_dir), + "--format=sarif-latest", + f"--output={sarif_path}", + "--sarif-include-query-help=never", + ] + _add_common_caches(cmd, cache_dir) + cmd += packs + + return _run_with_progress(cmd, f"Analyze timed out for {db_dir.name} after {timeout}s", + f"Analyze failed for {db_dir.name}", timeout, progress) + + +def _ensure_query_packs_available( + binary: Path, + packs: list[str], + profile: str, + config: CodeQLConfig, + progress: Callable[[str], None] | None = None, +) -> tuple[bool, str]: + """Resolve query packs, downloading registry packs once when missing.""" + ok, detail = _run_quiet( + _codeql_pack_cmd(binary, config.abs_cache_dir, "resolve", "queries", "--format=json", "--", *packs), + timeout=120, + ) + if ok: + return True, "" + + downloadable = [pack for pack in packs if _is_registry_pack_ref(pack)] + for pack in downloadable: + _progress(progress, f"CodeQL: downloading query pack {pack}") + download_ok, download_detail = _run_quiet( + _codeql_pack_cmd(binary, config.abs_cache_dir, "pack", "download", "--", pack), + timeout=300, + ) + if not download_ok: + detail = download_detail or detail + return False, _pack_failure_message(profile, packs, detail, config) + + if downloadable: + ok, detail = _run_quiet( + _codeql_pack_cmd(binary, config.abs_cache_dir, "resolve", "queries", "--format=json", "--", *packs), + timeout=120, + ) + if ok: + return True, "" + + return False, _pack_failure_message(profile, packs, detail, config) + + +def _is_registry_pack_ref(pack: str) -> bool: + """Return whether a pack reference can be downloaded from a registry.""" + if pack.startswith((".", "/")): + return False + return "/" in pack + + +def _add_common_caches(cmd: list[str], cache_dir: Path | None) -> None: + """Append CodeQL's workspace-local common cache option when configured.""" + if cache_dir is None or str(cache_dir) in {"", "."}: + return + cache_dir.mkdir(parents=True, 
exist_ok=True) + option = f"--common-caches={cache_dir}" + if "--" in cmd: + cmd.insert(cmd.index("--"), option) + else: + cmd.append(option) + + +def _codeql_pack_cmd(binary: Path, cache_dir: Path | None, *args: str) -> list[str]: + """Build a CodeQL command that uses the workspace-local common cache.""" + cmd = [str(binary), *args] + _add_common_caches(cmd, cache_dir) + return cmd + + +def _pack_failure_message(profile: str, packs: list[str], detail: str, config: CodeQLConfig) -> str: + policy = "required official profile" if profile == "official" else f"optional profile {profile!r}" + action = "failing CodeQL step" if config.fail_policy == "hard" or profile == "official" else "skipping profile" + suffix = f":\n{detail}" if detail else "" + return f"CodeQL query packs unavailable for {policy} ({', '.join(packs)}); {action}{suffix}" + + +def _run_quiet(cmd: list[str], timeout: int) -> tuple[bool, str]: + try: + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout) + except Exception as exc: + return False, str(exc) + if result.returncode == 0: + return True, "" + detail = (result.stderr or result.stdout).strip() + return False, detail + + +def _run_with_progress( + cmd: list[str], + timeout_msg_prefix: str, + failure_msg_prefix: str, + timeout: int, + progress: Callable[[str], None] | None, +) -> tuple[bool, str]: + """Run a subprocess, streaming stderr line-by-line to *progress*.""" + try: + process = subprocess.Popen( + cmd, + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + text=True, + ) + except Exception as exc: + return False, f"{failure_msg_prefix}: {exc}" + + stderr_lines: list[str] = [] + + def _read_stderr() -> None: + for line in process.stderr: + stripped = line.rstrip() + if stripped: + stderr_lines.append(stripped) + _progress(progress, f"CodeQL: {stripped}") + + reader = threading.Thread(target=_read_stderr, daemon=True) + reader.start() + + try: + returncode = process.wait(timeout=timeout) + 
except subprocess.TimeoutExpired: + process.kill() + process.wait() + reader.join(timeout=5) + detail = "\n".join(stderr_lines[-40:]) + return False, f"{timeout_msg_prefix}\n{detail}" if detail else timeout_msg_prefix + + reader.join(timeout=5) + + if returncode != 0: + detail = "\n".join(stderr_lines[-40:]) + return False, f"{failure_msg_prefix}:\n{detail}" if detail else failure_msg_prefix + + return True, "" + + +def _manifest( + status: str, + started_at: str, + config: CodeQLConfig, + versions: list[str], + warnings: list[str], + failures: list[str] | None = None, + languages: list[str] | None = None, + analysis_units: list[str] | None = None, + skip_reason: str | None = None, +) -> dict[str, Any]: + if failures is None: + failures = [] + if languages is None: + languages = [] + if analysis_units is None: + analysis_units = [] + + codeql_version = versions[0] if versions else "unknown" + now_utc = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": status, + "codeql_enabled": config.enabled, + "codeql_version": codeql_version, + "started_at": started_at, + "finished_at": now_utc, + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": str(_rel(config.abs_pack_catalog)), + "fail_policy": config.fail_policy, + "analysis_units": analysis_units, + "languages": languages, + "warnings": warnings, + "failures": failures if failures else [], + } + if skip_reason: + manifest["skip_reason"] = skip_reason + return manifest + + +def write_manifest(manifest: dict[str, Any], output_dir: Path) -> Path: + """Write the run manifest to *output_dir*/run-manifest.yml.""" + import json + + path = output_dir / "run-manifest.yml" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(dump_yaml(manifest), encoding="utf-8") + return path + + +def write_summary(manifest: dict[str, Any], normalized_dir: Path, output_dir: Path) -> Path: + """Write codeql-summary.md.""" + status = 
def write_summary(manifest: dict[str, Any], normalized_dir: Path, output_dir: Path) -> Path:
    """Render *manifest* as Markdown into *output_dir*/codeql-summary.md.

    The summary lists status, version, fail policy and timestamps, then the
    analyzed languages, the alert count from *normalized_dir*/alerts.yml when
    that file can be read, and finally warnings and failures. Returns the
    path that was written.
    """
    def bullet_section(title: str, items: list[str]) -> list[str]:
        if not items:
            return []
        return [f"## {title}", "", *(f"- {item}" for item in items), ""]

    lines = [
        "# CodeQL Analysis Summary",
        "",
        f"- **Status**: {manifest.get('status', 'unknown')}",
        f"- **CodeQL version**: {manifest.get('codeql_version', 'unknown')}",
        f"- **Fail policy**: {manifest.get('fail_policy', 'soft')}",
        f"- **Started**: {manifest.get('started_at', '')}",
        f"- **Finished**: {manifest.get('finished_at', '')}",
        "",
    ]

    languages = manifest.get("languages", [])
    if languages:
        lines += [f"- **Languages**: {', '.join(languages)}", ""]

    alerts_file = normalized_dir / "alerts.yml"
    if alerts_file.is_file():
        from codeql.packs import load_yaml_mapping
        try:
            alerts_doc = load_yaml_mapping(alerts_file, what="alerts")
            alert_count = len(alerts_doc.get("alerts", []))
            lines += [f"- **Total alerts**: {alert_count}", ""]
        except Exception:
            # The alert count is optional; an unreadable file just omits it.
            pass

    lines += bullet_section("Warnings", manifest.get("warnings", []))
    lines += bullet_section("Failures", manifest.get("failures", []))

    summary_path = output_dir / "codeql-summary.md"
    summary_path.parent.mkdir(parents=True, exist_ok=True)
    summary_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    return summary_path


def _rel(path: Path) -> str:
    """Return *path* relative to ROOT as a string, or unchanged when it lies outside ROOT."""
    try:
        return str(path.relative_to(ROOT))
    except ValueError:
        return str(path)
target=self._consumer_worker, - args=(render_fn,), + args=(render_fn, None), name="codecome-chat-consumer", daemon=True, ) self._consumer_thread.start() + def set_raw_event_recorder(self, recorder: Callable[[dict[str, Any]], None] | None) -> None: + """Install an optional raw-event recorder for transcript/debug capture.""" + self._raw_event_recorder = recorder + def send_prompt( self, text: str, @@ -131,7 +135,11 @@ def stop(self) -> None: # Internal # ------------------------------------------------------------------ - def _consumer_worker(self, render_fn: Callable[[Any, str, str, dict[str, Any]], None]) -> None: + def _consumer_worker( + self, + render_fn: Callable[[Any, str, str, dict[str, Any]], None], + record_raw_event_fn: Callable[[dict[str, Any]], None] | None, + ) -> None: """Background thread: consume SSE, render events, signal idle.""" if self.debug: self.debug("_consumer_worker: starting SSE client") @@ -157,6 +165,10 @@ def _consumer_worker(self, render_fn: Callable[[Any, str, str, dict[str, Any]], if not self._belongs_to_session(event): continue + recorder = record_raw_event_fn or getattr(self, "_raw_event_recorder", None) + if recorder is not None: + recorder(event) + event_count += 1 if self.debug and (event_count <= 5 or event_count % 20 == 0): self.debug(f"_consumer_worker: event #{event_count} type={event.get('type')}") diff --git a/tools/events/phase_loop.py b/tools/events/phase_loop.py index d4efb45f..aba10abb 100644 --- a/tools/events/phase_loop.py +++ b/tools/events/phase_loop.py @@ -59,6 +59,7 @@ def __init__( def run( self, render_fn: Callable[[Any, str, str, dict[str, Any]], None], + record_raw_event_fn: Callable[[dict[str, Any]], None] | None = None, ) -> RunResult: _any_step_finish_seen = False _step_finish_count = 0 @@ -83,6 +84,9 @@ def run( if not self._belongs_to_session(event): continue + if record_raw_event_fn is not None: + record_raw_event_fn(event) + if self._should_skip_message_updated(event): continue diff --git 
a/tools/events/state_tracker.py b/tools/events/state_tracker.py index 566839a3..aadd8565 100644 --- a/tools/events/state_tracker.py +++ b/tools/events/state_tracker.py @@ -110,9 +110,12 @@ def _handle_updated(self, event: dict[str, Any]) -> list[dict[str, Any]]: return [] def _build_finalized_event(self, event: dict[str, Any]) -> dict[str, Any] | None: - """ Convert a message.part.updated into the ND-JSON shape expected by render_event(). + """Convert a message.part.updated into the ND-JSON shape expected by render_event(). - Returns None for event types we don't translate yet (e.g. async progress). + Returns None only for parts that are not yet finalized: text and reasoning + parts without ``time.end``, and tool parts that are still pending/running. + Unknown part types are normalized into a ``message.part.updated`` envelope + with a top-level ``"part"`` key instead of returning None. """ props = event.get("properties", {}) part = props.get("part", {}) @@ -166,8 +169,22 @@ def _build_finalized_event(self, event: dict[str, Any]) -> dict[str, Any] | None } return None - # Pass through unknown part types as raw event. - return event + if part_type == "patch": + return { + "type": "patch", + "timestamp": event.get("timestamp", 0), + "sessionID": props.get("sessionID", ""), + "part": part, + } + + # Pass through unknown part types with a normalized envelope so that + # downstream renderers always receive a top-level "part" key. 
+ return { + "type": "message.part.updated", + "timestamp": event.get("timestamp", 0), + "sessionID": props.get("sessionID", ""), + "part": part, + } def _map_session_diff(self, event: dict[str, Any]) -> dict[str, Any] | None: """Map non-empty session.diff into a compact compatibility event.""" diff --git a/tools/findings/checks_entry.py b/tools/findings/checks_entry.py index 4925f8da..afea2744 100644 --- a/tools/findings/checks_entry.py +++ b/tools/findings/checks_entry.py @@ -5,10 +5,16 @@ import sys from pathlib import Path +from typing import Optional import _colors as C -from findings.constants import FILE_RISK_INDEX_PATH, FILE_RISK_INDEX_REL, ROOT +from findings.constants import ( + FILE_RISK_INDEX_PATH, + FILE_RISK_INDEX_REL, + FindingsContext, + ROOT, +) from findings.checks import validate_finding, validate_file_risk_index, iter_all_finding_files @@ -20,48 +26,76 @@ def build_parser(): return parser -def main() -> int: - import argparse - parser = build_parser() - parser.parse_args() +def run_frontmatter_validation( + ctx: Optional[FindingsContext] = None, +) -> tuple[int, str]: + """Run frontmatter validation in-process and return (exit_code, output_text). - paths = iter_all_finding_files() + This is the reusable entrypoint for code paths that need to validate + frontmatter without shelling out to a subprocess (phase retry loops, + gate checks, etc.). + Parameters + ---------- + ctx : FindingsContext, optional + Injectable context for testing. When None, uses default global paths. 
+ """ + import io + + if ctx is None: + ctx = FindingsContext.default() + + _root = ctx.root + _risk_index_path = _root / "itemdb" / "notes" / "file-risk-index.yml" + _risk_index_rel = Path("itemdb/notes/file-risk-index.yml") + + out = io.StringIO() + + paths = iter_all_finding_files() total_errors = 0 index_errors = validate_file_risk_index() if index_errors: total_errors += len(index_errors) - print(C.fail(str(FILE_RISK_INDEX_REL))) + out.write(C.fail(str(_risk_index_rel)) + "\n") for error in index_errors: - print(f" {C.SYM_BULLET} {error}") + out.write(f" {C.SYM_BULLET} {error}\n") else: - if FILE_RISK_INDEX_PATH.exists(): - print(C.ok(str(FILE_RISK_INDEX_REL))) + if _risk_index_path.exists(): + out.write(C.ok(str(_risk_index_rel)) + "\n") - if not paths: - if not FILE_RISK_INDEX_PATH.exists(): - print(C.info("No findings or index to validate.")) - return 0 if total_errors == 0 else 1 + if not paths and not _risk_index_path.exists(): + out.write(C.info("No findings or index to validate.") + "\n") for path in paths: errors = validate_finding(path) - if not errors: - print(C.ok(str(path.relative_to(ROOT)))) + out.write(C.ok(str(path.relative_to(_root))) + "\n") continue - total_errors += len(errors) - print(C.fail(str(path.relative_to(ROOT)))) + out.write(C.fail(str(path.relative_to(_root))) + "\n") for error in errors: - print(f" {C.SYM_BULLET} {error}") + out.write(f" {C.SYM_BULLET} {error}\n") + + if paths or _risk_index_path.exists(): + if total_errors: + out.write(f"\n{C.fail(f'Found {total_errors} frontmatter error(s).')}\n") + else: + out.write(f"\n{C.ok(f'Validated {len(paths)} finding(s).')}\n") + + return (1 if total_errors else 0, out.getvalue()) + - if total_errors: - print(f"\n{C.fail(f'Found {total_errors} frontmatter error(s).')}", file=sys.stderr) - return 1 +def main() -> int: + import argparse + parser = build_parser() + parser.parse_args() - print(f"\n{C.ok(f'Validated {len(paths)} finding(s).')}") - return 0 + exit_code, output = 
run_frontmatter_validation() + sys.stdout.write(output) + if exit_code != 0: + print(output.split("\n")[-2] if output.strip() else "", file=sys.stderr) + return exit_code if __name__ == "__main__": diff --git a/tools/gate-check.py b/tools/gate-check.py index cb3f9814..9572e854 100755 --- a/tools/gate-check.py +++ b/tools/gate-check.py @@ -20,285 +20,23 @@ from __future__ import annotations import argparse -import re import sys from pathlib import Path -try: - import yaml -except ImportError: # pragma: no cover - yaml = None - # Allow importing sibling modules. sys.path.insert(0, str(Path(__file__).resolve().parent)) -from _colors import ok, fail, warn, header, info, GREEN, RESET, BOLD, SYM_OK - -ROOT = Path(__file__).resolve().parents[1] -FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) -EVIDENCE_TEMPLATE_MARKERS = [ - "Briefly summarize what this evidence proves or disproves.", - "Describe the validation method used.", - "command goes here", - "Describe what happened.", -] - -REQUIRED_NOTES = [ - "target-profile.md", - "attack-surface.md", -] - -FINDING_STATUS_DIRS = [ - "PENDING", - "CONFIRMED", - "EXPLOITED", - "REJECTED", - "DUPLICATE", -] - - -def has_source_files() -> bool: - """Return True if src/ contains at least one file (not just .gitkeep).""" - src_dir = ROOT / "src" - if not src_dir.exists(): - return False - for child in src_dir.rglob("*"): - if child.is_file() and child.name != ".gitkeep": - return True - return False - - -def has_notes(*names: str) -> list[str]: - """Return list of missing note files.""" - notes_dir = ROOT / "itemdb" / "notes" - missing = [] - for name in names: - if not (notes_dir / name).exists(): - missing.append(name) - return missing - - -def count_findings(status: str) -> int: - """Count finding files in a status directory.""" - status_dir = ROOT / "itemdb" / "findings" / status - if not status_dir.exists(): - return 0 - return len(list(status_dir.glob("CC-*.md"))) - - -def count_all_findings() -> int: - 
"""Count finding files across all status directories.""" - return sum(count_findings(s) for s in FINDING_STATUS_DIRS) - - -def load_frontmatter(path: Path) -> dict[str, object]: - """Load YAML frontmatter from a finding file.""" - if yaml is None: - raise RuntimeError("PyYAML is not installed. Run: pip install -r requirements.txt") - - content = path.read_text(encoding="utf-8") - match = FRONTMATTER_RE.match(content) - if not match: - return {} - - data = yaml.safe_load(match.group(1)) - return data if isinstance(data, dict) else {} - - -def find_finding(identifier: str) -> Path | None: - """Locate a finding file by path or ID.""" - candidate = Path(identifier) - if candidate.is_absolute() and candidate.exists(): - return candidate.resolve() - - root_relative = ROOT / identifier - if root_relative.exists(): - return root_relative.resolve() - - findings_root = ROOT / "itemdb" / "findings" - for status in FINDING_STATUS_DIRS: - status_dir = findings_root / status - if not status_dir.exists(): - continue - # Exact match: CC-0003.md (no slug) - exact = status_dir / f"{identifier}.md" - if exact.exists(): - return exact.resolve() - # Slug match: CC-0003-some-title.md - matches = list(status_dir.glob(f"{identifier}-*.md")) - if matches: - return matches[0].resolve() - return None - - -def has_meaningful_evidence(finding_id: str) -> bool: - """Return True when the evidence directory contains more than scaffolding.""" - evidence_dir = ROOT / "itemdb" / "evidence" / finding_id - if not evidence_dir.exists(): - return False - - files = [path for path in evidence_dir.rglob("*") if path.is_file()] - if not files: - return False - - non_readme_files = [path for path in files if path.name != "README.md"] - if non_readme_files: - return True - - readme_path = evidence_dir / "README.md" - if not readme_path.exists(): - return False - - content = readme_path.read_text(encoding="utf-8") - return not any(marker in content for marker in EVIDENCE_TEMPLATE_MARKERS) - - -def 
gate_phase_1() -> int: - """Phase 1: src/ must contain target source code.""" - print(header("Phase 1: Target Reconnaissance")) - print() - - if not has_source_files(): - print(fail("src/ is empty or does not exist.")) - print() - print(info("Place target source code under src/ before running Phase 1.")) - print(info("See docs/target-setup.md for instructions.")) - return 1 - - print(ok("src/ contains source files.")) - print() - print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 1.") - return 0 - - -def gate_phase_2() -> int: - """Phase 2: reconnaissance notes must exist.""" - print(header("Phase 2: Vulnerability Hypothesis Generation")) - print() - - missing = has_notes(*REQUIRED_NOTES) - if missing: - print(fail("Required reconnaissance notes are missing:")) - for name in missing: - print(f" {name}") - print() - print(info("Run Phase 1 first: make phase-1")) - return 1 - - print(ok("Required reconnaissance notes exist.")) - print() - print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 2.") - return 0 - - -def gate_phase_3() -> int: - """Phase 3: at least one PENDING finding must exist.""" - print(header("Phase 3: Counter-analysis")) - print() - - nv_count = count_findings("PENDING") - if nv_count == 0: - print(fail("No findings in PENDING.")) - print() - print(info("Run Phase 2 first: make phase-2")) - return 1 - - print(ok(f"{nv_count} finding(s) in PENDING.")) - print() - print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 3.") - return 0 - - -def gate_phase_4(identifier: str) -> int: - """Phase 4: finding must exist and be in PENDING.""" - print(header(f"Phase 4: Validate {identifier}")) - print() - - path = find_finding(identifier) - if path is None: - print(fail(f"Finding not found: {identifier}")) - print() - print(info("Check available findings: make status")) - return 1 - - if path.parent.name != "PENDING": - print(warn(f"{path.stem} is in {path.parent.name}, not PENDING.")) - print() - print(info("Only PENDING findings can be validated.")) - return 1 - - 
print(ok(f"Found: {path.relative_to(ROOT)}")) - print() - print(f"{GREEN}{SYM_OK}{RESET} Ready to validate {path.stem}.") - return 0 - - -def gate_phase_5(identifier: str) -> int: - """Phase 5: finding must be CONFIRMED with evidence.""" - print(header(f"Phase 5: Exploit Development for {identifier}")) - print() - - path = find_finding(identifier) - if path is None: - print(fail(f"Finding not found: {identifier}")) - print() - print(info("Check available findings: make status")) - return 1 - - if path.parent.name != "CONFIRMED": - print(warn(f"{path.stem} is in {path.parent.name}, not CONFIRMED.")) - print() - print(info("Only CONFIRMED findings can have exploits developed.")) - return 1 - - frontmatter = load_frontmatter(path) - validation = frontmatter.get("validation") - validation_status = validation.get("status") if isinstance(validation, dict) else None - if validation_status != "CONFIRMED": - print(warn(f"{path.stem} has validation.status={validation_status!r}, not 'CONFIRMED'.")) - print() - print(info("Only findings with confirmed validation evidence can enter Phase 5.")) - return 1 - - finding_id = str(frontmatter.get("id", "-".join(path.stem.split("-", 2)[:2]))) - - evidence_dir = ROOT / "itemdb" / "evidence" / finding_id - if not has_meaningful_evidence(finding_id): - print(warn(f"No meaningful validation evidence found under itemdb/evidence/{finding_id}/.")) - print() - print(info("Run Phase 4 first and record actual evidence before Phase 5.")) - return 1 - - print(ok(f"Found: {path.relative_to(ROOT)}")) - print(ok(f"Evidence exists: itemdb/evidence/{finding_id}/")) - print() - print(f"{GREEN}{SYM_OK}{RESET} Ready to develop exploit for {finding_id}.") - return 0 - - -def gate_phase_6() -> int: - """Phase 6: at least one finding must exist.""" - print(header("Phase 6: Reporting")) - print() - - total = count_all_findings() - if total == 0: - print(fail("No findings exist in any status directory.")) - print() - print(info("Run Phases 1-5 first to 
produce findings.")) - return 1 - - print(ok(f"{total} finding(s) across all status directories.")) - print() - print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 6.") - return 0 +from phases.gates import run_from_cli def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description="Check readiness gates for a CodeCome phase.", ) - parser.add_argument("phase", type=int, choices=[1, 2, 3, 4, 5, 6], help="Phase number.") + parser.add_argument( + "phase", + help="Phase number (1-6) or subphase (1a, 1b, 1c).", + ) parser.add_argument("finding_id", nargs="?", help="Finding ID or path (required for Phase 4 and 5).") return parser @@ -306,33 +44,7 @@ def build_parser() -> argparse.ArgumentParser: def main() -> int: parser = build_parser() args = parser.parse_args() - - if args.phase == 1: - return gate_phase_1() - elif args.phase == 2: - return gate_phase_2() - elif args.phase == 3: - return gate_phase_3() - elif args.phase == 4: - if not args.finding_id: - print(fail("Phase 4 requires a finding ID.")) - print() - print(info("Usage: ./tools/gate-check.py 4 CC-0001")) - print(info(" or: ./tools/gate-check.py 4 itemdb/findings/PENDING/CC-0001-test.md")) - return 1 - return gate_phase_4(args.finding_id) - elif args.phase == 5: - if not args.finding_id: - print(fail("Phase 5 requires a finding ID.")) - print() - print(info("Usage: ./tools/gate-check.py 5 CC-0001")) - print(info(" or: ./tools/gate-check.py 5 itemdb/findings/CONFIRMED/CC-0001-test.md")) - return 1 - return gate_phase_5(args.finding_id) - elif args.phase == 6: - return gate_phase_6() - - return 1 + return run_from_cli(args) if __name__ == "__main__": diff --git a/tools/phases/completion.py b/tools/phases/completion.py index 05338221..3ca941b7 100644 --- a/tools/phases/completion.py +++ b/tools/phases/completion.py @@ -87,32 +87,39 @@ def _exploitation_status_looks_real(frontmatter: dict[str, Any] | None) -> bool: def check_phase_graceful_completion(phase: str, finding: str | None, 
run_start_time: float) -> bool: + phase_key = str(phase) + phase_is_1c = phase_key == "1c" + if phase_key in ("1a", "1b", "1c"): + phase_key = "1" + try: - if str(phase) == "1": + if phase_key == "1": required_artifacts = _phase1_required_artifacts() if all(path.exists() for path in required_artifacts): - fresh_required = any(_path_is_fresh(path, run_start_time) for path in required_artifacts) sandbox_generated = ROOT / "sandbox" / "CODECOME-GENERATED.md" sandbox_state_recorded = _path_is_fresh(sandbox_generated, run_start_time) or _path_is_fresh( SANDBOX_PLAN_PATH, run_start_time ) + if phase_is_1c: + return sandbox_state_recorded + fresh_required = any(_path_is_fresh(path, run_start_time) for path in required_artifacts) return fresh_required and sandbox_state_recorded return False - elif str(phase) in ("2", "sweep"): + elif phase_key in ("2", "sweep"): pending_dir = finding_status_dir("PENDING") if pending_dir.exists(): return any(f.name.endswith(".md") and f.name != ".gitkeep" and f.stat().st_mtime >= run_start_time for f in pending_dir.iterdir()) return False - elif str(phase) == "3": + elif phase_key == "3": findings_dir = FINDINGS_ROOT return any( path.suffix == ".md" and path.name != ".gitkeep" and path.stat().st_mtime >= run_start_time for path in _iter_files(findings_dir) ) - elif str(phase) == "4" and finding: + elif phase_key == "4" and finding: evidence_dir = evidence_dir_for(finding) return any(path.stat().st_mtime >= run_start_time for path in _iter_files(evidence_dir)) - elif str(phase) == "5" and finding: + elif phase_key == "5" and finding: exploited_file = finding_status_dir("EXPLOITED") / f"{finding}.md" if ( exploited_file.exists() @@ -147,7 +154,7 @@ def check_phase_graceful_completion(phase: str, finding: str | None, run_start_t return True return False - elif str(phase) == "6": + elif phase_key == "6": reports_dir = REPORTS_ROOT if reports_dir.exists(): return any(f.name.endswith(".md") and f.name != ".gitkeep" and f.stat().st_mtime >= 
run_start_time for f in reports_dir.iterdir()) @@ -232,6 +239,42 @@ def build_frontmatter_resume_prompt(phase: str, finding: str | None, validation_ ) +def build_codeql_plan_resume_prompt(validation_output: str) -> str: + return ( + "Your previous run created or edited `itemdb/notes/codeql-plan.yml`, but the file failed local " + "CodeQL plan validation.\n\n" + "Validation errors:\n" + f"{validation_output}\n\n" + "Repair only `itemdb/notes/codeql-plan.yml` with the smallest change needed. Do not redo unrelated " + "reconnaissance or modify target source code. Preserve the existing analysis units, pack selections, " + "manual build commands, and notes unless a reported validation error requires changing them.\n\n" + "Before ending, verify that the repaired plan passes local validation by running `rtk python3 tools/codecome.py check-codeql-plan`." + ) + + +def build_codeql_build_failure_resume_prompt(validation_output: str) -> str: + return ( + "The repaired `itemdb/notes/codeql-plan.yml` was valid, but the next CodeQL database creation run still " + "failed. Continue the same narrow CodeQL build repair task.\n\n" + "Latest CodeQL failure details:\n" + f"{validation_output}\n\n" + "Repair only `itemdb/notes/codeql-plan.yml` and any helper scripts under workspace-relative `tmp/` or " + "`sandbox/`. Do not modify target source code.\n\n" + "Important execution model: CodeQL runs the manual `build_command` with the current working directory set " + "to the analysis unit source path (`analysis_units[].path`). It is not run from the workspace root, and it " + "is not run from the helper script directory. If a helper script changes directory, it must do so based on " + "the analysis source root or explicit paths that work from that source root.\n\n" + "CodeQL tokenizes `build_command` as argv; it does not execute it as a shell script. 
Do not put shell " + "control syntax in `build_command`: no `&&`, `||`, `;`, pipes, comments, multi-line commands, or " + "`bash -c` / `sh -c` snippets. If more than one command is needed, create a helper script under " + "workspace-relative `tmp/` and set `build_command` to invoke it, for example `bash ../../tmp/codeql-build.sh`.\n\n" + "Do not use absolute `/tmp/` paths. Use workspace-relative `tmp/` paths. Do not embed this workspace's " + "absolute path in `build_command`; prefer paths relative to the analysis unit source path.\n\n" + "Before ending, verify that the plan is valid YAML, that referenced helper scripts exist, and that shell " + "helpers pass syntax-only validation." + ) + + def build_resume_command(initial_command: list[str], session_id: str, prompt: str) -> list[str]: """Preserve connection/runtime flags needed to reach the original session.""" resume = ["opencode", "run"] diff --git a/tools/phases/gates.py b/tools/phases/gates.py new file mode 100644 index 00000000..4595d778 --- /dev/null +++ b/tools/phases/gates.py @@ -0,0 +1,316 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""Phase gate implementation for the gate-check CLI.""" + +from __future__ import annotations + +import re +from pathlib import Path + +try: + import yaml +except ImportError: # pragma: no cover + yaml = None + +from _colors import ok, fail, warn, header, info, GREEN, RESET, SYM_OK +from codecome.config import ROOT +from phases.phase_1_gates import check_phase_1a, check_phase_1b, check_phase_1c + + +FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) +EVIDENCE_TEMPLATE_MARKERS = [ + "Briefly summarize what this evidence proves or disproves.", + "Describe the validation method used.", + "command goes here", + "Describe what happened.", +] + +REQUIRED_NOTES = [ + "target-profile.md", + "attack-surface.md", +] + +FINDING_STATUS_DIRS = [ + "PENDING", + "CONFIRMED", + "EXPLOITED", + 
"REJECTED", + "DUPLICATE", +] + + +def has_source_files() -> bool: + """Return True if src/ contains at least one file (not just .gitkeep).""" + src_dir = ROOT / "src" + if not src_dir.exists(): + return False + for child in src_dir.rglob("*"): + if child.is_file() and child.name != ".gitkeep": + return True + return False + + +def has_notes(*names: str) -> list[str]: + """Return list of missing note files.""" + notes_dir = ROOT / "itemdb" / "notes" + missing = [] + for name in names: + if not (notes_dir / name).exists(): + missing.append(name) + return missing + + +def count_findings(status: str) -> int: + """Count finding files in a status directory.""" + status_dir = ROOT / "itemdb" / "findings" / status + if not status_dir.exists(): + return 0 + return len(list(status_dir.glob("CC-*.md"))) + + +def count_all_findings() -> int: + """Count finding files across all status directories.""" + return sum(count_findings(s) for s in FINDING_STATUS_DIRS) + + +def load_frontmatter(path: Path) -> dict[str, object]: + """Load YAML frontmatter from a finding file.""" + if yaml is None: + raise RuntimeError("PyYAML is not installed. 
Run: pip install -r requirements.txt") + + content = path.read_text(encoding="utf-8") + match = FRONTMATTER_RE.match(content) + if not match: + return {} + + data = yaml.safe_load(match.group(1)) + return data if isinstance(data, dict) else {} + + +def find_finding(identifier: str) -> Path | None: + """Locate a finding file by path or ID.""" + candidate = Path(identifier) + if candidate.is_absolute() and candidate.exists(): + return candidate.resolve() + + root_relative = ROOT / identifier + if root_relative.exists(): + return root_relative.resolve() + + findings_root = ROOT / "itemdb" / "findings" + for status in FINDING_STATUS_DIRS: + status_dir = findings_root / status + if not status_dir.exists(): + continue + exact = status_dir / f"{identifier}.md" + if exact.exists(): + return exact.resolve() + matches = list(status_dir.glob(f"{identifier}-*.md")) + if matches: + return matches[0].resolve() + return None + + +def has_meaningful_evidence(finding_id: str) -> bool: + """Return True when the evidence directory contains more than scaffolding.""" + evidence_dir = ROOT / "itemdb" / "evidence" / finding_id + if not evidence_dir.exists(): + return False + + files = [path for path in evidence_dir.rglob("*") if path.is_file()] + if not files: + return False + + non_readme_files = [path for path in files if path.name != "README.md"] + if non_readme_files: + return True + + readme_path = evidence_dir / "README.md" + if not readme_path.exists(): + return False + + content = readme_path.read_text(encoding="utf-8") + return not any(marker in content for marker in EVIDENCE_TEMPLATE_MARKERS) + + +def gate_phase_1() -> int: + print(header("Phase 1: Target Reconnaissance")) + print() + + if not has_source_files(): + print(fail("src/ is empty or does not exist.")) + print() + print(info("Place target source code under src/ before running Phase 1.")) + print(info("See docs/target-setup.md for instructions.")) + return 1 + + print(ok("src/ contains source files.")) + print() + 
print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 1.") + return 0 + + +def gate_phase_2() -> int: + print(header("Phase 2: Vulnerability Hypothesis Generation")) + print() + + missing = has_notes(*REQUIRED_NOTES) + if missing: + print(fail("Required reconnaissance notes are missing:")) + for name in missing: + print(f" {name}") + print() + print(info("Run Phase 1 first: make phase-1")) + return 1 + + print(ok("Required reconnaissance notes exist.")) + print() + print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 2.") + return 0 + + +def gate_phase_3() -> int: + print(header("Phase 3: Counter-analysis")) + print() + + nv_count = count_findings("PENDING") + if nv_count == 0: + print(fail("No findings in PENDING.")) + print() + print(info("Run Phase 2 first: make phase-2")) + return 1 + + print(ok(f"{nv_count} finding(s) in PENDING.")) + print() + print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 3.") + return 0 + + +def gate_phase_4(identifier: str) -> int: + print(header(f"Phase 4: Validate {identifier}")) + print() + + path = find_finding(identifier) + if path is None: + print(fail(f"Finding not found: {identifier}")) + print() + print(info("Check available findings: make status")) + return 1 + + if path.parent.name != "PENDING": + print(warn(f"{path.stem} is in {path.parent.name}, not PENDING.")) + print() + print(info("Only PENDING findings can be validated.")) + return 1 + + print(ok(f"Found: {path.relative_to(ROOT)}")) + print() + print(f"{GREEN}{SYM_OK}{RESET} Ready to validate {path.stem}.") + return 0 + + +def gate_phase_5(identifier: str) -> int: + print(header(f"Phase 5: Exploit Development for {identifier}")) + print() + + path = find_finding(identifier) + if path is None: + print(fail(f"Finding not found: {identifier}")) + print() + print(info("Check available findings: make status")) + return 1 + + if path.parent.name != "CONFIRMED": + print(warn(f"{path.stem} is in {path.parent.name}, not CONFIRMED.")) + print() + print(info("Only CONFIRMED findings can 
have exploits developed.")) + return 1 + + frontmatter = load_frontmatter(path) + validation = frontmatter.get("validation") + validation_status = validation.get("status") if isinstance(validation, dict) else None + if validation_status != "CONFIRMED": + print(warn(f"{path.stem} has validation.status={validation_status!r}, not 'CONFIRMED'.")) + print() + print(info("Only findings with confirmed validation evidence can enter Phase 5.")) + return 1 + + finding_id = str(frontmatter.get("id", "-".join(path.stem.split("-", 2)[:2]))) + if not has_meaningful_evidence(finding_id): + print(warn(f"No meaningful validation evidence found under itemdb/evidence/{finding_id}/.")) + print() + print(info("Run Phase 4 first and record actual evidence before Phase 5.")) + return 1 + + print(ok(f"Found: {path.relative_to(ROOT)}")) + print(ok(f"Evidence exists: itemdb/evidence/{finding_id}/")) + print() + print(f"{GREEN}{SYM_OK}{RESET} Ready to develop exploit for {finding_id}.") + return 0 + + +def gate_phase_6() -> int: + print(header("Phase 6: Reporting")) + print() + + total = count_all_findings() + if total == 0: + print(fail("No findings exist in any status directory.")) + print() + print(info("Run Phases 1-5 first to produce findings.")) + return 1 + + print(ok(f"{total} finding(s) across all status directories.")) + print() + print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 6.") + return 0 + + +def run_from_cli(args) -> int: + phase_str = str(args.phase) + + if phase_str == "1a": + return check_phase_1a() + if phase_str == "1b": + return check_phase_1b() + if phase_str == "1c": + return check_phase_1c() + + try: + phase_int = int(phase_str) + except ValueError: + print(fail(f"Invalid phase: {phase_str}")) + print() + print(info("Valid values: 1, 2, 3, 4, 5, 6, 1a, 1b, 1c")) + return 1 + + if phase_int == 1: + return gate_phase_1() + if phase_int == 2: + return gate_phase_2() + if phase_int == 3: + return gate_phase_3() + if phase_int == 4: + if not args.finding_id: + 
print(fail("Phase 4 requires a finding ID.")) + print() + print(info("Usage: ./tools/gate-check.py 4 CC-0001")) + print(info(" or: ./tools/gate-check.py 4 itemdb/findings/PENDING/CC-0001-test.md")) + return 1 + return gate_phase_4(args.finding_id) + if phase_int == 5: + if not args.finding_id: + print(fail("Phase 5 requires a finding ID.")) + print() + print(info("Usage: ./tools/gate-check.py 5 CC-0001")) + print(info(" or: ./tools/gate-check.py 5 itemdb/findings/CONFIRMED/CC-0001-test.md")) + return 1 + return gate_phase_5(args.finding_id) + if phase_int == 6: + return gate_phase_6() + + print(fail(f"Invalid phase: {phase_str}")) + print() + print(info("Valid values: 1, 2, 3, 4, 5, 6, 1a, 1b, 1c")) + return 1 diff --git a/tools/phases/phase_1_gates.py b/tools/phases/phase_1_gates.py new file mode 100644 index 00000000..65d2d4c6 --- /dev/null +++ b/tools/phases/phase_1_gates.py @@ -0,0 +1,415 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""Reusable Phase 1 subphase gate logic. + +This module holds the implementation for the Phase 1a/1b/1c checks so the +root ``tools/gate-check.py`` script can remain a thin CLI wrapper. 
+""" + +from __future__ import annotations + +try: + import yaml +except ImportError: # pragma: no cover + yaml = None # type: ignore[assignment] + +import _colors as C + +from codecome.config import ROOT +from codeql.capabilities import is_supported_language, supported_build_modes + +try: + from codeql.config import resolve_config as _resolve_codeql_config +except ImportError: + _resolve_codeql_config = None # type: ignore[assignment] + + +REQUIRED_NOTES_1B = [ + "attack-surface.md", + "execution-model.md", + "trust-boundaries.md", + "data-flow.md", + "validation-model.md", + "interesting-files.md", + "file-risk-index.yml", + "security-assumptions.md", +] + +FINDING_STATUS_DIRS = [ + "PENDING", + "CONFIRMED", + "EXPLOITED", + "REJECTED", + "DUPLICATE", +] + +try: + from rich.console import Console as _RichConsole + + HAVE_RICH = True +except ImportError: # pragma: no cover + _RichConsole = None # type: ignore[assignment] + HAVE_RICH = False + + +def _emit(console, level: str, text: str) -> None: + """Emit a gate message through rich Console or plain output.""" + if console is not None and HAVE_RICH: + from rich.text import Text + + style_map = { + "header": "bold cyan", + "ok": "green", + "fail": "bold red", + "warn": "yellow", + "info": "dim", + } + console.print(Text(text, style=style_map.get(level, ""))) + return + + fn_map = { + "header": C.header, + "ok": C.ok, + "fail": C.fail, + "warn": C.warn, + "info": C.info, + } + formatter = fn_map.get(level) + print(formatter(text) if formatter else text) + + +def _emit_separator(console, style: str = "green") -> None: + """Emit a visual separator for rich/plain output.""" + if console is not None and HAVE_RICH: + from rich.rule import Rule + + console.print(Rule(style=style)) + else: + print() + + +def _notes_exist(*names: str) -> list[str]: + """Return names of note files missing from ``itemdb/notes``.""" + notes_dir = ROOT / "itemdb" / "notes" + return [name for name in names if not (notes_dir / name).exists()] + + 
+def _codeql_fail_policy() -> str: + """Return configured CodeQL fail policy, defaulting to soft on errors.""" + if _resolve_codeql_config is None: + return "soft" + try: + return _resolve_codeql_config().fail_policy + except Exception: + return "soft" + + +def _validate_codeql_language_entry( + *, + console, + unit_id: str, + lang: object, + index: int, + seen_databases: set[tuple[str, str]], + valid_confidences: set[str], +) -> int | None: + """Validate one language entry from codeql-plan.yml.""" + if not isinstance(lang, dict): + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' language entry {index} is not a mapping") + return 1 + language_id = lang.get("id") + if not isinstance(language_id, str) or not language_id: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' language entry {index} missing valid 'id'") + return 1 + if not is_supported_language(language_id): + fail_policy = _codeql_fail_policy() + if fail_policy == "hard": + _emit(console, "fail", f"codeql-plan.yml: unsupported CodeQL language '{language_id}' in analysis unit '{unit_id}'") + return 1 + _emit(console, "warn", f"codeql-plan.yml: unsupported CodeQL language '{language_id}' in analysis unit '{unit_id}' — will be skipped (fail_policy=soft)") + return None + db_key = (unit_id, language_id) + if db_key in seen_databases: + _emit(console, "fail", f"codeql-plan.yml: duplicate language '{language_id}' in analysis unit '{unit_id}'") + return 1 + seen_databases.add(db_key) + if lang.get("confidence") not in valid_confidences: + _emit( + console, + "warn", + f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' has unexpected confidence '{lang.get('confidence')}'", + ) + build_mode = lang.get("build_mode") + supported_modes = supported_build_modes(language_id) + if build_mode not in supported_modes: + allowed = ", ".join(sorted(supported_modes)) + _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' has 
unsupported build_mode '{build_mode}' (allowed: {allowed})") + return 1 + build_command = lang.get("build_command") + if build_mode == "manual" and not (isinstance(build_command, str) and build_command.strip()): + _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' uses manual build without build_command") + return 1 + if "packs" not in lang: + _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' missing 'packs'") + return 1 + if not isinstance(lang["packs"], list) or len(lang["packs"]) == 0: + _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' has empty packs list") + return 1 + return None + + +def _validate_codeql_analysis_unit( + *, + console, + unit: object, + index: int, + seen_unit_ids: set[str], + seen_databases: set[tuple[str, str]], + valid_confidences: set[str], +) -> int | None: + """Validate one analysis unit from codeql-plan.yml.""" + if not isinstance(unit, dict): + _emit(console, "fail", f"codeql-plan.yml: analysis unit {index} is not a mapping") + return 1 + unit_id = unit.get("id") + if not isinstance(unit_id, str) or not unit_id: + _emit(console, "fail", f"codeql-plan.yml: analysis unit {index} missing valid 'id'") + return 1 + if unit_id in seen_unit_ids: + _emit(console, "fail", f"codeql-plan.yml: duplicate analysis unit id '{unit_id}'") + return 1 + seen_unit_ids.add(unit_id) + + unit_path = unit.get("path") + if not isinstance(unit_path, str) or not unit_path: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' missing valid 'path'") + return 1 + resolved_path = (ROOT / unit_path).resolve() + src_root = (ROOT / "src").resolve() + try: + under_src = resolved_path == src_root or resolved_path.is_relative_to(src_root) + except ValueError: + under_src = False + if not under_src: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' path must be under src/: {unit_path}") + return 1 + 
if "_codeql_detected_source_root" in resolved_path.parts: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' path uses CodeQL-generated helper path") + return 1 + if not resolved_path.exists(): + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' path does not exist: {unit_path}") + return 1 + + languages = unit.get("languages") + if unit.get("recommended") is False and (languages is None or languages == []): + _emit(console, "info", f"codeql-plan.yml: analysis unit '{unit_id}' is not recommended for CodeQL; skipping language validation") + return None + if not isinstance(languages, list) or len(languages) == 0: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' has no languages") + return 1 + + for j, lang in enumerate(languages): + result = _validate_codeql_language_entry( + console=console, + unit_id=unit_id, + lang=lang, + index=j, + seen_databases=seen_databases, + valid_confidences=valid_confidences, + ) + if result is not None: + return result + return None + + +def count_findings_snapshot(snapshot: dict[str, int] | None = None) -> dict[str, int]: + """Return finding counts, or deltas from a previous snapshot.""" + findings_root = ROOT / "itemdb" / "findings" + current: dict[str, int] = {} + for status in FINDING_STATUS_DIRS: + status_dir = findings_root / status + current[status] = len(list(status_dir.glob("CC-*.md"))) if status_dir.exists() else 0 + if snapshot is None: + return current + return {status: max(0, current[status] - snapshot.get(status, 0)) for status in FINDING_STATUS_DIRS} + + +def check_phase_1a(console=None, findings_snapshot: dict[str, int] | None = None) -> int: + """Gate 1a: target-profile/build-model/codeql-plan outputs must exist.""" + _emit(console, "header", "Gate 1a: Target Profile") + _emit_separator(console, "cyan") + + notes_dir = ROOT / "itemdb" / "notes" + required = ["target-profile.md", "build-model.md", "codeql-plan.yml"] + missing = [name for name in required if not 
(notes_dir / name).exists()] + if missing: + _emit(console, "fail", "Required Phase 1a outputs are missing:") + for name in missing: + _emit(console, "info", f" itemdb/notes/{name}") + _emit(console, "info", "Run Phase 1a first.") + return 1 + + _emit(console, "ok", "itemdb/notes/target-profile.md exists") + _emit(console, "ok", "itemdb/notes/build-model.md exists") + _emit(console, "ok", "itemdb/notes/codeql-plan.yml exists") + + if findings_snapshot is not None: + delta = count_findings_snapshot(findings_snapshot) + new_findings = sum(delta.values()) + if new_findings > 0: + _emit( + console, + "warn", + f"{new_findings} new finding(s) were created during Phase 1a. Findings should not be created during reconnaissance.", + ) + for status, count in delta.items(): + if count > 0: + _emit(console, "info", f" {status}: +{count}") + + plan_path = notes_dir / "codeql-plan.yml" + if yaml is None: + _emit(console, "warn", "Cannot validate codeql-plan.yml: PyYAML not available") + else: + try: + plan = yaml.safe_load(plan_path.read_text(encoding="utf-8")) + except (yaml.YAMLError, OSError, UnicodeDecodeError) as exc: + _emit(console, "fail", f"codeql-plan.yml is not valid YAML: {exc}") + return 1 + + if not isinstance(plan, dict): + _emit(console, "fail", "codeql-plan.yml is not a mapping") + return 1 + + if plan.get("recommended") is True: + units = plan.get("analysis_units", []) + if not isinstance(units, list) or len(units) == 0: + _emit(console, "fail", "codeql-plan.yml: recommended=true but no analysis_units entries") + return 1 + + valid_confidences = {"HIGH", "MEDIUM", "LOW"} + seen_unit_ids: set[str] = set() + seen_databases: set[tuple[str, str]] = set() + for i, unit in enumerate(units): + result = _validate_codeql_analysis_unit( + console=console, + unit=unit, + index=i, + seen_unit_ids=seen_unit_ids, + seen_databases=seen_databases, + valid_confidences=valid_confidences, + ) + if result is not None: + return result + + _emit(console, "ok", f"codeql-plan.yml: 
def check_phase_1b(console=None, findings_snapshot: dict[str, int] | None = None) -> int:
    """Gate 1b: recon notes and file-risk-index.yml must be valid.

    Args:
        console: Output target, forwarded unchanged to ``_emit`` and
            ``_emit_separator``.
        findings_snapshot: Earlier findings counts. When provided, the delta
            returned by ``count_findings_snapshot`` is reported as a warning
            if any finding appeared during Phase 1b.

    Returns:
        0 when the gate passes; 1 when a required note is missing or
        ``file-risk-index.yml`` cannot be parsed or is structurally invalid.
        Out-of-range scores and non-workspace-relative paths only warn.
    """
    _emit(console, "header", "Gate 1b: CodeQL-assisted Reconnaissance")
    _emit_separator(console, "cyan")

    missing = _notes_exist(*REQUIRED_NOTES_1B)
    if missing:
        _emit(console, "fail", "Required Phase 1b reconnaissance notes are missing:")
        for name in missing:
            _emit(console, "info", f" itemdb/notes/{name}")
        _emit(console, "info", "Run Phase 1b first.")
        return 1

    for name in REQUIRED_NOTES_1B:
        _emit(console, "ok", f"itemdb/notes/{name} exists")

    risk_path = ROOT / "itemdb" / "notes" / "file-risk-index.yml"
    if yaml is None:
        # Same policy as the codeql-plan.yml check in Gate 1a: tell the user
        # that validation was skipped instead of passing silently.
        _emit(console, "warn", "Cannot validate file-risk-index.yml: PyYAML not available")
    else:
        try:
            data = yaml.safe_load(risk_path.read_text(encoding="utf-8"))
        except (yaml.YAMLError, OSError, UnicodeDecodeError) as exc:
            _emit(console, "fail", f"file-risk-index.yml is not valid YAML: {exc}")
            return 1

        if not isinstance(data, dict):
            _emit(console, "fail", "file-risk-index.yml: must be a mapping")
            return 1

        if "schema_version" not in data:
            _emit(console, "warn", "file-risk-index.yml: missing 'schema_version'")
        files = data.get("files")
        if files is None:
            _emit(console, "fail", "file-risk-index.yml: missing 'files' key")
            return 1
        if not isinstance(files, list):
            _emit(console, "fail", "file-risk-index.yml: 'files' is not a list")
            return 1

        for entry in files:
            if not isinstance(entry, dict):
                continue
            path_val = entry.get("path", "")
            if path_val == "src/example/path/to/file.ext":
                _emit(console, "fail", "file-risk-index.yml: contains template placeholder entry ('src/example/path/to/file.ext')")
                return 1
            path_text = str(path_val)
            if "../" in path_text or path_text.startswith("/"):
                _emit(console, "warn", f"file-risk-index.yml: path '{path_val}' is not workspace-relative")

            score = entry.get("score")
            if score is None:
                continue
            try:
                # int() would silently truncate 2.5 to 2; inf/nan fail
                # is_integer() as well, so they take the same warning path.
                if isinstance(score, float) and not score.is_integer():
                    raise ValueError(score)
                score_int = int(score)
            except (TypeError, ValueError, OverflowError):
                _emit(console, "warn", f"file-risk-index.yml: non-integer score '{score}' for '{path_val}'")
            else:
                if not 1 <= score_int <= 5:
                    _emit(console, "warn", f"file-risk-index.yml: score {score} for '{path_val}' is not in 1..5")

        _emit(console, "ok", f"file-risk-index.yml: {len(files)} file(s) indexed")

    if findings_snapshot is not None:
        delta = count_findings_snapshot(findings_snapshot)
        new_findings = sum(delta.values())
        if new_findings > 0:
            _emit(
                console,
                "warn",
                f"{new_findings} new finding(s) were created during Phase 1b. Findings should not be created during reconnaissance.",
            )
            for status, count in delta.items():
                if count > 0:
                    _emit(console, "info", f" {status}: +{count}")

    _emit_separator(console, "green")
    _emit(console, "ok", "Ready to run Phase 1c (Sandbox Bootstrap).")
    return 0


def check_phase_1c(console=None) -> int:
    """Gate 1c: sandbox-plan.md must exist and sandbox provenance is checked.

    Args:
        console: Output target, forwarded unchanged to ``_emit`` and
            ``_emit_separator``.

    Returns:
        1 when ``itemdb/notes/sandbox-plan.md`` is absent, otherwise 0. The
        sandbox provenance state is reported but never fails the gate.
    """
    _emit(console, "header", "Gate 1c: Sandbox Bootstrap")
    _emit_separator(console, "cyan")

    plan_path = ROOT / "itemdb" / "notes" / "sandbox-plan.md"
    if not plan_path.exists():
        _emit(console, "fail", "itemdb/notes/sandbox-plan.md does not exist")
        _emit(console, "info", "Run Phase 1c first.")
        return 1

    _emit(console, "ok", "itemdb/notes/sandbox-plan.md exists")

    sandbox_dir = ROOT / "sandbox"
    has_provenance = (sandbox_dir / "CODECOME-GENERATED.md").exists()
    # is_dir() rather than exists(): iterdir() raises when "sandbox" is a
    # regular file, which would crash the gate instead of reporting it.
    has_sandbox = sandbox_dir.is_dir() and any(
        entry.name != ".gitkeep" for entry in sandbox_dir.iterdir()
    )

    if has_provenance:
        _emit(console, "ok", "sandbox/CODECOME-GENERATED.md exists")
    elif has_sandbox:
        _emit(console, "warn", "sandbox/ exists without CODECOME-GENERATED.md - may be user-managed")
    else:
        _emit(console, "warn", "sandbox/ is empty or does not exist")

    _emit_separator(console, "green")
    _emit(console, "ok", "Phase 1 complete. Ready to run Phase 2.")
    return 0
def _norm(path: str) -> str:
    """Return *path* as a normalized absolute path; empty input is returned as-is."""
    return os.path.normpath(os.path.abspath(path)) if path else path


def _str_property(event: dict[str, Any], key: str) -> str:
    """Return ``event["properties"][key]`` as text, or ``""`` when unavailable.

    A missing or non-dict ``properties`` value and a null entry all yield the
    empty string, so a null is never rendered as the literal text ``"None"``.
    """
    props = event.get("properties")
    if not isinstance(props, dict):
        return ""
    value = props.get(key)
    return "" if value is None else str(value)


class FileEditedRenderer(EventRenderer):
    """Render ``file.edited`` events as a single dim line and refresh the cache."""

    event_types = ("file.edited",)

    def render(self, event: dict[str, Any]) -> bool:
        """Render one ``file.edited`` event.

        Returns:
            False when the event carries no file path; True otherwise,
            including when output is suppressed for an in-flight write.
        """
        file_path = _str_property(event, "file")
        if not file_path:
            return False

        # Paths recorded in inflight_write_files are acknowledged without any
        # output (presumably the write tool's own renderer reports them).
        if _norm(file_path) in self.context.inflight_write_files:
            return True

        rel = relativize_path(file_path, self.context.root)
        if self.rich:
            from rich.text import Text
            self.sink.write(Text(f" edited {rel}", style="dim"))
        else:
            self.sink.write_text(C.info(f" edited {rel}"))

        self.context.cache.reread(file_path)
        return True


class FileWatcherRenderer(EventRenderer):
    """Render ``file.watcher.updated`` events, only when unknown-event debugging is on."""

    event_types = ("file.watcher.updated",)

    def render(self, event: dict[str, Any]) -> bool:
        """Render one watcher event.

        Returns:
            True when the event was handled (rendered, or dropped because
            ``settings.debug_unknown_events`` is off); False when debugging is
            on but the event carries no file path.
        """
        if not self.context.settings.debug_unknown_events:
            return True

        file_path = _str_property(event, "file")
        if not file_path:
            return False

        watcher_event = _str_property(event, "event")
        rel = relativize_path(file_path, self.context.root)
        label = f" watcher {watcher_event} {rel}"
        if self.rich:
            from rich.text import Text
            self.sink.write(Text(label, style="dim"))
        else:
            self.sink.write_text(C.info(label))
        return True
and {remaining} more file(s)", style="dim")) + + title = f"Session patch hash={hash_}" if hash_ else "Session patch" + nfile = len(files) + if nfile: + title += f" {nfile} file{'s' if nfile != 1 else ''}" + + self.sink.write(Panel( + Group(*sections) if sections else Text(" (no files)"), + title=title, + border_style="green" if files else "yellow", + expand=True, + )) + + for fpath in files: + cache.reread(fpath) + return True + + def _render_plain(self, hash_: str, files: list[str]) -> bool: + settings = self.context.settings + cache = self.context.cache + + nfile = len(files) + hash_part = f" hash={hash_}" if hash_ else "" + file_part = f" {nfile} file{'s' if nfile != 1 else ''}" if nfile else "" + self.sink.write_text(C.header(f"patch{hash_part}{file_part}")) + + shown = files[:settings.apply_patch_max_files] + for fpath in shown: + rel = relativize_path(fpath, self.context.root) + self.sink.write_text(f" {rel}") + + remaining = len(files) - len(shown) + if remaining > 0: + self.sink.write_text(f" ... 
def _is_write_like(inp: dict[str, Any]) -> bool:
    """Tell whether a tool input has the shape of a file write or edit.

    That shape is a non-blank string ``filePath`` together with a ``content``
    or an ``oldString`` key.
    """
    target = inp.get("filePath", "")
    has_target = isinstance(target, str) and bool(target.strip())
    has_payload = any(key in inp for key in ("content", "oldString"))
    return has_target and has_payload


def _normalize_path(path: str) -> str:
    """Return the normalized absolute form of *path* for set membership tests.

    A falsy *path* is handed back untouched.
    """
    return os.path.normpath(os.path.abspath(path)) if path else path
_clear_hidden_reasoning_state(self.context) self.context.registry.dispatch_tool(tool, state) return True diff --git a/tools/rendering/tools/command/interceptors/sandbox_bootstrap.py b/tools/rendering/tools/command/interceptors/sandbox_bootstrap.py index 92614f36..57d9a484 100644 --- a/tools/rendering/tools/command/interceptors/sandbox_bootstrap.py +++ b/tools/rendering/tools/command/interceptors/sandbox_bootstrap.py @@ -203,6 +203,8 @@ def _sandbox_state_style(state_value: str) -> str: return "green" if state_value == "user-managed": return "yellow" + if state_value == "pending": + return "yellow" if state_value == "missing": return "red" return "dim" @@ -379,7 +381,12 @@ def _render_sandbox_status_rich( gate_pass = bool(payload.get("phase2_gate_pass")) gate_reason = str(payload.get("phase2_gate_reason", "")) - state_glyph = {"generated": glyphs["ok"], "user-managed": glyphs["warn"], "missing": glyphs["fail"]}.get(state_value, glyphs["info"]) + state_glyph = { + "generated": glyphs["ok"], + "user-managed": glyphs["warn"], + "pending": glyphs["warn"], + "missing": glyphs["fail"], + }.get(state_value, glyphs["info"]) sections.append(Text.assemble( ("state: ", "bold"), (f"{state_glyph} {state_value}", _sandbox_state_style(state_value)), @@ -425,6 +432,8 @@ def _render_sandbox_status_rich( is_helper = name in _SANDBOX_HELPER_CAPABILITIES if satisfied: badge = Text(f"{glyphs['ok']} ok", style="green") + elif state_value == "pending": + badge = Text(f"{glyphs['warn']} pending", style="yellow") elif is_helper and not present: badge = Text(f"{glyphs['skip']} optional", style="dim") else: @@ -716,6 +725,8 @@ def _render_sandbox_status_plain(payload: dict, glyphs: dict, sink) -> None: is_helper = name in _SANDBOX_HELPER_CAPABILITIES if satisfied: marker = f"{glyphs['ok']} ok" + elif state_value == "pending": + marker = f"{glyphs['warn']} pending" elif is_helper and not present: marker = f"{glyphs['skip']} optional" else: diff --git a/tools/sandbox-bootstrap.py 
def phase_1c_bootstrap_recorded() -> bool:
    """Report whether the Phase 1c note ``sandbox-plan.md`` exists as a file under NOTES_ROOT."""
    plan_note = NOTES_ROOT / "sandbox-plan.md"
    return plan_note.is_file()


def classify_sandbox_state() -> str:
    """Name the sandbox state from filesystem evidence and workflow progress.

    Checks run in priority order and the first match wins:
    ``"generated"`` when a provenance record can be read, ``"user-managed"``
    when the sandbox holds user content, ``"missing"`` when Phase 1c has been
    recorded but neither of the above holds, and ``"pending"`` otherwise.
    """
    if read_provenance() is not None:
        return "generated"
    if sandbox_has_user_content():
        return "user-managed"
    return "missing" if phase_1c_bootstrap_recorded() else "pending"
"missing": gate_pass = False gate_reason = "sandbox is missing" @@ -666,7 +680,7 @@ def cmd_status(args: argparse.Namespace) -> int: print(f" {C.DIM}capabilities:{C.RESET}") for name in ("setup", "start", "check", "build", "test", "stop", "shell", "logs", "clean", "reset"): status = capability_status[name] - state = "ok" if status.get("satisfied") else "missing" + state = "ok" if status.get("satisfied") else "pending" if sandbox_state == "pending" else "missing" print(f" {name:<6} {state:<7} {status['path']}") if gate_pass: print(C.ok(f"Phase 2 sandbox gate would pass ({gate_reason})."))