yingchen-coding · dependabot · Jun 5, 2026 · Jun 5, 2026 · Jun 5, 2026 · Jun 5, 2026
diff --git a/.agentguardignore b/.agentguardignore
@@ -0,0 +1,13 @@
+# Paths agentguard's own publish-check should skip (gitignore-style).
+# Test fixtures intentionally contain the patterns the rules detect.
+tests/fixtures
+tests/test_rules.py
+tests/test_project.py
+# Attack fixtures intentionally contain vulnerable definitions for demos and docs.
+examples/attacks
+# The accuracy benchmark embeds vulnerable/secret definitions as labeled test data.
+eval/benchmark.py
+# The rule/pattern library defines the malware & secret signatures as regex literals,
+# so it necessarily "contains" them (semgrep excludes its own rules dir for the same reason).
+agentguard/rules.py
+agentguard/project.py
diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
@@ -0,0 +1,38 @@
+{
+  "$schema": "https://anthropic.com/claude-code/marketplace.schema.json",
+  "name": "agent-armor",
+  "description": "Deterministic agent-definition security scanning plus optional assisted hardening plugins.",
+  "owner": {
+    "name": "Ying Chen",
+    "url": "https://github.com/yingchen-coding"
+  },
+  "plugins": [
+    {
+      "name": "adversarial-critic",
+      "description": "Red-teams an agent/skill/command definition across 10 failure dimensions before it ships.",
+      "author": {
+        "name": "Ying Chen"
+      },
+      "category": "development",
+      "source": "./plugins/agent-armor/plugins/adversarial-critic"
+    },
+    {
+      "name": "critique-loop",
+      "description": "Runs adversarial-critic in a loop and applies fixes until a definition has no real Critical or Major issues.",
+      "author": {
+        "name": "Ying Chen"
+      },
+      "category": "development",
+      "source": "./plugins/agent-armor/plugins/critique-loop"
+    },
+    {
+      "name": "agent-orchestrator",
+      "description": "Decompose independent subtasks, fan them out to bounded parallel sub-agents, and consolidate verified results.",
+      "author": {
+        "name": "Ying Chen"
+      },
+      "category": "productivity",
+      "source": "./plugins/agent-armor/plugins/agent-orchestrator"
+    }
+  ]
+}
diff --git a/.editorconfig b/.editorconfig
@@ -0,0 +1,15 @@
+root = true
+
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+indent_style = space
+
+[*.py]
+indent_size = 4
+max_line_length = 100
+
+[*.{json,yml,yaml,toml,md}]
+indent_size = 2
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,29 @@
+name: Bug report
+description: A rule misfires, crashes, or behaves wrong
+labels: [bug]  # label added automatically to issues opened from this form
+body:
+  - type: textarea
+    id: snippet
+    attributes:
+      label: Minimal definition snippet
+      description: The smallest agent/command/skill markdown that reproduces it.
+      render: markdown  # submitted text is formatted as a markdown code block
+    validations:
+      required: true
+  - type: input  # optional field: it has no validations block
+    id: rule
+    attributes:
+      label: Rule code
+      placeholder: e.g. AL300
+  - type: textarea
+    id: expected
+    attributes:
+      label: Expected vs actual
+      description: What did you expect, and what did agentguard do?
+    validations:
+      required: true
+  - type: input  # optional field: it has no validations block
+    id: version
+    attributes:
+      label: agentguard version
+      placeholder: "agentguard --version"
diff --git a/.github/ISSUE_TEMPLATE/false_positive.yml b/.github/ISSUE_TEMPLATE/false_positive.yml
@@ -0,0 +1,27 @@
+name: False positive
+description: A rule fired on something that is actually fine
+labels: [false-positive]  # label added automatically to issues opened from this form
+body:
+  - type: markdown  # static guidance shown on the form; it collects no input
+    attributes:
+      value: "False positives are the most valuable reports — they're how the rules get calibrated."
+  - type: input
+    id: rule
+    attributes:
+      label: Rule code
+      placeholder: e.g. AL202
+    validations:
+      required: true
+  - type: textarea
+    id: snippet
+    attributes:
+      label: The definition that wrongly tripped it
+      render: markdown  # submitted text is formatted as a markdown code block
+    validations:
+      required: true
+  - type: textarea
+    id: why
+    attributes:
+      label: Why this is not actually a problem
+    validations:
+      required: true
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,24 @@
+<!-- Thanks for contributing! Keep it focused — one logical change per PR. -->
+
+## What & why
+
+<!-- What does this change, and why? -->
+
+## Trust boundary and evidence
+
+<!-- What untrusted input, capability, sink, or failure mode changes? What reproducer proves it? -->
+
+## User and cross-functional impact
+
+<!-- Compatibility, docs/evidence changes, rollout concerns, or "none". -->
+
+## Checklist
+
+- [ ] `pytest -q` passes
+- [ ] New/changed rule has a test that it **fires** and a test that it **stays quiet** on the near-miss
+- [ ] If a rule changed, I ran it on a real corpus and confirmed no new false positives
+- [ ] `python eval/benchmark.py` and `python eval/adversarial_review.py` pass without lowering the baseline
+- [ ] `python tools/verify_contracts.py` passes; docs/evidence/skill changed with the code where needed
+- [ ] Risk-based change-review packet has no missing evidence
+- [ ] `python tools/workflow_audit.py` passes without hiding added workflow cost
+- [ ] No new runtime dependencies (stdlib only)
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -0,0 +1,13 @@
+version: 2  # Dependabot configuration schema version
+updates:
+  # Keep GitHub Actions pinned and current (the only external supply chain we have).
+  # Grouped into a single weekly PR so the repo stays on one branch, not one branch per bump.
+  - package-ecosystem: github-actions
+    directory: "/"  # for this ecosystem "/" covers the workflows under .github/workflows
+    schedule:
+      interval: weekly
+    commit-message:
+      prefix: ci  # prepended to the commit message of every update
+    groups:
+      github-actions:  # group name; appears in the grouped PR's title and branch
+        patterns: ["*"]  # match every action so all bumps share the one group
diff --git a/.github/workflows/agent-factory.yml b/.github/workflows/agent-factory.yml
@@ -0,0 +1,73 @@
+name: agent-factory
+
+on:
+  workflow_dispatch:
+    inputs:
+      publish_issue:  # checked by the publish-reviewed-summary job's `if:` condition
+        description: "Update the human-reviewed corpus audit issue"
+        required: false
+        default: false  # manual runs do not publish unless the box is ticked
+        type: boolean
+  schedule:
+    - cron: "41 4 * * 2"  # 04:41 UTC every Tuesday
+
+permissions:
+  contents: read  # workflow-wide default; a job that needs more declares its own block
+
+jobs:
+  corpus-audit:  # runs the corpus scan and uploads build/corpus-audit/ as an artifact
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v7
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+          cache: pip
+      - run: pip install -e .
+      - uses: actions/cache@v6  # carries the audit state file from one run to the next
+        with:
+          path: .agentguard-corpus-state.json  # rewritten by the scan step from build/corpus-audit/state.json
+          key: corpus-state-${{ github.run_id }}  # unique per run, so it never matches exactly; restore-keys supplies the prior state
+          restore-keys: |
+            corpus-state-
+      - name: Scan, deduplicate, diff, and generate repair patches
+        run: |
+          args=(--manifest corpus/manifest.json --output build/corpus-audit --jobs 3)
+          if [[ -f .agentguard-corpus-state.json ]]; then
+            args+=(--state .agentguard-corpus-state.json)
+          fi
+          python3 tools/corpus_audit.py "${args[@]}"
+          cp build/corpus-audit/state.json .agentguard-corpus-state.json
+      - name: Verify the audit against its schema before human review
+        run: python3 tools/validate_audit.py build/corpus-audit/audit.json
+      - uses: actions/upload-artifact@v7
+        with:
+          name: agentguard-corpus-audit  # the publish job downloads the artifact by this name
+          path: build/corpus-audit/
+          if-no-files-found: error  # an empty audit directory fails the job
+
+  publish-reviewed-summary:  # opt-in job: publishes the audit report to a single tracking issue
+    if: github.event_name == 'workflow_dispatch' && inputs.publish_issue
+    needs: corpus-audit
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    environment: corpus-publish  # NOTE(review): any approval/protection rules live in repo settings — confirm
+    permissions:
+      contents: read
+      issues: write  # the only write scope granted; used through GH_TOKEN below
+    steps:
+      - uses: actions/checkout@v7
+      - uses: actions/download-artifact@v8
+        with:
+          name: agentguard-corpus-audit  # artifact uploaded by the corpus-audit job
+          path: build/corpus-audit
+      - name: Create or update one deduplicated audit issue  # repo slug is read from GITHUB_REPOSITORY at run time
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          python3 tools/publish_audit_issue.py \
+            --report build/corpus-audit/report.md \
+            --repo "${GITHUB_REPOSITORY}" \
+            --title "AgentGuard corpus audit" \
+            --confirm-publish
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -5,32 +5,125 @@ on:
     branches: [main]
   pull_request:
 
+concurrency:
+  group: ci-${{ github.workflow }}-${{ github.ref }}  # one group per workflow + ref
+  cancel-in-progress: true  # a newer run in the same group cancels the one still running
+
+# Default-deny: every job gets read-only contents unless it declares more.
+permissions:
+  contents: read
+
 jobs:
+  lint:  # static checks plus the repository's own contract, review and workflow audits
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v7
+        with:
+          fetch-depth: 0  # full history, so the origin/<base> ref used by the review step is present
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+          cache: pip
+      - run: pip install -e ".[dev]"
+      - run: ruff check .
+      - run: mypy agentguard
+      - run: python3 tools/verify_contracts.py
+      - run: python3 eval/adversarial_review.py
+      - run: python3 tools/workflow_audit.py
+      - name: Build risk-based PR review packet  # refs are expanded by the shell from runner env vars, not pasted into the script
+        if: github.event_name == 'pull_request'  # GITHUB_BASE_REF is only populated on pull-request runs
+        run: |
+          python3 tools/change_review.py \
+            --base "origin/${GITHUB_BASE_REF}" \
+            --head "${GITHUB_SHA}" \
+            --json-output build/change-review.json \
+            --markdown-output build/change-review.md
+          cat build/change-review.md >> "$GITHUB_STEP_SUMMARY"
+
   test:
     runs-on: ubuntu-latest
+    timeout-minutes: 15
     strategy:
+      fail-fast: false
       matrix:
         python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@v7
+      - uses: actions/setup-python@v6
         with:
           python-version: ${{ matrix.python-version }}
+          cache: pip
       - run: pip install -e ".[dev]"
-      - run: pytest -q
+      - run: python -m pytest -q
+
+  quality:  # accuracy benchmark, then a package build and a check of the built artifacts
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v7
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+          cache: pip
+      - run: pip install -e ".[dev]"  # presumably provides build and twine — confirm in pyproject
+      - run: python3 eval/benchmark.py --verbose
+      - run: python -m build
+      - run: python -m twine check dist/*  # inspects whatever the build step left in dist/
+
+  action-smoke:
+    # Run the published composite action end-to-end (uses: ./) so the Marketplace
+    # wrapper is proven on every commit — install path, arg parsing, and exit codes.
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v7  # `uses: ./` below needs the action's files in the workspace
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+      # Happy path: a clean target must exit 0 through the action wrapper.
+      - name: Action passes on a clean target
+        uses: ./
+        with:
+          path: skills
+          fail-at: major
+      # Gate path: findings at/above fail-at must fail the action. Capture the
+      # outcome and assert it failed, so a wrapper that silently passes is caught.
+      - name: Action fails on intentionally-flagged examples
+        id: gate  # referenced by the assert step as steps.gate.outcome
+        uses: ./
+        continue-on-error: true  # the expected failure must not fail the job; outcome still records it
+        with:
+          path: examples
+          fail-at: major
+      - name: Assert the gate actually failed
+        run: |
+          if [ "${{ steps.gate.outcome }}" != "failure" ]; then
+            echo "::error::action did not fail on examples/ — fail-at gate is broken"
+            exit 1
+          fi
+          echo "fail-at gate works: examples/ correctly failed the action."
 
   self-lint:
-    # agent-lint lints its own example/fixture definitions — dogfooding in CI.
+    # agentguard lints its own example definitions and runs its own supply-chain check — dogfooding.
     runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: read
+      security-events: write   # upload-sarif writes to code scanning
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@v7
+      - uses: actions/setup-python@v6
         with:
           python-version: "3.12"
+          cache: pip
       - run: pip install -e .
-      - run: agent-lint --format sarif -o agent-lint.sarif examples || true
-      - uses: github/codeql-action/upload-sarif@v3
+      - run: agentguard --format sarif -o agentguard.sarif examples || true
+      - uses: github/codeql-action/upload-sarif@v4
         if: always()
         with:
-          sarif_file: agent-lint.sarif
+          sarif_file: agentguard.sarif
         continue-on-error: true
+      # Supply-chain self-scan: the repo must be free of committed secrets and malware signatures.
+      # (--select limits the gate to the security-critical AL5xx checks; placeholders won't fail it.)
+      - run: agentguard . --publish-check --select AL503,AL510,AL511,AL512,AL513 --fail-at major