raml-dev · matstech · May 17, 2026 · May 17, 2026
diff --git a/.env.example b/.env.example
@@ -0,0 +1,20 @@
+# Prudence Environment Variables Example
+# Copy this to .env and fill in the values
+
+# LLM Provider Configuration
+PROVIDER_NAME=google
+MODEL_NAME=gemini-2.0-flash
+TEMPERATURE=0.3
+
+# GitHub Configuration
+REPO_NAME=your-org/your-repo
+GITHUB_TOKEN=your-github-token
+
+# Alternatively, authenticate with GitHub App credentials instead of GITHUB_TOKEN
+# GITHUB_APP_ID=
+# GITHUB_PRIVATE_KEY=
+# GITHUB_INSTALLATION_ID=
+
+# Google Cloud Configuration (for ADK)
+# GOOGLE_CLOUD_PROJECT=
+# GOOGLE_CLOUD_LOCATION=global
diff --git a/GEMINI.md b/GEMINI.md
@@ -0,0 +1,55 @@
+# Coding Agent Guide
+
+## Prerequisites
+
+Install the CLI (one-time):
+```bash
+uv tool install google-agents-cli
+```
+
+---
+
+## Development Phases
+
+### Phase 1: Understand Requirements
+Before writing any code, understand the project's requirements, constraints, and success criteria.
+
+### Phase 2: Build and Implement
+Implement agent logic in `agent/`. Use `agents-cli playground` for interactive testing. Iterate based on user feedback.
+
+### Phase 3: The Evaluation Loop (Main Iteration Phase)
+Start with 1-2 eval cases, run `agents-cli eval run`, and iterate on the results. Expect 5-10+ iterations. See the **Evaluation Guide** for metrics, evalset schema, LLM-as-judge config, and common gotchas.
+
+### Phase 4: Pre-Deployment Tests
+Run `uv run pytest tests/unit tests/integration`. Fix issues until all tests pass.
+
+### Phase 5: Deploy to Dev
+**Requires explicit human approval.** Run `agents-cli deploy` only after user confirms. See the **Deployment Guide** for details.
+
+### Phase 6: Production Deployment
+Ask the user to choose between Option A (simple single-project deployment) and Option B (full CI/CD pipeline set up with `agents-cli infra cicd`).
+
+## Development Commands
+
+| Command | Purpose |
+|---------|---------|
+| `agents-cli playground` | Interactive local testing |
+| `uv run pytest tests/unit tests/integration` | Run unit and integration tests |
+| `agents-cli eval run` | Run evaluation against evalsets |
+| `agents-cli lint` | Check code quality |
+| `agents-cli infra single-project` | Set up project infrastructure (Terraform) |
+| `agents-cli deploy` | Deploy to dev |
+| `agents-cli scaffold enhance` | Add deployment target or CI/CD to project |
+| `agents-cli scaffold upgrade` | Upgrade project to latest version |
+
+---
+
+## Operational Guidelines for Coding Agents
+
+- **Code preservation**: Only modify code directly targeted by the user's request. Preserve all surrounding code, config values (e.g., `model`), comments, and formatting.
+- **NEVER change the model** unless explicitly asked.
+- **Model 404 errors**: Fix `GOOGLE_CLOUD_LOCATION` (e.g., `global` instead of `us-east1`), not the model name.
+- **ADK tool imports**: Import the tool instance, not the module: `from google.adk.tools.load_web_page import load_web_page`
+- **Run Python with `uv`**: Run `agents-cli install` first, then run scripts with `uv run python script.py`.
+- **Stop on repeated errors**: If the same error appears 3+ times, fix the root cause instead of retrying.
+- **Terraform conflicts** (Error 409): Use `terraform import` instead of retrying creation.
diff --git a/Makefile b/Makefile
@@ -1,12 +1,7 @@
+# Usage: make run pr_number=<PR number>
+# Provider, model and GitHub settings are no longer set inline here; they are
+# expected to come from the environment (see .env.example).
 run:
-	@PROVIDER_NAME="groq" \
-	GROQ_API_KEY="" \
-	MODEL_NAME="moonshotai/kimi-k2-instruct-0905" \
-	REPO_NAME="" \
-	GITHUB_TOKEN="" \
-	GITHUB_APP_ID="" \
-	GITHUB_PRIVATE_KEY="" \
-	GITHUB_INSTALLATION_ID="" \
-	uv run main.py $(pr_number)
+	@uv run main.py $(pr_number)
 
 .PHONY: run
diff --git a/agent/agent.py b/agent/agent.py
@@ -0,0 +1,38 @@
+"""Module that wires the configuration, the GitHub provider and the root agent.
+
+Everything below runs at import time and is exposed as module-level names
+(`config`, `github_provider`, `root_agent`, `app`).
+"""
+# NOTE(review): `os` and `google.auth` are not referenced in this module; confirm they are needed.
+import os
+import google.auth
+from google.adk.apps import App
+from configuration.config import Configuration
+from integration.github import GitHubProvider
+from agent.engine import PrudenceEngine
+
+# 1. Load configuration
+config = Configuration()
+
+# 2. Initialize the GitHub provider. Both the token and the GitHub App
+#    credentials are passed through from the configuration.
+#    NOTE(review): presumably the provider uses whichever credentials are set — confirm.
+github_provider = GitHubProvider(
+    token=config.github_token,
+    app_id=config.github_app_id,
+    private_key=config.github_private_key,
+    installation_id=config.github_installation_id
+)
+
+# 3. Define the root agent: a PrudenceEngine sharing the config and provider above
+root_agent = PrudenceEngine(
+    name="prudence",
+    config=config,
+    github_provider=github_provider
+)
+
+# 4. Create the ADK App around the root agent
+app = App(
+    root_agent=root_agent,
+    name="agent", # Must match the directory name
+)
diff --git a/agent/context_utils.py b/agent/context_utils.py
@@ -0,0 +1,123 @@
+import re
+from typing import List, Dict
+
+class ContextUtils:
+    """Helpers that reduce a file to the parts relevant for reviewing a patch."""
+
+    # Declaration starts recognised by generate_skeleton, one alternative per
+    # construct. Function alternatives stop at the opening parenthesis so that
+    # return annotations, `async def`, and signatures wrapped over several
+    # lines are still picked up.
+    _SKELETON_PATTERN = re.compile("|".join(f"(?:{p})" for p in (
+        r'\s*(?:export\s+(?:default\s+)?)?class\s+\w+',  # Python/JS class
+        r'\s*(?:async\s+)?def\s+\w+\s*\(',  # Python function
+        r'\s*(?:export\s+(?:default\s+)?)?(?:async\s+)?function\s+\w+\s*\(',  # JS function
+        r'\s*(?:export\s+)?(?:const|let|var)\s+\w+\s*=\s*(?:async\s+)?\(.*?\)\s*=>',  # JS arrow function
+        r'\s*func\s+(?:\(\w+\s+\*?\w+\)\s+)?\w+\s*\(',  # Go function
+    )))
+
+    @staticmethod
+    def extract_hunks(content: str, patch: str, context_lines: int = 15) -> str:
+        """
+        Extracts the changed regions of ``content`` described by ``patch``.
+
+        Args:
+            content: Text of the file in its new (post-patch) version.
+            patch: Unified diff for this file: ``@@`` hunk headers followed by
+                context, ``+`` and ``-`` lines.
+            context_lines: Unchanged lines to show on each side of an added
+                line; negative values are treated as 0.
+
+        Returns:
+            One ``--- Lines A-B ---`` section per merged range, every line
+            prefixed with its 1-based number and added lines marked with
+            ``+``. An empty string when either input is empty or no added
+            line falls inside ``content``.
+        """
+        if not patch or not content:
+            return ""
+
+        lines = content.splitlines()
+        context_lines = max(0, context_lines)
+        changed_lines = set()
+
+        # Walk the patch, tracking the current line number in the new file.
+        current_line = 0
+        in_hunk = False
+        for line in patch.split('\n'):
+            if line.startswith('@@'):
+                # The first "+N" in a hunk header is the new-file start line.
+                match = re.search(r'\+(\d+)', line)
+                if match:
+                    current_line = int(match.group(1))
+                in_hunk = True
+            elif line.startswith('diff --git '):
+                # Another file section starts: ignore its headers until the next hunk.
+                in_hunk = False
+            elif not in_hunk:
+                # "--- a/x", "+++ b/x", "index ..." headers carry no line info.
+                continue
+            elif line.startswith('+'):
+                # Inside a hunk every "+" line is an addition, including source
+                # lines that themselves begin with "++" (e.g. "++i;").
+                changed_lines.add(current_line)
+                current_line += 1
+            elif line.startswith(' ') or line == '':
+                # Context line (an empty string is counted as one too).
+                current_line += 1
+            # "-" and "\ No newline at end of file" lines do not advance the new file.
+
+        # Keep only additions that exist in content, so ranges never invert.
+        sorted_changes = sorted(n for n in changed_lines if 1 <= n <= len(lines))
+        if not sorted_changes:
+            return ""
+
+        # Merge each context window into the previous one when they touch or overlap.
+        ranges = []
+        current_range = [max(1, sorted_changes[0] - context_lines), min(len(lines), sorted_changes[0] + context_lines)]
+        for line_num in sorted_changes[1:]:
+            start = max(1, line_num - context_lines)
+            end = min(len(lines), line_num + context_lines)
+            if start <= current_range[1] + 1:
+                current_range[1] = end
+            else:
+                ranges.append(current_range)
+                current_range = [start, end]
+        ranges.append(current_range)
+
+        # Render each range; "+" after the gutter marks an added line.
+        output = []
+        for start, end in ranges:
+            output.append(f"--- Lines {start}-{end} ---")
+            for i in range(start - 1, end):
+                marker = "+" if (i + 1) in changed_lines else " "
+                output.append(f"{i+1:4d} |{marker} {lines[i]}")
+            output.append("")
+
+        return "\n".join(output)
+
+    @staticmethod
+    def generate_skeleton(content: str, filename: str) -> str:
+        """
+        Generates a skeleton of the file (class and function declarations).
+
+        Args:
+            content: Source text, scanned line by line.
+            filename: Path of the file; currently unused, the same patterns
+                are applied whatever the language.
+
+        Returns:
+            The matching lines, stripped and prefixed with their 1-based line
+            number, joined by newlines; "No structural elements found." when
+            nothing matches.
+        """
+        skeleton = [
+            f"{number:4d} | {line.strip()}"
+            for number, line in enumerate((content or "").splitlines(), start=1)
+            if ContextUtils._SKELETON_PATTERN.match(line)
+        ]
+
+        if not skeleton:
+            return "No structural elements found."
+
+        return "\n".join(skeleton)
diff --git a/agent/engine.py b/agent/engine.py
@@ -0,0 +1,187 @@
+import asyncio
+import logging
+from typing import AsyncGenerator, List, Dict
+
+from google.adk.agents import Agent, BaseAgent
+from google.adk.agents.invocation_context import InvocationContext
+from google.adk.events import Event
+from google.genai import types
+
+from agent.models import PrudenceOutput
+from agent.prompts import SYSTEM_PROMPT, FILE_ANALYSIS_PROMPT, HUNK_ANALYSIS_PROMPT
+from agent.triage import TriageEngine, ReviewStrategy
+from agent.context_utils import ContextUtils
+from integration.github import GitHubProvider, parse_patch_lines
+
+logger = logging.getLogger(__name__)
+
+class PrudenceEngine(BaseAgent):
+    """Root agent that reviews the files of a pull request and posts comments.
+
+    The PR number is read from the session state and the repository name from
+    the configuration. Files are triaged, reviewed concurrently by an internal
+    LLM agent, and comments are posted to GitHub as each file finishes.
+    """
+
+    def __init__(self, name: str, config, github_provider: GitHubProvider):
+        """Stores the collaborators and builds the internal reviewer agent.
+
+        Args:
+            name: Agent name forwarded to BaseAgent.
+            config: Configuration object; ``model``, ``temperature`` and
+                ``repo_name`` are read from it.
+            github_provider: Client used to list PR files, fetch file content
+                and post review comments.
+        """
+        super().__init__(name=name)
+        # NOTE(review): confirm BaseAgent permits assigning these undeclared
+        # attributes; a validated model base class may reject them.
+        self.config = config
+        self.github = github_provider
+        # Held for a whole per-file task (fetch, LLM call and posting), so at
+        # most 3 files are processed at once.
+        self.semaphore = asyncio.Semaphore(3)
+
+        # Internal LLM agent for reviews
+        self.reviewer = Agent(
+            name="reviewer",
+            model=config.model or "gemini-2.0-flash",
+            instruction=SYSTEM_PROMPT,
+            output_schema=PrudenceOutput,
+            generate_content_config=types.GenerateContentConfig(
+                temperature=config.temperature,
+            )
+        )
+
+    async def _run_async_impl(self, ctx: InvocationContext) -> AsyncGenerator[Event, None]:
+        """Runs the review and yields progress events.
+
+        Yields a start event, one event per reviewed (or failed) file in
+        completion order, and a final summary. Returns early when the PR number
+        is missing from the session state or when triage skips every file.
+        """
+        pr_number = ctx.session.state.get("pr_number")
+        repo_name = self.config.repo_name
+
+        # NOTE(review): events are built with a plain str as content; confirm
+        # the Event model accepts that in the ADK version in use.
+        if not pr_number:
+            yield Event(author=self.name, content="PR number not provided in state.")
+            return
+
+        yield Event(author=self.name, content=f"🚀 Starting review for PR #{pr_number} in {repo_name}...")
+
+        # 1. Discovery
+        files_metadata = await self.github.get_pr_files_metadata(repo_name, pr_number)
+
+        # 2. Triage and Execution Queue
+        # NOTE(review): assumes every metadata entry carries "patch"; confirm the
+        # provider fills it in for files where GitHub omits the diff.
+        tasks = []
+        for file in files_metadata:
+            strategy = TriageEngine.get_strategy(
+                file["filename"],
+                file["additions"] + file["deletions"], # Rough line count proxy for triage
+                file["patch"]
+            )
+
+            if strategy == ReviewStrategy.SKIP:
+                logger.info(f"Skipping {file['filename']}")
+                continue
+
+            tasks.append(self.review_file_task(repo_name, pr_number, file, strategy, ctx))
+
+        # 3. Parallel Execution with Immediate Publishing
+        if not tasks:
+            yield Event(author=self.name, content="No files requiring review found.")
+            return
+
+        # asyncio.as_completed lets us report each file as soon as it finishes.
+        failures = 0
+        for task in asyncio.as_completed(tasks):
+            try:
+                result_msg = await task
+            except Exception as e:
+                # One failing file must not abort the rest of the review.
+                failures += 1
+                logger.exception("Error in review task: %s", e)
+                yield Event(author=self.name, content=f"❌ Error reviewing a file: {str(e)}")
+            else:
+                yield Event(author=self.name, content=result_msg)
+
+        # Only claim success when every file task finished without raising.
+        if failures:
+            yield Event(author=self.name, content=f"⚠️ Review completed with errors: {failures} of {len(tasks)} file(s) failed.")
+        else:
+            yield Event(author=self.name, content="✅ Review completed successfully.")
+
+    async def review_file_task(self, repo_name: str, pr_number: int, file: Dict, strategy: ReviewStrategy, parent_ctx: InvocationContext) -> str:
+        """Reviews one file and posts the resulting comments.
+
+        Args:
+            repo_name: Repository the PR belongs to.
+            pr_number: Number of the pull request under review.
+            file: File metadata; ``filename``, ``patch`` and ``sha`` are read.
+            strategy: Triage result that selects how the prompt is built.
+            parent_ctx: Invocation context handed to the reviewer agent.
+
+        Returns:
+            A one-line status message for this file.
+        """
+        async with self.semaphore:
+            filename = file["filename"]
+            patch = file["patch"]
+            sha = file["sha"]
+
+            # Context Preparation
+            content = await self.github.get_file_content(repo_name, filename, sha)
+
+            if strategy == ReviewStrategy.FULL:
+                # Whole file with line numbers, plus the list of changed lines.
+                lines = content.splitlines()
+                file_content = "\n".join(f"{i+1:4d} | {line}" for i, line in enumerate(lines))
+                changed_lines = parse_patch_lines(patch)
+                prompt = FILE_ANALYSIS_PROMPT.format(
+                    file_path=filename,
+                    file_content=file_content,
+                    changed_lines=", ".join(map(str, changed_lines))
+                )
+            elif strategy in [ReviewStrategy.SURGICAL, ReviewStrategy.PATCH_ONLY]:
+                # Changed hunks only; SURGICAL also gets the file skeleton.
+                hunks = ContextUtils.extract_hunks(content, patch)
+                skeleton = ContextUtils.generate_skeleton(content, filename) if strategy == ReviewStrategy.SURGICAL else "Skipped for Patch Only"
+                prompt = HUNK_ANALYSIS_PROMPT.format(
+                    file_path=filename,
+                    skeleton=skeleton,
+                    hunks=hunks
+                )
+            else:
+                return f"Skipped {filename} (Unsupported strategy)"
+
+            # LLM Review
+            # We use a sub-invocation to run the reviewer agent
+            # NOTE(review): confirm run_one_shot_async exists on Agent in the ADK version in use.
+            review_result: PrudenceOutput = await self.reviewer.run_one_shot_async(
+                parent_ctx,
+                prompt
+            )
+
+            # 4. Immediate Delivery
+            if review_result and review_result.comments:
+                posted_count = 0
+                for comment in review_result.comments:
+                    # Final safety check: only comment on modified lines if it's a code file
+                    # (Simplified for now, prompt already handles it)
+                    try:
+                        await self.github.post_comment(
+                            repo_name, pr_number, filename,
+                            comment.line, comment.body, comment.suggestion
+                        )
+                        posted_count += 1
+                    except Exception as e:
+                        # Best effort: a rejected comment must not drop the remaining ones.
+                        logger.exception("Failed to post comment on %s:%s: %s", filename, comment.line, e)
+
+                return f"📝 Reviewed {filename}: {posted_count} comments posted."
+
+            return f"✅ Reviewed {filename}: No issues found."