From 670fab351a26cc0ed25eb1fd8106625dddb38543 Mon Sep 17 00:00:00 2001 From: yyovil Date: Mon, 1 Jun 2026 05:31:41 +0530 Subject: [PATCH 01/17] feat(plugin): add agents plugin (first iteration) Faithful copy of the agents plugin implementation from yyovil/better-ao (internal/plugin/ -> backend/internal/plugin/) plus its PRD (prds/plugins/agents/PRD.md), as a first-iteration proposal for review. Imports are left at their original github.com/yyovil/better-ao/... paths and are NOT yet reconciled to this repo's module; see PR description for the integration deltas (module path, missing internal/utils dependency). Co-authored-by: Claude --- backend/internal/plugin/agent/agent.go | 127 +++++ .../claudecode/.claude/settings.local.json | 38 ++ .../plugin/agent/claudecode/claudecode.go | 433 ++++++++++++++++ .../agent/claudecode/claudecode_test.go | 472 ++++++++++++++++++ .../internal/plugin/agent/claudecode/hooks.go | 187 +++++++ .../plugin/agent/codex/.codex/hooks.json | 40 ++ backend/internal/plugin/agent/codex/codex.go | 244 +++++++++ .../internal/plugin/agent/codex/codex_test.go | 335 +++++++++++++ backend/internal/plugin/agent/codex/hooks.go | 236 +++++++++ backend/internal/plugin/plugin.go | 68 +++ prds/plugins/agents/PRD.md | 118 +++++ 11 files changed, 2298 insertions(+) create mode 100644 backend/internal/plugin/agent/agent.go create mode 100644 backend/internal/plugin/agent/claudecode/.claude/settings.local.json create mode 100644 backend/internal/plugin/agent/claudecode/claudecode.go create mode 100644 backend/internal/plugin/agent/claudecode/claudecode_test.go create mode 100644 backend/internal/plugin/agent/claudecode/hooks.go create mode 100644 backend/internal/plugin/agent/codex/.codex/hooks.json create mode 100644 backend/internal/plugin/agent/codex/codex.go create mode 100644 backend/internal/plugin/agent/codex/codex_test.go create mode 100644 backend/internal/plugin/agent/codex/hooks.go create mode 100644 backend/internal/plugin/plugin.go create mode 
100644 prds/plugins/agents/PRD.md diff --git a/backend/internal/plugin/agent/agent.go b/backend/internal/plugin/agent/agent.go new file mode 100644 index 00000000..ac70f7bf --- /dev/null +++ b/backend/internal/plugin/agent/agent.go @@ -0,0 +1,127 @@ +package agent + +import ( + "context" + + "github.com/yyovil/better-ao/internal/config" +) + +// Agent defines the behavior every CLI coding agent plugin must provide. +type Agent interface { + // GetConfigSpec describes the agent-specific config keys Better-AO can + // expose to users in ~/.better-ao/config.yaml. + GetConfigSpec(ctx context.Context) (ConfigSpec, error) + + // GetLaunchCommand builds the command Better-AO should run to start this agent. + GetLaunchCommand(ctx context.Context, cfg LaunchConfig) (cmd []string, err error) + + // GetPromptDeliveryStrategy tells Better-AO whether the prompt is included in + // the launch command or must be sent after the agent process starts. + GetPromptDeliveryStrategy(ctx context.Context, cfg LaunchConfig) (PromptDeliveryStrategy, error) + + // GetAgentHooks installs or merges Better-AO hooks into the agent's + // native workspace-local hook config. It must preserve user-defined hooks. + GetAgentHooks(ctx context.Context, cfg WorkspaceHookConfig) error + + // GetRestoreCommand builds a command that continues an existing native agent + // session. ok=false means no existing native session can be continued. + GetRestoreCommand(ctx context.Context, cfg RestoreConfig) (cmd []string, ok bool, err error) + + // SessionInfo reads agent-owned session metadata such as native session id, + // display title, or summary. ok=false means no info is available. + SessionInfo(ctx context.Context, session SessionRef) (info SessionInfo, ok bool, err error) +} + +// Config contains values loaded from the selected agent's section in +// ~/.better-ao/config.yaml. Agent plugins own validation for their custom keys. 
+type Config = config.AgentConfig + +// ConfigSpec describes the agent-specific config keys Better-AO can expose to +// users in ~/.better-ao/config.yaml. +type ConfigSpec struct { + Fields []ConfigField +} + +// ConfigField describes one user-facing agent config key. +type ConfigField struct { + Key string + Type ConfigFieldType + Description string + Required bool + Default any + Enum []string +} + +// ConfigFieldType is the primitive value kind Better-AO expects for a field. +type ConfigFieldType string + +const ( + ConfigFieldString ConfigFieldType = "string" + ConfigFieldBool ConfigFieldType = "bool" + ConfigFieldNumber ConfigFieldType = "number" + ConfigFieldStringList ConfigFieldType = "string_list" + ConfigFieldEnum ConfigFieldType = "enum" +) + +// LaunchConfig carries inputs needed to build a new agent launch command. +type LaunchConfig struct { + Config Config + IssueID string + Permissions PermissionMode + Prompt string + SessionID string + SystemPrompt string + SystemPromptFile string + WorkspacePath string +} + +// WorkspaceHookConfig carries inputs needed to install workspace-local agent hooks. +type WorkspaceHookConfig struct { + Config Config + DataDir string + SessionID string + WorkspacePath string +} + +// RestoreConfig carries inputs needed to continue an existing native agent session. +type RestoreConfig struct { + Config Config + Permissions PermissionMode + Session SessionRef +} + +// SessionRef identifies a Better-AO session whose agent-owned metadata may be read. +type SessionRef struct { + ID string + Metadata map[string]string + WorkspacePath string +} + +// SessionInfo contains agent-owned session metadata. +type SessionInfo struct { + AgentSessionID string + Metadata map[string]string + Title string + Summary string +} + +// PermissionMode controls how much review an agent requires before acting. 
+type PermissionMode string + +const ( + // "default" is special: plugins emit no flag for it so the agent resolves + // its starting mode from the user's own config (e.g. Claude's TUI reading + // ~/.claude/settings.json defaultMode). + PermissionModeDefault PermissionMode = "default" + PermissionModeAcceptEdits PermissionMode = "accept-edits" + PermissionModeAuto PermissionMode = "auto" + PermissionModeBypassPermissions PermissionMode = "bypass-permissions" +) + +// PromptDeliveryStrategy describes how Better-AO should deliver the initial prompt. +type PromptDeliveryStrategy string + +const ( + PromptDeliveryInCommand PromptDeliveryStrategy = "in_command" + PromptDeliveryAfterStart PromptDeliveryStrategy = "after_start" +) diff --git a/backend/internal/plugin/agent/claudecode/.claude/settings.local.json b/backend/internal/plugin/agent/claudecode/.claude/settings.local.json new file mode 100644 index 00000000..da538700 --- /dev/null +++ b/backend/internal/plugin/agent/claudecode/.claude/settings.local.json @@ -0,0 +1,38 @@ +{ + "hooks": { + "SessionStart": [ + { + "matcher": "startup", + "hooks": [ + { + "type": "command", + "command": "better-ao hooks claude-code session-start", + "timeout": 30 + } + ] + } + ], + "UserPromptSubmit": [ + { + "hooks": [ + { + "type": "command", + "command": "better-ao hooks claude-code user-prompt-submit", + "timeout": 30 + } + ] + } + ], + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "better-ao hooks claude-code stop", + "timeout": 30 + } + ] + } + ] + } +} diff --git a/backend/internal/plugin/agent/claudecode/claudecode.go b/backend/internal/plugin/agent/claudecode/claudecode.go new file mode 100644 index 00000000..ed871cfb --- /dev/null +++ b/backend/internal/plugin/agent/claudecode/claudecode.go @@ -0,0 +1,433 @@ +// Package claudecode implements the Claude Code agent plugin. 
+// +// It builds the argv to launch `claude` as an interactive session inside a +// session's worktree, installs worktree-local hooks that report normalized +// session metadata (native id, title, summary) back into Better-AO's store, +// and supports resume: GetLaunchCommand pins a stable `--session-id` so +// GetRestoreCommand can rebuild `claude --resume <id>`. SessionInfo reads the +// hook-captured metadata from the store — it does not parse transcripts. +// GetConfigSpec remains a no-op (no agent-specific config keys yet). +// +// Claude Code starts an interactive session by default (no -p/--print), which +// is exactly what better-ao wants: a live agent the user can attach to in the +// browser terminal or via `zellij attach`. The initial task prompt is passed +// as the positional argument; the orchestrator system prompt (if any) is +// appended to Claude's default system prompt so its built-in coding +// instructions are preserved. +package claudecode + +import ( + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "sync" + + "github.com/google/uuid" + "github.com/yyovil/better-ao/internal/plugin" + "github.com/yyovil/better-ao/internal/plugin/agent" + "github.com/yyovil/better-ao/internal/utils" +) + +const ( + // pluginID is the registry id and the value users pass to + // `better-ao spawn --agent`. + pluginID = "claude-code" + + // Normalized session-metadata keys the Claude Code hooks persist into the + // Better-AO session store and SessionInfo reads back. Shared vocabulary + // with the Codex plugin so the dashboard treats every agent uniformly. + // agentSessionId is also the preferred restore id. + claudeAgentSessionIDMetadataKey = "agentSessionId" + claudeTitleMetadataKey = "title" + claudeSummaryMetadataKey = "summary" +) + +// claudeSessionNamespace seeds the UUIDv5 derivation that maps a better-ao +// session id onto a stable Claude Code `--session-id`.
A fixed namespace makes +// the mapping deterministic, so GetLaunchCommand (which pins --session-id at +// launch) and GetRestoreCommand (which recomputes it as a fallback for +// pre-hook sessions) agree without persisting anything. +var claudeSessionNamespace = uuid.MustParse("a1f0c3d2-7b54-4e96-8a2b-0d9e1f2a3b4c") + +type Plugin struct { + binaryMu sync.Mutex + resolvedBinary string +} + +func New() *Plugin { + return &Plugin{} +} + +var _ plugin.Plugin = (*Plugin)(nil) +var _ agent.Agent = (*Plugin)(nil) + +func (p *Plugin) Manifest() plugin.Manifest { + return plugin.Manifest{ + ID: pluginID, + Name: "Claude Code", + Description: "Run Claude Code worker sessions.", + Version: "0.0.1", + Capabilities: []plugin.Capability{ + plugin.CapabilityAgent, + }, + } +} + +func (p *Plugin) GetConfigSpec(ctx context.Context) (agent.ConfigSpec, error) { + if err := ctx.Err(); err != nil { + return agent.ConfigSpec{}, err + } + return agent.ConfigSpec{}, nil +} + +// GetLaunchCommand builds the argv to start an interactive Claude Code +// session. Shape: +// +// claude [--session-id <uuid>] \ +// [--permission-mode <mode>] \ +// [--append-system-prompt <text>] \ +// [-- <prompt>] +// +// --session-id pins Claude's native session UUID to a value derived from the +// better-ao session id, so the session is resumable later (see +// GetRestoreCommand) and its transcript is locatable (see SessionInfo) without +// a separate capture step. +// +// <mode> is acceptEdits, auto, or bypassPermissions. better-ao's "default" +// mode emits no --permission-mode flag, so Claude's TUI resolves the starting +// mode from ~/.claude/settings.json exactly as a normal launch. +// +// The prompt is passed after `--` so a prompt beginning with "-" is not +// mistaken for a flag.
+func (p *Plugin) GetLaunchCommand(ctx context.Context, cfg agent.LaunchConfig) (cmd []string, err error) { + binary, err := p.claudeBinary(ctx) + if err != nil { + return nil, err + } + + cmd = []string{binary} + if cfg.SessionID != "" { + cmd = append(cmd, "--session-id", claudeSessionUUID(cfg.SessionID)) + } + appendPermissionFlags(&cmd, cfg.Permissions) + + systemPrompt, err := resolveSystemPrompt(cfg) + if err != nil { + return nil, err + } + if systemPrompt != "" { + // Append rather than replace: Claude Code's default system prompt + // carries its tool-use and coding instructions, which we want to + // keep. The orchestrator prompt layers on top. + cmd = append(cmd, "--append-system-prompt", systemPrompt) + } + + if cfg.Prompt != "" { + cmd = append(cmd, "--", cfg.Prompt) + } + + return cmd, nil +} + +func (p *Plugin) GetPromptDeliveryStrategy(ctx context.Context, cfg agent.LaunchConfig) (agent.PromptDeliveryStrategy, error) { + if err := ctx.Err(); err != nil { + return "", err + } + return agent.PromptDeliveryInCommand, nil +} + +// PreLaunch is an optional capability the spawn engine invokes (via type +// assertion) immediately before creating the session. Claude Code shows a +// blocking "do you trust this folder?" dialog the first time it runs in any +// directory. Every better-ao worktree is a fresh path, so without this the +// agent would hang at that prompt with no one to answer it. +// +// A better-ao worktree is derived from the repo the user is already running +// better-ao in, so it is inherently trusted. PreLaunch records that trust in +// ~/.claude.json before launch, additively and atomically, so it cannot +// clobber a concurrently-running Claude instance's config. 
+func (p *Plugin) PreLaunch(ctx context.Context, cfg agent.LaunchConfig) error { + if err := ctx.Err(); err != nil { + return err + } + if cfg.WorkspacePath == "" { + return nil + } + cfgPath, err := claudeConfigPath() + if err != nil { + return err + } + return ensureWorkspaceTrusted(cfgPath, cfg.WorkspacePath) +} + +// GetRestoreCommand rebuilds the argv that continues an existing Claude Code +// session: `claude [--permission-mode <mode>] --resume <id>`. It +// prefers the hook-captured native session id from +// cfg.Session.Metadata["agentSessionId"]; for sessions created before hooks +// captured it, it falls back to the deterministic UUID better-ao pins via +// --session-id at launch. ok is false only when neither is available, so the +// caller fresh-spawns. The command re-applies the permission mode (resume +// otherwise reverts to the configured default) but not the prompt/system +// prompt, which the session already carries. +func (p *Plugin) GetRestoreCommand(ctx context.Context, cfg agent.RestoreConfig) (cmd []string, ok bool, err error) { + if err := ctx.Err(); err != nil { + return nil, false, err + } + + sessionID := strings.TrimSpace(cfg.Session.Metadata[claudeAgentSessionIDMetadataKey]) + if sessionID == "" && cfg.Session.ID != "" { + // Explicit fallback for pre-hook sessions: the id better-ao + // deterministically pinned via --session-id at launch. + sessionID = claudeSessionUUID(cfg.Session.ID) + } + if sessionID == "" { + return nil, false, nil + } + + binary, err := p.claudeBinary(ctx) + if err != nil { + return nil, false, err + } + cmd = []string{binary} + appendPermissionFlags(&cmd, cfg.Permissions) + cmd = append(cmd, "--resume", sessionID) + return cmd, true, nil +} + +// SessionInfo surfaces the normalized session metadata that the Claude Code +// hooks persisted into Better-AO's store: the native session id, the title (the +// first user prompt), and the summary (the final assistant message).
It reads +// only from session.Metadata — never from transcript files — and returns +// ok=false when none of those fields are present. Metadata is intentionally nil: +// there is no Claude-specific field callers need beyond the normalized ones. +func (p *Plugin) SessionInfo(ctx context.Context, session agent.SessionRef) (agent.SessionInfo, bool, error) { + if err := ctx.Err(); err != nil { + return agent.SessionInfo{}, false, err + } + info := agent.SessionInfo{ + AgentSessionID: session.Metadata[claudeAgentSessionIDMetadataKey], + Title: session.Metadata[claudeTitleMetadataKey], + Summary: session.Metadata[claudeSummaryMetadataKey], + } + if info.AgentSessionID == "" && info.Title == "" && info.Summary == "" { + return agent.SessionInfo{}, false, nil + } + return info, true, nil +} + +// claudeSessionUUID maps a better-ao session id onto a stable Claude Code +// session UUID via UUIDv5 over a fixed namespace, so the same better-ao session +// always resolves to the same Claude session. +func claudeSessionUUID(betterAoSessionID string) string { + return uuid.NewSHA1(claudeSessionNamespace, []byte(betterAoSessionID)).String() +} + +// resolveSystemPrompt returns the system prompt text to append, preferring +// SystemPromptFile (read from disk) over an inline SystemPrompt. +func resolveSystemPrompt(cfg agent.LaunchConfig) (string, error) { + if cfg.SystemPromptFile != "" { + data, err := os.ReadFile(cfg.SystemPromptFile) + if err != nil { + return "", fmt.Errorf("claude-code: read system prompt file: %w", err) + } + return strings.TrimRight(string(data), "\n"), nil + } + return cfg.SystemPrompt, nil +} + +// appendPermissionFlags maps better-ao's permission modes onto Claude Code's +// --permission-mode values: +// - default → no flag. Claude's TUI resolves the starting mode +// from ~/.claude/settings.json (defaultMode), exactly as a normal launch. 
+// - accept-edits → --permission-mode acceptEdits (auto-accept edits + +// safe filesystem bash; still prompts for network/system bash, MCP, web) +// - auto → --permission-mode auto (classifier-gated +// auto-approval; auto-runs what a safety model deems safe) +// - bypass-permissions → --permission-mode bypassPermissions (skip all +// checks; equivalent to --dangerously-skip-permissions) +// +// Empty/unrecognized normalizes to default, so no flag is emitted. +func appendPermissionFlags(cmd *[]string, permissions agent.PermissionMode) { + switch normalizePermissionMode(permissions) { + case agent.PermissionModeDefault: + // No flag: defer to the user's settings.json defaultMode. + case agent.PermissionModeAcceptEdits: + *cmd = append(*cmd, "--permission-mode", "acceptEdits") + case agent.PermissionModeAuto: + *cmd = append(*cmd, "--permission-mode", "auto") + case agent.PermissionModeBypassPermissions: + *cmd = append(*cmd, "--permission-mode", "bypassPermissions") + } +} + +func normalizePermissionMode(mode agent.PermissionMode) agent.PermissionMode { + switch mode { + case agent.PermissionModeDefault, + agent.PermissionModeAcceptEdits, + agent.PermissionModeAuto, + agent.PermissionModeBypassPermissions: + return mode + default: + // Empty or unrecognized: defer to settings.json (no flag). + return agent.PermissionModeDefault + } +} + +// ResolveClaudeBinary finds the `claude` binary, searching PATH then a few +// well-known install locations (the native installer's ~/.local/bin, npm +// global, Homebrew). Returns "claude" as a last resort so callers get a +// clear "command not found" rather than an empty argv. 
+func ResolveClaudeBinary(ctx context.Context) (string, error) { + if err := ctx.Err(); err != nil { + return "", err + } + + if runtime.GOOS == "windows" { + for _, name := range []string{"claude.cmd", "claude.exe", "claude"} { + if path, err := exec.LookPath(name); err == nil && path != "" { + return path, nil + } + } + candidates := []string{} + if appData := os.Getenv("APPDATA"); appData != "" { + candidates = append(candidates, + filepath.Join(appData, "npm", "claude.cmd"), + filepath.Join(appData, "npm", "claude.exe"), + ) + } + for _, candidate := range candidates { + if utils.FileExists(candidate) { + return candidate, nil + } + } + return "claude", nil + } + + if path, err := exec.LookPath("claude"); err == nil && path != "" { + return path, nil + } + + candidates := []string{ + "/usr/local/bin/claude", + "/opt/homebrew/bin/claude", + } + if home, err := os.UserHomeDir(); err == nil { + candidates = append(candidates, + filepath.Join(home, ".local", "bin", "claude"), + filepath.Join(home, ".npm", "bin", "claude"), + filepath.Join(home, ".claude", "local", "claude"), + ) + } + for _, candidate := range candidates { + if utils.FileExists(candidate) { + return candidate, nil + } + if err := ctx.Err(); err != nil { + return "", err + } + } + + return "claude", nil +} + +func (p *Plugin) claudeBinary(ctx context.Context) (string, error) { + p.binaryMu.Lock() + defer p.binaryMu.Unlock() + + if p.resolvedBinary != "" { + return p.resolvedBinary, nil + } + + binary, err := ResolveClaudeBinary(ctx) + if err != nil { + return "", err + } + p.resolvedBinary = binary + return binary, nil +} + +// claudeConfigPath returns the path to Claude Code's global config file, +// ~/.claude.json. 
+func claudeConfigPath() (string, error) { + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("claude-code: resolve home directory: %w", err) + } + return filepath.Join(home, ".claude.json"), nil +} + +// ensureWorkspaceTrusted records workspacePath as trusted in Claude Code's +// config so the interactive trust dialog does not block a spawned session. +// +// It is additive and concurrency-safe: it reads the existing config, sets +// only projects[workspacePath].hasTrustDialogAccepted = true (preserving the +// rest of the entry and every other project), and writes back via a +// temp-file + atomic rename. If the path is already trusted, it makes no +// write at all. A missing config file is treated as an empty one. +func ensureWorkspaceTrusted(configPath, workspacePath string) error { + root := map[string]any{} + data, err := os.ReadFile(configPath) + switch { + case err == nil: + if len(data) > 0 { + if err := json.Unmarshal(data, &root); err != nil { + return fmt.Errorf("claude-code: parse %s: %w", configPath, err) + } + } + case os.IsNotExist(err): + // Treat as empty config; we'll create it. + default: + return fmt.Errorf("claude-code: read %s: %w", configPath, err) + } + + projects, _ := root["projects"].(map[string]any) + if projects == nil { + projects = map[string]any{} + root["projects"] = projects + } + + entry, _ := projects[workspacePath].(map[string]any) + if entry == nil { + entry = map[string]any{} + projects[workspacePath] = entry + } + + if trusted, ok := entry["hasTrustDialogAccepted"].(bool); ok && trusted { + // Already trusted — no write needed, so no race window at all. + return nil + } + entry["hasTrustDialogAccepted"] = true + + out, err := json.MarshalIndent(root, "", " ") + if err != nil { + return fmt.Errorf("claude-code: encode %s: %w", configPath, err) + } + + // Atomic write: temp file in the same directory, then rename. 
Matches + // how Claude Code itself updates this file, so concurrent updates are + // last-writer-wins rather than corrupting. + dir := filepath.Dir(configPath) + tmp, err := os.CreateTemp(dir, ".claude.json.tmp-*") + if err != nil { + return fmt.Errorf("claude-code: create temp config: %w", err) + } + tmpName := tmp.Name() + defer os.Remove(tmpName) // no-op once renamed + + if _, err := tmp.Write(out); err != nil { + _ = tmp.Close() + return fmt.Errorf("claude-code: write temp config: %w", err) + } + if err := tmp.Close(); err != nil { + return fmt.Errorf("claude-code: close temp config: %w", err) + } + if err := os.Rename(tmpName, configPath); err != nil { + return fmt.Errorf("claude-code: replace config: %w", err) + } + return nil +} diff --git a/backend/internal/plugin/agent/claudecode/claudecode_test.go b/backend/internal/plugin/agent/claudecode/claudecode_test.go new file mode 100644 index 00000000..049f99d9 --- /dev/null +++ b/backend/internal/plugin/agent/claudecode/claudecode_test.go @@ -0,0 +1,472 @@ +package claudecode + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "reflect" + "testing" + + "github.com/google/uuid" + "github.com/yyovil/better-ao/internal/plugin/agent" +) + +func TestGetLaunchCommandBypassWithPrompt(t *testing.T) { + p := &Plugin{resolvedBinary: "claude"} + + cmd, err := p.GetLaunchCommand(context.Background(), agent.LaunchConfig{ + Permissions: agent.PermissionModeBypassPermissions, + Prompt: "-add a health check", + }) + if err != nil { + t.Fatal(err) + } + + want := []string{ + "claude", + "--permission-mode", "bypassPermissions", + "--", "-add a health check", + } + if !reflect.DeepEqual(cmd, want) { + t.Fatalf("unexpected command\nwant: %#v\n got: %#v", want, cmd) + } +} + +func TestGetLaunchCommandMapsPermissionModes(t *testing.T) { + tests := []struct { + name string + permission agent.PermissionMode + want []string + notExpected string + }{ + {"default omits flag (defers to settings.json)", 
agent.PermissionModeDefault, nil, "--permission-mode"}, + {"accept-edits", agent.PermissionModeAcceptEdits, []string{"--permission-mode", "acceptEdits"}, ""}, + {"auto", agent.PermissionModeAuto, []string{"--permission-mode", "auto"}, ""}, + {"bypass-permissions", agent.PermissionModeBypassPermissions, []string{"--permission-mode", "bypassPermissions"}, ""}, + {"empty omits permission flags", "", nil, "--permission-mode"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := &Plugin{resolvedBinary: "claude"} + cmd, err := p.GetLaunchCommand(context.Background(), agent.LaunchConfig{ + Permissions: tt.permission, + }) + if err != nil { + t.Fatal(err) + } + if len(tt.want) > 0 && !containsSubsequence(cmd, tt.want) { + t.Fatalf("command %#v does not contain %#v", cmd, tt.want) + } + if tt.notExpected != "" && contains(cmd, tt.notExpected) { + t.Fatalf("command %#v unexpectedly contains %q", cmd, tt.notExpected) + } + }) + } +} + +func TestGetLaunchCommandAppendsSystemPromptFromFile(t *testing.T) { + dir := t.TempDir() + promptFile := filepath.Join(dir, "system.md") + if err := os.WriteFile(promptFile, []byte("You are an orchestrator.\n"), 0o644); err != nil { + t.Fatal(err) + } + + p := &Plugin{resolvedBinary: "claude"} + cmd, err := p.GetLaunchCommand(context.Background(), agent.LaunchConfig{ + SystemPromptFile: promptFile, + Prompt: "do the thing", + }) + if err != nil { + t.Fatal(err) + } + + want := []string{ + "claude", + "--append-system-prompt", "You are an orchestrator.", + "--", "do the thing", + } + if !reflect.DeepEqual(cmd, want) { + t.Fatalf("unexpected command\nwant: %#v\n got: %#v", want, cmd) + } +} + +func TestGetLaunchCommandInlineSystemPrompt(t *testing.T) { + p := &Plugin{resolvedBinary: "claude"} + cmd, err := p.GetLaunchCommand(context.Background(), agent.LaunchConfig{ + SystemPrompt: "inline instructions", + }) + if err != nil { + t.Fatal(err) + } + if !containsSubsequence(cmd, []string{"--append-system-prompt", "inline 
instructions"}) { + t.Fatalf("command %#v does not append inline system prompt", cmd) + } +} + +func TestGetLaunchCommandMissingSystemPromptFileErrors(t *testing.T) { + p := &Plugin{resolvedBinary: "claude"} + _, err := p.GetLaunchCommand(context.Background(), agent.LaunchConfig{ + SystemPromptFile: filepath.Join(t.TempDir(), "does-not-exist.md"), + }) + if err == nil { + t.Fatal("expected error for missing system prompt file") + } +} + +func TestGetLaunchCommandInjectsSessionID(t *testing.T) { + p := &Plugin{resolvedBinary: "claude"} + cmd, err := p.GetLaunchCommand(context.Background(), agent.LaunchConfig{ + SessionID: "e0tt49", + Prompt: "do the thing", + }) + if err != nil { + t.Fatal(err) + } + wantUUID := claudeSessionUUID("e0tt49") + if !containsSubsequence(cmd, []string{"--session-id", wantUUID}) { + t.Fatalf("command %#v missing --session-id %q", cmd, wantUUID) + } + + // No SessionID → no --session-id flag. + cmd, err = p.GetLaunchCommand(context.Background(), agent.LaunchConfig{Prompt: "x"}) + if err != nil { + t.Fatal(err) + } + if contains(cmd, "--session-id") { + t.Fatalf("command %#v unexpectedly contains --session-id", cmd) + } +} + +func TestClaudeSessionUUIDDeterministicAndUnique(t *testing.T) { + a1 := claudeSessionUUID("alpha") + a2 := claudeSessionUUID("alpha") + b := claudeSessionUUID("beta") + if a1 != a2 { + t.Fatalf("derivation not deterministic: %q != %q", a1, a2) + } + if a1 == b { + t.Fatalf("distinct ids collided: both %q", a1) + } + if _, err := uuid.Parse(a1); err != nil { + t.Fatalf("derived value is not a valid UUID: %q (%v)", a1, err) + } +} + +func TestGetAgentHooksInstallsClaudeHooks(t *testing.T) { + p := &Plugin{resolvedBinary: "claude"} + workspace := t.TempDir() + settingsDir := filepath.Join(workspace, ".claude") + if err := os.MkdirAll(settingsDir, 0o755); err != nil { + t.Fatal(err) + } + settingsPath := filepath.Join(settingsDir, "settings.local.json") + // Pre-seed a user's own Stop hook + an unrelated setting; both must 
survive. + existing := `{"hooks":{"Stop":[{"hooks":[{"type":"command","command":"my own stop hook","timeout":5}]}]},"permissions":{"defaultMode":"plan"}}` + if err := os.WriteFile(settingsPath, []byte(existing), 0o644); err != nil { + t.Fatal(err) + } + + cfg := agent.WorkspaceHookConfig{DataDir: t.TempDir(), SessionID: "sess-1", WorkspacePath: workspace} + if err := p.GetAgentHooks(context.Background(), cfg); err != nil { + t.Fatal(err) + } + // A second install must not duplicate Better-AO hook commands. + if err := p.GetAgentHooks(context.Background(), cfg); err != nil { + t.Fatal(err) + } + + data, err := os.ReadFile(settingsPath) + if err != nil { + t.Fatal(err) + } + var config struct { + Hooks map[string][]claudeMatcherGroup `json:"hooks"` + Permissions json.RawMessage `json:"permissions"` + } + if err := json.Unmarshal(data, &config); err != nil { + t.Fatal(err) + } + if config.Hooks == nil { + t.Fatalf("hooks object missing: %s", data) + } + + // Every command in the embedded template is installed exactly once. + templateHooks, err := claudeEmbeddedHookGroups() + if err != nil { + t.Fatal(err) + } + for event, templateGroups := range templateHooks { + for _, group := range templateGroups { + for _, hook := range group.Hooks { + if got := countClaudeHookCommand(config.Hooks[event], hook.Command); got != 1 { + t.Fatalf("%s command %q count = %d, want 1", event, hook.Command, got) + } + } + } + } + // Existing user hook preserved. + if countClaudeHookCommand(config.Hooks["Stop"], "my own stop hook") != 1 { + t.Fatalf("existing Stop hook not preserved: %#v", config.Hooks["Stop"]) + } + // Unrelated settings preserved. + if len(config.Permissions) == 0 { + t.Fatalf("unrelated settings clobbered: %s", data) + } + // SessionStart carries the required matcher; UserPromptSubmit omits it. 
+ if m := matcherForCommand(config.Hooks["SessionStart"], "better-ao hooks claude-code session-start"); m == nil || *m != "startup" { + t.Fatalf("SessionStart matcher = %v, want startup", m) + } + if m := matcherForCommand(config.Hooks["UserPromptSubmit"], "better-ao hooks claude-code user-prompt-submit"); m != nil { + t.Fatalf("UserPromptSubmit matcher = %v, want none", m) + } +} + +func TestSessionInfoReadsHookMetadata(t *testing.T) { + info, ok, err := (&Plugin{resolvedBinary: "claude"}).SessionInfo(context.Background(), agent.SessionRef{ + WorkspacePath: "/some/path", + Metadata: map[string]string{ + claudeAgentSessionIDMetadataKey: "claude-native-1", + claudeTitleMetadataKey: "Fix login redirect", + claudeSummaryMetadataKey: "Updated the auth callback and tests.", + "ignored": "not returned", + }, + }) + if err != nil || !ok { + t.Fatalf("SessionInfo = (ok=%v, err=%v), want ok", ok, err) + } + if info.AgentSessionID != "claude-native-1" { + t.Fatalf("AgentSessionID = %q", info.AgentSessionID) + } + if info.Title != "Fix login redirect" { + t.Fatalf("Title = %q", info.Title) + } + if info.Summary != "Updated the auth callback and tests." { + t.Fatalf("Summary = %q", info.Summary) + } + if info.Metadata != nil { + t.Fatalf("Metadata = %#v, want nil for Claude", info.Metadata) + } +} + +func TestSessionInfoFalseWhenNoHookMetadata(t *testing.T) { + info, ok, err := (&Plugin{resolvedBinary: "claude"}).SessionInfo(context.Background(), agent.SessionRef{ + WorkspacePath: "/some/path", + Metadata: map[string]string{}, + }) + if err != nil { + t.Fatalf("err = %v", err) + } + if ok { + t.Fatalf("ok = true, want false") + } + if !reflect.DeepEqual(info, agent.SessionInfo{}) { + t.Fatalf("info = %#v, want zero", info) + } +} + +// countClaudeHookCommand counts how many hook entries under one event register +// the given command — used to prove no duplicate Better-AO hooks. 
+func countClaudeHookCommand(groups []claudeMatcherGroup, command string) int { + count := 0 + for _, group := range groups { + for _, hook := range group.Hooks { + if hook.Command == command { + count++ + } + } + } + return count +} + +// matcherForCommand returns the matcher on the group that registers the given +// command (nil if the group has no matcher). +func matcherForCommand(groups []claudeMatcherGroup, command string) *string { + for _, group := range groups { + for _, hook := range group.Hooks { + if hook.Command == command { + return group.Matcher + } + } + } + return nil +} + +func TestGetRestoreCommandReadsAgentSessionID(t *testing.T) { + cmd, ok, err := (&Plugin{resolvedBinary: "claude"}).GetRestoreCommand(context.Background(), agent.RestoreConfig{ + Permissions: agent.PermissionModeBypassPermissions, + Session: agent.SessionRef{ + ID: "sess-r", + Metadata: map[string]string{claudeAgentSessionIDMetadataKey: "claude-native-1"}, + }, + }) + if err != nil || !ok { + t.Fatalf("restore = (ok=%v, err=%v), want ok", ok, err) + } + // The hook-captured native id wins over the derived fallback. + want := []string{"claude", "--permission-mode", "bypassPermissions", "--resume", "claude-native-1"} + if !reflect.DeepEqual(cmd, want) { + t.Fatalf("restore cmd\nwant: %#v\n got: %#v", want, cmd) + } +} + +func TestGetRestoreCommandFallsBackToDerivedUUID(t *testing.T) { + // No agentSessionId captured (pre-hook session) → derive deterministically + // from the better-ao session id, the explicit fallback. 
+ cmd, ok, err := (&Plugin{resolvedBinary: "claude"}).GetRestoreCommand(context.Background(), agent.RestoreConfig{ + Permissions: agent.PermissionModeBypassPermissions, + Session: agent.SessionRef{ID: "sess-r"}, + }) + if err != nil || !ok { + t.Fatalf("restore = (ok=%v, err=%v), want ok", ok, err) + } + want := []string{"claude", "--permission-mode", "bypassPermissions", "--resume", claudeSessionUUID("sess-r")} + if !reflect.DeepEqual(cmd, want) { + t.Fatalf("restore cmd\nwant: %#v\n got: %#v", want, cmd) + } +} + +func TestGetRestoreCommandFalseWithoutSessionID(t *testing.T) { + cases := []struct { + name string + ref agent.SessionRef + }{ + {"empty ref", agent.SessionRef{}}, + {"blank agent session, no id", agent.SessionRef{Metadata: map[string]string{claudeAgentSessionIDMetadataKey: " "}}}, + {"workspace path only", agent.SessionRef{WorkspacePath: "/some/path"}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + cmd, ok, err := (&Plugin{resolvedBinary: "claude"}).GetRestoreCommand(context.Background(), + agent.RestoreConfig{Permissions: agent.PermissionModeBypassPermissions, Session: tc.ref}) + if err != nil || ok || cmd != nil { + t.Fatalf("restore = (%#v, %v, %v), want (nil,false,nil)", cmd, ok, err) + } + }) + } +} + +func TestManifestID(t *testing.T) { + if got := New().Manifest().ID; got != "claude-code" { + t.Fatalf("manifest id = %q, want claude-code", got) + } +} + +func TestEnsureWorkspaceTrustedCreatesEntry(t *testing.T) { + dir := t.TempDir() + cfgPath := filepath.Join(dir, ".claude.json") + // Seed an existing config with another project + a top-level key, to + // prove we preserve unrelated state. 
+ seed := `{"userID":"abc","projects":{"/existing/proj":{"hasTrustDialogAccepted":true,"lastCost":1.5}}}` + if err := os.WriteFile(cfgPath, []byte(seed), 0o600); err != nil { + t.Fatal(err) + } + + work := "/Users/me/.better-ao/worktrees/01ABC" + if err := ensureWorkspaceTrusted(cfgPath, work); err != nil { + t.Fatalf("ensureWorkspaceTrusted: %v", err) + } + + root := readJSON(t, cfgPath) + projects := root["projects"].(map[string]any) + + // New entry trusted. + newEntry := projects[work].(map[string]any) + if newEntry["hasTrustDialogAccepted"] != true { + t.Fatalf("new entry not trusted: %#v", newEntry) + } + // Existing project preserved (including its other fields). + existing := projects["/existing/proj"].(map[string]any) + if existing["hasTrustDialogAccepted"] != true || existing["lastCost"].(float64) != 1.5 { + t.Fatalf("existing project clobbered: %#v", existing) + } + // Top-level key preserved. + if root["userID"] != "abc" { + t.Fatalf("top-level key clobbered: %#v", root["userID"]) + } +} + +func TestEnsureWorkspaceTrustedIsIdempotentAndNoWriteWhenAlreadyTrusted(t *testing.T) { + dir := t.TempDir() + cfgPath := filepath.Join(dir, ".claude.json") + work := "/w" + if err := os.WriteFile(cfgPath, []byte(`{"projects":{"/w":{"hasTrustDialogAccepted":true}}}`), 0o600); err != nil { + t.Fatal(err) + } + info1, err := os.Stat(cfgPath) + if err != nil { + t.Fatal(err) + } + + if err := ensureWorkspaceTrusted(cfgPath, work); err != nil { + t.Fatalf("ensureWorkspaceTrusted: %v", err) + } + + // Already trusted → no rewrite → mtime unchanged. 
+ info2, err := os.Stat(cfgPath) + if err != nil { + t.Fatal(err) + } + if !info1.ModTime().Equal(info2.ModTime()) { + t.Fatal("expected no rewrite when already trusted") + } +} + +func TestEnsureWorkspaceTrustedCreatesMissingConfig(t *testing.T) { + dir := t.TempDir() + cfgPath := filepath.Join(dir, ".claude.json") // does not exist yet + work := "/fresh/worktree" + + if err := ensureWorkspaceTrusted(cfgPath, work); err != nil { + t.Fatalf("ensureWorkspaceTrusted: %v", err) + } + + root := readJSON(t, cfgPath) + projects := root["projects"].(map[string]any) + entry := projects[work].(map[string]any) + if entry["hasTrustDialogAccepted"] != true { + t.Fatalf("entry not trusted in freshly-created config: %#v", entry) + } +} + +func readJSON(t *testing.T, path string) map[string]any { + t.Helper() + data, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + var m map[string]any + if err := json.Unmarshal(data, &m); err != nil { + t.Fatalf("parse %s: %v", path, err) + } + return m +} + +func contains(values []string, needle string) bool { + for _, v := range values { + if v == needle { + return true + } + } + return false +} + +func containsSubsequence(values, needle []string) bool { + if len(needle) == 0 { + return true + } + for start := 0; start+len(needle) <= len(values); start++ { + ok := true + for i, w := range needle { + if values[start+i] != w { + ok = false + break + } + } + if ok { + return true + } + } + return false +} diff --git a/backend/internal/plugin/agent/claudecode/hooks.go b/backend/internal/plugin/agent/claudecode/hooks.go new file mode 100644 index 00000000..093fc990 --- /dev/null +++ b/backend/internal/plugin/agent/claudecode/hooks.go @@ -0,0 +1,187 @@ +package claudecode + +import ( + "context" + "embed" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/yyovil/better-ao/internal/plugin/agent" +) + +const ( + claudeSettingsDirName = ".claude" + claudeSettingsFileName = "settings.local.json" + 
claudeHooksTemplate = ".claude/settings.local.json" +) + +//go:embed .claude/settings.local.json +var claudeHookTemplateFS embed.FS + +type claudeHookFile struct { + Hooks map[string][]claudeMatcherGroup `json:"hooks"` +} + +type claudeMatcherGroup struct { + // Matcher is a pointer so it round-trips exactly: SessionStart requires a + // real matcher ("startup"); UserPromptSubmit/Stop omit it (Claude ignores + // matcher for those events). omitempty drops a nil matcher on write. + Matcher *string `json:"matcher,omitempty"` + Hooks []claudeHookEntry `json:"hooks"` +} + +type claudeHookEntry struct { + Type string `json:"type"` + Command string `json:"command"` + Timeout int `json:"timeout,omitempty"` +} + +// GetAgentHooks installs Better-AO's Claude Code hooks into the worktree-local +// .claude/settings.local.json file (the per-session local settings, not the +// shared .claude/settings.json). The hooks (SessionStart, UserPromptSubmit, +// Stop) report normalized session metadata back into Better-AO's store. Existing +// hooks and unrelated settings are preserved, and duplicate Better-AO commands +// are not appended, so the install is idempotent. +func (p *Plugin) GetAgentHooks(ctx context.Context, cfg agent.WorkspaceHookConfig) error { + if err := ctx.Err(); err != nil { + return err + } + if strings.TrimSpace(cfg.WorkspacePath) == "" { + return errors.New("claude-code.GetAgentHooks: WorkspacePath is required") + } + + settingsPath := filepath.Join(cfg.WorkspacePath, claudeSettingsDirName, claudeSettingsFileName) + // Preserve every top-level setting (permissions, model, …) and every hook + // event we don't touch by keeping them as raw JSON. 
+ topLevel := map[string]json.RawMessage{} + rawHooks := map[string]json.RawMessage{} + + if existingData, err := os.ReadFile(settingsPath); err == nil { + if len(strings.TrimSpace(string(existingData))) > 0 { + if err := json.Unmarshal(existingData, &topLevel); err != nil { + return fmt.Errorf("claude-code.GetAgentHooks: parse %s: %w", settingsPath, err) + } + if hooksRaw, ok := topLevel["hooks"]; ok { + if err := json.Unmarshal(hooksRaw, &rawHooks); err != nil { + return fmt.Errorf("claude-code.GetAgentHooks: parse hooks in %s: %w", settingsPath, err) + } + } + } + } else if !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("claude-code.GetAgentHooks: read %s: %w", settingsPath, err) + } + + templateHooks, err := claudeEmbeddedHookGroups() + if err != nil { + return err + } + for event, templateGroups := range templateHooks { + var existingGroups []claudeMatcherGroup + if err := parseClaudeHookType(rawHooks, event, &existingGroups); err != nil { + return err + } + for _, group := range templateGroups { + for _, hook := range group.Hooks { + if !claudeHookCommandExists(existingGroups, hook.Command) { + existingGroups = addClaudeHook(existingGroups, hook, group.Matcher) + } + } + } + if err := marshalClaudeHookType(rawHooks, event, existingGroups); err != nil { + return err + } + } + + hooksJSON, err := json.Marshal(rawHooks) + if err != nil { + return fmt.Errorf("claude-code.GetAgentHooks: encode hooks: %w", err) + } + topLevel["hooks"] = hooksJSON + + if err := os.MkdirAll(filepath.Dir(settingsPath), 0o750); err != nil { + return fmt.Errorf("claude-code.GetAgentHooks: create settings dir: %w", err) + } + data, err := json.MarshalIndent(topLevel, "", " ") + if err != nil { + return fmt.Errorf("claude-code.GetAgentHooks: encode %s: %w", settingsPath, err) + } + data = append(data, '\n') + if err := os.WriteFile(settingsPath, data, 0o600); err != nil { + return fmt.Errorf("claude-code.GetAgentHooks: write %s: %w", settingsPath, err) + } + return nil +} + +func 
claudeEmbeddedHookGroups() (map[string][]claudeMatcherGroup, error) { + data, err := claudeHookTemplateFS.ReadFile(claudeHooksTemplate) + if err != nil { + return nil, fmt.Errorf("claude-code.GetAgentHooks: read embedded %s: %w", claudeHooksTemplate, err) + } + var file claudeHookFile + if err := json.Unmarshal(data, &file); err != nil { + return nil, fmt.Errorf("claude-code.GetAgentHooks: parse embedded %s: %w", claudeHooksTemplate, err) + } + if file.Hooks == nil { + return map[string][]claudeMatcherGroup{}, nil + } + return file.Hooks, nil +} + +func parseClaudeHookType(rawHooks map[string]json.RawMessage, event string, target *[]claudeMatcherGroup) error { + data, ok := rawHooks[event] + if !ok { + return nil + } + if err := json.Unmarshal(data, target); err != nil { + return fmt.Errorf("claude-code.GetAgentHooks: parse %s hooks: %w", event, err) + } + return nil +} + +func marshalClaudeHookType(rawHooks map[string]json.RawMessage, event string, groups []claudeMatcherGroup) error { + if len(groups) == 0 { + delete(rawHooks, event) + return nil + } + data, err := json.Marshal(groups) + if err != nil { + return fmt.Errorf("claude-code.GetAgentHooks: encode %s hooks: %w", event, err) + } + rawHooks[event] = data + return nil +} + +func claudeHookCommandExists(groups []claudeMatcherGroup, command string) bool { + for _, group := range groups { + for _, hook := range group.Hooks { + if hook.Command == command { + return true + } + } + } + return false +} + +// addClaudeHook appends hook to an existing group with the same matcher (so a +// SessionStart hook lands under its "startup" matcher), creating that group if +// none matches. 
+func addClaudeHook(groups []claudeMatcherGroup, hook claudeHookEntry, matcher *string) []claudeMatcherGroup { + for i, group := range groups { + if matchersEqual(group.Matcher, matcher) { + groups[i].Hooks = append(groups[i].Hooks, hook) + return groups + } + } + return append(groups, claudeMatcherGroup{Matcher: matcher, Hooks: []claudeHookEntry{hook}}) +} + +func matchersEqual(a, b *string) bool { + if a == nil || b == nil { + return a == nil && b == nil + } + return *a == *b +} diff --git a/backend/internal/plugin/agent/codex/.codex/hooks.json b/backend/internal/plugin/agent/codex/.codex/hooks.json new file mode 100644 index 00000000..aaf1660b --- /dev/null +++ b/backend/internal/plugin/agent/codex/.codex/hooks.json @@ -0,0 +1,40 @@ +{ + "hooks": { + "SessionStart": [ + { + "matcher": null, + "hooks": [ + { + "type": "command", + "command": "better-ao hooks codex session-start", + "timeout": 30 + } + ] + } + ], + "UserPromptSubmit": [ + { + "matcher": null, + "hooks": [ + { + "type": "command", + "command": "better-ao hooks codex user-prompt-submit", + "timeout": 30 + } + ] + } + ], + "Stop": [ + { + "matcher": null, + "hooks": [ + { + "type": "command", + "command": "better-ao hooks codex stop", + "timeout": 30 + } + ] + } + ] + } +} diff --git a/backend/internal/plugin/agent/codex/codex.go b/backend/internal/plugin/agent/codex/codex.go new file mode 100644 index 00000000..d451517d --- /dev/null +++ b/backend/internal/plugin/agent/codex/codex.go @@ -0,0 +1,244 @@ +// Package codex implements the Codex agent plugin: launching new sessions, +// resuming hook-tracked sessions, installing workspace-local hooks, and reading +// hook-derived session info. +// +// Better-AO-managed sessions derive native session identity and display +// metadata from Codex hooks instead of transcript/cache scans. 
+package codex + +import ( + "context" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "sync" + + "github.com/yyovil/better-ao/internal/plugin" + "github.com/yyovil/better-ao/internal/plugin/agent" + "github.com/yyovil/better-ao/internal/utils" +) + +const ( + codexAgentSessionIDMetadataKey = "agentSessionId" + codexTitleMetadataKey = "title" + codexSummaryMetadataKey = "summary" +) + +type Plugin struct { + binaryMu sync.Mutex + resolvedBinary string +} + +func New() *Plugin { + return &Plugin{} +} + +var _ plugin.Plugin = (*Plugin)(nil) +var _ agent.Agent = (*Plugin)(nil) + +func (p *Plugin) Manifest() plugin.Manifest { + return plugin.Manifest{ + ID: "codex", + Name: "Codex", + Description: "Run Codex worker sessions.", + Version: "0.0.1", + Capabilities: []plugin.Capability{ + plugin.CapabilityAgent, + }, + } +} + +func (p *Plugin) GetConfigSpec(ctx context.Context) (agent.ConfigSpec, error) { + if err := ctx.Err(); err != nil { + return agent.ConfigSpec{}, err + } + return agent.ConfigSpec{}, nil +} + +func (p *Plugin) GetLaunchCommand(ctx context.Context, cfg agent.LaunchConfig) (cmd []string, err error) { + binary, err := p.codexBinary(ctx) + if err != nil { + return nil, err + } + + cmd = []string{binary} + appendNoUpdateCheckFlag(&cmd) + appendApprovalFlags(&cmd, cfg.Permissions) + + if cfg.SystemPromptFile != "" { + cmd = append(cmd, "-c", "model_instructions_file="+cfg.SystemPromptFile) + } else if cfg.SystemPrompt != "" { + cmd = append(cmd, "-c", "developer_instructions="+cfg.SystemPrompt) + } + + if cfg.Prompt != "" { + cmd = append(cmd, "--", cfg.Prompt) + } + + return cmd, nil +} + +func (p *Plugin) GetPromptDeliveryStrategy(ctx context.Context, cfg agent.LaunchConfig) (agent.PromptDeliveryStrategy, error) { + if err := ctx.Err(); err != nil { + return "", err + } + return agent.PromptDeliveryInCommand, nil +} + +// GetRestoreCommand rebuilds the argv that continues an existing Codex +// session: `codex resume `. 
ok is false when the hook-derived +// native session id has not landed yet, so callers can fall back to fresh +// launch behavior. +func (p *Plugin) GetRestoreCommand(ctx context.Context, cfg agent.RestoreConfig) (cmd []string, ok bool, err error) { + if err := ctx.Err(); err != nil { + return nil, false, err + } + agentSessionID := strings.TrimSpace(cfg.Session.Metadata[codexAgentSessionIDMetadataKey]) + if agentSessionID == "" { + return nil, false, nil + } + + binary, err := p.codexBinary(ctx) + if err != nil { + return nil, false, err + } + + cmd = []string{binary, "resume"} + appendNoUpdateCheckFlag(&cmd) + appendApprovalFlags(&cmd, cfg.Permissions) + cmd = append(cmd, agentSessionID) + return cmd, true, nil +} + +// SessionInfo surfaces Codex hook-derived metadata. Metadata is intentionally +// nil for Codex: callers get the normalized fields directly. +func (p *Plugin) SessionInfo(ctx context.Context, session agent.SessionRef) (agent.SessionInfo, bool, error) { + if err := ctx.Err(); err != nil { + return agent.SessionInfo{}, false, err + } + info := agent.SessionInfo{ + AgentSessionID: session.Metadata[codexAgentSessionIDMetadataKey], + Title: session.Metadata[codexTitleMetadataKey], + Summary: session.Metadata[codexSummaryMetadataKey], + } + if info.AgentSessionID == "" && info.Title == "" && info.Summary == "" { + return agent.SessionInfo{}, false, nil + } + return info, true, nil +} + +// ResolveCodexBinary returns the path to the codex binary on this machine, +// searching PATH then a handful of well-known install locations +// (Homebrew, Cargo, npm global). Returns "codex" as a last-ditch fallback +// so callers see a clear "command not found" rather than an empty argv. 
+func ResolveCodexBinary(ctx context.Context) (string, error) { + if err := ctx.Err(); err != nil { + return "", err + } + + if runtime.GOOS == "windows" { + for _, name := range []string{"codex.cmd", "codex.exe", "codex"} { + path, err := exec.LookPath(name) + if err == nil && path != "" { + return path, nil + } + if err := ctx.Err(); err != nil { + return "", err + } + } + + candidates := []string{} + if appData := os.Getenv("APPDATA"); appData != "" { + candidates = append(candidates, + filepath.Join(appData, "npm", "codex.cmd"), + filepath.Join(appData, "npm", "codex.exe"), + ) + } + if home, err := os.UserHomeDir(); err == nil { + candidates = append(candidates, filepath.Join(home, ".cargo", "bin", "codex.exe")) + } + for _, candidate := range candidates { + if utils.FileExists(candidate) { + return candidate, nil + } + if err := ctx.Err(); err != nil { + return "", err + } + } + + return "codex", nil + } + + if path, err := exec.LookPath("codex"); err == nil && path != "" { + return path, nil + } + + candidates := []string{ + "/usr/local/bin/codex", + "/opt/homebrew/bin/codex", + } + if home, err := os.UserHomeDir(); err == nil { + candidates = append(candidates, + filepath.Join(home, ".cargo", "bin", "codex"), + filepath.Join(home, ".npm", "bin", "codex"), + ) + } + + for _, candidate := range candidates { + if utils.FileExists(candidate) { + return candidate, nil + } + if err := ctx.Err(); err != nil { + return "", err + } + } + + return "codex", nil +} + +func (p *Plugin) codexBinary(ctx context.Context) (string, error) { + p.binaryMu.Lock() + defer p.binaryMu.Unlock() + + if p.resolvedBinary != "" { + return p.resolvedBinary, nil + } + + binary, err := ResolveCodexBinary(ctx) + if err != nil { + return "", err + } + p.resolvedBinary = binary + return binary, nil +} + +func appendNoUpdateCheckFlag(cmd *[]string) { + *cmd = append(*cmd, "-c", "check_for_update_on_startup=false") +} + +func appendApprovalFlags(cmd *[]string, permissions agent.PermissionMode) 
{ + switch normalizePermissionMode(permissions) { + case agent.PermissionModeDefault: + // No flag: defer to the user's Codex config/default behavior. + case agent.PermissionModeAcceptEdits: + *cmd = append(*cmd, "--ask-for-approval", "on-request") + case agent.PermissionModeAuto: + *cmd = append(*cmd, "--ask-for-approval", "on-request", "-c", `approvals_reviewer="auto_review"`) + case agent.PermissionModeBypassPermissions: + *cmd = append(*cmd, "--dangerously-bypass-approvals-and-sandbox") + } +} + +func normalizePermissionMode(mode agent.PermissionMode) agent.PermissionMode { + switch mode { + case agent.PermissionModeDefault, + agent.PermissionModeAcceptEdits, + agent.PermissionModeAuto, + agent.PermissionModeBypassPermissions: + return mode + default: + return agent.PermissionModeDefault + } +} diff --git a/backend/internal/plugin/agent/codex/codex_test.go b/backend/internal/plugin/agent/codex/codex_test.go new file mode 100644 index 00000000..24dcf425 --- /dev/null +++ b/backend/internal/plugin/agent/codex/codex_test.go @@ -0,0 +1,335 @@ +package codex + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "reflect" + "strings" + "testing" + + "github.com/yyovil/better-ao/internal/plugin/agent" +) + +func TestGetLaunchCommandBuildsCrossPlatformArgv(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + + cmd, err := plugin.GetLaunchCommand(context.Background(), agent.LaunchConfig{ + Permissions: agent.PermissionModeBypassPermissions, + Prompt: "-fix this", + SystemPromptFile: filepath.Join("tmp", "prompt with spaces.md"), + SystemPrompt: "ignored", + }) + if err != nil { + t.Fatal(err) + } + + want := []string{ + "codex", + "-c", "check_for_update_on_startup=false", + "--dangerously-bypass-approvals-and-sandbox", + "-c", "model_instructions_file=" + filepath.Join("tmp", "prompt with spaces.md"), + "--", "-fix this", + } + if !reflect.DeepEqual(cmd, want) { + t.Fatalf("unexpected command\nwant: %#v\n got: %#v", want, cmd) + } +} + +func 
TestGetLaunchCommandMapsApprovalModes(t *testing.T) { + tests := []struct { + name string + permission agent.PermissionMode + want []string + notExpected string + }{ + { + name: "default", + permission: agent.PermissionModeDefault, + notExpected: "--ask-for-approval", + }, + { + name: "accept-edits", + permission: agent.PermissionModeAcceptEdits, + want: []string{"--ask-for-approval", "on-request"}, + }, + { + name: "auto", + permission: agent.PermissionModeAuto, + want: []string{"--ask-for-approval", "on-request", "-c", `approvals_reviewer="auto_review"`}, + }, + { + name: "bypass-permissions", + permission: agent.PermissionModeBypassPermissions, + want: []string{"--dangerously-bypass-approvals-and-sandbox"}, + }, + { + name: "empty", + permission: "", + notExpected: "--ask-for-approval", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + cmd, err := plugin.GetLaunchCommand(context.Background(), agent.LaunchConfig{ + Permissions: tt.permission, + }) + if err != nil { + t.Fatal(err) + } + if len(tt.want) > 0 && !containsSubsequence(cmd, tt.want) { + t.Fatalf("command %#v does not contain %#v", cmd, tt.want) + } + if tt.notExpected != "" && contains(cmd, tt.notExpected) { + t.Fatalf("command %#v contains %q", cmd, tt.notExpected) + } + }) + } +} + +func TestGetPromptDeliveryStrategyIsInCommand(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + + got, err := plugin.GetPromptDeliveryStrategy(context.Background(), agent.LaunchConfig{}) + if err != nil { + t.Fatal(err) + } + if got != agent.PromptDeliveryInCommand { + t.Fatalf("unexpected strategy: %q", got) + } +} + +func TestGetConfigSpecHasNoCustomFieldsYet(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + + spec, err := plugin.GetConfigSpec(context.Background()) + if err != nil { + t.Fatal(err) + } + if len(spec.Fields) != 0 { + t.Fatalf("unexpected config fields: %#v", spec.Fields) + } +} + +func 
TestGetAgentHooksInstallsCodexHooks(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + workspace := t.TempDir() + hooksDir := filepath.Join(workspace, ".codex") + if err := os.MkdirAll(hooksDir, 0o755); err != nil { + t.Fatal(err) + } + hooksPath := filepath.Join(hooksDir, "hooks.json") + existing := `{"hooks":{"Stop":[{"matcher":null,"hooks":[{"type":"command","command":"custom stop hook","timeout":3}]}]}}` + if err := os.WriteFile(hooksPath, []byte(existing), 0o644); err != nil { + t.Fatal(err) + } + + cfg := agent.WorkspaceHookConfig{ + DataDir: t.TempDir(), + SessionID: "sess-1", + WorkspacePath: workspace, + } + if err := plugin.GetAgentHooks(context.Background(), cfg); err != nil { + t.Fatal(err) + } + // A second install must not duplicate Better-AO hook commands. + if err := plugin.GetAgentHooks(context.Background(), cfg); err != nil { + t.Fatal(err) + } + + data, err := os.ReadFile(hooksPath) + if err != nil { + t.Fatal(err) + } + var config codexHookFile + if err := json.Unmarshal(data, &config); err != nil { + t.Fatal(err) + } + if config.Hooks == nil { + t.Fatalf("hooks config missing hooks object: %#v", config) + } + templateHooks, err := codexEmbeddedHookGroups() + if err != nil { + t.Fatal(err) + } + for event, templateGroups := range templateHooks { + entries := config.Hooks[event] + for _, templateGroup := range templateGroups { + for _, hook := range templateGroup.Hooks { + count := countCodexHookCommand(entries, hook.Command) + if count != 1 { + t.Fatalf("%s command count = %d, want 1 in %#v", event, count, entries) + } + } + } + } + stopEntries := config.Hooks["Stop"] + if countCodexHookCommand(stopEntries, "custom stop hook") != 1 { + t.Fatalf("existing Stop hook was not preserved: %#v", stopEntries) + } + + configData, err := os.ReadFile(filepath.Join(workspace, ".codex", "config.toml")) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(configData), codexHooksFeatureLine) { + t.Fatalf("config.toml missing hooks 
feature flag: %s", configData) + } +} + +func TestGetRestoreCommandReadsAgentSessionID(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + + cmd, ok, err := plugin.GetRestoreCommand(context.Background(), agent.RestoreConfig{ + Permissions: agent.PermissionModeAuto, + Session: agent.SessionRef{ + Metadata: map[string]string{codexAgentSessionIDMetadataKey: "thread-123"}, + }, + }) + if err != nil { + t.Fatalf("err = %v, want nil", err) + } + if !ok { + t.Fatal("ok = false, want true") + } + want := []string{ + "codex", + "resume", + "-c", "check_for_update_on_startup=false", + "--ask-for-approval", "on-request", + "-c", `approvals_reviewer="auto_review"`, + "thread-123", + } + if !reflect.DeepEqual(cmd, want) { + t.Fatalf("restore cmd\nwant: %#v\n got: %#v", want, cmd) + } +} + +func TestGetRestoreCommandFalseWithoutAgentSessionID(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + + cases := []struct { + name string + ref agent.SessionRef + }{ + {"empty session ref", agent.SessionRef{}}, + {"empty metadata", agent.SessionRef{Metadata: map[string]string{}}}, + {"blank agent session metadata", agent.SessionRef{Metadata: map[string]string{codexAgentSessionIDMetadataKey: " "}}}, + {"workspace path only", agent.SessionRef{WorkspacePath: "/some/path"}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + cmd, ok, err := plugin.GetRestoreCommand(context.Background(), agent.RestoreConfig{ + Permissions: agent.PermissionModeAuto, + Session: tc.ref, + }) + if err != nil { + t.Fatalf("err = %v, want nil", err) + } + if ok { + t.Fatalf("ok = true, want false") + } + if cmd != nil { + t.Fatalf("cmd = %#v, want nil", cmd) + } + }) + } +} + +func TestSessionInfoReadsHookMetadata(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + + info, ok, err := plugin.SessionInfo(context.Background(), agent.SessionRef{ + WorkspacePath: "/some/path", + Metadata: map[string]string{ + codexAgentSessionIDMetadataKey: "thread-123", + 
codexTitleMetadataKey: "Fix login redirect", + codexSummaryMetadataKey: "Updated the auth callback and tests.", + "ignored": "not returned", + }, + }) + if err != nil { + t.Fatalf("err = %v, want nil", err) + } + if !ok { + t.Fatalf("ok = false, want true") + } + if info.AgentSessionID != "thread-123" { + t.Fatalf("AgentSessionID = %q, want native id", info.AgentSessionID) + } + if info.Title != "Fix login redirect" { + t.Fatalf("Title = %q, want hook title", info.Title) + } + if info.Summary != "Updated the auth callback and tests." { + t.Fatalf("Summary = %q, want hook summary", info.Summary) + } + if info.Metadata != nil { + t.Fatalf("Metadata = %#v, want nil for Codex", info.Metadata) + } +} + +func TestSessionInfoFalseWhenNoHookMetadata(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + + info, ok, err := plugin.SessionInfo(context.Background(), agent.SessionRef{ + WorkspacePath: "/some/path", + Metadata: map[string]string{}, + }) + if err != nil { + t.Fatalf("err = %v, want nil", err) + } + if ok { + t.Fatalf("ok = true, want false") + } + if !reflect.DeepEqual(info, agent.SessionInfo{}) { + t.Fatalf("info = %#v, want zero value", info) + } +} + +func contains(values []string, needle string) bool { + for _, value := range values { + if value == needle { + return true + } + } + return false +} + +func containsSubsequence(values []string, needle []string) bool { + if len(needle) == 0 { + return true + } + + for start := range values { + if start+len(needle) > len(values) { + return false + } + ok := true + for offset, want := range needle { + if values[start+offset] != want { + ok = false + break + } + } + if ok { + return true + } + } + + return false +} + +func countCodexHookCommand(entries []codexMatcherGroup, command string) int { + count := 0 + for _, entry := range entries { + for _, hook := range entry.Hooks { + if hook.Command == command { + count++ + } + } + } + return count +} diff --git a/backend/internal/plugin/agent/codex/hooks.go 
b/backend/internal/plugin/agent/codex/hooks.go new file mode 100644 index 00000000..dbcf568f --- /dev/null +++ b/backend/internal/plugin/agent/codex/hooks.go @@ -0,0 +1,236 @@ +package codex + +import ( + "context" + "embed" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/yyovil/better-ao/internal/plugin/agent" +) + +const ( + codexHooksDirName = ".codex" + codexHooksFileName = "hooks.json" + codexHooksTemplate = ".codex/hooks.json" + + codexConfigFileName = "config.toml" + codexHooksFeatureLine = "hooks = true" + codexLegacyHookFeatureLine = "codex_hooks = true" +) + +//go:embed .codex/hooks.json +var codexHookTemplateFS embed.FS + +type codexHookFile struct { + Hooks map[string][]codexMatcherGroup `json:"hooks"` +} + +type codexMatcherGroup struct { + Matcher *string `json:"matcher"` + Hooks []codexHookEntry `json:"hooks"` +} + +type codexHookEntry struct { + Type string `json:"type"` + Command string `json:"command"` + Timeout int `json:"timeout,omitempty"` +} + +// GetAgentHooks installs Better-AO's Codex hooks into the worktree-local +// .codex/hooks.json file. Existing hook entries are preserved and duplicate +// Better-AO commands are not appended. 
+func (p *Plugin) GetAgentHooks(ctx context.Context, cfg agent.WorkspaceHookConfig) error { + if err := ctx.Err(); err != nil { + return err + } + if strings.TrimSpace(cfg.WorkspacePath) == "" { + return errors.New("codex.GetAgentHooks: WorkspacePath is required") + } + + hooksPath := filepath.Join(cfg.WorkspacePath, codexHooksDirName, codexHooksFileName) + topLevel := map[string]json.RawMessage{} + rawHooks := map[string]json.RawMessage{} + + if existingData, err := os.ReadFile(hooksPath); err == nil { + if len(strings.TrimSpace(string(existingData))) > 0 { + if err := json.Unmarshal(existingData, &topLevel); err != nil { + return fmt.Errorf("codex.GetAgentHooks: parse %s: %w", hooksPath, err) + } + if hooksRaw, ok := topLevel["hooks"]; ok { + if err := json.Unmarshal(hooksRaw, &rawHooks); err != nil { + return fmt.Errorf("codex.GetAgentHooks: parse hooks in %s: %w", hooksPath, err) + } + } + } + } else if !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("codex.GetAgentHooks: read %s: %w", hooksPath, err) + } + + templateHooks, err := codexEmbeddedHookGroups() + if err != nil { + return err + } + for event, templateGroups := range templateHooks { + var existingGroups []codexMatcherGroup + if err := parseCodexHookType(rawHooks, event, &existingGroups); err != nil { + return err + } + for _, group := range templateGroups { + for _, hook := range group.Hooks { + if !codexHookCommandExists(existingGroups, hook.Command) { + existingGroups = addCodexHook(existingGroups, hook) + } + } + } + if err := marshalCodexHookType(rawHooks, event, existingGroups); err != nil { + return err + } + } + + hooksJSON, err := json.Marshal(rawHooks) + if err != nil { + return fmt.Errorf("codex.GetAgentHooks: encode hooks: %w", err) + } + topLevel["hooks"] = hooksJSON + + if err := os.MkdirAll(filepath.Dir(hooksPath), 0o750); err != nil { + return fmt.Errorf("codex.GetAgentHooks: create hook dir: %w", err) + } + data, err := json.MarshalIndent(topLevel, "", " ") + if err != nil { + 
return fmt.Errorf("codex.GetAgentHooks: encode %s: %w", hooksPath, err) + } + data = append(data, '\n') + if err := os.WriteFile(hooksPath, data, 0o600); err != nil { + return fmt.Errorf("codex.GetAgentHooks: write %s: %w", hooksPath, err) + } + + if err := ensureCodexHooksFeatureEnabled(cfg.WorkspacePath); err != nil { + return fmt.Errorf("codex.GetAgentHooks: enable hooks feature: %w", err) + } + return nil +} + +func codexEmbeddedHookGroups() (map[string][]codexMatcherGroup, error) { + data, err := codexHookTemplateFS.ReadFile(codexHooksTemplate) + if err != nil { + return nil, fmt.Errorf("codex.GetAgentHooks: read embedded %s: %w", codexHooksTemplate, err) + } + var file codexHookFile + if err := json.Unmarshal(data, &file); err != nil { + return nil, fmt.Errorf("codex.GetAgentHooks: parse embedded %s: %w", codexHooksTemplate, err) + } + if file.Hooks == nil { + return map[string][]codexMatcherGroup{}, nil + } + return file.Hooks, nil +} + +func parseCodexHookType(rawHooks map[string]json.RawMessage, event string, target *[]codexMatcherGroup) error { + data, ok := rawHooks[event] + if !ok { + return nil + } + if err := json.Unmarshal(data, target); err != nil { + return fmt.Errorf("codex.GetAgentHooks: parse %s hooks: %w", event, err) + } + return nil +} + +func marshalCodexHookType(rawHooks map[string]json.RawMessage, event string, groups []codexMatcherGroup) error { + if len(groups) == 0 { + delete(rawHooks, event) + return nil + } + data, err := json.Marshal(groups) + if err != nil { + return fmt.Errorf("codex.GetAgentHooks: encode %s hooks: %w", event, err) + } + rawHooks[event] = data + return nil +} + +func codexHookCommandExists(groups []codexMatcherGroup, command string) bool { + for _, group := range groups { + for _, hook := range group.Hooks { + if hook.Command == command { + return true + } + } + } + return false +} + +func addCodexHook(groups []codexMatcherGroup, hook codexHookEntry) []codexMatcherGroup { + for i, group := range groups { + if 
group.Matcher == nil { + groups[i].Hooks = append(groups[i].Hooks, hook) + return groups + } + } + return append(groups, codexMatcherGroup{ + Matcher: nil, + Hooks: []codexHookEntry{hook}, + }) +} + +func ensureCodexHooksFeatureEnabled(workspacePath string) error { + configPath := filepath.Join(workspacePath, codexHooksDirName, codexConfigFileName) + data, err := os.ReadFile(configPath) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("read config.toml: %w", err) + } + + content := string(data) + hasNew := containsCodexFeatureLine(content, codexHooksFeatureLine) + hasLegacy := containsCodexFeatureLine(content, codexLegacyHookFeatureLine) + switch { + case hasNew && hasLegacy: + content = stripCodexLegacyHookFeatureLine(content) + case hasNew: + return nil + case hasLegacy: + content = strings.Replace(content, codexLegacyHookFeatureLine, codexHooksFeatureLine, 1) + case strings.Contains(content, "[features]"): + content = strings.Replace(content, "[features]", "[features]\n"+codexHooksFeatureLine, 1) + default: + if len(content) > 0 && !strings.HasSuffix(content, "\n") { + content += "\n" + } + content += "\n[features]\n" + codexHooksFeatureLine + "\n" + } + + if err := os.MkdirAll(filepath.Dir(configPath), 0o750); err != nil { + return fmt.Errorf("create .codex directory: %w", err) + } + if err := os.WriteFile(configPath, []byte(content), 0o600); err != nil { + return fmt.Errorf("write config.toml: %w", err) + } + return nil +} + +func containsCodexFeatureLine(content string, line string) bool { + for raw := range strings.SplitSeq(content, "\n") { + if strings.TrimSpace(raw) == line { + return true + } + } + return false +} + +func stripCodexLegacyHookFeatureLine(content string) string { + idx := strings.Index(content, codexLegacyHookFeatureLine) + if idx < 0 { + return content + } + end := idx + len(codexLegacyHookFeatureLine) + if end < len(content) && content[end] == '\n' { + end++ + } + return content[:idx] + content[end:] +} diff --git 
a/backend/internal/plugin/plugin.go b/backend/internal/plugin/plugin.go new file mode 100644 index 00000000..623a0724 --- /dev/null +++ b/backend/internal/plugin/plugin.go @@ -0,0 +1,68 @@ +package plugin + +import ( + "fmt" + "sort" +) + +type Capability string + +const ( + CapabilityAgent Capability = "agent" + CapabilityIssueTracker Capability = "issue-tracker" +) + +type Manifest struct { + ID string `json:"id"` + Name string `json:"name"` + Description string `json:"description"` + Version string `json:"version"` + Capabilities []Capability `json:"capabilities"` +} + +type Plugin interface { + Manifest() Manifest +} + +type Registry struct { + plugins map[string]Plugin +} + +func NewRegistry() *Registry { + return &Registry{ + plugins: make(map[string]Plugin), + } +} + +func (r *Registry) Register(plugin Plugin) error { + manifest := plugin.Manifest() + if manifest.ID == "" { + return fmt.Errorf("plugin id is required") + } + if _, exists := r.plugins[manifest.ID]; exists { + return fmt.Errorf("plugin %q is already registered", manifest.ID) + } + + r.plugins[manifest.ID] = plugin + return nil +} + +// Get returns the registered plugin with the given id, or nil and false +// when no such plugin exists. +func (r *Registry) Get(id string) (Plugin, bool) { + p, ok := r.plugins[id] + return p, ok +} + +func (r *Registry) Manifests() []Manifest { + manifests := make([]Manifest, 0, len(r.plugins)) + for _, plugin := range r.plugins { + manifests = append(manifests, plugin.Manifest()) + } + + sort.Slice(manifests, func(i, j int) bool { + return manifests[i].ID < manifests[j].ID + }) + + return manifests +} diff --git a/prds/plugins/agents/PRD.md b/prds/plugins/agents/PRD.md new file mode 100644 index 00000000..a9fe3946 --- /dev/null +++ b/prds/plugins/agents/PRD.md @@ -0,0 +1,118 @@ +# Agent Plugin PRD + +## Goal + +Agent plugins let Better-AO run and observe different CLI coding agents without hardcoding agent-specific behavior into the spawn engine. 
Every CLI coding agent must implement the contract in `internal/plugin/agent/agent.go`. + +The important current slice is hook-derived session info. Better-AO should know a running worker's native agent session id, title, and summary from agent hooks installed in the per-session worktree, not from scanning agent transcript/cache files. + +## Current Decisions + +- Better-AO only needs to derive session info for Better-AO-managed sessions. +- Hook installation happens at worktree/session creation time. +- `SessionInfo` reads normalized metadata persisted in Better-AO's session store. +- `SessionInfo` must not infer display info by reading agent transcript/cache files. +- `SummaryIsFallback` is removed from `agent.SessionInfo`. +- `TranscriptPath` is removed from `agent.SessionInfo`. +- `Title` and `Summary` are both first-class fields. +- `Title` is derived from the user prompt hook. +- `Summary` is derived from the stop/final assistant hook. +- Agent plugin `Metadata` should stay nil/empty unless a plugin has a real extra field that does not belong in the normalized contract. + +## Agent Contract + +The shared contract lives in `internal/plugin/agent/agent.go`. + +Required plugin behavior: + +- `GetConfigSpec` describes user-facing agent config. +- `GetLaunchCommand` builds the native agent command. +- `GetPromptDeliveryStrategy` says whether the prompt is passed in argv or sent after launch. +- `GetAgentHooks` installs or merges Better-AO hooks into the agent's workspace-local hook config. +- `GetRestoreCommand` builds a native resume command when restore is supported. +- `SessionInfo` returns normalized metadata: + - `AgentSessionID` + - `Title` + - `Summary` + - optional plugin-specific `Metadata` + +Implementation layout: + +- Agent-specific hook installation and embedded hook templates should live beside the agent plugin in `internal/plugin/agent//hooks.go`. 
+- Launch, restore, and session-info behavior can stay in the main agent implementation unless the file grows enough to justify another split. + +## Metadata Keys + +Hook callbacks persist these normalized keys in the session metadata JSON blob: + +- `agentSessionId`: native agent session id. +- `title`: display title, derived from the first user prompt hook for the session. +- `summary`: display summary, derived from the final assistant message exposed to the stop hook. + +The original spawn prompt may remain in metadata as `prompt` for launch/debug fallback, but `title` is the preferred display title once hook metadata lands. + +## Hook Methodology + +Agent plugins install hooks into the worktree-local config owned by the native agent. + +Hook callbacks run through hidden Better-AO CLI commands: + +```text +better-ao hooks +``` + +The callback: + +1. Reads the native hook JSON payload from stdin. +2. Reads the Better-AO session id from `BETTER_AO_SESSION_ID`. +3. Opens `~/.better-ao/state.db`. +4. Merges normalized metadata into the matching session row. +5. Publishes `session.updated` when metadata changed. +6. Prints `{}` and exits 0 for successful no-op cases, including non-AO sessions or missing rows. + +The spawn engine inserts the Better-AO session row before launching the durability provider so early startup hooks can update an existing row. If launch fails after insertion, spawn deletes the row during rollback. + +## Restore Boundary + +Session display info and native restore are separate concerns. + +Some agents may still need transcript-derived or deterministic native ids for `GetRestoreCommand` until restore is redesigned for that agent. Do not remove restore support just because `SessionInfo` stops reading transcripts. + +For `SessionInfo`, transcript/cache files are not an acceptable source of title or summary. + +## UI And Events + +The workspace adapter prefers: + +- `metadata.title` as session title. +- `metadata.summary` as session description. 
+- `metadata.prompt` only as fallback. + +Hook metadata changes publish `session.updated`. The frontend listens to `session.created`, `session.terminated`, and `session.updated` and invalidates the workspace query. + + +## Acceptance Criteria + +Agent plugin behavior: + +- Agent hook installation preserves user hooks and deduplicates Better-AO hooks. +- Hook callbacks persist native session id, title, and summary. +- `SessionInfo` returns normalized fields from persisted metadata. +- `SessionInfo` does not read transcripts or caches for title/summary. +- Plugin-specific metadata stays nil/empty unless a concrete feature requires it. + +Engine and UI: + +- Spawn installs hooks before launching the native agent. +- The session row exists before launch so hooks can merge metadata. +- Launch failure after row insertion deletes the row. +- Metadata updates publish `session.updated`. +- The dashboard refreshes title/summary without a manual reload. + +Verification: + +```sh +go test ./... +node --test scripts/*.test.mjs +pnpm --filter @better-ao/web lint:ts +``` From 73ed7afaf56b727954a9e07fcc510c5d6a0c9022 Mon Sep 17 00:00:00 2001 From: yyovil Date: Mon, 1 Jun 2026 07:32:09 +0530 Subject: [PATCH 02/17] Move agent adapters under backend adapters --- .envrc | 1 + .gitignore | 3 + .../{plugin => adapters}/agent/agent.go | 15 ++--- .../claudecode/.claude/settings.local.json | 0 .../agent/claudecode/claudecode.go | 34 +++++----- .../agent/claudecode/claudecode_test.go | 2 +- .../agent/claudecode/hooks.go | 2 +- .../agent/codex/.codex/hooks.json | 0 .../{plugin => adapters}/agent/codex/codex.go | 26 ++++---- .../agent/codex/codex_test.go | 2 +- .../{plugin => adapters}/agent/codex/hooks.go | 2 +- .../plugin.go => adapters/registry.go} | 34 +++++----- backend/internal/daemon/daemon.go | 2 +- backend/internal/daemon/lifecycle_wiring.go | 8 +-- backend/internal/ports/inbound.go | 2 +- backend/internal/ports/outbound.go | 2 +- backend/internal/session/manager.go | 2 +- 
docs/README.md | 3 +- .../agents/PRD.md => docs/agent/README.md | 22 +++---- flake.lock | 61 ++++++++++++++++++ flake.nix | 62 +++++++++++++++++++ 21 files changed, 209 insertions(+), 76 deletions(-) create mode 100644 .envrc rename backend/internal/{plugin => adapters}/agent/agent.go (88%) rename backend/internal/{plugin => adapters}/agent/claudecode/.claude/settings.local.json (100%) rename backend/internal/{plugin => adapters}/agent/claudecode/claudecode.go (95%) rename backend/internal/{plugin => adapters}/agent/claudecode/claudecode_test.go (99%) rename backend/internal/{plugin => adapters}/agent/claudecode/hooks.go (98%) rename backend/internal/{plugin => adapters}/agent/codex/.codex/hooks.json (100%) rename backend/internal/{plugin => adapters}/agent/codex/codex.go (91%) rename backend/internal/{plugin => adapters}/agent/codex/codex_test.go (99%) rename backend/internal/{plugin => adapters}/agent/codex/hooks.go (98%) rename backend/internal/{plugin/plugin.go => adapters/registry.go} (50%) rename prds/plugins/agents/PRD.md => docs/agent/README.md (83%) create mode 100644 flake.lock create mode 100644 flake.nix diff --git a/.envrc b/.envrc new file mode 100644 index 00000000..3550a30f --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore index 425b31d7..2f028097 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Node / Electron node_modules/ +.pnpm/ dist/ out/ build/ @@ -9,6 +10,7 @@ yarn-debug.log* yarn-error.log* # Go +.go/ bin/ *.test *.out @@ -27,6 +29,7 @@ session-events.jsonl session-events.jsonl.* # Environment +.direnv/ .env .env.* !.env.example diff --git a/backend/internal/plugin/agent/agent.go b/backend/internal/adapters/agent/agent.go similarity index 88% rename from backend/internal/plugin/agent/agent.go rename to backend/internal/adapters/agent/agent.go index ac70f7bf..25eb9bf9 100644 --- a/backend/internal/plugin/agent/agent.go +++ b/backend/internal/adapters/agent/agent.go @@ -2,11 +2,9 @@ package 
agent import ( "context" - - "github.com/yyovil/better-ao/internal/config" ) -// Agent defines the behavior every CLI coding agent plugin must provide. +// Agent defines the behavior every CLI coding agent adapter must provide. type Agent interface { // GetConfigSpec describes the agent-specific config keys Better-AO can // expose to users in ~/.better-ao/config.yaml. @@ -32,12 +30,11 @@ type Agent interface { SessionInfo(ctx context.Context, session SessionRef) (info SessionInfo, ok bool, err error) } -// Config contains values loaded from the selected agent's section in -// ~/.better-ao/config.yaml. Agent plugins own validation for their custom keys. -type Config = config.AgentConfig +// Config contains values loaded from the selected agent's config section. +// Agent adapters own validation for their custom keys. +type Config map[string]any -// ConfigSpec describes the agent-specific config keys Better-AO can expose to -// users in ~/.better-ao/config.yaml. +// ConfigSpec describes the agent-specific config keys AO can expose to users. type ConfigSpec struct { Fields []ConfigField } @@ -109,7 +106,7 @@ type SessionInfo struct { type PermissionMode string const ( - // "default" is special: plugins emit no flag for it so the agent resolves + // "default" is special: adapters emit no flag for it so the agent resolves // its starting mode from the user's own config (e.g. Claude's TUI reading // ~/.claude/settings.json defaultMode). 
PermissionModeDefault PermissionMode = "default" diff --git a/backend/internal/plugin/agent/claudecode/.claude/settings.local.json b/backend/internal/adapters/agent/claudecode/.claude/settings.local.json similarity index 100% rename from backend/internal/plugin/agent/claudecode/.claude/settings.local.json rename to backend/internal/adapters/agent/claudecode/.claude/settings.local.json diff --git a/backend/internal/plugin/agent/claudecode/claudecode.go b/backend/internal/adapters/agent/claudecode/claudecode.go similarity index 95% rename from backend/internal/plugin/agent/claudecode/claudecode.go rename to backend/internal/adapters/agent/claudecode/claudecode.go index ed871cfb..054f32ce 100644 --- a/backend/internal/plugin/agent/claudecode/claudecode.go +++ b/backend/internal/adapters/agent/claudecode/claudecode.go @@ -1,4 +1,4 @@ -// Package claudecode implements the Claude Code agent plugin. +// Package claudecode implements the Claude Code agent adapter. // // It builds the argv to launch `claude` as an interactive session inside a // session's worktree, installs worktree-local hooks that report normalized @@ -27,20 +27,19 @@ import ( "strings" "sync" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent" "github.com/google/uuid" - "github.com/yyovil/better-ao/internal/plugin" - "github.com/yyovil/better-ao/internal/plugin/agent" - "github.com/yyovil/better-ao/internal/utils" ) const ( - // pluginID is the registry id and the value users pass to + // adapterID is the registry id and the value users pass to // `better-ao spawn --agent`. - pluginID = "claude-code" + adapterID = "claude-code" // Normalized session-metadata keys the Claude Code hooks persist into the // Better-AO session store and SessionInfo reads back. Shared vocabulary - // with the Codex plugin so the dashboard treats every agent uniformly. 
+ // with the Codex adapter so the dashboard treats every agent uniformly. // agentSessionId is also the preferred restore id. claudeAgentSessionIDMetadataKey = "agentSessionId" claudeTitleMetadataKey = "title" @@ -63,17 +62,17 @@ func New() *Plugin { return &Plugin{} } -var _ plugin.Plugin = (*Plugin)(nil) +var _ adapters.Adapter = (*Plugin)(nil) var _ agent.Agent = (*Plugin)(nil) -func (p *Plugin) Manifest() plugin.Manifest { - return plugin.Manifest{ - ID: pluginID, +func (p *Plugin) Manifest() adapters.Manifest { + return adapters.Manifest{ + ID: adapterID, Name: "Claude Code", Description: "Run Claude Code worker sessions.", Version: "0.0.1", - Capabilities: []plugin.Capability{ - plugin.CapabilityAgent, + Capabilities: []adapters.Capability{ + adapters.CapabilityAgent, }, } } @@ -301,7 +300,7 @@ func ResolveClaudeBinary(ctx context.Context) (string, error) { ) } for _, candidate := range candidates { - if utils.FileExists(candidate) { + if fileExists(candidate) { return candidate, nil } } @@ -324,7 +323,7 @@ func ResolveClaudeBinary(ctx context.Context) (string, error) { ) } for _, candidate := range candidates { - if utils.FileExists(candidate) { + if fileExists(candidate) { return candidate, nil } if err := ctx.Err(); err != nil { @@ -431,3 +430,8 @@ func ensureWorkspaceTrusted(configPath, workspacePath string) error { } return nil } + +func fileExists(path string) bool { + info, err := os.Stat(path) + return err == nil && !info.IsDir() +} diff --git a/backend/internal/plugin/agent/claudecode/claudecode_test.go b/backend/internal/adapters/agent/claudecode/claudecode_test.go similarity index 99% rename from backend/internal/plugin/agent/claudecode/claudecode_test.go rename to backend/internal/adapters/agent/claudecode/claudecode_test.go index 049f99d9..35e55309 100644 --- a/backend/internal/plugin/agent/claudecode/claudecode_test.go +++ b/backend/internal/adapters/agent/claudecode/claudecode_test.go @@ -8,8 +8,8 @@ import ( "reflect" "testing" + 
"github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent" "github.com/google/uuid" - "github.com/yyovil/better-ao/internal/plugin/agent" ) func TestGetLaunchCommandBypassWithPrompt(t *testing.T) { diff --git a/backend/internal/plugin/agent/claudecode/hooks.go b/backend/internal/adapters/agent/claudecode/hooks.go similarity index 98% rename from backend/internal/plugin/agent/claudecode/hooks.go rename to backend/internal/adapters/agent/claudecode/hooks.go index 093fc990..2adf5e60 100644 --- a/backend/internal/plugin/agent/claudecode/hooks.go +++ b/backend/internal/adapters/agent/claudecode/hooks.go @@ -10,7 +10,7 @@ import ( "path/filepath" "strings" - "github.com/yyovil/better-ao/internal/plugin/agent" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent" ) const ( diff --git a/backend/internal/plugin/agent/codex/.codex/hooks.json b/backend/internal/adapters/agent/codex/.codex/hooks.json similarity index 100% rename from backend/internal/plugin/agent/codex/.codex/hooks.json rename to backend/internal/adapters/agent/codex/.codex/hooks.json diff --git a/backend/internal/plugin/agent/codex/codex.go b/backend/internal/adapters/agent/codex/codex.go similarity index 91% rename from backend/internal/plugin/agent/codex/codex.go rename to backend/internal/adapters/agent/codex/codex.go index d451517d..a80438f6 100644 --- a/backend/internal/plugin/agent/codex/codex.go +++ b/backend/internal/adapters/agent/codex/codex.go @@ -1,4 +1,4 @@ -// Package codex implements the Codex agent plugin: launching new sessions, +// Package codex implements the Codex agent adapter: launching new sessions, // resuming hook-tracked sessions, installing workspace-local hooks, and reading // hook-derived session info. 
// @@ -15,9 +15,8 @@ import ( "strings" "sync" - "github.com/yyovil/better-ao/internal/plugin" - "github.com/yyovil/better-ao/internal/plugin/agent" - "github.com/yyovil/better-ao/internal/utils" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent" ) const ( @@ -35,17 +34,17 @@ func New() *Plugin { return &Plugin{} } -var _ plugin.Plugin = (*Plugin)(nil) +var _ adapters.Adapter = (*Plugin)(nil) var _ agent.Agent = (*Plugin)(nil) -func (p *Plugin) Manifest() plugin.Manifest { - return plugin.Manifest{ +func (p *Plugin) Manifest() adapters.Manifest { + return adapters.Manifest{ ID: "codex", Name: "Codex", Description: "Run Codex worker sessions.", Version: "0.0.1", - Capabilities: []plugin.Capability{ - plugin.CapabilityAgent, + Capabilities: []adapters.Capability{ + adapters.CapabilityAgent, }, } } @@ -160,7 +159,7 @@ func ResolveCodexBinary(ctx context.Context) (string, error) { candidates = append(candidates, filepath.Join(home, ".cargo", "bin", "codex.exe")) } for _, candidate := range candidates { - if utils.FileExists(candidate) { + if fileExists(candidate) { return candidate, nil } if err := ctx.Err(); err != nil { @@ -187,7 +186,7 @@ func ResolveCodexBinary(ctx context.Context) (string, error) { } for _, candidate := range candidates { - if utils.FileExists(candidate) { + if fileExists(candidate) { return candidate, nil } if err := ctx.Err(); err != nil { @@ -242,3 +241,8 @@ func normalizePermissionMode(mode agent.PermissionMode) agent.PermissionMode { return agent.PermissionModeDefault } } + +func fileExists(path string) bool { + info, err := os.Stat(path) + return err == nil && !info.IsDir() +} diff --git a/backend/internal/plugin/agent/codex/codex_test.go b/backend/internal/adapters/agent/codex/codex_test.go similarity index 99% rename from backend/internal/plugin/agent/codex/codex_test.go rename to backend/internal/adapters/agent/codex/codex_test.go index 
24dcf425..9dd1b3ac 100644 --- a/backend/internal/plugin/agent/codex/codex_test.go +++ b/backend/internal/adapters/agent/codex/codex_test.go @@ -9,7 +9,7 @@ import ( "strings" "testing" - "github.com/yyovil/better-ao/internal/plugin/agent" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent" ) func TestGetLaunchCommandBuildsCrossPlatformArgv(t *testing.T) { diff --git a/backend/internal/plugin/agent/codex/hooks.go b/backend/internal/adapters/agent/codex/hooks.go similarity index 98% rename from backend/internal/plugin/agent/codex/hooks.go rename to backend/internal/adapters/agent/codex/hooks.go index dbcf568f..15ec6cc6 100644 --- a/backend/internal/plugin/agent/codex/hooks.go +++ b/backend/internal/adapters/agent/codex/hooks.go @@ -10,7 +10,7 @@ import ( "path/filepath" "strings" - "github.com/yyovil/better-ao/internal/plugin/agent" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent" ) const ( diff --git a/backend/internal/plugin/plugin.go b/backend/internal/adapters/registry.go similarity index 50% rename from backend/internal/plugin/plugin.go rename to backend/internal/adapters/registry.go index 623a0724..68e5e275 100644 --- a/backend/internal/plugin/plugin.go +++ b/backend/internal/adapters/registry.go @@ -1,4 +1,4 @@ -package plugin +package adapters import ( "fmt" @@ -20,44 +20,44 @@ type Manifest struct { Capabilities []Capability `json:"capabilities"` } -type Plugin interface { +type Adapter interface { Manifest() Manifest } type Registry struct { - plugins map[string]Plugin + adapters map[string]Adapter } func NewRegistry() *Registry { return &Registry{ - plugins: make(map[string]Plugin), + adapters: make(map[string]Adapter), } } -func (r *Registry) Register(plugin Plugin) error { - manifest := plugin.Manifest() +func (r *Registry) Register(adapter Adapter) error { + manifest := adapter.Manifest() if manifest.ID == "" { - return fmt.Errorf("plugin id is required") + return fmt.Errorf("adapter id is required") } - 
if _, exists := r.plugins[manifest.ID]; exists { - return fmt.Errorf("plugin %q is already registered", manifest.ID) + if _, exists := r.adapters[manifest.ID]; exists { + return fmt.Errorf("adapter %q is already registered", manifest.ID) } - r.plugins[manifest.ID] = plugin + r.adapters[manifest.ID] = adapter return nil } -// Get returns the registered plugin with the given id, or nil and false -// when no such plugin exists. -func (r *Registry) Get(id string) (Plugin, bool) { - p, ok := r.plugins[id] +// Get returns the registered adapter with the given id, or nil and false +// when no such adapter exists. +func (r *Registry) Get(id string) (Adapter, bool) { + p, ok := r.adapters[id] return p, ok } func (r *Registry) Manifests() []Manifest { - manifests := make([]Manifest, 0, len(r.plugins)) - for _, plugin := range r.plugins { - manifests = append(manifests, plugin.Manifest()) + manifests := make([]Manifest, 0, len(r.adapters)) + for _, adapter := range r.adapters { + manifests = append(manifests, adapter.Manifest()) } sort.Slice(manifests, func(i, j int) bool { diff --git a/backend/internal/daemon/daemon.go b/backend/internal/daemon/daemon.go index 3cb4f45c..01170a5a 100644 --- a/backend/internal/daemon/daemon.go +++ b/backend/internal/daemon/daemon.go @@ -43,7 +43,7 @@ func Run() error { // fans events out to the SSE transport. The LCM/Session Manager and the HTTP // API routes that drive and read this store are owned by the daemon lane and // are wired there once their collaborators (Notifier, AgentMessenger, and the - // runtime/agent/workspace plugins) have production implementations; here we + // runtime/agent/workspace adapters) have production implementations; here we // stand up the persistence + change-delivery foundation they build on. 
store, err := sqlite.Open(cfg.DataDir) if err != nil { diff --git a/backend/internal/daemon/lifecycle_wiring.go b/backend/internal/daemon/lifecycle_wiring.go index 65308f0e..3e7961ae 100644 --- a/backend/internal/daemon/lifecycle_wiring.go +++ b/backend/internal/daemon/lifecycle_wiring.go @@ -33,9 +33,9 @@ type lifecycleStack struct { // The goroutine stops when ctx is cancelled; Stop waits for it to drain. // // TEMPORARY STUBS (replace as the daemon lane lands the collaborators): -// - noopMessenger — swap for the runtime/agent-plugin-backed AgentMessenger. +// - noopMessenger — swap for the runtime/agent-adapter-backed AgentMessenger. // - reaper.MapRegistry{} — empty runtime registry, so the reaper ticks -// escalations but probes nothing until the runtime plugins exist. +// escalations but probes nothing until the runtime adapters exist. func startLifecycle(ctx context.Context, store *sqlite.Store, logger *slog.Logger) *lifecycleStack { renderer := notification.NewRenderer(store) notifier := notification.NewEnqueuer(store, renderer, logger) @@ -62,7 +62,7 @@ type sessionStack struct { // daemon lane (#10). Returning the SM here lets main hold the wired-but-quiet // instance so future route wiring is a one-line plumb-through. func startSession(ctx context.Context, cfg config.Config, ls *lifecycleStack, log *slog.Logger) (*sessionStack, error) { - _ = ctx // reserved for future ctx-aware plugin construction; today's tmux/gitworktree constructors are synchronous. + _ = ctx // reserved for future ctx-aware adapter construction; today's tmux/gitworktree constructors are synchronous. 
runtime := tmux.New(tmux.Options{}) ws, err := gitworktree.New(gitworktree.Options{ @@ -96,7 +96,7 @@ func startSession(ctx context.Context, cfg config.Config, ls *lifecycleStack, lo // noopMessenger is a TEMPORARY stub (see startLifecycle): the canonical write // path and durable notifications work without it; only live agent nudges are -// absent until the real runtime/agent plugin is wired. +// absent until the real runtime/agent adapter is wired. type noopMessenger struct{} func (noopMessenger) Send(context.Context, domain.SessionID, string) error { return nil } diff --git a/backend/internal/ports/inbound.go b/backend/internal/ports/inbound.go index fa472d00..c4aebf42 100644 --- a/backend/internal/ports/inbound.go +++ b/backend/internal/ports/inbound.go @@ -28,7 +28,7 @@ type LifecycleManager interface { } // SessionManager is the inbound contract the API/CLI call for explicit -// mutations. It drives the runtime/agent/workspace plugins and routes canonical +// mutations. It drives the runtime/agent/workspace adapters and routes canonical // writes to the LCM. type SessionManager interface { Spawn(ctx context.Context, cfg SpawnConfig) (domain.Session, error) diff --git a/backend/internal/ports/outbound.go b/backend/internal/ports/outbound.go index 58e1f509..e547de52 100644 --- a/backend/internal/ports/outbound.go +++ b/backend/internal/ports/outbound.go @@ -87,7 +87,7 @@ type EscalationEvent struct { DurationMs int64 } -// ---- runtime / agent / workspace plugin ports (used by the Session Manager) ---- +// ---- runtime / agent / workspace adapter ports (used by the Session Manager) ---- // Runtime is where a session's agent process runs — a tmux/zellij session or a // bare process. The Session Manager creates one per session and tears it down. 
diff --git a/backend/internal/session/manager.go b/backend/internal/session/manager.go index 37b1de81..45bbd019 100644 --- a/backend/internal/session/manager.go +++ b/backend/internal/session/manager.go @@ -1,5 +1,5 @@ // Package session implements ports.SessionManager: the explicit-mutation half of -// the lane. It drives the runtime/agent/workspace plugins to create and tear +// the lane. It drives the runtime/agent/workspace adapters to create and tear // down sessions, routes canonical writes to the LCM, and is the single producer // of the derived display status (attached on read in List/Get). package session diff --git a/docs/README.md b/docs/README.md index 220dec40..ee65d2df 100644 --- a/docs/README.md +++ b/docs/README.md @@ -16,6 +16,7 @@ fakes) on the `feat/lcm-sm-contracts` integration branch. | [architecture.md](architecture.md) | How the lane works: the OBSERVE→DECIDE→ACT loop, the canonical state model, the package layout, every component, and the load-bearing invariants. Read this first. | | [status.md](status.md) | What's done (PR by PR), what's left, the integration to-dos, the open cross-lane contract questions, and how to build/test. | | [cli/README.md](cli/README.md) | CLI foundation decisions: Cobra, reference projects, old CLI inventory, and the first command surface. | +| [agent/README.md](agent/README.md) | Agent adapter contract, hook-derived session metadata, restore boundaries, and acceptance criteria. | ## The one-paragraph mental model @@ -30,6 +31,6 @@ explicit-mutation plumbing (spawn/kill/restore/cleanup) that feeds it. Other lanes (built by other people, in parallel) provide the real adapters this lane depends on through narrow interfaces: the **persistence layer + CDC**, the -**SCM poller**, the **runtime/agent/workspace plugins**, the **backend API + +**SCM poller**, the **runtime/agent/workspace adapters**, the **backend API + OpenAPI**, and the **frontend store**. See [status.md](status.md#integration) for the hand-off points. 
diff --git a/prds/plugins/agents/PRD.md b/docs/agent/README.md similarity index 83% rename from prds/plugins/agents/PRD.md rename to docs/agent/README.md index a9fe3946..9c079926 100644 --- a/prds/plugins/agents/PRD.md +++ b/docs/agent/README.md @@ -1,8 +1,8 @@ -# Agent Plugin PRD +# Agent Adapter PRD ## Goal -Agent plugins let Better-AO run and observe different CLI coding agents without hardcoding agent-specific behavior into the spawn engine. Every CLI coding agent must implement the contract in `internal/plugin/agent/agent.go`. +Agent adapters let Better-AO run and observe different CLI coding agents without hardcoding agent-specific behavior into the spawn engine. Every CLI coding agent must implement the contract in `backend/internal/adapters/agent/agent.go`. The important current slice is hook-derived session info. Better-AO should know a running worker's native agent session id, title, and summary from agent hooks installed in the per-session worktree, not from scanning agent transcript/cache files. @@ -17,13 +17,13 @@ The important current slice is hook-derived session info. Better-AO should know - `Title` and `Summary` are both first-class fields. - `Title` is derived from the user prompt hook. - `Summary` is derived from the stop/final assistant hook. -- Agent plugin `Metadata` should stay nil/empty unless a plugin has a real extra field that does not belong in the normalized contract. +- Agent adapter `Metadata` should stay nil/empty unless an adapter has a real extra field that does not belong in the normalized contract. ## Agent Contract -The shared contract lives in `internal/plugin/agent/agent.go`. +The shared contract lives in `backend/internal/adapters/agent/agent.go`. -Required plugin behavior: +Required adapter behavior: - `GetConfigSpec` describes user-facing agent config. - `GetLaunchCommand` builds the native agent command. 
@@ -34,11 +34,11 @@ Required plugin behavior: - `AgentSessionID` - `Title` - `Summary` - - optional plugin-specific `Metadata` + - optional adapter-specific `Metadata` Implementation layout: -- Agent-specific hook installation and embedded hook templates should live beside the agent plugin in `internal/plugin/agent/<agent>/hooks.go`. +- Agent-specific hook installation and embedded hook templates should live beside the agent adapter in `backend/internal/adapters/agent/<agent>/hooks.go`. - Launch, restore, and session-info behavior can stay in the main agent implementation unless the file grows enough to justify another split. ## Metadata Keys @@ -53,12 +53,12 @@ The original spawn prompt may remain in metadata as `prompt` for launch/debug fa ## Hook Methodology -Agent plugins install hooks into the worktree-local config owned by the native agent. +Agent adapters install hooks into the worktree-local config owned by the native agent. Hook callbacks run through hidden Better-AO CLI commands: ```text -better-ao hooks +better-ao hooks ``` The callback: @@ -93,13 +93,13 @@ Hook metadata changes publish `session.updated`. The frontend listens to `sessio ## Acceptance Criteria -Agent plugin behavior: +Agent adapter behavior: - Agent hook installation preserves user hooks and deduplicates Better-AO hooks. - Hook callbacks persist native session id, title, and summary. - `SessionInfo` returns normalized fields from persisted metadata. - `SessionInfo` does not read transcripts or caches for title/summary. -- Plugin-specific metadata stays nil/empty unless a concrete feature requires it. +- Adapter-specific metadata stays nil/empty unless a concrete feature requires it.
Engine and UI: diff --git a/flake.lock b/flake.lock new file mode 100644 index 00000000..0cf6e3d9 --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1780030872, + "narHash": "sha256-u6WU/yd/o8iYQrHX3RAwO1hYa3LkoSL+WNQD0rJfJZQ=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "e9a7635a57597d9754eccebdfc7045e6c8600e6b", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 00000000..53a830a9 --- /dev/null +++ b/flake.nix @@ -0,0 +1,62 @@ +{ + description = "better-ao local agent orchestrator development shell"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = + { + nixpkgs, + flake-utils, + ... 
+ }: + flake-utils.lib.eachDefaultSystem ( + system: + let + pkgs = import nixpkgs { inherit system; }; + go = pkgs.go_1_25; + betterAoDev = pkgs.writeShellApplication { + name = "better-ao"; + runtimeInputs = [ + pkgs.coreutils + pkgs.pnpm_10 + ]; + text = '' + root="$PWD" + while [ "$root" != "/" ] && [ ! -f "$root/pnpm-workspace.yaml" ]; do + root="$(dirname "$root")" + done + + if [ ! -f "$root/pnpm-workspace.yaml" ]; then + echo "Unable to find the better-ao workspace root." + exit 1 + fi + + cd "$root" + exec pnpm dev "$@" + ''; + }; + in + { + devShells.default = pkgs.mkShell { + buildInputs = [ + betterAoDev + go + pkgs.nodejs_22 + pkgs.pnpm_10 + pkgs.just + ]; + + shellHook = '' + export GOROOT="${go}/share/go" + export GOPATH="$PWD/.go" + export GOBIN="$GOPATH/bin" + export PNPM_HOME="$PWD/.pnpm" + export PATH="$GOBIN:$PNPM_HOME:$PATH" + ''; + }; + } + ); +} From 7714f9476a7c41ff63ba2c01368cecd7ce7d23dc Mon Sep 17 00:00:00 2001 From: yyovil Date: Mon, 1 Jun 2026 07:57:44 +0530 Subject: [PATCH 03/17] Keep daemon ports and session out of adapter move --- backend/internal/daemon/daemon.go | 2 +- backend/internal/daemon/lifecycle_wiring.go | 8 ++++---- backend/internal/ports/inbound.go | 2 +- backend/internal/ports/outbound.go | 2 +- backend/internal/session/manager.go | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/backend/internal/daemon/daemon.go b/backend/internal/daemon/daemon.go index 01170a5a..3cb4f45c 100644 --- a/backend/internal/daemon/daemon.go +++ b/backend/internal/daemon/daemon.go @@ -43,7 +43,7 @@ func Run() error { // fans events out to the SSE transport. 
The LCM/Session Manager and the HTTP // API routes that drive and read this store are owned by the daemon lane and // are wired there once their collaborators (Notifier, AgentMessenger, and the - // runtime/agent/workspace adapters) have production implementations; here we + // runtime/agent/workspace plugins) have production implementations; here we // stand up the persistence + change-delivery foundation they build on. store, err := sqlite.Open(cfg.DataDir) if err != nil { diff --git a/backend/internal/daemon/lifecycle_wiring.go b/backend/internal/daemon/lifecycle_wiring.go index 3e7961ae..65308f0e 100644 --- a/backend/internal/daemon/lifecycle_wiring.go +++ b/backend/internal/daemon/lifecycle_wiring.go @@ -33,9 +33,9 @@ type lifecycleStack struct { // The goroutine stops when ctx is cancelled; Stop waits for it to drain. // // TEMPORARY STUBS (replace as the daemon lane lands the collaborators): -// - noopMessenger — swap for the runtime/agent-adapter-backed AgentMessenger. +// - noopMessenger — swap for the runtime/agent-plugin-backed AgentMessenger. // - reaper.MapRegistry{} — empty runtime registry, so the reaper ticks -// escalations but probes nothing until the runtime adapters exist. +// escalations but probes nothing until the runtime plugins exist. func startLifecycle(ctx context.Context, store *sqlite.Store, logger *slog.Logger) *lifecycleStack { renderer := notification.NewRenderer(store) notifier := notification.NewEnqueuer(store, renderer, logger) @@ -62,7 +62,7 @@ type sessionStack struct { // daemon lane (#10). Returning the SM here lets main hold the wired-but-quiet // instance so future route wiring is a one-line plumb-through. func startSession(ctx context.Context, cfg config.Config, ls *lifecycleStack, log *slog.Logger) (*sessionStack, error) { - _ = ctx // reserved for future ctx-aware adapter construction; today's tmux/gitworktree constructors are synchronous. 
+ _ = ctx // reserved for future ctx-aware plugin construction; today's tmux/gitworktree constructors are synchronous. runtime := tmux.New(tmux.Options{}) ws, err := gitworktree.New(gitworktree.Options{ @@ -96,7 +96,7 @@ func startSession(ctx context.Context, cfg config.Config, ls *lifecycleStack, lo // noopMessenger is a TEMPORARY stub (see startLifecycle): the canonical write // path and durable notifications work without it; only live agent nudges are -// absent until the real runtime/agent adapter is wired. +// absent until the real runtime/agent plugin is wired. type noopMessenger struct{} func (noopMessenger) Send(context.Context, domain.SessionID, string) error { return nil } diff --git a/backend/internal/ports/inbound.go b/backend/internal/ports/inbound.go index c4aebf42..fa472d00 100644 --- a/backend/internal/ports/inbound.go +++ b/backend/internal/ports/inbound.go @@ -28,7 +28,7 @@ type LifecycleManager interface { } // SessionManager is the inbound contract the API/CLI call for explicit -// mutations. It drives the runtime/agent/workspace adapters and routes canonical +// mutations. It drives the runtime/agent/workspace plugins and routes canonical // writes to the LCM. type SessionManager interface { Spawn(ctx context.Context, cfg SpawnConfig) (domain.Session, error) diff --git a/backend/internal/ports/outbound.go b/backend/internal/ports/outbound.go index e547de52..58e1f509 100644 --- a/backend/internal/ports/outbound.go +++ b/backend/internal/ports/outbound.go @@ -87,7 +87,7 @@ type EscalationEvent struct { DurationMs int64 } -// ---- runtime / agent / workspace adapter ports (used by the Session Manager) ---- +// ---- runtime / agent / workspace plugin ports (used by the Session Manager) ---- // Runtime is where a session's agent process runs — a tmux/zellij session or a // bare process. The Session Manager creates one per session and tears it down. 
diff --git a/backend/internal/session/manager.go b/backend/internal/session/manager.go index 45bbd019..37b1de81 100644 --- a/backend/internal/session/manager.go +++ b/backend/internal/session/manager.go @@ -1,5 +1,5 @@ // Package session implements ports.SessionManager: the explicit-mutation half of -// the lane. It drives the runtime/agent/workspace adapters to create and tear +// the lane. It drives the runtime/agent/workspace plugins to create and tear // down sessions, routes canonical writes to the LCM, and is the single producer // of the derived display status (attached on read in List/Get). package session From 908a8799304b1034e1b98b30780f311f1ea7ba09 Mon Sep 17 00:00:00 2001 From: yyovil Date: Mon, 1 Jun 2026 08:00:18 +0530 Subject: [PATCH 04/17] Remove Better-AO naming from flake --- flake.nix | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/flake.nix b/flake.nix index 53a830a9..a07628b4 100644 --- a/flake.nix +++ b/flake.nix @@ -1,5 +1,5 @@ { - description = "better-ao local agent orchestrator development shell"; + description = "agent-orchestrator development shell"; inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; @@ -17,32 +17,32 @@ let pkgs = import nixpkgs { inherit system; }; go = pkgs.go_1_25; - betterAoDev = pkgs.writeShellApplication { - name = "better-ao"; + agentOrchestratorDev = pkgs.writeShellApplication { + name = "agent-orchestrator"; runtimeInputs = [ pkgs.coreutils - pkgs.pnpm_10 + pkgs.nodejs_22 ]; text = '' root="$PWD" - while [ "$root" != "/" ] && [ ! -f "$root/pnpm-workspace.yaml" ]; do + while [ "$root" != "/" ] && { [ ! -f "$root/backend/go.mod" ] || [ ! -f "$root/frontend/package.json" ]; }; do root="$(dirname "$root")" done - if [ ! -f "$root/pnpm-workspace.yaml" ]; then - echo "Unable to find the better-ao workspace root." + if [ ! -f "$root/backend/go.mod" ] || [ ! -f "$root/frontend/package.json" ]; then + echo "Unable to find the agent-orchestrator workspace root." 
exit 1 fi - cd "$root" - exec pnpm dev "$@" + cd "$root/frontend" + exec npm start "$@" ''; }; in { devShells.default = pkgs.mkShell { buildInputs = [ - betterAoDev + agentOrchestratorDev go pkgs.nodejs_22 pkgs.pnpm_10 From 8749d859c195619c33b05ff8134260ff83577330 Mon Sep 17 00:00:00 2001 From: yyovil Date: Mon, 1 Jun 2026 08:01:16 +0530 Subject: [PATCH 05/17] Keep flake as dev shell only --- flake.nix | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/flake.nix b/flake.nix index a07628b4..dc0a99fd 100644 --- a/flake.nix +++ b/flake.nix @@ -17,32 +17,10 @@ let pkgs = import nixpkgs { inherit system; }; go = pkgs.go_1_25; - agentOrchestratorDev = pkgs.writeShellApplication { - name = "agent-orchestrator"; - runtimeInputs = [ - pkgs.coreutils - pkgs.nodejs_22 - ]; - text = '' - root="$PWD" - while [ "$root" != "/" ] && { [ ! -f "$root/backend/go.mod" ] || [ ! -f "$root/frontend/package.json" ]; }; do - root="$(dirname "$root")" - done - - if [ ! -f "$root/backend/go.mod" ] || [ ! -f "$root/frontend/package.json" ]; then - echo "Unable to find the agent-orchestrator workspace root." - exit 1 - fi - - cd "$root/frontend" - exec npm start "$@" - ''; - }; in { devShells.default = pkgs.mkShell { buildInputs = [ - agentOrchestratorDev go pkgs.nodejs_22 pkgs.pnpm_10 From 2dd0d6ea64db72550946c8fa272596593ada080b Mon Sep 17 00:00:00 2001 From: yyovil Date: Mon, 1 Jun 2026 08:16:36 +0530 Subject: [PATCH 06/17] Use goimports for local formatting --- README.md | 2 +- backend/.golangci.yml | 1 - backend/internal/adapters/agent/claudecode/claudecode.go | 3 ++- backend/internal/adapters/agent/claudecode/claudecode_test.go | 3 ++- docs/status.md | 4 ++-- flake.nix | 1 + 6 files changed, 8 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 61a639d2..1331657b 100644 --- a/README.md +++ b/README.md @@ -52,5 +52,5 @@ is intentionally not env-configurable. ```bash cd backend -gofmt -l . && go build ./... && go vet ./... && go test -race ./... 
+goimports -local github.com/aoagents/agent-orchestrator -l . && go build ./... && go vet ./... && go test -race ./... ``` diff --git a/backend/.golangci.yml b/backend/.golangci.yml index 438dd020..49b4127f 100644 --- a/backend/.golangci.yml +++ b/backend/.golangci.yml @@ -107,7 +107,6 @@ linters: formatters: enable: - - gofmt - goimports settings: goimports: diff --git a/backend/internal/adapters/agent/claudecode/claudecode.go b/backend/internal/adapters/agent/claudecode/claudecode.go index 054f32ce..b120249b 100644 --- a/backend/internal/adapters/agent/claudecode/claudecode.go +++ b/backend/internal/adapters/agent/claudecode/claudecode.go @@ -27,9 +27,10 @@ import ( "strings" "sync" + "github.com/google/uuid" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters" "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent" - "github.com/google/uuid" ) const ( diff --git a/backend/internal/adapters/agent/claudecode/claudecode_test.go b/backend/internal/adapters/agent/claudecode/claudecode_test.go index 35e55309..e4be463d 100644 --- a/backend/internal/adapters/agent/claudecode/claudecode_test.go +++ b/backend/internal/adapters/agent/claudecode/claudecode_test.go @@ -8,8 +8,9 @@ import ( "reflect" "testing" - "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent" "github.com/google/uuid" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent" ) func TestGetLaunchCommandBypassWithPrompt(t *testing.T) { diff --git a/docs/status.md b/docs/status.md index 9bb79cdb..a6f12efb 100644 --- a/docs/status.md +++ b/docs/status.md @@ -20,7 +20,7 @@ the eventual lane→main merge is a single cumulative review. 
| LCM — reactions | reaction table + escalation engine + real `TickEscalations` | #6 | | Session Manager | spawn / kill / restore / cleanup / list, eager rollback, worktree-remove safety | #7 | -`gofmt` / `go build` / `go vet` / `go test -race` all green across `domain`, +`goimports` / `go build` / `go vet` / `go test -race` all green across `domain`, `domain/decide`, `lifecycle`, and `session`. The `decide` core is at 100% statement coverage; the impl packages cover the load-bearing logic including the error/rollback paths. @@ -29,7 +29,7 @@ error/rollback paths. ``` cd backend -gofmt -l . # must print nothing +goimports -local github.com/aoagents/agent-orchestrator -l . # must print nothing go build ./... go vet ./... go test -race ./... diff --git a/flake.nix b/flake.nix index dc0a99fd..d99c9381 100644 --- a/flake.nix +++ b/flake.nix @@ -22,6 +22,7 @@ devShells.default = pkgs.mkShell { buildInputs = [ go + pkgs.gotools pkgs.nodejs_22 pkgs.pnpm_10 pkgs.just From 40f19768ea8ed516107db1fa6325b62af08f3f59 Mon Sep 17 00:00:00 2001 From: harshitsinghbhandari <24b4506@iitb.ac.in> Date: Mon, 1 Jun 2026 20:56:11 +0530 Subject: [PATCH 07/17] =?UTF-8?q?feat(scm):=20GitHub=20provider=20adapter?= =?UTF-8?q?=20=E2=80=94=20Observe(prURL)=20=E2=86=92=20PRObservation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A fresh GitHub SCM provider adapter under backend/internal/adapters/scm/github/ exposing one method: (*Provider).Observe(ctx, prURL) (ports.PRObservation, error) It performs a REST GET on /repos/{o}/{r}/pulls/{n} for the authoritative draft/merged/closed/head-SHA, one GraphQL query for the reviewDecision + mergeStateStatus + statusCheckRollup + unresolved review threads, and (only for failure-class CheckRuns) a REST GET on /actions/jobs/{job_id}/logs to splice the last 20 lines of the failed job into the observation. 
The package is the observation primitive; the polling loop, cadence selection, daemon wiring, persistence and webhook receiver are all intentionally out of scope (separate PRs / lanes). Closes #27 — this supersedes PR #28's attempt, which targeted types (domain.SCMProvider / SCMSnapshot / ports.SCMObserveRequest) that the PR #62 simplification refactor has since removed. The GraphQL queries and mergeability composition logic are credited to @whoisasx from PR #28's provider.go; the package was re-implemented against the current ports.PRObservation seam (post-#62) rather than rebased. Bot-author detection uses ONLY GitHub's typed signal (__typename "Bot" / User.Type "Bot"). The strings.Contains(login, "bot") fallback from PR #28 was intentionally dropped — aa-18's review flagged it as a false-positive magnet for logins like "robothon" / "lambot123". 46 table-driven tests against httptest.NewServer cover happy path, draft, merged, closed (not merged), CI passing/failing/pending, StatusContext legacy, log-tail extraction (and the best-effort log-fetch failure case), mergeability mergeable/conflicting/blocked (including ci-failing → blocked even when GitHub still says CLEAN — the load-bearing aa-18 contract)/unstable/unknown, review approved/changes-requested/required/none, bot-author filtering (including the robothon false-positive guard), unresolved-only threads, all-bots → empty Comments, ETag-304 cache hit, primary + secondary rate-limit (with errors.As → *RateLimitError), 401 → ErrAuthFailed, malformed JSON → Fetched:false, network error → Fetched:false, Authorization Bearer header injection, StaticTokenSource blank/whitespace rejection, GHTokenSource memoize + invalidate. Verification: - go build ./... clean - go vet ./... clean - gofmt -l backend/internal/adapters/scm/ clean - golangci-lint run ./... (v2.12, repo .golangci.yml) 0 issues - go test -race ./internal/adapters/scm/github/... 
46/46 PASS References: - aa-18 review of PR #28: ~/.ao/agent-reports/aa-18.md - aa-26 tracker adapter (sibling Go-adapter pattern): #36 / agent-reports/aa-26.md Co-Authored-By: Claude Opus 4.7 --- backend/internal/adapters/scm/github/auth.go | 139 +++ .../internal/adapters/scm/github/client.go | 431 +++++++ backend/internal/adapters/scm/github/doc.go | 121 ++ .../internal/adapters/scm/github/provider.go | 663 ++++++++++ .../adapters/scm/github/provider_test.go | 1063 +++++++++++++++++ 5 files changed, 2417 insertions(+) create mode 100644 backend/internal/adapters/scm/github/auth.go create mode 100644 backend/internal/adapters/scm/github/client.go create mode 100644 backend/internal/adapters/scm/github/doc.go create mode 100644 backend/internal/adapters/scm/github/provider.go create mode 100644 backend/internal/adapters/scm/github/provider_test.go diff --git a/backend/internal/adapters/scm/github/auth.go b/backend/internal/adapters/scm/github/auth.go new file mode 100644 index 00000000..3349d7c3 --- /dev/null +++ b/backend/internal/adapters/scm/github/auth.go @@ -0,0 +1,139 @@ +package github + +import ( + "context" + "errors" + "os" + "os/exec" + "strings" + "sync" + "time" +) + +// TokenSource yields a GitHub bearer token on demand. Production wires this +// to EnvTokenSource or GHTokenSource; tests inject StaticTokenSource. +type TokenSource interface { + Token(ctx context.Context) (string, error) +} + +// tokenInvalidator is the optional capability of dropping a cached token so +// the next call re-fetches it. The Client invokes this whenever GitHub +// responds with an auth-class failure: the next request will pick up a +// rotated token without restarting the daemon. +type tokenInvalidator interface { + InvalidateToken() +} + +// ErrNoToken is returned when no token source could yield a non-empty token. +var ErrNoToken = errors.New("github scm: no token configured") + +// StaticTokenSource is a literal token, typically used in tests. 
+type StaticTokenSource string + +// Token returns the literal token, or ErrNoToken if it is blank. +func (s StaticTokenSource) Token(context.Context) (string, error) { + t := strings.TrimSpace(string(s)) + if t == "" { + return "", ErrNoToken + } + return t, nil +} + +// EnvTokenSource reads the first non-empty value from the listed env vars, +// falling back to GITHUB_TOKEN. Order matters: a project-scoped variable +// (AO_GITHUB_TOKEN) should win over the global default. +type EnvTokenSource struct { + EnvVars []string +} + +// Token returns the first non-empty env-var value found, or ErrNoToken. +func (s EnvTokenSource) Token(context.Context) (string, error) { + for _, name := range s.EnvVars { + if v := strings.TrimSpace(os.Getenv(name)); v != "" { + return v, nil + } + } + if v := strings.TrimSpace(os.Getenv("GITHUB_TOKEN")); v != "" { + return v, nil + } + return "", ErrNoToken +} + +const defaultGHTokenCacheTTL = 5 * time.Minute + +// GHTokenSource shells out to `gh auth token` when env vars are not +// configured. It memoizes the result for TokenTTL so we don't fork-exec on +// every request, but the Client invalidates the cache on auth failures so a +// rotated token is picked up on the next call. Tests inject GH so the gh +// binary is never required. +type GHTokenSource struct { + // GH is the shell-out hook. Production leaves this nil and falls back + // to `exec.CommandContext("gh", "auth", "token")`; tests inject a + // fake to avoid touching the real binary. + GH func(ctx context.Context) (string, error) + // TokenTTL is how long a successful read is memoized. Zero means use + // defaultGHTokenCacheTTL. + TokenTTL time.Duration + // Clock allows tests to drive expiration. Zero means time.Now. + Clock func() time.Time + + mu sync.Mutex + token string + expiresAt time.Time +} + +// Token returns the cached token if still fresh, otherwise re-runs gh. 
+func (s *GHTokenSource) Token(ctx context.Context) (string, error) { + s.mu.Lock() + defer s.mu.Unlock() + now := s.now() + if s.token != "" && now.Before(s.expiresAt) { + return s.token, nil + } + run := s.GH + if run == nil { + run = ghAuthToken + } + out, err := run(ctx) + if err != nil { + return "", err + } + token := strings.TrimSpace(out) + if token == "" { + return "", ErrNoToken + } + s.token = token + s.expiresAt = now.Add(s.ttl()) + return token, nil +} + +// InvalidateToken drops the memoized token so the next Token call shells +// out again. The Client calls this on 401/403-auth responses. +func (s *GHTokenSource) InvalidateToken() { + s.mu.Lock() + defer s.mu.Unlock() + s.token = "" + s.expiresAt = time.Time{} +} + +func (s *GHTokenSource) now() time.Time { + if s.Clock != nil { + return s.Clock() + } + return time.Now() +} + +func (s *GHTokenSource) ttl() time.Duration { + if s.TokenTTL > 0 { + return s.TokenTTL + } + return defaultGHTokenCacheTTL +} + +func ghAuthToken(ctx context.Context) (string, error) { + out, err := exec.CommandContext(ctx, "gh", "auth", "token").Output() + if err != nil { + return "", err + } + return string(out), nil +} diff --git a/backend/internal/adapters/scm/github/client.go b/backend/internal/adapters/scm/github/client.go new file mode 100644 index 00000000..89d69081 --- /dev/null +++ b/backend/internal/adapters/scm/github/client.go @@ -0,0 +1,431 @@ +package github + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strconv" + "strings" + "sync" + "time" +) + +const ( + defaultRESTBaseURL = "https://api.github.com" + defaultGraphQLURL = "https://api.github.com/graphql" + defaultUserAgent = "ao-agent-orchestrator/scm-github" +) + +// Sentinel errors. Provider-level callers should match on these via +// errors.Is; the orchestrator's lifecycle code is intentionally insulated +// from raw HTTP status codes. 
+var ( + ErrNotFound = errors.New("github scm: not found") + ErrAuthFailed = errors.New("github scm: authentication failed") + ErrRateLimited = errors.New("github scm: rate limited") +) + +// RateLimitError carries the structured backoff hints from a rate-limit +// response. Callers that want to back off intelligently can extract +// ResetAt / RetryAfter via errors.As; callers that only need the category +// can use errors.Is(err, ErrRateLimited). +type RateLimitError struct { + ResetAt time.Time + RetryAfter time.Duration + Message string +} + +// Error formats the rate-limit error for logs. +func (e *RateLimitError) Error() string { + if e == nil { + return ErrRateLimited.Error() + } + if e.Message != "" { + return "github scm: rate limited: " + e.Message + } + return ErrRateLimited.Error() +} + +// Is lets errors.Is match a *RateLimitError against ErrRateLimited. +func (e *RateLimitError) Is(target error) bool { return target == ErrRateLimited } + +// ClientOptions configures a Client. Production code sets Token alone; +// tests inject HTTPClient and the URL fields to point at an httptest fake. +type ClientOptions struct { + HTTPClient *http.Client + Token TokenSource + RESTBase string + GraphQLURL string + UserAgent string +} + +// Client is the HTTP wrapper. It owns: +// - bearer-token injection (with cache invalidation on auth failures), +// - ETag cache for REST GETs (so the second observation of the same PR +// burns a free 304 instead of a fresh payload), and +// - sentinel-error classification so callers don't switch on status codes. 
+type Client struct { + http *http.Client + tokens TokenSource + restBase string + graphqlURL string + userAgent string + + mu sync.Mutex + etagOut map[string]string // key (method+path+query) -> last-seen ETag + bodyOut map[string][]byte // key -> last-seen body for 304 replay + cacheLRU []string // insertion-order keys for FIFO eviction +} + +// cacheMaxEntries caps the number of distinct (method,path,query) tuples +// the in-memory ETag cache will track. A single Provider observes one PR +// at a time today, but the follow-up poller will reuse one Provider for +// the whole daemon — without a cap, long-running daemons would grow this +// map forever. +const cacheMaxEntries = 512 + +// NewClient returns a Client. It is intentionally tolerant of nil +// dependencies: production passes a TokenSource; tests sometimes leave it +// nil and supply Bearer-less fakes. +func NewClient(opts ClientOptions) *Client { + c := &Client{ + http: opts.HTTPClient, + tokens: opts.Token, + restBase: opts.RESTBase, + graphqlURL: opts.GraphQLURL, + userAgent: opts.UserAgent, + etagOut: map[string]string{}, + bodyOut: map[string][]byte{}, + } + if c.http == nil { + c.http = &http.Client{Timeout: 30 * time.Second} + } + if c.restBase == "" { + c.restBase = defaultRESTBaseURL + } + if c.graphqlURL == "" { + c.graphqlURL = defaultGraphQLURL + } + if c.userAgent == "" { + c.userAgent = defaultUserAgent + } + return c +} + +// RESTResponse is what doREST returns to the Provider. NotModified=true +// means the cached body is being served; the byte slice is unchanged from +// the previous fresh fetch. +type RESTResponse struct { + StatusCode int + NotModified bool + ETag string + Body []byte +} + +// doREST performs one REST request with ETag-aware caching. The cache is +// scoped to the (method, path, query) tuple so repeated PR observations +// against the same endpoint replay from the cache while observations of a +// different PR don't share state. 
Only GET requests participate in the +// cache — mutating methods would mis-replay 304s as the previous payload. +func (c *Client) doREST(ctx context.Context, method, path string, q url.Values, body any) (RESTResponse, error) { + cacheable := method == http.MethodGet + cacheKey := method + " " + path + "?" + q.Encode() + var prevETag string + var prevBody []byte + if cacheable { + c.mu.Lock() + prevETag = c.etagOut[cacheKey] + prevBody = c.bodyOut[cacheKey] + c.mu.Unlock() + } + + var rdr io.Reader + if body != nil { + b, err := json.Marshal(body) + if err != nil { + return RESTResponse{}, fmt.Errorf("github scm: encode %s %s body: %w", method, path, err) + } + rdr = bytes.NewReader(b) + } + + u, err := c.restURL(path, q) + if err != nil { + return RESTResponse{}, fmt.Errorf("github scm: build %s URL: %w", path, err) + } + req, err := http.NewRequestWithContext(ctx, method, u, rdr) + if err != nil { + return RESTResponse{}, fmt.Errorf("github scm: build %s %s request: %w", method, path, err) + } + if body != nil { + req.Header.Set("Content-Type", "application/json") + } + req.Header.Set("Accept", "application/vnd.github+json") + req.Header.Set("X-GitHub-Api-Version", "2022-11-28") + req.Header.Set("User-Agent", c.userAgent) + if prevETag != "" { + req.Header.Set("If-None-Match", prevETag) + } + if err := c.authorize(ctx, req); err != nil { + return RESTResponse{}, err + } + + resp, err := c.http.Do(req) + if err != nil { + return RESTResponse{}, fmt.Errorf("github scm: %s %s: %w", method, path, err) + } + defer func() { _ = resp.Body.Close() }() + + if cacheable && resp.StatusCode == http.StatusNotModified { + // Replay the cached body. Update the ETag if GitHub returned a + // fresher one — some endpoints rotate ETags on weak revalidation. 
+ newETag := resp.Header.Get("ETag") + etag := prevETag + if newETag != "" && newETag != prevETag { + etag = newETag + c.mu.Lock() + // Refresh only a live entry: re-creating an evicted key here would + // bypass storeCacheEntry's FIFO list and leave an ETag with no body. + if _, live := c.etagOut[cacheKey]; live { + c.etagOut[cacheKey] = newETag + } + c.mu.Unlock() + } + // Replay a private copy so callers cannot mutate the cached payload, + // and keep reporting the stored ETag when the 304 carries none. + return RESTResponse{StatusCode: resp.StatusCode, NotModified: true, ETag: etag, Body: append([]byte(nil), prevBody...)}, nil + } + + b, readErr := io.ReadAll(resp.Body) + if readErr != nil { + return RESTResponse{}, fmt.Errorf("github scm: read %s body: %w", path, readErr) + } + + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + etag := resp.Header.Get("ETag") + if cacheable && etag != "" { + // Defensive copy: b is also returned to the caller below, so the + // cache keeps its own backing array; a caller mutating the + // response body must not corrupt later 304 replays. + c.storeCacheEntry(cacheKey, etag, append([]byte(nil), b...)) + } + return RESTResponse{StatusCode: resp.StatusCode, ETag: etag, Body: b}, nil + } + + err = classifyError(resp, b) + if errors.Is(err, ErrAuthFailed) { + c.invalidateToken() + } + return RESTResponse{StatusCode: resp.StatusCode, Body: b}, err +} + +// doGraphQL posts one GraphQL request and returns the decoded data map +// (the "data" field). Top-level GraphQL errors are surfaced as Go errors +// classified by the same sentinels as REST.
+func (c *Client) doGraphQL(ctx context.Context, query string, variables map[string]any) (map[string]any, error) { + payload := map[string]any{"query": query} + if variables != nil { + payload["variables"] = variables + } + b, err := json.Marshal(payload) + if err != nil { + return nil, fmt.Errorf("github scm: encode graphql body: %w", err) + } + req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.graphqlURL, bytes.NewReader(b)) + if err != nil { + return nil, fmt.Errorf("github scm: build graphql request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + req.Header.Set("User-Agent", c.userAgent) + if err := c.authorize(ctx, req); err != nil { + return nil, err + } + + resp, err := c.http.Do(req) + if err != nil { + return nil, fmt.Errorf("github scm: POST graphql: %w", err) + } + defer func() { _ = resp.Body.Close() }() + respBody, readErr := io.ReadAll(resp.Body) + if readErr != nil { + return nil, fmt.Errorf("github scm: read graphql body: %w", readErr) + } + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + err := classifyError(resp, respBody) + if errors.Is(err, ErrAuthFailed) { + c.invalidateToken() + } + return nil, err + } + var decoded struct { + Data map[string]any `json:"data"` + Errors []struct { + Message string `json:"message"` + Type string `json:"type"` + } `json:"errors"` + } + if err := json.Unmarshal(respBody, &decoded); err != nil { + return nil, fmt.Errorf("github scm: decode graphql response: %w", err) + } + if len(decoded.Errors) > 0 { + msg := decoded.Errors[0].Message + low := strings.ToLower(msg) + switch { + case strings.Contains(low, "rate limit") || strings.Contains(low, "abuse"): + return decoded.Data, &RateLimitError{Message: msg} + case strings.Contains(low, "bad credentials") || strings.Contains(low, "credentials"): + c.invalidateToken() + return decoded.Data, fmt.Errorf("%w: %s", ErrAuthFailed, msg) + case strings.Contains(low, "could not resolve") || 
strings.Contains(low, "not found"): + return decoded.Data, fmt.Errorf("%w: %s", ErrNotFound, msg) + default: + return decoded.Data, fmt.Errorf("github scm: graphql error: %s", msg) + } + } + return decoded.Data, nil +} + +// fetchPlainText is a small REST helper used for the job-log endpoint, +// which returns text/plain rather than JSON. It does NOT participate in +// the ETag cache (logs are append-only and tiny enough that re-fetch is +// cheap; caching would just inflate memory for no win). +func (c *Client) fetchPlainText(ctx context.Context, path string) ([]byte, error) { + u, err := c.restURL(path, nil) + if err != nil { + return nil, fmt.Errorf("github scm: build %s URL: %w", path, err) + } + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, http.NoBody) + if err != nil { + return nil, fmt.Errorf("github scm: build %s request: %w", path, err) + } + req.Header.Set("Accept", "text/plain") + req.Header.Set("User-Agent", c.userAgent) + if err := c.authorize(ctx, req); err != nil { + return nil, err + } + resp, err := c.http.Do(req) + if err != nil { + return nil, fmt.Errorf("github scm: GET %s: %w", path, err) + } + defer func() { _ = resp.Body.Close() }() + body, readErr := io.ReadAll(resp.Body) + if readErr != nil { + return nil, fmt.Errorf("github scm: read %s body: %w", path, readErr) + } + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + return body, nil + } + return nil, classifyError(resp, body) +} + +// storeCacheEntry records one (ETag, body) pair under cacheKey and evicts +// the oldest entry once cacheMaxEntries is exceeded. FIFO is intentional: +// the access pattern is "one PR per poll cycle"; an LRU would just add +// bookkeeping without changing eviction order in practice. 
+func (c *Client) storeCacheEntry(cacheKey, etag string, body []byte) { + c.mu.Lock() + defer c.mu.Unlock() + if _, exists := c.etagOut[cacheKey]; !exists { + c.cacheLRU = append(c.cacheLRU, cacheKey) + } + c.etagOut[cacheKey] = etag + c.bodyOut[cacheKey] = body + for len(c.cacheLRU) > cacheMaxEntries { + evict := c.cacheLRU[0] + c.cacheLRU = c.cacheLRU[1:] + delete(c.etagOut, evict) + delete(c.bodyOut, evict) + } +} + +func (c *Client) authorize(ctx context.Context, req *http.Request) error { + if c.tokens == nil { + return nil + } + token, err := c.tokens.Token(ctx) + if err != nil { + return fmt.Errorf("%w: %w", ErrAuthFailed, err) + } + req.Header.Set("Authorization", "Bearer "+token) + return nil +} + +func (c *Client) invalidateToken() { + if inv, ok := c.tokens.(tokenInvalidator); ok { + inv.InvalidateToken() + } +} + +func (c *Client) restURL(path string, q url.Values) (string, error) { + base, err := url.Parse(c.restBase) + if err != nil { + return "", err + } + if !strings.HasPrefix(path, "/") { + path = "/" + path + } + base.Path = strings.TrimSuffix(base.Path, "/") + path + if q != nil { + base.RawQuery = q.Encode() + } + return base.String(), nil +} + +func classifyError(resp *http.Response, body []byte) error { + msg := githubMessage(body) + switch resp.StatusCode { + case http.StatusNotFound: + return fmt.Errorf("%w: %s", ErrNotFound, msg) + case http.StatusTooManyRequests: + return rateLimited(resp, msg) + case http.StatusUnauthorized: + return fmt.Errorf("%w: %s", ErrAuthFailed, msg) + case http.StatusForbidden: + // GitHub returns 403 for primary rate-limit exhaustion, for + // secondary/abuse limits, and for genuine auth/permission failures. + // Disambiguate by signal: primary limit sets X-RateLimit-Remaining=0; + // secondary/abuse sets Retry-After (often without the Remaining + // header); either case mentions "rate limit" / "abuse" in the body. + // Everything else is an auth/permission failure. 
+ if isRateLimited(resp, msg) { + return rateLimited(resp, msg) + } + return fmt.Errorf("%w: %s", ErrAuthFailed, msg) + } + return fmt.Errorf("github scm: %d %s", resp.StatusCode, msg) +} + +func isRateLimited(resp *http.Response, msg string) bool { + if rem := resp.Header.Get("X-RateLimit-Remaining"); rem != "" { + if n, err := strconv.Atoi(rem); err == nil && n == 0 { + return true + } + } + if resp.Header.Get("Retry-After") != "" { + return true + } + low := strings.ToLower(msg) + return strings.Contains(low, "rate limit") || strings.Contains(low, "abuse detection") || strings.Contains(low, "secondary rate") +} + +func rateLimited(resp *http.Response, msg string) error { + e := &RateLimitError{Message: msg} + if reset := resp.Header.Get("X-RateLimit-Reset"); reset != "" { + if sec, err := strconv.ParseInt(reset, 10, 64); err == nil && sec > 0 { + e.ResetAt = time.Unix(sec, 0) + } + } + if ra := resp.Header.Get("Retry-After"); ra != "" { + if sec, err := strconv.Atoi(ra); err == nil && sec >= 0 { + e.RetryAfter = time.Duration(sec) * time.Second + } + } + return e +} + +func githubMessage(body []byte) string { + var p struct { + Message string `json:"message"` + } + if json.Unmarshal(body, &p) == nil && p.Message != "" { + return p.Message + } + return strings.TrimSpace(string(body)) +} diff --git a/backend/internal/adapters/scm/github/doc.go b/backend/internal/adapters/scm/github/doc.go new file mode 100644 index 00000000..8dee9a34 --- /dev/null +++ b/backend/internal/adapters/scm/github/doc.go @@ -0,0 +1,121 @@ +// Package github observes GitHub pull requests for the PR Manager. 
+// +// The exported surface is one function: +// +// (*Provider).Observe(ctx, prURL) (ports.PRObservation, error) +// +// It performs a REST GET on /repos/{o}/{r}/pulls/{n} for the authoritative +// state booleans (draft / merged / closed / head SHA), one GraphQL query +// for the reviewDecision + mergeStateStatus + statusCheckRollup + review +// threads, and (only for CheckRuns that concluded failure-class) a REST +// GET on /repos/{o}/{r}/actions/jobs/{job_id}/logs to splice the last 20 +// lines of the failed job into the observation. +// +// The poller / cadence loop is intentionally NOT in this package — it is +// a follow-up PR. This adapter is the observation primitive that loop +// will call. +// +// # State mapping +// +// Each ports.PRObservation field is derived as follows: +// +// - Fetched: false if any required REST/GraphQL call fails; true +// only once all the calls have succeeded. Log-tail +// fetch failures are best-effort: the LogTail is +// stamped with a one-line fetch-error sentinel +// and the observation still surfaces as Fetched=true. +// +// - URL, Number: the URL the caller passed (validated) plus the +// PR number parsed from that URL. +// +// - Draft: REST pulls/{n}.draft. +// +// - Merged: REST pulls/{n}.merged OR a non-null merged_at. +// +// - Closed: REST pulls/{n}.state == "closed" AND NOT Merged. +// (Closed and Merged are mutually exclusive.) +// +// - CI: derived from the latest commit's statusCheckRollup contexts +// (CheckRun + StatusContext). Failed if ANY context concluded in a +// failure class (failure / cancelled / timed_out / action_required / +// error). Pending if any context is still running / queued. +// Passing if all non-skipped contexts concluded SUCCESS / NEUTRAL. +// Unknown otherwise. Empty rollup falls back to the rollup-level +// "state" field.
+// +// - Review: from GraphQL pullRequest.reviewDecision: +// +// | GraphQL | domain.ReviewDecision | +// |------------------------|-------------------------| +// | APPROVED | ReviewApproved | +// | CHANGES_REQUESTED | ReviewChangesRequest | +// | REVIEW_REQUIRED | ReviewRequired | +// | null / unknown | ReviewNone | +// +// - Mergeability: composed in priority order; the first rule that +// matches wins. The primary signal is the GraphQL pullRequest +// payload; the REST pulls/{n} response is consulted only as a +// tiebreaker when GraphQL is empty or has not yet been computed. +// Rules: +// (1) mergeStateStatus == DIRTY -> MergeConflicting +// (2) mergeStateStatus == BLOCKED -> MergeBlocked +// (3) mergeStateStatus == UNSTABLE -> MergeUnstable +// (4) GraphQL mergeable == CONFLICTING -> MergeConflicting +// (5) reviewDecision == changes_requested -> MergeBlocked +// (6) CI == failing -> MergeBlocked +// (7) REST mergeable_state pin — a TIE-BREAKER, not a terminal +// step: "dirty"->MergeConflicting, "blocked"->MergeBlocked, +// "unstable"->MergeUnstable, "clean"->MergeMergeable ONLY if +// GraphQL says MERGEABLE or REST mergeable bool is true +// (otherwise stays unknown — REST lags GraphQL). +// (8) mergeable == MERGEABLE AND mergeStateStatus == CLEAN +// -> MergeMergeable +// (9) otherwise -> MergeUnknown +// +// - Checks[]: one entry per rollup context. For CheckRun rows we use +// name + conclusion + detailsUrl + the head SHA as the CommitHash; +// for StatusContext rows we use context + state + targetUrl. LogTail +// is populated ONLY for failure-class CheckRun entries, by fetching +// /actions/jobs/{job_id}/logs and tailing to the last 20 lines. +// +// - Comments[]: one entry per unresolved review-thread comment. +// Resolved threads are skipped client-side (Resolved on the +// observation is therefore always false). 
Bot authors are detected +// via GitHub's __typename == "Bot" or User.Type == "Bot" and +// dropped — the legacy strings.Contains(login, "bot") fallback was +// intentionally NOT carried forward (it false-positives on logins +// like "robothon" / "lambot123"; aa-18's review of PR #28 flagged +// this). +// +// # Errors +// +// The Client classifies HTTP failures into three sentinels: +// +// - ErrNotFound — 404 (PR doesn't exist or token can't see it) +// - ErrAuthFailed — 401, or 403 without rate-limit signals +// - ErrRateLimited — 403 with X-RateLimit-Remaining=0, 403 with the +// secondary "abuse detection" body, or 429 +// (also returns *RateLimitError with ResetAt / +// RetryAfter — match via errors.As) +// +// All other transport failures (decode errors, network failures, GraphQL +// "errors" array) bubble up as wrapped errors with Fetched=false on the +// observation, so the PR Manager keeps the prior row rather than +// fabricating a closed/merged transition from a failed observation. +// +// # Caching +// +// The Client maintains an in-memory ETag cache per (method, path, query). +// On the second observation of the same PR the REST GET sends +// If-None-Match and replays the cached body on a 304 — GraphQL is always +// re-fetched because it doesn't expose ETag-based revalidation. +// +// # Out of scope (intentionally — these are different PRs / lanes) +// +// - The poller loop and cadence selection (issue #35). +// - Webhook ingestion (this package is polling-only). +// - Persistence (PR Manager owns the row mapping; see internal/pr). +// - Linear / GitLab providers (separate PRs). +// - Issue tracking (separate lane, see internal/adapters/tracker). +// - Comment-injection-into-session-context (Messenger lane, not SCM). 
package github
diff --git a/backend/internal/adapters/scm/github/provider.go b/backend/internal/adapters/scm/github/provider.go
new file mode 100644
index 00000000..81fd9fbb
--- /dev/null
+++ b/backend/internal/adapters/scm/github/provider.go
@@ -0,0 +1,663 @@
package github

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"path"
	"strconv"
	"strings"

	"github.com/aoagents/agent-orchestrator/backend/internal/domain"
	"github.com/aoagents/agent-orchestrator/backend/internal/ports"
)

// ciFailureLogTailLines is the number of trailing lines of a failed job's
// log we splice into the observation. 20 lines is enough to catch the
// usual "X tests failed" tail without bloating the per-PR row.
const ciFailureLogTailLines = 20

// ProviderOptions configures a Provider. Production code typically sets
// Token; tests inject a pre-built Client pointed at httptest.
//
// When Client is non-nil NewProvider uses it as-is; otherwise the
// remaining fields are forwarded to NewClient to build one.
type ProviderOptions struct {
	Client     *Client
	HTTPClient *http.Client
	Token      TokenSource
	RESTBase   string
	GraphQLURL string
	UserAgent  string
}

// Provider observes one GitHub pull request and returns a normalized
// ports.PRObservation for the PR Manager to persist. There is no polling
// loop in v1 — the loop is a follow-up PR (#35); this adapter is the
// observation primitive that loop will call.
type Provider struct {
	client *Client
}

// NewProvider returns a Provider. If opts.Client is supplied it is used
// verbatim; otherwise a Client is built from the other options. When a
// Token source is supplied it is exercised once so missing credentials
// surface at daemon startup rather than at first observation. Tests that
// want an unauthenticated fake pass opts.Client directly.
+func NewProvider(opts ProviderOptions) (*Provider, error) { + if opts.Client == nil && opts.Token != nil { + if _, err := opts.Token.Token(context.Background()); err != nil { + return nil, err + } + } + c := opts.Client + if c == nil { + c = NewClient(ClientOptions{ + HTTPClient: opts.HTTPClient, + Token: opts.Token, + RESTBase: opts.RESTBase, + GraphQLURL: opts.GraphQLURL, + UserAgent: opts.UserAgent, + }) + } + return &Provider{client: c}, nil +} + +// Observe fetches the current state of one PR by its github.com URL and +// returns a normalized ports.PRObservation. Any required network call +// failing yields Fetched=false (caller must not infer "PR closed" from a +// failed observation). +func (p *Provider) Observe(ctx context.Context, prURL string) (ports.PRObservation, error) { + out := ports.PRObservation{URL: prURL} + owner, repo, number, err := parsePRURL(prURL) + if err != nil { + return out, err + } + out.Number = number + + rest, err := p.fetchRESTPull(ctx, owner, repo, number) + if err != nil { + // Network/auth/rate-limit failures must surface as Fetched:false. + // Stable terminal states like 404 also surface that way — the PR + // Manager keeps the prior row rather than fabricating closed/merged. + return out, err + } + + out.Draft = rest.Draft + out.Merged = rest.Merged || (rest.MergedAt != "") + out.Closed = strings.EqualFold(rest.State, "closed") && !out.Merged + + gq, err := p.fetchGraphQL(ctx, owner, repo, number) + if err != nil { + return out, err + } + + out.CI = ciSummaryFromGraphQL(gq) + out.Review = reviewDecisionFromGraphQL(gq) + out.Mergeability = mergeabilityFromGraphQL(gq, rest, out.CI, out.Review) + out.Checks = checksFromGraphQL(gq, rest.Head.SHA) + out.Comments = commentsFromGraphQL(gq) + + // Log-tail enrichment is best-effort: a job-log fetch failure must not + // flip the observation to Fetched:false, because we already have the + // authoritative CI summary from GraphQL. Stamp a one-liner instead. 
+ for i := range out.Checks { + if !isFailingCheckStatus(out.Checks[i].Status) { + continue + } + jobID := jobIDForCheck(gq, out.Checks[i].Name) + if jobID == 0 { + continue + } + log, fetchErr := p.fetchJobLogTail(ctx, owner, repo, jobID) + if fetchErr != nil { + out.Checks[i].LogTail = fmt.Sprintf("", scrubError(fetchErr)) + continue + } + out.Checks[i].LogTail = tailLines(log, ciFailureLogTailLines) + } + + out.Fetched = true + return out, nil +} + +// --------------------------------------------------------------------------- +// REST: pull payload +// --------------------------------------------------------------------------- + +type restPull struct { + State string `json:"state"` + Draft bool `json:"draft"` + Merged bool `json:"merged"` + MergedAt string `json:"merged_at"` + Head struct { + SHA string `json:"sha"` + } `json:"head"` + Mergeable *bool `json:"mergeable"` + MergeableState string `json:"mergeable_state"` + MergeStateStatus string `json:"merge_state_status"` +} + +func (p *Provider) fetchRESTPull(ctx context.Context, owner, repo string, number int) (restPull, error) { + resp, err := p.client.doREST(ctx, http.MethodGet, repoPath(owner, repo, "pulls", strconv.Itoa(number)), nil, nil) + if err != nil { + return restPull{}, err + } + if len(resp.Body) == 0 { + return restPull{}, errors.New("github scm: empty pull response") + } + var pull restPull + if err := json.Unmarshal(resp.Body, &pull); err != nil { + return restPull{}, fmt.Errorf("github scm: decode pull: %w", err) + } + return pull, nil +} + +// --------------------------------------------------------------------------- +// GraphQL: the heavy lift +// --------------------------------------------------------------------------- + +const graphQLCheckContextLimit = 50 + +// prObservationQuery is the GraphQL query (derived from PR #28, credited +// to @whoisasx) that pulls everything we need in one round trip: +// - reviewDecision (APPROVED / CHANGES_REQUESTED / REVIEW_REQUIRED / null) +// - 
mergeable + mergeStateStatus (DIRTY / BLOCKED / UNSTABLE / CLEAN / ...) +// - latest commit's statusCheckRollup (CheckRuns + StatusContexts) so we +// can derive a CIState without an extra REST hop, and so that bot vs +// human is detected via __typename on review comments. +const prObservationQuery = `query($owner:String!,$repo:String!,$number:Int!){ + repository(owner:$owner,name:$repo){ + pullRequest(number:$number){ + number + url + state + isDraft + merged + closed + mergeable + mergeStateStatus + reviewDecision + headRefOid + commits(last:1){ nodes{ commit{ + oid + statusCheckRollup{ + state + contexts(first:CONTEXT_LIMIT){ + nodes{ + __typename + ... on CheckRun { name status conclusion detailsUrl url databaseId } + ... on StatusContext { context state targetUrl } + } + pageInfo{ hasNextPage } + } + } + } } } + reviewThreads(last:100){ nodes{ + id + isResolved + comments(first:100){ nodes{ + id + body + path + line + url + author{ login __typename ... on User { } } + } } + } } + } + } +}` + +func (p *Provider) fetchGraphQL(ctx context.Context, owner, repo string, number int) (map[string]any, error) { + q := strings.Replace(prObservationQuery, "CONTEXT_LIMIT", strconv.Itoa(graphQLCheckContextLimit), 1) + data, err := p.client.doGraphQL(ctx, q, map[string]any{"owner": owner, "repo": repo, "number": number}) + if err != nil { + return nil, err + } + repoData, _ := data["repository"].(map[string]any) + pr, _ := repoData["pullRequest"].(map[string]any) + if pr == nil { + return nil, fmt.Errorf("%w: pull request not found in graphql response", ErrNotFound) + } + return pr, nil +} + +// --------------------------------------------------------------------------- +// REST: per-job log tail +// --------------------------------------------------------------------------- + +func (p *Provider) fetchJobLogTail(ctx context.Context, owner, repo string, jobID int64) (string, error) { + logPath := repoPath(owner, repo, "actions", "jobs", strconv.FormatInt(jobID, 10), "logs") 
+ body, err := p.client.fetchPlainText(ctx, logPath) + if err != nil { + return "", err + } + return string(body), nil +} + +// --------------------------------------------------------------------------- +// Projection helpers +// --------------------------------------------------------------------------- + +// ciSummaryFromGraphQL maps the per-PR status rollup onto domain.CIState. +// If ANY context concluded failure-class we return CIFailing. Otherwise +// any pending context wins over passing. An empty rollup is CIUnknown. +func ciSummaryFromGraphQL(pr map[string]any) domain.CIState { + roll := statusRollup(pr) + if roll == nil { + return domain.CIUnknown + } + contexts, _ := roll["contexts"].(map[string]any) + rawNodes := nodes(contexts["nodes"]) + if len(rawNodes) == 0 { + // GitHub returns a top-level "state" on the rollup even when the + // nodes list is empty (e.g. SUCCESS / FAILURE / PENDING). Honor it + // rather than returning CIUnknown for an otherwise-decided PR. + return mapRollupState(str(roll["state"])) + } + pending, passing := false, false + for _, n := range rawNodes { + st := checkStatusFromGraphQL(n) + switch st { + case domain.PRCheckFailed, domain.PRCheckCancelled: + return domain.CIFailing + case domain.PRCheckQueued, domain.PRCheckInProgress: + pending = true + case domain.PRCheckPassed: + passing = true + } + } + switch { + case pending: + return domain.CIPending + case passing: + return domain.CIPassing + default: + return domain.CIUnknown + } +} + +func mapRollupState(s string) domain.CIState { + switch strings.ToUpper(strings.TrimSpace(s)) { + case "SUCCESS": + return domain.CIPassing + case "FAILURE", "ERROR": + return domain.CIFailing + case "PENDING", "EXPECTED": + return domain.CIPending + default: + return domain.CIUnknown + } +} + +// reviewDecisionFromGraphQL normalizes the GraphQL reviewDecision enum +// onto the domain vocabulary. 
Re-implemented inline because the helper +// referenced in the task brief lived against types that no longer exist. +func reviewDecisionFromGraphQL(pr map[string]any) domain.ReviewDecision { + switch strings.ToUpper(strings.TrimSpace(str(pr["reviewDecision"]))) { + case "APPROVED": + return domain.ReviewApproved + case "CHANGES_REQUESTED": + return domain.ReviewChangesRequest + case "REVIEW_REQUIRED": + return domain.ReviewRequired + default: + return domain.ReviewNone + } +} + +// mergeabilityFromGraphQL composes the merge verdict from three signals: +// the REST mergeable/rebaseable booleans, the GraphQL mergeStateStatus, +// and the already-derived CIState + ReviewDecision. The rules follow the +// spec table in doc.go. +func mergeabilityFromGraphQL(pr map[string]any, rest restPull, ci domain.CIState, review domain.ReviewDecision) domain.Mergeability { + state := strings.ToUpper(strings.TrimSpace(firstNonEmpty(str(pr["mergeStateStatus"]), rest.MergeStateStatus))) + rawMergeable := strings.ToUpper(strings.TrimSpace(str(pr["mergeable"]))) + + switch state { + case "DIRTY": + return domain.MergeConflicting + case "BLOCKED": + return domain.MergeBlocked + case "UNSTABLE": + return domain.MergeUnstable + } + if rawMergeable == "CONFLICTING" { + return domain.MergeConflicting + } + + if review == domain.ReviewChangesRequest { + return domain.MergeBlocked + } + if ci == domain.CIFailing { + return domain.MergeBlocked + } + + // REST's mergeable_state ("clean" / "blocked" / "behind" / "dirty" / "unstable" + // / "draft" / "unknown") backs up the GraphQL view when GitHub hasn't + // computed the rollup yet. 
+ switch strings.ToLower(strings.TrimSpace(rest.MergeableState)) { + case "clean": + if rawMergeable == "MERGEABLE" || (rest.Mergeable != nil && *rest.Mergeable) { + return domain.MergeMergeable + } + case "dirty": + return domain.MergeConflicting + case "blocked": + return domain.MergeBlocked + case "unstable": + return domain.MergeUnstable + } + + if rawMergeable == "MERGEABLE" && state == "CLEAN" { + return domain.MergeMergeable + } + return domain.MergeUnknown +} + +// checksFromGraphQL projects each context node into a PRCheckObservation. +// StatusContext (commit-status) and CheckRun (Actions) are both flattened +// into the same slice because downstream consumers don't distinguish. +func checksFromGraphQL(pr map[string]any, headSHA string) []ports.PRCheckObservation { + roll := statusRollup(pr) + contexts, _ := roll["contexts"].(map[string]any) + rawNodes := nodes(contexts["nodes"]) + if len(rawNodes) == 0 { + return nil + } + out := make([]ports.PRCheckObservation, 0, len(rawNodes)) + for _, n := range rawNodes { + typ := str(n["__typename"]) + var name, urlOut string + switch typ { + case "CheckRun": + name = str(n["name"]) + urlOut = firstNonEmpty(str(n["detailsUrl"]), str(n["url"])) + case "StatusContext": + name = str(n["context"]) + urlOut = str(n["targetUrl"]) + default: + continue + } + if name == "" { + continue + } + out = append(out, ports.PRCheckObservation{ + Name: name, + CommitHash: headSHA, + Status: checkStatusFromGraphQL(n), + URL: urlOut, + }) + } + return out +} + +// commentsFromGraphQL flattens unresolved review threads into one comment +// per node, dropping bot authors entirely (the spec keeps Resolved=false +// always since we filter resolved threads out client-side). 
+func commentsFromGraphQL(pr map[string]any) []ports.PRCommentObservation { + threads, _ := pr["reviewThreads"].(map[string]any) + rawNodes := nodes(threads["nodes"]) + if len(rawNodes) == 0 { + return nil + } + var out []ports.PRCommentObservation + for _, th := range rawNodes { + if boolv(th["isResolved"]) { + continue + } + comments, _ := th["comments"].(map[string]any) + for _, cn := range nodes(comments["nodes"]) { + author, _ := cn["author"].(map[string]any) + if isBotAuthor(author) { + continue + } + out = append(out, ports.PRCommentObservation{ + ID: str(cn["id"]), + Author: str(author["login"]), + File: str(cn["path"]), + Line: int(num(cn["line"])), + Body: str(cn["body"]), + Resolved: false, + }) + } + } + return out +} + +// isBotAuthor uses ONLY GitHub's typed signal (__typename or User.Type +// === "Bot"). The strings.Contains(login, "bot") fallback from PR #28 +// was deliberately dropped — aa-18 flagged it as a false-positive +// magnet (logins like "robothon", "lambot123" tripped it). +func isBotAuthor(author map[string]any) bool { + if strings.EqualFold(str(author["__typename"]), "Bot") { + return true + } + if strings.EqualFold(str(author["type"]), "Bot") { + return true + } + return false +} + +// jobIDForCheck looks up the Actions job ID for a check by name, so we +// can call /actions/jobs/{job_id}/logs. StatusContext rows have no job +// ID (they're commit statuses, not Actions runs); those return 0 and +// the log fetch is skipped for them. +func jobIDForCheck(pr map[string]any, name string) int64 { + roll := statusRollup(pr) + contexts, _ := roll["contexts"].(map[string]any) + for _, n := range nodes(contexts["nodes"]) { + if str(n["__typename"]) != "CheckRun" { + continue + } + if str(n["name"]) != name { + continue + } + return int64(num(n["databaseId"])) + } + return 0 +} + +// statusRollup extracts the commits[0].commit.statusCheckRollup blob +// from the GraphQL pullRequest payload. 
Nil when the PR has no commits +// or GitHub hasn't computed the rollup yet. +func statusRollup(pr map[string]any) map[string]any { + commits, _ := pr["commits"].(map[string]any) + for _, n := range nodes(commits["nodes"]) { + commit, _ := n["commit"].(map[string]any) + roll, _ := commit["statusCheckRollup"].(map[string]any) + if roll != nil { + return roll + } + } + return nil +} + +// checkStatusFromGraphQL maps the (status, conclusion) tuple of one node +// onto the domain enum. Failure-class conclusions always win — pending +// status with a final conclusion of "failure" is still a failed check. +func checkStatusFromGraphQL(n map[string]any) domain.PRCheckStatus { + typ := str(n["__typename"]) + if typ == "StatusContext" { + switch strings.ToUpper(strings.TrimSpace(str(n["state"]))) { + case "SUCCESS": + return domain.PRCheckPassed + case "FAILURE", "ERROR": + return domain.PRCheckFailed + case "PENDING", "EXPECTED": + return domain.PRCheckInProgress + default: + return domain.PRCheckUnknown + } + } + conclusion := strings.ToUpper(strings.TrimSpace(str(n["conclusion"]))) + status := strings.ToUpper(strings.TrimSpace(str(n["status"]))) + switch conclusion { + case "SUCCESS", "NEUTRAL": + return domain.PRCheckPassed + case "FAILURE", "TIMED_OUT", "ACTION_REQUIRED", "STARTUP_FAILURE": + return domain.PRCheckFailed + case "CANCELLED": + return domain.PRCheckCancelled + case "SKIPPED", "STALE": + return domain.PRCheckSkipped + } + switch status { + case "QUEUED", "PENDING", "REQUESTED", "WAITING": + return domain.PRCheckQueued + case "IN_PROGRESS": + return domain.PRCheckInProgress + case "COMPLETED": + // Completed without a conclusion is unusual but reachable — treat + // it as unknown so the caller does not over-trust an absent state. 
+ return domain.PRCheckUnknown + } + return domain.PRCheckUnknown +} + +func isFailingCheckStatus(s domain.PRCheckStatus) bool { + return s == domain.PRCheckFailed || s == domain.PRCheckCancelled +} + +// --------------------------------------------------------------------------- +// URL + path helpers +// --------------------------------------------------------------------------- + +// parsePRURL accepts both the canonical github.com web URL and the API +// pulls URL. Returns owner, repo, number or an error wrapping ErrNotFound +// for shapes we don't recognise (so the caller surfaces them like any +// other "PR isn't on GitHub" outcome). +func parsePRURL(prURL string) (string, string, int, error) { + if prURL == "" { + return "", "", 0, fmt.Errorf("%w: empty PR url", ErrNotFound) + } + u, err := url.Parse(prURL) + if err != nil { + return "", "", 0, fmt.Errorf("%w: parse url: %w", ErrNotFound, err) + } + host := strings.ToLower(u.Host) + // Accept github.com (web) and api.github.com (REST/GraphQL). GitHub + // Enterprise hosts must end in .github.com or .ghe.io (GitHub's own + // dedicated TLDs); anything else looks like a bad URL or a different + // SCM and is rejected. + switch { + case host == "": + // Allow path-only URLs (parsePRURL is also exercised via API + // paths without a host in some tests). 
+ case host == "github.com", host == "www.github.com", host == "api.github.com": + // canonical + case strings.HasSuffix(host, ".github.com") || strings.HasSuffix(host, ".ghe.io"): + // enterprise + default: + return "", "", 0, fmt.Errorf("%w: host %q is not a github host", ErrNotFound, host) + } + parts := strings.Split(strings.Trim(u.Path, "/"), "/") + // Web form: /owner/repo/pull/123 + if len(parts) >= 4 && (parts[2] == "pull" || parts[2] == "pulls") { + owner, repo := parts[0], parts[1] + n, err := strconv.Atoi(parts[3]) + if err != nil || n <= 0 { + return "", "", 0, fmt.Errorf("%w: bad PR number %q", ErrNotFound, parts[3]) + } + return owner, repo, n, nil + } + // API form: /repos/owner/repo/pulls/123 + if len(parts) >= 5 && parts[0] == "repos" && parts[3] == "pulls" { + owner, repo := parts[1], parts[2] + n, err := strconv.Atoi(parts[4]) + if err != nil || n <= 0 { + return "", "", 0, fmt.Errorf("%w: bad PR number %q", ErrNotFound, parts[4]) + } + return owner, repo, n, nil + } + return "", "", 0, fmt.Errorf("%w: not a github PR url: %s", ErrNotFound, prURL) +} + +func repoPath(owner, repo string, elems ...string) string { + all := append([]string{"repos", owner, repo}, elems...) + for i := range all { + all[i] = url.PathEscape(all[i]) + } + return "/" + path.Join(all...) 
+} + +// --------------------------------------------------------------------------- +// Small JSON-ish accessors +// --------------------------------------------------------------------------- + +func nodes(v any) []map[string]any { + a, ok := v.([]any) + if !ok { + return nil + } + out := make([]map[string]any, 0, len(a)) + for _, item := range a { + if m, ok := item.(map[string]any); ok { + out = append(out, m) + } + } + return out +} + +func str(v any) string { + if s, ok := v.(string); ok { + return s + } + return "" +} + +func boolv(v any) bool { + if b, ok := v.(bool); ok { + return b + } + return false +} + +func num(v any) float64 { + switch t := v.(type) { + case float64: + return t + case int: + return float64(t) + case int64: + return float64(t) + case json.Number: + f, _ := t.Float64() + return f + default: + return 0 + } +} + +func firstNonEmpty(a, b string) string { + if strings.TrimSpace(a) != "" { + return a + } + return b +} + +func tailLines(s string, n int) string { + s = strings.ReplaceAll(strings.TrimSpace(s), "\r\n", "\n") + if s == "" { + return "" + } + lines := strings.Split(s, "\n") + if len(lines) > n { + lines = lines[len(lines)-n:] + } + return strings.Join(lines, "\n") +} + +// scrubError keeps the error message single-line so the LogTail field +// stays a tidy one-liner instead of leaking multi-line API payloads +// into the PR row. 
+func scrubError(err error) string { + if err == nil { + return "" + } + msg := err.Error() + msg = strings.ReplaceAll(msg, "\n", " ") + msg = strings.ReplaceAll(msg, "\r", " ") + return strings.TrimSpace(msg) +} diff --git a/backend/internal/adapters/scm/github/provider_test.go b/backend/internal/adapters/scm/github/provider_test.go new file mode 100644 index 00000000..82edf58a --- /dev/null +++ b/backend/internal/adapters/scm/github/provider_test.go @@ -0,0 +1,1063 @@ +package github + +import ( + "context" + "encoding/json" + "errors" + "io" + "net/http" + "net/http/httptest" + "strconv" + "strings" + "sync" + "testing" + "time" + + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +// --------------------------------------------------------------------------- +// Test scaffolding: programmable httptest.Server with route-based dispatch. +// Tests register handlers per "METHOD path" key; unmatched requests fail +// loudly so an accidental extra call surfaces immediately. +// --------------------------------------------------------------------------- + +type recordedReq struct { + Method string + Path string + Header http.Header + Body string +} + +type fakeGH struct { + t *testing.T + server *httptest.Server + mu sync.Mutex + requests []recordedReq + handlers map[string]http.HandlerFunc +} + +func newFakeGH(t *testing.T) *fakeGH { + t.Helper() + f := &fakeGH{t: t, handlers: map[string]http.HandlerFunc{}} + f.server = httptest.NewServer(http.HandlerFunc(f.serve)) + t.Cleanup(f.server.Close) + return f +} + +// on registers a handler for one METHOD + path tuple. Path is taken +// verbatim (no query string). 
+func (f *fakeGH) on(method, path string, h http.HandlerFunc) { + f.mu.Lock() + defer f.mu.Unlock() + f.handlers[method+" "+path] = h +} + +func (f *fakeGH) serve(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + hdrCopy := r.Header.Clone() + f.mu.Lock() + f.requests = append(f.requests, recordedReq{Method: r.Method, Path: r.URL.Path, Header: hdrCopy, Body: string(body)}) + h, ok := f.handlers[r.Method+" "+r.URL.Path] + f.mu.Unlock() + if !ok { + f.t.Errorf("unexpected request: %s %s", r.Method, r.URL.Path) + http.Error(w, "no handler", http.StatusNotImplemented) + return + } + r.Body = io.NopCloser(strings.NewReader(string(body))) + h(w, r) +} + +func (f *fakeGH) calls() []recordedReq { + f.mu.Lock() + defer f.mu.Unlock() + out := make([]recordedReq, len(f.requests)) + copy(out, f.requests) + return out +} + +func (f *fakeGH) callsTo(method, path string) int { + n := 0 + for _, r := range f.calls() { + if r.Method == method && r.Path == path { + n++ + } + } + return n +} + +// newProviderForTest builds a Provider that talks only to the fake. +func newProviderForTest(t *testing.T, f *fakeGH) *Provider { + t.Helper() + p, err := NewProvider(ProviderOptions{ + Token: StaticTokenSource("tkn-test"), + HTTPClient: f.server.Client(), + RESTBase: f.server.URL, + GraphQLURL: f.server.URL + "/graphql", + UserAgent: "ao-scm-test", + }) + if err != nil { + t.Fatalf("NewProvider: %v", err) + } + return p +} + +func ctx() context.Context { return context.Background() } + +// --------------------------------------------------------------------------- +// Fixture builders. Each test composes a REST pull + GraphQL response so +// it can pin the exact shape it cares about without sharing global state +// with other tests. 
// ---------------------------------------------------------------------------

// prFixture bundles the canned payloads that describe one pull request:
// the REST pull body, the GraphQL response, and optional job-log bodies.
type prFixture struct {
	owner   string
	repo    string
	number  int
	rest    map[string]any
	graphql map[string]any
	jobLogs map[int64]string // job_id -> log body
}

// basePRFixture returns a freshly allocated fixture for octocat/hello#42:
// an open, non-draft, approved, cleanly mergeable PR whose head commit has
// a single successful "build" check run. jobLogs is left nil. Every call
// builds new maps, so tests can mutate the result freely.
func basePRFixture() *prFixture {
	const (
		prNumber = 42
		webURL   = "https://github.com/octocat/hello/pull/42"
		headSHA  = "deadbeef"
	)

	// REST /pulls/{n} payload.
	restBody := map[string]any{
		"number":             prNumber,
		"title":              "Found a bug",
		"state":              "open",
		"draft":              false,
		"merged":             false,
		"merged_at":          nil,
		"html_url":           webURL,
		"head":               map[string]any{"ref": "feat/x", "sha": headSHA},
		"base":               map[string]any{"ref": "main"},
		"mergeable":          true,
		"rebaseable":         true,
		"mergeable_state":    "clean",
		"merge_state_status": "CLEAN",
	}

	// GraphQL payload, assembled innermost-first.
	buildCheck := map[string]any{
		"__typename": "CheckRun",
		"name":       "build",
		"status":     "COMPLETED",
		"conclusion": "SUCCESS",
		"detailsUrl": "https://github.com/octocat/hello/runs/9001",
		"databaseId": float64(9001),
	}
	rollup := map[string]any{
		"state": "SUCCESS",
		"contexts": map[string]any{
			"nodes":    []any{buildCheck},
			"pageInfo": map[string]any{"hasNextPage": false},
		},
	}
	headCommit := map[string]any{
		"commit": map[string]any{
			"oid":               headSHA,
			"statusCheckRollup": rollup,
		},
	}
	pullRequest := map[string]any{
		"number":           prNumber,
		"url":              webURL,
		"state":            "OPEN",
		"isDraft":          false,
		"merged":           false,
		"closed":           false,
		"mergeable":        "MERGEABLE",
		"mergeStateStatus": "CLEAN",
		"reviewDecision":   "APPROVED",
		"headRefOid":       headSHA,
		"commits":          map[string]any{"nodes": []any{headCommit}},
		"reviewThreads":    map[string]any{"nodes": []any{}},
	}

	fx := &prFixture{owner: "octocat", repo: "hello", number: prNumber}
	fx.rest = restBody
	fx.graphql = map[string]any{
		"data": map[string]any{
			"repository": map[string]any{
				"pullRequest": pullRequest,
			},
		},
	}
	return fx
}

// install wires REST + GraphQL handlers onto the fake.
+func (f *prFixture) install(t *testing.T, fake *fakeGH) { + restPath := "/repos/" + f.owner + "/" + f.repo + "/pulls/" + strconv.Itoa(f.number) + fake.on(http.MethodGet, restPath, func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.Header().Set("ETag", `W/"v1"`) + _ = json.NewEncoder(w).Encode(f.rest) + }) + fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(f.graphql) + }) + for jobID, body := range f.jobLogs { + fake.on(http.MethodGet, "/repos/"+f.owner+"/"+f.repo+"/actions/jobs/"+strconv.FormatInt(jobID, 10)+"/logs", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain") + _, _ = w.Write([]byte(body)) + }) + } +} + +// prData mutates the nested GraphQL pullRequest map. +func (f *prFixture) prData(mut func(pr map[string]any)) *prFixture { + repoData := f.graphql["data"].(map[string]any)["repository"].(map[string]any) + pr := repoData["pullRequest"].(map[string]any) + mut(pr) + return f +} + +func (f *prFixture) prURL() string { + return "https://github.com/" + f.owner + "/" + f.repo + "/pull/" + strconv.Itoa(f.number) +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +func TestParsePRURL(t *testing.T) { + cases := []struct { + name string + url string + wantOwner string + wantRepo string + wantNumber int + wantErr bool + }{ + {"web url", "https://github.com/o/r/pull/42", "o", "r", 42, false}, + {"api url", "https://api.github.com/repos/o/r/pulls/42", "o", "r", 42, false}, + {"trailing slash", "https://github.com/o/r/pull/42/", "o", "r", 42, false}, + {"empty", "", "", "", 0, true}, + {"not github", "https://example.com/o/r/pull/1", "", "", 0, true}, + {"bad number", "https://github.com/o/r/pull/abc", "", "", 0, true}, + {"zero", 
"https://github.com/o/r/pull/0", "", "", 0, true}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + o, r, n, err := parsePRURL(tc.url) + if tc.wantErr { + if err == nil { + t.Fatalf("expected error, got %s/%s#%d", o, r, n) + } + if !errors.Is(err, ErrNotFound) { + t.Fatalf("err = %v, want wraps ErrNotFound", err) + } + return + } + if err != nil { + t.Fatalf("parse: %v", err) + } + if o != tc.wantOwner || r != tc.wantRepo || n != tc.wantNumber { + t.Fatalf("got %s/%s#%d, want %s/%s#%d", o, r, n, tc.wantOwner, tc.wantRepo, tc.wantNumber) + } + }) + } +} + +func TestObserve_HappyPath(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + fx.install(t, f) + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("Observe: %v", err) + } + if !obs.Fetched { + t.Fatalf("Fetched = false; want true") + } + if obs.URL != fx.prURL() { + t.Errorf("URL = %q, want %q", obs.URL, fx.prURL()) + } + if obs.Number != 42 { + t.Errorf("Number = %d, want 42", obs.Number) + } + if obs.Draft || obs.Merged || obs.Closed { + t.Errorf("Draft/Merged/Closed = %v/%v/%v, want all false", obs.Draft, obs.Merged, obs.Closed) + } + if obs.CI != domain.CIPassing { + t.Errorf("CI = %q, want passing", obs.CI) + } + if obs.Review != domain.ReviewApproved { + t.Errorf("Review = %q, want approved", obs.Review) + } + if obs.Mergeability != domain.MergeMergeable { + t.Errorf("Mergeability = %q, want mergeable", obs.Mergeability) + } + if len(obs.Checks) != 1 { + t.Fatalf("Checks = %#v; want 1 entry", obs.Checks) + } + if obs.Checks[0].Status != domain.PRCheckPassed { + t.Errorf("Checks[0].Status = %q, want passed", obs.Checks[0].Status) + } + if obs.Checks[0].LogTail != "" { + t.Errorf("Checks[0].LogTail = %q; want empty on success", obs.Checks[0].LogTail) + } + if obs.Checks[0].CommitHash != "deadbeef" { + t.Errorf("Checks[0].CommitHash = %q; want deadbeef", obs.Checks[0].CommitHash) + } + if len(obs.Comments) != 0 { + 
t.Errorf("Comments = %#v; want empty", obs.Comments) + } +} + +func TestObserve_DraftPR(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + fx.rest["draft"] = true + fx.prData(func(pr map[string]any) { pr["isDraft"] = true }) + fx.install(t, f) + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("Observe: %v", err) + } + if !obs.Draft { + t.Errorf("Draft = false; want true") + } +} + +func TestObserve_MergedPR(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + fx.rest["state"] = "closed" + fx.rest["merged"] = true + fx.rest["merged_at"] = "2026-05-30T12:00:00Z" + fx.prData(func(pr map[string]any) { + pr["state"] = "MERGED" + pr["merged"] = true + pr["closed"] = true + }) + fx.install(t, f) + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("Observe: %v", err) + } + if !obs.Merged { + t.Errorf("Merged = false; want true") + } + if obs.Closed { + t.Errorf("Closed = true; want false (merged is mutually exclusive)") + } +} + +func TestObserve_ClosedNotMerged(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + fx.rest["state"] = "closed" + fx.rest["merged"] = false + fx.rest["merged_at"] = nil + fx.prData(func(pr map[string]any) { + pr["state"] = "CLOSED" + pr["closed"] = true + }) + fx.install(t, f) + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("Observe: %v", err) + } + if !obs.Closed { + t.Errorf("Closed = false; want true") + } + if obs.Merged { + t.Errorf("Merged = true; want false") + } +} + +func TestObserve_CIStates(t *testing.T) { + cases := []struct { + name string + nodes []any + wantCI domain.CIState + wantHead domain.PRCheckStatus + }{ + { + name: "passing", + nodes: []any{ + map[string]any{"__typename": "CheckRun", "name": "build", "status": "COMPLETED", "conclusion": "SUCCESS"}, + }, + wantCI: domain.CIPassing, + wantHead: domain.PRCheckPassed, + }, + { + 
name: "failing wins over passing", + nodes: []any{ + map[string]any{"__typename": "CheckRun", "name": "build", "status": "COMPLETED", "conclusion": "SUCCESS"}, + map[string]any{"__typename": "CheckRun", "name": "lint", "status": "COMPLETED", "conclusion": "FAILURE"}, + }, + wantCI: domain.CIFailing, + }, + { + name: "pending blocks passing-only", + nodes: []any{ + map[string]any{"__typename": "CheckRun", "name": "build", "status": "COMPLETED", "conclusion": "SUCCESS"}, + map[string]any{"__typename": "CheckRun", "name": "test", "status": "IN_PROGRESS"}, + }, + wantCI: domain.CIPending, + }, + { + name: "cancelled is failing", + nodes: []any{ + map[string]any{"__typename": "CheckRun", "name": "deploy", "status": "COMPLETED", "conclusion": "CANCELLED"}, + }, + wantCI: domain.CIFailing, + }, + { + name: "legacy statuscontext failure", + nodes: []any{ + map[string]any{"__typename": "StatusContext", "context": "ci/legacy", "state": "FAILURE", "targetUrl": "https://ci"}, + }, + wantCI: domain.CIFailing, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + fx.prData(func(pr map[string]any) { + commits := pr["commits"].(map[string]any)["nodes"].([]any)[0].(map[string]any) + commit := commits["commit"].(map[string]any) + roll := commit["statusCheckRollup"].(map[string]any) + roll["contexts"].(map[string]any)["nodes"] = tc.nodes + }) + fx.install(t, f) + p := newProviderForTest(t, f) + obs, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("Observe: %v", err) + } + if obs.CI != tc.wantCI { + t.Fatalf("CI = %q, want %q", obs.CI, tc.wantCI) + } + }) + } +} + +func TestObserve_LogTailOnFailure(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + fx.jobLogs = map[int64]string{ + 9001: strings.Repeat("line\n", 30) + strings.Join([]string{ + "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", + "11", "12", "13", "14", "15", "16", "17", "18", "19", "FAILED-LAST", + }, "\n"), + } + 
fx.prData(func(pr map[string]any) { + commits := pr["commits"].(map[string]any)["nodes"].([]any)[0].(map[string]any) + commit := commits["commit"].(map[string]any) + roll := commit["statusCheckRollup"].(map[string]any) + roll["contexts"].(map[string]any)["nodes"] = []any{ + map[string]any{ + "__typename": "CheckRun", + "name": "build", + "status": "COMPLETED", + "conclusion": "FAILURE", + "detailsUrl": "https://github.com/octocat/hello/runs/9001", + "databaseId": float64(9001), + }, + } + }) + fx.install(t, f) + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("Observe: %v", err) + } + if obs.CI != domain.CIFailing { + t.Fatalf("CI = %q, want failing", obs.CI) + } + if len(obs.Checks) != 1 { + t.Fatalf("Checks = %#v", obs.Checks) + } + tail := obs.Checks[0].LogTail + if tail == "" { + t.Fatalf("LogTail empty; expected last %d lines", ciFailureLogTailLines) + } + lines := strings.Split(tail, "\n") + if len(lines) > ciFailureLogTailLines { + t.Fatalf("LogTail has %d lines, want ≤ %d", len(lines), ciFailureLogTailLines) + } + if !strings.Contains(tail, "FAILED-LAST") { + t.Fatalf("LogTail missing the actual tail content: %q", tail) + } +} + +func TestObserve_LogTailFetchFailureIsBestEffort(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + fx.prData(func(pr map[string]any) { + commits := pr["commits"].(map[string]any)["nodes"].([]any)[0].(map[string]any) + commit := commits["commit"].(map[string]any) + roll := commit["statusCheckRollup"].(map[string]any) + roll["contexts"].(map[string]any)["nodes"] = []any{ + map[string]any{ + "__typename": "CheckRun", + "name": "build", + "status": "COMPLETED", + "conclusion": "FAILURE", + "databaseId": float64(9001), + }, + } + }) + fx.install(t, f) + // Job-log endpoint returns 500 — the observation must still come back + // Fetched=true with a synthetic LogTail. 
+ f.on(http.MethodGet, "/repos/octocat/hello/actions/jobs/9001/logs", func(w http.ResponseWriter, r *http.Request) { + http.Error(w, `{"message":"server exploded"}`, http.StatusInternalServerError) + }) + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("Observe: %v", err) + } + if !obs.Fetched { + t.Fatalf("Fetched = false; log-fetch failures must not flip the whole observation") + } + if got := obs.Checks[0].LogTail; !strings.HasPrefix(got, " sentinel", got) + } +} + +func TestObserve_MergeabilityStates(t *testing.T) { + cases := []struct { + name string + mutateREST func(map[string]any) + mutateGQL func(map[string]any) + want domain.Mergeability + }{ + { + name: "mergeable", + // base fixture is the happy path + mutateREST: func(m map[string]any) {}, + mutateGQL: func(m map[string]any) {}, + want: domain.MergeMergeable, + }, + { + name: "conflicting via merge_state_status=DIRTY", + mutateREST: func(m map[string]any) { + m["mergeable_state"] = "dirty" + }, + mutateGQL: func(m map[string]any) { + m["mergeable"] = "CONFLICTING" + m["mergeStateStatus"] = "DIRTY" + }, + want: domain.MergeConflicting, + }, + { + name: "blocked by review", + mutateREST: func(m map[string]any) { + m["mergeable_state"] = "blocked" + }, + mutateGQL: func(m map[string]any) { + m["mergeStateStatus"] = "BLOCKED" + m["reviewDecision"] = "CHANGES_REQUESTED" + }, + want: domain.MergeBlocked, + }, + { + name: "unstable via merge_state_status=UNSTABLE", + mutateREST: func(m map[string]any) { + m["mergeable_state"] = "unstable" + }, + mutateGQL: func(m map[string]any) { + m["mergeStateStatus"] = "UNSTABLE" + }, + want: domain.MergeUnstable, + }, + { + name: "unknown when github hasn't computed yet", + mutateREST: func(m map[string]any) { + m["mergeable"] = nil + m["mergeable_state"] = "unknown" + }, + mutateGQL: func(m map[string]any) { + m["mergeable"] = "UNKNOWN" + m["mergeStateStatus"] = "UNKNOWN" + }, + want: domain.MergeUnknown, + }, + { 
+ // Load-bearing aa-18 contract: CI failing must force + // MergeBlocked even when GitHub still reports the rollup + // as CLEAN (mergeStateStatus has not yet flipped to + // UNSTABLE). Without this guard the LCM would think a + // failing-CI PR is ready to merge. + name: "ci failing forces blocked even when mergeStateStatus is CLEAN", + mutateREST: func(m map[string]any) { + m["mergeable_state"] = "clean" + }, + mutateGQL: func(m map[string]any) { + m["mergeable"] = "MERGEABLE" + m["mergeStateStatus"] = "CLEAN" + commits := m["commits"].(map[string]any)["nodes"].([]any)[0].(map[string]any) + commit := commits["commit"].(map[string]any) + roll := commit["statusCheckRollup"].(map[string]any) + // databaseId=0 so the provider skips the per-job log + // fetch (this test is about mergeability, not log tail). + roll["contexts"].(map[string]any)["nodes"] = []any{ + map[string]any{"__typename": "CheckRun", "name": "lint", "status": "COMPLETED", "conclusion": "FAILURE", "databaseId": float64(0)}, + } + }, + want: domain.MergeBlocked, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + tc.mutateREST(fx.rest) + fx.prData(tc.mutateGQL) + fx.install(t, f) + p := newProviderForTest(t, f) + obs, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("Observe: %v", err) + } + if obs.Mergeability != tc.want { + t.Fatalf("Mergeability = %q, want %q", obs.Mergeability, tc.want) + } + }) + } +} + +func TestObserve_ReviewDecisions(t *testing.T) { + cases := []struct { + name string + decision any + want domain.ReviewDecision + }{ + {"approved", "APPROVED", domain.ReviewApproved}, + {"changes requested", "CHANGES_REQUESTED", domain.ReviewChangesRequest}, + {"review required", "REVIEW_REQUIRED", domain.ReviewRequired}, + {"none / null", nil, domain.ReviewNone}, + {"unrecognized falls to none", "WAT", domain.ReviewNone}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + f := 
newFakeGH(t) + fx := basePRFixture() + fx.prData(func(pr map[string]any) { pr["reviewDecision"] = tc.decision }) + fx.install(t, f) + p := newProviderForTest(t, f) + obs, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("Observe: %v", err) + } + if obs.Review != tc.want { + t.Fatalf("Review = %q, want %q", obs.Review, tc.want) + } + }) + } +} + +func TestObserve_BotAuthorFiltering(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + fx.prData(func(pr map[string]any) { + pr["reviewThreads"] = map[string]any{"nodes": []any{ + map[string]any{ + "id": "T1", + "isResolved": false, + "comments": map[string]any{"nodes": []any{ + map[string]any{ + "id": "C1", + "body": "real human concern", + "path": "foo/bar.go", + "line": float64(12), + "url": "https://github.com/octocat/hello/pull/42#discussion_r1", + "author": map[string]any{"login": "alice", "__typename": "User"}, + }, + }}, + }, + // Bot thread — must be filtered out entirely. + map[string]any{ + "id": "T2", + "isResolved": false, + "comments": map[string]any{"nodes": []any{ + map[string]any{ + "id": "C2", + "body": "dependabot says update", + "path": "go.mod", + "line": float64(1), + "author": map[string]any{"login": "dependabot[bot]", "__typename": "Bot"}, + }, + }}, + }, + // Resolved thread — must also be filtered out. + map[string]any{ + "id": "T3", + "isResolved": true, + "comments": map[string]any{"nodes": []any{ + map[string]any{"id": "C3", "body": "lgtm now", "author": map[string]any{"login": "bob", "__typename": "User"}}, + }}, + }, + // Login like "robothon" — must NOT be treated as a bot (aa-18 + // flagged the strings.Contains(login,"bot") fallback as a + // false-positive magnet; we use the typed signal only). 
+ map[string]any{ + "id": "T4", + "isResolved": false, + "comments": map[string]any{"nodes": []any{ + map[string]any{"id": "C4", "body": "actual comment", "path": "a.go", "line": float64(3), "author": map[string]any{"login": "robothon", "__typename": "User"}}, + }}, + }, + }} + }) + fx.install(t, f) + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("Observe: %v", err) + } + if len(obs.Comments) != 2 { + t.Fatalf("Comments = %#v; want exactly 2 (alice + robothon)", obs.Comments) + } + authors := []string{obs.Comments[0].Author, obs.Comments[1].Author} + if !contains(authors, "alice") { + t.Errorf("missing alice's comment: %v", authors) + } + if !contains(authors, "robothon") { + t.Errorf("robothon misclassified as bot: %v", authors) + } + for _, c := range obs.Comments { + if c.Resolved { + t.Errorf("comment %q marked Resolved=true; observation set is unresolved-only", c.ID) + } + } +} + +// TestObserve_AllBotThreadsYieldsNilComments pins that a PR whose review +// threads are 100% bot-authored produces Comments == nil but a fully +// fetched observation. The PR Manager downstream must handle a nil +// Comments slice without panicking, and Fetched=true means lifecycle +// can still apply the rest of the observation. 
+func TestObserve_AllBotThreadsYieldsNilComments(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + fx.prData(func(pr map[string]any) { + pr["reviewThreads"] = map[string]any{"nodes": []any{ + map[string]any{ + "id": "T-bot-only", + "isResolved": false, + "comments": map[string]any{"nodes": []any{ + map[string]any{"id": "C1", "body": "auto-merged", "author": map[string]any{"login": "dependabot[bot]", "__typename": "Bot"}}, + map[string]any{"id": "C2", "body": "renovate", "author": map[string]any{"login": "renovate[bot]", "__typename": "Bot"}}, + }}, + }, + }} + }) + fx.install(t, f) + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("Observe: %v", err) + } + if !obs.Fetched { + t.Fatalf("Fetched = false; want true even when all comments are bots") + } + if len(obs.Comments) != 0 { + t.Fatalf("Comments = %#v; want empty (all authors are bots)", obs.Comments) + } +} + +func contains(ss []string, x string) bool { + for _, s := range ss { + if s == x { + return true + } + } + return false +} + +func TestObserve_ETag304Cached(t *testing.T) { + // Second call to the REST pull endpoint must send If-None-Match and + // reuse the cached body, while still completing the rest of the + // observation (GraphQL is always re-fetched — there's no cache for it). 
+ f := newFakeGH(t) + fx := basePRFixture() + var restHits int + restPath := "/repos/" + fx.owner + "/" + fx.repo + "/pulls/" + strconv.Itoa(fx.number) + f.on(http.MethodGet, restPath, func(w http.ResponseWriter, r *http.Request) { + restHits++ + if r.Header.Get("If-None-Match") == `W/"v1"` { + w.Header().Set("ETag", `W/"v1"`) + w.WriteHeader(http.StatusNotModified) + return + } + w.Header().Set("Content-Type", "application/json") + w.Header().Set("ETag", `W/"v1"`) + _ = json.NewEncoder(w).Encode(fx.rest) + }) + f.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(fx.graphql) + }) + p := newProviderForTest(t, f) + + first, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("first Observe: %v", err) + } + second, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("second Observe: %v", err) + } + if first.CI != second.CI || first.Mergeability != second.Mergeability { + t.Fatalf("304 replay diverged: %#v vs %#v", first, second) + } + if !second.Fetched { + t.Fatalf("second Fetched = false despite 304 hit") + } + if restHits != 2 { + t.Fatalf("expected 2 hits to the REST pull endpoint (one fresh, one 304), got %d", restHits) + } + // And: the second call must have actually sent If-None-Match. 
+ var sentConditional bool + for _, r := range f.calls() { + if r.Method == http.MethodGet && r.Path == restPath && r.Header.Get("If-None-Match") != "" { + sentConditional = true + break + } + } + if !sentConditional { + t.Fatalf("second call did not send If-None-Match; ETag cache is broken") + } +} + +func TestObserve_PrimaryRateLimit(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + reset := time.Now().Add(2 * time.Minute).Unix() + f.on(http.MethodGet, "/repos/octocat/hello/pulls/42", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("X-RateLimit-Remaining", "0") + w.Header().Set("X-RateLimit-Reset", strconv.FormatInt(reset, 10)) + http.Error(w, `{"message":"API rate limit exceeded"}`, http.StatusForbidden) + }) + // GraphQL would never be reached in this scenario. + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if !errors.Is(err, ErrRateLimited) { + t.Fatalf("err = %v, want ErrRateLimited", err) + } + if obs.Fetched { + t.Fatalf("Fetched = true on rate-limit error; want false") + } + var rle *RateLimitError + if !errors.As(err, &rle) { + t.Fatalf("err = %v, want *RateLimitError", err) + } + if rle.ResetAt.Unix() != reset { + t.Fatalf("ResetAt = %d, want %d", rle.ResetAt.Unix(), reset) + } +} + +func TestObserve_SecondaryRateLimit(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + f.on(http.MethodGet, "/repos/octocat/hello/pulls/42", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Retry-After", "30") + http.Error(w, `{"message":"You have exceeded a secondary rate limit"}`, http.StatusForbidden) + }) + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if !errors.Is(err, ErrRateLimited) { + t.Fatalf("err = %v, want ErrRateLimited", err) + } + if obs.Fetched { + t.Fatalf("Fetched = true on rate-limit error") + } + var rle *RateLimitError + if !errors.As(err, &rle) { + t.Fatalf("err = %v, want *RateLimitError", err) + } + if rle.RetryAfter != 30*time.Second 
{ + t.Fatalf("RetryAfter = %v, want 30s", rle.RetryAfter) + } +} + +func TestObserve_AuthFailedSurfacesAsErrAuthFailed(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + f.on(http.MethodGet, "/repos/octocat/hello/pulls/42", func(w http.ResponseWriter, r *http.Request) { + http.Error(w, `{"message":"Bad credentials"}`, http.StatusUnauthorized) + }) + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if !errors.Is(err, ErrAuthFailed) { + t.Fatalf("err = %v, want ErrAuthFailed", err) + } + if obs.Fetched { + t.Fatalf("Fetched = true on auth-failed; want false") + } +} + +func TestObserve_MalformedJSONIsNotFetched(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + f.on(http.MethodGet, "/repos/octocat/hello/pulls/42", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{not valid json`)) + }) + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if err == nil { + t.Fatalf("expected decode error, got nil") + } + if obs.Fetched { + t.Fatalf("Fetched = true on decode failure; want false") + } +} + +func TestObserve_NetworkErrorIsNotFetched(t *testing.T) { + // Point the provider at a closed server to force a transport error. + f := newFakeGH(t) + p, err := NewProvider(ProviderOptions{ + Token: StaticTokenSource("tkn"), + HTTPClient: &http.Client{Timeout: 200 * time.Millisecond}, + RESTBase: "http://127.0.0.1:1", // reserved port; refuses connections + GraphQLURL: "http://127.0.0.1:1/graphql", + }) + if err != nil { + t.Fatalf("NewProvider: %v", err) + } + obs, observeErr := p.Observe(ctx(), "https://github.com/o/r/pull/1") + if observeErr == nil { + t.Fatalf("expected network error, got nil") + } + if obs.Fetched { + t.Fatalf("Fetched = true on network error; want false") + } + // Reference f so the test linter doesn't flag it; we don't use the + // fake here but the helper is the canonical way to scope a test. 
+ _ = f +} + +func TestObserve_TokenInjectedAsBearer(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + fx.install(t, f) + p := newProviderForTest(t, f) + if _, err := p.Observe(ctx(), fx.prURL()); err != nil { + t.Fatalf("Observe: %v", err) + } + for _, r := range f.calls() { + if got := r.Header.Get("Authorization"); got != "Bearer tkn-test" { + t.Fatalf("Authorization header on %s %s = %q, want Bearer tkn-test", r.Method, r.Path, got) + } + } +} + +func TestStaticTokenSourceRejectsBlank(t *testing.T) { + if _, err := StaticTokenSource("").Token(context.Background()); !errors.Is(err, ErrNoToken) { + t.Fatalf("err = %v, want ErrNoToken", err) + } + if _, err := StaticTokenSource(" ").Token(context.Background()); !errors.Is(err, ErrNoToken) { + t.Fatalf("blank-with-spaces: err = %v, want ErrNoToken", err) + } +} + +func TestGHTokenSourceUsesInjectedHook(t *testing.T) { + calls := 0 + src := &GHTokenSource{ + GH: func(ctx context.Context) (string, error) { + calls++ + return "from-gh\n", nil + }, + TokenTTL: time.Hour, + } + tok, err := src.Token(context.Background()) + if err != nil { + t.Fatalf("Token: %v", err) + } + if tok != "from-gh" { + t.Fatalf("Token = %q, want %q", tok, "from-gh") + } + // Second call within TTL must be cached. + if _, err := src.Token(context.Background()); err != nil { + t.Fatalf("second Token: %v", err) + } + if calls != 1 { + t.Fatalf("GH called %d times; want 1 (cache miss only)", calls) + } + // Invalidate and the next call must re-run. + src.InvalidateToken() + if _, err := src.Token(context.Background()); err != nil { + t.Fatalf("third Token: %v", err) + } + if calls != 2 { + t.Fatalf("after invalidate, GH called %d times; want 2", calls) + } +} + +// TestObserve_StatusContextLegacyHasNoLogTail pins that we do NOT try to +// fetch a job log for a legacy commit-status row (those have no Actions +// job ID, so /actions/jobs/0/logs would 404 if we let the path leak). 
+func TestObserve_StatusContextLegacyHasNoLogTail(t *testing.T) { + f := newFakeGH(t) + fx := basePRFixture() + fx.prData(func(pr map[string]any) { + commits := pr["commits"].(map[string]any)["nodes"].([]any)[0].(map[string]any) + commit := commits["commit"].(map[string]any) + roll := commit["statusCheckRollup"].(map[string]any) + roll["contexts"].(map[string]any)["nodes"] = []any{ + map[string]any{"__typename": "StatusContext", "context": "ci/legacy", "state": "FAILURE", "targetUrl": "https://ci"}, + } + }) + fx.install(t, f) + p := newProviderForTest(t, f) + + obs, err := p.Observe(ctx(), fx.prURL()) + if err != nil { + t.Fatalf("Observe: %v", err) + } + if obs.CI != domain.CIFailing { + t.Fatalf("CI = %q, want failing", obs.CI) + } + if len(obs.Checks) != 1 { + t.Fatalf("Checks = %#v", obs.Checks) + } + if obs.Checks[0].LogTail != "" { + t.Fatalf("LogTail = %q; want empty (StatusContext has no job log)", obs.Checks[0].LogTail) + } + if f.callsTo(http.MethodGet, "/repos/octocat/hello/actions/jobs/0/logs") != 0 { + t.Fatalf("unexpected attempt to fetch a /actions/jobs/0/logs URL") + } +} + +// TestObserve_AssertsPRObservationShape is a belt-and-braces compile-time +// guard that PRObservation has the fields we depend on. If the port adds +// or renames a field, this test fails to compile rather than failing at +// runtime. 
+func TestObserve_AssertsPRObservationShape(t *testing.T) { + var o ports.PRObservation + o.Fetched = true + o.URL = "" + o.Number = 0 + o.Draft = false + o.Merged = false + o.Closed = false + o.CI = domain.CIUnknown + o.Review = domain.ReviewNone + o.Mergeability = domain.MergeUnknown + o.Checks = nil + o.Comments = nil + _ = o +} From b463232582b7834d431772d08283788513eb63fe Mon Sep 17 00:00:00 2001 From: harshitsinghbhandari <24b4506@iitb.ac.in> Date: Mon, 1 Jun 2026 21:21:58 +0530 Subject: [PATCH 08/17] feat(daemon): wire Session Manager + agent shim + RepoResolver + inbox messenger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #62 ("simplify session lifecycle and zellij runtime") deleted the Session Manager wiring from the daemon — every call to session.New() was removed and only the integration test still constructed one. This brings SM back, end to end: calling sm.Spawn() now launches a real Claude Code agent in a real git worktree in a real zellij session, and lifecycle nudges reach the agent via an inbox file. Four new pieces: - adapters/agent/portshim: bridges the richer adapters/agent.Agent interface (PR #65, @yyovil) onto the narrower ports.Agent the SM consumes. POSIX shell-quoting joins argv into the single string the zellij `sh -lc` wrapper expects. - adapters/workspace/gitworktree/projectresolver: gitworktree.RepoResolver backed by project.Manager. Lives in its own subpackage so gitworktree stays free of the project import (and the cycle that would create). - adapters/messenger/inbox: ports.AgentMessenger writing each message as /.ao/inbox/_.md. Symlink-safe via os.Lstat on the .ao/inbox segments. - daemon/session_wiring.go: assembles claudecode → portshim, gitworktree over projectresolver, inbox messenger over the sqlite store, and the SM itself. Reuses the existing zellij runtime / project manager / lcm singletons rather than constructing parallel copies. 
Daemon-wide singleton sharing (the change of behavior under #62 / #65 + this PR): - One zellij.Runtime instance services both the terminal mux and SM.Spawn. Two adapters would race on the same socket. - One lifecycle.Manager instance services both the reaper (runtime liveness observations) and the SM (spawn/restore/kill writes). Two LCMs would split agent-nudge state. - One project.Manager instance services both httpd (/api/v1/projects) and the gitworktree RepoResolver. Two stores would diverge on cached reads. - One ports.AgentMessenger services both the LCM (PR-driven reactions: CI fail, review feedback, merge conflict) and the SM (Send). - One *sqlite.Store services CDC, lifecycle, SM, and the inbox workspace lookup. Already the case; preserved. Also promotes the duplicated "agentSessionId" metadata key literal in the claudecode and codex adapters to a single agent.MetadataKeyAgentSessionID constant in adapters/agent, which the portshim now uses to populate Session.Metadata for the underlying adapter's GetRestoreCommand. What this PR does NOT do (covered by follow-ups γ/δ): - No HTTP routes for SM (POST /sessions, etc.) — γ - No `ao session new` CLI — γ - No SCM poller — δ - No codex agent wiring (claude-code only) — later - No zellij send-keys pane-ping — the agent reads its inbox on demand Tests: - portshim: 15 table-driven cases (shell quoting, env, restore propagation, error fall-through, safe-string short-circuit). - projectresolver: 4 cases (interface satisfaction, happy path, unknown project, degraded project). - inbox: 9 cases (interface satisfaction, write, dir create, two-distinct, unknown session, empty workspace path, symlinked inbox refused, empty message, filename shape). - daemon/wiring_test: SM stack constructed + sharing singletons + messenger reaches the same store via SessionMetadata.WorkspacePath end to end. go test -race / go vet / gofmt clean. 
The pre-existing TestSessionStreamsRealZellijPane integration test fails on this host because \$TMPDIR > 103 chars (zellij IPC socket limit) — also fails on origin/staging without these changes. Branched from origin/staging (= main + #65) so claudecode is available; PR base must be staging. Co-Authored-By: Claude Opus 4.7 --- backend/internal/adapters/agent/agent.go | 6 + .../adapters/agent/claudecode/claudecode.go | 10 +- .../agent/claudecode/claudecode_test.go | 6 +- .../internal/adapters/agent/codex/codex.go | 2 +- .../internal/adapters/agent/portshim/shim.go | 114 ++++++++++++ .../adapters/agent/portshim/shim_test.go | 164 ++++++++++++++++++ .../adapters/messenger/inbox/inbox.go | 110 ++++++++++++ .../adapters/messenger/inbox/inbox_test.go | 152 ++++++++++++++++ .../gitworktree/projectresolver/resolver.go | 47 +++++ .../projectresolver/resolver_test.go | 69 ++++++++ backend/internal/daemon/daemon.go | 30 +++- backend/internal/daemon/lifecycle_wiring.go | 15 +- backend/internal/daemon/session_wiring.go | 75 ++++++++ backend/internal/daemon/wiring_test.go | 76 ++++++++ 14 files changed, 854 insertions(+), 22 deletions(-) create mode 100644 backend/internal/adapters/agent/portshim/shim.go create mode 100644 backend/internal/adapters/agent/portshim/shim_test.go create mode 100644 backend/internal/adapters/messenger/inbox/inbox.go create mode 100644 backend/internal/adapters/messenger/inbox/inbox_test.go create mode 100644 backend/internal/adapters/workspace/gitworktree/projectresolver/resolver.go create mode 100644 backend/internal/adapters/workspace/gitworktree/projectresolver/resolver_test.go create mode 100644 backend/internal/daemon/session_wiring.go diff --git a/backend/internal/adapters/agent/agent.go b/backend/internal/adapters/agent/agent.go index 25eb9bf9..16d000a8 100644 --- a/backend/internal/adapters/agent/agent.go +++ b/backend/internal/adapters/agent/agent.go @@ -30,6 +30,12 @@ type Agent interface { SessionInfo(ctx context.Context, session 
SessionRef) (info SessionInfo, ok bool, err error) } +// MetadataKeyAgentSessionID is the SessionRef.Metadata key under which every +// adapter persists the native agent session id captured at launch and reads it +// back during restore. The Better-AO portshim sets it so the underlying +// adapter's GetRestoreCommand sees a unified location regardless of harness. +const MetadataKeyAgentSessionID = "agentSessionId" + // Config contains values loaded from the selected agent's config section. // Agent adapters own validation for their custom keys. type Config map[string]any diff --git a/backend/internal/adapters/agent/claudecode/claudecode.go b/backend/internal/adapters/agent/claudecode/claudecode.go index b120249b..f4b5d6be 100644 --- a/backend/internal/adapters/agent/claudecode/claudecode.go +++ b/backend/internal/adapters/agent/claudecode/claudecode.go @@ -41,10 +41,8 @@ const ( // Normalized session-metadata keys the Claude Code hooks persist into the // Better-AO session store and SessionInfo reads back. Shared vocabulary // with the Codex adapter so the dashboard treats every agent uniformly. - // agentSessionId is also the preferred restore id. - claudeAgentSessionIDMetadataKey = "agentSessionId" - claudeTitleMetadataKey = "title" - claudeSummaryMetadataKey = "summary" + claudeTitleMetadataKey = "title" + claudeSummaryMetadataKey = "summary" ) // claudeSessionNamespace seeds the UUIDv5 derivation that maps a better-ao @@ -179,7 +177,7 @@ func (p *Plugin) GetRestoreCommand(ctx context.Context, cfg agent.RestoreConfig) return nil, false, err } - sessionID := strings.TrimSpace(cfg.Session.Metadata[claudeAgentSessionIDMetadataKey]) + sessionID := strings.TrimSpace(cfg.Session.Metadata[agent.MetadataKeyAgentSessionID]) if sessionID == "" && cfg.Session.ID != "" { // Explicit fallback for pre-hook sessions: the id better-ao // deterministically pinned via --session-id at launch. 
@@ -210,7 +208,7 @@ func (p *Plugin) SessionInfo(ctx context.Context, session agent.SessionRef) (age return agent.SessionInfo{}, false, err } info := agent.SessionInfo{ - AgentSessionID: session.Metadata[claudeAgentSessionIDMetadataKey], + AgentSessionID: session.Metadata[agent.MetadataKeyAgentSessionID], Title: session.Metadata[claudeTitleMetadataKey], Summary: session.Metadata[claudeSummaryMetadataKey], } diff --git a/backend/internal/adapters/agent/claudecode/claudecode_test.go b/backend/internal/adapters/agent/claudecode/claudecode_test.go index e4be463d..3914ff64 100644 --- a/backend/internal/adapters/agent/claudecode/claudecode_test.go +++ b/backend/internal/adapters/agent/claudecode/claudecode_test.go @@ -228,7 +228,7 @@ func TestSessionInfoReadsHookMetadata(t *testing.T) { info, ok, err := (&Plugin{resolvedBinary: "claude"}).SessionInfo(context.Background(), agent.SessionRef{ WorkspacePath: "/some/path", Metadata: map[string]string{ - claudeAgentSessionIDMetadataKey: "claude-native-1", + agent.MetadataKeyAgentSessionID: "claude-native-1", claudeTitleMetadataKey: "Fix login redirect", claudeSummaryMetadataKey: "Updated the auth callback and tests.", "ignored": "not returned", @@ -299,7 +299,7 @@ func TestGetRestoreCommandReadsAgentSessionID(t *testing.T) { Permissions: agent.PermissionModeBypassPermissions, Session: agent.SessionRef{ ID: "sess-r", - Metadata: map[string]string{claudeAgentSessionIDMetadataKey: "claude-native-1"}, + Metadata: map[string]string{agent.MetadataKeyAgentSessionID: "claude-native-1"}, }, }) if err != nil || !ok { @@ -334,7 +334,7 @@ func TestGetRestoreCommandFalseWithoutSessionID(t *testing.T) { ref agent.SessionRef }{ {"empty ref", agent.SessionRef{}}, - {"blank agent session, no id", agent.SessionRef{Metadata: map[string]string{claudeAgentSessionIDMetadataKey: " "}}}, + {"blank agent session, no id", agent.SessionRef{Metadata: map[string]string{agent.MetadataKeyAgentSessionID: " "}}}, {"workspace path only", 
agent.SessionRef{WorkspacePath: "/some/path"}}, } for _, tc := range cases { diff --git a/backend/internal/adapters/agent/codex/codex.go b/backend/internal/adapters/agent/codex/codex.go index a80438f6..f298c25e 100644 --- a/backend/internal/adapters/agent/codex/codex.go +++ b/backend/internal/adapters/agent/codex/codex.go @@ -20,7 +20,7 @@ import ( ) const ( - codexAgentSessionIDMetadataKey = "agentSessionId" + codexAgentSessionIDMetadataKey = agent.MetadataKeyAgentSessionID codexTitleMetadataKey = "title" codexSummaryMetadataKey = "summary" ) diff --git a/backend/internal/adapters/agent/portshim/shim.go b/backend/internal/adapters/agent/portshim/shim.go new file mode 100644 index 00000000..541fbcc7 --- /dev/null +++ b/backend/internal/adapters/agent/portshim/shim.go @@ -0,0 +1,114 @@ +// Package portshim bridges the richer adapters/agent.Agent interface onto the +// narrower ports.Agent the Session Manager consumes. The richer interface +// returns argv slices and takes a context; ports.Agent returns a single shell +// string and is context-free. The shim joins argv with POSIX shell quoting so +// the zellij runtime, which evaluates LaunchCommand under `sh -lc`, sees the +// agent's argv intact. +package portshim + +import ( + "context" + "strings" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +// Shim wraps an adapters/agent.Agent and satisfies ports.Agent. The shim is +// context-free at its API surface; it threads context.Background() into the +// richer interface. That matches the existing ports.Agent shape — extending it +// is a separate change. +type Shim struct { + agent agent.Agent +} + +// New constructs a Shim. agent is required; nil is not supported. 
+func New(a agent.Agent) *Shim { return &Shim{agent: a} }
+
+var _ ports.Agent = (*Shim)(nil)
+
+// GetLaunchCommand asks the wrapped agent for its launch argv and renders it as
+// a single POSIX-shell-safe string. An adapter error or empty argv yields "".
+// The adapter's error value is discarded here: this method returns only a
+// string, so "" is the sole failure signal the caller receives.
+func (s *Shim) GetLaunchCommand(cfg ports.AgentConfig) string {
+	argv, err := s.agent.GetLaunchCommand(context.Background(), launchConfigFor(cfg))
+	if err != nil {
+		return ""
+	}
+	return joinShellArgv(argv)
+}
+
+// GetEnvironment returns nil: the richer agent interface doesn't carry the env
+// keys ports.AgentConfig exposes, and the SM layers AO_SESSION_ID,
+// AO_PROJECT_ID, AO_ISSUE_ID on top of whatever the agent contributes. A nil
+// map is fine here — session.spawnEnv treats nil as empty.
+func (s *Shim) GetEnvironment(ports.AgentConfig) map[string]string {
+	return nil
+}
+
+// GetRestoreCommand resumes a native agent session given its agentSessionID and
+// returns the resume command as a POSIX-shell-safe string. An adapter error or
+// ok=false yields "" — the SM falls back to a fresh Spawn.
+func (s *Shim) GetRestoreCommand(agentSessionID string) string {
+	cfg := agent.RestoreConfig{
+		Session: agent.SessionRef{
+			// The same value is passed as Session.ID so an adapter that
+			// falls back to Session.ID when the metadata entry is blank
+			// still receives it.
+			ID: agentSessionID,
+			Metadata: map[string]string{
+				agent.MetadataKeyAgentSessionID: agentSessionID,
+			},
+		},
+	}
+	argv, ok, err := s.agent.GetRestoreCommand(context.Background(), cfg)
+	if err != nil || !ok {
+		return ""
+	}
+	return joinShellArgv(argv)
+}
+
+// launchConfigFor copies the session id, workspace path, and prompt from the
+// port-level config into the adapter's LaunchConfig. No other LaunchConfig
+// field is populated by the shim.
+func launchConfigFor(cfg ports.AgentConfig) agent.LaunchConfig {
+	return agent.LaunchConfig{
+		SessionID:     string(cfg.SessionID),
+		WorkspacePath: cfg.WorkspacePath,
+		Prompt:        cfg.Prompt,
+	}
+}
+
+// joinShellArgv renders argv as a single string the POSIX shell will re-parse
+// into the same tokens. Each arg is quoted in single quotes unless it consists
+// only of characters guaranteed safe to leave bare.
+func joinShellArgv(argv []string) string {
+	if len(argv) == 0 {
+		return ""
+	}
+	parts := make([]string, len(argv))
+	for i, a := range argv {
+		parts[i] = shellQuote(a)
+	}
+	return strings.Join(parts, " ")
+}
+
+// shellQuote returns s in a form the POSIX shell parses back to exactly s.
+// The empty string becomes '' so it survives as an (empty) argument, strings
+// made only of safe characters are returned unchanged, and everything else is
+// wrapped in single quotes with each embedded ' rewritten as '\''.
+func shellQuote(s string) string {
+	if s == "" {
+		return "''"
+	}
+	if isShellSafe(s) {
+		return s
+	}
+	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
+}
+
+// isShellSafe reports whether s may be left bare in a shell command line. It
+// accepts only letters, digits, and a handful of punctuation that never trigger
+// expansion or word splitting. Anything else is quoted.
+//
+// '=' is deliberately NOT in the safe set: a bare leading word of the form
+// NAME=value is parsed by the shell as a variable assignment instead of a
+// command word, so argv would not round-trip. Quoting the word suppresses
+// that interpretation.
+func isShellSafe(s string) bool {
+	for _, r := range s {
+		switch {
+		case r >= 'a' && r <= 'z',
+			r >= 'A' && r <= 'Z',
+			r >= '0' && r <= '9',
+			r == '-', r == '_', r == '/', r == '.', r == ',', r == ':', r == '+', r == '@':
+			continue
+		}
+		return false
+	}
+	return true
+}
diff --git a/backend/internal/adapters/agent/portshim/shim_test.go b/backend/internal/adapters/agent/portshim/shim_test.go
new file mode 100644
index 00000000..2ff6f373
--- /dev/null
+++ b/backend/internal/adapters/agent/portshim/shim_test.go
@@ -0,0 +1,164 @@
+package portshim_test
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"testing"
+
+	"github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent"
+	"github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/portshim"
+	"github.com/aoagents/agent-orchestrator/backend/internal/ports"
+	"github.com/aoagents/agent-orchestrator/backend/internal/session"
+)
+
+type fakeAgent struct {
+	launchCmd     []string
+	launchErr     error
+	restoreCmd    []string
+	restoreOK     bool
+	restoreErr    error
+	gotLaunchCfg  agent.LaunchConfig
+	gotRestoreCfg agent.RestoreConfig
+}
+
+func (f *fakeAgent) GetConfigSpec(context.Context) (agent.ConfigSpec, error) {
+	return agent.ConfigSpec{}, nil
+}
+func (f *fakeAgent) GetLaunchCommand(_ context.Context, cfg agent.LaunchConfig) ([]string, error) {
+	f.gotLaunchCfg = cfg
+	return f.launchCmd, f.launchErr
+}
+func (f
*fakeAgent) GetPromptDeliveryStrategy(context.Context, agent.LaunchConfig) (agent.PromptDeliveryStrategy, error) { + return agent.PromptDeliveryInCommand, nil +} +func (f *fakeAgent) GetAgentHooks(context.Context, agent.WorkspaceHookConfig) error { return nil } +func (f *fakeAgent) GetRestoreCommand(_ context.Context, cfg agent.RestoreConfig) ([]string, bool, error) { + f.gotRestoreCfg = cfg + return f.restoreCmd, f.restoreOK, f.restoreErr +} +func (f *fakeAgent) SessionInfo(context.Context, agent.SessionRef) (agent.SessionInfo, bool, error) { + return agent.SessionInfo{}, false, nil +} + +func TestSatisfiesPortsAgent(t *testing.T) { + var _ ports.Agent = (*portshim.Shim)(nil) +} + +func TestGetLaunchCommand_JoinsArgvShellSafely(t *testing.T) { + tests := []struct { + name string + argv []string + want string + }{ + {"simple", []string{"claude"}, "claude"}, + {"flags and prompt", []string{"claude", "--", "do it"}, "claude -- 'do it'"}, + {"path with spaces", []string{"/Applications/My App/claude", "--flag"}, "'/Applications/My App/claude' --flag"}, + {"prompt with single quote", []string{"claude", "--", "it's fine"}, `claude -- 'it'\''s fine'`}, + {"empty argv", []string{}, ""}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + s := portshim.New(&fakeAgent{launchCmd: tc.argv}) + got := s.GetLaunchCommand(ports.AgentConfig{}) + if got != tc.want { + t.Fatalf("got %q want %q", got, tc.want) + } + }) + } +} + +func TestGetLaunchCommand_PropagatesAgentConfig(t *testing.T) { + fake := &fakeAgent{launchCmd: []string{"claude"}} + s := portshim.New(fake) + cfg := ports.AgentConfig{SessionID: "p-1", WorkspacePath: "/ws/p-1", Prompt: "hello"} + _ = s.GetLaunchCommand(cfg) + if fake.gotLaunchCfg.SessionID != "p-1" { + t.Errorf("SessionID not propagated: %+v", fake.gotLaunchCfg) + } + if fake.gotLaunchCfg.WorkspacePath != "/ws/p-1" { + t.Errorf("WorkspacePath not propagated: %+v", fake.gotLaunchCfg) + } + if fake.gotLaunchCfg.Prompt != "hello" { + 
t.Errorf("Prompt not propagated: %+v", fake.gotLaunchCfg) + } +} + +func TestGetLaunchCommand_AgentErrorReturnsEmpty(t *testing.T) { + fake := &fakeAgent{launchErr: errors.New("boom")} + s := portshim.New(fake) + got := s.GetLaunchCommand(ports.AgentConfig{SessionID: "p-1"}) + if got != "" { + t.Fatalf("expected empty on error, got %q", got) + } +} + +func TestGetEnvironment_ReturnsAgentEnvKeysOnly(t *testing.T) { + // The richer Agent interface doesn't carry the env keys the SM port supplies, + // so the shim has nothing agent-specific to surface. SM layers AO_* on top. + s := portshim.New(&fakeAgent{}) + got := s.GetEnvironment(ports.AgentConfig{SessionID: "p-1"}) + if len(got) != 0 { + t.Fatalf("expected empty env from shim, got %v", got) + } + for _, k := range []string{session.EnvSessionID, session.EnvProjectID, session.EnvIssueID} { + if _, ok := got[k]; ok { + t.Errorf("shim must not pre-populate AO env key %s; SM owns it", k) + } + } +} + +func TestGetRestoreCommand_JoinsWhenOK(t *testing.T) { + fake := &fakeAgent{restoreCmd: []string{"claude", "--resume", "abc 123"}, restoreOK: true} + s := portshim.New(fake) + got := s.GetRestoreCommand("abc 123") + want := `claude --resume 'abc 123'` + if got != want { + t.Fatalf("got %q want %q", got, want) + } + if fake.gotRestoreCfg.Session.ID != "abc 123" { + t.Errorf("session id not propagated: %+v", fake.gotRestoreCfg) + } +} + +func TestGetRestoreCommand_NotOKReturnsEmpty(t *testing.T) { + fake := &fakeAgent{restoreOK: false} + s := portshim.New(fake) + if got := s.GetRestoreCommand("anything"); got != "" { + t.Fatalf("expected empty when not restorable, got %q", got) + } +} + +func TestGetRestoreCommand_ErrorReturnsEmpty(t *testing.T) { + fake := &fakeAgent{restoreErr: errors.New("boom")} + s := portshim.New(fake) + if got := s.GetRestoreCommand("x"); got != "" { + t.Fatalf("expected empty on restore error, got %q", got) + } +} + +func TestGetRestoreCommand_PassesAgentSessionIDAsMetadata(t *testing.T) { + // 
Claude-code (and Codex) read the native session id off cfg.Session.Metadata + // ["agentSessionId"] to rebuild the --resume command. Pass it via both Session.ID + // (the legacy fallback) and Session.Metadata so the richer adapter can find it. + fake := &fakeAgent{restoreCmd: []string{"claude", "--resume", "x"}, restoreOK: true} + s := portshim.New(fake) + _ = s.GetRestoreCommand("native-uuid") + gotID := fake.gotRestoreCfg.Session.ID + if gotID != "native-uuid" { + t.Errorf("Session.ID want native-uuid, got %q", gotID) + } + if m := fake.gotRestoreCfg.Session.Metadata[agent.MetadataKeyAgentSessionID]; m != "native-uuid" { + t.Errorf("Session.Metadata[%s] want native-uuid, got %q", agent.MetadataKeyAgentSessionID, m) + } +} + +func TestShellQuotingDoesNotDoubleQuoteSafeStrings(t *testing.T) { + // Safe identifiers (letters, digits, dash, dot, slash, underscore) should + // pass through unquoted; quoting them would inflate every command. + s := portshim.New(&fakeAgent{launchCmd: []string{"/usr/local/bin/claude", "--session-id", "abc-123_xyz.uuid"}}) + got := s.GetLaunchCommand(ports.AgentConfig{}) + if strings.Contains(got, "'") { + t.Fatalf("got unexpected quotes: %q", got) + } +} diff --git a/backend/internal/adapters/messenger/inbox/inbox.go b/backend/internal/adapters/messenger/inbox/inbox.go new file mode 100644 index 00000000..5a6c06c8 --- /dev/null +++ b/backend/internal/adapters/messenger/inbox/inbox.go @@ -0,0 +1,110 @@ +// Package inbox implements ports.AgentMessenger by writing each message as a +// file in /.ao/inbox/. The agent reads its inbox on demand; +// pinging the runtime pane to consume new files is a separate concern that +// lives in the runtime adapter, not here. 
+package inbox
+
+import (
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+	"time"
+
+	"github.com/aoagents/agent-orchestrator/backend/internal/domain"
+	"github.com/aoagents/agent-orchestrator/backend/internal/ports"
+)
+
+// SessionWorkspace resolves a session id to the absolute path of its workspace.
+// The sqlite store satisfies this via GetSession; the adapter is in
+// daemon/session_wiring.go.
+type SessionWorkspace interface {
+	WorkspacePath(ctx context.Context, id domain.SessionID) (string, error)
+}
+
+// Messenger writes inbox files into per-session workspaces.
+type Messenger struct {
+	lookup SessionWorkspace
+	clock  func() time.Time
+}
+
+// New builds a Messenger over the given workspace lookup. lookup is required.
+func New(lookup SessionWorkspace) *Messenger {
+	return &Messenger{lookup: lookup, clock: time.Now}
+}
+
+var _ ports.AgentMessenger = (*Messenger)(nil)
+
+// Send writes message into {workspace}/.ao/inbox/{unix-nanos}_{hash8}.md.
+//
+// Filename collisions are unlikely: nanosecond timestamp plus an 8-char hash of
+// the body. If one does occur (same body and same clock reading) the exclusive
+// create fails and Send returns that error instead of overwriting the earlier
+// message. We do not retry.
+//
+// Symlink safety: if .ao or .ao/inbox already exists as a symlink, refuse.
+// Otherwise os.MkdirAll creates real directories. The message file itself is
+// created with O_CREATE|O_EXCL, so anything already present at the target name
+// (a regular file or a symlink) makes the open fail rather than being
+// truncated or followed. The inbox is owned by ao; a symlink there is either
+// user misconfig or attack.
+func (m *Messenger) Send(ctx context.Context, id domain.SessionID, message string) error {
+	ws, err := m.lookup.WorkspacePath(ctx, id)
+	if err != nil {
+		return fmt.Errorf("inbox: lookup workspace for %s: %w", id, err)
+	}
+	if ws == "" {
+		return fmt.Errorf("inbox: empty workspace path for %s", id)
+	}
+
+	aoDir := filepath.Join(ws, ".ao")
+	if err := ensureRealDir(aoDir); err != nil {
+		return fmt.Errorf("inbox: prepare .ao for %s: %w", id, err)
+	}
+	inboxDir := filepath.Join(aoDir, "inbox")
+	if err := ensureRealDir(inboxDir); err != nil {
+		return fmt.Errorf("inbox: prepare inbox for %s: %w", id, err)
+	}
+
+	name := filenameFor(m.clock(), message)
+	if err := writeNewFile(filepath.Join(inboxDir, name), []byte(message)); err != nil {
+		return fmt.Errorf("inbox: write %s for %s: %w", name, id, err)
+	}
+	return nil
+}
+
+// writeNewFile creates path with mode 0644 and writes data to it. The file
+// must not already exist: O_EXCL turns a name collision into an error instead
+// of a silent truncation of the earlier file.
+func writeNewFile(path string, data []byte) error {
+	f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o644)
+	if err != nil {
+		return err
+	}
+	_, err = f.Write(data)
+	// Close may report a deferred write failure; surface it unless Write
+	// already produced an error of its own.
+	if cerr := f.Close(); err == nil {
+		err = cerr
+	}
+	return err
+}
+
+// ensureRealDir creates path if missing (0755), refuses if path is a symlink.
+// Lstat (not Stat) is used so a symlink isn't followed into a different tree.
+//
+// The workspace root itself is not Lstat-checked because gitworktree.Workspace
+// resolves ManagedRoot to an absolute, symlink-free path at construction
+// (gitworktree.physicalAbs); per-session workspaces under it are created by ao.
+// A symlinked .ao or .ao/inbox inside an ao-owned workspace would be user
+// misconfig or attack, and is the only segment that can be tampered with
+// between Spawn and Send.
+func ensureRealDir(path string) error {
+	info, err := os.Lstat(path)
+	switch {
+	case err == nil:
+		if info.Mode()&os.ModeSymlink != 0 {
+			return fmt.Errorf("%q is a symlink; refusing to follow", path)
+		}
+		if !info.IsDir() {
+			return fmt.Errorf("%q exists and is not a directory", path)
+		}
+		return nil
+	case errors.Is(err, os.ErrNotExist):
+		return os.MkdirAll(path, 0o755)
+	default:
+		return err
+	}
+}
+
+// filenameFor builds a collision-resistant name from the timestamp and message
+// body: the decimal UnixNano value of t, an underscore, the first 8 hex
+// characters of the body's SHA-256, and a ".md" suffix. Names sort
+// chronologically as long as the UnixNano values have the same digit count.
+func filenameFor(t time.Time, message string) string {
+	sum := sha256.Sum256([]byte(message))
+	hash := hex.EncodeToString(sum[:])[:8]
+	return strconv.FormatInt(t.UnixNano(), 10) + "_" + hash + ".md"
+}
diff --git a/backend/internal/adapters/messenger/inbox/inbox_test.go b/backend/internal/adapters/messenger/inbox/inbox_test.go
new file mode 100644
index 00000000..f9c0235e
--- /dev/null
+++ b/backend/internal/adapters/messenger/inbox/inbox_test.go
@@ -0,0 +1,152 @@
+package inbox_test
+
+import (
+	"context"
+	"errors"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/aoagents/agent-orchestrator/backend/internal/adapters/messenger/inbox"
+	"github.com/aoagents/agent-orchestrator/backend/internal/domain"
+	"github.com/aoagents/agent-orchestrator/backend/internal/ports"
+)
+
+func TestSatisfiesAgentMessenger(t *testing.T) {
+	var _ ports.AgentMessenger = (*inbox.Messenger)(nil)
+}
+
+type fakeLookup struct {
+	path string
+	err  error
+}
+
+func (f fakeLookup) WorkspacePath(context.Context, domain.SessionID) (string, error) {
+	return f.path, f.err
+}
+
+func TestSend_WritesMessageFile(t *testing.T) {
+	dir := t.TempDir()
+	m := inbox.New(fakeLookup{path: dir})
+	if err := m.Send(context.Background(), "s-1", "hello agent"); err != nil {
+		t.Fatal(err)
+	}
+	inboxDir := filepath.Join(dir, ".ao", "inbox")
+	entries, err := os.ReadDir(inboxDir)
+	if err != nil {
+		t.Fatalf("inbox dir: %v", err)
+	}
+	if len(entries) != 1 {
+		t.Fatalf("want 1 file, got %d", len(entries))
+	}
+	name := entries[0].Name()
+	if !strings.HasSuffix(name, ".md") {
+		t.Errorf("want .md suffix, got %q", name)
+	}
+	body, err := os.ReadFile(filepath.Join(inboxDir, name))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if string(body) != "hello agent" {
+		t.Errorf("body %q want %q", body, "hello agent")
+	}
+}
+
+func TestSend_CreatesInboxDirIfMissing(t *testing.T) {
+	dir := t.TempDir()
+	// dir
contains no .ao yet. + m := inbox.New(fakeLookup{path: dir}) + if err := m.Send(context.Background(), "s-1", "x"); err != nil { + t.Fatal(err) + } + if _, err := os.Stat(filepath.Join(dir, ".ao", "inbox")); err != nil { + t.Fatalf("inbox dir not created: %v", err) + } +} + +func TestSend_TwoSendsProduceTwoFiles(t *testing.T) { + dir := t.TempDir() + m := inbox.New(fakeLookup{path: dir}) + ctx := context.Background() + if err := m.Send(ctx, "s-1", "first"); err != nil { + t.Fatal(err) + } + if err := m.Send(ctx, "s-1", "second"); err != nil { + t.Fatal(err) + } + entries, _ := os.ReadDir(filepath.Join(dir, ".ao", "inbox")) + if len(entries) != 2 { + t.Fatalf("want 2 files, got %d", len(entries)) + } +} + +func TestSend_UnknownSessionReturnsError(t *testing.T) { + m := inbox.New(fakeLookup{err: errors.New("not found")}) + err := m.Send(context.Background(), "s-1", "x") + if err == nil { + t.Fatal("expected error when workspace lookup fails") + } + if !strings.Contains(err.Error(), "not found") { + t.Errorf("error should wrap lookup error, got %v", err) + } +} + +func TestSend_EmptyWorkspacePathReturnsError(t *testing.T) { + // A spawned-but-not-yet-mark-spawned row has WorkspacePath == "". The + // messenger must refuse rather than write into "/.ao/inbox/...". + m := inbox.New(fakeLookup{path: ""}) + if err := m.Send(context.Background(), "s-1", "x"); err == nil { + t.Fatal("expected error for empty workspace path") + } +} + +func TestSend_SymlinkedInboxIsRefused(t *testing.T) { + dir := t.TempDir() + // Create .ao/inbox as a symlink to a sibling directory. 
+ target := t.TempDir() + if err := os.MkdirAll(filepath.Join(dir, ".ao"), 0o755); err != nil { + t.Fatal(err) + } + if err := os.Symlink(target, filepath.Join(dir, ".ao", "inbox")); err != nil { + t.Skipf("symlink not supported: %v", err) + } + m := inbox.New(fakeLookup{path: dir}) + err := m.Send(context.Background(), "s-1", "x") + if err == nil { + t.Fatal("expected refusal when inbox is a symlink") + } + if entries, _ := os.ReadDir(target); len(entries) != 0 { + t.Errorf("symlink target should not have received writes, got %d entries", len(entries)) + } +} + +func TestSend_EmptyMessageStillWritesAFile(t *testing.T) { + dir := t.TempDir() + m := inbox.New(fakeLookup{path: dir}) + if err := m.Send(context.Background(), "s-1", ""); err != nil { + t.Fatal(err) + } + entries, _ := os.ReadDir(filepath.Join(dir, ".ao", "inbox")) + if len(entries) != 1 { + t.Fatalf("want 1 file even for empty message, got %d", len(entries)) + } +} + +func TestSend_FilenameContainsTimestampAndHashPrefix(t *testing.T) { + dir := t.TempDir() + m := inbox.New(fakeLookup{path: dir}) + if err := m.Send(context.Background(), "s-1", "payload"); err != nil { + t.Fatal(err) + } + entries, _ := os.ReadDir(filepath.Join(dir, ".ao", "inbox")) + name := strings.TrimSuffix(entries[0].Name(), ".md") + // Format: _; underscore separator avoids the timestamp's own dashes. 
+ parts := strings.SplitN(name, "_", 2) + if len(parts) != 2 { + t.Fatalf("filename should be _.md, got %q", entries[0].Name()) + } + if len(parts[1]) < 4 { + t.Errorf("hash prefix too short: %q", parts[1]) + } +} diff --git a/backend/internal/adapters/workspace/gitworktree/projectresolver/resolver.go b/backend/internal/adapters/workspace/gitworktree/projectresolver/resolver.go new file mode 100644 index 00000000..496f8adc --- /dev/null +++ b/backend/internal/adapters/workspace/gitworktree/projectresolver/resolver.go @@ -0,0 +1,47 @@ +// Package projectresolver supplies gitworktree.Workspace with a RepoResolver +// backed by the project.Manager. It lives in its own subpackage so the +// gitworktree package can stay free of the project package import (and the +// import cycle that would create if project ever depended on gitworktree). +package projectresolver + +import ( + "context" + "fmt" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/workspace/gitworktree" + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/project" +) + +// Resolver maps a domain.ProjectID to its local repo path by consulting the +// project store via project.Manager. +type Resolver struct { + projects project.Manager +} + +// New builds a Resolver over the given Manager. projects is required. +func New(projects project.Manager) *Resolver { + return &Resolver{projects: projects} +} + +var _ gitworktree.RepoResolver = (*Resolver)(nil) + +// RepoPath returns the absolute repo path the project is registered against. +// A degraded project (config failed to load) and an unknown project both yield +// an error rather than the empty path that would silently mis-create worktrees. +// +// The gitworktree.RepoResolver interface is context-free, so we use +// context.Background() to call the underlying Manager. 
+func (r *Resolver) RepoPath(projectID domain.ProjectID) (string, error) { + res, err := r.projects.Get(context.Background(), projectID) + if err != nil { + return "", fmt.Errorf("projectresolver: lookup %q: %w", projectID, err) + } + if res.Project == nil { + return "", fmt.Errorf("projectresolver: project %q is %s; no repo path available", projectID, res.Status) + } + if res.Project.Path == "" { + return "", fmt.Errorf("projectresolver: project %q has no path", projectID) + } + return res.Project.Path, nil +} diff --git a/backend/internal/adapters/workspace/gitworktree/projectresolver/resolver_test.go b/backend/internal/adapters/workspace/gitworktree/projectresolver/resolver_test.go new file mode 100644 index 00000000..4721a529 --- /dev/null +++ b/backend/internal/adapters/workspace/gitworktree/projectresolver/resolver_test.go @@ -0,0 +1,69 @@ +package projectresolver_test + +import ( + "context" + "os/exec" + "testing" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/workspace/gitworktree" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/workspace/gitworktree/projectresolver" + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/project" +) + +func TestSatisfiesRepoResolver(t *testing.T) { + var _ gitworktree.RepoResolver = (*projectresolver.Resolver)(nil) +} + +func TestRepoPath_ReturnsProjectPath(t *testing.T) { + mgr := project.NewMemoryManager() + repo := mkGitRepo(t) + added, err := mgr.Add(context.Background(), project.AddInput{Path: repo}) + if err != nil { + t.Fatal(err) + } + r := projectresolver.New(mgr) + got, err := r.RepoPath(added.ID) + if err != nil { + t.Fatal(err) + } + if got != added.Path { + t.Fatalf("got %q want %q", got, added.Path) + } +} + +func TestRepoPath_UnknownProjectReturnsError(t *testing.T) { + mgr := project.NewMemoryManager() + r := projectresolver.New(mgr) + if _, err := r.RepoPath("nope"); err == nil { + 
t.Fatal("expected error for unknown project") + } +} + +func TestRepoPath_DegradedProjectReturnsError(t *testing.T) { + // Degraded resolves a status, not a Project — the resolver must surface an + // error rather than the empty path that would silently mis-create worktrees. + r := projectresolver.New(stubManagerDegraded{}) + _, err := r.RepoPath("p1") + if err == nil { + t.Fatal("expected error for degraded project") + } +} + +// stubManagerDegraded only overrides Get; other Manager methods would panic if +// reached, which they should not in this test. +type stubManagerDegraded struct{ project.Manager } + +func (stubManagerDegraded) Get(context.Context, domain.ProjectID) (project.GetResult, error) { + return project.GetResult{Status: "degraded"}, nil +} + +func mkGitRepo(t *testing.T) string { + t.Helper() + dir := t.TempDir() + cmd := exec.Command("git", "init", "-q", dir) + if err := cmd.Run(); err != nil { + t.Skipf("git not available: %v", err) + } + return dir +} diff --git a/backend/internal/daemon/daemon.go b/backend/internal/daemon/daemon.go index b8d89053..626656f5 100644 --- a/backend/internal/daemon/daemon.go +++ b/backend/internal/daemon/daemon.go @@ -11,6 +11,7 @@ import ( "os/signal" "syscall" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/messenger/inbox" "github.com/aoagents/agent-orchestrator/backend/internal/adapters/runtime/zellij" "github.com/aoagents/agent-orchestrator/backend/internal/config" "github.com/aoagents/agent-orchestrator/backend/internal/httpd" @@ -58,15 +59,21 @@ func Run() error { return err } + // Singletons shared across the daemon. Constructing each exactly once and + // passing the same instance everywhere prevents the multi-zellij-socket / + // dual-LCM / dual-project-store hazards that fragmented adapters create. 
+ runtimeAdapter := zellij.New(zellij.Options{}) + projects := project.NewManager(store) + messenger := inbox.New(newStoreWorkspaceLookup(store)) + // Terminal streaming: the Zellij runtime supplies the PTY-attach command and // liveness; the CDC broadcaster feeds the session-state channel. The manager // is handed to httpd, which mounts it at /mux. Raw PTY bytes never flow // through the CDC change_log — only session-state events do. - runtimeAdapter := zellij.New(zellij.Options{}) termMgr := terminal.NewManager(runtimeAdapter, cdcPipe.Broadcaster, log) defer termMgr.Close() - srv, err := httpd.NewWithDeps(cfg, log, termMgr, httpd.APIDeps{Projects: project.NewManager(store)}) + srv, err := httpd.NewWithDeps(cfg, log, termMgr, httpd.APIDeps{Projects: projects}) if err != nil { stop() if cdcErr := cdcPipe.Stop(); cdcErr != nil { @@ -75,10 +82,21 @@ func Run() error { return err } - // Bring up the Lifecycle Manager and the reaper. This makes the session - // lifecycle write path live end-to-end: reducer write -> store -> DB trigger - // -> change_log -> poller -> broadcaster. - lcStack := startLifecycle(ctx, store, runtimeAdapter, log) + // Bring up the Lifecycle Manager + reaper, then the Session Manager stack + // over the same lcm/runtime/projects/messenger singletons. SM has no HTTP + // routes yet — they land in a follow-up PR; constructing it here lets the + // next PR hang controllers off ss.sm without further wiring changes. 
+ lcStack := startLifecycle(ctx, store, runtimeAdapter, messenger, log) + ss, err := buildSessionStack(cfg, store, runtimeAdapter, projects, lcStack.lcm, messenger) + if err != nil { + stop() + lcStack.Stop() + if cdcErr := cdcPipe.Stop(); cdcErr != nil { + log.Error("cdc pipeline shutdown", "err", cdcErr) + } + return err + } + _ = ss // sm: HTTP routes land in a follow-up PR (γ) runErr := srv.Run(ctx) diff --git a/backend/internal/daemon/lifecycle_wiring.go b/backend/internal/daemon/lifecycle_wiring.go index 5c04002d..23071cb9 100644 --- a/backend/internal/daemon/lifecycle_wiring.go +++ b/backend/internal/daemon/lifecycle_wiring.go @@ -10,18 +10,21 @@ import ( "github.com/aoagents/agent-orchestrator/backend/internal/storage/sqlite" ) -// lifecycleStack owns the runtime reaper goroutine started with the lifecycle -// reducer. The reducer itself is only used for wiring observations into storage. +// lifecycleStack owns the Lifecycle Manager (which the Session Manager and the +// reaper both depend on) and the reaper goroutine. type lifecycleStack struct { + lcm *lifecycle.Manager reaperDone <-chan struct{} } // startLifecycle constructs the Lifecycle Manager over the store and starts the -// reaper. The goroutine stops when ctx is cancelled; Stop waits for it to drain. -func startLifecycle(ctx context.Context, store *sqlite.Store, runtime ports.Runtime, logger *slog.Logger) *lifecycleStack { - lcm := lifecycle.New(store, nil) +// reaper. The messenger is passed into the LCM so PR-driven reactions (CI fail, +// review feedback, merge conflict) can nudge the agent. The goroutine stops +// when ctx is cancelled; Stop waits for it to drain. 
+func startLifecycle(ctx context.Context, store *sqlite.Store, runtime ports.Runtime, messenger ports.AgentMessenger, logger *slog.Logger) *lifecycleStack { + lcm := lifecycle.New(store, messenger) rp := reaper.New(lcm, store, runtime, reaper.Config{Logger: logger}) - return &lifecycleStack{reaperDone: rp.Start(ctx)} + return &lifecycleStack{lcm: lcm, reaperDone: rp.Start(ctx)} } // Stop waits for the reaper goroutine to exit. The caller must cancel the ctx diff --git a/backend/internal/daemon/session_wiring.go b/backend/internal/daemon/session_wiring.go new file mode 100644 index 00000000..61dd25a3 --- /dev/null +++ b/backend/internal/daemon/session_wiring.go @@ -0,0 +1,75 @@ +package daemon + +import ( + "context" + "fmt" + "path/filepath" + + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/claudecode" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/portshim" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/messenger/inbox" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/workspace/gitworktree" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/workspace/gitworktree/projectresolver" + "github.com/aoagents/agent-orchestrator/backend/internal/config" + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/lifecycle" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" + "github.com/aoagents/agent-orchestrator/backend/internal/project" + "github.com/aoagents/agent-orchestrator/backend/internal/session" + "github.com/aoagents/agent-orchestrator/backend/internal/storage/sqlite" +) + +// sessionStack groups the per-session collaborators the daemon assembles around +// the Session Manager. HTTP routes that expose SM operations land in a +// follow-up PR; this PR just constructs the stack so the next one can hang +// routes off it. 
+type sessionStack struct { + sm *session.Manager + workspace ports.Workspace + messenger ports.AgentMessenger +} + +// buildSessionStack assembles the session-runtime stack: gitworktree workspace +// over a project-store-backed RepoResolver, claudecode-via-portshim agent, +// inbox-file AgentMessenger, and the Session Manager itself. The runtime, lcm, +// projects, and store passed in are the same instances the rest of the daemon +// uses, so there is one source of truth per collaborator. +func buildSessionStack(cfg config.Config, store *sqlite.Store, runtime ports.Runtime, projects project.Manager, lcm *lifecycle.Manager, messenger ports.AgentMessenger) (*sessionStack, error) { + ws, err := gitworktree.New(gitworktree.Options{ + ManagedRoot: filepath.Join(cfg.DataDir, "worktrees"), + RepoResolver: projectresolver.New(projects), + }) + if err != nil { + return nil, fmt.Errorf("gitworktree: %w", err) + } + sm := session.New(session.Deps{ + Runtime: runtime, + Agent: portshim.New(claudecode.New()), + Workspace: ws, + Store: store, + Messenger: messenger, + Lifecycle: lcm, + }) + return &sessionStack{sm: sm, workspace: ws, messenger: messenger}, nil +} + +// storeWorkspaceLookup adapts the sqlite store to the SessionWorkspace lookup +// the inbox messenger needs. WorkspacePath becomes meaningful only after the +// LCM records spawn metadata, so a session that exists but has no path is an +// error — Send must not invent a destination. 
+type storeWorkspaceLookup struct{ store *sqlite.Store } + +func newStoreWorkspaceLookup(store *sqlite.Store) inbox.SessionWorkspace { + return storeWorkspaceLookup{store: store} +} + +func (s storeWorkspaceLookup) WorkspacePath(ctx context.Context, id domain.SessionID) (string, error) { + rec, ok, err := s.store.GetSession(ctx, id) + if err != nil { + return "", err + } + if !ok { + return "", fmt.Errorf("session %s not found", id) + } + return rec.Metadata.WorkspacePath, nil +} diff --git a/backend/internal/daemon/wiring_test.go b/backend/internal/daemon/wiring_test.go index 6d6dae04..81da6931 100644 --- a/backend/internal/daemon/wiring_test.go +++ b/backend/internal/daemon/wiring_test.go @@ -2,11 +2,16 @@ package daemon import ( "context" + "os" + "path/filepath" "sync" "testing" "time" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/messenger/inbox" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/runtime/zellij" "github.com/aoagents/agent-orchestrator/backend/internal/cdc" + "github.com/aoagents/agent-orchestrator/backend/internal/config" "github.com/aoagents/agent-orchestrator/backend/internal/domain" "github.com/aoagents/agent-orchestrator/backend/internal/lifecycle" "github.com/aoagents/agent-orchestrator/backend/internal/ports" @@ -69,3 +74,74 @@ func TestWiring_WriteFlowsToBroadcaster(t *testing.T) { t.Fatalf("expected a change_log event for %s to reach the broadcaster, got %d events", rec.ID, len(got)) } } + +// TestWiring_SessionStackSharesSingletons asserts the daemon's wiring shape: +// startLifecycle and buildSessionStack share the same messenger and LCM, and +// the messenger reaches the same store the SM reads. Two LCMs would split +// agent-nudge state; two messengers would route inbox writes inconsistently. 
+// +// The pointer-identity check on ss.messenger proves buildSessionStack does not +// silently construct a second messenger; the end-to-end Send through a row the +// store owns proves the storeWorkspaceLookup is the same store SM uses. +func TestWiring_SessionStackSharesSingletons(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + + store, err := sqlite.Open(t.TempDir()) + if err != nil { + t.Fatal(err) + } + defer store.Close() + cfg := config.Config{DataDir: t.TempDir()} + + projects := project.NewManager(store) + runtime := zellij.New(zellij.Options{}) + messenger := inbox.New(newStoreWorkspaceLookup(store)) + lcStack := startLifecycle(ctx, store, runtime, messenger, nil) + // Cancel-then-Stop in order: Stop drains the reaper goroutine, which only + // exits when ctx is cancelled. A naive `defer cancel(); defer lcStack.Stop()` + // reverses this (defer is LIFO) and deadlocks. + t.Cleanup(func() { + cancel() + lcStack.Stop() + }) + + if lcStack.lcm == nil { + t.Fatal("lifecycleStack must expose its LCM so the SM can share it") + } + ss, err := buildSessionStack(cfg, store, runtime, projects, lcStack.lcm, messenger) + if err != nil { + t.Fatalf("buildSessionStack: %v", err) + } + if ss.sm == nil || ss.workspace == nil || ss.messenger == nil { + t.Fatal("session stack must be fully populated") + } + if ss.messenger != messenger { + t.Error("buildSessionStack must reuse the messenger it is given, not construct a second one") + } + + // End-to-end: a session row in the shared store should be reachable through + // the messenger that buildSessionStack wired up. A second store would + // surface as "session not found" here. 
+ if err := store.Upsert(ctx, project.Row{ID: "p", Path: "/repo/p", RegisteredAt: time.Now()}); err != nil { + t.Fatal(err) + } + workspaceDir := t.TempDir() + rec, err := store.CreateSession(ctx, domain.SessionRecord{ + ProjectID: "p", Kind: domain.KindWorker, + Activity: domain.Activity{State: domain.ActivityIdle, LastActivityAt: time.Now()}, + Metadata: domain.SessionMetadata{WorkspacePath: workspaceDir}, + }) + if err != nil { + t.Fatal(err) + } + if err := ss.messenger.Send(ctx, rec.ID, "hello"); err != nil { + t.Fatalf("messenger.Send through shared store lookup: %v", err) + } + entries, err := os.ReadDir(filepath.Join(workspaceDir, ".ao", "inbox")) + if err != nil { + t.Fatalf("inbox dir: %v", err) + } + if len(entries) != 1 { + t.Fatalf("want 1 inbox file, got %d", len(entries)) + } +} From 6bd45597eb2d69775095e92e4ddc5cfd2ad46be0 Mon Sep 17 00:00:00 2001 From: harshitsinghbhandari <24b4506@iitb.ac.in> Date: Mon, 1 Jun 2026 21:46:55 +0530 Subject: [PATCH 09/17] feat(ao): `ao spawn` CLI + POST /api/v1/sessions route --- backend/internal/cli/root.go | 1 + backend/internal/cli/spawn.go | 128 ++++++++++ backend/internal/cli/spawn_test.go | 230 ++++++++++++++++++ backend/internal/daemon/daemon.go | 16 +- backend/internal/httpd/api.go | 7 + .../internal/httpd/controllers/sessions.go | 110 +++++++++ .../httpd/controllers/sessions_test.go | 193 +++++++++++++++ backend/internal/session/spawner.go | 17 ++ 8 files changed, 694 insertions(+), 8 deletions(-) create mode 100644 backend/internal/cli/spawn.go create mode 100644 backend/internal/cli/spawn_test.go create mode 100644 backend/internal/httpd/controllers/sessions.go create mode 100644 backend/internal/httpd/controllers/sessions_test.go create mode 100644 backend/internal/session/spawner.go diff --git a/backend/internal/cli/root.go b/backend/internal/cli/root.go index ce015738..9dfd49f4 100644 --- a/backend/internal/cli/root.go +++ b/backend/internal/cli/root.go @@ -146,6 +146,7 @@ func NewRootCommand(deps Deps) 
*cobra.Command { root.AddCommand(newStartCommand(ctx)) root.AddCommand(newStopCommand(ctx)) root.AddCommand(newStatusCommand(ctx)) + root.AddCommand(newSpawnCommand(ctx)) root.AddCommand(newDoctorCommand(ctx)) root.AddCommand(newCompletionCommand()) root.AddCommand(newVersionCommand()) diff --git a/backend/internal/cli/spawn.go b/backend/internal/cli/spawn.go new file mode 100644 index 00000000..ba8ea496 --- /dev/null +++ b/backend/internal/cli/spawn.go @@ -0,0 +1,128 @@ +package cli + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + + "github.com/spf13/cobra" + + "github.com/aoagents/agent-orchestrator/backend/internal/config" + "github.com/aoagents/agent-orchestrator/backend/internal/runfile" +) + +type spawnOptions struct { + project string + prompt string + agent string +} + +func newSpawnCommand(ctx *commandContext) *cobra.Command { + var opts spawnOptions + cmd := &cobra.Command{ + Use: "spawn", + Short: "Spawn a new agent session", + Args: noArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + return ctx.spawnSession(cmd.Context(), cmd.OutOrStdout(), opts) + }, + } + cmd.Flags().StringVar(&opts.prompt, "prompt", "", "Initial prompt for the agent") + cmd.Flags().StringVar(&opts.project, "project", "", "Project id") + cmd.Flags().StringVar(&opts.agent, "agent", "claude-code", "Agent plugin") + return cmd +} + +type spawnAPIRequest struct { + ProjectID string `json:"projectId"` + Prompt string `json:"prompt"` + Agent string `json:"agent,omitempty"` +} + +type spawnAPIResponse struct { + SessionID string `json:"sessionId"` + WorkspacePath string `json:"workspacePath"` + RuntimeHandle string `json:"runtimeHandle"` +} + +type apiError struct { + Kind string `json:"error"` + Code string `json:"code"` + Message string `json:"message"` +} + +func (c *commandContext) spawnSession(ctx context.Context, out io.Writer, opts spawnOptions) error { + prompt := strings.TrimSpace(opts.prompt) + if prompt == "" { + 
return usageError{errors.New("usage: --prompt is required")} + } + project := strings.TrimSpace(opts.project) + if project == "" { + return usageError{errors.New("usage: --project is required")} + } + + cfg, err := config.Load() + if err != nil { + return err + } + + info, err := runfile.Read(cfg.RunFilePath) + if err != nil { + return fmt.Errorf("read run-file: %w", err) + } + if info == nil { + return errors.New("AO daemon is not running; start it with `ao start`") + } + + payload := spawnAPIRequest{ + ProjectID: project, + Prompt: prompt, + Agent: opts.agent, + } + body, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("encode request: %w", err) + } + + url := fmt.Sprintf("http://%s:%d/api/v1/sessions", config.LoopbackHost, info.Port) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := c.deps.HTTPClient.Do(req) + if err != nil { + return fmt.Errorf("daemon request: %w", err) + } + defer func() { _ = resp.Body.Close() }() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("read response: %w", err) + } + + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + var ok spawnAPIResponse + if err := json.Unmarshal(respBody, &ok); err != nil { + return fmt.Errorf("decode response: %w", err) + } + _, err := fmt.Fprintf(out, "Spawned session %s in %s\nAttach: zellij attach %s\n", + ok.SessionID, ok.WorkspacePath, ok.RuntimeHandle) + return err + } + + // Non-2xx: surface the server's error envelope when present, otherwise the + // raw status. Both 4xx and 5xx exit 1; usage errors (which exit 2) come from + // flag validation above. 
+ var apiErr apiError + if jerr := json.Unmarshal(respBody, &apiErr); jerr == nil && apiErr.Kind != "" { + return fmt.Errorf("%s: %s", apiErr.Kind, apiErr.Message) + } + return fmt.Errorf("daemon returned HTTP %d", resp.StatusCode) +} diff --git a/backend/internal/cli/spawn_test.go b/backend/internal/cli/spawn_test.go new file mode 100644 index 00000000..2638a4d4 --- /dev/null +++ b/backend/internal/cli/spawn_test.go @@ -0,0 +1,230 @@ +package cli + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "os" + "strings" + "testing" + "time" + + "github.com/aoagents/agent-orchestrator/backend/internal/runfile" +) + +// spawnServer wires up an httptest server, writes a runfile pointing at it, and +// returns the captured request body slot the caller assertions can read. +func spawnServer(t *testing.T, status int, respBody string) (*httptest.Server, *string) { + t.Helper() + var captured string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/v1/sessions" && r.Method == http.MethodPost { + body, err := io.ReadAll(r.Body) + if err != nil { + t.Fatalf("read req body: %v", err) + } + captured = string(body) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _, _ = io.WriteString(w, respBody) + return + } + http.NotFound(w, r) + })) + t.Cleanup(srv.Close) + return srv, &captured +} + +func writeRunFileFor(t *testing.T, cfg testConfig, srv *httptest.Server) { + t.Helper() + port := serverPort(t, srv.URL) + if err := runfile.Write(cfg.runFile, runfile.Info{ + PID: os.Getpid(), + Port: port, + StartedAt: time.Unix(100, 0).UTC(), + }); err != nil { + t.Fatal(err) + } +} + +func TestSpawn_Success(t *testing.T) { + cfg := setConfigEnv(t) + resp := `{"sessionId":"demo-1","workspacePath":"/tmp/demo-1","runtimeHandle":"zellij-demo-1"}` + srv, captured := spawnServer(t, http.StatusCreated, resp) + writeRunFileFor(t, cfg, srv) + + out, errOut, err := executeCLI(t, 
Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "demo", "--prompt", "do the thing", "--agent", "claude-code") + if err != nil { + t.Fatalf("unexpected error: %v\nstderr=%s", err, errOut) + } + if !strings.Contains(out, "Spawned session demo-1 in /tmp/demo-1") { + t.Fatalf("stdout missing spawn line:\n%s", out) + } + if !strings.Contains(out, "Attach: zellij attach zellij-demo-1") { + t.Fatalf("stdout missing attach line:\n%s", out) + } + + var req struct { + ProjectID string `json:"projectId"` + Prompt string `json:"prompt"` + Agent string `json:"agent"` + } + if err := json.Unmarshal([]byte(*captured), &req); err != nil { + t.Fatalf("decode captured req: %v\nbody=%s", err, *captured) + } + if req.ProjectID != "demo" || req.Prompt != "do the thing" || req.Agent != "claude-code" { + t.Fatalf("captured payload = %#v", req) + } +} + +func TestSpawn_DefaultsAgent(t *testing.T) { + cfg := setConfigEnv(t) + srv, captured := spawnServer(t, http.StatusCreated, + `{"sessionId":"demo-1","workspacePath":"/tmp/demo-1","runtimeHandle":"zellij-demo-1"}`) + writeRunFileFor(t, cfg, srv) + + _, errOut, err := executeCLI(t, Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "demo", "--prompt", "x") + if err != nil { + t.Fatalf("unexpected error: %v\nstderr=%s", err, errOut) + } + if !strings.Contains(*captured, `"agent":"claude-code"`) { + t.Fatalf("agent default not sent: %s", *captured) + } +} + +func TestSpawn_EmptyPromptIsUsageError(t *testing.T) { + setConfigEnv(t) + _, _, err := executeCLI(t, Deps{}, "spawn", "--project", "demo", "--prompt", " ") + if err == nil { + t.Fatal("expected usage error for empty prompt") + } + if got := ExitCode(err); got != 2 { + t.Fatalf("exit code = %d, want 2", got) + } + if !strings.Contains(err.Error(), "--prompt is required") { + t.Fatalf("error missing usage message: %v", err) + } +} + +func TestSpawn_MissingProjectIsUsageError(t *testing.T) { + setConfigEnv(t) + _, _, err := 
executeCLI(t, Deps{}, "spawn", "--prompt", "x") + if err == nil { + t.Fatal("expected usage error for missing project") + } + if got := ExitCode(err); got != 2 { + t.Fatalf("exit code = %d, want 2", got) + } +} + +func TestSpawn_ServerBadRequestExits1(t *testing.T) { + cfg := setConfigEnv(t) + srv, _ := spawnServer(t, http.StatusBadRequest, + `{"error":"bad_request","code":"PROMPT_REQUIRED","message":"prompt is required"}`) + writeRunFileFor(t, cfg, srv) + + _, errOut, err := executeCLI(t, Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "demo", "--prompt", "x") + if err == nil { + t.Fatal("expected runtime error from 400") + } + if got := ExitCode(err); got != 1 { + t.Fatalf("exit code = %d, want 1", got) + } + if !strings.Contains(err.Error(), "bad_request") && !strings.Contains(errOut, "bad_request") { + t.Fatalf("error did not include server kind: %v\nstderr=%s", err, errOut) + } +} + +func TestSpawn_ServerNotFoundExits1(t *testing.T) { + cfg := setConfigEnv(t) + srv, _ := spawnServer(t, http.StatusNotFound, + `{"error":"not_found","code":"PROJECT_NOT_FOUND","message":"Unknown project"}`) + writeRunFileFor(t, cfg, srv) + + _, _, err := executeCLI(t, Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "missing", "--prompt", "x") + if err == nil { + t.Fatal("expected runtime error from 404") + } + if got := ExitCode(err); got != 1 { + t.Fatalf("exit code = %d, want 1", got) + } +} + +func TestSpawn_ServerInternalErrorExits1(t *testing.T) { + cfg := setConfigEnv(t) + srv, _ := spawnServer(t, http.StatusInternalServerError, + `{"error":"internal","code":"SPAWN_FAILED","message":"Failed to spawn session"}`) + writeRunFileFor(t, cfg, srv) + + _, _, err := executeCLI(t, Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "demo", "--prompt", "x") + if err == nil { + t.Fatal("expected runtime error from 500") + } + if got := ExitCode(err); got != 1 { + t.Fatalf("exit code = %d, 
want 1", got) + } +} + +func TestSpawn_DaemonNotRunningExits1(t *testing.T) { + setConfigEnv(t) + // No runfile: daemon is stopped. + _, _, err := executeCLI(t, Deps{}, "spawn", "--project", "demo", "--prompt", "x") + if err == nil { + t.Fatal("expected error when daemon is not running") + } + if got := ExitCode(err); got != 1 { + t.Fatalf("exit code = %d, want 1", got) + } +} + +func TestSpawn_SessionsDisabledExits1(t *testing.T) { + cfg := setConfigEnv(t) + srv, _ := spawnServer(t, http.StatusServiceUnavailable, + `{"error":"sessions_disabled","code":"SESSIONS_DISABLED","message":"Session Manager is not wired in this daemon"}`) + writeRunFileFor(t, cfg, srv) + + _, errOut, err := executeCLI(t, Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "demo", "--prompt", "x") + if err == nil { + t.Fatal("expected error from 503") + } + if got := ExitCode(err); got != 1 { + t.Fatalf("exit code = %d, want 1", got) + } + if !strings.Contains(err.Error(), "sessions_disabled") && !strings.Contains(errOut, "sessions_disabled") { + t.Fatalf("error did not include sessions_disabled: %v\nstderr=%s", err, errOut) + } +} + +// Sanity helper: ensure the formatted spawn message is stable. 
+func TestSpawn_StdoutShape(t *testing.T) { + cfg := setConfigEnv(t) + srv, _ := spawnServer(t, http.StatusCreated, fmt.Sprintf( + `{"sessionId":%q,"workspacePath":%q,"runtimeHandle":%q}`, + "proj-7", "/tmp/proj-7", "zellij-proj-7")) + writeRunFileFor(t, cfg, srv) + + out, _, err := executeCLI(t, Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "proj", "--prompt", "go") + if err != nil { + t.Fatal(err) + } + want := "Spawned session proj-7 in /tmp/proj-7\nAttach: zellij attach zellij-proj-7\n" + if out != want { + t.Fatalf("stdout mismatch:\n got %q\n want %q", out, want) + } +} diff --git a/backend/internal/daemon/daemon.go b/backend/internal/daemon/daemon.go index 626656f5..59eae3f8 100644 --- a/backend/internal/daemon/daemon.go +++ b/backend/internal/daemon/daemon.go @@ -73,21 +73,22 @@ func Run() error { termMgr := terminal.NewManager(runtimeAdapter, cdcPipe.Broadcaster, log) defer termMgr.Close() - srv, err := httpd.NewWithDeps(cfg, log, termMgr, httpd.APIDeps{Projects: projects}) + // Bring up the Lifecycle Manager + reaper, then the Session Manager stack + // over the same lcm/runtime/projects/messenger singletons. SM is constructed + // before the HTTP server so its Spawner can be plumbed into APIDeps and the + // /api/v1/sessions controller can drive it. + lcStack := startLifecycle(ctx, store, runtimeAdapter, messenger, log) + ss, err := buildSessionStack(cfg, store, runtimeAdapter, projects, lcStack.lcm, messenger) if err != nil { stop() + lcStack.Stop() if cdcErr := cdcPipe.Stop(); cdcErr != nil { log.Error("cdc pipeline shutdown", "err", cdcErr) } return err } - // Bring up the Lifecycle Manager + reaper, then the Session Manager stack - // over the same lcm/runtime/projects/messenger singletons. SM has no HTTP - // routes yet — they land in a follow-up PR; constructing it here lets the - // next PR hang controllers off ss.sm without further wiring changes. 
- lcStack := startLifecycle(ctx, store, runtimeAdapter, messenger, log) - ss, err := buildSessionStack(cfg, store, runtimeAdapter, projects, lcStack.lcm, messenger) + srv, err := httpd.NewWithDeps(cfg, log, termMgr, httpd.APIDeps{Projects: projects, Sessions: ss.sm}) if err != nil { stop() lcStack.Stop() @@ -96,7 +97,6 @@ func Run() error { } return err } - _ = ss // sm: HTTP routes land in a follow-up PR (γ) runErr := srv.Run(ctx) diff --git a/backend/internal/httpd/api.go b/backend/internal/httpd/api.go index 9480cdad..78bfc6f8 100644 --- a/backend/internal/httpd/api.go +++ b/backend/internal/httpd/api.go @@ -11,6 +11,7 @@ import ( "github.com/aoagents/agent-orchestrator/backend/internal/httpd/controllers" "github.com/aoagents/agent-orchestrator/backend/internal/httpd/envelope" "github.com/aoagents/agent-orchestrator/backend/internal/project" + "github.com/aoagents/agent-orchestrator/backend/internal/session" ) // APIDeps bundles every Manager the API layer's controllers depend on. @@ -19,6 +20,7 @@ import ( // registered but returns the OpenAPI-backed 501 response. type APIDeps struct { Projects project.Manager + Sessions session.Spawner } // API owns one controller per resource and is the single Register call the @@ -26,6 +28,7 @@ type APIDeps struct { type API struct { cfg config.Config projects *controllers.ProjectsController + sessions *controllers.SessionsController } // NewAPI constructs the API surface from its dependencies. cfg carries the @@ -37,6 +40,9 @@ func NewAPI(cfg config.Config, deps APIDeps) *API { projects: &controllers.ProjectsController{ Mgr: deps.Projects, }, + sessions: &controllers.SessionsController{ + Mgr: deps.Sessions, + }, } } @@ -55,6 +61,7 @@ func (a *API) Register(root chi.Router) { r.Group(func(r chi.Router) { r.Use(middleware.Timeout(timeout)) a.projects.Register(r) + a.sessions.Register(r) // Sibling REST controllers plug in here. }) // Surfaces that intentionally bypass the REST timeout register at this level. 
diff --git a/backend/internal/httpd/controllers/sessions.go b/backend/internal/httpd/controllers/sessions.go new file mode 100644 index 00000000..42c83b77 --- /dev/null +++ b/backend/internal/httpd/controllers/sessions.go @@ -0,0 +1,110 @@ +package controllers + +import ( + "encoding/json" + "errors" + "net/http" + "strings" + + "github.com/go-chi/chi/v5" + + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/httpd/envelope" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" + "github.com/aoagents/agent-orchestrator/backend/internal/project" + "github.com/aoagents/agent-orchestrator/backend/internal/session" +) + +// SessionsController owns the /sessions routes. Mgr nil means the Session +// Manager has not been wired into the daemon yet; the controller answers 503 +// "sessions_disabled" so the CLI gets an actionable signal instead of a panic. +type SessionsController struct { + Mgr session.Spawner +} + +// Register mounts the sessions routes on the supplied router. 
+func (c *SessionsController) Register(r chi.Router) { + r.Post("/sessions", c.spawn) +} + +type spawnRequest struct { + ProjectID string `json:"projectId"` + Prompt string `json:"prompt"` + Agent string `json:"agent,omitempty"` +} + +type spawnResponse struct { + SessionID string `json:"sessionId"` + WorkspacePath string `json:"workspacePath"` + RuntimeHandle string `json:"runtimeHandle"` +} + +func (c *SessionsController) spawn(w http.ResponseWriter, r *http.Request) { + if c.Mgr == nil { + envelope.WriteJSON(w, http.StatusServiceUnavailable, map[string]any{ + "error": "sessions_disabled", + "code": "SESSIONS_DISABLED", + "message": "Session Manager is not wired in this daemon", + }) + return + } + + var in spawnRequest + if err := json.NewDecoder(r.Body).Decode(&in); err != nil { + envelope.WriteAPIError(w, r, http.StatusBadRequest, "bad_request", "INVALID_JSON", "Invalid JSON body", nil) + return + } + projectID := strings.TrimSpace(in.ProjectID) + prompt := strings.TrimSpace(in.Prompt) + if projectID == "" { + envelope.WriteAPIError(w, r, http.StatusBadRequest, "bad_request", "PROJECT_ID_REQUIRED", "projectId is required", nil) + return + } + if prompt == "" { + envelope.WriteAPIError(w, r, http.StatusBadRequest, "bad_request", "PROMPT_REQUIRED", "prompt is required", nil) + return + } + + harness := domain.AgentHarness(strings.TrimSpace(in.Agent)) + if harness == "" { + harness = domain.HarnessClaudeCode + } + + sess, err := c.Mgr.Spawn(r.Context(), ports.SpawnConfig{ + ProjectID: domain.ProjectID(projectID), + Kind: domain.KindWorker, + Harness: harness, + Prompt: prompt, + }) + if err != nil { + writeSpawnError(w, r, err) + return + } + + envelope.WriteJSON(w, http.StatusCreated, spawnResponse{ + SessionID: string(sess.ID), + WorkspacePath: sess.Metadata.WorkspacePath, + RuntimeHandle: sess.Metadata.RuntimeHandleID, + }) +} + +// writeSpawnError maps an SM-returned error to the right HTTP status. 
A +// project.Error in the chain (most commonly "unknown project" from the +// projectresolver) becomes 404; anything else surfaces as 500 SPAWN_FAILED. +func writeSpawnError(w http.ResponseWriter, r *http.Request, err error) { + var pe *project.Error + if errors.As(err, &pe) { + status := http.StatusInternalServerError + switch pe.Kind { + case "bad_request": + status = http.StatusBadRequest + case "not_found": + status = http.StatusNotFound + case "conflict": + status = http.StatusConflict + } + envelope.WriteAPIError(w, r, status, pe.Kind, pe.Code, pe.Message, pe.Details) + return + } + envelope.WriteAPIError(w, r, http.StatusInternalServerError, "internal", "SPAWN_FAILED", "Failed to spawn session", nil) +} diff --git a/backend/internal/httpd/controllers/sessions_test.go b/backend/internal/httpd/controllers/sessions_test.go new file mode 100644 index 00000000..580481e3 --- /dev/null +++ b/backend/internal/httpd/controllers/sessions_test.go @@ -0,0 +1,193 @@ +package controllers_test + +import ( + "context" + "errors" + "fmt" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "sync" + "testing" + + "github.com/aoagents/agent-orchestrator/backend/internal/config" + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/httpd" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" + "github.com/aoagents/agent-orchestrator/backend/internal/project" +) + +// fakeSpawner records the SpawnConfig it was called with and returns the +// canned Session/error. It satisfies session.Spawner. 
+type fakeSpawner struct { + mu sync.Mutex + calls []ports.SpawnConfig + session domain.Session + err error +} + +func (f *fakeSpawner) Spawn(_ context.Context, cfg ports.SpawnConfig) (domain.Session, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.calls = append(f.calls, cfg) + if f.err != nil { + return domain.Session{}, f.err + } + return f.session, nil +} + +func (f *fakeSpawner) recorded() []ports.SpawnConfig { + f.mu.Lock() + defer f.mu.Unlock() + out := make([]ports.SpawnConfig, len(f.calls)) + copy(out, f.calls) + return out +} + +func sessionsServer(t *testing.T, spawner *fakeSpawner) *httptest.Server { + t.Helper() + log := slog.New(slog.NewTextHandler(io.Discard, nil)) + deps := httpd.APIDeps{} + if spawner != nil { + deps.Sessions = spawner + } + srv := httptest.NewServer(httpd.NewRouterWithAPI(config.Config{}, log, nil, deps)) + t.Cleanup(srv.Close) + return srv +} + +func TestSessionsAPI_Spawn_Success(t *testing.T) { + spawner := &fakeSpawner{ + session: domain.Session{ + SessionRecord: domain.SessionRecord{ + ID: "demo-1", + ProjectID: "demo", + Kind: domain.KindWorker, + Harness: domain.HarnessClaudeCode, + Metadata: domain.SessionMetadata{ + WorkspacePath: "/tmp/demo-1", + RuntimeHandleID: "zellij-demo-1", + }, + }, + }, + } + srv := sessionsServer(t, spawner) + + body, status, headers := doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"demo","prompt":"do the thing","agent":"claude-code"}`) + if status != http.StatusCreated { + t.Fatalf("status = %d, want 201; body=%s", status, body) + } + assertJSON(t, headers) + + var out struct { + SessionID string `json:"sessionId"` + WorkspacePath string `json:"workspacePath"` + RuntimeHandle string `json:"runtimeHandle"` + } + mustJSON(t, body, &out) + if out.SessionID != "demo-1" || out.WorkspacePath != "/tmp/demo-1" || out.RuntimeHandle != "zellij-demo-1" { + t.Fatalf("response = %#v", out) + } + + got := spawner.recorded() + if len(got) != 1 { + t.Fatalf("spawn calls = %d, want 1", len(got)) 
+ } + if got[0].ProjectID != "demo" || got[0].Prompt != "do the thing" || got[0].Harness != domain.HarnessClaudeCode || got[0].Kind != domain.KindWorker { + t.Fatalf("recorded spawn = %#v", got[0]) + } +} + +func TestSessionsAPI_Spawn_DefaultsAgentToClaudeCode(t *testing.T) { + spawner := &fakeSpawner{ + session: domain.Session{ + SessionRecord: domain.SessionRecord{ID: "demo-2", ProjectID: "demo"}, + }, + } + srv := sessionsServer(t, spawner) + + body, status, _ := doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"demo","prompt":"do the thing"}`) + if status != http.StatusCreated { + t.Fatalf("status = %d, want 201; body=%s", status, body) + } + got := spawner.recorded() + if len(got) != 1 || got[0].Harness != domain.HarnessClaudeCode { + t.Fatalf("default agent not applied: %#v", got) + } +} + +func TestSessionsAPI_Spawn_BadRequest(t *testing.T) { + cases := []struct { + name, body, wantCode string + }{ + {name: "invalid json", body: `{`, wantCode: "INVALID_JSON"}, + {name: "missing projectId", body: `{"prompt":"x"}`, wantCode: "PROJECT_ID_REQUIRED"}, + {name: "blank projectId", body: `{"projectId":" ","prompt":"x"}`, wantCode: "PROJECT_ID_REQUIRED"}, + {name: "missing prompt", body: `{"projectId":"demo"}`, wantCode: "PROMPT_REQUIRED"}, + {name: "blank prompt", body: `{"projectId":"demo","prompt":" "}`, wantCode: "PROMPT_REQUIRED"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + spawner := &fakeSpawner{} + srv := sessionsServer(t, spawner) + body, status, _ := doRequest(t, srv, "POST", "/api/v1/sessions", tc.body) + assertErrorCode(t, body, status, http.StatusBadRequest, tc.wantCode) + if len(spawner.recorded()) != 0 { + t.Fatalf("spawn was called for invalid request") + } + }) + } +} + +func TestSessionsAPI_Spawn_UnknownProject(t *testing.T) { + spawner := &fakeSpawner{ + err: &project.Error{Kind: "not_found", Code: "PROJECT_NOT_FOUND", Message: "Unknown project"}, + } + srv := sessionsServer(t, spawner) + + body, status, _ 
:= doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"missing","prompt":"x"}`) + assertErrorCode(t, body, status, http.StatusNotFound, "PROJECT_NOT_FOUND") +} + +func TestSessionsAPI_Spawn_UnknownProjectWrapped(t *testing.T) { + // Mirror the real production wrap: session.Manager.Spawn returns + // `fmt.Errorf("spawn %s: workspace: %w", id, err)` over the projectresolver + // chain. The controller must unwrap *project.Error rather than match by + // string, so errors.As walks the linear %w chain. + inner := &project.Error{Kind: "not_found", Code: "PROJECT_NOT_FOUND", Message: "Unknown project"} + spawner := &fakeSpawner{ + err: fmt.Errorf("spawn demo-1: workspace: %w", fmt.Errorf("projectresolver: lookup %q: %w", "missing", inner)), + } + srv := sessionsServer(t, spawner) + + body, status, _ := doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"missing","prompt":"x"}`) + assertErrorCode(t, body, status, http.StatusNotFound, "PROJECT_NOT_FOUND") +} + +func TestSessionsAPI_Spawn_SessionsDisabled(t *testing.T) { + srv := sessionsServer(t, nil) + body, status, _ := doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"demo","prompt":"x"}`) + if status != http.StatusServiceUnavailable { + t.Fatalf("status = %d, want 503; body=%s", status, body) + } + var got errorBody + mustJSON(t, body, &got) + if got.Error != "sessions_disabled" { + t.Fatalf("error = %q, want sessions_disabled\nbody=%s", got.Error, body) + } +} + +func TestSessionsAPI_Spawn_InternalFailure(t *testing.T) { + spawner := &fakeSpawner{err: errors.New("runtime boom")} + srv := sessionsServer(t, spawner) + + body, status, _ := doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"demo","prompt":"x"}`) + assertErrorCode(t, body, status, http.StatusInternalServerError, "SPAWN_FAILED") +} diff --git a/backend/internal/session/spawner.go b/backend/internal/session/spawner.go new file mode 100644 index 00000000..243308f5 --- /dev/null +++ 
// Spawner is the slice of the Session Manager the HTTP controller depends on.
// *Manager satisfies it; tests can substitute a fake without dragging in the
// runtime/workspace/agent collaborators a real Manager needs.
type Spawner interface {
	// Spawn takes a spawn configuration and returns the resulting session,
	// or an error.
	Spawn(ctx context.Context, cfg ports.SpawnConfig) (domain.Session, error)
}

// Compile-time check that *Manager implements Spawner.
var _ Spawner = (*Manager)(nil)
Error classification follows the spec: - ErrRateLimited: short-circuit rest of tick (don't burn through remaining sessions while GitHub asks us to back off) - ErrAuthFailed: flip Healthy() to false (sticky — does NOT auto-recover, because a 304-cached success doesn't actually exercise the token) and continue - other: log warn, continue No-token environments degrade gracefully: startSCM logs an Info notice and returns a closed done-channel; Stop is a free call. --- .../adapters/scm/github/find_branch_pr.go | 94 ++++ .../scm/github/find_branch_pr_test.go | 131 +++++ backend/internal/daemon/daemon.go | 7 + backend/internal/daemon/scm_wiring.go | 61 ++ .../internal/integration/scm_poller_test.go | 185 +++++++ backend/internal/observe/scm/poller.go | 364 ++++++++++++ backend/internal/observe/scm/poller_test.go | 519 ++++++++++++++++++ 7 files changed, 1361 insertions(+) create mode 100644 backend/internal/adapters/scm/github/find_branch_pr.go create mode 100644 backend/internal/adapters/scm/github/find_branch_pr_test.go create mode 100644 backend/internal/daemon/scm_wiring.go create mode 100644 backend/internal/integration/scm_poller_test.go create mode 100644 backend/internal/observe/scm/poller.go create mode 100644 backend/internal/observe/scm/poller_test.go diff --git a/backend/internal/adapters/scm/github/find_branch_pr.go b/backend/internal/adapters/scm/github/find_branch_pr.go new file mode 100644 index 00000000..7ee79981 --- /dev/null +++ b/backend/internal/adapters/scm/github/find_branch_pr.go @@ -0,0 +1,94 @@ +package github + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/url" + "strconv" + "strings" + "time" +) + +// FindOpenPRForBranch returns the canonical github.com URL of the most +// recently updated open PR whose head ref is "{owner}:{branch}", or "" +// with a nil error when no open PR matches. 
+// +// The poller uses this for branch-based discovery: since the session +// record does not (yet) carry a stored PR URL, the only way to find +// "the PR for this session" is by the workspace branch. The endpoint +// hit is GET /repos/{owner}/{repo}/pulls?head={owner}:{branch}&state=open +// per the GitHub REST API. +// +// When multiple open PRs share the same head ref (rare but legal — +// e.g. forks that pushed to the same branch name), we pick the most +// recently updated one rather than failing closed. Failing closed +// would silently stop observing the PR every time a stale duplicate +// shows up. +func (p *Provider) FindOpenPRForBranch(ctx context.Context, owner, repo, branch string) (string, error) { + owner = strings.TrimSpace(owner) + repo = strings.TrimSpace(repo) + branch = strings.TrimSpace(branch) + if owner == "" || repo == "" || branch == "" { + return "", fmt.Errorf("github scm: FindOpenPRForBranch requires owner/repo/branch (got %q/%q/%q)", owner, repo, branch) + } + + q := url.Values{} + q.Set("state", "open") + q.Set("head", owner+":"+branch) + q.Set("per_page", "100") + + resp, err := p.client.doREST(ctx, http.MethodGet, repoPath(owner, repo, "pulls"), q, nil) + if err != nil { + return "", err + } + if len(resp.Body) == 0 { + return "", nil + } + var list []listedPR + if err := json.Unmarshal(resp.Body, &list); err != nil { + return "", fmt.Errorf("github scm: decode pulls list: %w", err) + } + if len(list) == 0 { + return "", nil + } + + best := -1 + var bestTime time.Time + for i, pr := range list { + if !strings.EqualFold(pr.State, "open") { + continue + } + t := parsePRTimestamp(pr.UpdatedAt) + if best < 0 || t.After(bestTime) { + best = i + bestTime = t + } + } + if best < 0 { + return "", nil + } + chosen := list[best] + if chosen.HTMLURL != "" { + return chosen.HTMLURL, nil + } + // Construct the canonical web URL from owner/repo/number when the + // API response omits html_url (some enterprise responses elide it). 
+ return "https://github.com/" + owner + "/" + repo + "/pull/" + strconv.Itoa(chosen.Number), nil +} + +type listedPR struct { + Number int `json:"number"` + State string `json:"state"` + HTMLURL string `json:"html_url"` + UpdatedAt string `json:"updated_at"` +} + +func parsePRTimestamp(s string) time.Time { + t, err := time.Parse(time.RFC3339, s) + if err != nil { + return time.Time{} + } + return t +} diff --git a/backend/internal/adapters/scm/github/find_branch_pr_test.go b/backend/internal/adapters/scm/github/find_branch_pr_test.go new file mode 100644 index 00000000..39b5be77 --- /dev/null +++ b/backend/internal/adapters/scm/github/find_branch_pr_test.go @@ -0,0 +1,131 @@ +package github + +import ( + "encoding/json" + "errors" + "net/http" + "strings" + "testing" +) + +func TestFindOpenPRForBranchSingleMatch(t *testing.T) { + fake := newFakeGH(t) + p := newProviderForTest(t, fake) + fake.on(http.MethodGet, "/repos/acme/repo/pulls", func(w http.ResponseWriter, r *http.Request) { + if got := r.URL.Query().Get("head"); got != "acme:feat/x" { + t.Errorf("head query = %q, want acme:feat/x", got) + } + if got := r.URL.Query().Get("state"); got != "open" { + t.Errorf("state query = %q, want open", got) + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode([]map[string]any{ + {"number": 7, "state": "open", "html_url": "https://github.com/acme/repo/pull/7", "updated_at": "2026-05-01T10:00:00Z"}, + }) + }) + + url, err := p.FindOpenPRForBranch(ctx(), "acme", "repo", "feat/x") + if err != nil { + t.Fatalf("FindOpenPRForBranch: %v", err) + } + if url != "https://github.com/acme/repo/pull/7" { + t.Fatalf("url = %q", url) + } +} + +func TestFindOpenPRForBranchNoMatch(t *testing.T) { + fake := newFakeGH(t) + p := newProviderForTest(t, fake) + fake.on(http.MethodGet, "/repos/acme/repo/pulls", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte("[]")) + }) + url, err := 
p.FindOpenPRForBranch(ctx(), "acme", "repo", "feat/x") + if err != nil { + t.Fatalf("FindOpenPRForBranch: %v", err) + } + if url != "" { + t.Fatalf("url = %q, want empty", url) + } +} + +func TestFindOpenPRForBranchMultiplePicksMostRecent(t *testing.T) { + fake := newFakeGH(t) + p := newProviderForTest(t, fake) + fake.on(http.MethodGet, "/repos/acme/repo/pulls", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode([]map[string]any{ + {"number": 1, "state": "open", "html_url": "https://github.com/acme/repo/pull/1", "updated_at": "2026-01-01T00:00:00Z"}, + {"number": 9, "state": "open", "html_url": "https://github.com/acme/repo/pull/9", "updated_at": "2026-05-01T00:00:00Z"}, + {"number": 4, "state": "open", "html_url": "https://github.com/acme/repo/pull/4", "updated_at": "2026-03-01T00:00:00Z"}, + }) + }) + url, err := p.FindOpenPRForBranch(ctx(), "acme", "repo", "feat/x") + if err != nil { + t.Fatalf("FindOpenPRForBranch: %v", err) + } + if url != "https://github.com/acme/repo/pull/9" { + t.Fatalf("url = %q, want pull/9", url) + } +} + +func TestFindOpenPRForBranchEmptyInputsError(t *testing.T) { + fake := newFakeGH(t) + p := newProviderForTest(t, fake) + for _, tc := range []struct{ owner, repo, branch string }{ + {"", "repo", "b"}, + {"o", "", "b"}, + {"o", "r", ""}, + } { + _, err := p.FindOpenPRForBranch(ctx(), tc.owner, tc.repo, tc.branch) + if err == nil { + t.Errorf("expected error for empty input %+v", tc) + } + } +} + +func TestFindOpenPRForBranchRateLimited(t *testing.T) { + fake := newFakeGH(t) + p := newProviderForTest(t, fake) + fake.on(http.MethodGet, "/repos/acme/repo/pulls", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("X-RateLimit-Remaining", "0") + w.Header().Set("X-RateLimit-Reset", "1700000000") + w.WriteHeader(http.StatusForbidden) + _, _ = w.Write([]byte(`{"message":"API rate limit exceeded"}`)) + }) + _, err := p.FindOpenPRForBranch(ctx(), "acme", 
"repo", "feat/x") + if !errors.Is(err, ErrRateLimited) { + t.Fatalf("err = %v, want ErrRateLimited", err) + } +} + +func TestFindOpenPRForBranchAuthFailed(t *testing.T) { + fake := newFakeGH(t) + p := newProviderForTest(t, fake) + fake.on(http.MethodGet, "/repos/acme/repo/pulls", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + _, _ = w.Write([]byte(`{"message":"Bad credentials"}`)) + }) + _, err := p.FindOpenPRForBranch(ctx(), "acme", "repo", "feat/x") + if !errors.Is(err, ErrAuthFailed) { + t.Fatalf("err = %v, want ErrAuthFailed", err) + } +} + +func TestFindOpenPRForBranchSynthesizesURLWhenHTMLEmpty(t *testing.T) { + fake := newFakeGH(t) + p := newProviderForTest(t, fake) + fake.on(http.MethodGet, "/repos/acme/repo/pulls", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode([]map[string]any{ + {"number": 42, "state": "open", "updated_at": "2026-05-01T10:00:00Z"}, + }) + }) + url, err := p.FindOpenPRForBranch(ctx(), "acme", "repo", "feat/x") + if err != nil { + t.Fatalf("err = %v", err) + } + if !strings.HasSuffix(url, "/acme/repo/pull/42") { + t.Fatalf("url = %q, want suffix /acme/repo/pull/42", url) + } +} diff --git a/backend/internal/daemon/daemon.go b/backend/internal/daemon/daemon.go index 626656f5..c897a027 100644 --- a/backend/internal/daemon/daemon.go +++ b/backend/internal/daemon/daemon.go @@ -98,6 +98,12 @@ func Run() error { } _ = ss // sm: HTTP routes land in a follow-up PR (γ) + // SCM observation: polling Provider -> pr.Manager -> lifecycle nudges. + // Constructed after lifecycle so the PR Manager can forward observations + // to ApplyPRObservation; runs alongside the reaper as a sibling background + // loop. Missing GITHUB_TOKEN degrades gracefully (loop is not started). 
+ scmStk := startSCM(ctx, store, projects, lcStack.lcm, log) + runErr := srv.Run(ctx) // Shut the background goroutines down in order: cancel the context FIRST so @@ -105,6 +111,7 @@ func Run() error { // via defer) avoids the LIFO trap where a Stop() that blocks on ctx-cancel // runs before the cancel — which would hang any non-signal exit path. stop() + scmStk.Stop() lcStack.Stop() if err := cdcPipe.Stop(); err != nil { log.Error("cdc pipeline shutdown", "err", err) diff --git a/backend/internal/daemon/scm_wiring.go b/backend/internal/daemon/scm_wiring.go new file mode 100644 index 00000000..a0390cac --- /dev/null +++ b/backend/internal/daemon/scm_wiring.go @@ -0,0 +1,61 @@ +package daemon + +import ( + "context" + "errors" + "log/slog" + + scmgithub "github.com/aoagents/agent-orchestrator/backend/internal/adapters/scm/github" + "github.com/aoagents/agent-orchestrator/backend/internal/lifecycle" + "github.com/aoagents/agent-orchestrator/backend/internal/observe/scm" + "github.com/aoagents/agent-orchestrator/backend/internal/pr" + "github.com/aoagents/agent-orchestrator/backend/internal/project" + "github.com/aoagents/agent-orchestrator/backend/internal/storage/sqlite" +) + +// scmStack owns the SCM observation loop: a GitHub Provider, a pr.Manager +// that writes PR rows and forwards observations to lifecycle for nudges, +// and the polling goroutine that drives both. A nil-token environment +// degrades gracefully — the daemon still runs locally without SCM +// observation; PR-driven nudges (CI-failure log tail, review feedback, +// merge-conflict rebase) will not fire until a token is supplied. +type scmStack struct { + pollerDone <-chan struct{} +} + +// startSCM constructs and starts the SCM observation stack. The Provider +// reads its token from AO_GITHUB_TOKEN (preferred) or GITHUB_TOKEN, both +// via os.Getenv. Without a token, the poller is not started and a no-op +// done channel is returned — Stop is a free call in that case. 
+func startSCM(ctx context.Context, store *sqlite.Store, projects project.Manager, lcm *lifecycle.Manager, log *slog.Logger) *scmStack { + tokenSource := scmgithub.EnvTokenSource{EnvVars: []string{"AO_GITHUB_TOKEN", "GITHUB_TOKEN"}} + provider, err := scmgithub.NewProvider(scmgithub.ProviderOptions{Token: tokenSource}) + if err != nil { + if errors.Is(err, scmgithub.ErrNoToken) { + log.Info("scm poller: no GITHUB_TOKEN configured, SCM observation disabled") + } else { + log.Warn("scm poller: provider construction failed, SCM observation disabled", "err", err) + } + return &scmStack{pollerDone: closedDone()} + } + prMgr := pr.New(pr.Deps{Writer: store, Lifecycle: lcm}) + poller := scm.New(scm.Deps{ + Provider: provider, + Branches: provider, + Sessions: store, + Projects: projects, + PR: prMgr, + Logger: log, + }) + return &scmStack{pollerDone: poller.Start(ctx)} +} + +// Stop waits for the poller goroutine to exit. The caller must cancel the +// ctx passed to startSCM before calling Stop. 
// closedDone returns a fresh receive-only channel that has already been
// closed, so a receive on it never blocks. It stands in for the poller's
// done channel when no poller was started.
func closedDone() <-chan struct{} {
	done := make(chan struct{})
	close(done)
	return done
}
+ prURL := "https://github.com/acme/repo/pull/77" + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = io.ReadAll(r.Body) + w.Header().Set("Content-Type", "application/json") + switch { + case r.Method == http.MethodGet && r.URL.Path == "/repos/acme/repo/pulls": + if got := r.URL.Query().Get("head"); got != "acme:feat/x" { + t.Errorf("pulls list head = %q, want acme:feat/x", got) + } + _ = json.NewEncoder(w).Encode([]map[string]any{ + {"number": 77, "state": "open", "html_url": prURL, "updated_at": "2026-05-15T10:00:00Z"}, + }) + case r.Method == http.MethodGet && r.URL.Path == "/repos/acme/repo/pulls/77": + w.Header().Set("ETag", `W/"v1"`) + _ = json.NewEncoder(w).Encode(map[string]any{ + "number": 77, + "state": "open", + "draft": false, + "merged": false, + "merged_at": nil, + "html_url": prURL, + "head": map[string]any{"ref": "feat/x", "sha": "deadbeef"}, + "base": map[string]any{"ref": "main"}, + "mergeable": false, + "rebaseable": true, + "mergeable_state": "blocked", + "merge_state_status": "BLOCKED", + }) + case r.Method == http.MethodPost && r.URL.Path == "/graphql": + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{ + "repository": map[string]any{ + "pullRequest": map[string]any{ + "number": 77, + "url": prURL, + "state": "OPEN", + "isDraft": false, + "merged": false, + "closed": false, + "mergeable": "MERGEABLE", + "mergeStateStatus": "BLOCKED", + "reviewDecision": "REVIEW_REQUIRED", + "headRefOid": "deadbeef", + "commits": map[string]any{"nodes": []any{ + map[string]any{"commit": map[string]any{ + "oid": "deadbeef", + "statusCheckRollup": map[string]any{ + "state": "FAILURE", + "contexts": map[string]any{ + "nodes": []any{ + map[string]any{ + "__typename": "CheckRun", + "name": "build", + "status": "COMPLETED", + "conclusion": "FAILURE", + "detailsUrl": "https://github.com/acme/repo/runs/9001", + "databaseId": float64(9001), + }, + }, + "pageInfo": map[string]any{"hasNextPage": 
false}, + }, + }, + }}, + }}, + "reviewThreads": map[string]any{"nodes": []any{}}, + }, + }, + }, + }) + case r.Method == http.MethodGet && r.URL.Path == "/repos/acme/repo/actions/jobs/9001/logs": + w.Header().Set("Content-Type", "text/plain") + _, _ = w.Write([]byte("FAIL TestX\nFAIL TestY\n")) + default: + t.Errorf("unexpected request: %s %s", r.Method, r.URL.Path) + http.Error(w, "no handler", http.StatusNotImplemented) + } + })) + t.Cleanup(server.Close) + + provider, err := scmgithub.NewProvider(scmgithub.ProviderOptions{ + Token: scmgithub.StaticTokenSource("tkn"), + HTTPClient: server.Client(), + RESTBase: server.URL, + GraphQLURL: server.URL + "/graphql", + }) + if err != nil { + t.Fatal(err) + } + + projects := project.NewManager(st.store) + poller := scm.New(scm.Deps{ + Provider: provider, + Branches: provider, + Sessions: st.store, + Projects: projects, + PR: st.prm, + Interval: time.Hour, // ticker won't fire — we call Tick directly + ObserveTimeout: 5 * time.Second, + RemoteResolver: func(context.Context, string) (string, error) { + // The project Row.RepoOriginURL is set above, so this fallback + // should never be called; failing loudly catches a regression + // where the poller silently shells out instead of using + // project.Repo. 
+ t.Fatalf("remote resolver should not be invoked when project.Repo is set") + return "", nil + }, + }) + + if err := poller.Tick(ctx); err != nil { + t.Fatalf("poller.Tick: %v", err) + } + + got, ok, err := st.store.GetPR(ctx, prURL) + if err != nil { + t.Fatal(err) + } + if !ok { + t.Fatalf("pr row not written for %s", prURL) + } + if got.SessionID != sess.ID { + t.Errorf("pr.SessionID = %q, want %q", got.SessionID, sess.ID) + } + if got.CI != domain.CIFailing { + t.Errorf("pr.CI = %q, want %q", got.CI, domain.CIFailing) + } + checks, err := st.store.ListChecks(ctx, prURL) + if err != nil { + t.Fatal(err) + } + if len(checks) != 1 || checks[0].Status != domain.PRCheckFailed { + t.Fatalf("checks = %+v", checks) + } + + if len(st.msg.msgs) != 1 { + t.Fatalf("expected exactly 1 lifecycle nudge, got %d (a double-nudge would regress sendOnce)", len(st.msg.msgs)) + } + if !strings.Contains(st.msg.msgs[0], "CI is failing") { + t.Errorf("messenger did not receive CI-failure body; got %q", st.msg.msgs[0]) + } + if !strings.Contains(st.msg.msgs[0], "FAIL TestX") { + t.Errorf("messenger did not receive log-tail body; got %q", st.msg.msgs[0]) + } +} diff --git a/backend/internal/observe/scm/poller.go b/backend/internal/observe/scm/poller.go new file mode 100644 index 00000000..e907d954 --- /dev/null +++ b/backend/internal/observe/scm/poller.go @@ -0,0 +1,364 @@ +// Package scm implements the OBSERVE-layer polling loop that drives +// SCM (pull-request) observations into the PR Manager and Lifecycle +// Manager. The loop is intentionally dumb: every tick it lists alive +// sessions, finds the open PR for each session's branch, asks the +// Provider for an observation, and hands the result to the PR +// Manager (which transactionally writes the row and forwards to +// lifecycle for nudges). +// +// The poller does not own any reaction logic. 
CI-failure log-tail +// nudges, review-feedback nudges (capped at reviewMaxNudge), and +// merge-conflict rebase nudges all live in lifecycle.ApplyPRObservation. +// Polling is uniform 30s for v1; per-PR adaptive cadence is a follow-up. +package scm + +import ( + "context" + "errors" + "log/slog" + "net/url" + "os/exec" + "strings" + "sync/atomic" + "time" + + scmgithub "github.com/aoagents/agent-orchestrator/backend/internal/adapters/scm/github" + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" + "github.com/aoagents/agent-orchestrator/backend/internal/project" +) + +// DefaultInterval is the cadence used when Deps.Interval is zero. +const DefaultInterval = 30 * time.Second + +// DefaultObserveTimeout caps one Provider.Observe call so a single hung +// request can't stall the whole tick. +const DefaultObserveTimeout = 15 * time.Second + +// Provider observes one PR by its canonical URL. The github adapter +// satisfies this; other SCM adapters (gitlab, etc.) can implement the +// same surface without touching the poller. +type Provider interface { + Observe(ctx context.Context, prURL string) (ports.PRObservation, error) +} + +// BranchPRFinder resolves a session's branch to its open PR URL. v1 +// uses this because sessions do not (yet) carry a PR URL field; when +// they do, the poller will prefer the stored URL and only fall back +// here. An empty return with nil error means "no matching open PR". +type BranchPRFinder interface { + FindOpenPRForBranch(ctx context.Context, owner, repo, branch string) (string, error) +} + +// sessionLister narrows the sqlite store to what the poller needs. +type sessionLister interface { + ListAllSessions(ctx context.Context) ([]domain.SessionRecord, error) +} + +// projectGetter narrows project.Manager to its read path. 
// projectGetter narrows project.Manager to its read path.
type projectGetter interface {
	// Get looks up one project by id.
	Get(ctx context.Context, id domain.ProjectID) (project.GetResult, error)
}

// prApplier is the seam over pr.Manager.ApplyObservation — which itself
// persists the PR row and forwards to lifecycle for nudges. Keeping
// this one method on the seam means the poller never needs to know
// about lifecycle directly.
type prApplier interface {
	// ApplyObservation hands one observation for session id to the PR layer.
	ApplyObservation(ctx context.Context, id domain.SessionID, o ports.PRObservation) error
}

// remoteResolver shells out to git to read a repo's origin URL.
// Injected so tests don't require a real git checkout.
type remoteResolver func(ctx context.Context, projectPath string) (string, error)

// Deps groups every collaborator the Poller needs. Zero-valued
// optional fields fall back to safe defaults (slog.Default, 30s tick,
// 15s observe deadline, real `git` for origin lookup).
type Deps struct {
	// Provider fetches the observation for one PR URL.
	Provider Provider
	// Branches maps a session branch to its open PR URL. When nil, no PR
	// URL is resolved and sessions are skipped.
	Branches BranchPRFinder
	// Sessions lists the session records to poll.
	Sessions sessionLister
	// Projects looks up the project a session belongs to.
	Projects projectGetter
	// PR receives each fetched observation.
	PR prApplier
	// Logger defaults to slog.Default() when nil.
	Logger *slog.Logger
	// Interval is the tick cadence; values <= 0 select DefaultInterval.
	Interval time.Duration
	// ObserveTimeout bounds one Provider.Observe call; values <= 0 select
	// DefaultObserveTimeout.
	ObserveTimeout time.Duration
	// RemoteResolver reads a project's origin URL; nil selects
	// defaultRemoteResolver.
	RemoteResolver func(ctx context.Context, projectPath string) (string, error)
}

// Poller is the SCM observation loop. Construct it with New, start the
// background goroutine with Start. Tick is exported so daemon and tests
// can drive a single cycle synchronously.
type Poller struct {
	provider       Provider
	branches       BranchPRFinder
	sessions       sessionLister
	projects       projectGetter
	pr             prApplier
	logger         *slog.Logger
	interval       time.Duration
	observeTimeout time.Duration
	remoteResolver remoteResolver

	// healthy is set to true by New and to false once an error matching
	// ErrAuthFailed has been classified; read through Healthy().
	healthy atomic.Bool
}
+func New(d Deps) *Poller { + p := &Poller{ + provider: d.Provider, + branches: d.Branches, + sessions: d.Sessions, + projects: d.Projects, + pr: d.PR, + logger: d.Logger, + interval: d.Interval, + observeTimeout: d.ObserveTimeout, + remoteResolver: d.RemoteResolver, + } + if p.interval <= 0 { + p.interval = DefaultInterval + } + if p.observeTimeout <= 0 { + p.observeTimeout = DefaultObserveTimeout + } + if p.logger == nil { + p.logger = slog.Default() + } + if p.remoteResolver == nil { + p.remoteResolver = defaultRemoteResolver + } + p.healthy.Store(true) + return p +} + +// Healthy reports whether the SCM provider's authentication has been +// observed working since the poller started. It starts true and flips +// to false the first time the provider returns ErrAuthFailed; it does +// NOT auto-recover, because a single subsequent success could be an +// ETag-cached 304 that didn't actually exercise the token. A future +// health route consumes this bit; clearing it after token rotation is +// a daemon-restart concern. +func (p *Poller) Healthy() bool { return p.healthy.Load() } + +// Start launches the background goroutine and returns a channel that +// closes once the loop has exited. The loop exits when ctx is cancelled; +// callers should wait on the returned channel before tearing down the +// PR Manager / lifecycle / store dependencies. +func (p *Poller) Start(ctx context.Context) <-chan struct{} { + done := make(chan struct{}) + go p.loop(ctx, done) + return done +} + +func (p *Poller) loop(ctx context.Context, done chan<- struct{}) { + defer close(done) + t := time.NewTicker(p.interval) + defer t.Stop() + for { + select { + case <-ctx.Done(): + return + case <-t.C: + if err := p.Tick(ctx); err != nil { + p.logger.Error("scm poller: tick failed", "err", err) + } + } + } +} + +// Tick runs one observation cycle. 
+// +// It lists every session, skips terminated rows and rows without a +// branch, resolves each remaining session's open PR URL via the +// BranchPRFinder, asks the Provider for an observation under a +// per-call deadline, and hands a successful observation to the PR +// Manager. Errors are classified by sentinel: +// - ErrRateLimited: short-circuit the rest of the tick (don't burn +// through remaining sessions while GitHub is asking us to back off). +// - ErrAuthFailed: flip Healthy() to false; continue with the next +// session so a single misconfigured token does not stall the loop. +// - other: log warn, continue. +// +// A session-listing failure is the only error Tick propagates; it +// short-circuits the cycle just like the reaper. +func (p *Poller) Tick(ctx context.Context) error { + sessions, err := p.sessions.ListAllSessions(ctx) + if err != nil { + return err + } + for _, sess := range sessions { + if sess.IsTerminated || sess.Metadata.Branch == "" { + continue + } + if err := ctx.Err(); err != nil { + return err + } + stop := p.pollOne(ctx, sess) + if stop { + return nil + } + } + return nil +} + +// pollOne handles one session. Returns stop=true when the caller +// should short-circuit the remaining sessions (rate-limit signal). 
+func (p *Poller) pollOne(ctx context.Context, sess domain.SessionRecord) bool { + prURL, err := p.resolvePRURL(ctx, sess) + if err != nil { + return p.classify(sess.ID, "resolve-pr-url", err) + } + if prURL == "" { + p.logger.Debug("scm poller: no open PR for branch, skipping", + "session", sess.ID, "branch", sess.Metadata.Branch) + return false + } + + pollCtx, cancel := context.WithTimeout(ctx, p.observeTimeout) + defer cancel() + obs, err := p.provider.Observe(pollCtx, prURL) + if err != nil { + return p.classify(sess.ID, "observe", err) + } + if !obs.Fetched { + p.logger.Debug("scm poller: observation not fetched, skipping", + "session", sess.ID, "url", prURL) + return false + } + if err := p.pr.ApplyObservation(ctx, sess.ID, obs); err != nil { + p.logger.Warn("scm poller: apply observation failed", + "session", sess.ID, "err", err) + } + return false +} + +// classify maps a Provider/lookup error to the loop's continue/stop +// decision and surfaces it in the logs. Auth-class failures flip the +// Healthy() bool; rate-limit signals stop the tick. +func (p *Poller) classify(sid domain.SessionID, stage string, err error) bool { + switch { + case errors.Is(err, scmgithub.ErrRateLimited): + p.logger.Warn("scm poller: rate limited, skipping rest of tick", + "session", sid, "stage", stage, "err", err) + return true + case errors.Is(err, scmgithub.ErrAuthFailed): + p.healthy.Store(false) + p.logger.Error("scm poller: auth failed, provider marked unhealthy", + "session", sid, "stage", stage, "err", err) + return false + default: + p.logger.Warn("scm poller: error", + "session", sid, "stage", stage, "err", err) + return false + } +} + +// resolvePRURL finds the open PR URL for a session's branch. +// +// v1 strategy: branch-based discovery. Look up the session's project, +// derive owner/repo from project.Repo (which today holds the origin URL), +// falling back to `git remote get-url origin` against the project's +// on-disk path, then ask BranchPRFinder. 
When neither yields an +// owner/repo, the session is silently skipped — that is not a poller bug, +// it's a project that hasn't been configured for SCM observation. +// +// When the session record grows a stored PR URL field (separate PR), +// this function should prefer it over branch discovery. +func (p *Poller) resolvePRURL(ctx context.Context, sess domain.SessionRecord) (string, error) { + if p.branches == nil { + return "", nil + } + res, err := p.projects.Get(ctx, sess.ProjectID) + if err != nil { + return "", err + } + if res.Project == nil { + return "", nil + } + owner, repo, ok := ownerRepoFromProject(*res.Project) + if !ok { + remoteURL, err := p.remoteResolver(ctx, res.Project.Path) + if err != nil { + p.logger.Debug("scm poller: git remote lookup failed, skipping session", + "session", sess.ID, "project", sess.ProjectID, "err", err) + return "", nil + } + owner, repo, ok = parseGitHubRemote(remoteURL) + if !ok { + return "", nil + } + } + return p.branches.FindOpenPRForBranch(ctx, owner, repo, sess.Metadata.Branch) +} + +// ownerRepoFromProject derives (owner, repo) from a Project. Today +// project.Repo holds the origin URL (despite the type comment claiming +// "owner/name") — so we try both shapes here without touching the +// project package. +func ownerRepoFromProject(p project.Project) (owner, repo string, ok bool) { + repoField := strings.TrimSpace(p.Repo) + if repoField == "" { + return "", "", false + } + if o, r, ok := parseGitHubRemote(repoField); ok { + return o, r, true + } + return "", "", false +} + +// parseGitHubRemote accepts both URL- and SSH-style remote strings and +// the bare "owner/repo" shorthand. It is intentionally host-agnostic — +// the github.Provider will reject non-github hosts at Observe time, so +// rejecting them here would just duplicate that check and silently drop +// legitimately-configured projects on enterprise hosts. 
//
// Recognised forms:
//   - https://github.com/owner/repo[.git]
//   - http(s)://host/owner/repo[.git]
//   - git@host:owner/repo[.git]
//   - ssh://git@host/owner/repo[.git]
//   - owner/repo
//
// Only the first two path segments are used; anything after the repo
// segment (a trailing slash, "/pulls", ...) is ignored. The ".git"
// suffix is stripped from the repo segment itself, so
// "https://host/owner/repo.git/" yields "repo" rather than "repo.git".
// On failure it returns empty strings and ok=false.
func parseGitHubRemote(s string) (owner, repo string, ok bool) {
	s = strings.TrimSpace(s)
	if s == "" {
		return "", "", false
	}
	switch {
	case strings.HasPrefix(s, "git@"):
		// scp-like syntax: everything after the first ':' is the path.
		_, path, found := strings.Cut(s, ":")
		if !found {
			return "", "", false
		}
		s = path
	case strings.Contains(s, "://"):
		u, err := url.Parse(s)
		if err != nil || u.Host == "" {
			return "", "", false
		}
		s = strings.TrimPrefix(u.Path, "/")
	}

	// Split into owner / repo / (ignored remainder).
	owner, rest, found := strings.Cut(s, "/")
	if !found {
		return "", "", false
	}
	repo, _, _ = strings.Cut(rest, "/")

	owner = strings.TrimSpace(owner)
	// Strip ".git" from the repo segment only; trimming it from the whole
	// string misses inputs that carry a trailing slash or extra segments.
	repo = strings.TrimSpace(strings.TrimSuffix(strings.TrimSpace(repo), ".git"))
	if owner == "" || repo == "" {
		return "", "", false
	}
	return owner, repo, true
}

// defaultRemoteResolver returns the URL of the "origin" remote for the
// git checkout at projectPath by running
// `git -C <projectPath> remote get-url origin`. It fails when
// projectPath is blank or when the git command returns an error.
func defaultRemoteResolver(ctx context.Context, projectPath string) (string, error) {
	if strings.TrimSpace(projectPath) == "" {
		return "", errors.New("scm poller: project has no path")
	}
	out, err := exec.CommandContext(ctx, "git", "-C", projectPath, "remote", "get-url", "origin").Output()
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(out)), nil
}
--------------------------------------------------------------------------- +// Fakes +// --------------------------------------------------------------------------- + +type fakeProvider struct { + mu sync.Mutex + calls []string + obs map[string]ports.PRObservation + errs map[string]error + hangFor time.Duration +} + +func (f *fakeProvider) Observe(ctx context.Context, prURL string) (ports.PRObservation, error) { + f.mu.Lock() + f.calls = append(f.calls, prURL) + hang := f.hangFor + f.mu.Unlock() + if hang > 0 { + select { + case <-time.After(hang): + case <-ctx.Done(): + return ports.PRObservation{URL: prURL}, ctx.Err() + } + } + f.mu.Lock() + defer f.mu.Unlock() + if err, ok := f.errs[prURL]; ok { + return ports.PRObservation{URL: prURL}, err + } + if o, ok := f.obs[prURL]; ok { + return o, nil + } + return ports.PRObservation{URL: prURL}, nil +} + +func (f *fakeProvider) seenURLs() []string { + f.mu.Lock() + defer f.mu.Unlock() + out := make([]string, len(f.calls)) + copy(out, f.calls) + return out +} + +type fakeBranches struct { + mu sync.Mutex + urls map[string]string // owner/repo/branch -> prURL + err error + callCount int +} + +func (f *fakeBranches) FindOpenPRForBranch(_ context.Context, owner, repo, branch string) (string, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.callCount++ + if f.err != nil { + return "", f.err + } + return f.urls[owner+"/"+repo+"/"+branch], nil +} + +type fakeSessions struct { + sessions []domain.SessionRecord + err error +} + +func (f *fakeSessions) ListAllSessions(context.Context) ([]domain.SessionRecord, error) { + if f.err != nil { + return nil, f.err + } + out := make([]domain.SessionRecord, len(f.sessions)) + copy(out, f.sessions) + return out, nil +} + +type fakeProjects struct { + projects map[domain.ProjectID]project.Project +} + +func (f *fakeProjects) Get(_ context.Context, id domain.ProjectID) (project.GetResult, error) { + p, ok := f.projects[id] + if !ok { + return project.GetResult{}, errors.New("project not 
found") + } + pp := p + return project.GetResult{Status: "ok", Project: &pp}, nil +} + +type fakePR struct { + mu sync.Mutex + applied []appliedObs + applyErr error +} + +type appliedObs struct { + id domain.SessionID + obs ports.PRObservation +} + +func (f *fakePR) ApplyObservation(_ context.Context, id domain.SessionID, o ports.PRObservation) error { + f.mu.Lock() + defer f.mu.Unlock() + f.applied = append(f.applied, appliedObs{id: id, obs: o}) + return f.applyErr +} + +func (f *fakePR) records() []appliedObs { + f.mu.Lock() + defer f.mu.Unlock() + out := make([]appliedObs, len(f.applied)) + copy(out, f.applied) + return out +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +func newTestPoller(t *testing.T, d Deps) *Poller { + t.Helper() + if d.Logger == nil { + d.Logger = slog.New(slog.NewTextHandler(testWriter{t}, &slog.HandlerOptions{Level: slog.LevelDebug})) + } + return New(d) +} + +type testWriter struct{ t *testing.T } + +func (w testWriter) Write(p []byte) (int, error) { + w.t.Log(string(p)) + return len(p), nil +} + +func aliveSession(id domain.SessionID, project domain.ProjectID, branch string) domain.SessionRecord { + return domain.SessionRecord{ + ID: id, + ProjectID: project, + Kind: domain.KindWorker, + Metadata: domain.SessionMetadata{Branch: branch, RuntimeHandleID: "h"}, + } +} + +func terminatedSession(id domain.SessionID, project domain.ProjectID, branch string) domain.SessionRecord { + s := aliveSession(id, project, branch) + s.IsTerminated = true + return s +} + +func githubProject(id domain.ProjectID) project.Project { + return project.Project{ID: id, Path: "/repo/" + string(id), Repo: "https://github.com/acme/repo.git"} +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +func 
TestTickObservesAliveSessionAndAppliesObservation(t *testing.T) { + ctx := context.Background() + sessions := &fakeSessions{sessions: []domain.SessionRecord{ + aliveSession("s-1", "acme", "feat/x"), + terminatedSession("s-2", "acme", "feat/y"), + }} + projects := &fakeProjects{projects: map[domain.ProjectID]project.Project{"acme": githubProject("acme")}} + branches := &fakeBranches{urls: map[string]string{ + "acme/repo/feat/x": "https://github.com/acme/repo/pull/11", + "acme/repo/feat/y": "https://github.com/acme/repo/pull/12", + }} + provider := &fakeProvider{obs: map[string]ports.PRObservation{ + "https://github.com/acme/repo/pull/11": {Fetched: true, URL: "https://github.com/acme/repo/pull/11", Number: 11, CI: domain.CIPassing}, + }} + prm := &fakePR{} + + p := newTestPoller(t, Deps{ + Provider: provider, + Branches: branches, + Sessions: sessions, + Projects: projects, + PR: prm, + }) + + if err := p.Tick(ctx); err != nil { + t.Fatalf("Tick error: %v", err) + } + + if got := provider.seenURLs(); len(got) != 1 || got[0] != "https://github.com/acme/repo/pull/11" { + t.Fatalf("provider.Observe calls = %v, want [pull/11] (terminated session skipped)", got) + } + rec := prm.records() + if len(rec) != 1 || rec[0].id != "s-1" || rec[0].obs.Number != 11 { + t.Fatalf("pr.ApplyObservation = %+v, want one call for s-1/pull-11", rec) + } +} + +func TestTickSkipsApplyWhenNotFetched(t *testing.T) { + ctx := context.Background() + sessions := &fakeSessions{sessions: []domain.SessionRecord{aliveSession("s-1", "acme", "feat/x")}} + projects := &fakeProjects{projects: map[domain.ProjectID]project.Project{"acme": githubProject("acme")}} + branches := &fakeBranches{urls: map[string]string{"acme/repo/feat/x": "https://github.com/acme/repo/pull/11"}} + provider := &fakeProvider{obs: map[string]ports.PRObservation{ + "https://github.com/acme/repo/pull/11": {Fetched: false, URL: "https://github.com/acme/repo/pull/11"}, + }} + prm := &fakePR{} + p := newTestPoller(t, Deps{Provider: 
provider, Branches: branches, Sessions: sessions, Projects: projects, PR: prm}) + + if err := p.Tick(ctx); err != nil { + t.Fatalf("Tick: %v", err) + } + if got := prm.records(); len(got) != 0 { + t.Fatalf("ApplyObservation called %d times on !Fetched obs", len(got)) + } +} + +func TestTickSkipsSessionsWithoutBranch(t *testing.T) { + ctx := context.Background() + noBranch := aliveSession("s-1", "acme", "") + sessions := &fakeSessions{sessions: []domain.SessionRecord{noBranch}} + projects := &fakeProjects{projects: map[domain.ProjectID]project.Project{"acme": githubProject("acme")}} + branches := &fakeBranches{} + provider := &fakeProvider{} + prm := &fakePR{} + p := newTestPoller(t, Deps{Provider: provider, Branches: branches, Sessions: sessions, Projects: projects, PR: prm}) + + if err := p.Tick(ctx); err != nil { + t.Fatalf("Tick: %v", err) + } + if got := provider.seenURLs(); len(got) != 0 { + t.Fatalf("provider should not be called for session without branch, got %v", got) + } + if got := branches.callCount; got != 0 { + t.Fatalf("branches lookup should not be called for session without branch, got %d", got) + } +} + +func TestTickSkipsSessionsWithNoOpenPR(t *testing.T) { + ctx := context.Background() + sessions := &fakeSessions{sessions: []domain.SessionRecord{aliveSession("s-1", "acme", "feat/x")}} + projects := &fakeProjects{projects: map[domain.ProjectID]project.Project{"acme": githubProject("acme")}} + branches := &fakeBranches{urls: map[string]string{}} // empty: no PR exists + provider := &fakeProvider{} + prm := &fakePR{} + p := newTestPoller(t, Deps{Provider: provider, Branches: branches, Sessions: sessions, Projects: projects, PR: prm}) + + if err := p.Tick(ctx); err != nil { + t.Fatalf("Tick: %v", err) + } + if got := provider.seenURLs(); len(got) != 0 { + t.Fatalf("provider should not be called when no PR found, got %v", got) + } +} + +func TestTickRateLimitShortCircuits(t *testing.T) { + ctx := context.Background() + sessions := 
&fakeSessions{sessions: []domain.SessionRecord{ + aliveSession("s-1", "acme", "feat/x"), + aliveSession("s-2", "acme", "feat/y"), + }} + projects := &fakeProjects{projects: map[domain.ProjectID]project.Project{"acme": githubProject("acme")}} + branches := &fakeBranches{urls: map[string]string{ + "acme/repo/feat/x": "https://github.com/acme/repo/pull/11", + "acme/repo/feat/y": "https://github.com/acme/repo/pull/12", + }} + provider := &fakeProvider{ + errs: map[string]error{ + "https://github.com/acme/repo/pull/11": scmgithub.ErrRateLimited, + }, + obs: map[string]ports.PRObservation{ + "https://github.com/acme/repo/pull/12": {Fetched: true, URL: "https://github.com/acme/repo/pull/12", Number: 12}, + }, + } + prm := &fakePR{} + p := newTestPoller(t, Deps{Provider: provider, Branches: branches, Sessions: sessions, Projects: projects, PR: prm}) + + if err := p.Tick(ctx); err != nil { + t.Fatalf("Tick: %v", err) + } + if got := provider.seenURLs(); len(got) != 1 { + t.Fatalf("expected exactly one Observe call (rate-limit short-circuits), got %v", got) + } + if got := prm.records(); len(got) != 0 { + t.Fatalf("no observations should be applied after rate-limit, got %d", len(got)) + } +} + +func TestTickAuthFailureMarksUnhealthyAndContinues(t *testing.T) { + ctx := context.Background() + sessions := &fakeSessions{sessions: []domain.SessionRecord{ + aliveSession("s-1", "acme", "feat/x"), + aliveSession("s-2", "acme", "feat/y"), + }} + projects := &fakeProjects{projects: map[domain.ProjectID]project.Project{"acme": githubProject("acme")}} + branches := &fakeBranches{urls: map[string]string{ + "acme/repo/feat/x": "https://github.com/acme/repo/pull/11", + "acme/repo/feat/y": "https://github.com/acme/repo/pull/12", + }} + provider := &fakeProvider{ + errs: map[string]error{ + "https://github.com/acme/repo/pull/11": scmgithub.ErrAuthFailed, + }, + obs: map[string]ports.PRObservation{ + "https://github.com/acme/repo/pull/12": {Fetched: true, URL: 
"https://github.com/acme/repo/pull/12", Number: 12, CI: domain.CIPassing}, + }, + } + prm := &fakePR{} + p := newTestPoller(t, Deps{Provider: provider, Branches: branches, Sessions: sessions, Projects: projects, PR: prm}) + if !p.Healthy() { + t.Fatalf("poller should start healthy") + } + + if err := p.Tick(ctx); err != nil { + t.Fatalf("Tick: %v", err) + } + if p.Healthy() { + t.Fatalf("poller should be unhealthy after ErrAuthFailed") + } + if got := provider.seenURLs(); len(got) != 2 { + t.Fatalf("expected provider to be called for both sessions, got %v", got) + } + rec := prm.records() + if len(rec) != 1 || rec[0].id != "s-2" { + t.Fatalf("expected one apply for s-2 after auth failure on s-1, got %+v", rec) + } +} + +func TestTickProjectLookupErrorContinues(t *testing.T) { + ctx := context.Background() + sessions := &fakeSessions{sessions: []domain.SessionRecord{ + aliveSession("s-1", "missing", "feat/x"), + aliveSession("s-2", "acme", "feat/y"), + }} + projects := &fakeProjects{projects: map[domain.ProjectID]project.Project{"acme": githubProject("acme")}} + branches := &fakeBranches{urls: map[string]string{ + "acme/repo/feat/y": "https://github.com/acme/repo/pull/12", + }} + provider := &fakeProvider{obs: map[string]ports.PRObservation{ + "https://github.com/acme/repo/pull/12": {Fetched: true, URL: "https://github.com/acme/repo/pull/12", Number: 12}, + }} + prm := &fakePR{} + p := newTestPoller(t, Deps{Provider: provider, Branches: branches, Sessions: sessions, Projects: projects, PR: prm}) + + if err := p.Tick(ctx); err != nil { + t.Fatalf("Tick: %v", err) + } + if got := prm.records(); len(got) != 1 || got[0].id != "s-2" { + t.Fatalf("expected s-2 applied after project-lookup err on s-1, got %+v", got) + } + if !p.Healthy() { + t.Fatalf("project lookup error should not mark unhealthy") + } +} + +func TestTickGenericErrorContinues(t *testing.T) { + ctx := context.Background() + sessions := &fakeSessions{sessions: []domain.SessionRecord{ + aliveSession("s-1", 
"acme", "feat/x"), + aliveSession("s-2", "acme", "feat/y"), + }} + projects := &fakeProjects{projects: map[domain.ProjectID]project.Project{"acme": githubProject("acme")}} + branches := &fakeBranches{urls: map[string]string{ + "acme/repo/feat/x": "https://github.com/acme/repo/pull/11", + "acme/repo/feat/y": "https://github.com/acme/repo/pull/12", + }} + provider := &fakeProvider{ + errs: map[string]error{ + "https://github.com/acme/repo/pull/11": errors.New("transient network blip"), + }, + obs: map[string]ports.PRObservation{ + "https://github.com/acme/repo/pull/12": {Fetched: true, URL: "https://github.com/acme/repo/pull/12", Number: 12}, + }, + } + prm := &fakePR{} + p := newTestPoller(t, Deps{Provider: provider, Branches: branches, Sessions: sessions, Projects: projects, PR: prm}) + if err := p.Tick(ctx); err != nil { + t.Fatalf("Tick: %v", err) + } + if got := prm.records(); len(got) != 1 || got[0].id != "s-2" { + t.Fatalf("expected s-2 applied after generic err on s-1, got %+v", got) + } + if !p.Healthy() { + t.Fatalf("generic errors should not mark unhealthy") + } +} + +func TestPerCallDeadline(t *testing.T) { + ctx := context.Background() + sessions := &fakeSessions{sessions: []domain.SessionRecord{aliveSession("s-1", "acme", "feat/x")}} + projects := &fakeProjects{projects: map[domain.ProjectID]project.Project{"acme": githubProject("acme")}} + branches := &fakeBranches{urls: map[string]string{"acme/repo/feat/x": "https://github.com/acme/repo/pull/11"}} + provider := &fakeProvider{hangFor: 200 * time.Millisecond} + prm := &fakePR{} + p := newTestPoller(t, Deps{ + Provider: provider, + Branches: branches, + Sessions: sessions, + Projects: projects, + PR: prm, + ObserveTimeout: 10 * time.Millisecond, + }) + start := time.Now() + if err := p.Tick(ctx); err != nil { + t.Fatalf("Tick: %v", err) + } + if elapsed := time.Since(start); elapsed > 150*time.Millisecond { + t.Fatalf("Tick took %v — per-call deadline did not fire", elapsed) + } + if got := 
prm.records(); len(got) != 0 { + t.Fatalf("no apply on deadline timeout, got %d", len(got)) + } +} + +func TestStartDrainsOnContextCancel(t *testing.T) { + sessions := &fakeSessions{} + projects := &fakeProjects{} + branches := &fakeBranches{} + provider := &fakeProvider{} + prm := &fakePR{} + p := newTestPoller(t, Deps{ + Provider: provider, Branches: branches, Sessions: sessions, Projects: projects, PR: prm, + Interval: 5 * time.Millisecond, + }) + ctx, cancel := context.WithCancel(context.Background()) + done := p.Start(ctx) + cancel() + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("poller did not exit within 1s of ctx cancel") + } +} + +func TestStartTicksRepeatedly(t *testing.T) { + var ticks atomic.Int32 + sessions := &fakeSessions{} + projects := &fakeProjects{} + branches := &fakeBranches{} + provider := &fakeProvider{} + prm := &fakePR{} + p := newTestPoller(t, Deps{ + Provider: provider, + Branches: branches, + Sessions: &countingSessions{wrap: sessions, ticks: &ticks}, + Projects: projects, + PR: prm, + Interval: 5 * time.Millisecond, + }) + ctx, cancel := context.WithCancel(context.Background()) + done := p.Start(ctx) + deadline := time.After(500 * time.Millisecond) +loop: + for { + if ticks.Load() >= 3 { + break + } + select { + case <-deadline: + break loop + case <-time.After(2 * time.Millisecond): + } + } + cancel() + <-done + if ticks.Load() < 2 { + t.Fatalf("expected at least 2 ticks, got %d", ticks.Load()) + } +} + +// countingSessions ticks the counter each time ListAllSessions is called. 
+type countingSessions struct { + wrap *fakeSessions + ticks *atomic.Int32 +} + +func (c *countingSessions) ListAllSessions(ctx context.Context) ([]domain.SessionRecord, error) { + c.ticks.Add(1) + return c.wrap.ListAllSessions(ctx) +} + +// --------------------------------------------------------------------------- +// owner/repo derivation +// --------------------------------------------------------------------------- + +func TestParseGitHubRemote(t *testing.T) { + tests := []struct{ in, owner, repo string }{ + {"https://github.com/acme/repo.git", "acme", "repo"}, + {"https://github.com/acme/repo", "acme", "repo"}, + {"git@github.com:acme/repo.git", "acme", "repo"}, + {"ssh://git@github.com/acme/repo.git", "acme", "repo"}, + {"acme/repo", "acme", "repo"}, + {"", "", ""}, + {"https://gitlab.com/x/y", "x", "y"}, // host-agnostic parser; provider rejects non-GitHub at Observe time + } + for _, tc := range tests { + owner, repo, ok := parseGitHubRemote(tc.in) + if tc.owner == "" { + if ok { + t.Errorf("parseGitHubRemote(%q): expected !ok, got %q/%q", tc.in, owner, repo) + } + continue + } + if !ok || owner != tc.owner || repo != tc.repo { + t.Errorf("parseGitHubRemote(%q) = %q/%q ok=%v; want %q/%q true", tc.in, owner, repo, ok, tc.owner, tc.repo) + } + } +} From 26f49dad673074405e16e187a94d2c84541f5fb4 Mon Sep 17 00:00:00 2001 From: harshitsinghbhandari <24b4506@iitb.ac.in> Date: Mon, 1 Jun 2026 22:05:45 +0530 Subject: [PATCH 11/17] =?UTF-8?q?fix(ao):=20address=20PR=20=CE=B3=20review?= =?UTF-8?q?=20+=20clear=20inherited=20lint=20debt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review fixes (PR #71): - spawn CLI now uses a dedicated 90 s timeout (90 s > server's 60 s DefaultRequestTimeout) via context.WithTimeout, and stops sharing deps.HTTPClient — that client is sized for fast /healthz/shutdown probes (2 s) and was preempting the synchronous Spawn long before the daemon could finish provisioning a worktree + zellij 
pane + agent. - Harden writeSpawnError so a *project.Error with a non-client Kind ("internal", "not_implemented", or anything unknown) falls through to the generic 500 SPAWN_FAILED envelope instead of passing the project error's Code/Message verbatim to the client. Adds three subtests that pin down the opacity contract. Lint debt cleared (inherited from PRs #65/#70): - Add doc comments on every exported symbol in the agent / claudecode / codex / adapters-registry packages (revive: exported) - gosec G306/G301: inbox file/dir perms 0644→0600 and 0755→0750 - gosec G703 (path traversal via taint): excluded globally with the same rationale as G304 — adapter paths are daemon-config/worktree-derived, not user input - gocritic emptyStringTest: len(strings.TrimSpace(...)) > 0 → != "" - gocritic paramTypeCombine: combine adjacent same-type params - errcheck: wrap deferred os.Remove(tmpName) in a closure - prealloc: preallocate cmd slices on the resume paths --- backend/.golangci.yml | 1 + backend/internal/adapters/agent/agent.go | 10 +++++-- .../adapters/agent/claudecode/claudecode.go | 12 ++++++-- .../adapters/agent/claudecode/hooks.go | 2 +- .../internal/adapters/agent/codex/codex.go | 11 ++++++- .../internal/adapters/agent/codex/hooks.go | 6 ++-- .../adapters/messenger/inbox/inbox.go | 4 +-- backend/internal/adapters/registry.go | 15 ++++++++++ backend/internal/cli/spawn.go | 21 ++++++++++++-- .../internal/httpd/controllers/sessions.go | 23 +++++++++------ .../httpd/controllers/sessions_test.go | 29 +++++++++++++++++++ 11 files changed, 111 insertions(+), 23 deletions(-) diff --git a/backend/.golangci.yml b/backend/.golangci.yml index 49b4127f..2a471bf5 100644 --- a/backend/.golangci.yml +++ b/backend/.golangci.yml @@ -85,6 +85,7 @@ linters: excludes: - G104 # unchecked errors — errcheck owns this - G304 # file inclusion via variable — paths are config/run-file/worktree-derived, not user input + - G703 # path traversal via taint — same rationale as G304: paths are 
daemon-owned exclusions: generated: lax # skip sqlc/codegen ("Code generated ... DO NOT EDIT") diff --git a/backend/internal/adapters/agent/agent.go b/backend/internal/adapters/agent/agent.go index 16d000a8..a0ef0b48 100644 --- a/backend/internal/adapters/agent/agent.go +++ b/backend/internal/adapters/agent/agent.go @@ -58,6 +58,7 @@ type ConfigField struct { // ConfigFieldType is the primitive value kind Better-AO expects for a field. type ConfigFieldType string +// Known ConfigFieldType values. const ( ConfigFieldString ConfigFieldType = "string" ConfigFieldBool ConfigFieldType = "bool" @@ -111,10 +112,12 @@ type SessionInfo struct { // PermissionMode controls how much review an agent requires before acting. type PermissionMode string +// Known PermissionMode values. +// +// PermissionModeDefault is special: adapters emit no flag for it so the agent +// resolves its starting mode from the user's own config (e.g. Claude's TUI +// reading ~/.claude/settings.json defaultMode). const ( - // "default" is special: adapters emit no flag for it so the agent resolves - // its starting mode from the user's own config (e.g. Claude's TUI reading - // ~/.claude/settings.json defaultMode). PermissionModeDefault PermissionMode = "default" PermissionModeAcceptEdits PermissionMode = "accept-edits" PermissionModeAuto PermissionMode = "auto" @@ -124,6 +127,7 @@ const ( // PromptDeliveryStrategy describes how Better-AO should deliver the initial prompt. type PromptDeliveryStrategy string +// Known PromptDeliveryStrategy values. 
const ( PromptDeliveryInCommand PromptDeliveryStrategy = "in_command" PromptDeliveryAfterStart PromptDeliveryStrategy = "after_start" diff --git a/backend/internal/adapters/agent/claudecode/claudecode.go b/backend/internal/adapters/agent/claudecode/claudecode.go index f4b5d6be..fb66fb7f 100644 --- a/backend/internal/adapters/agent/claudecode/claudecode.go +++ b/backend/internal/adapters/agent/claudecode/claudecode.go @@ -52,11 +52,13 @@ const ( // pre-hook sessions) agree without persisting anything. var claudeSessionNamespace = uuid.MustParse("a1f0c3d2-7b54-4e96-8a2b-0d9e1f2a3b4c") +// Plugin is the Claude Code adapter. The zero value is not usable; call New. type Plugin struct { binaryMu sync.Mutex resolvedBinary string } +// New constructs a Claude Code adapter instance. func New() *Plugin { return &Plugin{} } @@ -64,6 +66,7 @@ func New() *Plugin { var _ adapters.Adapter = (*Plugin)(nil) var _ agent.Agent = (*Plugin)(nil) +// Manifest reports the adapter's self-describing record. func (p *Plugin) Manifest() adapters.Manifest { return adapters.Manifest{ ID: adapterID, @@ -76,6 +79,8 @@ func (p *Plugin) Manifest() adapters.Manifest { } } +// GetConfigSpec returns the agent-specific config keys this adapter exposes. +// Claude Code has none today. func (p *Plugin) GetConfigSpec(ctx context.Context) (agent.ConfigSpec, error) { if err := ctx.Err(); err != nil { return agent.ConfigSpec{}, err @@ -132,6 +137,8 @@ func (p *Plugin) GetLaunchCommand(ctx context.Context, cfg agent.LaunchConfig) ( return cmd, nil } +// GetPromptDeliveryStrategy reports how Better-AO should deliver the initial +// prompt. Claude Code accepts it in the launch command. 
func (p *Plugin) GetPromptDeliveryStrategy(ctx context.Context, cfg agent.LaunchConfig) (agent.PromptDeliveryStrategy, error) { if err := ctx.Err(); err != nil { return "", err @@ -191,7 +198,8 @@ func (p *Plugin) GetRestoreCommand(ctx context.Context, cfg agent.RestoreConfig) if err != nil { return nil, false, err } - cmd = []string{binary} + cmd = make([]string, 0, 5) + cmd = append(cmd, binary) appendPermissionFlags(&cmd, cfg.Permissions) cmd = append(cmd, "--resume", sessionID) return cmd, true, nil @@ -415,7 +423,7 @@ func ensureWorkspaceTrusted(configPath, workspacePath string) error { return fmt.Errorf("claude-code: create temp config: %w", err) } tmpName := tmp.Name() - defer os.Remove(tmpName) // no-op once renamed + defer func() { _ = os.Remove(tmpName) }() // no-op once renamed if _, err := tmp.Write(out); err != nil { _ = tmp.Close() diff --git a/backend/internal/adapters/agent/claudecode/hooks.go b/backend/internal/adapters/agent/claudecode/hooks.go index 2adf5e60..16be91fe 100644 --- a/backend/internal/adapters/agent/claudecode/hooks.go +++ b/backend/internal/adapters/agent/claudecode/hooks.go @@ -61,7 +61,7 @@ func (p *Plugin) GetAgentHooks(ctx context.Context, cfg agent.WorkspaceHookConfi rawHooks := map[string]json.RawMessage{} if existingData, err := os.ReadFile(settingsPath); err == nil { - if len(strings.TrimSpace(string(existingData))) > 0 { + if strings.TrimSpace(string(existingData)) != "" { if err := json.Unmarshal(existingData, &topLevel); err != nil { return fmt.Errorf("claude-code.GetAgentHooks: parse %s: %w", settingsPath, err) } diff --git a/backend/internal/adapters/agent/codex/codex.go b/backend/internal/adapters/agent/codex/codex.go index f298c25e..bc04fd23 100644 --- a/backend/internal/adapters/agent/codex/codex.go +++ b/backend/internal/adapters/agent/codex/codex.go @@ -25,11 +25,13 @@ const ( codexSummaryMetadataKey = "summary" ) +// Plugin is the Codex adapter. The zero value is not usable; call New. 
type Plugin struct { binaryMu sync.Mutex resolvedBinary string } +// New constructs a Codex adapter instance. func New() *Plugin { return &Plugin{} } @@ -37,6 +39,7 @@ func New() *Plugin { var _ adapters.Adapter = (*Plugin)(nil) var _ agent.Agent = (*Plugin)(nil) +// Manifest reports the adapter's self-describing record. func (p *Plugin) Manifest() adapters.Manifest { return adapters.Manifest{ ID: "codex", @@ -49,6 +52,8 @@ func (p *Plugin) Manifest() adapters.Manifest { } } +// GetConfigSpec returns the agent-specific config keys this adapter exposes. +// Codex has none today. func (p *Plugin) GetConfigSpec(ctx context.Context) (agent.ConfigSpec, error) { if err := ctx.Err(); err != nil { return agent.ConfigSpec{}, err @@ -56,6 +61,7 @@ func (p *Plugin) GetConfigSpec(ctx context.Context) (agent.ConfigSpec, error) { return agent.ConfigSpec{}, nil } +// GetLaunchCommand builds the argv to start a fresh Codex session. func (p *Plugin) GetLaunchCommand(ctx context.Context, cfg agent.LaunchConfig) (cmd []string, err error) { binary, err := p.codexBinary(ctx) if err != nil { @@ -79,6 +85,8 @@ func (p *Plugin) GetLaunchCommand(ctx context.Context, cfg agent.LaunchConfig) ( return cmd, nil } +// GetPromptDeliveryStrategy reports how Better-AO should deliver the initial +// prompt. Codex accepts it in the launch command. 
func (p *Plugin) GetPromptDeliveryStrategy(ctx context.Context, cfg agent.LaunchConfig) (agent.PromptDeliveryStrategy, error) { if err := ctx.Err(); err != nil { return "", err @@ -104,7 +112,8 @@ func (p *Plugin) GetRestoreCommand(ctx context.Context, cfg agent.RestoreConfig) return nil, false, err } - cmd = []string{binary, "resume"} + cmd = make([]string, 0, 5) + cmd = append(cmd, binary, "resume") appendNoUpdateCheckFlag(&cmd) appendApprovalFlags(&cmd, cfg.Permissions) cmd = append(cmd, agentSessionID) diff --git a/backend/internal/adapters/agent/codex/hooks.go b/backend/internal/adapters/agent/codex/hooks.go index 15ec6cc6..d7b0ee89 100644 --- a/backend/internal/adapters/agent/codex/hooks.go +++ b/backend/internal/adapters/agent/codex/hooks.go @@ -57,7 +57,7 @@ func (p *Plugin) GetAgentHooks(ctx context.Context, cfg agent.WorkspaceHookConfi rawHooks := map[string]json.RawMessage{} if existingData, err := os.ReadFile(hooksPath); err == nil { - if len(strings.TrimSpace(string(existingData))) > 0 { + if strings.TrimSpace(string(existingData)) != "" { if err := json.Unmarshal(existingData, &topLevel); err != nil { return fmt.Errorf("codex.GetAgentHooks: parse %s: %w", hooksPath, err) } @@ -199,7 +199,7 @@ func ensureCodexHooksFeatureEnabled(workspacePath string) error { case strings.Contains(content, "[features]"): content = strings.Replace(content, "[features]", "[features]\n"+codexHooksFeatureLine, 1) default: - if len(content) > 0 && !strings.HasSuffix(content, "\n") { + if content != "" && !strings.HasSuffix(content, "\n") { content += "\n" } content += "\n[features]\n" + codexHooksFeatureLine + "\n" @@ -214,7 +214,7 @@ func ensureCodexHooksFeatureEnabled(workspacePath string) error { return nil } -func containsCodexFeatureLine(content string, line string) bool { +func containsCodexFeatureLine(content, line string) bool { for raw := range strings.SplitSeq(content, "\n") { if strings.TrimSpace(raw) == line { return true diff --git 
a/backend/internal/adapters/messenger/inbox/inbox.go b/backend/internal/adapters/messenger/inbox/inbox.go index 5a6c06c8..55aaf524 100644 --- a/backend/internal/adapters/messenger/inbox/inbox.go +++ b/backend/internal/adapters/messenger/inbox/inbox.go @@ -67,7 +67,7 @@ func (m *Messenger) Send(ctx context.Context, id domain.SessionID, message strin } name := filenameFor(m.clock(), message) - if err := os.WriteFile(filepath.Join(inboxDir, name), []byte(message), 0o644); err != nil { + if err := os.WriteFile(filepath.Join(inboxDir, name), []byte(message), 0o600); err != nil { return fmt.Errorf("inbox: write %s for %s: %w", name, id, err) } return nil @@ -94,7 +94,7 @@ func ensureRealDir(path string) error { } return nil case errors.Is(err, os.ErrNotExist): - return os.MkdirAll(path, 0o755) + return os.MkdirAll(path, 0o750) default: return err } diff --git a/backend/internal/adapters/registry.go b/backend/internal/adapters/registry.go index 68e5e275..a384979a 100644 --- a/backend/internal/adapters/registry.go +++ b/backend/internal/adapters/registry.go @@ -1,3 +1,6 @@ +// Package adapters defines the plugin contract every external integration +// (agent, tracker, scm, runtime) satisfies plus a registry that holds the +// concrete plugins the daemon resolves by id. package adapters import ( @@ -5,13 +8,16 @@ import ( "sort" ) +// Capability tags a Manifest with the role(s) a plugin fills. type Capability string +// Known capabilities. A plugin may advertise more than one. const ( CapabilityAgent Capability = "agent" CapabilityIssueTracker Capability = "issue-tracker" ) +// Manifest is the self-describing record every Adapter returns. type Manifest struct { ID string `json:"id"` Name string `json:"name"` @@ -20,20 +26,27 @@ type Manifest struct { Capabilities []Capability `json:"capabilities"` } +// Adapter is the minimal contract every registered plugin satisfies: it can +// describe itself via Manifest. Per-capability behaviour lives on richer +// interfaces (e.g. 
agent.Agent) that callers obtain via type assertion. type Adapter interface { Manifest() Manifest } +// Registry holds the daemon's resolved plugins, keyed by Manifest.ID. type Registry struct { adapters map[string]Adapter } +// NewRegistry returns an empty Registry ready to accept Register calls. func NewRegistry() *Registry { return &Registry{ adapters: make(map[string]Adapter), } } +// Register adds adapter under its Manifest.ID, returning an error when the id +// is empty or already in use. func (r *Registry) Register(adapter Adapter) error { manifest := adapter.Manifest() if manifest.ID == "" { @@ -54,6 +67,8 @@ func (r *Registry) Get(id string) (Adapter, bool) { return p, ok } +// Manifests returns every registered adapter's Manifest, sorted by id for +// deterministic output. func (r *Registry) Manifests() []Manifest { manifests := make([]Manifest, 0, len(r.adapters)) for _, adapter := range r.adapters { diff --git a/backend/internal/cli/spawn.go b/backend/internal/cli/spawn.go index ba8ea496..84e52720 100644 --- a/backend/internal/cli/spawn.go +++ b/backend/internal/cli/spawn.go @@ -9,6 +9,7 @@ import ( "io" "net/http" "strings" + "time" "github.com/spf13/cobra" @@ -16,6 +17,15 @@ import ( "github.com/aoagents/agent-orchestrator/backend/internal/runfile" ) +// spawnRequestTimeout bounds a single POST /api/v1/sessions call. It is +// deliberately longer than DefaultDeps.HTTPClient.Timeout (which is sized for +// fast probes like /healthz and /shutdown) because spawn synchronously creates +// a worktree, launches a zellij pane, and starts the agent — that can comfortably +// exceed 2 s on a cold cache. 90 s buys headroom over the server's +// config.DefaultRequestTimeout (60 s) without hanging the CLI forever on a +// truly stuck daemon. 
+const spawnRequestTimeout = 90 * time.Second + type spawnOptions struct { project string prompt string @@ -90,13 +100,20 @@ func (c *commandContext) spawnSession(ctx context.Context, out io.Writer, opts s } url := fmt.Sprintf("http://%s:%d/api/v1/sessions", config.LoopbackHost, info.Port) - req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) + + reqCtx, cancel := context.WithTimeout(ctx, spawnRequestTimeout) + defer cancel() + req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, url, bytes.NewReader(body)) if err != nil { return err } req.Header.Set("Content-Type", "application/json") - resp, err := c.deps.HTTPClient.Do(req) + // Use a dedicated client (no client-level timeout) so the deadline is + // driven solely by reqCtx. The shared deps.HTTPClient is sized for + // short-lived probes; reusing it here would preempt spawn long before + // the daemon could finish provisioning. + resp, err := (&http.Client{}).Do(req) if err != nil { return fmt.Errorf("daemon request: %w", err) } diff --git a/backend/internal/httpd/controllers/sessions.go b/backend/internal/httpd/controllers/sessions.go index 42c83b77..692cfae4 100644 --- a/backend/internal/httpd/controllers/sessions.go +++ b/backend/internal/httpd/controllers/sessions.go @@ -88,23 +88,28 @@ func (c *SessionsController) spawn(w http.ResponseWriter, r *http.Request) { }) } -// writeSpawnError maps an SM-returned error to the right HTTP status. A -// project.Error in the chain (most commonly "unknown project" from the -// projectresolver) becomes 404; anything else surfaces as 500 SPAWN_FAILED. +// writeSpawnError maps an SM-returned error to the right HTTP status. +// +// A *project.Error in the chain with a client-flavoured Kind ("bad_request", +// "not_found", "conflict") is surfaced verbatim — those are safe to show. 
Any +// other Kind ("internal", "not_implemented", or anything unknown) falls through +// to the generic 500 SPAWN_FAILED envelope rather than passing the project +// error's Code/Message back to the client, which may carry internal detail +// (store paths, schema versions, etc.) we don't want to leak. func writeSpawnError(w http.ResponseWriter, r *http.Request, err error) { var pe *project.Error if errors.As(err, &pe) { - status := http.StatusInternalServerError switch pe.Kind { case "bad_request": - status = http.StatusBadRequest + envelope.WriteAPIError(w, r, http.StatusBadRequest, pe.Kind, pe.Code, pe.Message, pe.Details) + return case "not_found": - status = http.StatusNotFound + envelope.WriteAPIError(w, r, http.StatusNotFound, pe.Kind, pe.Code, pe.Message, pe.Details) + return case "conflict": - status = http.StatusConflict + envelope.WriteAPIError(w, r, http.StatusConflict, pe.Kind, pe.Code, pe.Message, pe.Details) + return } - envelope.WriteAPIError(w, r, status, pe.Kind, pe.Code, pe.Message, pe.Details) - return } envelope.WriteAPIError(w, r, http.StatusInternalServerError, "internal", "SPAWN_FAILED", "Failed to spawn session", nil) } diff --git a/backend/internal/httpd/controllers/sessions_test.go b/backend/internal/httpd/controllers/sessions_test.go index 580481e3..0dbdc57c 100644 --- a/backend/internal/httpd/controllers/sessions_test.go +++ b/backend/internal/httpd/controllers/sessions_test.go @@ -191,3 +191,32 @@ func TestSessionsAPI_Spawn_InternalFailure(t *testing.T) { `{"projectId":"demo","prompt":"x"}`) assertErrorCode(t, body, status, http.StatusInternalServerError, "SPAWN_FAILED") } + +// TestSessionsAPI_Spawn_InternalKindIsOpaque verifies that a *project.Error +// with a non-client Kind (e.g. "internal" or "not_implemented") does not leak +// its Code/Message verbatim — those flavoured project errors should fall +// through to the generic SPAWN_FAILED envelope, same as any other 500. 
+func TestSessionsAPI_Spawn_InternalKindIsOpaque(t *testing.T) { + cases := []struct { + name string + err error + }{ + {name: "internal kind", err: &project.Error{Kind: "internal", Code: "PROJECT_STORE_CORRUPT", Message: "store file checksum mismatch"}}, + {name: "not_implemented kind", err: &project.Error{Kind: "not_implemented", Code: "PROJECT_CONFIG_NOT_IMPLEMENTED", Message: "Project config patching is not available"}}, + {name: "unknown kind", err: &project.Error{Kind: "weird", Code: "WEIRD_INTERNAL_THING", Message: "internal-only message"}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + srv := sessionsServer(t, &fakeSpawner{err: tc.err}) + body, status, _ := doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"demo","prompt":"x"}`) + assertErrorCode(t, body, status, http.StatusInternalServerError, "SPAWN_FAILED") + // And confirm the project.Error's internal Message/Code didn't slip into the body. + var got errorBody + mustJSON(t, body, &got) + if got.Message != "Failed to spawn session" { + t.Fatalf("internal message leaked into response: %q", got.Message) + } + }) + } +} From f8ec354362f6bccb9f46b3e7933999ac83997481 Mon Sep 17 00:00:00 2001 From: harshitsinghbhandari <24b4506@iitb.ac.in> Date: Mon, 1 Jun 2026 22:13:01 +0530 Subject: [PATCH 12/17] fix(lint): lift loop condition in scm poller test (staticcheck QF1006) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inherited from PR #72 merging into staging after this branch opened. golangci-lint v2.12.2 → 0 issues. 
--- backend/internal/observe/scm/poller_test.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/backend/internal/observe/scm/poller_test.go b/backend/internal/observe/scm/poller_test.go index d4350594..64e09162 100644 --- a/backend/internal/observe/scm/poller_test.go +++ b/backend/internal/observe/scm/poller_test.go @@ -462,10 +462,7 @@ func TestStartTicksRepeatedly(t *testing.T) { done := p.Start(ctx) deadline := time.After(500 * time.Millisecond) loop: - for { - if ticks.Load() >= 3 { - break - } + for ticks.Load() < 3 { select { case <-deadline: break loop From e72b5b9cabdcae77d9f64034bb4af51d93779919 Mon Sep 17 00:00:00 2001 From: Harshit Singh Bhandari Date: Mon, 1 Jun 2026 22:23:34 +0530 Subject: [PATCH 13/17] feat(ao): `ao spawn` CLI + POST /api/v1/sessions route (#71) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(ao): `ao spawn` CLI + POST /api/v1/sessions route * fix(ao): address PR γ review + clear inherited lint debt Review fixes (PR #71): - spawn CLI now uses a dedicated 90 s timeout (90 s > server's 60 s DefaultRequestTimeout) via context.WithTimeout, and stops sharing deps.HTTPClient — that client is sized for fast /healthz/shutdown probes (2 s) and was preempting the synchronous Spawn long before the daemon could finish provisioning a worktree + zellij pane + agent. - Harden writeSpawnError so a *project.Error with a non-client Kind ("internal", "not_implemented", or anything unknown) falls through to the generic 500 SPAWN_FAILED envelope instead of passing the project error's Code/Message verbatim to the client. Adds three subtests that pin down the opacity contract. 
Lint debt cleared (inherited from PRs #65/#70): - Add doc comments on every exported symbol in the agent / claudecode / codex / adapters-registry packages (revive: exported) - gosec G306/G301: inbox file/dir perms 0644→0600 and 0755→0750 - gosec G703 (path traversal via taint): excluded globally with the same rationale as G304 — adapter paths are daemon-config/worktree-derived, not user input - gocritic emptyStringTest: len(strings.TrimSpace(...)) > 0 → != "" - gocritic paramTypeCombine: combine adjacent same-type params - errcheck: wrap deferred os.Remove(tmpName) in a closure - prealloc: preallocate cmd slices on the resume paths * fix(lint): lift loop condition in scm poller test (staticcheck QF1006) Inherited from PR #72 merging into staging after this branch opened. golangci-lint v2.12.2 → 0 issues. --- backend/.golangci.yml | 1 + backend/internal/adapters/agent/agent.go | 10 +- .../adapters/agent/claudecode/claudecode.go | 12 +- .../adapters/agent/claudecode/hooks.go | 2 +- .../internal/adapters/agent/codex/codex.go | 11 +- .../internal/adapters/agent/codex/hooks.go | 6 +- .../adapters/messenger/inbox/inbox.go | 4 +- backend/internal/adapters/registry.go | 15 ++ backend/internal/cli/root.go | 1 + backend/internal/cli/spawn.go | 145 +++++++++++ backend/internal/cli/spawn_test.go | 230 ++++++++++++++++++ backend/internal/daemon/daemon.go | 16 +- backend/internal/httpd/api.go | 7 + .../internal/httpd/controllers/sessions.go | 115 +++++++++ .../httpd/controllers/sessions_test.go | 222 +++++++++++++++++ backend/internal/observe/scm/poller_test.go | 5 +- backend/internal/session/spawner.go | 17 ++ 17 files changed, 795 insertions(+), 24 deletions(-) create mode 100644 backend/internal/cli/spawn.go create mode 100644 backend/internal/cli/spawn_test.go create mode 100644 backend/internal/httpd/controllers/sessions.go create mode 100644 backend/internal/httpd/controllers/sessions_test.go create mode 100644 backend/internal/session/spawner.go diff --git 
a/backend/.golangci.yml b/backend/.golangci.yml index 49b4127f..2a471bf5 100644 --- a/backend/.golangci.yml +++ b/backend/.golangci.yml @@ -85,6 +85,7 @@ linters: excludes: - G104 # unchecked errors — errcheck owns this - G304 # file inclusion via variable — paths are config/run-file/worktree-derived, not user input + - G703 # path traversal via taint — same rationale as G304: paths are daemon-owned exclusions: generated: lax # skip sqlc/codegen ("Code generated ... DO NOT EDIT") diff --git a/backend/internal/adapters/agent/agent.go b/backend/internal/adapters/agent/agent.go index 16d000a8..a0ef0b48 100644 --- a/backend/internal/adapters/agent/agent.go +++ b/backend/internal/adapters/agent/agent.go @@ -58,6 +58,7 @@ type ConfigField struct { // ConfigFieldType is the primitive value kind Better-AO expects for a field. type ConfigFieldType string +// Known ConfigFieldType values. const ( ConfigFieldString ConfigFieldType = "string" ConfigFieldBool ConfigFieldType = "bool" @@ -111,10 +112,12 @@ type SessionInfo struct { // PermissionMode controls how much review an agent requires before acting. type PermissionMode string +// Known PermissionMode values. +// +// PermissionModeDefault is special: adapters emit no flag for it so the agent +// resolves its starting mode from the user's own config (e.g. Claude's TUI +// reading ~/.claude/settings.json defaultMode). const ( - // "default" is special: adapters emit no flag for it so the agent resolves - // its starting mode from the user's own config (e.g. Claude's TUI reading - // ~/.claude/settings.json defaultMode). PermissionModeDefault PermissionMode = "default" PermissionModeAcceptEdits PermissionMode = "accept-edits" PermissionModeAuto PermissionMode = "auto" @@ -124,6 +127,7 @@ const ( // PromptDeliveryStrategy describes how Better-AO should deliver the initial prompt. type PromptDeliveryStrategy string +// Known PromptDeliveryStrategy values. 
const ( PromptDeliveryInCommand PromptDeliveryStrategy = "in_command" PromptDeliveryAfterStart PromptDeliveryStrategy = "after_start" diff --git a/backend/internal/adapters/agent/claudecode/claudecode.go b/backend/internal/adapters/agent/claudecode/claudecode.go index f4b5d6be..fb66fb7f 100644 --- a/backend/internal/adapters/agent/claudecode/claudecode.go +++ b/backend/internal/adapters/agent/claudecode/claudecode.go @@ -52,11 +52,13 @@ const ( // pre-hook sessions) agree without persisting anything. var claudeSessionNamespace = uuid.MustParse("a1f0c3d2-7b54-4e96-8a2b-0d9e1f2a3b4c") +// Plugin is the Claude Code adapter. The zero value is not usable; call New. type Plugin struct { binaryMu sync.Mutex resolvedBinary string } +// New constructs a Claude Code adapter instance. func New() *Plugin { return &Plugin{} } @@ -64,6 +66,7 @@ func New() *Plugin { var _ adapters.Adapter = (*Plugin)(nil) var _ agent.Agent = (*Plugin)(nil) +// Manifest reports the adapter's self-describing record. func (p *Plugin) Manifest() adapters.Manifest { return adapters.Manifest{ ID: adapterID, @@ -76,6 +79,8 @@ func (p *Plugin) Manifest() adapters.Manifest { } } +// GetConfigSpec returns the agent-specific config keys this adapter exposes. +// Claude Code has none today. func (p *Plugin) GetConfigSpec(ctx context.Context) (agent.ConfigSpec, error) { if err := ctx.Err(); err != nil { return agent.ConfigSpec{}, err @@ -132,6 +137,8 @@ func (p *Plugin) GetLaunchCommand(ctx context.Context, cfg agent.LaunchConfig) ( return cmd, nil } +// GetPromptDeliveryStrategy reports how Better-AO should deliver the initial +// prompt. Claude Code accepts it in the launch command. 
func (p *Plugin) GetPromptDeliveryStrategy(ctx context.Context, cfg agent.LaunchConfig) (agent.PromptDeliveryStrategy, error) { if err := ctx.Err(); err != nil { return "", err @@ -191,7 +198,8 @@ func (p *Plugin) GetRestoreCommand(ctx context.Context, cfg agent.RestoreConfig) if err != nil { return nil, false, err } - cmd = []string{binary} + cmd = make([]string, 0, 5) + cmd = append(cmd, binary) appendPermissionFlags(&cmd, cfg.Permissions) cmd = append(cmd, "--resume", sessionID) return cmd, true, nil @@ -415,7 +423,7 @@ func ensureWorkspaceTrusted(configPath, workspacePath string) error { return fmt.Errorf("claude-code: create temp config: %w", err) } tmpName := tmp.Name() - defer os.Remove(tmpName) // no-op once renamed + defer func() { _ = os.Remove(tmpName) }() // no-op once renamed if _, err := tmp.Write(out); err != nil { _ = tmp.Close() diff --git a/backend/internal/adapters/agent/claudecode/hooks.go b/backend/internal/adapters/agent/claudecode/hooks.go index 2adf5e60..16be91fe 100644 --- a/backend/internal/adapters/agent/claudecode/hooks.go +++ b/backend/internal/adapters/agent/claudecode/hooks.go @@ -61,7 +61,7 @@ func (p *Plugin) GetAgentHooks(ctx context.Context, cfg agent.WorkspaceHookConfi rawHooks := map[string]json.RawMessage{} if existingData, err := os.ReadFile(settingsPath); err == nil { - if len(strings.TrimSpace(string(existingData))) > 0 { + if strings.TrimSpace(string(existingData)) != "" { if err := json.Unmarshal(existingData, &topLevel); err != nil { return fmt.Errorf("claude-code.GetAgentHooks: parse %s: %w", settingsPath, err) } diff --git a/backend/internal/adapters/agent/codex/codex.go b/backend/internal/adapters/agent/codex/codex.go index f298c25e..bc04fd23 100644 --- a/backend/internal/adapters/agent/codex/codex.go +++ b/backend/internal/adapters/agent/codex/codex.go @@ -25,11 +25,13 @@ const ( codexSummaryMetadataKey = "summary" ) +// Plugin is the Codex adapter. The zero value is not usable; call New. 
type Plugin struct { binaryMu sync.Mutex resolvedBinary string } +// New constructs a Codex adapter instance. func New() *Plugin { return &Plugin{} } @@ -37,6 +39,7 @@ func New() *Plugin { var _ adapters.Adapter = (*Plugin)(nil) var _ agent.Agent = (*Plugin)(nil) +// Manifest reports the adapter's self-describing record. func (p *Plugin) Manifest() adapters.Manifest { return adapters.Manifest{ ID: "codex", @@ -49,6 +52,8 @@ func (p *Plugin) Manifest() adapters.Manifest { } } +// GetConfigSpec returns the agent-specific config keys this adapter exposes. +// Codex has none today. func (p *Plugin) GetConfigSpec(ctx context.Context) (agent.ConfigSpec, error) { if err := ctx.Err(); err != nil { return agent.ConfigSpec{}, err @@ -56,6 +61,7 @@ func (p *Plugin) GetConfigSpec(ctx context.Context) (agent.ConfigSpec, error) { return agent.ConfigSpec{}, nil } +// GetLaunchCommand builds the argv to start a fresh Codex session. func (p *Plugin) GetLaunchCommand(ctx context.Context, cfg agent.LaunchConfig) (cmd []string, err error) { binary, err := p.codexBinary(ctx) if err != nil { @@ -79,6 +85,8 @@ func (p *Plugin) GetLaunchCommand(ctx context.Context, cfg agent.LaunchConfig) ( return cmd, nil } +// GetPromptDeliveryStrategy reports how Better-AO should deliver the initial +// prompt. Codex accepts it in the launch command. 
func (p *Plugin) GetPromptDeliveryStrategy(ctx context.Context, cfg agent.LaunchConfig) (agent.PromptDeliveryStrategy, error) { if err := ctx.Err(); err != nil { return "", err @@ -104,7 +112,8 @@ func (p *Plugin) GetRestoreCommand(ctx context.Context, cfg agent.RestoreConfig) return nil, false, err } - cmd = []string{binary, "resume"} + cmd = make([]string, 0, 5) + cmd = append(cmd, binary, "resume") appendNoUpdateCheckFlag(&cmd) appendApprovalFlags(&cmd, cfg.Permissions) cmd = append(cmd, agentSessionID) diff --git a/backend/internal/adapters/agent/codex/hooks.go b/backend/internal/adapters/agent/codex/hooks.go index 15ec6cc6..d7b0ee89 100644 --- a/backend/internal/adapters/agent/codex/hooks.go +++ b/backend/internal/adapters/agent/codex/hooks.go @@ -57,7 +57,7 @@ func (p *Plugin) GetAgentHooks(ctx context.Context, cfg agent.WorkspaceHookConfi rawHooks := map[string]json.RawMessage{} if existingData, err := os.ReadFile(hooksPath); err == nil { - if len(strings.TrimSpace(string(existingData))) > 0 { + if strings.TrimSpace(string(existingData)) != "" { if err := json.Unmarshal(existingData, &topLevel); err != nil { return fmt.Errorf("codex.GetAgentHooks: parse %s: %w", hooksPath, err) } @@ -199,7 +199,7 @@ func ensureCodexHooksFeatureEnabled(workspacePath string) error { case strings.Contains(content, "[features]"): content = strings.Replace(content, "[features]", "[features]\n"+codexHooksFeatureLine, 1) default: - if len(content) > 0 && !strings.HasSuffix(content, "\n") { + if content != "" && !strings.HasSuffix(content, "\n") { content += "\n" } content += "\n[features]\n" + codexHooksFeatureLine + "\n" @@ -214,7 +214,7 @@ func ensureCodexHooksFeatureEnabled(workspacePath string) error { return nil } -func containsCodexFeatureLine(content string, line string) bool { +func containsCodexFeatureLine(content, line string) bool { for raw := range strings.SplitSeq(content, "\n") { if strings.TrimSpace(raw) == line { return true diff --git 
a/backend/internal/adapters/messenger/inbox/inbox.go b/backend/internal/adapters/messenger/inbox/inbox.go index 5a6c06c8..55aaf524 100644 --- a/backend/internal/adapters/messenger/inbox/inbox.go +++ b/backend/internal/adapters/messenger/inbox/inbox.go @@ -67,7 +67,7 @@ func (m *Messenger) Send(ctx context.Context, id domain.SessionID, message strin } name := filenameFor(m.clock(), message) - if err := os.WriteFile(filepath.Join(inboxDir, name), []byte(message), 0o644); err != nil { + if err := os.WriteFile(filepath.Join(inboxDir, name), []byte(message), 0o600); err != nil { return fmt.Errorf("inbox: write %s for %s: %w", name, id, err) } return nil @@ -94,7 +94,7 @@ func ensureRealDir(path string) error { } return nil case errors.Is(err, os.ErrNotExist): - return os.MkdirAll(path, 0o755) + return os.MkdirAll(path, 0o750) default: return err } diff --git a/backend/internal/adapters/registry.go b/backend/internal/adapters/registry.go index 68e5e275..a384979a 100644 --- a/backend/internal/adapters/registry.go +++ b/backend/internal/adapters/registry.go @@ -1,3 +1,6 @@ +// Package adapters defines the plugin contract every external integration +// (agent, tracker, scm, runtime) satisfies plus a registry that holds the +// concrete plugins the daemon resolves by id. package adapters import ( @@ -5,13 +8,16 @@ import ( "sort" ) +// Capability tags a Manifest with the role(s) a plugin fills. type Capability string +// Known capabilities. A plugin may advertise more than one. const ( CapabilityAgent Capability = "agent" CapabilityIssueTracker Capability = "issue-tracker" ) +// Manifest is the self-describing record every Adapter returns. type Manifest struct { ID string `json:"id"` Name string `json:"name"` @@ -20,20 +26,27 @@ type Manifest struct { Capabilities []Capability `json:"capabilities"` } +// Adapter is the minimal contract every registered plugin satisfies: it can +// describe itself via Manifest. Per-capability behaviour lives on richer +// interfaces (e.g. 
agent.Agent) that callers obtain via type assertion. type Adapter interface { Manifest() Manifest } +// Registry holds the daemon's resolved plugins, keyed by Manifest.ID. type Registry struct { adapters map[string]Adapter } +// NewRegistry returns an empty Registry ready to accept Register calls. func NewRegistry() *Registry { return &Registry{ adapters: make(map[string]Adapter), } } +// Register adds adapter under its Manifest.ID, returning an error when the id +// is empty or already in use. func (r *Registry) Register(adapter Adapter) error { manifest := adapter.Manifest() if manifest.ID == "" { @@ -54,6 +67,8 @@ func (r *Registry) Get(id string) (Adapter, bool) { return p, ok } +// Manifests returns every registered adapter's Manifest, sorted by id for +// deterministic output. func (r *Registry) Manifests() []Manifest { manifests := make([]Manifest, 0, len(r.adapters)) for _, adapter := range r.adapters { diff --git a/backend/internal/cli/root.go b/backend/internal/cli/root.go index ce015738..9dfd49f4 100644 --- a/backend/internal/cli/root.go +++ b/backend/internal/cli/root.go @@ -146,6 +146,7 @@ func NewRootCommand(deps Deps) *cobra.Command { root.AddCommand(newStartCommand(ctx)) root.AddCommand(newStopCommand(ctx)) root.AddCommand(newStatusCommand(ctx)) + root.AddCommand(newSpawnCommand(ctx)) root.AddCommand(newDoctorCommand(ctx)) root.AddCommand(newCompletionCommand()) root.AddCommand(newVersionCommand()) diff --git a/backend/internal/cli/spawn.go b/backend/internal/cli/spawn.go new file mode 100644 index 00000000..84e52720 --- /dev/null +++ b/backend/internal/cli/spawn.go @@ -0,0 +1,145 @@ +package cli + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + "time" + + "github.com/spf13/cobra" + + "github.com/aoagents/agent-orchestrator/backend/internal/config" + "github.com/aoagents/agent-orchestrator/backend/internal/runfile" +) + +// spawnRequestTimeout bounds a single POST /api/v1/sessions call. 
It is +// deliberately longer than DefaultDeps.HTTPClient.Timeout (which is sized for +// fast probes like /healthz and /shutdown) because spawn synchronously creates +// a worktree, launches a zellij pane, and starts the agent — that can comfortably +// exceed 2 s on a cold cache. 90 s buys headroom over the server's +// config.DefaultRequestTimeout (60 s) without hanging the CLI forever on a +// truly stuck daemon. +const spawnRequestTimeout = 90 * time.Second + +type spawnOptions struct { + project string + prompt string + agent string +} + +func newSpawnCommand(ctx *commandContext) *cobra.Command { + var opts spawnOptions + cmd := &cobra.Command{ + Use: "spawn", + Short: "Spawn a new agent session", + Args: noArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + return ctx.spawnSession(cmd.Context(), cmd.OutOrStdout(), opts) + }, + } + cmd.Flags().StringVar(&opts.prompt, "prompt", "", "Initial prompt for the agent") + cmd.Flags().StringVar(&opts.project, "project", "", "Project id") + cmd.Flags().StringVar(&opts.agent, "agent", "claude-code", "Agent plugin") + return cmd +} + +type spawnAPIRequest struct { + ProjectID string `json:"projectId"` + Prompt string `json:"prompt"` + Agent string `json:"agent,omitempty"` +} + +type spawnAPIResponse struct { + SessionID string `json:"sessionId"` + WorkspacePath string `json:"workspacePath"` + RuntimeHandle string `json:"runtimeHandle"` +} + +type apiError struct { + Kind string `json:"error"` + Code string `json:"code"` + Message string `json:"message"` +} + +func (c *commandContext) spawnSession(ctx context.Context, out io.Writer, opts spawnOptions) error { + prompt := strings.TrimSpace(opts.prompt) + if prompt == "" { + return usageError{errors.New("usage: --prompt is required")} + } + project := strings.TrimSpace(opts.project) + if project == "" { + return usageError{errors.New("usage: --project is required")} + } + + cfg, err := config.Load() + if err != nil { + return err + } + + info, err := 
runfile.Read(cfg.RunFilePath) + if err != nil { + return fmt.Errorf("read run-file: %w", err) + } + if info == nil { + return errors.New("AO daemon is not running; start it with `ao start`") + } + + payload := spawnAPIRequest{ + ProjectID: project, + Prompt: prompt, + Agent: opts.agent, + } + body, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("encode request: %w", err) + } + + url := fmt.Sprintf("http://%s:%d/api/v1/sessions", config.LoopbackHost, info.Port) + + reqCtx, cancel := context.WithTimeout(ctx, spawnRequestTimeout) + defer cancel() + req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, url, bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + + // Use a dedicated client (no client-level timeout) so the deadline is + // driven solely by reqCtx. The shared deps.HTTPClient is sized for + // short-lived probes; reusing it here would preempt spawn long before + // the daemon could finish provisioning. + resp, err := (&http.Client{}).Do(req) + if err != nil { + return fmt.Errorf("daemon request: %w", err) + } + defer func() { _ = resp.Body.Close() }() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("read response: %w", err) + } + + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + var ok spawnAPIResponse + if err := json.Unmarshal(respBody, &ok); err != nil { + return fmt.Errorf("decode response: %w", err) + } + _, err := fmt.Fprintf(out, "Spawned session %s in %s\nAttach: zellij attach %s\n", + ok.SessionID, ok.WorkspacePath, ok.RuntimeHandle) + return err + } + + // Non-2xx: surface the server's error envelope when present, otherwise the + // raw status. Both 4xx and 5xx exit 1; usage errors (which exit 2) come from + // flag validation above. 
+ var apiErr apiError + if jerr := json.Unmarshal(respBody, &apiErr); jerr == nil && apiErr.Kind != "" { + return fmt.Errorf("%s: %s", apiErr.Kind, apiErr.Message) + } + return fmt.Errorf("daemon returned HTTP %d", resp.StatusCode) +} diff --git a/backend/internal/cli/spawn_test.go b/backend/internal/cli/spawn_test.go new file mode 100644 index 00000000..2638a4d4 --- /dev/null +++ b/backend/internal/cli/spawn_test.go @@ -0,0 +1,230 @@ +package cli + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "os" + "strings" + "testing" + "time" + + "github.com/aoagents/agent-orchestrator/backend/internal/runfile" +) + +// spawnServer wires up an httptest server, writes a runfile pointing at it, and +// returns the captured request body slot the caller assertions can read. +func spawnServer(t *testing.T, status int, respBody string) (*httptest.Server, *string) { + t.Helper() + var captured string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/v1/sessions" && r.Method == http.MethodPost { + body, err := io.ReadAll(r.Body) + if err != nil { + t.Fatalf("read req body: %v", err) + } + captured = string(body) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _, _ = io.WriteString(w, respBody) + return + } + http.NotFound(w, r) + })) + t.Cleanup(srv.Close) + return srv, &captured +} + +func writeRunFileFor(t *testing.T, cfg testConfig, srv *httptest.Server) { + t.Helper() + port := serverPort(t, srv.URL) + if err := runfile.Write(cfg.runFile, runfile.Info{ + PID: os.Getpid(), + Port: port, + StartedAt: time.Unix(100, 0).UTC(), + }); err != nil { + t.Fatal(err) + } +} + +func TestSpawn_Success(t *testing.T) { + cfg := setConfigEnv(t) + resp := `{"sessionId":"demo-1","workspacePath":"/tmp/demo-1","runtimeHandle":"zellij-demo-1"}` + srv, captured := spawnServer(t, http.StatusCreated, resp) + writeRunFileFor(t, cfg, srv) + + out, errOut, err := executeCLI(t, 
Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "demo", "--prompt", "do the thing", "--agent", "claude-code") + if err != nil { + t.Fatalf("unexpected error: %v\nstderr=%s", err, errOut) + } + if !strings.Contains(out, "Spawned session demo-1 in /tmp/demo-1") { + t.Fatalf("stdout missing spawn line:\n%s", out) + } + if !strings.Contains(out, "Attach: zellij attach zellij-demo-1") { + t.Fatalf("stdout missing attach line:\n%s", out) + } + + var req struct { + ProjectID string `json:"projectId"` + Prompt string `json:"prompt"` + Agent string `json:"agent"` + } + if err := json.Unmarshal([]byte(*captured), &req); err != nil { + t.Fatalf("decode captured req: %v\nbody=%s", err, *captured) + } + if req.ProjectID != "demo" || req.Prompt != "do the thing" || req.Agent != "claude-code" { + t.Fatalf("captured payload = %#v", req) + } +} + +func TestSpawn_DefaultsAgent(t *testing.T) { + cfg := setConfigEnv(t) + srv, captured := spawnServer(t, http.StatusCreated, + `{"sessionId":"demo-1","workspacePath":"/tmp/demo-1","runtimeHandle":"zellij-demo-1"}`) + writeRunFileFor(t, cfg, srv) + + _, errOut, err := executeCLI(t, Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "demo", "--prompt", "x") + if err != nil { + t.Fatalf("unexpected error: %v\nstderr=%s", err, errOut) + } + if !strings.Contains(*captured, `"agent":"claude-code"`) { + t.Fatalf("agent default not sent: %s", *captured) + } +} + +func TestSpawn_EmptyPromptIsUsageError(t *testing.T) { + setConfigEnv(t) + _, _, err := executeCLI(t, Deps{}, "spawn", "--project", "demo", "--prompt", " ") + if err == nil { + t.Fatal("expected usage error for empty prompt") + } + if got := ExitCode(err); got != 2 { + t.Fatalf("exit code = %d, want 2", got) + } + if !strings.Contains(err.Error(), "--prompt is required") { + t.Fatalf("error missing usage message: %v", err) + } +} + +func TestSpawn_MissingProjectIsUsageError(t *testing.T) { + setConfigEnv(t) + _, _, err := 
executeCLI(t, Deps{}, "spawn", "--prompt", "x") + if err == nil { + t.Fatal("expected usage error for missing project") + } + if got := ExitCode(err); got != 2 { + t.Fatalf("exit code = %d, want 2", got) + } +} + +func TestSpawn_ServerBadRequestExits1(t *testing.T) { + cfg := setConfigEnv(t) + srv, _ := spawnServer(t, http.StatusBadRequest, + `{"error":"bad_request","code":"PROMPT_REQUIRED","message":"prompt is required"}`) + writeRunFileFor(t, cfg, srv) + + _, errOut, err := executeCLI(t, Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "demo", "--prompt", "x") + if err == nil { + t.Fatal("expected runtime error from 400") + } + if got := ExitCode(err); got != 1 { + t.Fatalf("exit code = %d, want 1", got) + } + if !strings.Contains(err.Error(), "bad_request") && !strings.Contains(errOut, "bad_request") { + t.Fatalf("error did not include server kind: %v\nstderr=%s", err, errOut) + } +} + +func TestSpawn_ServerNotFoundExits1(t *testing.T) { + cfg := setConfigEnv(t) + srv, _ := spawnServer(t, http.StatusNotFound, + `{"error":"not_found","code":"PROJECT_NOT_FOUND","message":"Unknown project"}`) + writeRunFileFor(t, cfg, srv) + + _, _, err := executeCLI(t, Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "missing", "--prompt", "x") + if err == nil { + t.Fatal("expected runtime error from 404") + } + if got := ExitCode(err); got != 1 { + t.Fatalf("exit code = %d, want 1", got) + } +} + +func TestSpawn_ServerInternalErrorExits1(t *testing.T) { + cfg := setConfigEnv(t) + srv, _ := spawnServer(t, http.StatusInternalServerError, + `{"error":"internal","code":"SPAWN_FAILED","message":"Failed to spawn session"}`) + writeRunFileFor(t, cfg, srv) + + _, _, err := executeCLI(t, Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "demo", "--prompt", "x") + if err == nil { + t.Fatal("expected runtime error from 500") + } + if got := ExitCode(err); got != 1 { + t.Fatalf("exit code = %d, 
want 1", got) + } +} + +func TestSpawn_DaemonNotRunningExits1(t *testing.T) { + setConfigEnv(t) + // No runfile: daemon is stopped. + _, _, err := executeCLI(t, Deps{}, "spawn", "--project", "demo", "--prompt", "x") + if err == nil { + t.Fatal("expected error when daemon is not running") + } + if got := ExitCode(err); got != 1 { + t.Fatalf("exit code = %d, want 1", got) + } +} + +func TestSpawn_SessionsDisabledExits1(t *testing.T) { + cfg := setConfigEnv(t) + srv, _ := spawnServer(t, http.StatusServiceUnavailable, + `{"error":"sessions_disabled","code":"SESSIONS_DISABLED","message":"Session Manager is not wired in this daemon"}`) + writeRunFileFor(t, cfg, srv) + + _, errOut, err := executeCLI(t, Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "demo", "--prompt", "x") + if err == nil { + t.Fatal("expected error from 503") + } + if got := ExitCode(err); got != 1 { + t.Fatalf("exit code = %d, want 1", got) + } + if !strings.Contains(err.Error(), "sessions_disabled") && !strings.Contains(errOut, "sessions_disabled") { + t.Fatalf("error did not include sessions_disabled: %v\nstderr=%s", err, errOut) + } +} + +// Sanity helper: ensure the formatted spawn message is stable. 
+func TestSpawn_StdoutShape(t *testing.T) { + cfg := setConfigEnv(t) + srv, _ := spawnServer(t, http.StatusCreated, fmt.Sprintf( + `{"sessionId":%q,"workspacePath":%q,"runtimeHandle":%q}`, + "proj-7", "/tmp/proj-7", "zellij-proj-7")) + writeRunFileFor(t, cfg, srv) + + out, _, err := executeCLI(t, Deps{ + ProcessAlive: func(int) bool { return true }, + }, "spawn", "--project", "proj", "--prompt", "go") + if err != nil { + t.Fatal(err) + } + want := "Spawned session proj-7 in /tmp/proj-7\nAttach: zellij attach zellij-proj-7\n" + if out != want { + t.Fatalf("stdout mismatch:\n got %q\n want %q", out, want) + } +} diff --git a/backend/internal/daemon/daemon.go b/backend/internal/daemon/daemon.go index c897a027..ad360b21 100644 --- a/backend/internal/daemon/daemon.go +++ b/backend/internal/daemon/daemon.go @@ -73,21 +73,22 @@ func Run() error { termMgr := terminal.NewManager(runtimeAdapter, cdcPipe.Broadcaster, log) defer termMgr.Close() - srv, err := httpd.NewWithDeps(cfg, log, termMgr, httpd.APIDeps{Projects: projects}) + // Bring up the Lifecycle Manager + reaper, then the Session Manager stack + // over the same lcm/runtime/projects/messenger singletons. SM is constructed + // before the HTTP server so its Spawner can be plumbed into APIDeps and the + // /api/v1/sessions controller can drive it. + lcStack := startLifecycle(ctx, store, runtimeAdapter, messenger, log) + ss, err := buildSessionStack(cfg, store, runtimeAdapter, projects, lcStack.lcm, messenger) if err != nil { stop() + lcStack.Stop() if cdcErr := cdcPipe.Stop(); cdcErr != nil { log.Error("cdc pipeline shutdown", "err", cdcErr) } return err } - // Bring up the Lifecycle Manager + reaper, then the Session Manager stack - // over the same lcm/runtime/projects/messenger singletons. SM has no HTTP - // routes yet — they land in a follow-up PR; constructing it here lets the - // next PR hang controllers off ss.sm without further wiring changes. 
- lcStack := startLifecycle(ctx, store, runtimeAdapter, messenger, log) - ss, err := buildSessionStack(cfg, store, runtimeAdapter, projects, lcStack.lcm, messenger) + srv, err := httpd.NewWithDeps(cfg, log, termMgr, httpd.APIDeps{Projects: projects, Sessions: ss.sm}) if err != nil { stop() lcStack.Stop() @@ -96,7 +97,6 @@ func Run() error { } return err } - _ = ss // sm: HTTP routes land in a follow-up PR (γ) // SCM observation: polling Provider -> pr.Manager -> lifecycle nudges. // Constructed after lifecycle so the PR Manager can forward observations diff --git a/backend/internal/httpd/api.go b/backend/internal/httpd/api.go index 9480cdad..78bfc6f8 100644 --- a/backend/internal/httpd/api.go +++ b/backend/internal/httpd/api.go @@ -11,6 +11,7 @@ import ( "github.com/aoagents/agent-orchestrator/backend/internal/httpd/controllers" "github.com/aoagents/agent-orchestrator/backend/internal/httpd/envelope" "github.com/aoagents/agent-orchestrator/backend/internal/project" + "github.com/aoagents/agent-orchestrator/backend/internal/session" ) // APIDeps bundles every Manager the API layer's controllers depend on. @@ -19,6 +20,7 @@ import ( // registered but returns the OpenAPI-backed 501 response. type APIDeps struct { Projects project.Manager + Sessions session.Spawner } // API owns one controller per resource and is the single Register call the @@ -26,6 +28,7 @@ type APIDeps struct { type API struct { cfg config.Config projects *controllers.ProjectsController + sessions *controllers.SessionsController } // NewAPI constructs the API surface from its dependencies. 
cfg carries the @@ -37,6 +40,9 @@ func NewAPI(cfg config.Config, deps APIDeps) *API { projects: &controllers.ProjectsController{ Mgr: deps.Projects, }, + sessions: &controllers.SessionsController{ + Mgr: deps.Sessions, + }, } } @@ -55,6 +61,7 @@ func (a *API) Register(root chi.Router) { r.Group(func(r chi.Router) { r.Use(middleware.Timeout(timeout)) a.projects.Register(r) + a.sessions.Register(r) // Sibling REST controllers plug in here. }) // Surfaces that intentionally bypass the REST timeout register at this level. diff --git a/backend/internal/httpd/controllers/sessions.go b/backend/internal/httpd/controllers/sessions.go new file mode 100644 index 00000000..692cfae4 --- /dev/null +++ b/backend/internal/httpd/controllers/sessions.go @@ -0,0 +1,115 @@ +package controllers + +import ( + "encoding/json" + "errors" + "net/http" + "strings" + + "github.com/go-chi/chi/v5" + + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/httpd/envelope" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" + "github.com/aoagents/agent-orchestrator/backend/internal/project" + "github.com/aoagents/agent-orchestrator/backend/internal/session" +) + +// SessionsController owns the /sessions routes. Mgr nil means the Session +// Manager has not been wired into the daemon yet; the controller answers 503 +// "sessions_disabled" so the CLI gets an actionable signal instead of a panic. +type SessionsController struct { + Mgr session.Spawner +} + +// Register mounts the sessions routes on the supplied router. 
+func (c *SessionsController) Register(r chi.Router) { + r.Post("/sessions", c.spawn) +} + +type spawnRequest struct { + ProjectID string `json:"projectId"` + Prompt string `json:"prompt"` + Agent string `json:"agent,omitempty"` +} + +type spawnResponse struct { + SessionID string `json:"sessionId"` + WorkspacePath string `json:"workspacePath"` + RuntimeHandle string `json:"runtimeHandle"` +} + +func (c *SessionsController) spawn(w http.ResponseWriter, r *http.Request) { + if c.Mgr == nil { + envelope.WriteJSON(w, http.StatusServiceUnavailable, map[string]any{ + "error": "sessions_disabled", + "code": "SESSIONS_DISABLED", + "message": "Session Manager is not wired in this daemon", + }) + return + } + + var in spawnRequest + if err := json.NewDecoder(r.Body).Decode(&in); err != nil { + envelope.WriteAPIError(w, r, http.StatusBadRequest, "bad_request", "INVALID_JSON", "Invalid JSON body", nil) + return + } + projectID := strings.TrimSpace(in.ProjectID) + prompt := strings.TrimSpace(in.Prompt) + if projectID == "" { + envelope.WriteAPIError(w, r, http.StatusBadRequest, "bad_request", "PROJECT_ID_REQUIRED", "projectId is required", nil) + return + } + if prompt == "" { + envelope.WriteAPIError(w, r, http.StatusBadRequest, "bad_request", "PROMPT_REQUIRED", "prompt is required", nil) + return + } + + harness := domain.AgentHarness(strings.TrimSpace(in.Agent)) + if harness == "" { + harness = domain.HarnessClaudeCode + } + + sess, err := c.Mgr.Spawn(r.Context(), ports.SpawnConfig{ + ProjectID: domain.ProjectID(projectID), + Kind: domain.KindWorker, + Harness: harness, + Prompt: prompt, + }) + if err != nil { + writeSpawnError(w, r, err) + return + } + + envelope.WriteJSON(w, http.StatusCreated, spawnResponse{ + SessionID: string(sess.ID), + WorkspacePath: sess.Metadata.WorkspacePath, + RuntimeHandle: sess.Metadata.RuntimeHandleID, + }) +} + +// writeSpawnError maps an SM-returned error to the right HTTP status. 
+// +// A *project.Error in the chain with a client-flavoured Kind ("bad_request", +// "not_found", "conflict") is surfaced verbatim — those are safe to show. Any +// other Kind ("internal", "not_implemented", or anything unknown) falls through +// to the generic 500 SPAWN_FAILED envelope rather than passing the project +// error's Code/Message back to the client, which may carry internal detail +// (store paths, schema versions, etc.) we don't want to leak. +func writeSpawnError(w http.ResponseWriter, r *http.Request, err error) { + var pe *project.Error + if errors.As(err, &pe) { + switch pe.Kind { + case "bad_request": + envelope.WriteAPIError(w, r, http.StatusBadRequest, pe.Kind, pe.Code, pe.Message, pe.Details) + return + case "not_found": + envelope.WriteAPIError(w, r, http.StatusNotFound, pe.Kind, pe.Code, pe.Message, pe.Details) + return + case "conflict": + envelope.WriteAPIError(w, r, http.StatusConflict, pe.Kind, pe.Code, pe.Message, pe.Details) + return + } + } + envelope.WriteAPIError(w, r, http.StatusInternalServerError, "internal", "SPAWN_FAILED", "Failed to spawn session", nil) +} diff --git a/backend/internal/httpd/controllers/sessions_test.go b/backend/internal/httpd/controllers/sessions_test.go new file mode 100644 index 00000000..0dbdc57c --- /dev/null +++ b/backend/internal/httpd/controllers/sessions_test.go @@ -0,0 +1,222 @@ +package controllers_test + +import ( + "context" + "errors" + "fmt" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "sync" + "testing" + + "github.com/aoagents/agent-orchestrator/backend/internal/config" + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/httpd" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" + "github.com/aoagents/agent-orchestrator/backend/internal/project" +) + +// fakeSpawner records the SpawnConfig it was called with and returns the +// canned Session/error. It satisfies session.Spawner. 
+type fakeSpawner struct { + mu sync.Mutex + calls []ports.SpawnConfig + session domain.Session + err error +} + +func (f *fakeSpawner) Spawn(_ context.Context, cfg ports.SpawnConfig) (domain.Session, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.calls = append(f.calls, cfg) + if f.err != nil { + return domain.Session{}, f.err + } + return f.session, nil +} + +func (f *fakeSpawner) recorded() []ports.SpawnConfig { + f.mu.Lock() + defer f.mu.Unlock() + out := make([]ports.SpawnConfig, len(f.calls)) + copy(out, f.calls) + return out +} + +func sessionsServer(t *testing.T, spawner *fakeSpawner) *httptest.Server { + t.Helper() + log := slog.New(slog.NewTextHandler(io.Discard, nil)) + deps := httpd.APIDeps{} + if spawner != nil { + deps.Sessions = spawner + } + srv := httptest.NewServer(httpd.NewRouterWithAPI(config.Config{}, log, nil, deps)) + t.Cleanup(srv.Close) + return srv +} + +func TestSessionsAPI_Spawn_Success(t *testing.T) { + spawner := &fakeSpawner{ + session: domain.Session{ + SessionRecord: domain.SessionRecord{ + ID: "demo-1", + ProjectID: "demo", + Kind: domain.KindWorker, + Harness: domain.HarnessClaudeCode, + Metadata: domain.SessionMetadata{ + WorkspacePath: "/tmp/demo-1", + RuntimeHandleID: "zellij-demo-1", + }, + }, + }, + } + srv := sessionsServer(t, spawner) + + body, status, headers := doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"demo","prompt":"do the thing","agent":"claude-code"}`) + if status != http.StatusCreated { + t.Fatalf("status = %d, want 201; body=%s", status, body) + } + assertJSON(t, headers) + + var out struct { + SessionID string `json:"sessionId"` + WorkspacePath string `json:"workspacePath"` + RuntimeHandle string `json:"runtimeHandle"` + } + mustJSON(t, body, &out) + if out.SessionID != "demo-1" || out.WorkspacePath != "/tmp/demo-1" || out.RuntimeHandle != "zellij-demo-1" { + t.Fatalf("response = %#v", out) + } + + got := spawner.recorded() + if len(got) != 1 { + t.Fatalf("spawn calls = %d, want 1", len(got)) 
+ } + if got[0].ProjectID != "demo" || got[0].Prompt != "do the thing" || got[0].Harness != domain.HarnessClaudeCode || got[0].Kind != domain.KindWorker { + t.Fatalf("recorded spawn = %#v", got[0]) + } +} + +func TestSessionsAPI_Spawn_DefaultsAgentToClaudeCode(t *testing.T) { + spawner := &fakeSpawner{ + session: domain.Session{ + SessionRecord: domain.SessionRecord{ID: "demo-2", ProjectID: "demo"}, + }, + } + srv := sessionsServer(t, spawner) + + body, status, _ := doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"demo","prompt":"do the thing"}`) + if status != http.StatusCreated { + t.Fatalf("status = %d, want 201; body=%s", status, body) + } + got := spawner.recorded() + if len(got) != 1 || got[0].Harness != domain.HarnessClaudeCode { + t.Fatalf("default agent not applied: %#v", got) + } +} + +func TestSessionsAPI_Spawn_BadRequest(t *testing.T) { + cases := []struct { + name, body, wantCode string + }{ + {name: "invalid json", body: `{`, wantCode: "INVALID_JSON"}, + {name: "missing projectId", body: `{"prompt":"x"}`, wantCode: "PROJECT_ID_REQUIRED"}, + {name: "blank projectId", body: `{"projectId":" ","prompt":"x"}`, wantCode: "PROJECT_ID_REQUIRED"}, + {name: "missing prompt", body: `{"projectId":"demo"}`, wantCode: "PROMPT_REQUIRED"}, + {name: "blank prompt", body: `{"projectId":"demo","prompt":" "}`, wantCode: "PROMPT_REQUIRED"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + spawner := &fakeSpawner{} + srv := sessionsServer(t, spawner) + body, status, _ := doRequest(t, srv, "POST", "/api/v1/sessions", tc.body) + assertErrorCode(t, body, status, http.StatusBadRequest, tc.wantCode) + if len(spawner.recorded()) != 0 { + t.Fatalf("spawn was called for invalid request") + } + }) + } +} + +func TestSessionsAPI_Spawn_UnknownProject(t *testing.T) { + spawner := &fakeSpawner{ + err: &project.Error{Kind: "not_found", Code: "PROJECT_NOT_FOUND", Message: "Unknown project"}, + } + srv := sessionsServer(t, spawner) + + body, status, _ 
:= doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"missing","prompt":"x"}`) + assertErrorCode(t, body, status, http.StatusNotFound, "PROJECT_NOT_FOUND") +} + +func TestSessionsAPI_Spawn_UnknownProjectWrapped(t *testing.T) { + // Mirror the real production wrap: session.Manager.Spawn returns + // `fmt.Errorf("spawn %s: workspace: %w", id, err)` over the projectresolver + // chain. The controller must unwrap *project.Error rather than match by + // string, so errors.As walks the linear %w chain. + inner := &project.Error{Kind: "not_found", Code: "PROJECT_NOT_FOUND", Message: "Unknown project"} + spawner := &fakeSpawner{ + err: fmt.Errorf("spawn demo-1: workspace: %w", fmt.Errorf("projectresolver: lookup %q: %w", "missing", inner)), + } + srv := sessionsServer(t, spawner) + + body, status, _ := doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"missing","prompt":"x"}`) + assertErrorCode(t, body, status, http.StatusNotFound, "PROJECT_NOT_FOUND") +} + +func TestSessionsAPI_Spawn_SessionsDisabled(t *testing.T) { + srv := sessionsServer(t, nil) + body, status, _ := doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"demo","prompt":"x"}`) + if status != http.StatusServiceUnavailable { + t.Fatalf("status = %d, want 503; body=%s", status, body) + } + var got errorBody + mustJSON(t, body, &got) + if got.Error != "sessions_disabled" { + t.Fatalf("error = %q, want sessions_disabled\nbody=%s", got.Error, body) + } +} + +func TestSessionsAPI_Spawn_InternalFailure(t *testing.T) { + spawner := &fakeSpawner{err: errors.New("runtime boom")} + srv := sessionsServer(t, spawner) + + body, status, _ := doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"demo","prompt":"x"}`) + assertErrorCode(t, body, status, http.StatusInternalServerError, "SPAWN_FAILED") +} + +// TestSessionsAPI_Spawn_InternalKindIsOpaque verifies that a *project.Error +// with a non-client Kind (e.g. 
"internal" or "not_implemented") does not leak +// its Code/Message verbatim — those flavoured project errors should fall +// through to the generic SPAWN_FAILED envelope, same as any other 500. +func TestSessionsAPI_Spawn_InternalKindIsOpaque(t *testing.T) { + cases := []struct { + name string + err error + }{ + {name: "internal kind", err: &project.Error{Kind: "internal", Code: "PROJECT_STORE_CORRUPT", Message: "store file checksum mismatch"}}, + {name: "not_implemented kind", err: &project.Error{Kind: "not_implemented", Code: "PROJECT_CONFIG_NOT_IMPLEMENTED", Message: "Project config patching is not available"}}, + {name: "unknown kind", err: &project.Error{Kind: "weird", Code: "WEIRD_INTERNAL_THING", Message: "internal-only message"}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + srv := sessionsServer(t, &fakeSpawner{err: tc.err}) + body, status, _ := doRequest(t, srv, "POST", "/api/v1/sessions", + `{"projectId":"demo","prompt":"x"}`) + assertErrorCode(t, body, status, http.StatusInternalServerError, "SPAWN_FAILED") + // And confirm the project.Error's internal Message/Code didn't slip into the body. 
+ var got errorBody + mustJSON(t, body, &got) + if got.Message != "Failed to spawn session" { + t.Fatalf("internal message leaked into response: %q", got.Message) + } + }) + } +} diff --git a/backend/internal/observe/scm/poller_test.go b/backend/internal/observe/scm/poller_test.go index d4350594..64e09162 100644 --- a/backend/internal/observe/scm/poller_test.go +++ b/backend/internal/observe/scm/poller_test.go @@ -462,10 +462,7 @@ func TestStartTicksRepeatedly(t *testing.T) { done := p.Start(ctx) deadline := time.After(500 * time.Millisecond) loop: - for { - if ticks.Load() >= 3 { - break - } + for ticks.Load() < 3 { select { case <-deadline: break loop diff --git a/backend/internal/session/spawner.go b/backend/internal/session/spawner.go new file mode 100644 index 00000000..243308f5 --- /dev/null +++ b/backend/internal/session/spawner.go @@ -0,0 +1,17 @@ +package session + +import ( + "context" + + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +// Spawner is the slice of the Session Manager the HTTP controller depends on. +// *Manager satisfies it; tests can substitute a fake without dragging in the +// runtime/workspace/agent collaborators a real Manager needs. 
+type Spawner interface { + Spawn(ctx context.Context, cfg ports.SpawnConfig) (domain.Session, error) +} + +var _ Spawner = (*Manager)(nil) From eb0ac00459968c8f09b9900ddd9ec46eda428e73 Mon Sep 17 00:00:00 2001 From: harshitsinghbhandari <24b4506@iitb.ac.in> Date: Mon, 1 Jun 2026 22:37:17 +0530 Subject: [PATCH 14/17] =?UTF-8?q?scripts:=20add=20ao-here.sh=20=E2=80=94?= =?UTF-8?q?=20register=20cwd=20as=20a=20project=20and=20start=20the=20daem?= =?UTF-8?q?on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convenience script for the typical local-dev flow: cd into a git repo, run `scripts/ao-here.sh`, and the daemon starts (if not already up), the cwd is registered as an AO project, and the script prints the project ID along with a ready-to-run `ao spawn` command. Idempotent: 409 conflict on existing-path is handled by pulling the existingProjectId out of the response so re-running just re-uses the previous registration. --- scripts/ao-here.sh | 80 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100755 scripts/ao-here.sh diff --git a/scripts/ao-here.sh b/scripts/ao-here.sh new file mode 100755 index 00000000..67774935 --- /dev/null +++ b/scripts/ao-here.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +# +# ao-here.sh — register the current (or given) directory as an AO project and start the daemon. +# +# Usage: +# ./ao-here.sh # uses $PWD +# ./ao-here.sh /path/to/repo # uses given path +# +# Env overrides: +# AO_HOST (default 127.0.0.1) +# AO_PORT (default 3001) + +set -euo pipefail + +PROJECT_PATH="$(cd "${1:-$PWD}" && pwd)" + +if [[ ! -d "$PROJECT_PATH/.git" ]]; then + echo "error: $PROJECT_PATH is not a git repository (no .git dir)" >&2 + exit 1 +fi + +if ! 
command -v jq >/dev/null 2>&1; then + echo "error: 'jq' is required (brew install jq)" >&2 + exit 1 +fi + +AO_HOST="${AO_HOST:-127.0.0.1}" +AO_PORT="${AO_PORT:-3001}" +BASE="http://${AO_HOST}:${AO_PORT}" + +is_ready() { curl -fsS --max-time 1 "${BASE}/readyz" >/dev/null 2>&1; } + +if is_ready; then + echo "[ao] daemon already running at ${BASE}" +else + if ! command -v ao >/dev/null 2>&1; then + echo "error: 'ao' not on PATH. Build it: cd /backend && go install ./cmd/ao" >&2 + exit 1 + fi + echo "[ao] starting daemon..." + ao start + for _ in {1..30}; do + if is_ready; then break; fi + sleep 1 + done + if ! is_ready; then + echo "error: daemon did not become ready in 30s at ${BASE}" >&2 + exit 1 + fi + echo "[ao] daemon ready at ${BASE}" +fi + +BODY="$(jq -nc --arg path "$PROJECT_PATH" '{path: $path}')" +RESPONSE="$(curl -sS -w '\n%{http_code}' -X POST -H 'Content-Type: application/json' -d "$BODY" "${BASE}/api/v1/projects")" +HTTP_CODE="$(echo "$RESPONSE" | tail -1)" +BODY_OUT="$(echo "$RESPONSE" | sed '$d')" + +case "$HTTP_CODE" in + 201) + PROJECT_ID="$(echo "$BODY_OUT" | jq -r '.project.id')" + echo "[ao] registered project: $PROJECT_ID -> $PROJECT_PATH" + ;; + 409) + PROJECT_ID="$(echo "$BODY_OUT" | jq -r '.error.details.existingProjectId // empty')" + if [[ -z "$PROJECT_ID" ]]; then + echo "error: conflict response missing existingProjectId; raw:" >&2 + echo "$BODY_OUT" | jq . >&2 2>/dev/null || echo "$BODY_OUT" >&2 + exit 1 + fi + echo "[ao] project already registered: $PROJECT_ID -> $PROJECT_PATH" + ;; + *) + echo "error: unexpected HTTP $HTTP_CODE from POST /api/v1/projects:" >&2 + echo "$BODY_OUT" | jq . 
>&2 2>/dev/null || echo "$BODY_OUT" >&2 + exit 1 + ;; +esac + +echo "" +echo " next: ao spawn --project $PROJECT_ID --prompt \"\"" From 0e14340700809d4c49b5610511c82d8acbf8423a Mon Sep 17 00:00:00 2001 From: harshitsinghbhandari <24b4506@iitb.ac.in> Date: Mon, 1 Jun 2026 22:44:58 +0530 Subject: [PATCH 15/17] scripts(ao-here): use built backend/cmd/ao binary explicitly, drop PATH lookup The PATH version collided with the TypeScript orchestrator CLI (same binary name 'ao', different commands). This version finds the repo via the script's own location, builds (or rebuilds when sources are newer) backend/cmd/ao into backend/bin/ao, and invokes that explicit binary for both 'start' and 'spawn'. No reliance on PATH; no ambiguity about which 'ao' is running. backend/bin/ is already gitignored. --- scripts/ao-here.sh | 48 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/scripts/ao-here.sh b/scripts/ao-here.sh index 67774935..677b0325 100755 --- a/scripts/ao-here.sh +++ b/scripts/ao-here.sh @@ -1,10 +1,13 @@ #!/usr/bin/env bash # -# ao-here.sh — register the current (or given) directory as an AO project and start the daemon. +# ao-here.sh — register the current (or given) directory as an AO project and +# start the daemon. Uses OUR Go binary (built from this repo's +# backend/cmd/ao) explicitly — does NOT rely on whatever `ao` is on PATH +# (which on dev machines is usually the TypeScript orchestrator CLI). # # Usage: -# ./ao-here.sh # uses $PWD -# ./ao-here.sh /path/to/repo # uses given path +# ./scripts/ao-here.sh # registers $PWD +# ./scripts/ao-here.sh /path/to/repo # registers given path # # Env overrides: # AO_HOST (default 127.0.0.1) @@ -12,10 +15,19 @@ set -euo pipefail -PROJECT_PATH="$(cd "${1:-$PWD}" && pwd)" +# Find the repo root: this script lives at /scripts/ao-here.sh +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" +BACKEND_DIR="${REPO_ROOT}/backend" -if [[ ! -d "$PROJECT_PATH/.git" ]]; then - echo "error: $PROJECT_PATH is not a git repository (no .git dir)" >&2 +if [[ ! -d "${BACKEND_DIR}/cmd/ao" ]]; then + echo "error: can't find backend/cmd/ao under ${REPO_ROOT}" >&2 + echo " (this script must live inside the agent-orchestrator repo)" >&2 + exit 1 +fi + +if ! command -v go >/dev/null 2>&1; then + echo "error: 'go' is required to build the daemon" >&2 exit 1 fi @@ -24,6 +36,21 @@ if ! command -v jq >/dev/null 2>&1; then exit 1 fi +# Build the daemon binary to a local path inside the repo (gitignored). +# Rebuild if any source file is newer than the existing binary. +AO_BIN="${BACKEND_DIR}/bin/ao" +if [[ ! -x "$AO_BIN" ]] || [[ -n "$(find "${BACKEND_DIR}" -newer "$AO_BIN" -type f -name '*.go' -print -quit 2>/dev/null || true)" ]]; then + echo "[ao] building daemon -> ${AO_BIN}" + (cd "$BACKEND_DIR" && go build -o "$AO_BIN" ./cmd/ao) +fi + +PROJECT_PATH="$(cd "${1:-$PWD}" && pwd)" + +if [[ ! -d "$PROJECT_PATH/.git" ]]; then + echo "error: $PROJECT_PATH is not a git repository (no .git dir)" >&2 + exit 1 +fi + AO_HOST="${AO_HOST:-127.0.0.1}" AO_PORT="${AO_PORT:-3001}" BASE="http://${AO_HOST}:${AO_PORT}" @@ -33,12 +60,8 @@ is_ready() { curl -fsS --max-time 1 "${BASE}/readyz" >/dev/null 2>&1; } if is_ready; then echo "[ao] daemon already running at ${BASE}" else - if ! command -v ao >/dev/null 2>&1; then - echo "error: 'ao' not on PATH. Build it: cd /backend && go install ./cmd/ao" >&2 - exit 1 - fi echo "[ao] starting daemon..." 
- ao start + "$AO_BIN" start for _ in {1..30}; do if is_ready; then break; fi sleep 1 @@ -77,4 +100,5 @@ case "$HTTP_CODE" in esac echo "" -echo " next: ao spawn --project $PROJECT_ID --prompt \"\"" +echo " next:" +echo " ${AO_BIN} spawn --project $PROJECT_ID --prompt \"\"" From b6c76b452004344666fd67acfa7fd25fe4971802 Mon Sep 17 00:00:00 2001 From: harshitsinghbhandari <24b4506@iitb.ac.in> Date: Mon, 1 Jun 2026 23:25:52 +0530 Subject: [PATCH 16/17] fix(session): default branch to ao/ when SpawnConfig.Branch is unset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CLI/API does not expose a branch flag, but the gitworktree adapter requires a non-empty branch (and cannot have two worktrees on the same branch). Spawn forwarded cfg.Branch verbatim to workspace.Create, so the first real `ao spawn` returned 500 with "workspace: gitworktree: branch is required". The session id is assigned by the store inside Spawn, so the SM is the only layer where a per-session default ref can be computed. Explicit branches still win. Also surface internal SPAWN_FAILED errors to the daemon log via slog.Error in writeSpawnError. Response stays opaque ("Failed to spawn session") — TestSessionsAPI_Spawn_InternalKindIsOpaque still passes — but operators get a real error in the log instead of a 713µs 500 with no trace. 
Co-Authored-By: Claude Opus 4.7 --- .../internal/httpd/controllers/sessions.go | 2 ++ backend/internal/session/manager.go | 11 +++++++- backend/internal/session/manager_test.go | 27 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/backend/internal/httpd/controllers/sessions.go b/backend/internal/httpd/controllers/sessions.go index 692cfae4..757570e0 100644 --- a/backend/internal/httpd/controllers/sessions.go +++ b/backend/internal/httpd/controllers/sessions.go @@ -3,6 +3,7 @@ package controllers import ( "encoding/json" "errors" + "log/slog" "net/http" "strings" @@ -111,5 +112,6 @@ func writeSpawnError(w http.ResponseWriter, r *http.Request, err error) { return } } + slog.Default().Error("spawn: internal error returned to client", "err", err.Error()) envelope.WriteAPIError(w, r, http.StatusInternalServerError, "internal", "SPAWN_FAILED", "Failed to spawn session", nil) } diff --git a/backend/internal/session/manager.go b/backend/internal/session/manager.go index ca4d0fa6..9caa6699 100644 --- a/backend/internal/session/manager.go +++ b/backend/internal/session/manager.go @@ -95,7 +95,16 @@ func (m *Manager) Spawn(ctx context.Context, cfg ports.SpawnConfig) (domain.Sess } id := rec.ID - ws, err := m.workspace.Create(ctx, ports.WorkspaceConfig{ProjectID: cfg.ProjectID, SessionID: id, Branch: cfg.Branch}) + // The CLI/API does not expose a branch flag, but the gitworktree adapter + // requires a non-empty branch (and cannot have two worktrees on the same + // branch). The session id is assigned by the store above, so this is the + // only layer where a per-session default ref can be computed. 
+ branch := cfg.Branch + if branch == "" { + branch = "ao/" + string(id) + } + + ws, err := m.workspace.Create(ctx, ports.WorkspaceConfig{ProjectID: cfg.ProjectID, SessionID: id, Branch: branch}) if err != nil { m.markSpawnFailedTerminated(ctx, id) return domain.Session{}, fmt.Errorf("spawn %s: workspace: %w", id, err) diff --git a/backend/internal/session/manager_test.go b/backend/internal/session/manager_test.go index f682a51a..261be2c6 100644 --- a/backend/internal/session/manager_test.go +++ b/backend/internal/session/manager_test.go @@ -159,6 +159,33 @@ func TestSpawn_AssignsIDAndGoesIdle(t *testing.T) { t.Fatal("handle not folded") } } + +// SpawnConfig.Branch is optional from the API surface (the CLI does not expose +// it). The SM is the only layer with the session id (assigned by the store +// inside Spawn), so it defaults the branch to a per-session ref. The +// gitworktree workspace requires a non-empty branch and cannot have two +// worktrees on the same branch — so the default must be unique per session. +func TestSpawn_DefaultsBranchPerSession_WhenUnset(t *testing.T) { + m, st, _, _ := newManager() + if _, err := m.Spawn(ctx, ports.SpawnConfig{ProjectID: "mer", Kind: domain.KindWorker, Prompt: "do it"}); err != nil { + t.Fatal(err) + } + if got := st.sessions["mer-1"].Metadata.Branch; got != "ao/mer-1" { + t.Fatalf("default branch: got %q, want %q", got, "ao/mer-1") + } +} + +// An explicit branch in SpawnConfig must win over the default — the API/CLI +// layer can still pin a branch when it wants to. 
+func TestSpawn_HonorsExplicitBranch(t *testing.T) { + m, st, _, _ := newManager() + if _, err := m.Spawn(ctx, ports.SpawnConfig{ProjectID: "mer", Kind: domain.KindWorker, Prompt: "do it", Branch: "feature/x"}); err != nil { + t.Fatal(err) + } + if got := st.sessions["mer-1"].Metadata.Branch; got != "feature/x" { + t.Fatalf("explicit branch: got %q, want %q", got, "feature/x") + } +} func TestSpawn_RollsBackOnRuntimeFailure(t *testing.T) { m, st, _, ws := newManager() m.runtime = &fakeRuntime{createErr: errors.New("boom")} From baa386f75645393c44bc1682a61185874ab9577f Mon Sep 17 00:00:00 2001 From: harshitsinghbhandari <24b4506@iitb.ac.in> Date: Mon, 1 Jun 2026 23:27:09 +0530 Subject: [PATCH 17/17] fix(ao-here): correct jq path for 409 existingProjectId MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The envelope is flat — '.details.existingProjectId', not '.error.details.existingProjectId' (the 'error' field is a string kind, not an object). Re-running the script against an already-registered project now correctly extracts the project id from the conflict response instead of crashing the helper. Found by aa-46 while debugging the end-to-end spawn flow. --- scripts/ao-here.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ao-here.sh b/scripts/ao-here.sh index 677b0325..df980e0e 100755 --- a/scripts/ao-here.sh +++ b/scripts/ao-here.sh @@ -84,7 +84,7 @@ case "$HTTP_CODE" in echo "[ao] registered project: $PROJECT_ID -> $PROJECT_PATH" ;; 409) - PROJECT_ID="$(echo "$BODY_OUT" | jq -r '.error.details.existingProjectId // empty')" + PROJECT_ID="$(echo "$BODY_OUT" | jq -r '.details.existingProjectId // empty')" if [[ -z "$PROJECT_ID" ]]; then echo "error: conflict response missing existingProjectId; raw:" >&2 echo "$BODY_OUT" | jq . >&2 2>/dev/null || echo "$BODY_OUT" >&2