From e871d27cdcae4d95d0d694aec94a8934a0ff94b2 Mon Sep 17 00:00:00 2001 From: Chris Huber Date: Sun, 7 Jun 2026 13:39:50 -0400 Subject: [PATCH] refactor: move cleanup backend helpers --- inc/Workspace/Workspace.php | 641 +----------------- .../WorkspaceWorktreeCleanupEngine.php | 633 +++++++++++++++++ 2 files changed, 638 insertions(+), 636 deletions(-) diff --git a/inc/Workspace/Workspace.php b/inc/Workspace/Workspace.php index ec26684..c2ff9c0 100644 --- a/inc/Workspace/Workspace.php +++ b/inc/Workspace/Workspace.php @@ -13,7 +13,6 @@ namespace DataMachineCode\Workspace; use DataMachine\Core\FilesRepository\FilesystemHelper; -use DataMachineCode\Support\GitHubRemote; use DataMachineCode\Support\GitRunner; use DataMachineCode\Support\PathSecurity; use DataMachineCode\Storage\WorktreeInventoryRepository; @@ -525,427 +524,15 @@ public function ensure_exists(): array|\WP_Error { // Worktree operations // ========================================================================= - /** - * Remove a worktree at an explicit path. - * - * Path-aware counterpart to `worktree_remove()`, which reconstructs the - * path from `@` convention. Cleanup code must use this so - * reviewed inventory rows are removed by their safety-probed path. - * - * Hard safety rails applied here before any removal: - * 1. Primary repo's `.git` must exist (we're about to invoke it) - * 2. The worktree path must be a real directory - * 3. The worktree path must be inside `$workspace_path` (containment - * validation — no external targets, ever) - * 4. The worktree's `.git` must be a file (worktree marker), not a - * directory. A directory `.git` means it's a primary, not a - * worktree — removing it would be catastrophic. - * 5. If dirty and not forcing, refuse. - * - * @param string $repo Primary repo directory name (for routing git commands). - * @param string $branch Branch the worktree is checked out to. - * @param string $wt_path Absolute path to the worktree directory. - * @param bool $force Pass --force to `git worktree remove`. - * @return array{success: bool, handle: string, message: string}|\WP_Error - */ - private function remove_worktree_by_path( string $repo, string $branch, string $wt_path, bool $force ): array|\WP_Error { - $repo = $this->sanitize_name($repo); - if ( '' === $repo ) { - return new \WP_Error('invalid_repo', 'Repository name is required.', array( 'status' => 400 )); - } - - $primary_path = $this->get_primary_path($repo); - if ( ! is_dir($primary_path . '/.git') ) { - return new \WP_Error('primary_not_found', sprintf('Primary checkout for "%s" does not exist.', $repo), array( 'status' => 404 )); - } - - if ( '' === $wt_path || ! is_dir($wt_path) ) { - return new \WP_Error('worktree_path_missing', sprintf('Worktree path does not exist: %s', $wt_path), array( 'status' => 404 )); - } - - // Belt-and-suspenders containment — cleanup callers already skip - // `external` worktrees, but validate again at the blast radius. - $validation = $this->validate_containment($wt_path, $this->workspace_path); - if ( ! $validation['valid'] ) { - return new \WP_Error( - 'path_outside_workspace', - sprintf('Refusing to remove "%s": path is outside workspace (%s).', $wt_path, $validation['message'] ?? ''), - array( 'status' => 403 ) - ); - } - - // A worktree's .git is a FILE pointing at the primary's .git dir. - // A directory .git means we're looking at a primary checkout — never - // touch those. - $real_path = (string) ( $validation['real_path'] ?? '' ); - $git_marker = rtrim($real_path, '/') . '/.git'; - if ( '' === $real_path ) { - return new \WP_Error( - 'path_outside_workspace', - sprintf('Refusing to remove "%s": path did not resolve inside workspace.', $wt_path), - array( 'status' => 403 ) - ); - } - if ( ! is_file($git_marker) ) { - return new \WP_Error( - 'not_a_worktree', - sprintf('Refusing to remove "%s": .git is not a worktree marker file (got: %s). This may be a primary checkout.', $wt_path, is_dir($git_marker) ? 'directory' : 'missing'), - array( 'status' => 403 ) - ); - } - - $cmd = sprintf('worktree remove %s%s', $force ? '--force ' : '', escapeshellarg($real_path)); - $result = $this->run_git($primary_path, $cmd); - - if ( is_wp_error($result) ) { - return $result; - } - - // If the directory survived `git worktree remove` (can happen for - // locked worktrees, or when the worktree was already detached), prune - // the directory manually so cleanup is effective. - if ( is_dir($real_path) ) { - $escaped = escapeshellarg($real_path); - // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.system_calls_exec - exec(sprintf('rm -rf %s 2>&1', $escaped)); - } - - WorktreeContextInjector::forget_metadata(basename($wt_path)); - $this->worktree_inventory()->delete(basename($wt_path)); - - return array( - 'success' => true, - 'handle' => basename($wt_path), - 'message' => sprintf('Worktree at "%s" removed.', $wt_path), - 'branch' => $branch, - ); - } - - /** - * Classify a dirty worktree as "merged + only obsolete dirty changes". - * - * Returns the classification payload when: - * - The branch has a confirmed merge signal (upstream-gone, local-merged, - * pr-merged, or already cleanup-eligible per metadata). - * - All dirty paths reported by `git status --porcelain` are tracked - * paths whose entries are absent on the remote default branch tip - * (i.e. modifying or deleting files the default branch no longer has). - * - * Returns null in every other case so the caller falls back to the - * generic `dirty_worktree` skip: - * - No merge signal, or signal cannot be confirmed. - * - Any dirty path is untracked (could be new content). - * - Any dirty path still exists on the default branch tip. - * - Default branch ref cannot be resolved. - * - Any git probe times out or fails. - * - * The classification keeps cleanup conservative: it never auto-removes - * dirty worktrees, but the distinct reason code lets reviewers spot the - * "safe to force" subset without manual archaeology. - * - * @param string $repo Repo directory name. - * @param string $branch Branch name. - * @param string $wt_path Worktree path. - * @param bool $skip_github Whether to skip GitHub API lookups. - * @param array $github_cache Run-local GitHub cache. - * @param array $fetched Per-repo fetch tracker. - * @param array $fetch_timeouts Per-repo fetch timeout tracker. - * @param mixed $metadata Worktree metadata. - * @param bool $include_repaired_metadata Whether repaired metadata counts as a cleanup signal. - * @return array{paths: array, merge_signal: string, pr_url?: ?string, default_ref: string}|null - */ - private function classify_dirty_obsolete_on_default_branch( - string $repo, - string $branch, - string $wt_path, - bool $skip_github, - array &$github_cache, - array &$fetched, - array &$fetch_timeouts, - $metadata, - bool $include_repaired_metadata - ): ?array { - if ( '' === $repo || '' === $branch || '' === $wt_path || ! is_dir($wt_path) ) { - return null; - } - - $primary_path = $this->get_primary_path($repo); - if ( ! is_dir($primary_path . '/.git') ) { - return null; - } - - // Refuse to classify if a previous worktree already saw this repo's - // fetch time out — the default-ref / merge-signal probes would race - // against stale data. - if ( isset($fetch_timeouts[ $repo ]) ) { - return null; - } - - // Ensure remote refs are fresh once per repo per cleanup run. Reuses - // the caller's `$fetched` tracker so this never double-fetches. - if ( empty($fetched[ $repo ]) ) { - $fetch = $this->run_git($primary_path, 'fetch --prune --quiet origin', self::CLEANUP_GIT_PROBE_TIMEOUT); - if ( is_wp_error($fetch) && $this->is_git_timeout_error($fetch) ) { - $fetch_timeouts[ $repo ] = $fetch; - return null; - } - $fetched[ $repo ] = true; - } - - $default_ref = $this->resolve_remote_default_ref($primary_path, self::CLEANUP_GIT_PROBE_TIMEOUT); - if ( $default_ref instanceof \WP_Error || null === $default_ref || '' === $default_ref ) { - return null; - } - - // Confirm the default ref actually resolves to a commit. If it doesn't, - // every `cat-file -e :` would fail and we'd mis-classify the - // whole worktree as obsolete-on-default. - $default_resolve = $this->run_git( - $primary_path, - sprintf('rev-parse --verify --quiet %s', escapeshellarg($default_ref . '^{commit}')), - self::CLEANUP_GIT_PROBE_TIMEOUT - ); - if ( is_wp_error($default_resolve) ) { - return null; - } - - $signal = null; - if ( is_array($metadata) && WorktreeContextInjector::has_cleanup_signal($metadata) ) { - $signal = array( - 'signal' => 'cleanup_eligible', - 'reason' => 'worktree finalized or explicitly marked cleanup_eligible', - ); - if ( ! empty($metadata['pr_url']) ) { - $signal['pr_url'] = (string) $metadata['pr_url']; - } - } elseif ( $include_repaired_metadata && is_array($metadata) && ! empty($metadata['metadata_repaired']) ) { - $signal = array( - 'signal' => 'repaired_metadata', - 'reason' => 'operator-approved cleanup of repaired metadata', - ); - } else { - $signal = $this->detect_merge_signal($primary_path, $repo, $branch, $skip_github, $github_cache); - } - - if ( ! is_array($signal) ) { - return null; - } - $signal_kind = (string) $signal['signal']; - $merged_signals = array( 'upstream-gone', 'local-merged', 'pr-merged', 'cleanup_eligible', 'repaired_metadata' ); - if ( ! in_array($signal_kind, $merged_signals, true) ) { - return null; - } - - // Untracked files are never "obsolete on default" — they could be new - // content the operator wants to preserve. Bail at the first hint of - // untracked content so this classifier stays conservative. - $untracked = $this->run_git( - $wt_path, - 'ls-files --others --exclude-standard', - self::CLEANUP_GIT_PROBE_TIMEOUT - ); - if ( $this->is_git_timeout_error($untracked) ) { - return null; - } - if ( ! is_wp_error($untracked) && '' !== trim( (string) ( $untracked['output'] ?? '' )) ) { - return null; - } - - // Modified/deleted/added tracked paths against the worktree's HEAD. - // `diff --name-only HEAD` covers staged and unstaged changes in one - // shot and avoids the porcelain status leading-whitespace quirk that - // `trim()`-on-output would corrupt. - $tracked = $this->run_git( - $wt_path, - 'diff --name-only HEAD', - self::CLEANUP_GIT_PROBE_TIMEOUT - ); - if ( is_wp_error($tracked) ) { - return null; - } - - $paths = array_values( - array_filter( - array_map('trim', explode("\n", (string) ( $tracked['output'] ?? '' ))), - fn( $line ) => '' !== $line - ) - ); - if ( array() === $paths ) { - return null; - } - - $obsolete_paths = array(); - foreach ( $paths as $path ) { - // `cat-file -e :` exits 0 when the path exists on the - // default branch tip. Non-zero (missing/ambiguous) means the path - // is absent there — exactly the case we want to classify as - // obsolete-on-default. - $probe = $this->run_git( - $primary_path, - sprintf('cat-file -e %s', escapeshellarg($default_ref . ':' . $path)), - self::CLEANUP_GIT_PROBE_TIMEOUT - ); - if ( is_wp_error($probe) && $this->is_git_timeout_error($probe) ) { - return null; - } - if ( is_wp_error($probe) ) { - $obsolete_paths[ $path ] = 'absent_on_default'; - continue; - } - // Path still exists on the default branch tip — dirty edit may - // still be relevant. Refuse to classify into the new bucket. - return null; - } - - if ( array() === $obsolete_paths ) { - return null; - } - - return array( - 'paths' => $obsolete_paths, - 'merge_signal' => $signal_kind, - 'pr_url' => $signal['pr_url'] ?? null, - 'default_ref' => $default_ref, - ); - } - - /** - * Detect whether a branch looks merged into the remote default branch. - * - * Returns an array with `signal` and `reason`, or null if no signal is - * present (leave the worktree alone). - * - * Signal priority: - * 1. `upstream-gone` — local branch's upstream tracking ref is gone. - * Typical after GitHub auto-deletes the head branch on PR merge. - * 2. `pr-merged` — GitHub API reports a closed+merged PR for this - * branch. Requires $skip_github = false and a configured PAT. - * - * @param string $primary_path Path to the primary git checkout. - * @param string $repo Primary repo directory name. - * @param string $branch Branch name. - * @param bool $skip_github If true, skip GitHub API lookup. - * @param array $github_cache Run-local cache for GitHub repo lookups. - * @return array{signal: string, reason: string, pr_url?: string}|null - */ - private function detect_merge_signal( string $primary_path, string $repo, string $branch, bool $skip_github, array &$github_cache = array() ): ?array { - $ref = 'refs/heads/' . $branch; - $format = '%(upstream:track)'; - $result = $this->run_git($primary_path, sprintf('for-each-ref --format=%s %s', escapeshellarg($format), escapeshellarg($ref)), self::CLEANUP_GIT_PROBE_TIMEOUT); - - if ( is_wp_error($result) && $this->is_git_timeout_error($result) ) { - return array( - 'signal' => 'probe-timeout', - 'reason' => $result->get_error_message(), - ); - } - - if ( ! is_wp_error($result) ) { - $track = trim( (string) ( $result['output'] ?? '' )); - if ( str_contains($track, 'gone') ) { - return array( - 'signal' => 'upstream-gone', - 'reason' => 'remote branch deleted (likely merged + auto-deleted)', - ); - } - } - - $local_merged = $this->detect_local_merged_signal($primary_path, $branch); - if ( null !== $local_merged ) { - return $local_merged; - } - - if ( $skip_github ) { - return null; - } - - $gh_slug = $this->resolve_github_slug($primary_path); - if ( null === $gh_slug ) { - return null; - } - - $pr = $this->find_closed_pr_for_branch($gh_slug, $branch, $github_cache); - if ( is_wp_error($pr) ) { - return array( - 'signal' => 'github-unknown', - 'reason' => 'unknown_github_state — ' . $pr->get_error_message(), - ); - } - if ( null === $pr ) { - return null; - } - - if ( ! empty($pr['merged_at']) ) { - return array( - 'signal' => 'pr-merged', - 'reason' => sprintf('PR #%d merged (%s)', $pr['number'], $pr['state']), - 'finalized_state' => WorktreeContextInjector::STATE_MERGED, - 'pr_url' => $pr['html_url'] ?? null, - ); - } - - return array( - 'signal' => 'pr-closed', - 'reason' => sprintf('PR #%d closed without merge', $pr['number']), - 'finalized_state' => WorktreeContextInjector::STATE_CLOSED, - 'pr_url' => $pr['html_url'] ?? null, - ); - } - - /** - * Detect branches already contained in the remote default branch using local git refs only. - * - * This catches manually-merged branches before falling through to the GitHub - * API, which keeps GitHub-backed cleanup bounded while avoiding unnecessary - * network calls for branches whose merge state is already locally provable. - * - * @param string $primary_path Path to the primary git checkout. - * @param string $branch Branch name. - * @return array{signal: string, reason: string}|null - */ - private function detect_local_merged_signal( string $primary_path, string $branch ): ?array { - $default_ref = $this->resolve_remote_default_ref($primary_path, self::CLEANUP_GIT_PROBE_TIMEOUT); - if ( $default_ref instanceof \WP_Error ) { - return array( - 'signal' => 'probe-timeout', - 'reason' => $default_ref->get_error_message(), - ); - } - if ( null === $default_ref ) { - return null; - } - - $branch_ref = 'refs/heads/' . $branch; - $result = $this->run_git( - $primary_path, - sprintf('rev-list --count %s..%s', escapeshellarg($default_ref), escapeshellarg($branch_ref)), - self::CLEANUP_GIT_PROBE_TIMEOUT - ); - if ( is_wp_error($result) && $this->is_git_timeout_error($result) ) { - return array( - 'signal' => 'probe-timeout', - 'reason' => $result->get_error_message(), - ); - } - if ( is_wp_error($result) ) { - return null; - } - - $unique_commits = (int) trim( (string) ( $result['output'] ?? '' )); - if ( 0 !== $unique_commits ) { - return null; - } - - return array( - 'signal' => 'local-merged', - 'reason' => sprintf('branch has no commits outside remote default (%s)', $default_ref), - ); - } + // ========================================================================= + // Internal helpers + // ========================================================================= /** * Resolve the remote default branch ref for local cleanup checks. * - * @param string $primary_path Path to the primary git checkout. + * @param string $primary_path Path to the primary git checkout. + * @param int $timeout_seconds Optional timeout in seconds. * @return string|\WP_Error|null Fully-qualified remote default ref, timeout error, or null when unavailable. */ private function resolve_remote_default_ref( string $primary_path, int $timeout_seconds = 0 ): string|\WP_Error|null { @@ -958,224 +545,6 @@ private function resolve_remote_default_ref( string $primary_path, int $timeout_ return '' === $ref ? null : $ref; } - /** - * Extract owner/repo slug from a primary checkout's origin remote. - * - * @param string $primary_path Primary checkout path. - * @return string|null `owner/repo` or null if origin is not a GitHub URL. - */ - private function resolve_github_slug( string $primary_path ): ?string { - $remote = $this->git_get_remote($primary_path); - if ( null === $remote || '' === $remote ) { - return null; - } - return GitHubRemote::slug($remote); - } - - /** - * Look up a closed PR for a branch via a cached GitHub API snapshot. - * - * Cleanup may inspect hundreds of worktrees for the same repo. Querying - * GitHub once per branch does not scale, so each repo gets one bounded - * closed-PR snapshot per cleanup run and branch lookups read that cache. - * - * @param string $slug owner/repo. - * @param string $branch Branch name. - * @param array $github_cache Run-local cache keyed by owner/repo. - * @return array|null|\WP_Error PR data, null when no PR matched, or lookup failure. - */ - private function find_closed_pr_for_branch( string $slug, string $branch, array &$github_cache = array() ): array|\WP_Error|null { - $lookup = $this->get_cleanup_github_lookup($slug, $github_cache); - if ( is_wp_error($lookup) ) { - return $lookup; - } - - if ( null !== $lookup && isset($lookup[ $branch ]) ) { - return $lookup[ $branch ]; - } - - return $this->find_pr_for_branch_direct($slug, $branch, $github_cache, true); - } - - /** - * Look up a PR for one branch directly via GitHub's head filter. - * - * The repo-level closed-PR snapshot is intentionally bounded for cleanup runs, - * so older PRs can be missed. This precise fallback keeps PR lifecycle as the - * source of truth without treating remote branch existence as liveness. - * - * @param string $slug owner/repo. - * @param string $branch Branch name. - * @param array $github_cache Run-local cache keyed by owner/repo and branch. - * @param bool $finalized_only If true, ignore open PRs. - * @return array|null|\WP_Error PR data, null when no matching PR exists, or lookup failure. - */ - private function find_pr_for_branch_direct( string $slug, string $branch, array &$github_cache = array(), bool $finalized_only = true ): array|\WP_Error|null { - $cache_key = $slug . '#head:' . ( $finalized_only ? 'finalized:' : 'any:' ) . $branch; - if ( array_key_exists($cache_key, $github_cache) ) { - return $github_cache[ $cache_key ]; - } - - if ( ! class_exists('\DataMachineCode\Abilities\GitHubAbilities') ) { - $github_cache[ $cache_key ] = null; - return null; - } - - $parts = explode('/', $slug, 2); - $owner = $parts[0]; - if ( '' === $owner || empty($parts[1]) ) { - $github_cache[ $cache_key ] = null; - return null; - } - - $pat = \DataMachineCode\Abilities\GitHubAbilities::getPat(array( 'repo' => $slug )); - if ( empty($pat) ) { - $github_cache[ $cache_key ] = null; - return null; - } - - $response = \DataMachineCode\Abilities\GitHubAbilities::apiGet( - GitHubRemote::apiUrl($slug, 'pulls'), - array( - 'head' => $owner . ':' . $branch, - 'sort' => 'updated', - 'direction' => 'desc', - 'state' => 'all', - 'per_page' => 5, - ), - $pat, - self::CLEANUP_GITHUB_TIMEOUT - ); - - if ( is_wp_error($response) ) { - $error = new \WP_Error( - 'github_cleanup_branch_lookup_failed', - sprintf('GitHub cleanup branch lookup failed for %s:%s: %s', $slug, $branch, $response->get_error_message()), - $response->get_error_data() - ); - $github_cache[ $cache_key ] = $error; - return $error; - } - - foreach ( (array) ( $response['data'] ?? array() ) as $pr ) { - if ( ! is_array($pr) ) { - continue; - } - - $head = is_array($pr['head'] ?? null) ? $pr['head'] : array(); - $head_repo = is_array($head['repo'] ?? null) ? (string) ( $head['repo']['full_name'] ?? '' ) : ''; - $head_ref = (string) ( $head['ref'] ?? '' ); - $state = (string) ( $pr['state'] ?? '' ); - if ( $head_repo !== $slug || $head_ref !== $branch ) { - continue; - } - if ( $finalized_only && 'closed' !== $state ) { - continue; - } - - $github_cache[ $cache_key ] = array( - 'number' => (int) ( $pr['number'] ?? 0 ), - 'state' => $state, - 'merged_at' => (string) ( $pr['merged_at'] ?? '' ), - 'html_url' => (string) ( $pr['html_url'] ?? '' ), - ); - - return $github_cache[ $cache_key ]; - } - - $github_cache[ $cache_key ] = null; - return null; - } - - /** - * Load and cache closed same-repo PRs for a GitHub repo. - * - * @param string $slug owner/repo. - * @param array $github_cache Run-local cache keyed by owner/repo. - * @return array|null|\WP_Error Branch-name map, null when GitHub is unavailable, or lookup failure. - */ - private function get_cleanup_github_lookup( string $slug, array &$github_cache ): array|\WP_Error|null { - if ( array_key_exists($slug, $github_cache) ) { - return $github_cache[ $slug ]; - } - - if ( ! class_exists('\DataMachineCode\Abilities\GitHubAbilities') ) { - $github_cache[ $slug ] = null; - return null; - } - - // Pass the repo through so credential profiles with `allowed_repos` - // can win over the global default profile when scanning closed PRs. - $pat = \DataMachineCode\Abilities\GitHubAbilities::getPat(array( 'repo' => $slug )); - if ( empty($pat) ) { - $github_cache[ $slug ] = null; - return null; - } - - $parts = explode('/', $slug, 2); - $owner = $parts[0]; - if ( '' === $owner || empty($parts[1]) ) { - $github_cache[ $slug ] = null; - return null; - } - - $closed = array(); - $url = GitHubRemote::apiUrl($slug, 'pulls'); - - for ( $page = 1; $page <= self::CLEANUP_GITHUB_MAX_PAGES; $page++ ) { - $response = \DataMachineCode\Abilities\GitHubAbilities::apiGet( - $url, - array( - 'state' => 'closed', - 'sort' => 'updated', - 'direction' => 'desc', - 'per_page' => 100, - 'page' => $page, - ), - $pat, - self::CLEANUP_GITHUB_TIMEOUT - ); - - if ( is_wp_error($response) ) { - $error = new \WP_Error( - 'github_cleanup_lookup_failed', - sprintf('GitHub cleanup lookup failed for %s: %s', $slug, $response->get_error_message()), - $response->get_error_data() - ); - $github_cache[ $slug ] = $error; - return $error; - } - - $items = (array) ( $response['data'] ?? array() ); - foreach ( $items as $pr ) { - $head = is_array($pr['head'] ?? null) ? $pr['head'] : array(); - $head_repo = is_array($head['repo'] ?? null) ? (string) ( $head['repo']['full_name'] ?? '' ) : ''; - $head_ref = (string) ( $head['ref'] ?? '' ); - if ( $head_repo !== $slug || '' === $head_ref ) { - continue; - } - - $closed[ $head_ref ] = array( - 'number' => (int) ( $pr['number'] ?? 0 ), - 'state' => (string) ( $pr['state'] ?? 'closed' ), - 'merged_at' => (string) ( $pr['merged_at'] ?? '' ), - 'html_url' => (string) ( $pr['html_url'] ?? '' ), - ); - } - - if ( count($items) < 100 ) { - break; - } - } - - $github_cache[ $slug ] = $closed; - return $closed; - } - - // ========================================================================= - // Internal helpers - // ========================================================================= - /** * Validate that a target path is contained within a parent directory. * diff --git a/inc/Workspace/WorkspaceWorktreeCleanupEngine.php b/inc/Workspace/WorkspaceWorktreeCleanupEngine.php index cb66231..768158d 100644 --- a/inc/Workspace/WorkspaceWorktreeCleanupEngine.php +++ b/inc/Workspace/WorkspaceWorktreeCleanupEngine.php @@ -7,6 +7,8 @@ namespace DataMachineCode\Workspace; +use DataMachineCode\Support\GitHubRemote; + defined('ABSPATH') || exit; trait WorkspaceWorktreeCleanupEngine { @@ -2204,4 +2206,635 @@ private function scope_worktree_cleanup_to_plan( array $planned_candidates, arra 'skipped' => $scoped_skipped, ); } + + /** + * Remove a worktree at an explicit path. + * + * Path-aware counterpart to `worktree_remove()`, which reconstructs the + * path from `@` convention. Cleanup code must use this so + * reviewed inventory rows are removed by their safety-probed path. + * + * Hard safety rails applied here before any removal: + * 1. Primary repo's `.git` must exist (we're about to invoke it) + * 2. The worktree path must be a real directory + * 3. The worktree path must be inside `$workspace_path` (containment + * validation — no external targets, ever) + * 4. The worktree's `.git` must be a file (worktree marker), not a + * directory. A directory `.git` means it's a primary, not a + * worktree — removing it would be catastrophic. + * 5. If dirty and not forcing, refuse. + * + * @param string $repo Primary repo directory name (for routing git commands). + * @param string $branch Branch the worktree is checked out to. + * @param string $wt_path Absolute path to the worktree directory. + * @param bool $force Pass --force to `git worktree remove`. + * @return array{success: bool, handle: string, message: string}|\WP_Error + */ + private function remove_worktree_by_path( string $repo, string $branch, string $wt_path, bool $force ): array|\WP_Error { + $repo = $this->sanitize_name($repo); + if ( '' === $repo ) { + return new \WP_Error('invalid_repo', 'Repository name is required.', array( 'status' => 400 )); + } + + $primary_path = $this->get_primary_path($repo); + if ( ! is_dir($primary_path . '/.git') ) { + return new \WP_Error('primary_not_found', sprintf('Primary checkout for "%s" does not exist.', $repo), array( 'status' => 404 )); + } + + if ( '' === $wt_path || ! is_dir($wt_path) ) { + return new \WP_Error('worktree_path_missing', sprintf('Worktree path does not exist: %s', $wt_path), array( 'status' => 404 )); + } + + // Belt-and-suspenders containment — cleanup callers already skip + // `external` worktrees, but validate again at the blast radius. + $validation = $this->validate_containment($wt_path, $this->workspace_path); + if ( ! $validation['valid'] ) { + return new \WP_Error( + 'path_outside_workspace', + sprintf('Refusing to remove "%s": path is outside workspace (%s).', $wt_path, $validation['message'] ?? ''), + array( 'status' => 403 ) + ); + } + + // A worktree's .git is a FILE pointing at the primary's .git dir. + // A directory .git means we're looking at a primary checkout — never + // touch those. + $real_path = (string) ( $validation['real_path'] ?? '' ); + $git_marker = rtrim($real_path, '/') . '/.git'; + if ( '' === $real_path ) { + return new \WP_Error( + 'path_outside_workspace', + sprintf('Refusing to remove "%s": path did not resolve inside workspace.', $wt_path), + array( 'status' => 403 ) + ); + } + if ( ! is_file($git_marker) ) { + return new \WP_Error( + 'not_a_worktree', + sprintf('Refusing to remove "%s": .git is not a worktree marker file (got: %s). This may be a primary checkout.', $wt_path, is_dir($git_marker) ? 'directory' : 'missing'), + array( 'status' => 403 ) + ); + } + + $cmd = sprintf('worktree remove %s%s', $force ? '--force ' : '', escapeshellarg($real_path)); + $result = $this->run_git($primary_path, $cmd); + + if ( is_wp_error($result) ) { + return $result; + } + + // If the directory survived `git worktree remove` (can happen for + // locked worktrees, or when the worktree was already detached), prune + // the directory manually so cleanup is effective. + if ( is_dir($real_path) ) { + $escaped = escapeshellarg($real_path); + // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.system_calls_exec + exec(sprintf('rm -rf %s 2>&1', $escaped)); + } + + WorktreeContextInjector::forget_metadata(basename($wt_path)); + $this->worktree_inventory()->delete(basename($wt_path)); + + return array( + 'success' => true, + 'handle' => basename($wt_path), + 'message' => sprintf('Worktree at "%s" removed.', $wt_path), + 'branch' => $branch, + ); + } + + /** + * Classify a dirty worktree as "merged + only obsolete dirty changes". + * + * Returns the classification payload when: + * - The branch has a confirmed merge signal (upstream-gone, local-merged, + * pr-merged, or already cleanup-eligible per metadata). + * - All dirty paths reported by `git status --porcelain` are tracked + * paths whose entries are absent on the remote default branch tip + * (i.e. modifying or deleting files the default branch no longer has). + * + * Returns null in every other case so the caller falls back to the + * generic `dirty_worktree` skip: + * - No merge signal, or signal cannot be confirmed. + * - Any dirty path is untracked (could be new content). + * - Any dirty path still exists on the default branch tip. + * - Default branch ref cannot be resolved. + * - Any git probe times out or fails. + * + * The classification keeps cleanup conservative: it never auto-removes + * dirty worktrees, but the distinct reason code lets reviewers spot the + * "safe to force" subset without manual archaeology. + * + * @param string $repo Repo directory name. + * @param string $branch Branch name. + * @param string $wt_path Worktree path. + * @param bool $skip_github Whether to skip GitHub API lookups. + * @param array $github_cache Run-local GitHub cache. + * @param array $fetched Per-repo fetch tracker. + * @param array $fetch_timeouts Per-repo fetch timeout tracker. + * @param mixed $metadata Worktree metadata. + * @param bool $include_repaired_metadata Whether repaired metadata counts as a cleanup signal. + * @return array{paths: array, merge_signal: string, pr_url?: ?string, default_ref: string}|null + */ + private function classify_dirty_obsolete_on_default_branch( + string $repo, + string $branch, + string $wt_path, + bool $skip_github, + array &$github_cache, + array &$fetched, + array &$fetch_timeouts, + $metadata, + bool $include_repaired_metadata + ): ?array { + if ( '' === $repo || '' === $branch || '' === $wt_path || ! is_dir($wt_path) ) { + return null; + } + + $primary_path = $this->get_primary_path($repo); + if ( ! is_dir($primary_path . '/.git') ) { + return null; + } + + // Refuse to classify if a previous worktree already saw this repo's + // fetch time out — the default-ref / merge-signal probes would race + // against stale data. + if ( isset($fetch_timeouts[ $repo ]) ) { + return null; + } + + // Ensure remote refs are fresh once per repo per cleanup run. Reuses + // the caller's `$fetched` tracker so this never double-fetches. + if ( empty($fetched[ $repo ]) ) { + $fetch = $this->run_git($primary_path, 'fetch --prune --quiet origin', self::CLEANUP_GIT_PROBE_TIMEOUT); + if ( is_wp_error($fetch) && $this->is_git_timeout_error($fetch) ) { + $fetch_timeouts[ $repo ] = $fetch; + return null; + } + $fetched[ $repo ] = true; + } + + $default_ref = $this->resolve_remote_default_ref($primary_path, self::CLEANUP_GIT_PROBE_TIMEOUT); + if ( $default_ref instanceof \WP_Error || null === $default_ref || '' === $default_ref ) { + return null; + } + + // Confirm the default ref actually resolves to a commit. If it doesn't, + // every `cat-file -e :` would fail and we'd mis-classify the + // whole worktree as obsolete-on-default. + $default_resolve = $this->run_git( + $primary_path, + sprintf('rev-parse --verify --quiet %s', escapeshellarg($default_ref . '^{commit}')), + self::CLEANUP_GIT_PROBE_TIMEOUT + ); + if ( is_wp_error($default_resolve) ) { + return null; + } + + $signal = null; + if ( is_array($metadata) && WorktreeContextInjector::has_cleanup_signal($metadata) ) { + $signal = array( + 'signal' => 'cleanup_eligible', + 'reason' => 'worktree finalized or explicitly marked cleanup_eligible', + ); + if ( ! empty($metadata['pr_url']) ) { + $signal['pr_url'] = (string) $metadata['pr_url']; + } + } elseif ( $include_repaired_metadata && is_array($metadata) && ! empty($metadata['metadata_repaired']) ) { + $signal = array( + 'signal' => 'repaired_metadata', + 'reason' => 'operator-approved cleanup of repaired metadata', + ); + } else { + $signal = $this->detect_merge_signal($primary_path, $repo, $branch, $skip_github, $github_cache); + } + + if ( ! is_array($signal) ) { + return null; + } + $signal_kind = (string) $signal['signal']; + $merged_signals = array( 'upstream-gone', 'local-merged', 'pr-merged', 'cleanup_eligible', 'repaired_metadata' ); + if ( ! in_array($signal_kind, $merged_signals, true) ) { + return null; + } + + // Untracked files are never "obsolete on default" — they could be new + // content the operator wants to preserve. Bail at the first hint of + // untracked content so this classifier stays conservative. + $untracked = $this->run_git( + $wt_path, + 'ls-files --others --exclude-standard', + self::CLEANUP_GIT_PROBE_TIMEOUT + ); + if ( $this->is_git_timeout_error($untracked) ) { + return null; + } + if ( ! is_wp_error($untracked) && '' !== trim( (string) ( $untracked['output'] ?? '' )) ) { + return null; + } + + // Modified/deleted/added tracked paths against the worktree's HEAD. + // `diff --name-only HEAD` covers staged and unstaged changes in one + // shot and avoids the porcelain status leading-whitespace quirk that + // `trim()`-on-output would corrupt. + $tracked = $this->run_git( + $wt_path, + 'diff --name-only HEAD', + self::CLEANUP_GIT_PROBE_TIMEOUT + ); + if ( is_wp_error($tracked) ) { + return null; + } + + $paths = array_values( + array_filter( + array_map('trim', explode("\n", (string) ( $tracked['output'] ?? '' ))), + fn( $line ) => '' !== $line + ) + ); + if ( array() === $paths ) { + return null; + } + + $obsolete_paths = array(); + foreach ( $paths as $path ) { + // `cat-file -e :` exits 0 when the path exists on the + // default branch tip. Non-zero (missing/ambiguous) means the path + // is absent there — exactly the case we want to classify as + // obsolete-on-default. + $probe = $this->run_git( + $primary_path, + sprintf('cat-file -e %s', escapeshellarg($default_ref . ':' . $path)), + self::CLEANUP_GIT_PROBE_TIMEOUT + ); + if ( is_wp_error($probe) && $this->is_git_timeout_error($probe) ) { + return null; + } + if ( is_wp_error($probe) ) { + $obsolete_paths[ $path ] = 'absent_on_default'; + continue; + } + // Path still exists on the default branch tip — dirty edit may + // still be relevant. Refuse to classify into the new bucket. + return null; + } + + if ( array() === $obsolete_paths ) { + return null; + } + + return array( + 'paths' => $obsolete_paths, + 'merge_signal' => $signal_kind, + 'pr_url' => $signal['pr_url'] ?? null, + 'default_ref' => $default_ref, + ); + } + + /** + * Detect whether a branch looks merged into the remote default branch. + * + * Returns an array with `signal` and `reason`, or null if no signal is + * present (leave the worktree alone). + * + * Signal priority: + * 1. `upstream-gone` — local branch's upstream tracking ref is gone. + * Typical after GitHub auto-deletes the head branch on PR merge. + * 2. `pr-merged` — GitHub API reports a closed+merged PR for this + * branch. Requires $skip_github = false and a configured PAT. + * + * @param string $primary_path Path to the primary git checkout. + * @param string $repo Primary repo directory name. + * @param string $branch Branch name. + * @param bool $skip_github If true, skip GitHub API lookup. + * @param array $github_cache Run-local cache for GitHub repo lookups. + * @return array{signal: string, reason: string, pr_url?: string}|null + */ + private function detect_merge_signal( string $primary_path, string $repo, string $branch, bool $skip_github, array &$github_cache = array() ): ?array { + $ref = 'refs/heads/' . $branch; + $format = '%(upstream:track)'; + $result = $this->run_git($primary_path, sprintf('for-each-ref --format=%s %s', escapeshellarg($format), escapeshellarg($ref)), self::CLEANUP_GIT_PROBE_TIMEOUT); + + if ( is_wp_error($result) && $this->is_git_timeout_error($result) ) { + return array( + 'signal' => 'probe-timeout', + 'reason' => $result->get_error_message(), + ); + } + + if ( ! is_wp_error($result) ) { + $track = trim( (string) ( $result['output'] ?? '' )); + if ( str_contains($track, 'gone') ) { + return array( + 'signal' => 'upstream-gone', + 'reason' => 'remote branch deleted (likely merged + auto-deleted)', + ); + } + } + + $local_merged = $this->detect_local_merged_signal($primary_path, $branch); + if ( null !== $local_merged ) { + return $local_merged; + } + + if ( $skip_github ) { + return null; + } + + $gh_slug = $this->resolve_github_slug($primary_path); + if ( null === $gh_slug ) { + return null; + } + + $pr = $this->find_closed_pr_for_branch($gh_slug, $branch, $github_cache); + if ( is_wp_error($pr) ) { + return array( + 'signal' => 'github-unknown', + 'reason' => 'unknown_github_state — ' . $pr->get_error_message(), + ); + } + if ( null === $pr ) { + return null; + } + + if ( ! empty($pr['merged_at']) ) { + return array( + 'signal' => 'pr-merged', + 'reason' => sprintf('PR #%d merged (%s)', $pr['number'], $pr['state']), + 'finalized_state' => WorktreeContextInjector::STATE_MERGED, + 'pr_url' => $pr['html_url'] ?? null, + ); + } + + return array( + 'signal' => 'pr-closed', + 'reason' => sprintf('PR #%d closed without merge', $pr['number']), + 'finalized_state' => WorktreeContextInjector::STATE_CLOSED, + 'pr_url' => $pr['html_url'] ?? null, + ); + } + + /** + * Detect branches already contained in the remote default branch using local git refs only. + * + * This catches manually-merged branches before falling through to the GitHub + * API, which keeps GitHub-backed cleanup bounded while avoiding unnecessary + * network calls for branches whose merge state is already locally provable. + * + * @param string $primary_path Path to the primary git checkout. + * @param string $branch Branch name. + * @return array{signal: string, reason: string}|null + */ + private function detect_local_merged_signal( string $primary_path, string $branch ): ?array { + $default_ref = $this->resolve_remote_default_ref($primary_path, self::CLEANUP_GIT_PROBE_TIMEOUT); + if ( $default_ref instanceof \WP_Error ) { + return array( + 'signal' => 'probe-timeout', + 'reason' => $default_ref->get_error_message(), + ); + } + if ( null === $default_ref ) { + return null; + } + + $branch_ref = 'refs/heads/' . $branch; + $result = $this->run_git( + $primary_path, + sprintf('rev-list --count %s..%s', escapeshellarg($default_ref), escapeshellarg($branch_ref)), + self::CLEANUP_GIT_PROBE_TIMEOUT + ); + if ( is_wp_error($result) && $this->is_git_timeout_error($result) ) { + return array( + 'signal' => 'probe-timeout', + 'reason' => $result->get_error_message(), + ); + } + if ( is_wp_error($result) ) { + return null; + } + + $unique_commits = (int) trim( (string) ( $result['output'] ?? '' )); + if ( 0 !== $unique_commits ) { + return null; + } + + return array( + 'signal' => 'local-merged', + 'reason' => sprintf('branch has no commits outside remote default (%s)', $default_ref), + ); + } + + /** + * Extract owner/repo slug from a primary checkout's origin remote. + * + * @param string $primary_path Primary checkout path. + * @return string|null `owner/repo` or null if origin is not a GitHub URL. + */ + private function resolve_github_slug( string $primary_path ): ?string { + $remote = $this->git_get_remote($primary_path); + if ( null === $remote || '' === $remote ) { + return null; + } + return GitHubRemote::slug($remote); + } + + /** + * Look up a closed PR for a branch via a cached GitHub API snapshot. + * + * Cleanup may inspect hundreds of worktrees for the same repo. Querying + * GitHub once per branch does not scale, so each repo gets one bounded + * closed-PR snapshot per cleanup run and branch lookups read that cache. + * + * @param string $slug owner/repo. + * @param string $branch Branch name. + * @param array $github_cache Run-local cache keyed by owner/repo. + * @return array|null|\WP_Error PR data, null when no PR matched, or lookup failure. + */ + private function find_closed_pr_for_branch( string $slug, string $branch, array &$github_cache = array() ): array|\WP_Error|null { + $lookup = $this->get_cleanup_github_lookup($slug, $github_cache); + if ( is_wp_error($lookup) ) { + return $lookup; + } + + if ( null !== $lookup && isset($lookup[ $branch ]) ) { + return $lookup[ $branch ]; + } + + return $this->find_pr_for_branch_direct($slug, $branch, $github_cache, true); + } + + /** + * Look up a PR for one branch directly via GitHub's head filter. + * + * The repo-level closed-PR snapshot is intentionally bounded for cleanup runs, + * so older PRs can be missed. This precise fallback keeps PR lifecycle as the + * source of truth without treating remote branch existence as liveness. + * + * @param string $slug owner/repo. + * @param string $branch Branch name. + * @param array $github_cache Run-local cache keyed by owner/repo and branch. + * @param bool $finalized_only If true, ignore open PRs. + * @return array|null|\WP_Error PR data, null when no matching PR exists, or lookup failure. + */ + private function find_pr_for_branch_direct( string $slug, string $branch, array &$github_cache = array(), bool $finalized_only = true ): array|\WP_Error|null { + $cache_key = $slug . '#head:' . ( $finalized_only ? 'finalized:' : 'any:' ) . $branch; + if ( array_key_exists($cache_key, $github_cache) ) { + return $github_cache[ $cache_key ]; + } + + if ( ! class_exists('\DataMachineCode\Abilities\GitHubAbilities') ) { + $github_cache[ $cache_key ] = null; + return null; + } + + $parts = explode('/', $slug, 2); + $owner = $parts[0]; + if ( '' === $owner || empty($parts[1]) ) { + $github_cache[ $cache_key ] = null; + return null; + } + + $pat = \DataMachineCode\Abilities\GitHubAbilities::getPat(array( 'repo' => $slug )); + if ( empty($pat) ) { + $github_cache[ $cache_key ] = null; + return null; + } + + $response = \DataMachineCode\Abilities\GitHubAbilities::apiGet( + GitHubRemote::apiUrl($slug, 'pulls'), + array( + 'head' => $owner . ':' . $branch, + 'sort' => 'updated', + 'direction' => 'desc', + 'state' => 'all', + 'per_page' => 5, + ), + $pat, + self::CLEANUP_GITHUB_TIMEOUT + ); + + if ( is_wp_error($response) ) { + $error = new \WP_Error( + 'github_cleanup_branch_lookup_failed', + sprintf('GitHub cleanup branch lookup failed for %s:%s: %s', $slug, $branch, $response->get_error_message()), + $response->get_error_data() + ); + $github_cache[ $cache_key ] = $error; + return $error; + } + + foreach ( (array) ( $response['data'] ?? array() ) as $pr ) { + if ( ! is_array($pr) ) { + continue; + } + + $head = is_array($pr['head'] ?? null) ? $pr['head'] : array(); + $head_repo = is_array($head['repo'] ?? null) ? (string) ( $head['repo']['full_name'] ?? '' ) : ''; + $head_ref = (string) ( $head['ref'] ?? '' ); + $state = (string) ( $pr['state'] ?? '' ); + if ( $head_repo !== $slug || $head_ref !== $branch ) { + continue; + } + if ( $finalized_only && 'closed' !== $state ) { + continue; + } + + $github_cache[ $cache_key ] = array( + 'number' => (int) ( $pr['number'] ?? 0 ), + 'state' => $state, + 'merged_at' => (string) ( $pr['merged_at'] ?? '' ), + 'html_url' => (string) ( $pr['html_url'] ?? '' ), + ); + + return $github_cache[ $cache_key ]; + } + + $github_cache[ $cache_key ] = null; + return null; + } + + /** + * Load and cache closed same-repo PRs for a GitHub repo. + * + * @param string $slug owner/repo. + * @param array $github_cache Run-local cache keyed by owner/repo. + * @return array|null|\WP_Error Branch-name map, null when GitHub is unavailable, or lookup failure. + */ + private function get_cleanup_github_lookup( string $slug, array &$github_cache ): array|\WP_Error|null { + if ( array_key_exists($slug, $github_cache) ) { + return $github_cache[ $slug ]; + } + + if ( ! class_exists('\DataMachineCode\Abilities\GitHubAbilities') ) { + $github_cache[ $slug ] = null; + return null; + } + + // Pass the repo through so credential profiles with `allowed_repos` + // can win over the global default profile when scanning closed PRs. + $pat = \DataMachineCode\Abilities\GitHubAbilities::getPat(array( 'repo' => $slug )); + if ( empty($pat) ) { + $github_cache[ $slug ] = null; + return null; + } + + $parts = explode('/', $slug, 2); + $owner = $parts[0]; + if ( '' === $owner || empty($parts[1]) ) { + $github_cache[ $slug ] = null; + return null; + } + + $closed = array(); + $url = GitHubRemote::apiUrl($slug, 'pulls'); + + for ( $page = 1; $page <= self::CLEANUP_GITHUB_MAX_PAGES; $page++ ) { + $response = \DataMachineCode\Abilities\GitHubAbilities::apiGet( + $url, + array( + 'state' => 'closed', + 'sort' => 'updated', + 'direction' => 'desc', + 'per_page' => 100, + 'page' => $page, + ), + $pat, + self::CLEANUP_GITHUB_TIMEOUT + ); + + if ( is_wp_error($response) ) { + $error = new \WP_Error( + 'github_cleanup_lookup_failed', + sprintf('GitHub cleanup lookup failed for %s: %s', $slug, $response->get_error_message()), + $response->get_error_data() + ); + $github_cache[ $slug ] = $error; + return $error; + } + + $items = (array) ( $response['data'] ?? array() ); + foreach ( $items as $pr ) { + $head = is_array($pr['head'] ?? null) ? $pr['head'] : array(); + $head_repo = is_array($head['repo'] ?? null) ? (string) ( $head['repo']['full_name'] ?? '' ) : ''; + $head_ref = (string) ( $head['ref'] ?? '' ); + if ( $head_repo !== $slug || '' === $head_ref ) { + continue; + } + + $closed[ $head_ref ] = array( + 'number' => (int) ( $pr['number'] ?? 0 ), + 'state' => (string) ( $pr['state'] ?? 'closed' ), + 'merged_at' => (string) ( $pr['merged_at'] ?? '' ), + 'html_url' => (string) ( $pr['html_url'] ?? '' ), + ); + } + + if ( count($items) < 100 ) { + break; + } + } + + $github_cache[ $slug ] = $closed; + return $closed; + } }