From ffa5db673ce2588e22661a7095dfb5e6e42eb08e Mon Sep 17 00:00:00 2001 From: BadgerOps Date: Tue, 24 Feb 2026 20:44:08 -0600 Subject: [PATCH] docs: consolidate and refresh repository documentation --- AGENTS.md | 68 + README.md | 319 +-- docs/airgap-project/.obsidian/app.json | 7 - docs/airgap-project/00 - Project Index.md | 46 - .../01 - Existing Repos Audit.md | 125 -- docs/airgap-project/02 - Architecture.md | 256 --- docs/airgap-project/03 - Configuration.md | 203 -- docs/airgap-project/04 - Transfer Workflow.md | 199 -- docs/airgap-project/05 - Web UI Design.md | 194 -- docs/airgap-project/06 - CLI Reference.md | 189 -- .../07 - Implementation Phases.md | 186 -- docs/airgap-project/08 - Migration Path.md | 178 -- .../09 - Technical Decisions.md | 212 -- .../10 - Retry and Resilience.md | 167 -- docs/airgap-project/11 - Development Setup.md | 179 -- docs/airgap-project/12 - Foundation Setup.md | 87 - .../13 - Architecture Reference.md | 72 - docs/airgap-project/14 - CLI Scaffolding.md | 104 - .../15 - Store Implementation.md | 89 - docs/airgap-project/16 - Component Wiring.md | 98 - docs/airgap-project/17 - Code Verification.md | 77 - docs/architecture.md | 91 + docs/configuration.md | 73 + docs/http-api.md | 69 + docs/plans/2026-02-19-export-import-design.md | 172 -- docs/plans/2026-02-19-export-import-plan.md | 1753 ---------------- .../2026-02-20-mirror-discovery-design.md | 94 - .../plans/2026-02-20-mirror-discovery-plan.md | 1859 ----------------- .../2026-02-20-provider-management-design.md | 153 -- .../2026-02-20-provider-management-plan.md | 1461 ------------- docs/release-process.md | 50 +- 31 files changed, 426 insertions(+), 8404 deletions(-) create mode 100644 AGENTS.md delete mode 100644 docs/airgap-project/.obsidian/app.json delete mode 100644 docs/airgap-project/00 - Project Index.md delete mode 100644 docs/airgap-project/01 - Existing Repos Audit.md delete mode 100644 docs/airgap-project/02 - Architecture.md delete mode 100644 
docs/airgap-project/03 - Configuration.md delete mode 100644 docs/airgap-project/04 - Transfer Workflow.md delete mode 100644 docs/airgap-project/05 - Web UI Design.md delete mode 100644 docs/airgap-project/06 - CLI Reference.md delete mode 100644 docs/airgap-project/07 - Implementation Phases.md delete mode 100644 docs/airgap-project/08 - Migration Path.md delete mode 100644 docs/airgap-project/09 - Technical Decisions.md delete mode 100644 docs/airgap-project/10 - Retry and Resilience.md delete mode 100644 docs/airgap-project/11 - Development Setup.md delete mode 100644 docs/airgap-project/12 - Foundation Setup.md delete mode 100644 docs/airgap-project/13 - Architecture Reference.md delete mode 100644 docs/airgap-project/14 - CLI Scaffolding.md delete mode 100644 docs/airgap-project/15 - Store Implementation.md delete mode 100644 docs/airgap-project/16 - Component Wiring.md delete mode 100644 docs/airgap-project/17 - Code Verification.md create mode 100644 docs/architecture.md create mode 100644 docs/configuration.md create mode 100644 docs/http-api.md delete mode 100644 docs/plans/2026-02-19-export-import-design.md delete mode 100644 docs/plans/2026-02-19-export-import-plan.md delete mode 100644 docs/plans/2026-02-20-mirror-discovery-design.md delete mode 100644 docs/plans/2026-02-20-mirror-discovery-plan.md delete mode 100644 docs/plans/2026-02-20-provider-management-design.md delete mode 100644 docs/plans/2026-02-20-provider-management-plan.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..98369f5 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,68 @@ +# AGENTS + +Repository-specific guidance for coding agents and contributors. + +## Scope + +This repository builds `airgap`, a Go CLI/server for offline content synchronization, transfer, and serving. 
+ +## Stack + +- Go `1.23` +- Cobra CLI +- SQLite via `modernc.org/sqlite` +- Server-rendered HTML (`html/template`) + static assets + +## Canonical Docs + +Keep these docs accurate when behavior changes: +- `README.md` +- `docs/architecture.md` +- `docs/configuration.md` +- `docs/http-api.md` +- `docs/release-process.md` +- `CHANGELOG.md` + +## Common Commands + +```bash +make build +make test +make lint +make fmt +``` + +If touching release/versioning logic: + +```bash +python3 scripts/validate_versions.py +``` + +## Key Runtime Behavior + +- Provider configs are stored in SQLite table `provider_configs`. +- YAML `providers:` entries are only used for first-run seeding when DB configs are empty. +- Active providers are created from enabled DB configs at startup. +- Server/API provider CRUD triggers hot-reload (`ReconfigureProviders`). + +## Provider Types + +Recognized types: +- `epel` +- `ocp_binaries` +- `ocp_clients` +- `rhcos` +- `container_images` +- `registry` +- `custom_files` + +Notes: +- `registry` is used as a target for image push operations. +- `custom_files` is accepted as a config type but not wired as a sync provider yet. + +## Code Change Expectations + +- Keep changes minimal and focused. +- Prefer updating docs in the same PR when behavior/flags/routes change. +- Do not add placeholder “implemented soon” docs without code backing. +- Preserve changelog heading format: `## X.Y.Z - YYYY-MM-DD` (newest first). diff --git a/README.md b/README.md index b4a99da..74c9b81 100644 --- a/README.md +++ b/README.md @@ -1,259 +1,122 @@ -# airgap - Unified Offline Sync Tool +# airgap -A Go-based framework for managing offline content synchronization across multiple sources including EPEL repositories, OpenShift Container Platform binaries, container images, and custom files. +`airgap` is a single Go binary for syncing and serving offline content for disconnected environments. 
-## Foundation Status +It supports: +- RPM repository mirroring (EPEL) +- OpenShift binaries and client artifact mirroring +- Container image mirroring metadata/blob sync +- Export/import workflows for physical transfer media +- A built-in web UI + HTTP API -This is the **project foundation** with core interfaces, configuration system, and database models. All major packages are structured and ready for implementation. +## Current Status -## Quick Start +The codebase is active and functional (not just scaffolding). CLI, server, sync engine, provider registry, transfer engine, and SQLite persistence are implemented. -### Project Structure -``` -/sessions/beautiful-clever-dijkstra/mnt/ocp-offline/ -├── cmd/airgap/ # Application entry point -│ └── main.go -├── internal/ -│ ├── api/ # REST API handlers (ready) -│ ├── config/ -│ │ └── config.go # Configuration system (implemented) -│ ├── engine/ # Sync engine & scheduler (ready) -│ ├── provider/ -│ │ ├── provider.go # Core interface (implemented) -│ │ ├── epel/ # EPEL provider (ready) -│ │ ├── ocp/ # OCP provider (ready) -│ │ ├── containers/ # Container provider (ready) -│ │ └── custom/ # Custom files provider (ready) -│ ├── store/ -│ │ └── models.go # Database models (implemented) -│ ├── download/ # Download engine (ready) -│ └── ui/ # Web UI (ready) -│ ├── templates/ # HTML templates -│ └── static/ # CSS/JS/Images -├── configs/ # Configuration examples -├── go.mod # Module definition -├── go.sum # Dependency checksums -├── PROJECT_SETUP.md # Setup documentation -├── ARCHITECTURE.md # System design -├── FILE_MANIFEST.txt # File inventory -└── README.md # This file -``` +## Requirements -## Implemented Components - -### 1. Provider Interface (`internal/provider/provider.go`) -Core abstraction for all content sources. 
- -**Interface Methods:** -- `Name()` - Provider identifier -- `Configure(cfg ProviderConfig)` - Load settings -- `Plan(ctx context.Context)` - Preview changes -- `Sync(ctx, plan, opts)` - Execute sync -- `Validate(ctx)` - Verify integrity - -**Key Types:** -- `SyncPlan` - Preview of what will change (read-only) -- `SyncAction` - Single file operation (download, delete, skip, update) -- `SyncReport` - Execution results with metrics -- `ValidationReport` - Integrity check results -- `Registry` - Provider discovery and management - -### 2. Configuration System (`internal/config/config.go`) -Unified YAML configuration for all providers. - -**Top-Level Sections:** -```yaml -server: - listen: "0.0.0.0:8080" - data_dir: "/var/lib/airgap" - db_path: "/var/lib/airgap/airgap.db" - -export: - split_size: "25GB" - compression: "zstd" - output_dir: "/mnt/transfer-disk" - manifest_name: "airgap-manifest.json" - -schedule: - enabled: true - default_cron: "0 2 * * 0" - -providers: - epel: { ... } - ocp-binaries: { ... } - rhcos: { ... } - container-images: { ... } - registry: { ... } - custom-files: { ... } -``` +- Go `1.23+` (for local builds) +- Optional external tools: + - `skopeo` (required for `registry push` operations) + - `createrepo_c` (optional but recommended for RPM metadata regeneration after import) -**Features:** -- `DefaultConfig()` - Sensible defaults -- `Load(path)` - Load from YAML -- `FindConfigFile()` - Auto-discover config -- `ProviderEnabled(name)` - Check if enabled -- `ParseProviderConfig[T]()` - Type-safe unmarshaling - -### 3. Database Models (`internal/store/models.go`) -Persistence layer for tracking state and history. - -**Models:** -- `SyncRun` - Audit trail of sync operations -- `FileRecord` - Inventory of synced files with checksums -- `Job` - Scheduled and completed operations -- `Transfer` - Export/import archive operations -- `FailedFileRecord` - Dead letter queue for retries - -## Design Patterns - -1. 
**Provider Pattern** - Interface-based extensibility -2. **Registry Pattern** - Centralized provider discovery -3. **Plan-Execute Separation** - Preview before applying -4. **Type-Safe Configuration** - Generics for config unmarshaling -5. **Data Transfer Objects** - Structured sync data -6. **Dead Letter Queue** - Failed file tracking -7. **Audit Trail** - Comprehensive logging - -## Key Features - -- **Multi-Source Support**: EPEL, OCP, RHCOS, container images, custom sources -- **Unified Configuration**: Single YAML controls everything -- **Plan Preview**: See what will change before executing -- **Automatic Retry**: Configurable retries with dead letter queue -- **Checksum Verification**: SHA256 validation on all downloads -- **Concurrent Downloads**: Configurable worker pool per provider -- **Integrity Validation**: Offline verification of synced content -- **Comprehensive Audit Trail**: All operations tracked in database - -## Installation & Building - -### Prerequisites -- Go 1.21+ -- YAML support (via gopkg.in/yaml.v3) - -### Build -```bash -cd /sessions/beautiful-clever-dijkstra/mnt/ocp-offline +## Build and Test -# Download dependencies -go mod download +```bash +make build +make test +``` -# Build all packages -go build ./... +Manual build: -# Build binary +```bash go build -o bin/airgap ./cmd/airgap - -# Run tests -go test ./... ``` -## Core Abstractions - -### Provider Interface -All content sources implement this interface: -```go -type Provider interface { - Name() string - Configure(cfg ProviderConfig) error - Plan(ctx context.Context) (*SyncPlan, error) - Sync(ctx context.Context, plan *SyncPlan, opts SyncOptions) (*SyncReport, error) - Validate(ctx context.Context) (*ValidationReport, error) -} +## Quick Start + +1. Copy and edit the example config: + +```bash +cp configs/airgap.example.yaml ./airgap.yaml ``` -### Sync Workflow -1. **Plan Phase**: Provider.Plan() → SyncPlan (what will change) -2. 
**Execute Phase**: Provider.Sync() → SyncReport (what happened) -3. **Validate Phase**: Provider.Validate() → ValidationReport (integrity check) - -### Registry Pattern -```go -registry := provider.NewRegistry() -registry.Register(epelProvider) -registry.Register(ocpProvider) -registry.Register(customProvider) - -// Discover providers -provider, found := registry.Get("epel") -allNames := registry.Names() +2. Run a sync (all enabled providers): + +```bash +./bin/airgap sync ``` -## Extension Points - -### Adding a New Provider - -1. Create package in `internal/provider/{name}/` -2. Implement `Provider` interface -3. Add config struct to `internal/config/config.go` -4. Add YAML section to configuration -5. Register in engine - -Example: -```go -// internal/provider/myProvider/provider.go -type MyProvider struct { - name string - config *MyProviderConfig -} - -func (p *MyProvider) Name() string { ... } -func (p *MyProvider) Configure(cfg ProviderConfig) error { ... } -func (p *MyProvider) Plan(ctx context.Context) (*SyncPlan, error) { ... } -func (p *MyProvider) Sync(ctx, plan, opts) (*SyncReport, error) { ... } -func (p *MyProvider) Validate(ctx context.Context) (*ValidationReport, error) { ... } +3. Start the UI/API server: + +```bash +./bin/airgap serve ``` -## File Inventory +Default listen address is `0.0.0.0:8080`. + +## Configuration + +Config file discovery order: +- `./airgap.yaml` +- `/etc/airgap/airgap.yaml` +- `$HOME/.config/airgap/airgap.yaml` + +Top-level sections: +- `server` +- `export` +- `schedule` +- `providers` + +For full details, see [docs/configuration.md](docs/configuration.md). + +## Provider Types + +Provider configs are stored in SQLite (`provider_configs`). YAML provider entries are used for first-run seeding when the table is empty. 
+ +Implemented provider types: +- `epel` +- `ocp_binaries` +- `ocp_clients` +- `rhcos` +- `container_images` -**Core Implementation (427 lines):** -- `internal/provider/provider.go` - 140 lines (Provider interface) -- `internal/config/config.go` - 199 lines (Configuration system) -- `internal/store/models.go` - 72 lines (Database models) -- `cmd/airgap/main.go` - 7 lines (Entry point) -- `go.mod` - 5 lines (Module definition) -- `go.sum` - 2 lines (Dependency checksums) +Supported as config/target types: +- `registry` (used as a destination for `registry push`) +- `custom_files` (accepted config type; sync implementation is not wired yet) -**Documentation:** -- `PROJECT_SETUP.md` - Setup guide -- `ARCHITECTURE.md` - System design details -- `FILE_MANIFEST.txt` - File inventory -- `README.md` - This file +## CLI Commands -## TODO / Known Limitations +- `sync`: sync one/all providers +- `validate`: validate local files against provider metadata +- `status`: provider status summary from store state +- `export`: create split `tar.zst` transfer archives + manifest +- `import`: verify/import transfer archives +- `serve`: web UI + API server +- `providers list`: list provider configs from SQLite +- `registry push`: push mirrored container images to a registry target +- `config show`: print loaded config +- `config set`: currently a stub (prints intended change; does not persist) -- **Multi-architecture support**: Currently OCP binaries and RHCOS URLs are hardcoded to `x86_64`. Need to support discovering and downloading content for multiple architectures (x86_64, aarch64, ppc64le, s390x). This affects the mirror discovery service, provider configurations, and the UI (architecture selector for OCP/RHCOS providers). +## Web UI and API -## Next Steps +Main pages: +- `/dashboard` +- `/providers` +- `/providers/{name}` +- `/transfer` +- `/ocp/clients` -1. Implement engine package (sync orchestration) -2. Implement download package (concurrent downloads, retry) -3. 
Implement store package (SQLite persistence) -4. Implement individual providers: - - EPEL repository provider - - OCP binaries provider - - RHCOS images provider - - Container images provider - - Custom files provider -5. Implement API package (REST endpoints) -6. Implement UI package (web interface) -7. Add tests and integration tests +API routes are documented in [docs/http-api.md](docs/http-api.md). -## Development Notes +## Architecture and Data Flow -- All packages follow standard Go conventions -- Interface-based design enables pluggable implementations -- Configuration uses gopkg.in/yaml.v3 for YAML support -- Database models ready for SQLite integration -- Structured error handling with context support -- Concurrent operation support via sync options +For architecture and runtime flow, see [docs/architecture.md](docs/architecture.md). -## License +## Release Process -See LICENSE file (if applicable) +Release/version workflow is changelog-driven. See [docs/release-process.md](docs/release-process.md). -## Support +## Changelog -For detailed architecture, see [ARCHITECTURE.md](ARCHITECTURE.md) -For setup instructions, see [PROJECT_SETUP.md](PROJECT_SETUP.md) -For file inventory, see [FILE_MANIFEST.txt](FILE_MANIFEST.txt) +All releases are tracked in [CHANGELOG.md](CHANGELOG.md). 
diff --git a/docs/airgap-project/.obsidian/app.json b/docs/airgap-project/.obsidian/app.json deleted file mode 100644 index ac81937..0000000 --- a/docs/airgap-project/.obsidian/app.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "alwaysUpdateLinks": true, - "useMarkdownLinks": false, - "showLineNumber": true, - "strictLineBreaks": false, - "readableLineLength": true -} diff --git a/docs/airgap-project/00 - Project Index.md b/docs/airgap-project/00 - Project Index.md deleted file mode 100644 index 7d0a34b..0000000 --- a/docs/airgap-project/00 - Project Index.md +++ /dev/null @@ -1,46 +0,0 @@ -# airgap — Project Index - -#airgap #index - -> Unified offline synchronization tool for disconnected/air-gapped environments. -> Single Go binary: sync, validate, export, import across OCP, EPEL, container images, and custom content. - -## Design Documents - -- [[01 - Existing Repos Audit]] — inventory of current BadgerOps repos and what carries forward -- [[02 - Architecture]] — system design, provider plugin model, project directory structure -- [[03 - Configuration]] — unified `airgap.yaml` config reference with full example -- [[04 - Transfer Workflow]] — the export/import engine, split tar.zst archives, physical media workflow -- [[05 - Web UI Design]] — htmx + Alpine.js pages, SSE streaming, template layout -- [[06 - CLI Reference]] — cobra commands, flags, and usage examples -- [[07 - Implementation Phases]] — 6-phase roadmap with deliverables per phase -- [[08 - Migration Path]] — how each existing repo maps into the unified codebase -- [[09 - Technical Decisions]] — ADR-style log of every major tech choice and rationale -- [[10 - Retry and Resilience]] — download reliability patterns, backoff, dead letter queue - -- [[11 - Development Setup]] — Nix flake, building, testing, CI - -## Subagent Implementation Logs - -- [[12 - Foundation Setup]] — Phase 1 project setup, directory structure, initial code stats -- [[13 - Architecture Reference]] — subagent architecture design 
output -- [[14 - CLI Scaffolding]] — Cobra CLI scaffolding details, command reference, patterns -- [[15 - Store Implementation]] — SQLite store API, models, migrations -- [[16 - Component Wiring]] — initialization sequence, dependency graph, error handling strategy -- [[17 - Code Verification]] — comprehensive verification report, build errors found and fixed - -## Quick Links - -- GitHub repos: [BadgerOps](https://github.com/BadgerOps) -- Key upstream tools: [oc-mirror](https://github.com/openshift/oc-mirror), [mirror-registry](https://github.com/quay/mirror-registry) - -## Status - -| Phase | Description | Status | -|-------|-------------|--------| -| 1 | Foundation + OCP providers | **Complete** — build errors fixed, Go 1.23 | -| 2 | EPEL provider + Web UI skeleton | **Complete** — EPEL provider, server routes, htmx templates | -| 3 | Export/Import engine | Not started | -| 4 | External tool wrappers | Not started | -| 5 | Web UI polish | Not started | -| 6 | Hardening | Not started | diff --git a/docs/airgap-project/01 - Existing Repos Audit.md b/docs/airgap-project/01 - Existing Repos Audit.md deleted file mode 100644 index c95d7ff..0000000 --- a/docs/airgap-project/01 - Existing Repos Audit.md +++ /dev/null @@ -1,125 +0,0 @@ -# Existing Repos Audit - -#airgap #audit #existing-code - -Back to [[00 - Project Index]] - -## Overview - -The current disconnected-environment toolchain is spread across six BadgerOps repos. Two are battle-tested production tools, two are archived forks of upstream OpenShift projects, and two are early prototypes. - -## Repo Inventory - -### epel-offline-sync (Python) — Production - -**Repo:** [BadgerOps/epel-offline-sync](https://github.com/BadgerOps/epel-offline-sync) -**Language:** Python 3.9 (stdlib only) -**Container:** UBI9 base, runs via Podman - -**What it does:** -Given an upstream EPEL repo URL in `config.ini`, it downloads `repomd.xml` and `primary.xml`, then uses `ElementTree` to identify packages to download. 
Deduplication is built in — it uses `hashlib` to compare local file hashes against the `common:checksum` in the XML manifest. Multi-threaded downloads. - -**Key patterns to carry forward:** -- repomd.xml → primary.xml parsing pipeline -- Checksum-based dedup (don't re-download unchanged packages) -- Config-driven repo definitions - -**Known limitation:** Does not remove packages that are removed upstream. The unified app will fix this — see [[03 - Configuration]] `cleanup_removed_packages`. - -**Destination in unified app:** `internal/provider/epel/` — ported from Python to Go. See [[08 - Migration Path]]. - ---- - -### ocpsync (Go) — Production - -**Repo:** [BadgerOps/ocpsync](https://github.com/BadgerOps/ocpsync) -**Language:** Go -**Version:** v1.0.0 - -**What it does:** -Downloads OCP client binaries and RHCOS images from `mirror.openshift.com`. Reads `config.yaml` to get base URLs, version lists, and ignore patterns. Downloads SHA256 checksum manifests, generates filtered file lists, downloads with retry (exponential backoff, 3 attempts), and validates each file against its checksum. - -**Core structs:** -- `Config` → `OcpBinaries` section + `Rhcos` section -- Each section has `BaseURL`, `Version[]`, `IgnoredFiles[]`, `OutputDir` - -**Key patterns to carry forward:** -- SHA256 manifest → filtered file list → download → validate pipeline -- Exponential backoff with retry -- Ignore-list filtering (skip windows, mac, cloud-specific images) -- Logrus structured logging (will migrate to `slog`) - -**Destination in unified app:** `internal/provider/ocp/binaries.go` and `rhcos.go` — near-direct port, restructured into the Provider interface. See [[08 - Migration Path]]. 
- ---- - -### mirror-registry (Go + Ansible) — Archived Fork - -**Repo:** [BadgerOps/mirror-registry](https://github.com/BadgerOps/mirror-registry) -**Upstream:** [quay/mirror-registry](https://github.com/quay/mirror-registry) -**Language:** Go 68.7%, Jinja 11.5%, Makefile 10.6%, Dockerfile 9.2% -**Status:** Archived Feb 2026 - -**What it does:** -CLI tool that orchestrates Ansible playbooks to deploy a standalone Quay registry (Quay + Redis + Postgres via Podman). Used to mirror OCP container images in disconnected environments. - -**Commands:** `install`, `upgrade`, `uninstall` -**Requirements:** RHEL 8 or Fedora, Podman v3.3+, OpenSSL, FQDN resolvable - -**Destination in unified app:** Wrapped as external tool — `internal/provider/containers/registry.go` invokes the `mirror-registry` binary. See [[09 - Technical Decisions]] for the wrap-vs-reimplement decision. - ---- - -### oc-mirror (Go) — Archived Fork - -**Repo:** [BadgerOps/oc-mirror](https://github.com/BadgerOps/oc-mirror) -**Upstream:** [openshift/oc-mirror](https://github.com/openshift/oc-mirror) -**Language:** Go -**Status:** Archived - -**What it does:** -Lifecycle manager for disconnected OCP environments. Mirrors container images, operators, and Helm charts based on an `ImageSetConfiguration` YAML. Handles differential updates. - -**Destination in unified app:** Wrapped as external tool — `internal/provider/containers/ocmirror.go`. See [[09 - Technical Decisions]]. - ---- - -### dlserver (Go) — Early Prototype - -**Repo:** [BadgerOps/dlserver](https://github.com/BadgerOps/dlserver) -**Language:** Go - -**What it does:** -REST API server on port 8080 with SQLite persistence for scheduling download jobs. Has `/getjobs` (GET), `/schedule` (POST), duplicate checking. Defines a `Job` struct with `Name`, `Time`, `URL`. - -**Key gap:** Has job CRUD but no actual download execution or file serving. - -**Destination in unified app:** Conceptually replaced by the web UI + API layer. 
The job scheduling model (SQLite-backed, REST endpoints) carries forward into [[05 - Web UI Design]] and the scheduler in [[02 - Architecture]]. - ---- - -### rpm-builder (Go) — Early Prototype - -**Repo:** [BadgerOps/rpm-builder](https://github.com/BadgerOps/rpm-builder) -**Language:** Go (100%) -**License:** AGPL-3.0 -**Status:** 2 commits - -**What it does:** -Wrapper to streamline RPM spec creation and build automation. - -**Destination in unified app:** Out of scope for v1. Could become a provider later if custom RPM building is needed in the disconnected environment. - -## Patterns Across Repos - -All the sync tools share the same fundamental loop: - -``` -1. Fetch upstream manifest (repomd.xml, SHA256SUMS, imageset config) -2. Diff manifest against local state -3. Download new/changed files -4. Validate downloads against checksums -5. Report results -``` - -This pattern becomes the [[02 - Architecture|Provider interface]]: `Plan()` does steps 1-2, `Sync()` does steps 3-4, `Validate()` does step 5 independently. 
diff --git a/docs/airgap-project/02 - Architecture.md b/docs/airgap-project/02 - Architecture.md deleted file mode 100644 index 093c9a6..0000000 --- a/docs/airgap-project/02 - Architecture.md +++ /dev/null @@ -1,256 +0,0 @@ -# Architecture - -#airgap #architecture #design - -Back to [[00 - Project Index]] | Related: [[09 - Technical Decisions]], [[03 - Configuration]] - -## System Overview - -`airgap` is a single Go binary with four interaction surfaces that share a common core engine: - -``` -┌──────────────────────────────────────────────────────────┐ -│ airgap binary (Go) │ -├──────────┬──────────┬───────────┬────────────────────────┤ -│ Web UI │ REST API │ CLI │ Background Scheduler │ -│ (htmx + │ (JSON) │ (cobra) │ (cron-style) │ -│ alpine) │ │ │ │ -├──────────┴──────────┴───────────┴────────────────────────┤ -│ Core Engine │ -│ ┌────────────┐ ┌──────────┐ ┌─────────┐ ┌────────────┐ │ -│ │ SyncManager│ │ Validate │ │ Export │ │ Import │ │ -│ │ │ │ Engine │ │ Engine │ │ Engine │ │ -│ └─────┬──────┘ └──────────┘ └─────────┘ └────────────┘ │ -│ │ │ -│ ┌─────┴──────────────────────────────────────────┐ │ -│ │ Content Providers │ │ -│ │ ┌───────┐ ┌────────┐ ┌──────────┐ ┌────────┐ │ │ -│ │ │ RPM │ │ OCP │ │Container │ │ Custom │ │ │ -│ │ │(EPEL) │ │Binaries│ │ Images │ │ Files │ │ │ -│ │ └───────┘ └────────┘ └──────────┘ └────────┘ │ │ -│ └────────────────────────────────────────────────┘ │ -│ │ -│ ┌────────────────────────────────────────────────┐ │ -│ │ External Tool Wrappers │ │ -│ │ ┌───────────┐ ┌────────────────┐ │ │ -│ │ │ oc-mirror │ │mirror-registry │ │ │ -│ │ └───────────┘ └────────────────┘ │ │ -│ └────────────────────────────────────────────────┘ │ -│ │ -│ ┌────────────────────────────────────────────────┐ │ -│ │ Storage Layer │ │ -│ │ SQLite (job state, history, config) │ │ -│ │ Filesystem (downloaded content, manifests) │ │ -│ └────────────────────────────────────────────────┘ │ -└──────────────────────────────────────────────────────────┘ -``` - -## 
Provider Plugin Model - -The central abstraction. Each content type implements this interface: - -```go -type Provider interface { - // Name returns the provider identifier (e.g., "epel", "ocp-binaries") - Name() string - - // Configure loads provider-specific settings from the unified config - Configure(cfg ProviderConfig) error - - // Plan compares upstream manifest against local state, returns - // a list of actions (download, delete, skip) without executing them - Plan(ctx context.Context) (*SyncPlan, error) - - // Sync executes the plan — downloads, validates, retries - Sync(ctx context.Context, plan *SyncPlan, opts SyncOptions) (*SyncReport, error) - - // Validate checks integrity of all local content against manifests - Validate(ctx context.Context) (*ValidationReport, error) -} -``` - -### SyncPlan - -The `Plan()` method returns a `SyncPlan` — a list of file-level actions computed by diffing the upstream manifest against local state. This is a dry-run by default, giving the admin visibility into what will change before anything downloads. 
- -```go -type SyncPlan struct { - Provider string - Actions []SyncAction - TotalSize int64 // bytes to download - TotalFiles int - Timestamp time.Time -} - -type SyncAction struct { - Path string // relative path within provider output dir - Action ActionType // Download, Delete, Skip, Update - Size int64 - Checksum string // expected SHA256 - Reason string // "new file", "checksum mismatch", "removed upstream" -} -``` - -### SyncReport - -Returned after `Sync()` executes a plan: - -```go -type SyncReport struct { - Provider string - StartTime time.Time - EndTime time.Time - Downloaded int - Deleted int - Skipped int - Failed []FailedFile // goes to dead letter queue - BytesTransferred int64 -} -``` - -## Built-in Providers - -| Provider | Package | Replaces | Notes | -|----------|---------|----------|-------| -| `epel` | `internal/provider/epel/` | [[01 - Existing Repos Audit#epel-offline-sync|epel-offline-sync]] | repomd.xml/primary.xml parsing, RPM dedup | -| `ocp-binaries` | `internal/provider/ocp/` | [[01 - Existing Repos Audit#ocpsync|ocpsync]] | OCP clients from mirror.openshift.com | -| `rhcos` | `internal/provider/ocp/` | [[01 - Existing Repos Audit#ocpsync|ocpsync]] | RHCOS images, same package | -| `container-images` | `internal/provider/containers/` | [[01 - Existing Repos Audit#oc-mirror|oc-mirror]] | Wraps `oc-mirror` CLI | -| `registry` | `internal/provider/containers/` | [[01 - Existing Repos Audit#mirror-registry|mirror-registry]] | Wraps `mirror-registry` CLI | -| `custom-files` | `internal/provider/custom/` | New | Generic HTTP/S3 file sync | - -## Core Engine Components - -### SyncManager (`internal/engine/sync.go`) - -Orchestrates provider execution. Loads enabled providers from config, runs `Plan()` then `Sync()`, records results to the SQLite store. Supports running all providers or a filtered subset. - -### Export Engine (`internal/engine/export.go`) - -See [[04 - Transfer Workflow]] for full detail. 
Creates split tar.zst archives with a transfer manifest. - -### Import Engine (`internal/engine/import.go`) - -See [[04 - Transfer Workflow]]. Validates archive integrity, extracts, rebuilds repo metadata. - -### Validate Engine (`internal/engine/validate.go`) - -Runs `Validate()` across all providers — checks every local file against its expected checksum. Reports discrepancies without fixing them (the admin decides whether to re-sync or accept). - -### Scheduler (`internal/engine/scheduler.go`) - -Cron-style job scheduler. Runs sync jobs on configured intervals. Uses `robfig/cron` or similar Go cron library. Jobs are persisted in SQLite so they survive restarts. - -## Storage Layer - -### SQLite (`internal/store/`) - -Pure-Go SQLite via `modernc.org/sqlite` — no CGO, no external deps. Single file at `db_path` from config. - -**Tables:** - -- `sync_runs` — history of every sync execution (provider, start/end time, files downloaded/failed, bytes) -- `file_inventory` — current state of all downloaded files (path, size, checksum, provider, last_verified) -- `jobs` — scheduled and completed jobs (type, cron expression, last run, next run, status) -- `transfers` — export/import history (direction, timestamp, archive count, manifest hash) -- `failed_files` — dead letter queue (file path, provider, error, retry count, last attempt) -- `migrations` — schema version tracking - -### Filesystem - -Downloaded content lives under `data_dir` organized by provider: - -``` -/var/lib/airgap/ -├── epel/ -│ ├── 9/ -│ │ ├── repodata/ -│ │ └── Packages/ -│ └── 8/ -├── ocp-clients/ -│ ├── latest-4.17/ -│ ├── latest-4.18/ -│ └── latest-4.19/ -├── rhcos/ -│ ├── 4.17/ -│ ├── 4.18/ -│ └── 4.19/ -├── container-images/ -│ └── (oc-mirror output) -└── airgap.db -``` - -## Download Client (`internal/download/`) - -Shared HTTP download infrastructure used by all providers. See [[10 - Retry and Resilience]] for detail. 
- -Key capabilities: concurrent worker pool (goroutine-based), exponential backoff with jitter, HTTP Range resume for large files, streaming SHA256 validation during download, configurable per-provider concurrency. - -## Project Directory Structure - -``` -airgap/ -├── cmd/ -│ └── airgap/ -│ └── main.go # cobra root command setup -├── internal/ -│ ├── api/ -│ │ ├── router.go # chi router, mounts all routes -│ │ ├── handlers_dashboard.go -│ │ ├── handlers_providers.go -│ │ ├── handlers_jobs.go -│ │ ├── handlers_transfer.go -│ │ ├── handlers_settings.go -│ │ └── middleware.go # logging, recovery -│ ├── config/ -│ │ ├── config.go # unified config struct + YAML loader -│ │ └── config_test.go -│ ├── engine/ -│ │ ├── sync.go # SyncManager orchestrates providers -│ │ ├── export.go # tar split + manifest generation -│ │ ├── import.go # validate + extract + rebuild -│ │ ├── validate.go # integrity checking -│ │ └── scheduler.go # cron-based job scheduling -│ ├── provider/ -│ │ ├── provider.go # Provider interface definition -│ │ ├── epel/ -│ │ │ ├── epel.go -│ │ │ └── epel_test.go -│ │ ├── ocp/ -│ │ │ ├── binaries.go -│ │ │ ├── rhcos.go -│ │ │ └── ocp_test.go -│ │ ├── containers/ -│ │ │ ├── ocmirror.go -│ │ │ └── registry.go -│ │ └── custom/ -│ │ └── files.go -│ ├── store/ -│ │ ├── sqlite.go -│ │ ├── models.go -│ │ └── migrations.go -│ ├── download/ -│ │ ├── client.go -│ │ ├── pool.go -│ │ └── client_test.go -│ └── ui/ -│ ├── templates/ -│ │ ├── layout.html -│ │ ├── dashboard.html -│ │ ├── providers.html -│ │ ├── provider_detail.html -│ │ ├── jobs.html -│ │ ├── transfer.html -│ │ └── settings.html -│ └── static/ -│ ├── htmx.min.js -│ ├── alpine.min.js -│ └── styles.css -├── configs/ -│ └── airgap.example.yaml -├── Containerfile -├── Makefile -├── go.mod -├── go.sum -└── README.md -``` diff --git a/docs/airgap-project/03 - Configuration.md b/docs/airgap-project/03 - Configuration.md deleted file mode 100644 index eca8596..0000000 --- a/docs/airgap-project/03 - 
Configuration.md +++ /dev/null @@ -1,203 +0,0 @@ -# Configuration - -#airgap #config - -Back to [[00 - Project Index]] | Related: [[02 - Architecture]], [[01 - Existing Repos Audit]] - -## Overview - -Single YAML file (`airgap.yaml`) replaces the scattered `config.ini` (epel-offline-sync) and `config.yaml` (ocpsync). Loaded by `internal/config/config.go` into typed Go structs. - -Config file search order: -1. `--config` CLI flag (explicit path) -2. `./airgap.yaml` (current directory) -3. `/etc/airgap/airgap.yaml` (system-wide) -4. `$HOME/.config/airgap/airgap.yaml` (user-level) - -## Full Reference - -```yaml -# airgap.yaml — complete configuration reference - -# ─── Server Settings ───────────────────────────────────── -server: - listen: "0.0.0.0:8080" # web UI + API bind address - data_dir: "/var/lib/airgap" # root directory for all downloaded content - db_path: "/var/lib/airgap/airgap.db" # SQLite database location - -# ─── Export/Import Settings ────────────────────────────── -export: - split_size: "25GB" # max size per tar archive part - compression: "zstd" # "zstd" (fast) or "gzip" (compatible) - output_dir: "/mnt/transfer-disk" # default export destination - manifest_name: "airgap-manifest.json" - -# ─── Scheduler Settings ───────────────────────────────── -schedule: - enabled: true - default_cron: "0 2 * * 0" # default: weekly Sunday 2am - -# ─── Provider Settings ─────────────────────────────────── -providers: - - # --- EPEL RPM Repositories --- - epel: - enabled: true - repos: - - name: "epel-9" - base_url: "https://dl.fedoraproject.org/pub/epel/9/Everything/x86_64/" - output_dir: "epel/9" - - name: "epel-8" - base_url: "https://dl.fedoraproject.org/pub/epel/8/Everything/x86_64/" - output_dir: "epel/8" - max_concurrent_downloads: 8 - retry_attempts: 3 - cleanup_removed_packages: true # NEW: removes packages deleted upstream - - # --- OCP Client Binaries --- - ocp_binaries: - enabled: true - base_url: 
"https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/" - versions: - - "latest-4.17" - - "latest-4.18" - - "latest-4.19" - ignored_patterns: - - "windows" - - "mac" - - "arm64" - - "aarch64" - - "ppc64le" - output_dir: "ocp-clients" - retry_attempts: 3 - - # --- RHCOS Images --- - rhcos: - enabled: true - base_url: "https://mirror.openshift.com/pub/openshift-v4/x86_64/dependencies/rhcos/" - versions: - - "4.17/latest" - - "4.18/latest" - - "4.19/latest" - ignored_patterns: - - "aliyun" - - "aws" - - "azure" - - "gcp" - - "ibmcloud" - - "nutanix" - - "openstack" - - "vmware" - output_dir: "rhcos" - retry_attempts: 3 - - # --- Container Images (oc-mirror wrapper) --- - container_images: - enabled: true - oc_mirror_binary: "/usr/local/bin/oc-mirror" - imageset_config: "/etc/airgap/imageset-config.yaml" - output_dir: "container-images" - - # --- Registry Management (mirror-registry wrapper) --- - registry: - enabled: true - mirror_registry_binary: "/usr/local/bin/mirror-registry" - quay_root: "/var/lib/quay" - - # --- Custom File Sources --- - custom_files: - enabled: false - sources: [] - # - name: "helm-charts" - # url: "https://example.com/charts/" - # checksum_url: "https://example.com/charts/SHA256SUMS" - # output_dir: "helm-charts" -``` - -## Config Migration from Existing Repos - -### From epel-offline-sync `config.ini` - -```ini -# OLD (config.ini) -[upstream] -base_url = https://dl.fedoraproject.org/pub/epel/9/Everything/x86_64/ -``` - -Maps to: - -```yaml -# NEW (airgap.yaml) -providers: - epel: - repos: - - name: "epel-9" - base_url: "https://dl.fedoraproject.org/pub/epel/9/Everything/x86_64/" -``` - -### From ocpsync `config.yaml` - -```yaml -# OLD (config.yaml) -ocp_binaries: - base_url: "https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/" - version: - - "latest-4.17" - ignored_files: - - "windows" - output_dir: "/data/ocp-clients" -``` - -Maps to: - -```yaml -# NEW (airgap.yaml) -providers: - ocp_binaries: - base_url: 
"https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/" - versions: - - "latest-4.17" - ignored_patterns: - - "windows" - output_dir: "ocp-clients" # relative to data_dir now -``` - -Key changes: `version` → `versions`, `ignored_files` → `ignored_patterns`, `output_dir` is now relative to `server.data_dir` instead of absolute. - -## Go Struct Mapping - -```go -type Config struct { - Server ServerConfig `yaml:"server"` - Export ExportConfig `yaml:"export"` - Schedule ScheduleConfig `yaml:"schedule"` - Providers map[string]ProviderConfig `yaml:"providers"` -} - -type ServerConfig struct { - Listen string `yaml:"listen"` - DataDir string `yaml:"data_dir"` - DBPath string `yaml:"db_path"` -} - -type ExportConfig struct { - SplitSize string `yaml:"split_size"` - Compression string `yaml:"compression"` - OutputDir string `yaml:"output_dir"` - ManifestName string `yaml:"manifest_name"` -} -``` - -Provider configs are loaded as `map[string]any` initially, then each provider's `Configure()` method unmarshals its own section into typed structs. This keeps the config loader generic while providers own their schema. - -## Runtime Config Modification - -The CLI supports modifying config at runtime: - -```bash -airgap config show # dump effective config -airgap config set providers.epel.enabled true # toggle provider -airgap config set export.split_size "50GB" # change split size -``` - -The web UI [[05 - Web UI Design#Settings|Settings page]] provides a form-based editor for the same operations. 
diff --git a/docs/airgap-project/04 - Transfer Workflow.md b/docs/airgap-project/04 - Transfer Workflow.md deleted file mode 100644 index 4148dd8..0000000 --- a/docs/airgap-project/04 - Transfer Workflow.md +++ /dev/null @@ -1,199 +0,0 @@ -# Transfer Workflow - -#airgap #transfer #export #import - -Back to [[00 - Project Index]] | Related: [[02 - Architecture]], [[06 - CLI Reference]] - -## Overview - -The transfer workflow is the critical new capability that doesn't exist in any of the [[01 - Existing Repos Audit|current repos]]. It packages synced content for physical media transfer from an internet-connected machine (Machine A) to a disconnected machine (Machine B). - -## The Two-Machine Model - -``` -┌─────────────────────┐ ┌─────────────────────┐ -│ Machine A │ USB/ │ Machine B │ -│ (Internet) │ disk │ (Air-gapped) │ -│ │ ─────► │ │ -│ airgap sync --all │ │ airgap import │ -│ airgap export │ │ airgap serve │ -└─────────────────────┘ └─────────────────────┘ -``` - -Both machines run the same `airgap` binary. Machine A uses the sync + export commands. Machine B uses import + serve. The web UI works on both sides. - -## Export Process (Machine A) - -### CLI - -```bash -airgap export --to /mnt/usb # export all enabled providers -airgap export --to /mnt/usb --provider epel,rhcos # export subset -airgap export --to /mnt/usb --split-size 10GB # override split size -``` - -### Steps - -1. **Snapshot sync state** — query the SQLite `file_inventory` table for all files belonging to the selected providers. This captures what's downloaded, versions, and checksums at the exact moment of export. - -2. 
**Generate transfer manifest** — `airgap-manifest.json`: - ```json - { - "version": "1.0", - "created": "2026-02-19T14:30:00Z", - "source_host": "sync-server.example.com", - "providers": { - "epel": { - "repos": ["epel-9", "epel-8"], - "file_count": 4521, - "total_size": 18739281920 - }, - "ocp_binaries": { - "versions": ["latest-4.18"], - "file_count": 12, - "total_size": 2147483648 - } - }, - "archives": [ - { - "name": "airgap-transfer-001.tar.zst", - "size": 26843545600, - "sha256": "abc123...", - "files": ["epel/9/Packages/a-*.rpm", "..."] - }, - { - "name": "airgap-transfer-002.tar.zst", - "size": 19327352832, - "sha256": "def456...", - "files": ["ocp-clients/latest-4.18/*", "..."] - } - ], - "total_archives": 2, - "total_size": 46170898432, - "file_inventory": [ - {"path": "epel/9/Packages/ansible-core-2.15.0-1.el9.x86_64.rpm", "size": 3145728, "sha256": "..."}, - "..." - ] - } - ``` - -3. **Create split tar archives** — custom tar writer that tracks cumulative bytes and rolls to a new archive file when `split_size` is reached: - - `airgap-transfer-001.tar.zst` - - `airgap-transfer-002.tar.zst` - - etc. - - Compression: zstd by default (3-5x faster than gzip at similar ratios) - - The manifest JSON is embedded in archive 001 AND written as a standalone file for redundancy - -4. **Generate per-archive checksums** — `.sha256` sidecar file for each archive part. Used during import to catch corrupt copies without extracting. - -5. **Write TRANSFER-README.txt** — human-readable instructions for the person physically carrying the disk: - ``` - AIRGAP TRANSFER PACKAGE - ======================= - Created: 2026-02-19 14:30 UTC - Source: sync-server.example.com - Archives: 2 parts - Total size: 43 GB - - TO IMPORT: - 1. Mount this disk on the disconnected machine - 2. Run: airgap import --from /mnt/usb - 3. 
The tool will validate all archives before extracting - - IF AN ARCHIVE IS CORRUPT: - - The import tool will tell you which archive(s) failed - - Re-copy only the failed archive from the source machine - - Re-run: airgap import --from /mnt/usb - ``` - -### Resulting Disk Layout - -``` -/mnt/usb/ -├── airgap-manifest.json # standalone manifest (readable) -├── airgap-manifest.json.sha256 # manifest checksum -├── airgap-transfer-001.tar.zst # split archive part 1 -├── airgap-transfer-001.tar.zst.sha256 -├── airgap-transfer-002.tar.zst # split archive part 2 -├── airgap-transfer-002.tar.zst.sha256 -└── TRANSFER-README.txt # human-readable guide -``` - -## Import Process (Machine B) - -### CLI - -```bash -airgap import --from /mnt/usb # full import -airgap import --from /mnt/usb --verify-only # just check integrity -airgap import --from /mnt/usb --force # skip checksum verification (not recommended) -``` - -### Steps - -1. **Read manifest** — parse `airgap-manifest.json`, verify all expected archive parts are present on the media. - -2. **Validate archives** — compute SHA256 of each `.tar.zst` file, compare against manifest. Report per-archive pass/fail: - ``` - Validating archives... - [OK] airgap-transfer-001.tar.zst (25.0 GB) ✓ - [FAIL] airgap-transfer-002.tar.zst — expected sha256 def456..., got 789abc... - - 1 of 2 archives failed validation. - Re-copy the failed archive(s) from source and re-run import. - ``` - -3. **Partial re-transfer support** — if only some archives are corrupt, the admin re-copies just those files from the source disk. On re-run, already-validated archives are skipped (checksums cached in local state). - -4. **Extract content** — decompress and untar each archive into `data_dir`, preserving the provider directory structure. - -5. 
**Rebuild metadata**: - - RPM repos: run `createrepo` (or `createrepo_c`) to rebuild `repodata/` - - Container images: load into local registry via `oc-mirror` or `skopeo` - - Update the local SQLite `file_inventory` with the imported files - -6. **Update state DB** — record the import in `transfers` table (timestamp, archive count, manifest hash, provider list). - -## Incremental Transfers - -Future enhancement (post-v1): support differential exports that only include files changed since the last export. The manifest would include a `based_on` field referencing the previous manifest hash, and the import would merge rather than replace. - -For v1, each export is a full snapshot of the selected providers. This is simpler and more robust — if any previous import was incomplete, a full re-export fixes it. - -## Web UI Integration - -See [[05 - Web UI Design#Transfer Page]] for the export/import wizard UI. Key features: - -- Provider selection checkboxes -- Split size slider/input -- Destination path browser -- Real-time progress bar during archive creation -- Per-archive validation status during import -- Transfer history log - -## Split Archive Implementation Notes - -The custom tar writer (in `internal/engine/export.go`) works like this: - -```go -// Pseudocode -archiveNum := 1 -currentSize := 0 -writer := newArchiveWriter(archiveNum) - -for _, file := range filesToExport { - if currentSize + file.Size > splitSize { - writer.Close() - archiveNum++ - currentSize = 0 - writer = newArchiveWriter(archiveNum) - } - writer.AddFile(file) - currentSize += file.Size -} -writer.Close() -``` - -Each archive is independently decompressible — you don't need all parts to extract any single part. The manifest tells you which files are in which archive. - -This is different from `split` on a single tar (where you need all parts to reconstruct). Our approach means a corrupt archive only affects the files in that archive, not the entire transfer. 
diff --git a/docs/airgap-project/05 - Web UI Design.md b/docs/airgap-project/05 - Web UI Design.md deleted file mode 100644 index 0d2a10d..0000000 --- a/docs/airgap-project/05 - Web UI Design.md +++ /dev/null @@ -1,194 +0,0 @@ -# Web UI Design - -#airgap #ui #htmx #alpine - -Back to [[00 - Project Index]] | Related: [[02 - Architecture]], [[06 - CLI Reference]] - -## Tech Stack - -- **htmx** — server-driven interactions, no client-side routing -- **Alpine.js** — lightweight reactive state for dropdowns, toggles, modals -- **`html/template`** (Go stdlib) — server-rendered templates with auto-escaping -- **Minimal CSS** — classless base (e.g., Simple.css or Pico.css) + a few custom utility classes -- **No build step** — static assets embedded in the Go binary via `embed.FS` - -All templates live in `internal/ui/templates/`. Static JS/CSS in `internal/ui/static/`. - -## Layout - -Common layout with sidebar navigation: - -``` -┌──────────────────────────────────────────────┐ -│ airgap [status]│ -├────────────┬─────────────────────────────────┤ -│ │ │ -│ Dashboard │ [Page Content] │ -│ Providers │ │ -│ Jobs │ │ -│ Transfer │ │ -│ Settings │ │ -│ │ │ -└────────────┴─────────────────────────────────┘ -``` - -## Pages - -### Dashboard (`/`) - -At-a-glance health for the entire sync ecosystem. - -**Content:** -- Provider status cards — one per enabled provider showing: name, last sync time, file count, total size, status (healthy/warning/error) -- Quick action buttons: "Sync All", "Validate All", "Export" -- Recent activity feed — last 10 sync runs / transfers with timestamps and outcomes - -**htmx patterns:** -- Cards auto-refresh every 30s via `hx-trigger="every 30s"` polling -- "Sync All" button uses `hx-post="/api/sync"` with `hx-swap="none"` (triggers via SSE instead) -- Activity feed uses `hx-get="/partials/activity"` with `hx-trigger="every 10s"` - -### Providers (`/providers`) - -Card grid of all configured providers. 
- -**Content:** -- Card per provider: name, enabled/disabled toggle, repo count or version list, last sync summary -- Click card → drill into [[#Provider Detail]] -- "Add Provider" button (for custom-files type) - -**htmx patterns:** -- Enable/disable toggle: `hx-patch="/api/providers/{name}" hx-vals='{"enabled": true}'` -- Cards link via standard `<a href>` links - -### Provider Detail (`/providers/{name}`) - -Deep view into a single provider. - -**Content:** -- Config summary (base URL, versions, ignore patterns) -- "Edit Config" button → inline form (Alpine.js toggle) -- "Sync Now" button → triggers sync with live log streaming -- File browser — sortable table of downloaded files (name, size, checksum, last modified) -- Sync history — table of past sync runs with diffs (added/removed/unchanged counts) - -**htmx patterns:** -- Sync trigger: `hx-post="/api/providers/{name}/sync"` → redirects to SSE log stream -- File browser: paginated with `hx-get="/partials/providers/{name}/files?page=2"` and `hx-swap="innerHTML"` -- Config edit form: `hx-put="/api/providers/{name}/config"` with `hx-target="#config-summary"` - -### Sync Jobs (`/jobs`) - -**Content:** -- Active jobs — currently running syncs with progress indicators -- Scheduled jobs — next scheduled run per provider -- Job history — past runs with status, duration, file counts - -**htmx patterns:** -- Active job progress: SSE connection via `hx-ext="sse" sse-connect="/api/jobs/{id}/stream"` -- Progress bar updates via SSE events with `sse-swap="progress"` - -### Transfer Page (`/transfer`) {#Transfer Page} - -Export and import wizards. - -**Export wizard (tabs or steps):** -1. Select providers (checkboxes) -2. Choose destination path (text input, defaults to config `export.output_dir`) -3. Set split size (input with sensible default from config) -4. Review summary → "Start Export" button -5. Progress tracking — per-archive creation status, overall percentage - -**Import wizard:** -1. Set source path (text input) -2.
"Scan" button → reads manifest, shows summary (provider list, file counts, archive count) -3. "Validate" → per-archive checksum status (pass/fail with details) -4. "Import" → extraction progress with per-archive status -5. Final report — files imported, repos rebuilt, any errors - -**htmx patterns:** -- Wizard steps use `hx-get="/partials/transfer/export/step2"` swapping into a target div -- Export/import progress via SSE -- Alpine.js manages wizard step state client-side - -### Settings (`/settings`) - -**Content:** -- Server config (listen address, data dir, db path) — read-only display -- Export defaults (split size, compression, output dir) — editable form -- Schedule config (enabled, cron expression) — editable form -- External tool paths (oc-mirror binary, mirror-registry binary) — editable form -- Log viewer — tail of recent log output, filterable by level - -**htmx patterns:** -- Each config section is a form with `hx-put="/api/settings/{section}"` and `hx-target="this"` (replaces form with success message, then swaps back) -- Log viewer: `hx-get="/partials/logs?level=error&lines=100"` with polling - -## SSE (Server-Sent Events) for Live Streaming - -Long-running operations (sync, export, import) stream progress via SSE: - -``` -GET /api/jobs/{id}/stream -Content-Type: text/event-stream - -event: progress -data: {"percent": 45, "current_file": "epel/9/Packages/ansible-core-2.15.rpm", "speed": "12.5 MB/s"} - -event: progress -data: {"percent": 46, "current_file": "epel/9/Packages/ansible-lint-6.0.rpm", "speed": "11.8 MB/s"} - -event: log -data: {"level": "info", "message": "Downloaded 2145 of 4521 files"} - -event: complete -data: {"status": "success", "duration": "12m34s", "files_downloaded": 4521} -``` - -htmx SSE integration: - -```html -
-
- -
-
- -
-
-``` - -## API Endpoints Summary - -All UI pages are backed by JSON API endpoints that the CLI can also use: - -| Method | Path | Description | -|--------|------|-------------| -| GET | `/api/status` | Overall system status | -| GET | `/api/providers` | List all providers with status | -| GET | `/api/providers/{name}` | Provider detail | -| PATCH | `/api/providers/{name}` | Update provider config | -| POST | `/api/providers/{name}/sync` | Trigger sync | -| GET | `/api/providers/{name}/files` | List provider files | -| GET | `/api/jobs` | List all jobs | -| GET | `/api/jobs/{id}` | Job detail | -| GET | `/api/jobs/{id}/stream` | SSE stream for running job | -| POST | `/api/sync` | Trigger sync for all providers | -| POST | `/api/validate` | Trigger validation | -| POST | `/api/export` | Start export | -| POST | `/api/import` | Start import | -| GET | `/api/transfers` | Transfer history | -| GET | `/api/settings` | Current settings | -| PUT | `/api/settings/{section}` | Update settings | -| GET | `/api/logs` | Recent log entries | - -## Embedded Static Assets - -Using Go's `embed.FS` to bundle everything into the single binary: - -```go -//go:embed templates/* static/* -var uiFS embed.FS -``` - -This means `airgap serve` works with zero external file dependencies — the web UI is compiled into the binary. For development, a `--dev` flag can serve from the filesystem instead for hot-reload. diff --git a/docs/airgap-project/06 - CLI Reference.md b/docs/airgap-project/06 - CLI Reference.md deleted file mode 100644 index dbb8bbc..0000000 --- a/docs/airgap-project/06 - CLI Reference.md +++ /dev/null @@ -1,189 +0,0 @@ -# CLI Reference - -#airgap #cli #cobra - -Back to [[00 - Project Index]] | Related: [[02 - Architecture]], [[05 - Web UI Design]] - -## Overview - -Built with [Cobra](https://github.com/spf13/cobra). Every operation available in the [[05 - Web UI Design|web UI]] is also available via CLI. 
The CLI is the primary interface for scripting and automation (cron jobs, CI/CD pipelines, etc). - -## Global Flags - -``` ---config string Path to config file (default: search order) ---data-dir string Override data directory ---log-level string Log level: debug, info, warn, error (default: info) ---log-format string Log format: text, json (default: text) ---quiet Suppress non-error output -``` - -## Commands - -### `airgap serve` - -Start the web UI and API server. - -```bash -airgap serve # start on configured listen address -airgap serve --listen 0.0.0.0:9090 # override listen address -airgap serve --dev # serve templates from filesystem (hot-reload) -``` - -Starts the HTTP server, the background scheduler, and the SSE hub. Runs until interrupted (SIGINT/SIGTERM). - -### `airgap sync` - -Trigger synchronization. - -```bash -airgap sync --all # sync all enabled providers -airgap sync --provider epel # sync single provider -airgap sync --provider epel,rhcos # sync multiple providers -airgap sync --provider epel --dry-run # show what would change (runs Plan() only) -``` - -Flags: -- `--all` — sync all enabled providers -- `--provider string` — comma-separated list of provider names -- `--dry-run` — run `Plan()` only, print what would change, don't download -- `--force` — re-download all files regardless of checksum match - -Output (normal): -``` -Syncing epel... - Planning: 4521 files, 2145 new, 0 removed, 2376 unchanged - Downloading: [████████████████████░░░░░] 2145/2145 (12.5 MB/s) - Validating: 4521/4521 OK - Duration: 12m34s - -Syncing ocp_binaries... - Planning: 12 files, 3 new, 0 removed, 9 unchanged - Downloading: [█████████████████████████] 3/3 (45.2 MB/s) - Validating: 12/12 OK - Duration: 1m12s - -All syncs complete. 
-``` - -Output (dry-run): -``` -[DRY RUN] epel: - Download: 2145 files (17.4 GB) - Delete: 0 files - Skip: 2376 files (unchanged) - -[DRY RUN] ocp_binaries: - Download: 3 files (1.2 GB) - Delete: 0 files - Skip: 9 files (unchanged) -``` - -### `airgap validate` - -Check integrity of all local content. - -```bash -airgap validate --all # validate all providers -airgap validate --provider epel # validate specific provider -``` - -Computes SHA256 of every file in the provider's output directory and compares against the stored manifest. Reports mismatches. - -Output: -``` -Validating epel... - 4521/4521 files OK -Validating ocp_binaries... - 12/12 files OK -Validating rhcos... - [FAIL] rhcos/4.18/latest/rhcos-live.x86_64.iso — expected abc123, got def456 - 5/6 files OK, 1 FAILED - -Validation complete: 1 file failed. Run 'airgap sync --provider rhcos' to re-download. -``` - -### `airgap export` - -Package content for physical media transfer. See [[04 - Transfer Workflow]] for full detail. - -```bash -airgap export --to /mnt/usb # export all providers -airgap export --to /mnt/usb --provider epel,rhcos # export subset -airgap export --to /mnt/usb --split-size 10GB # override split size -airgap export --to /mnt/usb --compression gzip # use gzip instead of zstd -``` - -Flags: -- `--to string` — destination directory (required) -- `--provider string` — comma-separated provider list (default: all enabled) -- `--split-size string` — override archive split size -- `--compression string` — override compression (zstd, gzip) - -### `airgap import` - -Import content from transfer media. See [[04 - Transfer Workflow]] for full detail. 
- -```bash -airgap import --from /mnt/usb # full import -airgap import --from /mnt/usb --verify-only # just check archive integrity -airgap import --from /mnt/usb --force # skip verification (not recommended) -``` - -Flags: -- `--from string` — source directory containing archives + manifest (required) -- `--verify-only` — validate archive checksums without extracting -- `--force` — skip checksum validation (use when you've already verified) - -### `airgap status` - -Show current sync state summary. - -```bash -airgap status # overview of all providers -airgap status --provider epel # detailed status for one provider -``` - -Output: -``` -Provider Status Last Sync Files Size -───────── ────── ───────── ───── ──── -epel OK 2026-02-18 02:00 4521 17.4 GB -ocp_binaries OK 2026-02-18 02:15 12 1.2 GB -rhcos WARNING 2026-02-18 02:20 6 4.8 GB -containers OK 2026-02-16 02:00 — 22.1 GB -registry RUNNING — — — - -Total: 45.5 GB across 4 providers -Next scheduled sync: 2026-02-23 02:00 -``` - -### `airgap config` - -View and modify configuration. 
- -```bash -airgap config show # dump effective config as YAML -airgap config show --provider epel # show provider-specific config -airgap config set providers.epel.enabled true # set a value -airgap config set export.split_size "50GB" # change export defaults -``` - -## Exit Codes - -| Code | Meaning | -|------|---------| -| 0 | Success | -| 1 | General error | -| 2 | Config error (file not found, parse failure) | -| 3 | Sync failure (some files failed after all retries) | -| 4 | Validation failure (checksum mismatches found) | -| 5 | Export/import failure (archive creation or extraction failed) | - -## Shell Completion - -```bash -airgap completion bash > /etc/bash_completion.d/airgap -airgap completion zsh > "${fpath[1]}/_airgap" -airgap completion fish > ~/.config/fish/completions/airgap.fish -``` diff --git a/docs/airgap-project/07 - Implementation Phases.md b/docs/airgap-project/07 - Implementation Phases.md deleted file mode 100644 index dd07a0a..0000000 --- a/docs/airgap-project/07 - Implementation Phases.md +++ /dev/null @@ -1,186 +0,0 @@ -# Implementation Phases - -#airgap #roadmap #phases - -Back to [[00 - Project Index]] | Related: [[02 - Architecture]], [[08 - Migration Path]] - -## Overview - -Six phases over approximately 12 weeks. Each phase produces a working, testable increment. The app is usable from the CLI after Phase 1. - ---- - -## Phase 1: Foundation (Weeks 1–2) - -**Goal:** Working CLI that can sync OCP binaries and RHCOS images — proving the core architecture with code you already have. 
- -### Deliverables - -- [ ] Project scaffolding: Go module (`github.com/BadgerOps/airgap`), Makefile, Containerfile -- [ ] `internal/config/` — YAML config loader with validation -- [ ] `internal/store/` — SQLite schema, migrations, basic CRUD for sync_runs and file_inventory -- [ ] `internal/download/` — HTTP client with retry, exponential backoff, jitter, HTTP Range resume, streaming SHA256 validation -- [ ] `internal/download/pool.go` — concurrent download worker pool (configurable goroutine count) -- [ ] `internal/provider/provider.go` — Provider interface definition -- [ ] `internal/provider/ocp/binaries.go` — OCP binaries provider (ported from [[01 - Existing Repos Audit#ocpsync|ocpsync]]) -- [ ] `internal/provider/ocp/rhcos.go` — RHCOS provider (ported from ocpsync) -- [ ] `internal/engine/sync.go` — SyncManager that orchestrates Plan → Sync → Report -- [ ] `cmd/airgap/main.go` — Cobra root command + `sync` subcommand -- [ ] `configs/airgap.example.yaml` — example config -- [ ] Unit tests for config loading, download client, checksum validation - -### Key Decisions in This Phase - -- Confirm `chi` vs stdlib router (can defer since no HTTP server yet) -- Confirm `modernc.org/sqlite` vs `mattn/go-sqlite3` (prefer pure Go) -- Set up CI (GitHub Actions) for build + test - -### Done When - -`airgap sync --provider ocp_binaries` downloads OCP client binaries with retry and checksum validation, matching the behavior of the original `ocpsync`. - ---- - -## Phase 2: EPEL Provider + Validation (Weeks 3–4) - -**Goal:** Port the EPEL sync logic from Python to Go, add validation engine, begin web UI. 
- -### Deliverables - -- [ ] `internal/provider/epel/epel.go` — EPEL provider: - - repomd.xml parsing via `encoding/xml` - - primary.xml.gz decompression and parsing - - Checksum-based dedup (same algorithm as [[01 - Existing Repos Audit#epel-offline-sync|epel-offline-sync]]) - - `cleanup_removed_packages` capability (fixes known limitation) -- [ ] `internal/provider/epel/epel_test.go` — tests with fixture XML files -- [ ] `internal/engine/validate.go` — validation engine (runs Validate() across providers) -- [ ] `airgap validate` CLI command -- [ ] `internal/api/router.go` — basic chi router setup -- [ ] `internal/ui/templates/layout.html` — base layout with nav sidebar -- [ ] `internal/ui/templates/dashboard.html` — dashboard skeleton -- [ ] `internal/ui/templates/providers.html` — provider list -- [ ] `airgap serve` CLI command (starts web server) -- [ ] Embed static assets via `embed.FS` - -### Done When - -`airgap sync --provider epel` mirrors an EPEL repo with full dedup. `airgap validate --all` checks all local content. `airgap serve` shows a working (if minimal) dashboard. - ---- - -## Phase 3: Export/Import Engine (Weeks 5–6) - -**Goal:** The [[04 - Transfer Workflow]] — the key differentiator of this tool. 
- -### Deliverables - -- [ ] `internal/engine/export.go`: - - Split tar writer with configurable size boundary - - zstd compression (via `github.com/klauspost/compress/zstd`) - - Transfer manifest JSON generation - - Per-archive SHA256 sidecar files - - TRANSFER-README.txt generation -- [ ] `internal/engine/import.go`: - - Manifest parsing and archive presence verification - - Per-archive SHA256 validation - - Partial re-transfer support (skip already-validated archives) - - Content extraction with provider directory structure preservation - - RPM repo metadata rebuild (invoke `createrepo_c`) -- [ ] `airgap export` CLI command -- [ ] `airgap import` CLI command -- [ ] `internal/ui/templates/transfer.html` — export/import wizard -- [ ] Integration test: full round-trip (sync → export → import → validate) - -### Done When - -You can sync on Machine A, export to a directory, copy that directory, import on Machine B, and all content validates. Corrupt archive detection works. - ---- - -## Phase 4: External Tool Wrappers (Weeks 7–8) - -**Goal:** Container image and registry management via oc-mirror and mirror-registry. - -### Deliverables - -- [ ] `internal/provider/containers/ocmirror.go`: - - Wraps `oc-mirror` CLI binary - - Generates/manages ImageSetConfiguration YAML - - Captures stdout/stderr, parses progress - - Maps oc-mirror output into Provider interface (Plan/Sync/Validate) -- [ ] `internal/provider/containers/registry.go`: - - Wraps `mirror-registry` CLI binary - - install/upgrade/uninstall/status commands - - Health check (is Quay running? Redis? Postgres?) -- [ ] `internal/engine/scheduler.go`: - - Cron-based recurring sync jobs - - SQLite-persisted schedule (survives restarts) - - Per-provider schedule overrides -- [ ] `internal/ui/templates/jobs.html` — job management page -- [ ] Container image and registry management in web UI - -### Done When - -`airgap sync --provider container_images` wraps oc-mirror successfully. Scheduled syncs run on cron. 
Registry can be deployed and health-checked through the tool. - ---- - -## Phase 5: Web UI Polish + Custom Provider (Weeks 9–10) - -**Goal:** Complete, polished web UI with all features. Custom file provider for arbitrary sources. - -### Deliverables - -- [ ] Complete all web UI pages from [[05 - Web UI Design]]: - - Dashboard with auto-refreshing provider cards - - Provider detail with file browser, sync history, inline config editing - - Job management with progress bars - - Transfer wizard with step-by-step flow - - Settings page with config editing -- [ ] SSE integration for live log streaming during sync/export/import -- [ ] `internal/provider/custom/files.go`: - - Generic HTTP/S3 file sync - - Configurable checksum URL or per-file checksums - - Supports any URL pattern -- [ ] Log viewer page -- [ ] Dead letter queue UI (view failed files, retry individual or all) -- [ ] Error handling and user-facing error messages throughout UI - -### Done When - -The web UI is the primary admin interface — an operator can manage everything from the browser. Custom provider can sync arbitrary files. - ---- - -## Phase 6: Hardening (Weeks 11–12) - -**Goal:** Production-ready with tests, docs, and performance validation. 
- -### Deliverables - -- [ ] Comprehensive test suite: - - Unit tests for every provider, engine component, config loader - - Integration tests: full sync → export → import round-trips - - Mock HTTP server for testing download client without network -- [ ] Containerfile optimization: - - Multi-stage build (builder + runtime) - - Include oc-mirror and mirror-registry binaries - - Rootless container support -- [ ] Documentation: - - README.md — quick start, installation, basic usage - - User guide — detailed walkthrough of all features - - Admin guide — deployment, configuration, troubleshooting - - Transfer workflow guide — step-by-step with screenshots -- [ ] TRANSFER-README.txt template refinement -- [ ] Performance testing: - - Large EPEL repo (thousands of RPMs) - - Large RHCOS images (1GB+ per file) - - Export/import of 100GB+ datasets - - Concurrent download tuning -- [ ] Shell completion scripts (bash, zsh, fish) -- [ ] Makefile targets: `build`, `test`, `lint`, `container`, `release` -- [ ] GitHub Actions CI: build, test, lint, container build - -### Done When - -CI is green. Tests cover critical paths. Container builds and runs. An operator can follow the docs from zero to working disconnected sync. diff --git a/docs/airgap-project/08 - Migration Path.md b/docs/airgap-project/08 - Migration Path.md deleted file mode 100644 index 7486665..0000000 --- a/docs/airgap-project/08 - Migration Path.md +++ /dev/null @@ -1,178 +0,0 @@ -# Migration Path - -#airgap #migration - -Back to [[00 - Project Index]] | Related: [[01 - Existing Repos Audit]], [[02 - Architecture]] - -## Overview - -How each existing BadgerOps repo maps into the unified `airgap` codebase. Two repos get ported (code migrated), two get wrapped (external tool invocation), one is conceptually absorbed, and one is deferred. 
- -## ocpsync → `internal/provider/ocp/` - -**Migration type:** Direct port (Go → Go, restructured) - -**What changes:** -- `main.go` logic splits into `binaries.go` (OCP clients) and `rhcos.go` (RHCOS images) -- Both implement the `Provider` interface: `Plan()`, `Sync()`, `Validate()` -- Config moves from standalone `config.yaml` to the `providers.ocp_binaries` and `providers.rhcos` sections of `airgap.yaml` (see [[03 - Configuration#From ocpsync config.yaml]]) -- `downloadFile()` → replaced by shared `internal/download/client.go` -- `validateFile()` → replaced by shared checksum logic in download client -- `generateFileList()` → becomes part of `Plan()` -- `downloadHandler()` → becomes `Sync()` -- Logging migrates from `logrus` to `slog` - -**What stays the same:** -- SHA256 checksum manifest parsing logic -- Ignore-list filtering patterns -- Exponential backoff retry (now in shared download client) - -**Effort:** Low — the Go code already exists, it's a structural refactor into the provider interface. 
- -**Phase:** [[07 - Implementation Phases#Phase 1 Foundation Weeks 1–2|Phase 1]] - ---- - -## epel-offline-sync → `internal/provider/epel/` - -**Migration type:** Language port (Python → Go) - -**What changes:** -- `config.ini` parsing → YAML config via `providers.epel` section -- `xml.etree.ElementTree` → Go's `encoding/xml` -- `gzip` decompression of `primary.xml.gz` → Go's `compress/gzip` -- Python's `hashlib.sha256` → Go's `crypto/sha256` -- Python's `ThreadPoolExecutor` → Go goroutine worker pool (`internal/download/pool.go`) -- `os.path` operations → Go's `filepath` package - -**What stays the same:** -- Algorithm: fetch repomd.xml → parse for primary.xml location → download primary.xml.gz → decompress → parse package list → diff against local → download new/changed → validate -- Checksum-based dedup logic (compare local hash vs manifest hash) - -**What's new:** -- `cleanup_removed_packages` — detect packages in local dir that are no longer in the upstream primary.xml and remove them. This fixes the known limitation noted in the original README. -- Package count is tracked in SQLite for history/reporting - -**Key parsing detail to preserve:** - -The repomd.xml structure (simplified): -```xml -<repomd> - <data type="primary"> - <location href="repodata/primary.xml.gz"/> - <checksum type="sha256">abc123</checksum> - </data> -</repomd> -``` - -The primary.xml structure (simplified): -```xml -<metadata> - <package type="rpm"> - <name>ansible-core</name> - <size package="..."/> - <checksum type="sha256">def456</checksum> - <location href="Packages/a/ansible-core.rpm"/> - </package> -</metadata> -``` - -Go structs for XML parsing: -```go -type RepoMD struct { - XMLName xml.Name `xml:"repomd"` - Data []RepoMDData `xml:"data"` -} - -type RepoMDData struct { - Type string `xml:"type,attr"` - Location RepoMDLocation `xml:"location"` - Checksum RepoMDChecksum `xml:"checksum"` -} - -type PrimaryMetadata struct { - XMLName xml.Name `xml:"metadata"` - Packages []Package `xml:"package"` -} - -type Package struct { - Name string `xml:"name"` - Checksum PackageChecksum `xml:"checksum"` - Location PackageLocation `xml:"location"` - Size PackageSize `xml:"size"` -} -``` - -**Effort:** Medium — algorithmic port is straightforward, but XML parsing details need care.
- -**Phase:** [[07 - Implementation Phases#Phase 2 EPEL Provider Validation Weeks 3–4|Phase 2]] - ---- - -## mirror-registry → `internal/provider/containers/registry.go` - -**Migration type:** External tool wrapper - -**What happens:** The `mirror-registry` binary is invoked via `os/exec`. We don't port any of its Go or Ansible code. The binary path is configured in `airgap.yaml`. - -**Wrapper responsibilities:** -- `install` — invoke `mirror-registry install` with appropriate flags, capture output -- `upgrade` — invoke `mirror-registry upgrade` -- `status` — check if Quay, Redis, Postgres are running (via Podman inspection or HTTP health check) -- `uninstall` — invoke `mirror-registry uninstall` -- Map stdout/stderr into structured log entries -- Detect errors in exit codes and output parsing - -**Why wrap instead of reimplement:** See [[09 - Technical Decisions#Wrap vs Reimplement External Tools]]. Summary: mirror-registry's Ansible playbooks handle complex Quay deployment logic that would be expensive to rewrite and keep in sync with upstream. - -**Effort:** Low — thin exec wrapper with output parsing. - -**Phase:** [[07 - Implementation Phases#Phase 4 External Tool Wrappers Weeks 7–8|Phase 4]] - ---- - -## oc-mirror → `internal/provider/containers/ocmirror.go` - -**Migration type:** External tool wrapper - -**What happens:** Same pattern as mirror-registry. The `oc-mirror` binary is invoked via `os/exec`. - -**Wrapper responsibilities:** -- Generate or manage `ImageSetConfiguration` YAML based on airgap config -- Invoke `oc-mirror --config imageset-config.yaml file:///output-dir` -- Parse oc-mirror output for progress reporting -- Map oc-mirror's differential update model into our Plan/Sync interface -- Handle oc-mirror's own archive format and convert/integrate with our export engine - -**Complexity note:** oc-mirror has its own concept of mirroring to disk (`file://` backend) which overlaps with our export engine. 
The wrapper needs to manage this carefully — we use oc-mirror's disk output as the provider's content, then our export engine packages it alongside RPMs and binaries. - -**Effort:** Medium — oc-mirror's output format and configuration model add complexity. - -**Phase:** [[07 - Implementation Phases#Phase 4 External Tool Wrappers Weeks 7–8|Phase 4]] - ---- - -## dlserver → absorbed into web UI + API - -**Migration type:** Conceptual replacement - -**What carries forward:** -- The idea of a REST API for job management -- SQLite-backed persistence -- CORS-enabled endpoints for web UI consumption - -**What's different:** -- Full web UI instead of bare API -- Rich job model (sync, export, import) instead of generic "download jobs" -- SSE for real-time progress instead of polling - -**Effort:** None — dlserver code is not used, but its design intent is realized in the unified app. - ---- - -## rpm-builder → deferred - -**What happens:** Out of scope for v1. Could become a provider in a future version if there's a need to build custom RPMs in the disconnected environment. - -Potential v2 use case: build custom RPMs on Machine A, include them in the export alongside EPEL packages, and publish them as a separate repo on Machine B. diff --git a/docs/airgap-project/09 - Technical Decisions.md b/docs/airgap-project/09 - Technical Decisions.md deleted file mode 100644 index 14317e1..0000000 --- a/docs/airgap-project/09 - Technical Decisions.md +++ /dev/null @@ -1,212 +0,0 @@ -# Technical Decisions - -#airgap #adr #decisions - -Back to [[00 - Project Index]] | Related: [[02 - Architecture]] - -## Decision Log - -Architecture Decision Records (ADR) style — each decision records the context, options considered, choice made, and rationale. - ---- - -### ADR-001: Language — Go - -**Context:** Existing repos are split between Python (epel-offline-sync) and Go (ocpsync, dlserver, rpm-builder). Need a single language for the unified app. - -**Options:** -1. Go -2. Python -3. 
Rust - -**Decision:** Go - -**Rationale:** -- 4 of 6 existing repos are Go — majority of code already exists in Go -- Single binary distribution — critical for air-gapped environments where package managers may not be available -- Excellent concurrency primitives (goroutines) for parallel downloads -- Strong stdlib: `net/http`, `html/template`, `encoding/xml`, `crypto/sha256`, `archive/tar` -- `embed.FS` lets us bundle the web UI into the binary -- The EPEL Python code is algorithmically simple enough that porting to Go is low-risk - ---- - -### ADR-002: Web Framework — `net/http` + chi router - -**Context:** Need HTTP server for web UI and REST API. - -**Options:** -1. `net/http` + `chi` router -2. Gin -3. Echo -4. Fiber - -**Decision:** `net/http` + `chi` - -**Rationale:** -- chi is stdlib-compatible (`http.Handler` interface) — no framework lock-in -- Lightweight: adds routing and middleware, nothing else -- Middleware composability (logging, recovery, CORS) without magic -- No reflection-based binding or code generation -- Widely used in the Go community, well-maintained - ---- - -### ADR-003: Frontend — htmx + Alpine.js - -**Context:** Need a web UI for admin operations. The user specifically wants htmx + Alpine.js. - -**Options:** -1. htmx + Alpine.js (server-rendered) -2. React SPA -3. Vue SPA -4. Svelte SPA - -**Decision:** htmx + Alpine.js - -**Rationale:** -- No build step, no node_modules, no JavaScript toolchain -- Server-rendered HTML via Go's `html/template` — keeps all logic in Go -- htmx handles dynamic interactions (AJAX, SSE, polling) with HTML attributes -- Alpine.js handles client-side state for dropdowns, modals, toggles -- Templates embedded in the binary via `embed.FS` — zero external dependencies -- Perfect for a tool that runs in disconnected environments where you can't `npm install` - ---- - -### ADR-004: Database — SQLite (pure Go) - -**Context:** Need persistent storage for job state, sync history, file inventory. - -**Options:** -1. 
`modernc.org/sqlite` (pure Go, no CGO) -2. `mattn/go-sqlite3` (CGO-based) -3. PostgreSQL -4. BoltDB / bbolt - -**Decision:** `modernc.org/sqlite` (pure Go) - -**Rationale:** -- Zero external dependencies — no C compiler or shared libraries needed -- Single file database — trivial to back up, move, or reset -- Perfect for single-node tools (this isn't a distributed system) -- Full SQL support for complex queries (join sync_runs with file_inventory, etc.) -- Pure Go means cross-compilation works without CGO headaches -- Tradeoff: slightly slower than CGO sqlite3, but this is a management tool, not a high-throughput database - ---- - -### ADR-005: CLI Framework — Cobra - -**Context:** Need a CLI framework for structured commands and flags. - -**Decision:** Cobra - -**Rationale:** Industry standard for Go CLIs. Used by kubectl, docker, gh, hugo, and nearly every other major Go CLI tool. Built-in shell completion, help generation, and flag parsing. Your existing familiarity from other Go projects. - ---- - -### ADR-006: Compression — zstd (default), gzip (option) - -**Context:** Export archives need compression for physical media transfer. Multi-GB datasets need fast compression. - -**Options:** -1. zstd -2. gzip -3. lz4 -4. xz - -**Decision:** zstd default, gzip as fallback - -**Rationale:** -- zstd is 3-5x faster than gzip at similar compression ratios -- For 100GB+ datasets, this is the difference between hours and minutes -- `github.com/klauspost/compress/zstd` is a mature, pure-Go implementation -- gzip option for environments that don't have zstd tooling (though our binary handles decompression) -- xz has better ratios but is 10x slower — not worth it for this use case -- lz4 is faster but worse ratios — the transfer media has finite space - ---- - -### ADR-007: Archive Strategy — Independent Split Archives - -**Context:** Need to split large exports across multiple archive files for media transfer. See [[04 - Transfer Workflow]]. - -**Options:** -1. 
Custom split tar writer (each archive is independently decompressible) -2. Single tar piped through `split` (all parts needed to reconstruct) -3. One archive per provider - -**Decision:** Custom split tar writer - -**Rationale:** -- Each archive is independently extractable — a corrupt part only affects files in that part -- With `split`, a corrupt part makes the entire archive unrecoverable -- One-per-provider doesn't help with the split size problem (EPEL can be 100GB+) -- The custom writer tracks byte counts and rolls to a new file at the boundary -- The manifest records which files are in which archive, enabling targeted re-transfer - ---- - -### ADR-008: Wrap vs Reimplement External Tools {#Wrap vs Reimplement External Tools} - -**Context:** oc-mirror and mirror-registry are complex upstream tools. Should we wrap them or reimplement their functionality? - -**Options:** -1. Wrap as external tool invocations -2. Reimplement core functionality in Go -3. Fork and embed as Go libraries - -**Decision:** Wrap as external tools - -**Rationale:** -- oc-mirror handles OCI image mirroring, operator catalog parsing, Helm chart handling — reimplementing this is months of work -- mirror-registry uses Ansible playbooks for Quay deployment — reimplementing Quay deployment logic is complex and brittle -- Wrapping means we get upstream updates for free (just update the binary) -- Clean separation of concerns — we handle sync orchestration, they handle domain-specific operations -- Binary path is configurable — easy to test with different versions -- Tradeoff: external dependency on these binaries existing on the system -- Mitigation: our Containerfile bundles them, and `airgap status` checks for their presence - ---- - -### ADR-009: Config Format — YAML - -**Context:** Need a config file format for the unified app. - -**Options:** -1. YAML -2. TOML -3. JSON -4. 
INI - -**Decision:** YAML - -**Rationale:** -- ocpsync already uses YAML — familiar -- Standard in the Kubernetes/OpenShift ecosystem -- Supports comments (unlike JSON) -- Deeply nested config is readable (unlike INI) -- `gopkg.in/yaml.v3` is mature and widely used -- The oc-mirror ImageSetConfiguration is also YAML — consistency - ---- - -### ADR-010: Logging — slog (stdlib) - -**Context:** Need structured logging. ocpsync uses logrus. - -**Options:** -1. `log/slog` (Go stdlib, 1.21+) -2. logrus -3. zap -4. zerolog - -**Decision:** `slog` - -**Rationale:** -- Part of Go stdlib since 1.21 — no external dependency -- Structured logging with key-value pairs -- Pluggable handlers (text for CLI, JSON for machine parsing) -- Replaces logrus which is in maintenance mode -- Good enough for a single-binary tool — we don't need zap's extreme performance diff --git a/docs/airgap-project/10 - Retry and Resilience.md b/docs/airgap-project/10 - Retry and Resilience.md deleted file mode 100644 index 8993b5e..0000000 --- a/docs/airgap-project/10 - Retry and Resilience.md +++ /dev/null @@ -1,167 +0,0 @@ -# Retry and Resilience - -#airgap #reliability #downloads - -Back to [[00 - Project Index]] | Related: [[02 - Architecture]], [[09 - Technical Decisions]] - -## Overview - -Download reliability is critical — EPEL repos have thousands of packages, RHCOS images are 1GB+, and the tool runs unattended on a schedule. Building on patterns already proven in [[01 - Existing Repos Audit#ocpsync|ocpsync]] (exponential backoff) and [[01 - Existing Repos Audit#epel-offline-sync|epel-offline-sync]] (concurrent downloads), the unified download client in `internal/download/` provides a robust foundation for all providers. 
- -## Download Client (`internal/download/client.go`) - -### Exponential Backoff with Jitter - -Carried forward from ocpsync's retry logic, enhanced with jitter to prevent thundering herd: - -```go -func backoff(attempt int) time.Duration { - base := time.Second * time.Duration(math.Pow(2, float64(attempt))) - jitter := time.Duration(rand.Int63n(int64(base / 2))) - return base + jitter -} -``` - -Retry schedule (approximate): 1-1.5s → 2-3s → 4-6s (with jitter). Default 3 attempts per file, configurable per provider via `retry_attempts` in [[03 - Configuration|config]]. - -### Resumable Downloads (HTTP Range) - -For large files (RHCOS ISOs, QCOW2 images), if a download is interrupted mid-stream: - -1. Check if a partial file exists locally -2. Send `Range: bytes={partial_size}-` header -3. If server responds `206 Partial Content`, resume from where we left off -4. If server responds `200 OK` (doesn't support Range), re-download from scratch -5. Validate final SHA256 against expected checksum - -This saves significant time and bandwidth for 1GB+ files on flaky connections. - -### Streaming Checksum Validation - -Instead of downloading the entire file and then computing its hash, we compute the SHA256 incrementally during the download using an `io.TeeReader`: - -```go -hash := sha256.New() -reader := io.TeeReader(resp.Body, hash) -io.Copy(file, reader) -computed := hex.EncodeToString(hash.Sum(nil)) -``` - -If the checksum fails, the file is removed immediately — no disk space wasted on corrupt downloads. When a download is resumed via a Range request, the bytes already on disk must be fed into the hash first (or the completed file re-hashed), because the `TeeReader` only sees the newly downloaded range. - -### HTTP Client Configuration - -```go -client := &http.Client{ - Timeout: 30 * time.Minute, // generous for large files - Transport: &http.Transport{ - MaxIdleConns: 100, - MaxIdleConnsPerHost: 10, - IdleConnTimeout: 90 * time.Second, - TLSHandshakeTimeout: 10 * time.Second, - }, -} -``` - -Timeout is per-request, not per-byte. For 1GB files on slow connections, 30 minutes is reasonable.
The download client also monitors throughput and logs warnings if speed drops below a configurable threshold. - -## Worker Pool (`internal/download/pool.go`) - -Concurrent download orchestration using Go channels: - -```go -type Pool struct { - workers int - jobs chan DownloadJob - results chan DownloadResult - wg sync.WaitGroup -} -``` - -- Workers pull jobs from a channel, download with retry, push results -- Configurable per provider: `max_concurrent_downloads` (default 8 for EPEL, lower for large files) -- Progress reporting via callback function (used by both CLI progress bars and SSE streaming) -- Graceful shutdown on context cancellation - -### Concurrency Guidelines - -| Provider | Recommended Workers | Rationale | -|----------|-------------------|-----------| -| EPEL | 8-16 | Many small files (RPMs are typically <50MB) | -| OCP binaries | 2-4 | Fewer, larger files | -| RHCOS | 1-2 | Very large files (1GB+), server may rate-limit | -| Custom files | 4-8 | Depends on source server capacity | - -## Dead Letter Queue - -Files that fail after all retries go to the dead letter queue, stored in the `failed_files` SQLite table: - -```sql -CREATE TABLE failed_files ( - id INTEGER PRIMARY KEY, - provider TEXT NOT NULL, - file_path TEXT NOT NULL, - url TEXT NOT NULL, - expected_checksum TEXT, - error TEXT NOT NULL, - retry_count INTEGER DEFAULT 0, - first_failure DATETIME NOT NULL, - last_failure DATETIME NOT NULL, - resolved BOOLEAN DEFAULT FALSE -); -``` - -### Dead Letter Queue Operations - -**CLI:** -```bash -airgap status --failed # list all failed files -airgap sync --retry-failed # retry all failed files -airgap sync --retry-failed --provider epel # retry failed for one provider -``` - -**Web UI:** -The [[05 - Web UI Design#Dashboard|dashboard]] shows a warning badge when the dead letter queue is non-empty. The provider detail page lists failed files with per-file retry buttons. 
- -### Automatic Resolution - -When a subsequent sync run successfully downloads a file that was previously in the dead letter queue, the queue entry is automatically marked `resolved = TRUE`. This handles transient server issues without admin intervention. - -## Sync Recovery - -### Interrupted Sync - -If the process is killed mid-sync (power failure, OOM, etc.): - -1. Partially downloaded files are left on disk (incomplete) -2. On next sync, `Plan()` checks each file's checksum against the manifest -3. Incomplete files fail checksum → scheduled for re-download -4. Complete files pass checksum → skipped -5. Net effect: the sync resumes where it left off - -No special recovery logic needed — the checksum-based diff naturally handles it. - -### Corrupted State DB - -If `airgap.db` is corrupted: - -1. Delete the DB file -2. Run `airgap sync --all` — the sync rebuilds the file inventory by scanning the data directory and re-downloading any missing files -3. The tool is designed so that the filesystem is the source of truth, not the database. The DB is an acceleration layer (avoid re-scanning manifests), not the authoritative state. - -## Transfer Resilience - -See [[04 - Transfer Workflow]] for details on per-archive checksums and partial re-transfer. 
The key resilience properties: - -- Each archive is independently valid (no cross-archive dependencies) -- Per-archive SHA256 catches corrupt copies before extraction -- Only corrupt archives need re-transfer — not the entire dataset -- The manifest is duplicated (standalone file + inside archive 001) for redundancy - -## Monitoring and Alerting (Future) - -Post-v1 considerations: - -- Prometheus metrics endpoint (`/metrics`) for sync duration, file counts, failure rates -- Webhook notifications on sync failure (email, Slack, PagerDuty) -- Health check endpoint for monitoring systems diff --git a/docs/airgap-project/11 - Development Setup.md b/docs/airgap-project/11 - Development Setup.md deleted file mode 100644 index b8b3e4c..0000000 --- a/docs/airgap-project/11 - Development Setup.md +++ /dev/null @@ -1,179 +0,0 @@ -# Development Setup - -#airgap #nix #dev #testing - -Back to [[00 - Project Index]] | Related: [[09 - Technical Decisions]] - -## Prerequisites - -- [Nix](https://nixos.org/download.html) with flakes enabled -- Git - -## Getting Started - -```bash -# Enter the dev shell (installs Go, tools, everything) -nix develop - -# First time: resolve all Go dependencies -go mod tidy - -# Build -go build ./cmd/airgap - -# Test -go test ./... - -# Test verbose -go test -v ./... - -# Test with coverage -go test -cover ./... -``` - -## Nix Flake - -The project uses a Nix flake (`flake.nix`) for reproducible development environments. It provides: - -### `nix develop` — Dev Shell - -Everything you need to build, test, lint, and run the project: - -- **Go 1.23** — compiler and toolchain -- **gopls** — Go language server -- **gotools** — goimports, gorename, etc. 
-- **staticcheck** — static analysis -- **delve** — debugger -- **golangci-lint** — linter aggregator -- **podman** — container builds -- **skopeo** — container image inspection -- **zstd** — compression for export engine -- **createrepo_c** — RPM repo metadata generation -- **jq, yq, curl, sqlite** — general utilities - -### `nix build` — Build Binary - -Produces the `airgap` binary in `./result/bin/airgap`. - -### `nix build .#container` — Container Image - -Builds a minimal OCI container image with: -- The `airgap` binary -- TLS certificates -- zstd, createrepo_c, sqlite, coreutils, bash -- Exposes port 8080 -- Volumes for `/var/lib/airgap` and `/mnt/transfer-disk` - -## Makefile Targets - -```bash -make build # go build ./cmd/airgap -make test # go test ./... -make test-verbose # go test -v ./... -make test-coverage # go test -coverprofile=coverage.out ./... -make lint # golangci-lint run -make clean # remove build artifacts -make container # podman build -make all # lint + test + build -``` - -## Test Suite - -Tests are organized by package, matching the production code structure: - -| Package | Test File | Tests | Lines | Model Used | -|---------|-----------|-------|-------|------------| -| `config` | `config_test.go` | 13 | 596 | Haiku | -| `provider` | `provider_test.go` | 10 | 365 | Haiku | -| `download` | `client_test.go` | 13 | 530 | Sonnet | -| `download` | `pool_test.go` | 10 | 464 | Sonnet | -| `store` | `sqlite_test.go` | 40 | 1,472 | Sonnet | -| `engine` | `sync_test.go` | 17 | 1,053 | Sonnet | -| `provider/ocp` | `ocp_test.go` | 17 | 965 | Sonnet | -| **Total** | **7 files** | **120** | **5,445** | | - -### Test Tier Strategy - -Tests were written using different AI models based on complexity: - -- **Haiku** — simple/mechanical tests (config parsing, registry CRUD, type assertions). Fast and cheap for straightforward test patterns. 
-- **Sonnet** — tests requiring understanding of complex logic (HTTP mocking, concurrent download pools, database integration, sync orchestration). Worth the cost for getting mock servers, race conditions, and integration patterns right. - -### Running Specific Tests - -```bash -# All tests -go test ./... - -# Specific package -go test -v ./internal/config/... -go test -v ./internal/store/... -go test -v ./internal/download/... -go test -v ./internal/engine/... -go test -v ./internal/provider/ocp/... - -# Specific test -go test -v -run TestSyncProviderDryRun ./internal/engine/... - -# With race detector -go test -race ./... - -# Coverage report -go test -coverprofile=coverage.out ./... -go tool cover -html=coverage.out -``` - -## First Build Checklist - -After cloning and entering `nix develop`: - -```bash -# 1. Resolve dependencies -go mod tidy - -# 2. Verify everything compiles -go build ./... - -# 3. Run tests -go test ./... - -# 4. Build the binary -go build -o airgap ./cmd/airgap - -# 5. Verify it runs -./airgap --help -./airgap status -``` - -## CI/CD - -The Makefile provides everything needed for GitHub Actions: - -```yaml -# .github/workflows/ci.yml -name: CI -on: [push, pull_request] -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: cachix/install-nix-action@v27 - - run: nix develop --command bash -c "go mod tidy && make all" -``` - -Or without Nix: - -```yaml -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: - go-version: '1.23' - - run: go mod tidy - - run: go test ./... - - run: go build ./cmd/airgap -``` diff --git a/docs/airgap-project/12 - Foundation Setup.md b/docs/airgap-project/12 - Foundation Setup.md deleted file mode 100644 index 37f0e2b..0000000 --- a/docs/airgap-project/12 - Foundation Setup.md +++ /dev/null @@ -1,87 +0,0 @@ -# Foundation Setup - -> Subagent output from Phase 1 initial setup. Preserved for posterity. 
- -## Source Files -- `SETUP_COMPLETE.txt` -- `PROJECT_SETUP.md` -- `FILE_MANIFEST.txt` - ---- - -## Setup Summary - -**Status:** Complete -**Module:** `github.com/BadgerOps/airgap` -**Go Version:** 1.23 (upgraded from initial 1.21) - -### Foundation Components Created - -| Component | Location | Lines | -|-----------|----------|-------| -| Provider Interface | `internal/provider/provider.go` | 140 | -| Configuration System | `internal/config/config.go` | 199 | -| Database Models | `internal/store/models.go` | 72 | -| Entry Point | `cmd/airgap/main.go` | 7 | - -### Initial Directory Structure - -``` -airgap/ -├── cmd/airgap/ -│ └── main.go -├── internal/ -│ ├── config/config.go -│ ├── engine/ -│ ├── provider/ -│ │ ├── provider.go -│ │ ├── epel/ -│ │ ├── ocp/ -│ │ ├── containers/ -│ │ └── custom/ -│ ├── store/models.go -│ ├── download/ -│ └── ui/ -├── configs/ -├── go.mod -└── go.sum -``` - -### Key Design Decisions Made - -1. **Provider Pattern** - Clean interface for pluggable content sources -2. **Plan-Execute Separation** - Always preview changes before applying -3. **Type-Safe Configuration** - Generics for provider-specific config unmarshaling -4. **Unified Persistence** - Single SQLite database for all state -5. **Dead Letter Queue** - Failed file tracking with automatic retry analysis -6. 
**Concurrent Operations** - Configurable worker pools per provider - -### Code Statistics at Foundation - -- `provider.go`: 140 lines -- `config.go`: 199 lines -- `models.go`: 72 lines -- `main.go`: 7 lines -- **Total core code:** 418 lines - ---- - -## File Manifest (at time of setup) - -### Core Source Files -- `go.mod` — Module definition with dependencies -- `go.sum` — Dependency checksums -- `cmd/airgap/main.go` — Application entry point -- `internal/config/config.go` — Configuration system -- `internal/provider/provider.go` — Provider interface and registry -- `internal/store/models.go` — Database model definitions - -### Documentation Files -- `README.md` — Project overview -- `ARCHITECTURE.md` — System design document -- `PROJECT_SETUP.md` — Setup instructions -- `FILE_MANIFEST.txt` — File inventory - ---- - -*This note consolidates: SETUP_COMPLETE.txt, PROJECT_SETUP.md, FILE_MANIFEST.txt* diff --git a/docs/airgap-project/13 - Architecture Reference.md b/docs/airgap-project/13 - Architecture Reference.md deleted file mode 100644 index aa3b301..0000000 --- a/docs/airgap-project/13 - Architecture Reference.md +++ /dev/null @@ -1,72 +0,0 @@ -# Architecture Reference - -> Subagent output from Phase 1 architecture design. Preserved for posterity. - -## Source File -- `ARCHITECTURE.md` - ---- - -## System Overview - -The airgap tool is a unified solution for managing offline/disconnected OpenShift environments. It consolidates multiple standalone scripts into a single Go binary with a provider-based plugin architecture.
- -## Core Architecture - -### Provider Plugin Pattern - -All content sources implement a common `Provider` interface: - -```go -type Provider interface { - Name() string - Configure(cfg ProviderConfig) error - Plan(ctx context.Context, opts SyncOptions) (*SyncPlan, error) - Sync(ctx context.Context, plan *SyncPlan, opts SyncOptions) (*SyncReport, error) - Validate(ctx context.Context) (*ValidationReport, error) -} -``` - -### Component Layers - -``` -┌─────────────────────────────────────────┐ -│ CLI (Cobra) │ -├─────────────────────────────────────────┤ -│ HTTP Server (htmx) │ -├─────────────────────────────────────────┤ -│ Sync Engine (orchestration) │ -├─────────────────────────────────────────┤ -│ Provider Registry (plugin discovery) │ -├──────┬──────┬──────┬──────┬────────────┤ -│ EPEL │ OCP │RHCOS │ Imgs │ Custom │ -├──────┴──────┴──────┴──────┴────────────┤ -│ Download Client (HTTP + retry) │ -├─────────────────────────────────────────┤ -│ SQLite Store (persistence) │ -└─────────────────────────────────────────┘ -``` - -### Data Flow - -1. **Plan Phase**: Provider examines remote manifest → compares with local state → produces SyncPlan -2. **Sync Phase**: Engine executes plan → downloads via worker pool → records in store -3. 
**Validate Phase**: Provider checksums local files → compares with expected → reports discrepancies - -### Key Patterns - -- **Plan-Execute Separation**: Every sync generates a plan first, enabling dry-run and audit -- **Worker Pool**: Configurable concurrency per provider for parallel downloads -- **Retry with Backoff**: Exponential backoff + jitter for transient failures -- **Resume Support**: HTTP Range headers for interrupted downloads -- **Dead Letter Queue**: Failed files tracked for retry analysis - -### Extension Points - -- New providers implement the `Provider` interface -- Provider configs are type-safe via `ParseProviderConfig[T]()` generics -- All providers auto-register via `Registry.Register()` - ---- - -*This note consolidates: ARCHITECTURE.md* diff --git a/docs/airgap-project/14 - CLI Scaffolding.md b/docs/airgap-project/14 - CLI Scaffolding.md deleted file mode 100644 index b4bf2bb..0000000 --- a/docs/airgap-project/14 - CLI Scaffolding.md +++ /dev/null @@ -1,104 +0,0 @@ -# CLI Scaffolding - -> Subagent output from CLI implementation. Preserved for posterity. 
- -## Source Files -- `CLI_SCAFFOLDING.md` -- `COBRA_CLI_SUMMARY.md` -- `CLI_IMPLEMENTATION_COMPLETE.txt` -- `IMPLEMENTATION_SUMMARY.txt` - ---- - -## Overview - -**Framework:** Cobra v1.8.1 -**Files Created:** 9 Go command files in `cmd/airgap/` -**Total Code:** ~18 KB - -## Command Files - -| File | Size | Purpose | -|------|------|---------| -| `main.go` | 124B | Entry point, creates and executes root command | -| `root.go` | 3.5KB | Root command, persistent flags, PersistentPreRunE hook | -| `sync.go` | 2.6KB | Content synchronization with dry-run support | -| `validate.go` | 1.8KB | Content integrity checking | -| `serve.go` | 1.5KB | HTTP server startup | -| `status.go` | 1.8KB | Provider status display | -| `export.go` | 2.5KB | Offline content export | -| `importcmd.go` | 1.7KB | Offline content import (named to avoid Go keyword) | -| `config_cmd.go` | 2.2KB | Config show/set subcommands | - -## Persistent Flags (Root Level) - -``` ---config string Path to config file ---data-dir string Override data directory ---log-level string Set log level (debug/info/warn/error) ---log-format string Set output format (text/json) ---quiet Suppress non-error output -``` - -## Command Reference - -```bash -airgap sync --all # Sync all providers -airgap sync --provider xxx # Sync specific providers -airgap sync --dry-run # Preview without changes -airgap sync --force # Force re-download - -airgap validate --all # Validate all -airgap validate --provider xxx # Validate specific - -airgap serve # Start server -airgap serve --listen 127.0.0.1:9000 # Custom listen address - -airgap status # Show all status -airgap status --provider xxx # Specific status -airgap status --failed # Only failures - -airgap export --to /path # Export content -airgap export --to /path --split-size 10GB - -airgap import --from /path # Import content -airgap import --from /path --verify-only - -airgap config show # Show current config -airgap config set KEY VALUE # Set config value -``` - -## 
Execution Flow - -``` -main() → root command - └→ PersistentPreRunE: - 1. setupLogging() → initializes slog - 2. config.FindConfigFile() → discovers config - 3. config.Load() or DefaultConfig() - 4. Flag overrides applied - └→ Subcommand RunE: - - Access globalCfg - - Use slog.Default() for logging - - Execute command logic -``` - -## Integration Points - -- `config.FindConfigFile()` — Auto-discover config -- `config.Load(path)` — Load config from file -- `config.DefaultConfig()` — Get defaults -- `slog.Default()` — Get logger in any command -- Provider registry pattern ready for real implementations - -## Design Patterns - -- All commands follow `newXxxCmd()` + `xxxRun()` pattern -- All commands check `globalCfg` before use -- Comma-separated provider list support -- All errors wrapped with `fmt.Errorf` context -- Required flags via `MarkFlagRequired()` - ---- - -*This note consolidates: CLI_SCAFFOLDING.md, COBRA_CLI_SUMMARY.md, CLI_IMPLEMENTATION_COMPLETE.txt, IMPLEMENTATION_SUMMARY.txt* diff --git a/docs/airgap-project/15 - Store Implementation.md b/docs/airgap-project/15 - Store Implementation.md deleted file mode 100644 index be2a378..0000000 --- a/docs/airgap-project/15 - Store Implementation.md +++ /dev/null @@ -1,89 +0,0 @@ -# Store Implementation - -> Subagent output from SQLite store implementation. Preserved for posterity. 
- -## Source Files -- `STORE_IMPLEMENTATION.md` -- `CODE_SNIPPETS.md` (store-related sections) - ---- - -## Overview - -**Database:** SQLite via `modernc.org/sqlite` (pure Go, no CGO) -**Location:** `internal/store/` -**Files:** `sqlite.go`, `models.go`, `migrations.go` - -## Database Models - -### SyncRun -Tracks each sync execution: -- `ID`, `ProviderName`, `StartedAt`, `CompletedAt` -- `Status` (running/completed/failed) -- Counts: `FilesDownloaded`, `FilesSkipped`, `FilesDeleted`, `FilesFailed` -- `BytesDownloaded`, `ErrorMessage` - -### FileRecord -Inventory of synced files: -- `ID`, `ProviderName`, `RelativePath`, `URL` -- `Checksum`, `ChecksumType` -- `SizeBytes`, `LastSyncRunID` -- `CreatedAt`, `UpdatedAt` - -### FailedFileRecord -Dead letter queue for failures: -- `ID`, `SyncRunID`, `ProviderName` -- `RelativePath`, `URL`, `ErrorMessage` -- `RetryCount`, `LastRetryAt`, `CreatedAt` - -### Job (scheduling) -- `ID`, `Name`, `ProviderName`, `Schedule` -- `Enabled`, `LastRunAt`, `NextRunAt` - -### Transfer (export/import) -- `ID`, `Type`, `ProviderName` -- `ArchivePath`, `Status` -- `TotalFiles`, `TotalBytes` -- `StartedAt`, `CompletedAt` - -## Store API - -```go -// Lifecycle -func New(dbPath string, logger *slog.Logger) (*Store, error) -func (s *Store) Close() error - -// Sync runs -func (s *Store) CreateSyncRun(run *SyncRun) error -func (s *Store) UpdateSyncRun(run *SyncRun) error -func (s *Store) GetLastSyncRun(provider string) (*SyncRun, error) - -// File records -func (s *Store) UpsertFileRecord(fileRec *FileRecord) error -func (s *Store) GetFileRecord(provider, path string) (*FileRecord, error) -func (s *Store) ListFileRecords(provider string) ([]*FileRecord, error) -func (s *Store) DeleteFileRecord(provider, path string) error - -// Failed files -func (s *Store) AddFailedFile(failedRec *FailedFileRecord) error -func (s *Store) ListFailedFiles(provider string) ([]*FailedFileRecord, error) - -// Statistics -func (s *Store) GetProviderStats(provider 
string) (*ProviderStats, error) -``` - -## Migrations - -Schema versioning via `schema_version` table. Migrations run automatically on store initialization. - -## Key Design Points - -- Pure Go SQLite (no CGO required) — simplifies cross-compilation -- In-memory database support for testing (`":memory:"`) -- Automatic schema migrations on startup -- Upsert pattern for file records (insert or update on conflict) -- Dead letter queue for retry analysis - ---- - -*This note consolidates: STORE_IMPLEMENTATION.md, store-related CODE_SNIPPETS.md* diff --git a/docs/airgap-project/16 - Component Wiring.md b/docs/airgap-project/16 - Component Wiring.md deleted file mode 100644 index 22e6f91..0000000 --- a/docs/airgap-project/16 - Component Wiring.md +++ /dev/null @@ -1,98 +0,0 @@ -# Component Wiring - -> Subagent output from wiring all components together. Preserved for posterity. - -## Source Files -- `WIRING_COMPLETE.md` -- `TASK_COMPLETION_REPORT.md` -- `IMPLEMENTATION_REFERENCE.md` - ---- - -## Overview - -All components wired together: CLI commands integrated with real implementations of Store, Download Client, Provider Registry, OCP Providers, and Sync Manager Engine. - -## Component Dependency Graph - -``` -root.go - ├── config.Config - ├── store.Store - ├── download.Client - ├── provider.Registry - │ ├── ocp.BinariesProvider - │ └── ocp.RHCOSProvider - └── engine.SyncManager -``` - -## Initialization Sequence - -``` -Command Execution - ↓ -PersistentPreRunE: - 1. setupLogging() - Initialize slog - 2. Load Config File - From file or defaults - 3. shouldSkipComponentInit() - Check if init needed - 4. initializeComponents() - Initialize components - a. Store.New() - b. download.NewClient() - c. provider.NewRegistry() - d. OCP Binaries Provider (new + configure) - e. RHCOS Provider (new + configure) - f. engine.NewSyncManager() - ↓ -Command RunE: - 1. Validate globalEngine - 2. Determine providers - 3. Execute operation - 4. Display results - ↓ -PersistentPostRun: - 1. 
closeStore() - Cleanup -``` - -## Files Modified - -| File | Lines | Changes | -|------|-------|---------| -| `root.go` | 223 | +3 global vars, +3 init funcs, modified PreRunE/PostRun | -| `sync.go` | 151 | Real sync via `globalEngine.SyncProvider()` | -| `validate.go` | 125 | Real validation via `globalEngine.ValidateProvider()` | -| `status.go` | 133 | Real status via `globalEngine.Status()` | -| `serve.go` | 77 | Engine validation, endpoint listing | - -## Error Handling Strategy - -### Initialization Errors (Critical) -- Store init failure → Command fails -- Config parse failure → Command fails - -### Operation Errors (Graceful) -- Provider config failure → Logged as warning, provider still registered -- Individual provider failure → Logged, continue with other providers -- Sync/validate failures → Return summary with failure count - -### Resource Cleanup -- Store close failure → Logged as error, doesn't prevent exit -- All errors properly wrapped with context - -## Key Functions Added - -```go -func initializeComponents() error // Initialize all components -func shouldSkipComponentInit(string) bool // Check if init needed -func closeStore() // Resource cleanup -func formatBytes(int64) string // Human-readable byte sizes -``` - -## Data Flow - -``` -Command → Determine Providers → Build Options → Engine Call → Results → Display → Exit -``` - ---- - -*This note consolidates: WIRING_COMPLETE.md, TASK_COMPLETION_REPORT.md, IMPLEMENTATION_REFERENCE.md* diff --git a/docs/airgap-project/17 - Code Verification.md b/docs/airgap-project/17 - Code Verification.md deleted file mode 100644 index 8cfe6a3..0000000 --- a/docs/airgap-project/17 - Code Verification.md +++ /dev/null @@ -1,77 +0,0 @@ -# Code Verification - -> Subagent output from comprehensive code verification. Preserved for posterity. 
- -## Source Files -- `CODE_VERIFICATION_REPORT.txt` -- `CODE_SNIPPETS.md` (verification-related sections) - ---- - -## Verification Summary - -**Date:** 2026-02-19 -**Total Go Files:** 20 -**Total Go Lines:** 3,729 -**Overall Status:** Ready for build - -## Package Distribution - -| Package | Files | Lines | Purpose | -|---------|-------|-------|---------| -| `cmd/airgap/` | 9 | 670 | CLI commands and main entry | -| `internal/config/` | 1 | 199 | Configuration management | -| `internal/provider/` | 1 | 140 | Provider interface and registry | -| `internal/provider/ocp/` | 3 | 680 | OCP binaries and RHCOS providers | -| `internal/download/` | 2 | 439 | HTTP download client and pool | -| `internal/engine/` | 1 | 430 | Sync orchestration engine | -| `internal/store/` | 3 | 872 | SQLite persistence layer | - -## Checks Performed - -### 1. Go.mod Dependencies — PASS -All critical dependencies properly specified: Cobra v1.8.1, yaml.v3, modernc.org/sqlite. - -### 2. Package Structure — PASS -All `.go` files declare correct package names matching directory structure. - -### 3. Import Analysis — PASS -All module paths correct and consistent with `go.mod`. - -### 4. Cross-Package Type References — PASS -All types referenced across packages are defined and publicly accessible. - -### 5. Type Conflict — NOTED -`ProviderConfig` defined in both `config` and `provider` packages. Both are identical `map[string]interface{}` aliases. Fixed with type alias: `type ProviderConfig = config.ProviderConfig`. - -### 6. Syntax and Structure — PASS -All package declarations, import statements, type definitions, function signatures verified. - -### 7. Function Signatures — PASS -Sample signatures verified across all packages. - -### 8. Missing References — PASS -All required methods, interfaces, and constructors present. 
- -## Build Errors Found and Fixed - -| File | Issue | Fix | -|------|-------|-----| -| `store/sqlite.go` | Unused import `"strings"` | Removed | -| `store/migrations.go` | Unused imports `"database/sql"`, `"log/slog"` | Removed | -| `provider/ocp/binaries.go` | Unused variable `actualHash` | Changed to `_` | -| `provider/ocp/rhcos.go` | Unused variable `actualHash` | Changed to `_` | -| `provider/epel/epel.go` | Unused variable `actualChecksum` | Changed to `_` | -| `server/handlers.go` | Unused import `"strings"` (after Go 1.23 upgrade) | Removed | - -## Go Version Upgrade - -Project upgraded from Go 1.21 to Go 1.23: -- `go.mod`: `go 1.21` → `go 1.23` -- `Containerfile`: `go-toolset:1.21` → `go-toolset:1.23` -- Enabled Go 1.22+ enhanced ServeMux routing (method prefixes, path variables, exact match) -- `flake.nix` already used `go_1_23` - ---- - -*This note consolidates: CODE_VERIFICATION_REPORT.txt, CODE_SNIPPETS.md* diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..aff9683 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,91 @@ +# Architecture + +## Overview + +`airgap` is a single-process Go application that provides: +- CLI operations for sync/validate/export/import/push +- An embedded HTTP server for UI + API +- SQLite-backed state tracking + +Core packages: +- `cmd/airgap`: CLI command wiring +- `internal/engine`: sync/export/import orchestration +- `internal/provider/*`: provider implementations +- `internal/store`: SQLite models, migrations, CRUD +- `internal/server`: web UI and API handlers +- `internal/download`: HTTP download client + worker pool + +## Startup Flow + +1. Load config from YAML (or defaults). +2. Initialize store and run migrations. +3. Seed `provider_configs` from YAML providers on first run only. +4. Load provider configs from DB. +5. Instantiate enabled providers and register them. +6. Start CLI command execution (or HTTP server for `serve`). + +## Sync Flow + +1. 
CLI/API requests sync for one provider or all. +2. Engine calls provider `Plan()` to produce `SyncAction` items. +3. Engine executes download/update actions with worker pool. +4. Engine updates `file_records`, `sync_runs`, and failed-file state. +5. Status is served from store-backed summaries. + +Notes: +- Sync/push operations are serialized at server level (`syncRunning` guard). +- Progress is tracked through an in-memory `SyncTracker` used by UI polling/SSE paths. + +## Transfer Flow + +### Export + +- Reads file inventory from `file_records` +- Builds split `airgap-transfer-XXX.tar.zst` archives +- Writes archive SHA256 sidecars +- Writes `airgap-manifest.json` (+ `.sha256`) and `TRANSFER-README.txt` +- Records transfer in `transfers` + +### Import + +- Reads and validates manifest + archives +- Supports verify-only and skip-validated modes +- Extracts files into `server.data_dir` +- Attempts `createrepo_c` for RPM repositories +- Upserts `file_records` from manifest inventory + +## Provider Model + +All providers implement: +- `Name()` +- `Type()` +- `Configure()` +- `Plan()` +- `Sync()` +- `Validate()` + +Provider instances are registered by config name, allowing multiple provider configs of the same type. + +## Persistence + +SQLite tables include: +- `sync_runs` +- `file_records` +- `failed_files` +- `transfers` +- `transfer_archives` +- `provider_configs` + +Migrations are managed in `internal/store/migrations.go`. + +## HTTP Surface + +Server routes include: +- UI pages (`/dashboard`, `/providers`, `/transfer`, `/ocp/clients`) +- Sync/status/provider APIs +- Provider config CRUD APIs +- Transfer APIs +- Mirror discovery/speed-test APIs +- OCP client artifact discovery/download APIs + +See [http-api.md](http-api.md) for endpoint-level details. 
diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000..649e017 --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,73 @@ +# Configuration + +## File Discovery + +`airgap` searches for config in this order: +- `./airgap.yaml` +- `/etc/airgap/airgap.yaml` +- `$HOME/.config/airgap/airgap.yaml` + +Use `--config` to specify an explicit path. + +## Top-Level Schema + +```yaml +server: + listen: "0.0.0.0:8080" + data_dir: "/var/lib/airgap" + db_path: "/var/lib/airgap/airgap.db" + +export: + split_size: "25GB" + compression: "zstd" + output_dir: "/mnt/transfer-disk" + manifest_name: "airgap-manifest.json" + +schedule: + enabled: true + default_cron: "0 2 * * 0" + +providers: {} +``` + +## Provider Config Storage Model + +At runtime, provider configs are read from SQLite (`provider_configs`), not directly from YAML. + +Behavior: +- On first startup with an empty `provider_configs` table, YAML `providers:` entries are seeded into DB. +- On later startups, DB provider configs are authoritative. +- Provider CRUD in the UI/API updates DB and hot-reloads active providers. + +## Valid Provider Types + +- `epel` +- `ocp_binaries` +- `ocp_clients` +- `rhcos` +- `container_images` +- `registry` +- `custom_files` + +### Implementation Status + +- Fully wired for sync/validate: `epel`, `ocp_binaries`, `ocp_clients`, `rhcos`, `container_images` +- Used as registry push target config: `registry` +- Accepted config type but not wired for sync: `custom_files` + +## Example Config + +See [configs/airgap.example.yaml](../configs/airgap.example.yaml). 
+ +## CLI Config Commands + +- `airgap config show`: prints effective loaded config +- `airgap config set KEY VALUE`: currently stubbed (does not persist changes) + +## Global CLI Flags + +- `--config`: config path +- `--data-dir`: overrides `server.data_dir` +- `--log-level`: `debug|info|warn|error` +- `--log-format`: `text|json` +- `--quiet`: suppresses non-error output diff --git a/docs/http-api.md b/docs/http-api.md new file mode 100644 index 0000000..799b2c3 --- /dev/null +++ b/docs/http-api.md @@ -0,0 +1,69 @@ +# HTTP API + +Routes are defined in `internal/server/server.go`. + +## UI Pages + +- `GET /` -> redirects to `/dashboard` +- `GET /dashboard` +- `GET /providers` +- `GET /providers/{name}` +- `GET /sync` +- `GET /transfer` +- `GET /ocp/clients` +- `GET /static/*` (embedded static assets) + +## Core API + +- `GET /api/status` - provider status summary +- `GET /api/providers` - active registered providers +- `POST /api/sync` - start sync (`provider` or `all`) +- `POST /api/sync/cancel` - cancel active sync/push operation +- `GET /api/sync/progress` - current progress snapshot/stream payload +- `GET /api/sync/running` - whether sync/push is active +- `POST /api/scan` - scan local files into store records +- `POST /api/validate` - validate provider content + +## Failed Download Management + +- `GET /api/sync/failures` - list unresolved failed files +- `DELETE /api/sync/failures/{id}` - resolve one failed file +- `POST /api/sync/failures/resolve` - bulk resolve failures +- `POST /api/sync/retry` - retry failed downloads + +## Provider Config Management + +- `GET /api/providers/config` +- `POST /api/providers/config` +- `PUT /api/providers/config/{name}` +- `DELETE /api/providers/config/{name}` +- `POST /api/providers/config/{name}/toggle` + +## Transfer API + +- `POST /api/transfer/export` +- `POST /api/transfer/import` +- `GET /api/transfers` + +## Mirror Discovery API + +- `GET /api/mirrors/epel/versions` +- `GET /api/mirrors/epel?version=&arch=` +- `GET 
/api/mirrors/ocp/versions` +- `POST /api/mirrors/speedtest` + +## OCP Client Discovery/Download API + +- `GET /api/ocp/tracks` +- `GET /api/ocp/releases?channel=` +- `GET /api/ocp/artifacts?version=` +- `POST /api/ocp/download` + +## Registry Push API + +- `POST /api/registry/push` + +## Notes + +- Several endpoints support HTMX form requests in addition to JSON. +- Long-running sync/push operations are asynchronous and update shared progress state. diff --git a/docs/plans/2026-02-19-export-import-design.md b/docs/plans/2026-02-19-export-import-design.md deleted file mode 100644 index 9e0703e..0000000 --- a/docs/plans/2026-02-19-export-import-design.md +++ /dev/null @@ -1,172 +0,0 @@ -# Export/Import Engine Design - -**Date:** 2026-02-19 -**Phase:** 3 (Export/Import Engine) -**Status:** Approved - -## Summary - -Implement the export/import engine that packages synced content into split tar.zst archives for physical media transfer between internet-connected and air-gapped machines. - -## Decisions - -- **Compression:** zstd only for v1 (validate CLI flag, reject gzip/none) -- **createrepo_c:** Not auto-run on import. Operator handles separately. -- **Validation cache:** SQLite transfers table (not sidecar files on media) -- **Archive strategy:** Independent per-split archives (not stream-split) - -## Export Engine (`internal/engine/export.go`) - -### Types - -```go -type ExportOptions struct { - OutputDir string - Providers []string - SplitSize int64 // bytes, parsed from "25GB" etc. - Compression string // "zstd" only for v1 -} - -type ExportReport struct { - Archives []ArchiveInfo - TotalFiles int - TotalSize int64 - ManifestPath string - Duration time.Duration -} - -type ArchiveInfo struct { - Name string - Size int64 - SHA256 string - Files []string // relative paths -} -``` - -### Flow - -1. Query `file_records` from store for each requested provider -2. Resolve actual file paths on disk (data_dir + provider + relative path) -3. 
Build manifest JSON with provider summaries and file inventory -4. Create split tar.zst archives: - - Track cumulative uncompressed bytes per archive - - Roll to new archive when SplitSize exceeded - - Embed manifest in archive 001 -5. Compute SHA256 of each completed archive file -6. Write `.sha256` sidecar for each archive -7. Write standalone `airgap-manifest.json` + `.sha256` -8. Write `TRANSFER-README.txt` -9. Record transfer in SQLite transfers table - -### Split Archive Writer - -Custom writer that wraps tar + zstd: -- Tracks bytes written to current archive -- When adding a file would exceed split size, close current archive and open next -- A single file larger than split size goes into its own archive (archive exceeds limit) -- Archive naming: `airgap-transfer-001.tar.zst`, `airgap-transfer-002.tar.zst`, etc. -- Each archive is independently decompressible - -## Import Engine (`internal/engine/import.go`) - -### Types - -```go -type ImportOptions struct { - SourceDir string - VerifyOnly bool - Force bool // skip checksum verification -} - -type ImportReport struct { - ArchivesValidated int - ArchivesFailed int - FilesExtracted int - TotalSize int64 - Duration time.Duration - Errors []string -} -``` - -### Flow - -1. Read `airgap-manifest.json` from source dir -2. Verify all expected archive files are present on disk -3. Validate each archive's SHA256 against manifest: - - Skip if `--force` - - Cache validated checksums in transfers table - - On re-run, skip already-validated archives -4. If `--verify-only`, stop and report validation results -5. Extract each validated archive into data_dir: - - Preserve provider directory structure - - Stream decompress (zstd) + untar -6. Upsert extracted files into file_records table -7. 
Record transfer in transfers table - -### Partial Re-transfer - -If some archives fail validation: -- Report which archives failed -- Operator re-copies only those files from source -- Re-run import; previously validated archives are skipped - -## Manifest Format (`airgap-manifest.json`) - -```json -{ - "version": "1.0", - "created": "2026-02-19T14:30:00Z", - "source_host": "hostname", - "providers": { - "epel": { "file_count": 4521, "total_size": 18739281920 }, - "ocp_binaries": { "file_count": 12, "total_size": 2147483648 } - }, - "archives": [ - { - "name": "airgap-transfer-001.tar.zst", - "size": 0, - "sha256": "abc123...", - "files": ["epel/9/Packages/a-foo.rpm", "..."] - } - ], - "total_archives": 1, - "total_size": 0, - "file_inventory": [ - { "provider": "epel", "path": "9/Packages/foo.rpm", "size": 3145728, "sha256": "..." } - ] -} -``` - -## Size Parsing Helper - -`ParseSize("25GB") -> int64` supporting B, KB, MB, GB, TB suffixes (case-insensitive). Lives in `internal/engine/export.go` or a small util. 
- -## Disk Layout (Export Output) - -``` -/mnt/usb/ -├── airgap-manifest.json -├── airgap-manifest.json.sha256 -├── airgap-transfer-001.tar.zst -├── airgap-transfer-001.tar.zst.sha256 -├── airgap-transfer-002.tar.zst -├── airgap-transfer-002.tar.zst.sha256 -└── TRANSFER-README.txt -``` - -## Not in v1 - -- Incremental/differential exports -- gzip/none compression -- Automatic createrepo_c on import -- Web UI transfer wizard (Phase 5) -- SSE progress streaming (Phase 5) - -## Store Changes - -- Add migration v2: `transfer_archives` table to cache per-archive validation results -- New store methods: `CreateTransferArchive`, `GetValidatedArchives` - -## New Dependencies - -- `github.com/klauspost/compress/zstd` for zstd compression/decompression diff --git a/docs/plans/2026-02-19-export-import-plan.md b/docs/plans/2026-02-19-export-import-plan.md deleted file mode 100644 index 8c43e3b..0000000 --- a/docs/plans/2026-02-19-export-import-plan.md +++ /dev/null @@ -1,1753 +0,0 @@ -# Export/Import Engine Implementation Plan - -> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. - -**Goal:** Implement the export/import engine that packages synced content into split tar.zst archives for air-gapped transfer. - -**Architecture:** Two new files in `internal/engine/` (export.go, import.go) plus a manifest package. The SyncManager gets Export() and Import() methods. The existing store Transfer model and CRUD methods are reused. A new migration adds a `transfer_archives` table for caching per-archive validation state. - -**Tech Stack:** Go stdlib `archive/tar`, `github.com/klauspost/compress/zstd`, existing SQLite store, existing `store.Transfer` model. 
- -**Design Doc:** `docs/plans/2026-02-19-export-import-design.md` - ---- - -### Task 1: Add zstd dependency - -**Files:** -- Modify: `go.mod` - -**Step 1: Add the zstd module** - -Run: `cd /Users/badger/code/ocp-offline && go get github.com/klauspost/compress/zstd` - -**Step 2: Verify it was added** - -Run: `grep klauspost /Users/badger/code/ocp-offline/go.mod` -Expected: Line containing `github.com/klauspost/compress` - ---- - -### Task 2: ParseSize helper + tests - -**Files:** -- Create: `internal/engine/size.go` -- Create: `internal/engine/size_test.go` - -**Step 1: Write the failing tests** - -Create `internal/engine/size_test.go`: - -```go -package engine - -import ( - "testing" -) - -func TestParseSize(t *testing.T) { - tests := []struct { - input string - expected int64 - wantErr bool - }{ - {"100B", 100, false}, - {"1KB", 1024, false}, - {"1MB", 1024 * 1024, false}, - {"1GB", 1024 * 1024 * 1024, false}, - {"25GB", 25 * 1024 * 1024 * 1024, false}, - {"1TB", 1024 * 1024 * 1024 * 1024, false}, - {"500mb", 500 * 1024 * 1024, false}, - {"10gb", 10 * 1024 * 1024 * 1024, false}, - {"1024", 1024, false}, - {"", 0, true}, - {"GB", 0, true}, - {"-1GB", 0, true}, - {"abc", 0, true}, - } - - for _, tt := range tests { - t.Run(tt.input, func(t *testing.T) { - got, err := ParseSize(tt.input) - if tt.wantErr { - if err == nil { - t.Errorf("ParseSize(%q) expected error, got %d", tt.input, got) - } - return - } - if err != nil { - t.Fatalf("ParseSize(%q) unexpected error: %v", tt.input, err) - } - if got != tt.expected { - t.Errorf("ParseSize(%q) = %d, want %d", tt.input, got, tt.expected) - } - }) - } -} -``` - -**Step 2: Run test to verify it fails** - -Run: `cd /Users/badger/code/ocp-offline && go test ./internal/engine/ -run TestParseSize -v` -Expected: FAIL — `ParseSize` undefined - -**Step 3: Write the implementation** - -Create `internal/engine/size.go`: - -```go -package engine - -import ( - "fmt" - "strconv" - "strings" -) - -// ParseSize parses a human-readable 
size string like "25GB" into bytes. -// Supports B, KB, MB, GB, TB suffixes (case-insensitive). -// A plain number is treated as bytes. -func ParseSize(s string) (int64, error) { - s = strings.TrimSpace(s) - if s == "" { - return 0, fmt.Errorf("empty size string") - } - - s = strings.ToUpper(s) - - multipliers := []struct { - suffix string - mult int64 - }{ - {"TB", 1024 * 1024 * 1024 * 1024}, - {"GB", 1024 * 1024 * 1024}, - {"MB", 1024 * 1024}, - {"KB", 1024}, - {"B", 1}, - } - - for _, m := range multipliers { - if strings.HasSuffix(s, m.suffix) { - numStr := strings.TrimSuffix(s, m.suffix) - if numStr == "" { - return 0, fmt.Errorf("missing number in size: %s", s) - } - n, err := strconv.ParseInt(numStr, 10, 64) - if err != nil { - return 0, fmt.Errorf("invalid number in size %q: %w", s, err) - } - if n < 0 { - return 0, fmt.Errorf("negative size: %s", s) - } - return n * m.mult, nil - } - } - - // Plain number = bytes - n, err := strconv.ParseInt(s, 10, 64) - if err != nil { - return 0, fmt.Errorf("invalid size %q: %w", s, err) - } - if n < 0 { - return 0, fmt.Errorf("negative size: %s", s) - } - return n, nil -} -``` - -**Step 4: Run test to verify it passes** - -Run: `cd /Users/badger/code/ocp-offline && go test ./internal/engine/ -run TestParseSize -v` -Expected: PASS - ---- - -### Task 3: Manifest types + JSON serialization - -**Files:** -- Create: `internal/engine/manifest.go` -- Create: `internal/engine/manifest_test.go` - -**Step 1: Write the failing tests** - -Create `internal/engine/manifest_test.go`: - -```go -package engine - -import ( - "encoding/json" - "testing" - "time" -) - -func TestManifestRoundTrip(t *testing.T) { - m := &TransferManifest{ - Version: "1.0", - Created: time.Date(2026, 2, 19, 14, 30, 0, 0, time.UTC), - SourceHost: "sync-server.example.com", - Providers: map[string]ManifestProvider{ - "epel": {FileCount: 100, TotalSize: 1024000}, - }, - Archives: []ManifestArchive{ - { - Name: "airgap-transfer-001.tar.zst", - Size: 512000, - 
SHA256: "abc123", - Files: []string{"epel/9/foo.rpm"}, - }, - }, - TotalArchives: 1, - TotalSize: 512000, - FileInventory: []ManifestFile{ - {Provider: "epel", Path: "9/foo.rpm", Size: 1024, SHA256: "def456"}, - }, - } - - data, err := json.MarshalIndent(m, "", " ") - if err != nil { - t.Fatalf("marshal error: %v", err) - } - - var decoded TransferManifest - if err := json.Unmarshal(data, &decoded); err != nil { - t.Fatalf("unmarshal error: %v", err) - } - - if decoded.Version != "1.0" { - t.Errorf("version = %q, want %q", decoded.Version, "1.0") - } - if decoded.SourceHost != "sync-server.example.com" { - t.Errorf("source_host = %q, want %q", decoded.SourceHost, "sync-server.example.com") - } - if len(decoded.Providers) != 1 { - t.Fatalf("providers count = %d, want 1", len(decoded.Providers)) - } - if decoded.Providers["epel"].FileCount != 100 { - t.Errorf("epel file_count = %d, want 100", decoded.Providers["epel"].FileCount) - } - if len(decoded.Archives) != 1 { - t.Fatalf("archives count = %d, want 1", len(decoded.Archives)) - } - if decoded.Archives[0].SHA256 != "abc123" { - t.Errorf("archive sha256 = %q, want %q", decoded.Archives[0].SHA256, "abc123") - } - if len(decoded.FileInventory) != 1 { - t.Fatalf("file_inventory count = %d, want 1", len(decoded.FileInventory)) - } - if decoded.FileInventory[0].Provider != "epel" { - t.Errorf("file provider = %q, want %q", decoded.FileInventory[0].Provider, "epel") - } -} -``` - -**Step 2: Run test to verify it fails** - -Run: `cd /Users/badger/code/ocp-offline && go test ./internal/engine/ -run TestManifestRoundTrip -v` -Expected: FAIL — types undefined - -**Step 3: Write the implementation** - -Create `internal/engine/manifest.go`: - -```go -package engine - -import "time" - -// TransferManifest describes a complete export for transfer to an air-gapped environment. 
-type TransferManifest struct { - Version string `json:"version"` - Created time.Time `json:"created"` - SourceHost string `json:"source_host"` - Providers map[string]ManifestProvider `json:"providers"` - Archives []ManifestArchive `json:"archives"` - TotalArchives int `json:"total_archives"` - TotalSize int64 `json:"total_size"` - FileInventory []ManifestFile `json:"file_inventory"` -} - -// ManifestProvider summarizes one provider's contribution to the export. -type ManifestProvider struct { - FileCount int `json:"file_count"` - TotalSize int64 `json:"total_size"` -} - -// ManifestArchive describes a single split archive in the export. -type ManifestArchive struct { - Name string `json:"name"` - Size int64 `json:"size"` - SHA256 string `json:"sha256"` - Files []string `json:"files"` -} - -// ManifestFile is one entry in the full file inventory. -type ManifestFile struct { - Provider string `json:"provider"` - Path string `json:"path"` - Size int64 `json:"size"` - SHA256 string `json:"sha256"` -} -``` - -**Step 4: Run test to verify it passes** - -Run: `cd /Users/badger/code/ocp-offline && go test ./internal/engine/ -run TestManifestRoundTrip -v` -Expected: PASS - ---- - -### Task 4: Store migration for transfer_archives - -**Files:** -- Modify: `internal/store/models.go` — add `TransferArchive` model -- Modify: `internal/store/migrations.go` — add migration v2 -- Modify: `internal/store/sqlite.go` — add CRUD for transfer_archives -- Modify: `internal/store/sqlite_test.go` — add tests - -**Step 1: Write the failing test** - -Add to `internal/store/sqlite_test.go`: - -```go -func TestTransferArchiveCRUD(t *testing.T) { - s := setupTestStore(t) - defer s.Close() - - // Create a parent transfer first - transfer := &Transfer{ - Direction: "export", - Path: "/mnt/usb", - Providers: "epel,rhcos", - Status: "running", - StartTime: time.Now(), - } - if err := s.CreateTransfer(transfer); err != nil { - t.Fatalf("create transfer: %v", err) - } - - // Create a transfer 
archive - archive := &TransferArchive{ - TransferID: transfer.ID, - ArchiveName: "airgap-transfer-001.tar.zst", - SHA256: "abc123def456", - Size: 1024000, - Validated: false, - ValidatedAt: time.Time{}, - } - if err := s.CreateTransferArchive(archive); err != nil { - t.Fatalf("create transfer archive: %v", err) - } - if archive.ID == 0 { - t.Fatal("expected non-zero ID") - } - - // Mark as validated - if err := s.MarkArchiveValidated(archive.ID); err != nil { - t.Fatalf("mark validated: %v", err) - } - - // List archives for transfer - archives, err := s.ListTransferArchives(transfer.ID) - if err != nil { - t.Fatalf("list archives: %v", err) - } - if len(archives) != 1 { - t.Fatalf("expected 1 archive, got %d", len(archives)) - } - if !archives[0].Validated { - t.Error("expected archive to be validated") - } -} -``` - -**Step 2: Run test to verify it fails** - -Run: `cd /Users/badger/code/ocp-offline && go test ./internal/store/ -run TestTransferArchiveCRUD -v` -Expected: FAIL — `TransferArchive` undefined - -**Step 3: Add the model to models.go** - -Add to `internal/store/models.go`: - -```go -// TransferArchive tracks per-archive validation state during import -type TransferArchive struct { - ID int64 - TransferID int64 - ArchiveName string - SHA256 string - Size int64 - Validated bool - ValidatedAt time.Time -} -``` - -**Step 4: Add migration v2 to migrations.go** - -Append a new entry to the `migrations` slice in `internal/store/migrations.go`: - -```go -{ - version: 2, - sql: ` - CREATE TABLE transfer_archives ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - transfer_id INTEGER NOT NULL, - archive_name TEXT NOT NULL, - sha256 TEXT NOT NULL, - size INTEGER DEFAULT 0, - validated BOOLEAN DEFAULT 0, - validated_at DATETIME, - FOREIGN KEY(transfer_id) REFERENCES transfers(id) - ); - `, -}, -``` - -**Step 5: Add store methods to sqlite.go** - -Add to `internal/store/sqlite.go` after the Transfer section: - -```go -// 
============================================================================ -// TransferArchive Operations -// ============================================================================ - -// CreateTransferArchive inserts a new TransferArchive and sets its ID -func (s *Store) CreateTransferArchive(a *TransferArchive) error { - const query = ` - INSERT INTO transfer_archives ( - transfer_id, archive_name, sha256, size, validated, validated_at - ) VALUES (?, ?, ?, ?, ?, ?) - ` - - result, err := s.db.Exec( - query, - a.TransferID, a.ArchiveName, a.SHA256, a.Size, - a.Validated, a.ValidatedAt, - ) - if err != nil { - return fmt.Errorf("failed to insert transfer archive: %w", err) - } - - id, err := result.LastInsertId() - if err != nil { - return fmt.Errorf("failed to get last insert id: %w", err) - } - - a.ID = id - return nil -} - -// MarkArchiveValidated marks a TransferArchive as validated -func (s *Store) MarkArchiveValidated(id int64) error { - const query = ` - UPDATE transfer_archives - SET validated = 1, validated_at = CURRENT_TIMESTAMP - WHERE id = ? - ` - - result, err := s.db.Exec(query, id) - if err != nil { - return fmt.Errorf("failed to mark archive validated: %w", err) - } - - rowsAffected, err := result.RowsAffected() - if err != nil { - return fmt.Errorf("failed to get rows affected: %w", err) - } - - if rowsAffected == 0 { - return fmt.Errorf("transfer archive not found: %d", id) - } - - return nil -} - -// ListTransferArchives retrieves all archives for a transfer -func (s *Store) ListTransferArchives(transferID int64) ([]TransferArchive, error) { - const query = ` - SELECT id, transfer_id, archive_name, sha256, size, validated, validated_at - FROM transfer_archives WHERE transfer_id = ? 
ORDER BY archive_name - ` - - rows, err := s.db.Query(query, transferID) - if err != nil { - return nil, fmt.Errorf("failed to query transfer archives: %w", err) - } - defer rows.Close() - - var archives []TransferArchive - for rows.Next() { - a := TransferArchive{} - err := rows.Scan( - &a.ID, &a.TransferID, &a.ArchiveName, &a.SHA256, - &a.Size, &a.Validated, &a.ValidatedAt, - ) - if err != nil { - return nil, fmt.Errorf("failed to scan transfer archive: %w", err) - } - archives = append(archives, a) - } - - if err := rows.Err(); err != nil { - return nil, fmt.Errorf("error iterating transfer archives: %w", err) - } - - return archives, nil -} -``` - -**Step 6: Run test to verify it passes** - -Run: `cd /Users/badger/code/ocp-offline && go test ./internal/store/ -run TestTransferArchiveCRUD -v` -Expected: PASS - -**Step 7: Run all store tests to check for regressions** - -Run: `cd /Users/badger/code/ocp-offline && go test ./internal/store/ -v` -Expected: All PASS - ---- - -### Task 5: Export engine implementation + tests - -**Files:** -- Create: `internal/engine/export.go` -- Create: `internal/engine/export_test.go` - -This is the largest task. The export engine creates split tar.zst archives from synced files. - -**Step 1: Write the failing test** - -Create `internal/engine/export_test.go`: - -```go -package engine - -import ( - "context" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "io" - "log/slog" - "os" - "path/filepath" - "strings" - "testing" - "time" - - "github.com/BadgerOps/airgap/internal/config" - "github.com/BadgerOps/airgap/internal/download" - "github.com/BadgerOps/airgap/internal/provider" - "github.com/BadgerOps/airgap/internal/store" - "github.com/klauspost/compress/zstd" -) - -// setupExportTest creates a temp data dir with fake synced files and a store with matching records. 
-func setupExportTest(t *testing.T) (*SyncManager, string, string) { - t.Helper() - - dataDir := t.TempDir() - outputDir := t.TempDir() - - // Create fake synced files under dataDir - files := map[string]string{ - "epel/9/Packages/foo.rpm": "fake-rpm-content-foo", - "epel/9/Packages/bar.rpm": "fake-rpm-content-bar", - "ocp_binaries/4.18/oc": "fake-oc-binary", - } - - for relPath, content := range files { - absPath := filepath.Join(dataDir, relPath) - if err := os.MkdirAll(filepath.Dir(absPath), 0o755); err != nil { - t.Fatal(err) - } - if err := os.WriteFile(absPath, []byte(content), 0o644); err != nil { - t.Fatal(err) - } - } - - // Set up store with file records - dbPath := filepath.Join(t.TempDir(), "test.db") - logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - st, err := store.New(dbPath, logger) - if err != nil { - t.Fatal(err) - } - t.Cleanup(func() { st.Close() }) - - for relPath, content := range files { - parts := strings.SplitN(relPath, "/", 2) - providerName := parts[0] - providerPath := parts[1] - h := sha256.Sum256([]byte(content)) - rec := &store.FileRecord{ - Provider: providerName, - Path: providerPath, - Size: int64(len(content)), - SHA256: hex.EncodeToString(h[:]), - LastModified: time.Now(), - LastVerified: time.Now(), - } - if err := st.UpsertFileRecord(rec); err != nil { - t.Fatal(err) - } - } - - cfg := &config.Config{ - Server: config.ServerConfig{DataDir: dataDir}, - Export: config.ExportConfig{ - SplitSize: "1GB", - Compression: "zstd", - }, - } - - registry := provider.NewRegistry() - client := download.NewClient(logger) - mgr := NewSyncManager(registry, st, client, cfg, logger) - - return mgr, dataDir, outputDir -} - -func TestExportCreatesArchivesAndManifest(t *testing.T) { - mgr, _, outputDir := setupExportTest(t) - - report, err := mgr.Export(context.Background(), ExportOptions{ - OutputDir: outputDir, - Providers: []string{"epel", "ocp_binaries"}, - SplitSize: 1024 * 1024 * 1024, // 1GB — all files fit in one archive - 
Compression: "zstd", - }) - if err != nil { - t.Fatalf("Export() error: %v", err) - } - - // Should have 1 archive (files are tiny) - if len(report.Archives) != 1 { - t.Fatalf("expected 1 archive, got %d", len(report.Archives)) - } - if report.TotalFiles != 3 { - t.Errorf("expected 3 total files, got %d", report.TotalFiles) - } - - // Verify archive file exists - archivePath := filepath.Join(outputDir, report.Archives[0].Name) - if _, err := os.Stat(archivePath); os.IsNotExist(err) { - t.Fatalf("archive file not found: %s", archivePath) - } - - // Verify .sha256 sidecar exists - sha256Path := archivePath + ".sha256" - if _, err := os.Stat(sha256Path); os.IsNotExist(err) { - t.Fatalf("sha256 sidecar not found: %s", sha256Path) - } - - // Verify manifest exists and is valid JSON - manifestPath := filepath.Join(outputDir, "airgap-manifest.json") - manifestData, err := os.ReadFile(manifestPath) - if err != nil { - t.Fatalf("read manifest: %v", err) - } - var manifest TransferManifest - if err := json.Unmarshal(manifestData, &manifest); err != nil { - t.Fatalf("unmarshal manifest: %v", err) - } - if manifest.Version != "1.0" { - t.Errorf("manifest version = %q, want %q", manifest.Version, "1.0") - } - if manifest.TotalArchives != 1 { - t.Errorf("manifest total_archives = %d, want 1", manifest.TotalArchives) - } - if len(manifest.FileInventory) != 3 { - t.Errorf("manifest file_inventory count = %d, want 3", len(manifest.FileInventory)) - } - - // Verify TRANSFER-README.txt exists - readmePath := filepath.Join(outputDir, "TRANSFER-README.txt") - if _, err := os.Stat(readmePath); os.IsNotExist(err) { - t.Fatal("TRANSFER-README.txt not found") - } - - // Verify manifest.json.sha256 exists - manifestSha := filepath.Join(outputDir, "airgap-manifest.json.sha256") - if _, err := os.Stat(manifestSha); os.IsNotExist(err) { - t.Fatal("manifest sha256 sidecar not found") - } -} - -func TestExportSplitsArchives(t *testing.T) { - mgr, dataDir, outputDir := setupExportTest(t) - - // 
Write a larger file so splitting triggers - bigContent := strings.Repeat("x", 1000) - bigPath := filepath.Join(dataDir, "epel/9/Packages/big.rpm") - if err := os.WriteFile(bigPath, []byte(bigContent), 0o644); err != nil { - t.Fatal(err) - } - h := sha256.Sum256([]byte(bigContent)) - // Access store via the manager's store field — we'll need to add a helper or - // just set the split size very small so existing files trigger splits. - - // Use a very small split size to force multiple archives - report, err := mgr.Export(context.Background(), ExportOptions{ - OutputDir: outputDir, - Providers: []string{"epel", "ocp_binaries"}, - SplitSize: 50, // 50 bytes — will force splits - Compression: "zstd", - }) - _ = h // unused but shows the pattern - if err != nil { - t.Fatalf("Export() error: %v", err) - } - - if len(report.Archives) < 2 { - t.Errorf("expected multiple archives with 50-byte split, got %d", len(report.Archives)) - } - - // Each archive should exist on disk - for _, arch := range report.Archives { - archPath := filepath.Join(outputDir, arch.Name) - if _, err := os.Stat(archPath); os.IsNotExist(err) { - t.Errorf("archive not found: %s", archPath) - } - } -} - -func TestExportRejectsNonZstdCompression(t *testing.T) { - mgr, _, outputDir := setupExportTest(t) - - _, err := mgr.Export(context.Background(), ExportOptions{ - OutputDir: outputDir, - Providers: []string{"epel"}, - SplitSize: 1024 * 1024 * 1024, - Compression: "gzip", - }) - if err == nil { - t.Fatal("expected error for gzip compression, got nil") - } - if !strings.Contains(err.Error(), "zstd") { - t.Errorf("error should mention zstd, got: %v", err) - } -} -``` - -**Step 2: Run test to verify it fails** - -Run: `cd /Users/badger/code/ocp-offline && go test ./internal/engine/ -run TestExport -v` -Expected: FAIL — `ExportOptions` and `Export` method undefined - -**Step 3: Write export.go implementation** - -Create `internal/engine/export.go`: - -```go -package engine - -import ( - "archive/tar" - 
"context" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "fmt" - "io" - "os" - "path/filepath" - "strings" - "time" - - "github.com/klauspost/compress/zstd" -) - -// ExportOptions configures an export operation. -type ExportOptions struct { - OutputDir string - Providers []string - SplitSize int64 - Compression string -} - -// ExportReport summarizes a completed export. -type ExportReport struct { - Archives []ArchiveInfo - TotalFiles int - TotalSize int64 - ManifestPath string - Duration time.Duration -} - -// ArchiveInfo describes one split archive. -type ArchiveInfo struct { - Name string - Size int64 - SHA256 string - Files []string -} - -// Export creates split tar.zst archives of synced content for air-gapped transfer. -func (m *SyncManager) Export(ctx context.Context, opts ExportOptions) (*ExportReport, error) { - startTime := time.Now() - - if opts.Compression != "zstd" { - return nil, fmt.Errorf("unsupported compression %q: only zstd is supported in v1", opts.Compression) - } - - if opts.SplitSize <= 0 { - return nil, fmt.Errorf("split size must be positive") - } - - if err := os.MkdirAll(opts.OutputDir, 0o755); err != nil { - return nil, fmt.Errorf("creating output directory: %w", err) - } - - // Collect files from store for requested providers - type fileEntry struct { - provider string - relPath string // relative to provider dir (from store) - absPath string // absolute on disk - size int64 - sha256 string - } - - var allFiles []fileEntry - providerSummary := make(map[string]ManifestProvider) - - for _, provName := range opts.Providers { - records, err := m.store.ListFileRecords(provName) - if err != nil { - m.logger.Warn("failed to list files for provider", "provider", provName, "error", err) - continue - } - - mp := ManifestProvider{} - for _, rec := range records { - absPath := filepath.Join(m.config.Server.DataDir, provName, rec.Path) - if _, err := os.Stat(absPath); os.IsNotExist(err) { - m.logger.Warn("file in store but not on disk, 
skipping", "path", absPath) - continue - } - - allFiles = append(allFiles, fileEntry{ - provider: provName, - relPath: rec.Path, - absPath: absPath, - size: rec.Size, - sha256: rec.SHA256, - }) - mp.FileCount++ - mp.TotalSize += rec.Size - } - providerSummary[provName] = mp - } - - if len(allFiles) == 0 { - return nil, fmt.Errorf("no files to export") - } - - // Create split archives - archiveNum := 1 - currentSize := int64(0) - var archives []ArchiveInfo - var currentFiles []string - - var tarWriter *tar.Writer - var zstdWriter *zstd.Encoder - var archiveFile *os.File - var archivePath string - - openArchive := func() error { - name := fmt.Sprintf("airgap-transfer-%03d.tar.zst", archiveNum) - archivePath = filepath.Join(opts.OutputDir, name) - - var err error - archiveFile, err = os.Create(archivePath) - if err != nil { - return fmt.Errorf("creating archive %s: %w", name, err) - } - zstdWriter, err = zstd.NewWriter(archiveFile) - if err != nil { - archiveFile.Close() - return fmt.Errorf("creating zstd writer: %w", err) - } - tarWriter = tar.NewWriter(zstdWriter) - currentFiles = nil - currentSize = 0 - return nil - } - - closeArchive := func() (*ArchiveInfo, error) { - if tarWriter == nil { - return nil, nil - } - if err := tarWriter.Close(); err != nil { - return nil, fmt.Errorf("closing tar writer: %w", err) - } - if err := zstdWriter.Close(); err != nil { - return nil, fmt.Errorf("closing zstd writer: %w", err) - } - if err := archiveFile.Close(); err != nil { - return nil, fmt.Errorf("closing archive file: %w", err) - } - - // Compute SHA256 of the archive - hash, size, err := hashFile(archivePath) - if err != nil { - return nil, fmt.Errorf("hashing archive: %w", err) - } - - name := filepath.Base(archivePath) - info := &ArchiveInfo{ - Name: name, - Size: size, - SHA256: hash, - Files: currentFiles, - } - - // Write .sha256 sidecar - sidecar := archivePath + ".sha256" - content := fmt.Sprintf("%s %s\n", hash, name) - if err := os.WriteFile(sidecar, 
[]byte(content), 0o644); err != nil { - return nil, fmt.Errorf("writing sha256 sidecar: %w", err) - } - - tarWriter = nil - zstdWriter = nil - archiveFile = nil - archiveNum++ - return info, nil - } - - // Open first archive - if err := openArchive(); err != nil { - return nil, err - } - - for _, f := range allFiles { - select { - case <-ctx.Done(): - // Clean up on cancel - if tarWriter != nil { - tarWriter.Close() - zstdWriter.Close() - archiveFile.Close() - } - return nil, ctx.Err() - default: - } - - // Roll to next archive if this file would exceed split size - // (unless current archive is empty — a single large file must go somewhere) - if currentSize > 0 && currentSize+f.size > opts.SplitSize { - info, err := closeArchive() - if err != nil { - return nil, err - } - archives = append(archives, *info) - if err := openArchive(); err != nil { - return nil, err - } - } - - // Add file to tar - tarPath := filepath.Join(f.provider, f.relPath) - if err := addFileToTar(tarWriter, f.absPath, tarPath); err != nil { - return nil, fmt.Errorf("adding %s to archive: %w", tarPath, err) - } - currentFiles = append(currentFiles, tarPath) - currentSize += f.size - } - - // Close final archive - info, err := closeArchive() - if err != nil { - return nil, err - } - if info != nil { - archives = append(archives, *info) - } - - // Build manifest - hostname, _ := os.Hostname() - var fileInventory []ManifestFile - var totalSize int64 - for _, f := range allFiles { - fileInventory = append(fileInventory, ManifestFile{ - Provider: f.provider, - Path: f.relPath, - Size: f.size, - SHA256: f.sha256, - }) - totalSize += f.size - } - - manifest := &TransferManifest{ - Version: "1.0", - Created: time.Now().UTC(), - SourceHost: hostname, - Providers: providerSummary, - Archives: archivesToManifest(archives), - TotalArchives: len(archives), - TotalSize: totalSize, - FileInventory: fileInventory, - } - - // Write manifest JSON - manifestPath := filepath.Join(opts.OutputDir, 
"airgap-manifest.json") - manifestData, err := json.MarshalIndent(manifest, "", " ") - if err != nil { - return nil, fmt.Errorf("marshaling manifest: %w", err) - } - if err := os.WriteFile(manifestPath, manifestData, 0o644); err != nil { - return nil, fmt.Errorf("writing manifest: %w", err) - } - - // Write manifest .sha256 - manifestHash, _, err := hashFile(manifestPath) - if err != nil { - return nil, fmt.Errorf("hashing manifest: %w", err) - } - manifestSidecar := manifestPath + ".sha256" - sidecarContent := fmt.Sprintf("%s %s\n", manifestHash, "airgap-manifest.json") - if err := os.WriteFile(manifestSidecar, []byte(sidecarContent), 0o644); err != nil { - return nil, fmt.Errorf("writing manifest sha256: %w", err) - } - - // Write TRANSFER-README.txt - readmePath := filepath.Join(opts.OutputDir, "TRANSFER-README.txt") - readme := generateTransferReadme(manifest) - if err := os.WriteFile(readmePath, []byte(readme), 0o644); err != nil { - return nil, fmt.Errorf("writing TRANSFER-README.txt: %w", err) - } - - // Record in store - transfer := &store.Transfer{ - Direction: "export", - Path: opts.OutputDir, - Providers: strings.Join(opts.Providers, ","), - ArchiveCount: len(archives), - TotalSize: totalSize, - ManifestHash: manifestHash, - Status: "completed", - StartTime: startTime, - EndTime: time.Now(), - } - if err := m.store.CreateTransfer(transfer); err != nil { - m.logger.Warn("failed to record transfer in store", "error", err) - } - - duration := time.Since(startTime) - m.logger.Info("export completed", - "archives", len(archives), - "files", len(allFiles), - "total_size", totalSize, - "duration", duration, - ) - - return &ExportReport{ - Archives: archives, - TotalFiles: len(allFiles), - TotalSize: totalSize, - ManifestPath: manifestPath, - Duration: duration, - }, nil -} - -// addFileToTar adds a single file to a tar archive. 
-func addFileToTar(tw *tar.Writer, srcPath, tarPath string) error { - f, err := os.Open(srcPath) - if err != nil { - return err - } - defer f.Close() - - stat, err := f.Stat() - if err != nil { - return err - } - - header := &tar.Header{ - Name: tarPath, - Size: stat.Size(), - Mode: int64(stat.Mode()), - ModTime: stat.ModTime(), - } - if err := tw.WriteHeader(header); err != nil { - return err - } - if _, err := io.Copy(tw, f); err != nil { - return err - } - return nil -} - -// hashFile computes the SHA256 of a file, returning hex string and size. -func hashFile(path string) (string, int64, error) { - f, err := os.Open(path) - if err != nil { - return "", 0, err - } - defer f.Close() - - h := sha256.New() - size, err := io.Copy(h, f) - if err != nil { - return "", 0, err - } - return hex.EncodeToString(h.Sum(nil)), size, nil -} - -// archivesToManifest converts ArchiveInfo slice to ManifestArchive slice. -func archivesToManifest(archives []ArchiveInfo) []ManifestArchive { - result := make([]ManifestArchive, len(archives)) - for i, a := range archives { - result[i] = ManifestArchive{ - Name: a.Name, - Size: a.Size, - SHA256: a.SHA256, - Files: a.Files, - } - } - return result -} - -func formatSizeReadme(bytes int64) string { - const ( - gb = 1024 * 1024 * 1024 - mb = 1024 * 1024 - ) - if bytes >= gb { - return fmt.Sprintf("%.1f GB", float64(bytes)/float64(gb)) - } - return fmt.Sprintf("%.1f MB", float64(bytes)/float64(mb)) -} - -// generateTransferReadme creates the human-readable README for transfer media. 
-func generateTransferReadme(m *TransferManifest) string { - var b strings.Builder - b.WriteString("AIRGAP TRANSFER PACKAGE\n") - b.WriteString("=======================\n") - b.WriteString(fmt.Sprintf("Created: %s\n", m.Created.Format("2006-01-02 15:04 UTC"))) - b.WriteString(fmt.Sprintf("Source: %s\n", m.SourceHost)) - b.WriteString(fmt.Sprintf("Archives: %d parts\n", m.TotalArchives)) - b.WriteString(fmt.Sprintf("Total size: %s\n", formatSizeReadme(m.TotalSize))) - b.WriteString(fmt.Sprintf("Files: %d\n", len(m.FileInventory))) - b.WriteString("\nProviders included:\n") - for name, p := range m.Providers { - b.WriteString(fmt.Sprintf(" - %s (%d files, %s)\n", name, p.FileCount, formatSizeReadme(p.TotalSize))) - } - b.WriteString("\nTO IMPORT:\n") - b.WriteString("1. Mount this disk on the disconnected machine\n") - b.WriteString("2. Run: airgap import --from /mnt/usb\n") - b.WriteString("3. The tool will validate all archives before extracting\n") - b.WriteString("\nIF AN ARCHIVE IS CORRUPT:\n") - b.WriteString("- The import tool will tell you which archive(s) failed\n") - b.WriteString("- Re-copy only the failed archive from the source machine\n") - b.WriteString("- Re-run: airgap import --from /mnt/usb\n") - return b.String() -} -``` - -**Step 4: Run tests to verify they pass** - -Run: `cd /Users/badger/code/ocp-offline && go test ./internal/engine/ -run TestExport -v -timeout 60s` -Expected: All 3 tests PASS - ---- - -### Task 6: Import engine implementation + tests - -**Files:** -- Create: `internal/engine/import.go` -- Add to: `internal/engine/export_test.go` (import tests go here for round-trip testing) - -**Step 1: Write the failing tests** - -Add to `internal/engine/export_test.go`: - -```go -func TestImportRoundTrip(t *testing.T) { - mgr, _, outputDir := setupExportTest(t) - - // Export first - _, err := mgr.Export(context.Background(), ExportOptions{ - OutputDir: outputDir, - Providers: []string{"epel", "ocp_binaries"}, - SplitSize: 1024 * 1024 * 1024, 
- Compression: "zstd", - }) - if err != nil { - t.Fatalf("Export() error: %v", err) - } - - // Create a fresh data dir for import target - importDataDir := t.TempDir() - mgr.config.Server.DataDir = importDataDir - - // Import - report, err := mgr.Import(context.Background(), ImportOptions{ - SourceDir: outputDir, - VerifyOnly: false, - Force: false, - }) - if err != nil { - t.Fatalf("Import() error: %v", err) - } - - if report.ArchivesFailed != 0 { - t.Errorf("expected 0 failed archives, got %d", report.ArchivesFailed) - } - if report.FilesExtracted != 3 { - t.Errorf("expected 3 files extracted, got %d", report.FilesExtracted) - } - - // Verify files exist in the new data dir - expectedFiles := []string{ - "epel/9/Packages/foo.rpm", - "epel/9/Packages/bar.rpm", - "ocp_binaries/4.18/oc", - } - for _, f := range expectedFiles { - p := filepath.Join(importDataDir, f) - if _, err := os.Stat(p); os.IsNotExist(err) { - t.Errorf("expected file not found after import: %s", f) - } - } -} - -func TestImportVerifyOnly(t *testing.T) { - mgr, _, outputDir := setupExportTest(t) - - // Export - _, err := mgr.Export(context.Background(), ExportOptions{ - OutputDir: outputDir, - Providers: []string{"epel"}, - SplitSize: 1024 * 1024 * 1024, - Compression: "zstd", - }) - if err != nil { - t.Fatalf("Export() error: %v", err) - } - - importDataDir := t.TempDir() - mgr.config.Server.DataDir = importDataDir - - report, err := mgr.Import(context.Background(), ImportOptions{ - SourceDir: outputDir, - VerifyOnly: true, - }) - if err != nil { - t.Fatalf("Import() verify-only error: %v", err) - } - - if report.ArchivesValidated != 1 { - t.Errorf("expected 1 archive validated, got %d", report.ArchivesValidated) - } - if report.FilesExtracted != 0 { - t.Errorf("verify-only should extract 0 files, got %d", report.FilesExtracted) - } -} - -func TestImportDetectsCorruptArchive(t *testing.T) { - mgr, _, outputDir := setupExportTest(t) - - // Export - _, err := mgr.Export(context.Background(), 
ExportOptions{ - OutputDir: outputDir, - Providers: []string{"epel"}, - SplitSize: 1024 * 1024 * 1024, - Compression: "zstd", - }) - if err != nil { - t.Fatalf("Export() error: %v", err) - } - - // Corrupt the archive - archivePath := filepath.Join(outputDir, "airgap-transfer-001.tar.zst") - if err := os.WriteFile(archivePath, []byte("corrupted"), 0o644); err != nil { - t.Fatal(err) - } - - importDataDir := t.TempDir() - mgr.config.Server.DataDir = importDataDir - - report, err := mgr.Import(context.Background(), ImportOptions{ - SourceDir: outputDir, - }) - // Import should return an error or report with failures - if err == nil && report.ArchivesFailed == 0 { - t.Fatal("expected failure for corrupted archive") - } -} - -func TestImportMissingManifest(t *testing.T) { - mgr, _, _ := setupExportTest(t) - - emptyDir := t.TempDir() - _, err := mgr.Import(context.Background(), ImportOptions{ - SourceDir: emptyDir, - }) - if err == nil { - t.Fatal("expected error for missing manifest") - } -} -``` - -**Step 2: Run test to verify it fails** - -Run: `cd /Users/badger/code/ocp-offline && go test ./internal/engine/ -run TestImport -v` -Expected: FAIL — `Import` method and `ImportOptions` undefined - -**Step 3: Write import.go implementation** - -Create `internal/engine/import.go`: - -```go -package engine - -import ( - "archive/tar" - "context" - "encoding/json" - "fmt" - "io" - "os" - "path/filepath" - "strings" - "time" - - "github.com/BadgerOps/airgap/internal/store" - "github.com/klauspost/compress/zstd" -) - -// ImportOptions configures an import operation. -type ImportOptions struct { - SourceDir string - VerifyOnly bool - Force bool -} - -// ImportReport summarizes a completed import. -type ImportReport struct { - ArchivesValidated int - ArchivesFailed int - FilesExtracted int - TotalSize int64 - Duration time.Duration - Errors []string -} - -// Import reads an airgap transfer package and extracts its contents. 
-func (m *SyncManager) Import(ctx context.Context, opts ImportOptions) (*ImportReport, error) { - startTime := time.Now() - - // Read manifest - manifestPath := filepath.Join(opts.SourceDir, "airgap-manifest.json") - manifestData, err := os.ReadFile(manifestPath) - if err != nil { - return nil, fmt.Errorf("reading manifest: %w", err) - } - - var manifest TransferManifest - if err := json.Unmarshal(manifestData, &manifest); err != nil { - return nil, fmt.Errorf("parsing manifest: %w", err) - } - - m.logger.Info("import starting", - "source", opts.SourceDir, - "archives", manifest.TotalArchives, - "files", len(manifest.FileInventory), - ) - - // Verify all archive files are present - for _, arch := range manifest.Archives { - archPath := filepath.Join(opts.SourceDir, arch.Name) - if _, err := os.Stat(archPath); os.IsNotExist(err) { - return nil, fmt.Errorf("archive not found: %s", arch.Name) - } - } - - // Create a transfer record - transfer := &store.Transfer{ - Direction: "import", - Path: opts.SourceDir, - Status: "running", - StartTime: startTime, - } - if err := m.store.CreateTransfer(transfer); err != nil { - m.logger.Warn("failed to record transfer", "error", err) - } - - report := &ImportReport{} - - // Validate archives - for _, arch := range manifest.Archives { - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - } - - archPath := filepath.Join(opts.SourceDir, arch.Name) - - if !opts.Force { - m.logger.Info("validating archive", "name", arch.Name) - actualHash, _, err := hashFile(archPath) - if err != nil { - report.ArchivesFailed++ - report.Errors = append(report.Errors, fmt.Sprintf("hashing %s: %v", arch.Name, err)) - continue - } - - if actualHash != arch.SHA256 { - report.ArchivesFailed++ - report.Errors = append(report.Errors, - fmt.Sprintf("%s: expected sha256 %s, got %s", arch.Name, arch.SHA256, actualHash)) - continue - } - - m.logger.Info("archive validated", "name", arch.Name) - } - - report.ArchivesValidated++ - - // Record 
archive validation in store - if transfer.ID != 0 { - ta := &store.TransferArchive{ - TransferID: transfer.ID, - ArchiveName: arch.Name, - SHA256: arch.SHA256, - Size: arch.Size, - Validated: true, - ValidatedAt: time.Now(), - } - if err := m.store.CreateTransferArchive(ta); err != nil { - m.logger.Warn("failed to record archive validation", "error", err) - } - } - } - - // If any archives failed, stop - if report.ArchivesFailed > 0 { - report.Duration = time.Since(startTime) - if transfer.ID != 0 { - transfer.Status = "failed" - transfer.ErrorMessage = fmt.Sprintf("%d archive(s) failed validation", report.ArchivesFailed) - transfer.EndTime = time.Now() - _ = m.store.UpdateTransfer(transfer) - } - return report, fmt.Errorf("%d archive(s) failed validation", report.ArchivesFailed) - } - - // If verify-only, stop here - if opts.VerifyOnly { - report.Duration = time.Since(startTime) - if transfer.ID != 0 { - transfer.Status = "completed" - transfer.ArchiveCount = report.ArchivesValidated - transfer.EndTime = time.Now() - _ = m.store.UpdateTransfer(transfer) - } - m.logger.Info("verify-only complete", "validated", report.ArchivesValidated) - return report, nil - } - - // Extract archives - for _, arch := range manifest.Archives { - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - } - - archPath := filepath.Join(opts.SourceDir, arch.Name) - m.logger.Info("extracting archive", "name", arch.Name) - - extracted, size, err := m.extractArchive(archPath) - if err != nil { - report.Errors = append(report.Errors, fmt.Sprintf("extracting %s: %v", arch.Name, err)) - continue - } - - report.FilesExtracted += extracted - report.TotalSize += size - } - - // Upsert file records from manifest inventory - for _, f := range manifest.FileInventory { - absPath := filepath.Join(m.config.Server.DataDir, f.Provider, f.Path) - if _, err := os.Stat(absPath); os.IsNotExist(err) { - continue // file wasn't extracted (maybe from a failed archive) - } - - rec := 
&store.FileRecord{ - Provider: f.Provider, - Path: f.Path, - Size: f.Size, - SHA256: f.SHA256, - LastModified: time.Now(), - LastVerified: time.Now(), - } - if err := m.store.UpsertFileRecord(rec); err != nil { - m.logger.Warn("failed to upsert file record", "path", f.Path, "error", err) - } - } - - report.Duration = time.Since(startTime) - - // Update transfer record - if transfer.ID != 0 { - transfer.Status = "completed" - transfer.ArchiveCount = report.ArchivesValidated - transfer.TotalSize = report.TotalSize - transfer.EndTime = time.Now() - _ = m.store.UpdateTransfer(transfer) - } - - m.logger.Info("import completed", - "files_extracted", report.FilesExtracted, - "total_size", report.TotalSize, - "duration", report.Duration, - ) - - return report, nil -} - -// extractArchive decompresses and untars an archive into the data directory. -// Returns files extracted count and total bytes. -func (m *SyncManager) extractArchive(archivePath string) (int, int64, error) { - f, err := os.Open(archivePath) - if err != nil { - return 0, 0, fmt.Errorf("opening archive: %w", err) - } - defer f.Close() - - zr, err := zstd.NewReader(f) - if err != nil { - return 0, 0, fmt.Errorf("creating zstd reader: %w", err) - } - defer zr.Close() - - tr := tar.NewReader(zr) - - extracted := 0 - totalSize := int64(0) - - for { - header, err := tr.Next() - if err == io.EOF { - break - } - if err != nil { - return extracted, totalSize, fmt.Errorf("reading tar entry: %w", err) - } - - // Skip directories - if header.Typeflag == tar.TypeDir { - continue - } - - // Sanitize path to prevent directory traversal - cleanPath := filepath.Clean(header.Name) - if strings.HasPrefix(cleanPath, "..") || filepath.IsAbs(cleanPath) { - return extracted, totalSize, fmt.Errorf("unsafe path in archive: %s", header.Name) - } - - destPath := filepath.Join(m.config.Server.DataDir, cleanPath) - - if err := os.MkdirAll(filepath.Dir(destPath), 0o755); err != nil { - return extracted, totalSize, fmt.Errorf("creating 
directory: %w", err) - } - - outFile, err := os.Create(destPath) - if err != nil { - return extracted, totalSize, fmt.Errorf("creating file %s: %w", destPath, err) - } - - n, err := io.Copy(outFile, tr) - outFile.Close() - if err != nil { - return extracted, totalSize, fmt.Errorf("extracting %s: %w", header.Name, err) - } - - extracted++ - totalSize += n - } - - return extracted, totalSize, nil -} -``` - -**Step 4: Run tests to verify they pass** - -Run: `cd /Users/badger/code/ocp-offline && go test ./internal/engine/ -run TestImport -v -timeout 60s` -Expected: All 4 tests PASS - ---- - -### Task 7: Wire CLI commands to engine - -**Files:** -- Modify: `cmd/airgap/export.go` -- Modify: `cmd/airgap/importcmd.go` - -**Step 1: Update export.go CLI to call engine** - -Replace the `exportRun` function in `cmd/airgap/export.go` to call `globalEngine.Export()`: - -```go -func exportRun(cmd *cobra.Command, args []string) error { - log := slog.Default() - - if globalEngine == nil { - return fmt.Errorf("engine not initialized") - } - - var providers []string - if exportProvider != "" { - providers = strings.Split(exportProvider, ",") - for i, p := range providers { - providers[i] = strings.TrimSpace(p) - } - } else { - for name := range globalCfg.Providers { - if globalCfg.ProviderEnabled(name) { - providers = append(providers, name) - } - } - } - - if len(providers) == 0 { - log.Warn("no providers to export") - return nil - } - - splitSize, err := engine.ParseSize(exportSplitSize) - if err != nil { - return fmt.Errorf("invalid split size %q: %w", exportSplitSize, err) - } - - fmt.Printf("Exporting to %s...\n", exportTo) - fmt.Printf(" Providers: %v\n", providers) - fmt.Printf(" Split size: %s\n", exportSplitSize) - fmt.Printf(" Compression: %s\n", exportCompression) - fmt.Println() - - report, err := globalEngine.Export(cmd.Context(), engine.ExportOptions{ - OutputDir: exportTo, - Providers: providers, - SplitSize: splitSize, - Compression: exportCompression, - }) - if err 
!= nil { - return fmt.Errorf("export failed: %w", err) - } - - fmt.Printf("Export complete:\n") - fmt.Printf(" Archives: %d\n", len(report.Archives)) - fmt.Printf(" Files: %d\n", report.TotalFiles) - fmt.Printf(" Total size: %s\n", formatBytes(report.TotalSize)) - fmt.Printf(" Duration: %s\n", report.Duration.Round(time.Second)) - fmt.Printf(" Manifest: %s\n", report.ManifestPath) - - for _, arch := range report.Archives { - fmt.Printf(" - %s (%s)\n", arch.Name, formatBytes(arch.Size)) - } - - return nil -} -``` - -Note: add `"time"` and `"github.com/BadgerOps/airgap/internal/engine"` to the imports. - -**Step 2: Update importcmd.go CLI to call engine** - -Replace the `importRun` function in `cmd/airgap/importcmd.go`: - -```go -func importRun(cmd *cobra.Command, args []string) error { - if globalEngine == nil { - return fmt.Errorf("engine not initialized") - } - - fmt.Printf("Importing from %s...\n", importFrom) - if importVerifyOnly { - fmt.Println(" Mode: verify only") - } - if importForce { - fmt.Println(" Mode: force (skip checksum verification)") - } - fmt.Println() - - report, err := globalEngine.Import(cmd.Context(), engine.ImportOptions{ - SourceDir: importFrom, - VerifyOnly: importVerifyOnly, - Force: importForce, - }) - if err != nil { - // Still print partial report if available - if report != nil { - printImportReport(report) - } - return fmt.Errorf("import failed: %w", err) - } - - printImportReport(report) - return nil -} - -func printImportReport(report *engine.ImportReport) { - fmt.Printf("Import results:\n") - fmt.Printf(" Archives validated: %d\n", report.ArchivesValidated) - fmt.Printf(" Archives failed: %d\n", report.ArchivesFailed) - fmt.Printf(" Files extracted: %d\n", report.FilesExtracted) - fmt.Printf(" Total size: %s\n", formatBytes(report.TotalSize)) - fmt.Printf(" Duration: %s\n", report.Duration.Round(time.Second)) - if len(report.Errors) > 0 { - fmt.Println(" Errors:") - for _, e := range report.Errors { - fmt.Printf(" - %s\n", e) - } 
- } -} -``` - -Note: add `"time"` and `"github.com/BadgerOps/airgap/internal/engine"` to the imports. The `formatBytes` function already exists in `cmd/airgap/status.go`. - -**Step 3: Verify compilation** - -Run: `cd /Users/badger/code/ocp-offline && go build ./cmd/airgap/` -Expected: Success, no errors - ---- - -### Task 8: Run all tests - -**Step 1: Run the full test suite** - -Run: `cd /Users/badger/code/ocp-offline && go test ./... -timeout 120s` -Expected: All tests PASS - -**Step 2: Run with race detector** - -Run: `cd /Users/badger/code/ocp-offline && go test -race ./internal/engine/ -timeout 120s` -Expected: No race conditions detected - ---- - -### Task 9: Integration test — full round-trip - -**Files:** -- The round-trip test is already in Task 6 (`TestImportRoundTrip`). This task verifies it end-to-end. - -**Step 1: Run the round-trip test in verbose mode** - -Run: `cd /Users/badger/code/ocp-offline && go test ./internal/engine/ -run TestImportRoundTrip -v -timeout 60s` -Expected: PASS — sync → export → import → files verified - -**Step 2: Build binary and test CLI help output** - -Run: `cd /Users/badger/code/ocp-offline && go build -o bin/airgap ./cmd/airgap/ && ./bin/airgap export --help && ./bin/airgap import --help` -Expected: Help text shows flags and examples for both commands diff --git a/docs/plans/2026-02-20-mirror-discovery-design.md b/docs/plans/2026-02-20-mirror-discovery-design.md deleted file mode 100644 index b07dced..0000000 --- a/docs/plans/2026-02-20-mirror-discovery-design.md +++ /dev/null @@ -1,94 +0,0 @@ -# Mirror Auto-Discovery and Speed Test Design - -## Goal - -Make provider configuration easier by auto-discovering upstream mirrors (EPEL) and available versions (OCP/RHCOS), letting users pick from dropdowns, and ranking mirrors by speed. - -## Architecture - -A new `internal/mirror` package handles all upstream discovery with in-memory caching (1-hour TTL). 
The server exposes four new API endpoints that the providers UI calls on user action (button clicks, not page load). Speed tests run server-side since the browser may not have internet access in airgap-adjacent setups. - -## API Endpoints - -| Endpoint | Method | Purpose | -|----------|--------|---------| -| `/api/mirrors/epel/versions` | GET | Returns available EPEL versions and architectures | -| `/api/mirrors/epel?version=9&arch=x86_64` | GET | Returns EPEL mirrors from Fedora metalink, sorted by preference | -| `/api/mirrors/ocp/versions` | GET | Scrapes mirror.openshift.com for OCP versions (grouped by channel) and RHCOS versions | -| `/api/mirrors/speedtest` | POST | Runs latency + download test against given mirror URLs, returns ranked results | - -## `internal/mirror` Package - -### MirrorDiscovery struct - -Main service. Holds HTTP client, in-memory cache (map + RWMutex), and logger. - -### EPEL Discovery - -- Fetches `https://mirrors.fedoraproject.org/metalink?repo=epel-{version}&arch={arch}` -- Parses Metalink 3.0 XML to extract: mirror URL, country code, protocol, preference score (1-100) -- Known versions: 7, 8, 9, 10. Known architectures: x86_64, aarch64, ppc64le, s390x. -- Returns `[]MirrorInfo` - -### OCP/RHCOS Version Discovery - -- Fetches HTML directory listing from `mirror.openshift.com/pub/openshift-v4/clients/ocp/` -- Parses `
<a href>` tags, categorizes into: specific versions (4.17.48), channels (stable-4.17, fast-4.17, candidate-4.17) -- RHCOS: same approach at `/dependencies/rhcos/` - extracts minor versions and builds -- Returns structured version lists - -### Speed Test - -- Phase 1: HTTP HEAD to each mirror URL, measure latency. Run concurrently (10 goroutines max). -- Phase 2: Download a small file (repomd.xml for EPEL, sha256sum.txt for OCP) from top N fastest by latency. -- Returns `[]SpeedResult` sorted by throughput. -- Context-aware, 5-second timeout per mirror. - -## Data Structures - -``` -MirrorInfo: URL, Country, Protocol, Preference (int) -SpeedResult: URL, LatencyMs (int), ThroughputKBps (float64), Error (string) -OCPVersion: Version (string), Channel (string: "stable"/"fast"/"candidate"/"release") -RHCOSVersion: Minor (string, e.g. "4.17"), Builds ([]string) -``` - -## Caching - -In-memory map with TTL (1 hour). Key is request params (e.g., `epel:9:x86_64`). No persistence - discovery data is ephemeral. Protected by `sync.RWMutex`. - -## Error Handling - -Discovery failures return partial results with error messages. Speed test failures per-mirror are recorded in `SpeedResult.Error` rather than failing the whole batch. UI shows which mirrors errored. - -## UI Changes - -### EPEL Provider Form - -- Version dropdown: EPEL 7, 8, 9, 10 -- Architecture dropdown: x86_64, aarch64, ppc64le, s390x -- "Discover Mirrors" button fetches mirror list -- Mirror table: URL, Country, Preference. Selectable rows. 
-- "Test Speed" button runs speed test, adds Latency/Throughput columns, re-sorts -- Selecting a mirror auto-fills `base_url` -- Manual URL entry still supported - -### OCP Binaries Provider Form - -- "Load Versions" button fetches available versions -- Versions grouped by channel (stable, fast, candidate) and specific releases -- Multi-select checkboxes for versions to sync -- Base URL pre-filled with `https://mirror.openshift.com/pub/openshift-v4/clients/ocp`, editable for custom mirrors - -### RHCOS Provider Form - -- Same pattern as OCP: "Load Versions" button, version selection -- Base URL pre-filled with `https://mirror.openshift.com/pub/openshift-v4/dependencies/rhcos`, editable - -### UX Principle - -All discovery calls are explicit (user clicks button), never automatic on page load. - -## What Is NOT Persisted - -Mirror lists and speed test results are transient. Only the user's final choice (base_url, selected versions) gets persisted in the existing `provider_configs` table. diff --git a/docs/plans/2026-02-20-mirror-discovery-plan.md b/docs/plans/2026-02-20-mirror-discovery-plan.md deleted file mode 100644 index 684642f..0000000 --- a/docs/plans/2026-02-20-mirror-discovery-plan.md +++ /dev/null @@ -1,1859 +0,0 @@ -# Mirror Auto-Discovery Implementation Plan - -> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. - -**Goal:** Auto-discover upstream EPEL mirrors and OCP/RHCOS versions, let users pick from dropdowns, and rank mirrors by speed test. - -**Architecture:** New `internal/mirror` package with `MirrorDiscovery` service that fetches Fedora metalink XML (EPEL mirrors) and scrapes HTML directory listings (OCP/RHCOS versions). In-memory cache with 1-hour TTL. Four new API endpoints. Enhanced providers.html UI with Alpine.js. - -**Tech Stack:** Go stdlib (`encoding/xml`, `net/http`, `sync`, `html`, `strings`), Alpine.js for UI. 
- ---- - -### Task 1: MirrorInfo types and EPEL metalink parser - -**Files:** -- Create: `internal/mirror/types.go` -- Create: `internal/mirror/epel.go` -- Create: `internal/mirror/epel_test.go` - -**Step 1: Create types** - -Create `internal/mirror/types.go`: - -```go -package mirror - -// MirrorInfo represents a single upstream mirror. -type MirrorInfo struct { - URL string `json:"url"` - Country string `json:"country"` - Protocol string `json:"protocol"` - Preference int `json:"preference"` -} - -// SpeedResult represents a speed test result for a single mirror. -type SpeedResult struct { - URL string `json:"url"` - LatencyMs int `json:"latency_ms"` - ThroughputKBps float64 `json:"throughput_kbps"` - Error string `json:"error,omitempty"` -} - -// OCPVersion represents an available OCP release version. -type OCPVersion struct { - Version string `json:"version"` - Channel string `json:"channel"` // "stable", "fast", "candidate", "release" -} - -// RHCOSVersion represents an available RHCOS minor version with its builds. -type RHCOSVersion struct { - Minor string `json:"minor"` - Builds []string `json:"builds"` -} - -// EPELVersionInfo describes a known EPEL version. 
-type EPELVersionInfo struct { - Version int `json:"version"` - Architectures []string `json:"architectures"` -} -``` - -**Step 2: Write the metalink parser test** - -Create `internal/mirror/epel_test.go`: - -```go -package mirror - -import ( - "testing" -) - -const testMetalinkXML = ` - - - - - https://mirror1.example.com/epel/9/Everything/x86_64/repodata/repomd.xml - https://mirror2.example.de/epel/9/Everything/x86_64/repodata/repomd.xml - http://mirror3.example.jp/epel/9/Everything/x86_64/repodata/repomd.xml - - - -` - -func TestParseMetalink(t *testing.T) { - mirrors, err := parseMetalink([]byte(testMetalinkXML)) - if err != nil { - t.Fatalf("parseMetalink failed: %v", err) - } - - if len(mirrors) != 3 { - t.Fatalf("expected 3 mirrors, got %d", len(mirrors)) - } - - // Should be sorted by preference descending - if mirrors[0].Preference != 100 { - t.Errorf("expected first mirror preference 100, got %d", mirrors[0].Preference) - } - if mirrors[0].Country != "US" { - t.Errorf("expected first mirror country US, got %s", mirrors[0].Country) - } - if mirrors[0].Protocol != "https" { - t.Errorf("expected first mirror protocol https, got %s", mirrors[0].Protocol) - } - - // URL should be trimmed to base URL (strip /repodata/repomd.xml suffix) - expected := "https://mirror1.example.com/epel/9/Everything/x86_64" - if mirrors[0].URL != expected { - t.Errorf("expected URL %q, got %q", expected, mirrors[0].URL) - } - - if mirrors[2].Protocol != "http" { - t.Errorf("expected third mirror protocol http, got %s", mirrors[2].Protocol) - } -} - -func TestParseMetalinkEmpty(t *testing.T) { - mirrors, err := parseMetalink([]byte(``)) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(mirrors) != 0 { - t.Errorf("expected 0 mirrors, got %d", len(mirrors)) - } -} - -func TestParseMetalinkInvalid(t *testing.T) { - _, err := parseMetalink([]byte("not xml")) - if err == nil { - t.Error("expected error for invalid XML") - } -} -``` - -**Step 3: Run test to verify it 
fails** - -Run: `go test ./internal/mirror/ -run TestParseMetalink -v` -Expected: FAIL (package doesn't exist yet) - -**Step 4: Implement metalink parser** - -Create `internal/mirror/epel.go`: - -```go -package mirror - -import ( - "encoding/xml" - "fmt" - "sort" - "strings" -) - -// Metalink XML structures -type metalinkXML struct { - XMLName xml.Name `xml:"metalink"` - Files metalinkFiles `xml:"files"` -} - -type metalinkFiles struct { - File []metalinkFile `xml:"file"` -} - -type metalinkFile struct { - Name string `xml:"name,attr"` - Resources metalinkResources `xml:"resources"` -} - -type metalinkResources struct { - URLs []metalinkURL `xml:"url"` -} - -type metalinkURL struct { - Protocol string `xml:"protocol,attr"` - Type string `xml:"type,attr"` - Location string `xml:"location,attr"` - Preference int `xml:"preference,attr"` - URL string `xml:",chardata"` -} - -// Known EPEL versions and architectures -var ( - EPELVersions = []int{7, 8, 9, 10} - EPELArchitectures = []string{"x86_64", "aarch64", "ppc64le", "s390x"} -) - -// parseMetalink parses Fedora metalink XML and returns a list of mirrors -// sorted by preference (highest first). Mirror URLs are trimmed to the -// repo base URL (the /repodata/repomd.xml suffix is removed). 
-func parseMetalink(data []byte) ([]MirrorInfo, error) { - var ml metalinkXML - if err := xml.Unmarshal(data, &ml); err != nil { - return nil, fmt.Errorf("parsing metalink XML: %w", err) - } - - var mirrors []MirrorInfo - for _, f := range ml.Files.File { - for _, u := range f.Resources.URLs { - url := strings.TrimSpace(u.URL) - // Strip /repodata/repomd.xml suffix to get base repo URL - if idx := strings.Index(url, "/repodata/"); idx != -1 { - url = url[:idx] - } - - mirrors = append(mirrors, MirrorInfo{ - URL: url, - Country: u.Location, - Protocol: u.Protocol, - Preference: u.Preference, - }) - } - } - - sort.Slice(mirrors, func(i, j int) bool { - return mirrors[i].Preference > mirrors[j].Preference - }) - - return mirrors, nil -} -``` - -**Step 5: Run tests to verify they pass** - -Run: `go test ./internal/mirror/ -run TestParseMetalink -v` -Expected: PASS (all 3 tests) - -**Step 6: Commit** - -```bash -git add internal/mirror/ -git commit -m "feat: add mirror types and EPEL metalink parser" -``` - ---- - -### Task 2: OCP/RHCOS HTML directory parser - -**Files:** -- Create: `internal/mirror/ocp.go` -- Create: `internal/mirror/ocp_test.go` - -**Step 1: Write the directory parser test** - -Create `internal/mirror/ocp_test.go`: - -```go -package mirror - -import ( - "testing" -) - -const testOCPDirHTML = ` -4.14.41/ -4.17.48/ -4.18.3/ -stable-4.17/ -fast-4.17/ -candidate-4.18/ -latest-4.17/ -latest/ -stable/ -4.18.0-rc.2/ -4.12.0-ec.1/ -` - -func TestParseOCPVersions(t *testing.T) { - versions := parseOCPDirectoryListing([]byte(testOCPDirHTML)) - - channels := map[string]int{} - for _, v := range versions { - channels[v.Channel]++ - } - - if channels["stable"] != 1 { - t.Errorf("expected 1 stable channel entry, got %d", channels["stable"]) - } - if channels["fast"] != 1 { - t.Errorf("expected 1 fast channel entry, got %d", channels["fast"]) - } - if channels["candidate"] != 1 { - t.Errorf("expected 1 candidate channel entry, got %d", channels["candidate"]) - } - 
// 4.14.41, 4.17.48, 4.18.3 = specific releases; rc/ec excluded - if channels["release"] < 3 { - t.Errorf("expected at least 3 release entries, got %d", channels["release"]) - } -} - -const testRHCOSDirHTML = ` -4.14/ -4.17/ -4.18/ -latest/ -pre-release/ -` - -const testRHCOSBuildsHTML = ` -4.17.0/ -4.17.1/ -4.17.42/ -latest/ -` - -func TestParseRHCOSMinorVersions(t *testing.T) { - minors := parseRHCOSMinorVersions([]byte(testRHCOSDirHTML)) - - if len(minors) != 3 { - t.Fatalf("expected 3 minor versions, got %d: %v", len(minors), minors) - } - if minors[0] != "4.14" { - t.Errorf("expected first version 4.14, got %s", minors[0]) - } -} - -func TestParseRHCOSBuilds(t *testing.T) { - builds := parseRHCOSBuilds([]byte(testRHCOSBuildsHTML)) - - if len(builds) != 3 { - t.Fatalf("expected 3 builds (excluding 'latest'), got %d: %v", len(builds), builds) - } -} -``` - -**Step 2: Run test to verify it fails** - -Run: `go test ./internal/mirror/ -run TestParseOCP -v` -Expected: FAIL - -**Step 3: Implement parsers** - -Create `internal/mirror/ocp.go`: - -```go -package mirror - -import ( - "regexp" - "sort" - "strings" -) - -var ( - // Matches version directories like "4.17.48/" - versionRegex = regexp.MustCompile(`^(\d+\.\d+\.\d+)/?$`) - // Matches channel directories like "stable-4.17/" - channelRegex = regexp.MustCompile(`^(stable|fast|candidate|latest)-(\d+\.\d+)/?$`) - // Matches RHCOS minor version directories like "4.17/" - rhcosMinorRegex = regexp.MustCompile(`^(\d+\.\d+)/?$`) - // Matches href attributes in HTML anchor tags - hrefRegex = regexp.MustCompile(`href="([^"]+)"`) -) - -// Default upstream URLs -const ( - DefaultOCPBaseURL = "https://mirror.openshift.com/pub/openshift-v4/clients/ocp" - DefaultRHCOSBaseURL = "https://mirror.openshift.com/pub/openshift-v4/dependencies/rhcos" -) - -// extractHrefs pulls href values from HTML anchor tags. 
-func extractHrefs(data []byte) []string { - matches := hrefRegex.FindAllSubmatch(data, -1) - hrefs := make([]string, 0, len(matches)) - for _, m := range matches { - href := string(m[1]) - // Skip parent directory and non-directory links - if href == "../" || !strings.HasSuffix(href, "/") { - continue - } - hrefs = append(hrefs, strings.TrimSuffix(href, "/")) - } - return hrefs -} - -// parseOCPDirectoryListing parses an HTML directory listing from -// mirror.openshift.com and categorizes versions into channels and releases. -func parseOCPDirectoryListing(data []byte) []OCPVersion { - hrefs := extractHrefs(data) - var versions []OCPVersion - - for _, href := range hrefs { - // Check for channel pattern: stable-4.17, fast-4.17, candidate-4.18 - if m := channelRegex.FindStringSubmatch(href); m != nil { - channel := m[1] - if channel == "latest" { - continue // skip "latest-X.Y", redundant with stable - } - versions = append(versions, OCPVersion{ - Version: href, - Channel: channel, - }) - continue - } - - // Check for specific version: 4.17.48 - if m := versionRegex.FindStringSubmatch(href); m != nil { - ver := m[1] - // Skip RC and EC builds - if strings.Contains(href, "-rc.") || strings.Contains(href, "-ec.") { - continue - } - versions = append(versions, OCPVersion{ - Version: ver, - Channel: "release", - }) - } - } - - sort.Slice(versions, func(i, j int) bool { - return versions[i].Version > versions[j].Version - }) - - return versions -} - -// parseRHCOSMinorVersions extracts minor version numbers (e.g., "4.17") -// from the top-level RHCOS directory listing. -func parseRHCOSMinorVersions(data []byte) []string { - hrefs := extractHrefs(data) - var minors []string - - for _, href := range hrefs { - if m := rhcosMinorRegex.FindStringSubmatch(href); m != nil { - minors = append(minors, m[1]) - } - } - - sort.Strings(minors) - return minors -} - -// parseRHCOSBuilds extracts build version numbers from a RHCOS -// minor version directory listing. 
Skips "latest" and other non-version entries. -func parseRHCOSBuilds(data []byte) []string { - hrefs := extractHrefs(data) - var builds []string - - for _, href := range hrefs { - if versionRegex.MatchString(href) { - builds = append(builds, strings.TrimSuffix(href, "/")) - } - } - - sort.Strings(builds) - return builds -} -``` - -**Step 4: Run tests to verify they pass** - -Run: `go test ./internal/mirror/ -v` -Expected: PASS (all tests) - -**Step 5: Commit** - -```bash -git add internal/mirror/ocp.go internal/mirror/ocp_test.go -git commit -m "feat: add OCP/RHCOS HTML directory listing parsers" -``` - ---- - -### Task 3: MirrorDiscovery service with caching - -**Files:** -- Create: `internal/mirror/discovery.go` -- Create: `internal/mirror/discovery_test.go` - -**Step 1: Write the discovery service test** - -Create `internal/mirror/discovery_test.go`: - -```go -package mirror - -import ( - "context" - "log/slog" - "io" - "net/http" - "net/http/httptest" - "testing" - "time" -) - -func TestDiscoveryEPELMirrors(t *testing.T) { - // Serve test metalink XML - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/xml") - w.Write([]byte(testMetalinkXML)) - })) - defer srv.Close() - - logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - d := NewDiscovery(logger) - d.metalinkBaseURL = srv.URL + "/?repo=%s&arch=%s" - - mirrors, err := d.EPELMirrors(context.Background(), 9, "x86_64") - if err != nil { - t.Fatalf("EPELMirrors failed: %v", err) - } - if len(mirrors) != 3 { - t.Fatalf("expected 3 mirrors, got %d", len(mirrors)) - } -} - -func TestDiscoveryCaching(t *testing.T) { - callCount := 0 - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - callCount++ - w.Write([]byte(testMetalinkXML)) - })) - defer srv.Close() - - logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - d := NewDiscovery(logger) - d.metalinkBaseURL = srv.URL + 
"/?repo=%s&arch=%s" - - ctx := context.Background() - d.EPELMirrors(ctx, 9, "x86_64") - d.EPELMirrors(ctx, 9, "x86_64") - - if callCount != 1 { - t.Errorf("expected 1 upstream call (cached), got %d", callCount) - } -} - -func TestDiscoveryCacheExpiry(t *testing.T) { - callCount := 0 - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - callCount++ - w.Write([]byte(testMetalinkXML)) - })) - defer srv.Close() - - logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - d := NewDiscovery(logger) - d.metalinkBaseURL = srv.URL + "/?repo=%s&arch=%s" - d.cacheTTL = 1 * time.Millisecond - - ctx := context.Background() - d.EPELMirrors(ctx, 9, "x86_64") - time.Sleep(5 * time.Millisecond) - d.EPELMirrors(ctx, 9, "x86_64") - - if callCount != 2 { - t.Errorf("expected 2 upstream calls (cache expired), got %d", callCount) - } -} - -func TestDiscoveryOCPVersions(t *testing.T) { - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(testOCPDirHTML)) - })) - defer srv.Close() - - logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - d := NewDiscovery(logger) - d.ocpBaseURL = srv.URL - - versions, err := d.OCPVersions(context.Background()) - if err != nil { - t.Fatalf("OCPVersions failed: %v", err) - } - if len(versions) == 0 { - t.Fatal("expected some versions") - } -} -``` - -**Step 2: Run test to verify it fails** - -Run: `go test ./internal/mirror/ -run TestDiscovery -v` -Expected: FAIL - -**Step 3: Implement discovery service** - -Create `internal/mirror/discovery.go`: - -```go -package mirror - -import ( - "context" - "fmt" - "io" - "log/slog" - "net/http" - "sync" - "time" -) - -const ( - defaultMetalinkBaseURL = "https://mirrors.fedoraproject.org/metalink?repo=epel-%d&arch=%s" - defaultCacheTTL = 1 * time.Hour -) - -type cacheEntry struct { - data interface{} - fetchedAt time.Time -} - -// Discovery provides mirror and version discovery for upstream sources. 
-type Discovery struct { - client *http.Client - logger *slog.Logger - cache map[string]cacheEntry - mu sync.RWMutex - cacheTTL time.Duration - metalinkBaseURL string - ocpBaseURL string - rhcosBaseURL string -} - -// NewDiscovery creates a new Discovery service. -func NewDiscovery(logger *slog.Logger) *Discovery { - if logger == nil { - logger = slog.Default() - } - return &Discovery{ - client: &http.Client{ - Timeout: 30 * time.Second, - }, - logger: logger, - cache: make(map[string]cacheEntry), - cacheTTL: defaultCacheTTL, - metalinkBaseURL: defaultMetalinkBaseURL, - ocpBaseURL: DefaultOCPBaseURL, - rhcosBaseURL: DefaultRHCOSBaseURL, - } -} - -// EPELVersions returns the list of known EPEL versions and architectures. -func (d *Discovery) EPELVersions() []EPELVersionInfo { - var result []EPELVersionInfo - for _, v := range EPELVersions { - result = append(result, EPELVersionInfo{ - Version: v, - Architectures: EPELArchitectures, - }) - } - return result -} - -// EPELMirrors fetches and returns EPEL mirrors for the given version and architecture. -func (d *Discovery) EPELMirrors(ctx context.Context, version int, arch string) ([]MirrorInfo, error) { - key := fmt.Sprintf("epel:%d:%s", version, arch) - - if cached, ok := d.getCache(key); ok { - return cached.([]MirrorInfo), nil - } - - url := fmt.Sprintf(d.metalinkBaseURL, version, arch) - data, err := d.fetch(ctx, url) - if err != nil { - return nil, fmt.Errorf("fetching metalink for EPEL %d %s: %w", version, arch, err) - } - - mirrors, err := parseMetalink(data) - if err != nil { - return nil, fmt.Errorf("parsing metalink for EPEL %d %s: %w", version, arch, err) - } - - d.setCache(key, mirrors) - return mirrors, nil -} - -// OCPVersions fetches and returns available OCP versions from mirror.openshift.com. 
-func (d *Discovery) OCPVersions(ctx context.Context) ([]OCPVersion, error) { - key := "ocp:versions" - - if cached, ok := d.getCache(key); ok { - return cached.([]OCPVersion), nil - } - - data, err := d.fetch(ctx, d.ocpBaseURL+"/") - if err != nil { - return nil, fmt.Errorf("fetching OCP directory listing: %w", err) - } - - versions := parseOCPDirectoryListing(data) - d.setCache(key, versions) - return versions, nil -} - -// RHCOSVersions fetches and returns available RHCOS versions. -func (d *Discovery) RHCOSVersions(ctx context.Context) ([]RHCOSVersion, error) { - key := "rhcos:versions" - - if cached, ok := d.getCache(key); ok { - return cached.([]RHCOSVersion), nil - } - - data, err := d.fetch(ctx, d.rhcosBaseURL+"/") - if err != nil { - return nil, fmt.Errorf("fetching RHCOS directory listing: %w", err) - } - - minors := parseRHCOSMinorVersions(data) - - var versions []RHCOSVersion - for _, minor := range minors { - buildData, err := d.fetch(ctx, fmt.Sprintf("%s/%s/", d.rhcosBaseURL, minor)) - if err != nil { - d.logger.Warn("failed to fetch RHCOS builds", "minor", minor, "error", err) - versions = append(versions, RHCOSVersion{Minor: minor}) - continue - } - builds := parseRHCOSBuilds(buildData) - versions = append(versions, RHCOSVersion{Minor: minor, Builds: builds}) - } - - d.setCache(key, versions) - return versions, nil -} - -// fetch performs an HTTP GET and returns the response body. 
-func (d *Discovery) fetch(ctx context.Context, url string) ([]byte, error) { - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) - if err != nil { - return nil, err - } - req.Header.Set("User-Agent", "airgap/1.0") - - resp, err := d.client.Do(req) - if err != nil { - return nil, err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("HTTP %d from %s", resp.StatusCode, url) - } - - return io.ReadAll(resp.Body) -} - -func (d *Discovery) getCache(key string) (interface{}, bool) { - d.mu.RLock() - defer d.mu.RUnlock() - entry, ok := d.cache[key] - if !ok { - return nil, false - } - if time.Since(entry.fetchedAt) > d.cacheTTL { - return nil, false - } - return entry.data, true -} - -func (d *Discovery) setCache(key string, data interface{}) { - d.mu.Lock() - defer d.mu.Unlock() - d.cache[key] = cacheEntry{data: data, fetchedAt: time.Now()} -} -``` - -**Step 4: Run tests to verify they pass** - -Run: `go test ./internal/mirror/ -v` -Expected: PASS (all tests) - -**Step 5: Commit** - -```bash -git add internal/mirror/discovery.go internal/mirror/discovery_test.go -git commit -m "feat: add MirrorDiscovery service with caching" -``` - ---- - -### Task 4: Speed test - -**Files:** -- Modify: `internal/mirror/discovery.go` -- Create: `internal/mirror/speedtest.go` -- Create: `internal/mirror/speedtest_test.go` - -**Step 1: Write speed test tests** - -Create `internal/mirror/speedtest_test.go`: - -```go -package mirror - -import ( - "context" - "io" - "log/slog" - "net/http" - "net/http/httptest" - "testing" - "time" -) - -func TestSpeedTest(t *testing.T) { - // Create a fast server and a slow server - fast := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte("fast response data here")) - })) - defer fast.Close() - - slow := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - time.Sleep(200 * time.Millisecond) - 
w.Write([]byte("slow")) - })) - defer slow.Close() - - logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - d := NewDiscovery(logger) - - urls := []string{slow.URL, fast.URL} - results := d.SpeedTest(context.Background(), urls, 10) - - if len(results) != 2 { - t.Fatalf("expected 2 results, got %d", len(results)) - } - - // Results should be sorted by throughput (fastest first) - if results[0].URL != fast.URL { - t.Errorf("expected fastest mirror first, got %s", results[0].URL) - } - - // Both should have latency > 0 - for _, r := range results { - if r.LatencyMs <= 0 { - t.Errorf("expected positive latency for %s, got %d", r.URL, r.LatencyMs) - } - } -} - -func TestSpeedTestWithErrors(t *testing.T) { - good := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte("ok")) - })) - defer good.Close() - - logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - d := NewDiscovery(logger) - - urls := []string{good.URL, "http://192.0.2.1:1"} // 192.0.2.1 is TEST-NET, should fail - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) - defer cancel() - - results := d.SpeedTest(ctx, urls, 10) - - if len(results) != 2 { - t.Fatalf("expected 2 results, got %d", len(results)) - } - - // The unreachable mirror should have an error - var hasError bool - for _, r := range results { - if r.Error != "" { - hasError = true - } - } - if !hasError { - t.Error("expected at least one result with error") - } -} -``` - -**Step 2: Run test to verify it fails** - -Run: `go test ./internal/mirror/ -run TestSpeedTest -v` -Expected: FAIL - -**Step 3: Implement speed test** - -Create `internal/mirror/speedtest.go`: - -```go -package mirror - -import ( - "context" - "fmt" - "io" - "net/http" - "sort" - "sync" - "time" -) - -const ( - speedTestTimeout = 5 * time.Second - speedTestMaxWorkers = 10 -) - -// SpeedTest runs latency and throughput tests against the given mirror URLs. 
-// It first measures HTTP HEAD latency, then downloads a small file from -// the top candidates. Results are sorted by throughput (highest first). -// Mirrors that error get sorted to the bottom with their error recorded. -func (d *Discovery) SpeedTest(ctx context.Context, urls []string, topN int) []SpeedResult { - if topN <= 0 || topN > len(urls) { - topN = len(urls) - } - - // Phase 1: Measure latency concurrently - results := d.measureLatency(ctx, urls) - - // Sort by latency (fastest first), errors last - sort.Slice(results, func(i, j int) bool { - if results[i].Error != "" && results[j].Error == "" { - return false - } - if results[i].Error == "" && results[j].Error != "" { - return true - } - return results[i].LatencyMs < results[j].LatencyMs - }) - - // Phase 2: Download test on top N by latency - candidates := results - if topN < len(candidates) { - candidates = candidates[:topN] - } - - d.measureThroughput(ctx, candidates) - - // Re-sort by throughput (highest first), errors last - sort.Slice(results, func(i, j int) bool { - if results[i].Error != "" && results[j].Error == "" { - return false - } - if results[i].Error == "" && results[j].Error != "" { - return true - } - return results[i].ThroughputKBps > results[j].ThroughputKBps - }) - - return results -} - -func (d *Discovery) measureLatency(ctx context.Context, urls []string) []SpeedResult { - results := make([]SpeedResult, len(urls)) - var wg sync.WaitGroup - sem := make(chan struct{}, speedTestMaxWorkers) - - for i, url := range urls { - wg.Add(1) - go func(idx int, mirrorURL string) { - defer wg.Done() - sem <- struct{}{} - defer func() { <-sem }() - - result := SpeedResult{URL: mirrorURL} - - reqCtx, cancel := context.WithTimeout(ctx, speedTestTimeout) - defer cancel() - - req, err := http.NewRequestWithContext(reqCtx, http.MethodHead, mirrorURL, nil) - if err != nil { - result.Error = err.Error() - results[idx] = result - return - } - req.Header.Set("User-Agent", "airgap/1.0") - - start := 
time.Now() - resp, err := d.client.Do(req) - latency := time.Since(start) - - if err != nil { - result.Error = fmt.Sprintf("latency test failed: %v", err) - results[idx] = result - return - } - resp.Body.Close() - - result.LatencyMs = int(latency.Milliseconds()) - results[idx] = result - }(i, url) - } - - wg.Wait() - return results -} - -func (d *Discovery) measureThroughput(ctx context.Context, results []SpeedResult) { - var wg sync.WaitGroup - sem := make(chan struct{}, speedTestMaxWorkers) - - for i := range results { - if results[i].Error != "" { - continue - } - - wg.Add(1) - go func(idx int) { - defer wg.Done() - sem <- struct{}{} - defer func() { <-sem }() - - mirrorURL := results[idx].URL - - reqCtx, cancel := context.WithTimeout(ctx, speedTestTimeout) - defer cancel() - - req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, mirrorURL, nil) - if err != nil { - results[idx].Error = err.Error() - return - } - req.Header.Set("User-Agent", "airgap/1.0") - - start := time.Now() - resp, err := d.client.Do(req) - if err != nil { - results[idx].Error = fmt.Sprintf("download test failed: %v", err) - return - } - defer resp.Body.Close() - - n, _ := io.Copy(io.Discard, resp.Body) - elapsed := time.Since(start) - - if elapsed > 0 && n > 0 { - results[idx].ThroughputKBps = float64(n) / elapsed.Seconds() / 1024.0 - } - }(i) - } - - wg.Wait() -} -``` - -**Step 4: Run tests to verify they pass** - -Run: `go test ./internal/mirror/ -run TestSpeedTest -v -timeout 30s` -Expected: PASS - -**Step 5: Commit** - -```bash -git add internal/mirror/speedtest.go internal/mirror/speedtest_test.go -git commit -m "feat: add mirror speed test with latency and throughput measurement" -``` - ---- - -### Task 5: Wire Discovery into Server and add API routes - -**Files:** -- Modify: `internal/server/server.go` -- Create: `internal/server/mirror_handlers.go` -- Modify: `cmd/airgap/serve.go` or `cmd/airgap/root.go` - -**Step 1: Add Discovery to Server struct** - -In 
`internal/server/server.go`, add `discovery` field to Server struct and update NewServer: - -```go -// Add import: -"github.com/BadgerOps/airgap/internal/mirror" - -// Add field to Server struct (after store): -discovery *mirror.Discovery - -// In NewServer, after setting logger: -discovery := mirror.NewDiscovery(logger) - -// In return statement, add: -discovery: discovery, -``` - -**Step 2: Add routes to setupRoutes** - -In `internal/server/server.go` `setupRoutes()`, add after the transfer routes block: - -```go -// Mirror discovery routes -mux.HandleFunc("GET /api/mirrors/epel/versions", s.handleEPELVersions) -mux.HandleFunc("GET /api/mirrors/epel", s.handleEPELMirrors) -mux.HandleFunc("GET /api/mirrors/ocp/versions", s.handleOCPVersions) -mux.HandleFunc("POST /api/mirrors/speedtest", s.handleSpeedTest) -``` - -**Step 3: Create mirror handlers** - -Create `internal/server/mirror_handlers.go`: - -```go -package server - -import ( - "encoding/json" - "net/http" - "strconv" -) - -// handleEPELVersions returns known EPEL versions and architectures. -func (s *Server) handleEPELVersions(w http.ResponseWriter, r *http.Request) { - versions := s.discovery.EPELVersions() - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(versions) -} - -// handleEPELMirrors returns EPEL mirrors for the given version and architecture. 
-func (s *Server) handleEPELMirrors(w http.ResponseWriter, r *http.Request) { - versionStr := r.URL.Query().Get("version") - arch := r.URL.Query().Get("arch") - - if versionStr == "" || arch == "" { - jsonError(w, http.StatusBadRequest, "version and arch query parameters required") - return - } - - version, err := strconv.Atoi(versionStr) - if err != nil { - jsonError(w, http.StatusBadRequest, "version must be an integer") - return - } - - mirrors, err := s.discovery.EPELMirrors(r.Context(), version, arch) - if err != nil { - s.logger.Error("failed to discover EPEL mirrors", "version", version, "arch", arch, "error", err) - jsonError(w, http.StatusBadGateway, "failed to fetch mirrors: "+err.Error()) - return - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(mirrors) -} - -// handleOCPVersions returns available OCP and RHCOS versions. -func (s *Server) handleOCPVersions(w http.ResponseWriter, r *http.Request) { - type response struct { - OCP interface{} `json:"ocp"` - RHCOS interface{} `json:"rhcos"` - } - - ocpVersions, ocpErr := s.discovery.OCPVersions(r.Context()) - rhcosVersions, rhcosErr := s.discovery.RHCOSVersions(r.Context()) - - if ocpErr != nil && rhcosErr != nil { - s.logger.Error("failed to discover versions", "ocp_error", ocpErr, "rhcos_error", rhcosErr) - jsonError(w, http.StatusBadGateway, "failed to fetch versions") - return - } - - if ocpErr != nil { - s.logger.Warn("failed to discover OCP versions", "error", ocpErr) - } - if rhcosErr != nil { - s.logger.Warn("failed to discover RHCOS versions", "error", rhcosErr) - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(response{ - OCP: ocpVersions, - RHCOS: rhcosVersions, - }) -} - -// speedTestRequest is the request body for POST /api/mirrors/speedtest. -type speedTestRequest struct { - URLs []string `json:"urls"` - TopN int `json:"top_n"` -} - -// handleSpeedTest runs speed tests against the given mirror URLs. 
-func (s *Server) handleSpeedTest(w http.ResponseWriter, r *http.Request) { - var req speedTestRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - jsonError(w, http.StatusBadRequest, "invalid request body") - return - } - - if len(req.URLs) == 0 { - jsonError(w, http.StatusBadRequest, "urls list required") - return - } - - if req.TopN <= 0 { - req.TopN = 10 - } - - results := s.discovery.SpeedTest(r.Context(), req.URLs, req.TopN) - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(results) -} -``` - -**Step 4: Run build to verify compilation** - -Run: `go build ./...` -Expected: Success - -**Step 5: Commit** - -```bash -git add internal/server/server.go internal/server/mirror_handlers.go -git commit -m "feat: add mirror discovery API endpoints" -``` - ---- - -### Task 6: Mirror handler tests - -**Files:** -- Create: `internal/server/mirror_handlers_test.go` - -**Step 1: Write handler tests** - -Create `internal/server/mirror_handlers_test.go`: - -```go -package server - -import ( - "bytes" - "encoding/json" - "net/http" - "net/http/httptest" - "testing" - - "github.com/BadgerOps/airgap/internal/mirror" -) - -func TestHandleEPELVersions(t *testing.T) { - srv := setupTestServer(t) - - req := httptest.NewRequest("GET", "/api/mirrors/epel/versions", nil) - w := httptest.NewRecorder() - srv.handleEPELVersions(w, req) - - if w.Code != http.StatusOK { - t.Fatalf("expected 200, got %d", w.Code) - } - - var versions []mirror.EPELVersionInfo - json.NewDecoder(w.Body).Decode(&versions) - if len(versions) == 0 { - t.Error("expected at least one EPEL version") - } -} - -func TestHandleEPELMirrorsMissingParams(t *testing.T) { - srv := setupTestServer(t) - - req := httptest.NewRequest("GET", "/api/mirrors/epel", nil) - w := httptest.NewRecorder() - srv.handleEPELMirrors(w, req) - - if w.Code != http.StatusBadRequest { - t.Fatalf("expected 400, got %d", w.Code) - } -} - -func TestHandleEPELMirrorsInvalidVersion(t *testing.T) { - 
srv := setupTestServer(t) - - req := httptest.NewRequest("GET", "/api/mirrors/epel?version=abc&arch=x86_64", nil) - w := httptest.NewRecorder() - srv.handleEPELMirrors(w, req) - - if w.Code != http.StatusBadRequest { - t.Fatalf("expected 400, got %d", w.Code) - } -} - -func TestHandleSpeedTestMissingURLs(t *testing.T) { - srv := setupTestServer(t) - - body := `{"urls":[]}` - req := httptest.NewRequest("POST", "/api/mirrors/speedtest", bytes.NewBufferString(body)) - req.Header.Set("Content-Type", "application/json") - w := httptest.NewRecorder() - srv.handleSpeedTest(w, req) - - if w.Code != http.StatusBadRequest { - t.Fatalf("expected 400, got %d", w.Code) - } -} - -func TestHandleSpeedTestValidRequest(t *testing.T) { - srv := setupTestServer(t) - - // Create a test server to speed-test against - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte("test data")) - })) - defer ts.Close() - - body, _ := json.Marshal(speedTestRequest{URLs: []string{ts.URL}, TopN: 5}) - req := httptest.NewRequest("POST", "/api/mirrors/speedtest", bytes.NewReader(body)) - req.Header.Set("Content-Type", "application/json") - w := httptest.NewRecorder() - srv.handleSpeedTest(w, req) - - if w.Code != http.StatusOK { - t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) - } - - var results []mirror.SpeedResult - json.NewDecoder(w.Body).Decode(&results) - if len(results) != 1 { - t.Errorf("expected 1 result, got %d", len(results)) - } -} -``` - -**Step 2: Run tests** - -Run: `go test ./internal/server/ -run TestHandle -v` -Expected: PASS - -**Step 3: Commit** - -```bash -git add internal/server/mirror_handlers_test.go -git commit -m "test: add mirror handler tests" -``` - ---- - -### Task 7: Enhance providers.html with EPEL mirror discovery UI - -**Files:** -- Modify: `internal/server/templates/providers.html` - -**Step 1: Update the EPEL section** - -Replace the `