diff --git a/Makefile b/Makefile index b6576ce8..20a3440d 100644 --- a/Makefile +++ b/Makefile @@ -22,8 +22,8 @@ SERVER_PORT ?= 8080 # Default database backend for dev environment (postgres|oracle) DB ?= postgres -# Default kube cluster target for dev environment (kind|k3s) -CLUSTER ?= kind +# Default kube cluster target for dev environment (docker-desktop|k3s) +CLUSTER ?= docker-desktop # ============================================================================ # ATOMIC COMPONENTS - Infrastructure Management @@ -224,7 +224,7 @@ clean-everything: # COMPOSITE TARGETS - Main User-Facing Commands # ============================================================================ -.PHONY: test-unit test-integration test-integration-pg test-integration-oci test-api test-api-collection test-api-list start-dev start-dev-oci restart-dev stop-dev tilt-up tilt-down test-coverage test-manual-google-workspace test-corpus-ooxml test-dev-scripts dev-up dev-down dev-restart dev-reset dev-nuke dev-status dev-logs dev-deploy dev-cluster-up dev-cluster-down dev-db-up dev-db-down +.PHONY: test-unit test-integration test-integration-pg test-integration-oci test-api test-api-collection test-api-list start-dev start-dev-oci restart-dev stop-dev tilt-up tilt-down test-coverage test-manual-google-workspace test-corpus-ooxml test-dev-scripts dev-up dev-down dev-restart dev-reset dev-nuke dev-status dev-logs dev-deploy dev-cluster-up dev-cluster-down # Dev-environment Python helpers unit tests test-dev-scripts: ## Run unit tests for the dev-environment Python helpers @@ -320,10 +320,10 @@ tilt-down: ## Stop Tilt and restore the prod-shaped server @echo "prod-shaped server restored" # ============================================================================ -# DEV ENVIRONMENT — single orchestrator (scripts/devenv.py). DB=postgres|oracle CLUSTER=kind|k3s +# DEV ENVIRONMENT — single orchestrator (scripts/devenv.py). 
DB=postgres|oracle CLUSTER=docker-desktop|k3s # ============================================================================ -dev-up: ## Bring up the full dev environment (cluster + db + deploy). DB=postgres|oracle CLUSTER=kind|k3s +dev-up: ## Bring up the full dev environment (cluster + deploy). DB=postgres|oracle CLUSTER=docker-desktop|k3s @uv run scripts/devenv.py --db $(DB) --cluster $(CLUSTER) up dev-down: ## Tear down the dev environment; KEEP db data @@ -347,18 +347,12 @@ dev-logs: ## Stream the tmi-server pod logs dev-deploy: ## (Re)apply manifests + rollout without recreating cluster/db @uv run scripts/devenv.py --db $(DB) --cluster $(CLUSTER) deploy -dev-cluster-up: ## Create the local cluster + registry only (kind); switch context (k3s) +dev-cluster-up: ## Switch to the cluster kube context (docker-desktop or k3s) @uv run scripts/devenv.py --cluster $(CLUSTER) cluster up -dev-cluster-down: ## Delete the local kind cluster only (no-op for k3s) +dev-cluster-down: ## No-op for docker-desktop and k3s (clusters are not owned) @uv run scripts/devenv.py --cluster $(CLUSTER) cluster down -dev-db-up: ## Start the postgres dev container only - @uv run scripts/devenv.py db up - -dev-db-down: ## Stop the postgres dev container only (keep data) - @uv run scripts/devenv.py db down - # --- deprecated aliases (removable next release) --- start-dev: ## DEPRECATED alias for dev-up @echo "note: 'make start-dev' is renamed to 'make dev-up'"; $(MAKE) dev-up DB=$(DB) @@ -425,7 +419,7 @@ kill-oauth-stub: check-oauth-stub: @uv run scripts/manage-oauth-stub.py status -stop-all: stop-oauth-stub dev-down ## Stop the OAuth stub and tear down the kind dev environment +stop-all: stop-oauth-stub dev-down ## Stop the OAuth stub and tear down the dev environment # ============================================================================ diff --git a/deployments/k8s/dev/docker-desktop-oracle/kustomization.yaml b/deployments/k8s/dev/docker-desktop-oracle/kustomization.yaml new file 
mode 100644 index 00000000..2e07729a --- /dev/null +++ b/deployments/k8s/dev/docker-desktop-oracle/kustomization.yaml @@ -0,0 +1,37 @@ +# docker-desktop oracle overlay (CLUSTER=docker-desktop DB=oracle). Same as the +# docker-desktop postgres overlay but deploys server-oracle.yml (external Oracle +# ADB via the tmi-oracle-db/tmi-oracle-wallet secrets) with NO in-cluster Postgres. +# Images are imported into the node containerd by bare name; reuses the +# docker-desktop patches (server Deployment is named tmi-server; worker patches +# are identical). +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: tmi-platform +resources: + - ../controller.yml + - ../redis.yml + - ../server-oracle.yml + - ../../platform/components/tmi-extractor.yml + - ../../platform/components/tmi-chunk-embed.yml +images: + - name: localhost:5000/tmi-server-oracle + newName: tmi-server-oracle + - name: localhost:5000/tmi-component-controller + newName: tmi-component-controller +patches: + - path: ../docker-desktop/patches/server-pullpolicy.yaml + target: + kind: Deployment + name: tmi-server + - path: ../docker-desktop/patches/extractor-image.yaml + target: + group: tmi.dev + version: v1alpha1 + kind: TMIComponent + name: tmi-extractor + - path: ../docker-desktop/patches/chunkembed-image.yaml + target: + group: tmi.dev + version: v1alpha1 + kind: TMIComponent + name: tmi-chunk-embed diff --git a/deployments/k8s/dev/docker-desktop/README.md b/deployments/k8s/dev/docker-desktop/README.md new file mode 100644 index 00000000..cf719572 --- /dev/null +++ b/deployments/k8s/dev/docker-desktop/README.md @@ -0,0 +1,91 @@ +# Docker Desktop Kubernetes Dev Environment + +This directory contains the kustomize overlay for running TMI's local dev +environment on **Docker Desktop's built-in Kubernetes cluster**. It is the +default cluster target — `make dev-up` (no `CLUSTER=` override) uses this +overlay. + +--- + +## One-Time Setup + +### 1. Enable Kubernetes in Docker Desktop + +1. 
Open Docker Desktop → **Settings** (gear icon) → **Kubernetes**. +2. Check **Enable Kubernetes**. +3. Leave the provisioner set to **kind** (the default). Docker Desktop + provisions the cluster using kind internally; the resulting context is + named `docker-desktop`. +4. Click **Apply & Restart** and wait for the Kubernetes status indicator to + turn green (may take a minute or two). + +### 2. Verify the cluster is ready + +```bash +kubectl --context docker-desktop get nodes +``` + +Expected output shows a single node named `desktop-control-plane` in `Ready` +state: + +``` +NAME STATUS ROLES AGE VERSION +desktop-control-plane Ready control-plane … v1.X.Y +``` + +If you see a different node name or a non-`Ready` status, go back to Docker +Desktop and confirm Kubernetes is fully started. + +--- + +## How Images Are Loaded + +There is **no local registry and no image mirror** in this topology. Images +are imported directly into the cluster node's containerd image store via: + +```bash +docker save <image>:dev | docker exec -i desktop-control-plane ctr -n k8s.io images import - +``` + +The `make dev-up` orchestration (`scripts/devenv.py` + `scripts/lib/deploy.py`) +runs this import automatically after building each image. You do not need to +push to any registry. + +--- + +## Endpoints After `make dev-up` + +| Service | Address | How | +|-------------|----------------------|--------------------------------| +| TMI server | `http://localhost:8080` | `kubectl port-forward` managed by devenv | +| Redis | `localhost:6379` | `kubectl port-forward` managed by devenv | + +Run `make dev-status` to see live port-forward and pod status. + +--- + +## Teardown + +```bash +make dev-down # stop pods + port-forwards; keep DB data +make dev-nuke # destroy everything including DB data +``` + +--- + +## Why e2e-platform Tests Stay on Standalone kind + +The automated end-to-end platform tests (`e2e-platform/`) are **not** run +against Docker Desktop Kubernetes. 
They require: + +- A **swappable CNI** — specifically Calico — to test and enforce + `NetworkPolicy` rules. Docker Desktop ships with its own fixed CNI that + does not support runtime replacement. +- **Ephemeral clusters** — each test run spins up a fresh kind cluster, + applies Calico, runs the suite, then destroys the cluster. Docker Desktop + does not support programmatic cluster creation and deletion. + +For day-to-day feature development the Docker Desktop topology is faster and +requires no extra tooling. For changes that touch networking, policy +enforcement, or multi-cluster behavior, run the `e2e-platform-*` Makefile +targets, which use a standalone kind installation. diff --git a/deployments/k8s/dev/docker-desktop/kustomization.yaml b/deployments/k8s/dev/docker-desktop/kustomization.yaml new file mode 100644 index 00000000..6c21e09a --- /dev/null +++ b/deployments/k8s/dev/docker-desktop/kustomization.yaml @@ -0,0 +1,36 @@ +# docker-desktop (CLUSTER=docker-desktop, the default) dev overlay. Images are +# imported into the node's containerd by bare name (no registry), so the images +# transformer strips the localhost:5000/ prefix and the server's imagePullPolicy +# is forced to IfNotPresent. Postgres (postgres.yml) is applied as a prerequisite +# by deploy.py, not here. Redis stays chainguard (DD kernel is 4KB pages). 
+apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: tmi-platform +resources: + - ../controller.yml + - ../redis.yml + - ../server.yml + - ../../platform/components/tmi-extractor.yml + - ../../platform/components/tmi-chunk-embed.yml +images: + - name: localhost:5000/tmi-server + newName: tmi-server + - name: localhost:5000/tmi-component-controller + newName: tmi-component-controller +patches: + - path: patches/server-pullpolicy.yaml + target: + kind: Deployment + name: tmi-server + - path: patches/extractor-image.yaml + target: + group: tmi.dev + version: v1alpha1 + kind: TMIComponent + name: tmi-extractor + - path: patches/chunkembed-image.yaml + target: + group: tmi.dev + version: v1alpha1 + kind: TMIComponent + name: tmi-chunk-embed diff --git a/deployments/k8s/dev/docker-desktop/patches/chunkembed-image.yaml b/deployments/k8s/dev/docker-desktop/patches/chunkembed-image.yaml new file mode 100644 index 00000000..7190a36d --- /dev/null +++ b/deployments/k8s/dev/docker-desktop/patches/chunkembed-image.yaml @@ -0,0 +1,3 @@ +- op: replace + path: /spec/image + value: tmi-chunk-embed:dev diff --git a/deployments/k8s/dev/docker-desktop/patches/extractor-image.yaml b/deployments/k8s/dev/docker-desktop/patches/extractor-image.yaml new file mode 100644 index 00000000..92f93e28 --- /dev/null +++ b/deployments/k8s/dev/docker-desktop/patches/extractor-image.yaml @@ -0,0 +1,3 @@ +- op: replace + path: /spec/image + value: tmi-extractor:dev diff --git a/deployments/k8s/dev/docker-desktop/patches/server-pullpolicy.yaml b/deployments/k8s/dev/docker-desktop/patches/server-pullpolicy.yaml new file mode 100644 index 00000000..8cdf47c0 --- /dev/null +++ b/deployments/k8s/dev/docker-desktop/patches/server-pullpolicy.yaml @@ -0,0 +1,3 @@ +- op: replace + path: /spec/template/spec/containers/0/imagePullPolicy + value: IfNotPresent diff --git a/deployments/k8s/dev/docker-desktop/postgres.yml b/deployments/k8s/dev/docker-desktop/postgres.yml new file mode 100644 
index 00000000..9e8b8f85 --- /dev/null +++ b/deployments/k8s/dev/docker-desktop/postgres.yml @@ -0,0 +1,84 @@ +# In-cluster single-node Postgres for the Docker Desktop dev target; uses the +# cluster default storageclass (hostpath). +# +# Uses the same vanilla Chainguard image as the kind dev DB (Dockerfile.postgres +# is just `FROM cgr.dev/chainguard/postgres:latest` + PGDATA/locale env, with no +# extensions), so the node pulls it straight from cgr.dev — no local build or +# image import into the node is needed. Schema is created by the server's GORM +# AutoMigrate at startup (same as kind). +# +# Credentials MUST match database.url in config-development.yml +# (postgres://tmi_dev:dev123@.../tmi_dev). The server reaches this DB via the +# `postgres` Service DNS name; deploy.py rewrites the config URL host to `postgres` +# for CLUSTER=docker-desktop. +apiVersion: v1 +kind: Secret +metadata: + name: tmi-postgres + namespace: tmi-platform +type: Opaque +stringData: + POSTGRES_USER: "tmi_dev" + POSTGRES_PASSWORD: "dev123" + POSTGRES_DB: "tmi_dev" +--- +apiVersion: v1 +kind: Service +metadata: + name: postgres + namespace: tmi-platform +spec: + selector: { app: postgres } + ports: + - port: 5432 + targetPort: 5432 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: postgres + namespace: tmi-platform +spec: + serviceName: postgres + replicas: 1 + selector: + matchLabels: { app: postgres } + template: + metadata: + labels: { app: postgres } + spec: + containers: + - name: postgres + image: cgr.dev/chainguard/postgres:latest + envFrom: + - secretRef: { name: tmi-postgres } + env: + # Match Dockerfile.postgres so behavior mirrors the kind dev DB. 
+ - { name: PGDATA, value: /var/lib/postgresql/data/pgdata } + - { name: LANG, value: en_US.UTF-8 } + - { name: LC_ALL, value: en_US.UTF-8 } + ports: + - containerPort: 5432 + volumeMounts: + - name: data + mountPath: /var/lib/postgresql/data + readinessProbe: + exec: { command: ["pg_isready", "-U", "tmi_dev", "-d", "tmi_dev"] } + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 3 + livenessProbe: + exec: { command: ["pg_isready", "-U", "tmi_dev"] } + initialDelaySeconds: 20 + periodSeconds: 10 + resources: + requests: { cpu: 100m, memory: 256Mi } + limits: { cpu: "1", memory: 1Gi } + volumeClaimTemplates: + - metadata: + name: data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 8Gi diff --git a/deployments/k8s/dev/kind-cluster.yml b/deployments/k8s/dev/kind-cluster.yml deleted file mode 100644 index 1afcaa35..00000000 --- a/deployments/k8s/dev/kind-cluster.yml +++ /dev/null @@ -1,24 +0,0 @@ -kind: Cluster -apiVersion: kind.x-k8s.io/v1alpha4 -name: tmi-dev -containerdConfigPatches: - - |- - [plugins."io.containerd.grpc.v1.cri".registry.mirrors."localhost:5000"] - endpoint = ["http://tmi-dev-registry:5000"] -nodes: - - role: control-plane - # Publish the tmi-server NodePort (30080) directly on the host as - # localhost:8080. This replaces the former `kubectl port-forward - # svc/tmi-server`, whose single-process userspace proxy collapsed under - # high connection rates (e.g. CATS fuzzing ~50 conn/s -> 99% CONNECTION_ERROR; - # see issue #463). Docker's published port + kube-proxy DNAT has no userspace - # bottleneck, so high-throughput clients reach the server reliably. - # NOTE: extraPortMappings are baked in at node-container creation time — - # an existing cluster must be recreated (`make dev-nuke` or - # `make dev-cluster-down && make dev-cluster-up`) to pick this up. 
- extraPortMappings: - - containerPort: 30080 - hostPort: 8080 - listenAddress: "127.0.0.1" - protocol: TCP - - role: worker diff --git a/deployments/k8s/dev/kustomization.yaml b/deployments/k8s/dev/kustomization.yaml deleted file mode 100644 index c2186248..00000000 --- a/deployments/k8s/dev/kustomization.yaml +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -namespace: tmi-platform -resources: - - controller.yml - - redis.yml - - server.yml - - ../platform/components/tmi-extractor.yml - - ../platform/components/tmi-chunk-embed.yml -patches: - - path: patches/extractor-image.yaml - target: - group: tmi.dev - version: v1alpha1 - kind: TMIComponent - name: tmi-extractor - - path: patches/chunkembed-image.yaml - target: - group: tmi.dev - version: v1alpha1 - kind: TMIComponent - name: tmi-chunk-embed diff --git a/deployments/k8s/dev/oracle/kustomization.yaml b/deployments/k8s/dev/oracle/kustomization.yaml deleted file mode 100644 index 892b2b27..00000000 --- a/deployments/k8s/dev/oracle/kustomization.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# Oracle dev overlay: identical to ../kustomization.yaml but swaps server.yml -> -# server-oracle.yml (Oracle image + wallet mount). Selected by `make start-dev DB=oracle`. -# Keep resources/patches in sync with the parent overlay when either changes. 
-apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -namespace: tmi-platform -resources: - - ../controller.yml - - ../redis.yml - - ../server-oracle.yml - - ../../platform/components/tmi-extractor.yml - - ../../platform/components/tmi-chunk-embed.yml -patches: - - path: ../patches/extractor-image.yaml - target: - group: tmi.dev - version: v1alpha1 - kind: TMIComponent - name: tmi-extractor - - path: ../patches/chunkembed-image.yaml - target: - group: tmi.dev - version: v1alpha1 - kind: TMIComponent - name: tmi-chunk-embed diff --git a/deployments/k8s/dev/patches/chunkembed-image.yaml b/deployments/k8s/dev/patches/chunkembed-image.yaml deleted file mode 100644 index 9b8f407b..00000000 --- a/deployments/k8s/dev/patches/chunkembed-image.yaml +++ /dev/null @@ -1,3 +0,0 @@ -- op: replace - path: /spec/image - value: localhost:5000/tmi-chunk-embed:dev diff --git a/deployments/k8s/dev/patches/extractor-image.yaml b/deployments/k8s/dev/patches/extractor-image.yaml deleted file mode 100644 index 0e51b984..00000000 --- a/deployments/k8s/dev/patches/extractor-image.yaml +++ /dev/null @@ -1,3 +0,0 @@ -- op: replace - path: /spec/image - value: localhost:5000/tmi-extractor:dev diff --git a/deployments/k8s/dev/server-oracle.yml b/deployments/k8s/dev/server-oracle.yml index c99dee0e..db347306 100644 --- a/deployments/k8s/dev/server-oracle.yml +++ b/deployments/k8s/dev/server-oracle.yml @@ -141,12 +141,11 @@ metadata: name: tmi-server namespace: tmi-platform spec: - # NodePort (not ClusterIP): the kind cluster maps host localhost:8080 -> - # this nodePort (30080) via extraPortMappings (deployments/k8s/dev/kind-cluster.yml), - # giving the host a userspace-proxy-free path to the server. This replaces - # `kubectl port-forward`, which dropped connections under load (issue #463). - # KEEP nodePort IN SYNC with kind-cluster.yml extraPortMappings and - # scripts/lib/deploy.py NODE_PORT. 
+ # NodePort (not ClusterIP): the server is reached on the host at localhost:8080 + # via `kubectl port-forward` (deploy.py start_server_port_forward). The NodePort + # is also used for high-throughput CATS runs against k3s (hit rp2:30080 directly + # to bypass the userspace port-forward, which throttles under load — issue #463). + # KEEP nodePort IN SYNC with scripts/lib/deploy.py NODE_PORT. type: NodePort selector: { app: tmi-server } ports: diff --git a/deployments/k8s/dev/server.yml b/deployments/k8s/dev/server.yml index aba4cef7..c51b3afa 100644 --- a/deployments/k8s/dev/server.yml +++ b/deployments/k8s/dev/server.yml @@ -102,12 +102,11 @@ metadata: name: tmi-server namespace: tmi-platform spec: - # NodePort (not ClusterIP): the kind cluster maps host localhost:8080 -> - # this nodePort (30080) via extraPortMappings (deployments/k8s/dev/kind-cluster.yml), - # giving the host a userspace-proxy-free path to the server. This replaces - # `kubectl port-forward`, which dropped connections under load (issue #463). - # KEEP nodePort IN SYNC with kind-cluster.yml extraPortMappings and - # scripts/lib/deploy.py NODE_PORT. + # NodePort (not ClusterIP): the server is reached on the host at localhost:8080 + # via `kubectl port-forward` (deploy.py start_server_port_forward). The NodePort + # is also used for high-throughput CATS runs against k3s (hit rp2:30080 directly + # to bypass the userspace port-forward, which throttles under load — issue #463). + # KEEP nodePort IN SYNC with scripts/lib/deploy.py NODE_PORT. 
type: NodePort selector: { app: tmi-server } ports: diff --git a/docs/superpowers/plans/2026-07-03-docker-desktop-dev-target.md b/docs/superpowers/plans/2026-07-03-docker-desktop-dev-target.md new file mode 100644 index 00000000..17fa7439 --- /dev/null +++ b/docs/superpowers/plans/2026-07-03-docker-desktop-dev-target.md @@ -0,0 +1,557 @@ +# Docker Desktop Dev Target Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace the standalone-kind local dev deployment path with Docker Desktop's kind-provisioned Kubernetes (`CLUSTER=docker-desktop`, the new default), delivering images registry-free via `ctr import` and reusing the k3s port-forward/in-cluster-Postgres machinery. + +**Architecture:** `docker-desktop` is treated like k3s — a cluster whose lifecycle we do NOT own (select context, never create/delete). The one new mechanism is registry-free image delivery: build locally, then `docker save | docker exec -i desktop-control-plane ctr -n k8s.io images import -`. Endpoint is a port-forward (DD LoadBalancer is not localhost here). DB is in-cluster Postgres. Tasks 1–5 add docker-desktop and make it the default while kind still works; Task 6 retires the dev kind machinery. + +**Tech Stack:** Python dev tooling (`scripts/devenv.py`, `scripts/lib/{cluster,deploy}.py`), kustomize overlays under `deployments/k8s/dev/`, Docker Desktop Kubernetes (kind provisioner), containerd `ctr`, GORM AutoMigrate. + +## Global Constraints + +- Tests run ONLY via make: `make test-dev-scripts` (dev-scripts unit tests). Never run `python`/`pytest`/`go test` directly. +- docker-desktop node container name: `desktop-control-plane`. Image import target namespace: `k8s.io`. 
+- docker-desktop images are referenced by BARE name+tag (`tmi-server:dev`), no registry prefix; `imagePullPolicy` must resolve to `IfNotPresent` (never `Always`) so the imported image is used without a registry pull. +- docker-desktop endpoint: port-forward `localhost:8080` → `svc/tmi-server`, `localhost:6379` → `svc/redis`. No LoadBalancer, no NodePort mapping, no extraPortMappings. +- docker-desktop DB: in-cluster Postgres, DB-URL host rewritten to the `postgres` Service. Postgres uses the cluster DEFAULT storageclass (`hostpath`) — NOT `longhorn` (that is k3s-only). +- docker-desktop redis: chainguard redis unchanged (DD kernel is 4KB pages; the k3s `redis:7-alpine` remap does NOT apply). +- Do NOT touch `deployments/k8s/platform/kind-cluster.yml`, `e2e-platform-*` Makefile targets, or `test/e2e/platform/` — e2e stays on standalone kind. +- Conventional commits; `feat(dev):`/`refactor(dev):`/`docs(dev):` scope. + +--- + +## File Structure + +- `scripts/lib/cluster.py` — add docker-desktop to `registry_for`/`local_image_ref`/`expected_context`/`up`/`down`; add `DD_NODE`, `DD_CONTEXT` constants. (Task 6 removes the kind machinery.) +- `scripts/lib/deploy.py` — add docker-desktop branches to `build_and_push` (import), `in_cluster_db_host`, `overlay_dir_for`; generalize `apply_k3s_postgres`→`apply_incluster_postgres` and `teardown_k3s_namespace`→`teardown_namespace`; extend port-forward gating; add `save_import_cmds` pure builder + `import_image_to_node`. +- `scripts/devenv.py` — add `docker-desktop` to `--cluster` choices; add cmd_nuke docker-desktop branch; (Task 5) change default; (Task 6) drop kind + Mac-Postgres path. +- `deployments/k8s/dev/docker-desktop/kustomization.yaml` — new overlay (bare image names, server pull-policy patch, worker image patches). +- `deployments/k8s/dev/docker-desktop/postgres.yml` — new in-cluster Postgres (default storageclass). 
+- `deployments/k8s/dev/docker-desktop/patches/{server-pullpolicy,extractor-image,chunkembed-image}.yaml` — new patches. +- `deployments/k8s/dev/docker-desktop/README.md` — new setup doc (enable DD k8s w/ kind provisioner; import mechanism; e2e-on-kind note). +- `Makefile` — `CLUSTER ?= docker-desktop`; help text. +- Tests: `scripts/lib/tests/test_cluster.py`, `test_deploy.py`, `test_devenv_cli.py`. + +--- + +### Task 1: docker-desktop cluster identity + selector plumbing + +Adds the docker-desktop target to the pure helpers and the `--cluster` parser, and its no-create lifecycle. No image/deploy behavior yet. + +**Files:** +- Modify: `scripts/lib/cluster.py` (constants ~16-42, `registry_for`, `local_image_ref`, `expected_context`, `up`, `down`) +- Modify: `scripts/lib/deploy.py` (`in_cluster_db_host`, `overlay_dir_for`) +- Modify: `scripts/devenv.py` (`--cluster` choices, both parser branches ~172, ~180) +- Test: `scripts/lib/tests/test_cluster.py`, `test_deploy.py`, `test_devenv_cli.py` + +**Interfaces:** +- Produces: `cluster.registry_for("docker-desktop") -> None`; `cluster.local_image_ref(name, cluster="docker-desktop") -> f"{name}:dev"` (bare); `cluster.expected_context("docker-desktop") -> "docker-desktop"`; `deploy.in_cluster_db_host("docker-desktop") -> "postgres"`; `deploy.overlay_dir_for(db, "docker-desktop") -> "deployments/k8s/dev/docker-desktop"`; `cluster.DD_CONTEXT="docker-desktop"`, `cluster.DD_NODE="desktop-control-plane"`. + +- [ ] **Step 1: Write failing tests** + +In `scripts/lib/tests/test_cluster.py`, add: +```python +class TestDockerDesktopIdentity(unittest.TestCase): + def test_registry_for_docker_desktop_is_none(self): + self.assertIsNone(cluster.registry_for("docker-desktop")) + + def test_local_image_ref_docker_desktop_is_bare(self): + # No registry prefix — the image is imported straight into the node's containerd. 
+ self.assertEqual(cluster.local_image_ref("tmi-server", cluster="docker-desktop"), "tmi-server:dev") + + def test_expected_context_docker_desktop(self): + self.assertEqual(cluster.expected_context("docker-desktop"), "docker-desktop") + + def test_constants(self): + self.assertEqual(cluster.DD_CONTEXT, "docker-desktop") + self.assertEqual(cluster.DD_NODE, "desktop-control-plane") +``` +In `scripts/lib/tests/test_deploy.py`, extend `TestOverlayDirFor` and `TestInClusterDbHost`: +```python + def test_overlay_dir_docker_desktop(self): + self.assertTrue(deploy.overlay_dir_for("postgres", "docker-desktop").endswith("/docker-desktop")) + + def test_docker_desktop_uses_postgres_service(self): + self.assertEqual(deploy.in_cluster_db_host("docker-desktop"), "postgres") +``` +In `scripts/lib/tests/test_devenv_cli.py`, add (match the existing parser-test pattern in that file): +```python + def test_cluster_accepts_docker_desktop(self): + args = build_parser().parse_args(["--cluster", "docker-desktop", "up"]) + self.assertEqual(args.cluster, "docker-desktop") +``` +(If the existing tests call the parser differently, mirror that file's existing `--cluster k3s` test exactly, substituting `docker-desktop`.) + +- [ ] **Step 2: Run tests, verify they fail** + +Run: `make test-dev-scripts` +Expected: FAIL — `AttributeError: module 'cluster' has no attribute 'DD_CONTEXT'` and assertion errors on registry_for/overlay_dir. + +- [ ] **Step 3: Implement in cluster.py** + +Add constants after the K3S ones (~line 28): +```python +# Docker Desktop dev target (CLUSTER=docker-desktop, the default). DD owns the +# cluster lifecycle (kind provisioner); we only select its context and never +# create/delete it. Images are imported straight into the node's containerd +# (no registry): docker save | docker exec -i DD_NODE ctr -n k8s.io images import -. 
+DD_CONTEXT = "docker-desktop" +DD_NODE = "desktop-control-plane" +``` +Change `registry_for` and `local_image_ref`: +```python +def registry_for(cluster: str = "kind") -> str | None: + """Return the dev image-registry hostname, or None for docker-desktop (no + registry — images are imported into the node's containerd).""" + if cluster == "docker-desktop": + return None + return K3S_REGISTRY if cluster == "k3s" else LOCAL_REGISTRY + + +def local_image_ref(name: str, tag: str = "dev", *, cluster: str = "kind") -> str: + """Return the dev image reference for the cluster: registry-qualified for + kind/k3s, or a bare name:tag for docker-desktop (imported, not pulled).""" + reg = registry_for(cluster) + return f"{name}:{tag}" if reg is None else f"{reg}/{name}:{tag}" +``` +Change `expected_context`: +```python +def expected_context(cluster: str = "kind") -> str: + """Return the kube-context that must be active for the given cluster target.""" + if cluster == "docker-desktop": + return DD_CONTEXT + return K3S_CONTEXT if cluster == "k3s" else f"kind-{CLUSTER_NAME}" +``` +Extend `up` (add BEFORE the `for tool in ("docker","kind","kubectl")` kind block, after the k3s block): +```python + if cluster == "docker-desktop": + check_tool("kubectl") + log_info(f"Using Docker Desktop Kubernetes context '{DD_CONTEXT}' (no cluster create)") + run_cmd(["kubectl", "config", "use-context", DD_CONTEXT]) + log_success(f"kube context set to '{DD_CONTEXT}'") + return +``` +Extend `down` (after the k3s no-op block): +```python + if cluster == "docker-desktop": + log_info("cluster down is a no-op for docker-desktop (Docker Desktop owns the cluster)") + return +``` + +- [ ] **Step 4: Implement in deploy.py** + +`in_cluster_db_host` — return `postgres` for docker-desktop too: +```python + return "postgres" if cluster_target in ("k3s", "docker-desktop") else IN_CLUSTER_DB_HOST +``` +`overlay_dir_for` — add docker-desktop: +```python + if cluster_target == "k3s": + return f"{DEV_DIR}/k3s" + if 
cluster_target == "docker-desktop": + return f"{DEV_DIR}/docker-desktop" + return f"{DEV_DIR}/oracle" if db == "oracle" else DEV_DIR +``` + +- [ ] **Step 5: Implement in devenv.py** + +At BOTH `--cluster` argument definitions (~172, ~180), change `choices=["kind", "k3s"]` to `choices=["kind", "k3s", "docker-desktop"]`. Leave the defaults unchanged for now (Task 5 changes them). Update the module docstring line 19: `--cluster kind|k3s|docker-desktop`. + +- [ ] **Step 6: Run tests, verify pass** + +Run: `make test-dev-scripts` +Expected: PASS (all prior tests + the new ones). + +- [ ] **Step 7: Commit** +```bash +git add scripts/lib/cluster.py scripts/lib/deploy.py scripts/devenv.py \ + scripts/lib/tests/test_cluster.py scripts/lib/tests/test_deploy.py scripts/lib/tests/test_devenv_cli.py +git commit -m "feat(dev): add docker-desktop cluster identity + selector plumbing" +``` + +--- + +### Task 2: Registry-free image build + import + +Build images locally and import them into the DD node's containerd, no registry. + +**Files:** +- Modify: `scripts/lib/deploy.py` (`build_and_push`, add `save_import_cmds`, `import_image_to_node`) +- Test: `scripts/lib/tests/test_deploy.py` + +**Interfaces:** +- Consumes: `cluster.local_image_ref(name, cluster="docker-desktop")` (bare ref), `cluster.DD_NODE`. +- Produces: `deploy.save_import_cmds(ref, node) -> tuple[list[str], list[str]]` (the `docker save` and `docker exec … ctr import` argv pair); `deploy.import_image_to_node(ref, node)` (runs the pipe); `build_and_push(db, "docker-desktop")` builds + imports (no push). 
+ +- [ ] **Step 1: Write failing test** (pure command builder) + +In `test_deploy.py`: +```python +class TestSaveImportCmds(unittest.TestCase): + def test_builds_docker_save_and_ctr_import_pair(self): + save, imp = deploy.save_import_cmds("tmi-server:dev", "desktop-control-plane") + self.assertEqual(save, ["docker", "save", "tmi-server:dev"]) + self.assertEqual( + imp, + ["docker", "exec", "-i", "desktop-control-plane", + "ctr", "-n", "k8s.io", "images", "import", "-"], + ) +``` + +- [ ] **Step 2: Run test, verify it fails** + +Run: `make test-dev-scripts` +Expected: FAIL — `module 'deploy' has no attribute 'save_import_cmds'`. + +- [ ] **Step 3: Implement the builder + importer** in `deploy.py` (near `build_and_push`): +```python +def save_import_cmds(ref: str, node: str) -> tuple[list[str], list[str]]: + """Return the (docker save, docker exec ctr import) argv pair that streams a + locally-built image straight into a cluster node's containerd (k8s.io ns). + This is exactly what `kind load docker-image` does under the hood — used for + docker-desktop, whose cluster our standalone kind CLI cannot address.""" + return ( + ["docker", "save", ref], + ["docker", "exec", "-i", node, "ctr", "-n", "k8s.io", "images", "import", "-"], + ) + + +def import_image_to_node(ref: str, node: str) -> None: + """Stream `ref` from the host Docker into `node`'s containerd via a pipe.""" + save_cmd, import_cmd = save_import_cmds(ref, node) + log_info(f"Importing {ref} -> {node} containerd (k8s.io)") + saver = subprocess.Popen(save_cmd, stdout=subprocess.PIPE) + try: + importer = subprocess.Popen(import_cmd, stdin=saver.stdout) + saver.stdout.close() # allow saver to receive SIGPIPE if importer exits + importer.communicate() + if importer.returncode != 0: + log_error(f"ctr import failed for {ref} (exit {importer.returncode})") + sys.exit(1) + finally: + saver.wait() + if saver.returncode != 0: + log_error(f"docker save failed for {ref} (exit {saver.returncode})") + sys.exit(1) +``` + +- 
[ ] **Step 4: Branch `build_and_push` for docker-desktop.** Replace the push line inside the build loop so it imports for docker-desktop and pushes otherwise: +```python + run_cmd(cmd) + + if cluster_target == "docker-desktop": + import_image_to_node(ref, cluster.DD_NODE) + else: + log_info(f"Pushing {ref}") + run_cmd(["docker", "push", ref]) + + if cluster_target == "docker-desktop": + log_success("All images built and imported into the docker-desktop node") + else: + log_success("All images built and pushed to local registry") +``` +Update the docstring's first paragraph to mention docker-desktop uses `ctr import` (no registry). + +- [ ] **Step 5: Run tests, verify pass** + +Run: `make test-dev-scripts` +Expected: PASS. + +- [ ] **Step 6: Commit** +```bash +git add scripts/lib/deploy.py scripts/lib/tests/test_deploy.py +git commit -m "feat(dev): registry-free image import for docker-desktop (docker save | ctr import)" +``` + +--- + +### Task 3: docker-desktop overlay + in-cluster Postgres manifest + +**Files:** +- Create: `deployments/k8s/dev/docker-desktop/kustomization.yaml` +- Create: `deployments/k8s/dev/docker-desktop/postgres.yml` +- Create: `deployments/k8s/dev/docker-desktop/patches/server-pullpolicy.yaml` +- Create: `deployments/k8s/dev/docker-desktop/patches/extractor-image.yaml` +- Create: `deployments/k8s/dev/docker-desktop/patches/chunkembed-image.yaml` + +**Interfaces:** +- Produces: overlay dir consumed by `deploy.overlay_dir_for(db, "docker-desktop")`; `postgres.yml` applied by `deploy.apply_incluster_postgres` (Task 4). + +- [ ] **Step 1: Create `postgres.yml`** — copy `deployments/k8s/dev/k3s/postgres.yml` verbatim EXCEPT the `volumeClaimTemplates` storageClassName: remove the `storageClassName: longhorn` line so the PVC uses DD's default (`hostpath`). Keep everything else (Secret `tmi-postgres` creds `tmi_dev/dev123/tmi_dev`, Service `postgres`, StatefulSet, PGDATA/locale env, probes, 8Gi request). 
Update the header comment to say "Docker Desktop dev target; uses the cluster default storageclass (hostpath)." + +- [ ] **Step 2: Create the worker image patches** (bare names, IfNotPresent applies by default on `:dev`): + +`patches/extractor-image.yaml`: +```yaml +- op: replace + path: /spec/image + value: tmi-extractor:dev +``` +`patches/chunkembed-image.yaml`: +```yaml +- op: replace + path: /spec/image + value: tmi-chunk-embed:dev +``` + +- [ ] **Step 3: Create the server pull-policy patch** `patches/server-pullpolicy.yaml`. server.yml sets `imagePullPolicy: Always`, which would try to pull the bare `tmi-server:dev` from a nonexistent registry. Force IfNotPresent: +```yaml +- op: replace + path: /spec/template/spec/containers/0/imagePullPolicy + value: IfNotPresent +``` + +- [ ] **Step 4: Create `kustomization.yaml`**: +```yaml +# docker-desktop (CLUSTER=docker-desktop, the default) dev overlay. Images are +# imported into the node's containerd by bare name (no registry), so the images +# transformer strips the localhost:5000/ prefix and the server's imagePullPolicy +# is forced to IfNotPresent. Postgres (postgres.yml) is applied as a prerequisite +# by deploy.py, not here. Redis stays chainguard (DD kernel is 4KB pages). 
+apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: tmi-platform +resources: + - ../controller.yml + - ../redis.yml + - ../server.yml + - ../../platform/components/tmi-extractor.yml + - ../../platform/components/tmi-chunk-embed.yml +images: + - name: localhost:5000/tmi-server + newName: tmi-server + - name: localhost:5000/tmi-component-controller + newName: tmi-component-controller +patches: + - path: patches/server-pullpolicy.yaml + target: + kind: Deployment + name: tmi-server + - path: patches/extractor-image.yaml + target: + group: tmi.dev + version: v1alpha1 + kind: TMIComponent + name: tmi-extractor + - path: patches/chunkembed-image.yaml + target: + group: tmi.dev + version: v1alpha1 + kind: TMIComponent + name: tmi-chunk-embed +``` + +- [ ] **Step 5: Verify the overlay renders correctly** (manual render check — no pytest, matching how the k3s overlay is validated): + +Run: `kubectl kustomize --load-restrictor LoadRestrictionsNone deployments/k8s/dev/docker-desktop | rg -n "image:|imagePullPolicy"` +Expected: server → `image: tmi-server:dev` with `imagePullPolicy: IfNotPresent`; controller → `tmi-component-controller:dev`; redis → `cgr.dev/chainguard/redis:latest`; extractor → `tmi-extractor:dev`; chunk-embed → `tmi-chunk-embed:dev`. No `localhost:5000` anywhere: +Run: `kubectl kustomize --load-restrictor LoadRestrictionsNone deployments/k8s/dev/docker-desktop | rg "localhost:5000" && echo LEAK || echo OK` +Expected: `OK`. + +- [ ] **Step 6: Commit** +```bash +git add deployments/k8s/dev/docker-desktop/ +git commit -m "feat(dev): docker-desktop overlay + in-cluster Postgres (default storageclass)" +``` + +--- + +### Task 4: deploy.start / restart / nuke orchestration for docker-desktop + +Wire the full bring-up, reusing the k3s in-cluster-Postgres + port-forward flow (generalized), and add the docker-desktop nuke path. 
+ +**Files:** +- Modify: `scripts/lib/deploy.py` (`apply_k3s_postgres`→`apply_incluster_postgres`, `teardown_k3s_namespace`→`teardown_namespace`, `start`, `restart`, `wait_and_forward`, port-forward gating) +- Modify: `scripts/devenv.py` (`cmd_nuke` docker-desktop branch) +- Test: `scripts/lib/tests/test_deploy.py` + +**Interfaces:** +- Consumes: Task 1–3 outputs (`overlay_dir_for`, `in_cluster_db_host`, `build_and_push` import path, the overlay + postgres.yml). +- Produces: `make dev-up CLUSTER=docker-desktop` brings up the full stack at `localhost:8080`. + +- [ ] **Step 1: Generalize the in-cluster Postgres + namespace helpers.** + +Rename `apply_k3s_postgres` → `apply_incluster_postgres(cluster_target)` so it applies the right overlay's `postgres.yml`: +```python +def apply_incluster_postgres(cluster_target: str) -> None: + """Apply the in-cluster Postgres for a cluster that hosts its own DB (k3s, + docker-desktop) and wait for it — a prerequisite before the server, mirroring + how the kind path brings the host DB up first.""" + project_root = get_project_root() + subdir = "k3s" if cluster_target == "k3s" else "docker-desktop" + kubectl(["apply", "-f", str(project_root / DEV_DIR / subdir / "postgres.yml")]) + kubectl(["-n", NS, "rollout", "status", "statefulset/postgres", "--timeout=180s"]) + log_success("In-cluster Postgres ready (svc/postgres:5432)") +``` +Update the k3s `start()` call site from `apply_k3s_postgres()` to `apply_incluster_postgres("k3s")`. + +Rename `teardown_k3s_namespace` → `teardown_namespace` (body unchanged — it deletes the `NS` namespace). Update its docstring to "Hard reset for a cluster we don't own (k3s, docker-desktop)…". Update the k3s call site in `devenv.py` `cmd_nuke`. 
+ +- [ ] **Step 2: Extend port-forward gating.** In `wait_and_forward` and `restart`, change `if cluster_target == "k3s":` (guarding `start_server_port_forward()`) to: +```python + if cluster_target in ("k3s", "docker-desktop"): + start_server_port_forward() +``` + +- [ ] **Step 3: Add the docker-desktop branch to `start()`.** Mirror the k3s branch but with NO registry and the in-cluster Postgres from the DD overlay: +```python + if cluster_target == "k3s": + ensure_k3s_registry() + elif cluster_target != "docker-desktop": + cluster.ensure_registry() + cluster.connect_registry_to_kind() + build_and_push(db, cluster_target) + ensure_namespace() + apply_platform_base() + if cluster_target in ("k3s", "docker-desktop"): + apply_incluster_postgres(cluster_target) + deliver_config(cluster_target) +``` +(That replaces the current k3s-vs-else registry block and the `if cluster_target == "k3s": apply_k3s_postgres()` line. docker-desktop skips registry setup entirely; `build_and_push` imports the images.) 
+ +- [ ] **Step 4: Add the docker-desktop branch to `restart()`** — same registry guard shape: +```python + if cluster_target == "k3s": + ensure_k3s_registry() + elif cluster_target != "docker-desktop": + cluster.ensure_registry() + cluster.connect_registry_to_kind() + build_and_push(db, cluster_target) +``` + +- [ ] **Step 5: Add the docker-desktop nuke branch in `devenv.py` `cmd_nuke`.** Change the k3s guard to cover both no-own-cluster targets: +```python + if args.cluster in ("k3s", "docker-desktop"): + cluster.up(cluster=args.cluster) # ensure the right context is active + deploy.teardown_namespace() + deploy.remove_local_images(args.db, cluster_target=args.cluster) + _clean_logs_and_files() + deploy.start(db=args.db, cluster_target=args.cluster, + no_workers=args.no_workers, skip_context_guard=args.yes) + log_success(f"dev-nuke complete (fresh {args.cluster} environment up)") + return +``` + +- [ ] **Step 6: Add a unit test for the generalized port-forward gating** in `test_deploy.py` (source-level, extending the existing `test_server_port_forward_is_k3s_only` idea to include docker-desktop): +```python + def test_server_port_forward_gated_for_docker_desktop_too(self): + src = (Path(deploy.__file__)).read_text() + # Both no-own-cluster targets gate the server port-forward together. + self.assertIn('if cluster_target in ("k3s", "docker-desktop"):', src) +``` +(If the existing `test_server_port_forward_is_k3s_only` test now conflicts — it asserted an exact `if cluster_target == "k3s":\n start_server_port_forward()` string — update that test's regex to accept the tuple form `cluster_target in ("k3s", "docker-desktop")` guarding `start_server_port_forward()`, keeping its intent: the server port-forward exists only for no-own-cluster targets and is always gated.) + +- [ ] **Step 7: Run unit tests** + +Run: `make test-dev-scripts` +Expected: PASS. 
+ +- [ ] **Step 8: Live bring-up (the real gate).** Prereq: Docker Desktop → Settings → Kubernetes → Enable, provisioner = kind (context `docker-desktop` reachable). + +Run: `make dev-up CLUSTER=docker-desktop` +Expected: images built + imported; NATS/KEDA applied; in-cluster Postgres Ready; overlay applied; server + controller roll out; port-forwards start; final line `Dev environment ready at http://localhost:8080`. +Run: `curl -sS -o /dev/null -w "%{http_code}\n" http://localhost:8080/` +Expected: `200`. +Run: `kubectl --context docker-desktop -n tmi-platform get pods` +Expected: `postgres-0`, `redis`, `nats-0`, `tmi-server`, `tmi-component-controller`, `tmi-extractor`, `tmi-chunk-embed` all `Running`/`Ready`. + +- [ ] **Step 9: Commit** +```bash +git add scripts/lib/deploy.py scripts/devenv.py scripts/lib/tests/test_deploy.py +git commit -m "feat(dev): wire deploy.start/restart/nuke for the docker-desktop target" +``` + +--- + +### Task 5: Make docker-desktop the default + setup doc + +**Files:** +- Modify: `Makefile` (line 26 `CLUSTER ?= kind`; help text lines 323, 326) +- Modify: `scripts/devenv.py` (subparser `--cluster` default ~180; docstring) +- Create: `deployments/k8s/dev/docker-desktop/README.md` + +- [ ] **Step 1: Change the Makefile default.** Line 26: `CLUSTER ?= docker-desktop`. Update the two help/comment lines (323, 326) `CLUSTER=kind|k3s` → `CLUSTER=docker-desktop|k3s|kind`. + +- [ ] **Step 2: Change the devenv default.** At the subparser `--cluster` (the one with `default="kind"`, ~line 180): `default="docker-desktop"`. Leave the top-level parser's `default=argparse.SUPPRESS` as-is. 
+ +- [ ] **Step 3: Write `deployments/k8s/dev/docker-desktop/README.md`** documenting: the one-time prereq (Docker Desktop → Settings → Kubernetes → Enable, provisioner = **kind**; verify `kubectl --context docker-desktop get nodes` shows `desktop-control-plane`); that images are imported into the node's containerd via `docker save | docker exec … ctr import` (no registry, no mirror); the endpoint is a port-forward on `localhost:8080`; and a note that **e2e-platform stays on standalone kind** because it needs a swappable NetworkPolicy-enforcing CNI (Calico) + ephemeral clusters that DD can't provide. + +- [ ] **Step 4: Verify the default works.** + +Run: `make dev-status` (should target docker-desktop context now) and `make dev-up` (no CLUSTER arg) +Expected: brings up docker-desktop; `curl localhost:8080` → 200. (If a stack is already up from Task 4, `make dev-reset` is the idempotent re-check.) + +- [ ] **Step 5: Commit** +```bash +git add Makefile scripts/devenv.py deployments/k8s/dev/docker-desktop/README.md +git commit -m "feat(dev): default CLUSTER to docker-desktop; document setup" +``` + +--- + +### Task 6: Retire the standalone-kind dev machinery + +Remove the dev kind path now that docker-desktop is the default. Keep the `kind` CLI + `deployments/k8s/platform/kind-cluster.yml` + `e2e-platform-*` (e2e only). 
+ +**Files:** +- Delete: `deployments/k8s/dev/kind-cluster.yml` +- Modify: `scripts/lib/cluster.py` (remove kind lifecycle + registry helpers + kind constants) +- Modify: `scripts/lib/deploy.py` (remove kind push path, kind constants/comments referencing extraPortMappings) +- Modify: `scripts/devenv.py` (drop `kind` from `--cluster` choices; drop the Mac-Postgres `_uses_host_db` path) +- Modify: tests referencing kind +- Modify: `scripts/lib/devstatus.py` if it references removed symbols + +- [ ] **Step 1: Find every usage of the kind-only symbols** so nothing dangles: + +Run: `rg -n "ensure_registry|connect_registry_to_kind|cluster_exists|start_stopped_nodes|_kind_node_containers|LOCAL_REGISTRY|REGISTRY_CONTAINER|KIND_CONFIG|is_registry_running|kind-cluster.yml|_uses_host_db|database\.(up|down|destroy)" scripts/` +Record the call sites; each must be removed or updated below. Expected users: `cluster.py` (definitions), `deploy.py` (`start`/`restart` registry block, `remove_local_images`, `build_and_push`), `devenv.py` (`_uses_host_db`, cmd_*), possibly `devstatus.py`. + +- [ ] **Step 2: Remove the Mac-Postgres path in devenv.py.** With kind gone, no dev target uses the host Postgres container (k3s + docker-desktop are in-cluster). Delete `_uses_host_db` and every `if _uses_host_db(args): database.up/down/destroy(...)` block in `cmd_up`/`cmd_down`/`cmd_reset`/`cmd_nuke`. Remove the now-unused `database` import if nothing else uses it (verify with the Step-1 grep). `cmd_db` (the `db up/down` verb) — if it only managed the host container, remove it and its parser subcommand; if it is still referenced, leave it but have it log that it is a no-op for in-cluster DBs. + +- [ ] **Step 3: Drop `kind` from the `--cluster` choices** (both parser definitions): `choices=["k3s", "docker-desktop"]`. Update the docstring line 19. 
+ +- [ ] **Step 4: Remove kind lifecycle from cluster.py.** Delete `ensure_registry`, `is_registry_running`, `cluster_exists`, `_kind_node_containers`, `start_stopped_nodes`, `connect_registry_to_kind`, and the entire kind branch of `up()` (the `for tool …`, `ensure_registry()`, `cluster_exists()`/create, `connect_registry_to_kind()` block) and the kind delete branch of `down()`. Remove now-unused constants: `REGISTRY_CONTAINER`, `REGISTRY_IMAGE`, `REGISTRY_PORT`, `LOCAL_REGISTRY`, `KIND_CONFIG`, and `CLUSTER_NAME` if unused after (check `expected_context` — it referenced `kind-{CLUSTER_NAME}`; since kind is no longer a valid target, simplify `expected_context` to only k3s + docker-desktop). Simplify `registry_for`/`local_image_ref` to drop the `LOCAL_REGISTRY` fallback (only `k3s` → registry, `docker-desktop` → None; make k3s explicit and raise/assert on unknown). Keep `is_local_kube_context` (still used by the guard). + + After edits, `up()` handles only `k3s` and `docker-desktop` (both: check_tool kubectl + use-context + return). `down()` is a no-op for both. + +- [ ] **Step 5: Remove the kind push path in deploy.py.** In `start()`/`restart()`, the registry block reduces to: +```python + if cluster_target == "k3s": + ensure_k3s_registry() + # docker-desktop: no registry — build_and_push imports the images. + build_and_push(db, cluster_target) +``` +In `build_and_push`, remove the kind branch: only `docker-desktop` (import) and `k3s` (push). In `remove_local_images`, drop kind handling. Update the `HOST_PORT`/`NODE_PORT` constant comment block (lines ~35-45) — it describes kind extraPortMappings; rewrite it to say the server is reached via a port-forward on both remaining targets (k3s/docker-desktop). `NODE_PORT` is still used by `server.yml`/`server-oracle.yml` (k3s CATS at `rp2:30080`) — keep the constant, drop the kind-extraPortMapping sentence. 
+ +- [ ] **Step 6: Delete the dev kind config.** + +Run: `git rm deployments/k8s/dev/kind-cluster.yml` + +- [ ] **Step 7: Update/remove kind tests.** In `test_deploy.py`: delete `test_kind_cluster_maps_host_to_nodeport` (the file it reads is gone). Keep `test_server_service_is_nodeport`/`test_server_oracle_service_is_nodeport` (the Service is still NodePort for k3s). In `test_cluster.py`: remove tests asserting kind create/registry behavior or `local_image_ref(..., cluster="kind")`/`expected_context("kind")` (kind is no longer a target); keep the docker-desktop + k3s + `is_local_kube_context` tests. In `test_devenv_cli.py`: remove any `--cluster kind` acceptance test; the parser now rejects `kind`. In `test_database.py`: if it only tested the host-Postgres helper that is now unused, leave the helper+tests in place only if the helper is still imported anywhere; otherwise delete both. + +- [ ] **Step 8: Check devstatus.py** (make dev-status). If it references any removed symbol (e.g. `is_registry_running`, `REGISTRY_CONTAINER`, kind node discovery), update it to the k3s/docker-desktop reality (no local registry container; context-based status). + +- [ ] **Step 9: Run unit tests** + +Run: `make test-dev-scripts` +Expected: PASS (kind tests removed; docker-desktop + k3s green). + +- [ ] **Step 10: Live non-regression** — verify BOTH remaining targets still work end to end. + +Run: `make dev-up CLUSTER=docker-desktop` → `curl localhost:8080` → 200; `make dev-down`. +Run: `make dev-up CLUSTER=k3s` → `curl localhost:8080` → 200; `make dev-down`. + +- [ ] **Step 11: Commit** +```bash +git add -A +git commit -m "refactor(dev): retire the standalone-kind dev path (docker-desktop is the default)" +``` + +--- + +## Notes for the implementer + +- The docker-desktop node name (`desktop-control-plane`) is the DD kind-provisioner default. 
If a future DD version names it differently, `build_and_push`/`import_image_to_node` fail fast on `docker exec`; discover the real name with `kubectl --context docker-desktop get nodes` and update `cluster.DD_NODE`. +- `dev-nuke` for docker-desktop deletes ONLY the `tmi-platform` namespace — never the DD cluster or other namespaces the developer runs. +- e2e-platform is deliberately untouched. Do not "helpfully" migrate it — its Calico/NetworkPolicy + ephemeral-cluster requirements are incompatible with DD-managed Kubernetes (see the design spec). diff --git a/docs/superpowers/specs/2026-07-03-docker-desktop-dev-target-design.md b/docs/superpowers/specs/2026-07-03-docker-desktop-dev-target-design.md new file mode 100644 index 00000000..67f0fd12 --- /dev/null +++ b/docs/superpowers/specs/2026-07-03-docker-desktop-dev-target-design.md @@ -0,0 +1,156 @@ +# Docker Desktop Dev Target — Design + +**Goal:** Replace the standalone-kind local dev deployment path with Docker Desktop's +built-in Kubernetes (kind provisioner, context `docker-desktop`), so the dev tooling +stops managing its own cluster + local registry container + containerd mirror. Fewer +moving parts; reuse the DD cluster the developer already runs. + +**Status:** Design approved (brainstorm). Implementation sequenced AFTER the k3s dev +target (PR #516) lands on `main`, since this extends the `CLUSTER` selector introduced +there. + +## Context / Motivation + +- The developer already runs Docker Desktop Kubernetes and wants dev to reuse it. +- Current standalone-kind dev path carries several moving parts our scripts manage: + a self-created 2-node kind cluster (`tmi-dev`), a `tmi-dev-registry` container on + `localhost:5000`, a containerd registry-mirror injected via `kind-cluster.yml`, and + `extraPortMappings` for the NodePort→`localhost:8080` contract. +- Motivation (user-selected): fewer moving parts, lower resource usage, reuse the + existing DD cluster. + +## Decisions (locked) + +1. 
**Replace, not add.** `docker-desktop` becomes the default local target; the dev + standalone-kind machinery is retired. `CLUSTER` options become + `docker-desktop` (default) | `k3s`. Makefile default: `CLUSTER ?= docker-desktop`. +2. **In-cluster Postgres** (reuse the k3s pattern), not the Mac `tmi-postgresql` + container. +3. **e2e-platform stays on standalone kind.** It requires a swappable, + NetworkPolicy-enforcing CNI (`disableDefaultCNI: true` + Calico) and ephemeral + per-run clusters — neither of which DD-managed Kubernetes can provide. This change + does NOT touch `e2e-platform-up/down`, `deployments/k8s/platform/kind-cluster.yml`, + or `test/e2e/platform/`. The `kind` CLI remains a project dependency for e2e only. + +## Verified environment facts (probes, 2026-07-03) + +- DD Kubernetes is enabled with the **kind provisioner**: single node + `desktop-control-plane`, containerd, arm64 linuxkit kernel **6.12.x (4KB pages)**, + k8s v1.36.1. (kubeadm provisioner would name the node `docker-desktop`.) +- The standalone `kind` CLI **cannot see** the DD-managed cluster + (`kind get clusters` → none) — so `kind load` and our mirror approach are out. +- **Registry-free image import works**: `docker save <image> | + docker exec -i desktop-control-plane ctr -n k8s.io images import -` lands the image + in the node's containerd; `crictl images` then sees it. Verified roundtrip with a + throwaway busybox image. +- **LoadBalancer is NOT localhost** here: a `type=LoadBalancer` service gets + EXTERNAL-IP `172.18.0.2` (kind network), not `localhost`. So the endpoint must be a + port-forward, as with k3s. +- **4KB pages** on DD's kernel → the chainguard redis jemalloc problem that forced + `redis:7-alpine` on the Pi (16KB) does NOT apply; DD uses chainguard redis unchanged. 
+- The component controller (`internal/platform/controller/render_deployment.go`) sets + `Image: c.Spec.Image` but never sets `ImagePullPolicy`; worker images are `:dev` + tagged, so k8s defaults to `IfNotPresent` → imported images are used without a pull. + +## Architecture + +Treat `docker-desktop` like k3s: a cluster whose lifecycle we do NOT own. We manage +only the `tmi-platform` namespace + workloads. ~80% of the k3s `cluster_target` +machinery is reused; the one new piece is registry-free image delivery. + +### Cluster lifecycle +- `cluster.up(docker-desktop)`: verify DD k8s reachable + `kubectl config use-context + docker-desktop`. No create. +- `cluster.down(docker-desktop)`: no-op (never delete DD's cluster). +- `dev-nuke` (docker-desktop): namespace-scoped hard reset (delete `tmi-platform`), + like k3s. +- Prereq (documented): DD → Settings → Kubernetes → Enable, provisioner = kind. + +### Image delivery (registry-free) +- Build the 4 TMI images locally (`docker build`, arm64-native, **no push, no + buildx**): tmi-server, tmi-component-controller, tmi-extractor, tmi-chunk-embed, + all `:dev`. +- Import each into the node: `docker save <image>:dev | docker exec -i + desktop-control-plane ctr -n k8s.io images import -`. +- No registry container, no mirror. Public images (postgres, redis, nats, keda) are + pulled normally from their upstreams. + +### Endpoint +- Port-forward `localhost:8080` → `svc/tmi-server` and `localhost:6379` → + `svc/redis`, reusing the k3s `start_server_port_forward` / `start_redis_port_forward` + (both gated on cluster_target). No LoadBalancer, no NodePort, no extraPortMappings. + +### Database +- In-cluster Postgres: `deployments/k8s/dev/docker-desktop/postgres.yml` — chainguard + postgres, DD's default storageclass (`hostpath`; k3s uses `longhorn`, so this is a + DD-specific manifest), creds matching `config-development.yml`. 
DB-URL host rewritten + to the `postgres` Service via the k3s `in_cluster_db_host` (extended to + docker-desktop) + `rewrite_db_host_for_incluster`. + +### Redis +- In-cluster **chainguard** redis, unchanged (4KB pages). No alpine remap. + +### Overlay (`deployments/k8s/dev/docker-desktop/`) +- `kustomization.yaml`: base workloads (controller, redis, server, extractor, + chunk-embed). +- images-transformer: strip `localhost:5000/` → bare names (`tmi-server`, + `tmi-component-controller`); worker TMIComponent images patched to bare + `tmi-extractor:dev` / `tmi-chunk-embed:dev`. +- imagePullPolicy patch: server `Always → IfNotPresent` (server.yml hardcodes Always; + controller.yml and workers already default to IfNotPresent on `:dev`). +- Server/redis Services can stay as-is (base type); port-forward works regardless. +- Postgres applied as a prerequisite (not in the overlay), like k3s. +- Platform base (NATS + KEDA + CRD) applied same as k3s. + +## What gets retired (dev standalone-kind) + +- `deployments/k8s/dev/kind-cluster.yml` (dev only — NOT the platform one). +- `tmi-dev-registry` container + `ensure_registry` / `connect_registry_to_kind`. +- containerd registry mirror + `extraPortMappings`. +- `cluster.py` kind create / `start_stopped_nodes` / `cluster_exists` paths and the + kind branch of `up`/`down`. +- Makefile `CLUSTER ?= kind` → `docker-desktop`; help text and `dev-cluster-up/down` + descriptions updated. +- The Mac `tmi-postgresql` container from the dev path (in-cluster now). (The + `database.py` helper may remain for other uses; the dev-up path no longer calls it.) + +Kept: the `kind` CLI, `deployments/k8s/platform/kind-cluster.yml`, `e2e-platform-*` +targets, and all `test/e2e/platform/` usage. + +## deploy.py / devenv.py / cluster.py changes + +Add a `docker-desktop` branch alongside `kind`/`k3s`: +- `cluster.up/down`, `expected_context` (`docker-desktop`), `registry_for` (n/a — no + registry; image build path skips push for docker-desktop). 
+- `deploy.start`: for docker-desktop → build (no push) + `ctr import`; apply postgres + prereq; apply platform base; deliver_config with host=`postgres`; apply + docker-desktop overlay; port-forward server + redis. +- `overlay_dir_for(db, "docker-desktop")` → the new overlay dir. +- `in_cluster_db_host("docker-desktop")` → `postgres`. +- New image-delivery helpers `save_import_cmds(...)` (the pure `docker save | ctr + import` argv builder, unit-tested) and `import_image_to_node(...)` (runs the pipe). + +## Testing + +- Unit (`make test-dev-scripts`): new-branch coverage for `overlay_dir_for`, + `in_cluster_db_host`, `expected_context`, the `--cluster docker-desktop` parser + value, the `ctr import` command builder, and port-forward gating. +- Live gate: `make dev-up` (now docker-desktop) → full stack, `curl localhost:8080` → + HTTP 200. And `make dev-up CLUSTER=k3s` non-regression. + +## Risks / open items + +- `docker exec desktop-control-plane` worked in probing though `docker ps` did not list + the node (likely a docker-context nuance). Implementation must confirm the exact + invocation is stable across DD restarts; fall back to `kind`-style node discovery if + the container name differs by DD version. +- imagePullPolicy for the server must be patched to IfNotPresent or the pod will try to + pull `tmi-server:dev` from a nonexistent registry. +- DD cluster is shared/persistent — `dev-nuke` scopes to the `tmi-platform` namespace + and must never touch other namespaces the developer runs. + +## Sequencing + +1. Land k3s (PR #516) on `main`. +2. Branch `feature/docker-desktop-dev-target` off `main`. +3. Implement per the plan; verify live; PR. diff --git a/scripts/devenv.py b/scripts/devenv.py index f54e2c29..e60be41f 100644 --- a/scripts/devenv.py +++ b/scripts/devenv.py @@ -2,22 +2,21 @@ # requires-python = ">=3.11" # dependencies = ["pyyaml>=6.0"] # /// -"""Single orchestrator for the kind-based TMI dev environment. +"""Single orchestrator for the TMI dev environment. 
Verbs (the make targets are 1:1 thin wrappers): - up create cluster + registry, start db, build+push images, deploy, wait - down tear down the in-cluster stack + cluster + registry; KEEP db data + up bring up the cluster context, build+push images, deploy, wait + down tear down the in-cluster stack; KEEP db data restart rebuild server image + roll the server pod (cluster + db untouched) reset soft known-state: redeploy the stack with fresh images; KEEP db data nuke hard known-state: destroy everything incl. db data + images, rebuild - status kind-aware status dashboard + status cluster-aware status dashboard deploy (re)apply manifests + rollout without recreating cluster/db logs stream the tmi-server pod logs - cluster up|down the kind cluster only - db up|down the postgres container only + cluster up|down the cluster target -Global: --db postgres|oracle (default postgres), --cluster kind|k3s - (default kind), --no-workers, --yes +Global: --db postgres|oracle (default postgres), --cluster docker-desktop|k3s + (default docker-desktop), --no-workers, --yes """ from __future__ import annotations @@ -27,7 +26,6 @@ sys.path.insert(0, str(Path(__file__).resolve().parent / "lib")) import cluster # noqa: E402 -import database # noqa: E402 import deploy # noqa: E402 import devstatus # noqa: E402 from tmi_common import ( # noqa: E402 @@ -35,32 +33,17 @@ ) VERBS = ["up", "down", "restart", "reset", "nuke", - "status", "deploy", "logs", "cluster", "db"] - - -def _db_profile(): - return database.dev_profile() - - -def _uses_host_db(args) -> bool: - """True when the Mac-hosted Postgres container is in play: postgres DB on the - kind cluster. 
For CLUSTER=k3s the DB runs in-cluster, so the host container is - never touched.""" - return args.db == "postgres" and args.cluster == "kind" + "status", "deploy", "logs", "cluster"] def cmd_up(args) -> None: cluster.up(cluster=args.cluster) - if _uses_host_db(args): - database.up(_db_profile()) deploy.start(db=args.db, cluster_target=args.cluster, no_workers=args.no_workers, skip_context_guard=args.yes) def cmd_down(args) -> None: deploy.teardown(db=args.db) - if _uses_host_db(args): - database.down(_db_profile()) # keep volume cluster.down(cluster=args.cluster) log_success("dev environment down (db data preserved)") @@ -73,8 +56,6 @@ def cmd_restart(args) -> None: def cmd_reset(args) -> None: log_info("dev-reset: redeploying the in-cluster stack (keeping cluster + db data)") deploy.teardown(db=args.db) - if _uses_host_db(args): - database.up(_db_profile()) # ensure db is up (no data loss) deploy.start(db=args.db, cluster_target=args.cluster, no_workers=args.no_workers, skip_context_guard=args.yes) log_success("dev-reset complete") @@ -82,30 +63,15 @@ def cmd_reset(args) -> None: def cmd_nuke(args) -> None: log_info("dev-nuke: destroying EVERYTHING incl. db data + built images") - if args.cluster == "k3s": - # Namespace-scoped hard reset: we don't own the k3s cluster, so wipe the - # tmi-platform namespace (workloads + registry + Postgres data) and redeploy. 
- cluster.up(cluster="k3s") # ensure the k3s-rp context is active - deploy.teardown_k3s_namespace() - deploy.remove_local_images(args.db, cluster_target="k3s") - _clean_logs_and_files() - deploy.start(db=args.db, cluster_target="k3s", - no_workers=args.no_workers, skip_context_guard=args.yes) - log_success("dev-nuke complete (fresh k3s environment up)") - return - deploy.teardown(db=args.db) - cluster.down(cluster=args.cluster) - if _uses_host_db(args): - database.destroy(_db_profile()) # removes container + volume (data wiped) - deploy.remove_local_images(args.db) + # Namespace-scoped hard reset: we don't own these clusters, so wipe the + # tmi-platform namespace (workloads + in-cluster Postgres data) and redeploy. + cluster.up(cluster=args.cluster) # ensure the right context is active + deploy.teardown_namespace() + deploy.remove_local_images(args.db, cluster_target=args.cluster) _clean_logs_and_files() - # rebuild from scratch - cluster.up(cluster=args.cluster) - if _uses_host_db(args): - database.up(_db_profile()) deploy.start(db=args.db, cluster_target=args.cluster, no_workers=args.no_workers, skip_context_guard=args.yes) - log_success("dev-nuke complete (fresh environment up)") + log_success(f"dev-nuke complete (fresh {args.cluster} environment up)") def _clean_logs_and_files() -> None: @@ -131,20 +97,10 @@ def cmd_cluster(args) -> None: {"up": cluster.up, "down": cluster.down}[args.action](cluster=args.cluster) -def cmd_db(args) -> None: - if args.db != "postgres": - log_info(f"db verb is a no-op for --db {args.db} (external database)") - return - if args.action == "up": - database.up(_db_profile()) - else: - database.down(_db_profile()) - - _DISPATCH = { "up": cmd_up, "down": cmd_down, "restart": cmd_restart, "reset": cmd_reset, "nuke": cmd_nuke, "status": cmd_status, "deploy": cmd_deploy, "logs": cmd_logs, - "cluster": cmd_cluster, "db": cmd_db, + "cluster": cmd_cluster, } @@ -169,7 +125,7 @@ def _add_global_options( if is_subparser: 
parser.add_argument("--db", choices=["postgres", "oracle"], default=argparse.SUPPRESS) - parser.add_argument("--cluster", choices=["kind", "k3s"], + parser.add_argument("--cluster", choices=["k3s", "docker-desktop"], default=argparse.SUPPRESS) parser.add_argument("--no-workers", action="store_true", dest="no_workers", default=argparse.SUPPRESS) @@ -177,8 +133,8 @@ def _add_global_options( help="Skip the local-kube-context safety check") else: parser.add_argument("--db", choices=["postgres", "oracle"], default="postgres") - parser.add_argument("--cluster", choices=["kind", "k3s"], default="kind", - help="Kube cluster target: kind (local, default) or k3s (remote k3s-rp)") + parser.add_argument("--cluster", choices=["k3s", "docker-desktop"], default="docker-desktop", + help="Kube cluster target: docker-desktop (default) or k3s (remote)") parser.add_argument("--no-workers", action="store_true", dest="no_workers") parser.add_argument("--yes", action="store_true", help="Skip the local-kube-context safety check") @@ -192,7 +148,7 @@ def build_parser() -> argparse.ArgumentParser: for v in VERBS: sp = sub.add_parser(v) _add_global_options(sp, is_subparser=True) - if v in ("cluster", "db"): + if v == "cluster": sp.add_argument("action", choices=["up", "down"]) return p diff --git a/scripts/help.py b/scripts/help.py index 0373121f..eae8095e 100644 --- a/scripts/help.py +++ b/scripts/help.py @@ -13,18 +13,16 @@ Usage: make [VARIABLE=value ...] Development Environment: - dev-up - Bring up the full kind dev environment (cluster + db + deploy). DB=postgres|oracle - dev-down - Tear down the kind dev environment; keep db data + dev-up - Bring up the full dev environment (cluster + deploy). 
DB=postgres|oracle CLUSTER=docker-desktop|k3s + dev-down - Tear down the dev environment; keep db data dev-restart - Rebuild the server image + roll the server pod dev-reset - Soft known-state: redeploy the stack with fresh images; keep db data dev-nuke - Hard known-state: destroy everything incl. db data + images, rebuild - dev-status - kind-aware dev environment status dashboard + dev-status - Dev environment status dashboard dev-logs - Stream the tmi-server pod logs dev-deploy - (Re)apply manifests + rollout without recreating cluster/db - dev-cluster-up - Create the local kind cluster only - dev-cluster-down - Delete the local kind cluster only - dev-db-up - Start the postgres dev container only - dev-db-down - Stop the postgres dev container only (keep data) + dev-cluster-up - Switch to the cluster kube context (docker-desktop or k3s) + dev-cluster-down - No-op for docker-desktop and k3s (clusters are not owned) start-dev - (deprecated, use dev-up) start-dev-oci - (deprecated, use dev-up DB=oracle) restart-dev - (deprecated, use dev-restart) diff --git a/scripts/lib/cluster.py b/scripts/lib/cluster.py index 81d079ad..696b98c9 100644 --- a/scripts/lib/cluster.py +++ b/scripts/lib/cluster.py @@ -1,4 +1,4 @@ -"""kind cluster + local dev-registry lifecycle. +"""Dev cluster lifecycle helpers: context switching for k3s and docker-desktop. Pure helpers (local_image_ref, is_local_kube_context) are unit-tested in scripts/lib/tests/test_cluster.py. 
Shell wrappers delegate to tmi_common.run_cmd @@ -6,45 +6,54 @@ """ from __future__ import annotations -from pathlib import Path - from tmi_common import ( - check_tool, container_exists, container_is_running, + check_tool, log_info, log_success, run_cmd, ) -CLUSTER_NAME = "tmi-dev" -REGISTRY_CONTAINER = "tmi-dev-registry" -REGISTRY_IMAGE = "registry:2" -REGISTRY_PORT = 5000 -LOCAL_REGISTRY = "localhost:5000" -_PROJECT_ROOT = Path(__file__).resolve().parents[2] -KIND_CONFIG = str(_PROJECT_ROOT / "deployments/k8s/dev/kind-cluster.yml") - # Remote k3s dev target (CLUSTER=k3s). We do not own this cluster: we select # its context but never create/delete it. Images go to an in-cluster registry # exposed at rp2:30500 (NodePort 30500). K3S_CONTEXT = "k3s-rp" K3S_REGISTRY = "rp2:30500" +# Docker Desktop dev target (CLUSTER=docker-desktop, the default). DD owns the +# cluster lifecycle; we only select its context and never create/delete it. +# Images are imported straight into the node's containerd (no registry): +# docker save | docker exec -i DD_NODE ctr -n k8s.io images import -. 
+DD_CONTEXT = "docker-desktop" +DD_NODE = "desktop-control-plane" -def registry_for(cluster: str = "kind") -> str: - """Return the dev image-registry hostname for the given cluster target.""" - return K3S_REGISTRY if cluster == "k3s" else LOCAL_REGISTRY +def registry_for(cluster: str = "docker-desktop") -> str | None: + """Return the dev image-registry hostname, or None for docker-desktop (no + registry — images are imported into the node's containerd).""" + if cluster == "docker-desktop": + return None + if cluster == "k3s": + return K3S_REGISTRY + raise ValueError(f"Unknown cluster target: {cluster!r}") -def expected_context(cluster: str = "kind") -> str: + +def expected_context(cluster: str = "docker-desktop") -> str: """Return the kube-context that must be active for the given cluster target.""" - return K3S_CONTEXT if cluster == "k3s" else f"kind-{CLUSTER_NAME}" + if cluster == "docker-desktop": + return DD_CONTEXT + if cluster == "k3s": + return K3S_CONTEXT + raise ValueError(f"Unknown cluster target: {cluster!r}") + # Contexts we consider safe to deploy a dev environment into without --yes. 
_LOCAL_CONTEXT_PREFIXES = ("kind-", "k3d-") _LOCAL_CONTEXT_EXACT = {"k3s", "default", "rancher-desktop", "docker-desktop", "minikube"} -def local_image_ref(name: str, tag: str = "dev", *, cluster: str = "kind") -> str: - """Return the fully-qualified dev-registry image reference for the cluster.""" - return f"{registry_for(cluster)}/{name}:{tag}" +def local_image_ref(name: str, tag: str = "dev", *, cluster: str = "docker-desktop") -> str: + """Return the dev image reference for the cluster: registry-qualified for + k3s, or a bare name:tag for docker-desktop (imported, not pulled).""" + reg = registry_for(cluster) + return f"{name}:{tag}" if reg is None else f"{reg}/{name}:{tag}" def is_local_kube_context(name: str) -> bool: @@ -56,86 +65,13 @@ def is_local_kube_context(name: str) -> bool: return any(name.startswith(p) for p in _LOCAL_CONTEXT_PREFIXES) -def is_registry_running() -> bool: - """True if the local dev registry container is currently running.""" - return container_is_running(REGISTRY_CONTAINER) - +def up(cluster: str = "docker-desktop") -> None: + """Bring up the dev cluster by selecting the appropriate kube context. 
-def ensure_registry() -> None: - """Start the local registry container if it is not already running.""" - name = REGISTRY_CONTAINER - if container_is_running(name): - log_info(f"Registry container already running: {name}") - return - if container_exists(name): - log_info(f"Starting existing registry container: {name}") - run_cmd(["docker", "start", name]) - log_success(f"Registry container started: {name}") - return - log_info(f"Creating registry container: {name}") - run_cmd([ - "docker", "run", "-d", - "--restart=always", - "-p", f"127.0.0.1:{REGISTRY_PORT}:{REGISTRY_PORT}", - "--name", name, - REGISTRY_IMAGE, - ]) - log_success(f"Registry container created and started: {name}") - - -def cluster_exists() -> bool: - """Return True if the kind cluster named CLUSTER_NAME already exists.""" - result = run_cmd( - ["kind", "get", "clusters"], - capture=True, - check=False, - ) - return CLUSTER_NAME in result.stdout.splitlines() - - -def _kind_node_containers(*, running_only: bool) -> list[str]: - """Return the kind node container names for this cluster (control-plane + workers). - - Discovered by kind's own container label rather than by guessing node names, - so it stays correct if the cluster topology in kind-cluster.yml changes. - """ - cmd = ["docker", "ps"] - if not running_only: - cmd.append("-a") - cmd += [ - "--filter", f"label=io.x-k8s.kind.cluster={CLUSTER_NAME}", - "--format", "{{.Names}}", - ] - result = run_cmd(cmd, capture=True, check=False) - return result.stdout.split() - - -def start_stopped_nodes() -> None: - """Start any kind node containers that exist but are stopped (idempotent). - - `kind create cluster` is a no-op once the cluster exists, but it will not - restart node containers that were halted by `devenv.py cluster down` (or a host - reboot). Bring them back so `up` reliably yields a running cluster. + For docker-desktop: select the DD context (DD owns the cluster lifecycle). 
+ For k3s: select the remote context (the in-cluster registry and workloads + are applied by deploy; we never create/delete the remote cluster). """ - running = set(_kind_node_containers(running_only=True)) - stopped = [n for n in _kind_node_containers(running_only=False) if n not in running] - if stopped: - log_info(f"Starting stopped kind node container(s): {', '.join(stopped)}") - run_cmd(["docker", "start", *stopped], check=False) - - -def connect_registry_to_kind() -> None: - """Attach the registry container to the kind Docker network (idempotent).""" - run_cmd( - ["docker", "network", "connect", "kind", REGISTRY_CONTAINER], - check=False, - ) - - -def up(cluster: str = "kind") -> None: - """Bring up the dev cluster: create kind + local registry, or (k3s) select the - remote context without creating anything (the in-cluster registry and workloads - are applied by deploy).""" if cluster == "k3s": check_tool("kubectl") log_info(f"Using existing k3s context '{K3S_CONTEXT}' (no cluster create)") @@ -143,34 +79,22 @@ def up(cluster: str = "kind") -> None: log_success(f"kube context set to '{K3S_CONTEXT}'") return - for tool in ("docker", "kind", "kubectl"): - check_tool(tool) - - ensure_registry() - - if cluster_exists(): - log_info(f"kind cluster '{CLUSTER_NAME}' already exists — skipping create") - start_stopped_nodes() - else: - log_info(f"Creating kind cluster '{CLUSTER_NAME}' with config: {KIND_CONFIG}") - run_cmd(["kind", "create", "cluster", "--config", KIND_CONFIG]) - log_success(f"kind cluster '{CLUSTER_NAME}' created") - - # `kind create` sets the kubectl context, but the skip-create path does not, - # so an active context from another cluster (e.g. k3s-rp) would linger and fail - # deploy's context guard. Always select the kind context — idempotent. 
- run_cmd(["kubectl", "config", "use-context", f"kind-{CLUSTER_NAME}"]) + if cluster == "docker-desktop": + check_tool("kubectl") + log_info(f"Using Docker Desktop Kubernetes context '{DD_CONTEXT}' (no cluster create)") + run_cmd(["kubectl", "config", "use-context", DD_CONTEXT]) + log_success(f"kube context set to '{DD_CONTEXT}'") + return - connect_registry_to_kind() - log_success(f"kind cluster '{CLUSTER_NAME}' ready; context kind-{CLUSTER_NAME}") + raise ValueError(f"Unknown cluster target: {cluster!r}") -def down(cluster: str = "kind") -> None: - """Delete the kind cluster entirely; no-op for the remote k3s cluster (not ours - to delete — namespace teardown is handled by deploy).""" +def down(cluster: str = "docker-desktop") -> None: + """No-op for docker-desktop and k3s: we do not own either cluster.""" if cluster == "k3s": log_info("cluster down is a no-op for k3s (remote cluster is not ours to delete)") return - check_tool("kind") - run_cmd(["kind", "delete", "cluster", "--name", CLUSTER_NAME]) - log_success(f"kind cluster '{CLUSTER_NAME}' deleted") + if cluster == "docker-desktop": + log_info("cluster down is a no-op for docker-desktop (Docker Desktop owns the cluster)") + return + raise ValueError(f"Unknown cluster target: {cluster!r}") diff --git a/scripts/lib/deploy.py b/scripts/lib/deploy.py index 2746f253..8068bea2 100644 --- a/scripts/lib/deploy.py +++ b/scripts/lib/deploy.py @@ -33,22 +33,21 @@ CONFIG_FILE = "config-development.yml" CONFIGMAP_NAME = "tmi-server-config" -# The server is reached on the host at localhost:HOST_PORT. The kind cluster -# publishes the tmi-server Service's NodePort (NODE_PORT) directly on the host -# via extraPortMappings (deployments/k8s/dev/kind-cluster.yml), so there is NO -# `kubectl port-forward` for the server. The old userspace port-forward proxy -# collapsed under high connection rates (CATS fuzzing -> 99% CONNECTION_ERROR, -# issue #463); Docker's published port + kube-proxy DNAT has no such bottleneck. 
-# KEEP NODE_PORT IN SYNC with server.yml/server-oracle.yml (.spec.ports[].nodePort) -# and kind-cluster.yml (extraPortMappings[].containerPort). +# The server is reached on the host at localhost:HOST_PORT via a kubectl +# port-forward (start_server_port_forward). Neither docker-desktop nor k3s +# publishes the NodePort directly on the host (only the former kind cluster did +# that via extraPortMappings). The port-forward replaces the removed kind path; +# for high-throughput testing (CATS) against k3s, hit the NodePort at +# rp2:30080 directly — the userspace forward throttles under load (#463). +# KEEP NODE_PORT IN SYNC with server.yml/server-oracle.yml (.spec.ports[].nodePort). HOST_PORT = 8080 NODE_PORT = 30080 SERVER_URL = f"http://localhost:{HOST_PORT}" -# Legacy server port-forward pidfile. The server no longer uses a port-forward -# (it is reached via the NodePort above), but stop_port_forward() still cleans -# up this file so an upgrade from a port-forward-based checkout kills any stale -# forwarder left running on :8080 that would otherwise shadow the NodePort. +# Server port-forward pidfile. The server is reached on the host at localhost:8080 +# via the server port-forward (k3s and docker-desktop), which writes this pidfile +# so stop_port_forward() can tear it down. Also cleans up any stale forwarder +# left running on :8080 from a prior session. PORT_FORWARD_PID = "/tmp/tmi-dev-portforward.pid" # Redis is an in-cluster ClusterIP service; the server reaches it as redis:6379. # Integration tests that seed Redis directly (e.g. the step-up legacy refresh @@ -80,15 +79,20 @@ def image_builds_for(db: str) -> list[tuple[str, str, dict]]: ] -def overlay_dir_for(db: str, cluster_target: str = "kind") -> str: +def overlay_dir_for(db: str, cluster_target: str = "docker-desktop") -> str: """Return the kustomize overlay directory path for the chosen cluster + DB flavor. 
CLUSTER=k3s uses its own overlay (in-cluster registry image refs, full stack); - Oracle-on-k3s is out of scope, so k3s implies the postgres overlay. + CLUSTER=docker-desktop uses its own overlay (image import, no registry). + For docker-desktop the DB flavor further selects: oracle gets the dedicated + docker-desktop-oracle overlay (external ADB, no in-cluster Postgres); postgres + gets the standard docker-desktop overlay (in-cluster Postgres). """ if cluster_target == "k3s": return f"{DEV_DIR}/k3s" - return f"{DEV_DIR}/oracle" if db == "oracle" else DEV_DIR + if cluster_target == "docker-desktop": + return f"{DEV_DIR}/docker-desktop-oracle" if db == "oracle" else f"{DEV_DIR}/docker-desktop" + raise ValueError(f"unknown cluster target: {cluster_target!r}") def _no_workers_files(db: str) -> tuple[str, ...]: @@ -107,22 +111,48 @@ def content_hash(text: str) -> str: return hashlib.sha256(text.encode("utf-8")).hexdigest()[:12] -# The host the in-cluster server uses to reach the host-published Postgres. -# config-development.yml carries `localhost` (correct for host-side tools like -# bin/tmi-dbtool, which connect from the Mac host). The server pod's localhost is -# the pod itself, so when we deliver that file as a ConfigMap we rewrite ONLY the -# database-URL authority to this host. Docker Desktop maps host.docker.internal to -# the host, reaching the 127.0.0.1:5432-published Postgres container. +def save_import_cmds(ref: str, node: str) -> tuple[list[str], list[str]]: + """Return the (docker save, docker exec ctr import) argv pair that streams a + locally-built image straight into a cluster node's containerd (k8s.io ns). 
+ This is exactly what `kind load docker-image` does under the hood — used for + docker-desktop, whose cluster our standalone kind CLI cannot address.""" + return ( + ["docker", "save", ref], + ["docker", "exec", "-i", node, "ctr", "-n", "k8s.io", "images", "import", "-"], + ) + + +def import_image_to_node(ref: str, node: str) -> None: + """Stream `ref` from the host Docker into `node`'s containerd via a pipe.""" + save_cmd, import_cmd = save_import_cmds(ref, node) + log_info(f"Importing {ref} -> {node} containerd (k8s.io)") + saver = subprocess.Popen(save_cmd, stdout=subprocess.PIPE) + try: + importer = subprocess.Popen(import_cmd, stdin=saver.stdout) + saver.stdout.close() # allow saver to receive SIGPIPE if importer exits + importer.communicate() + if importer.returncode != 0: + log_error(f"ctr import failed for {ref} (exit {importer.returncode})") + sys.exit(1) + finally: + saver.wait() + if saver.returncode != 0: + log_error(f"docker save failed for {ref} (exit {saver.returncode})") + sys.exit(1) + + +# Fallback host for reaching a host-published Postgres from inside a cluster +# (kept for historical reference; k3s and docker-desktop use in-cluster Postgres +# and therefore use the `postgres` service name instead). IN_CLUSTER_DB_HOST = "host.docker.internal" -def in_cluster_db_host(cluster_target: str = "kind") -> str: +def in_cluster_db_host(cluster_target: str = "docker-desktop") -> str: """Host the in-cluster server uses to reach Postgres for the given cluster. - kind: Postgres is a Mac container, reached via host.docker.internal. - k3s: Postgres runs in-cluster as the `postgres` Service (postgres.yml). + k3s and docker-desktop: Postgres runs in-cluster as the `postgres` Service. 
""" - return "postgres" if cluster_target == "k3s" else IN_CLUSTER_DB_HOST + return "postgres" if cluster_target in ("k3s", "docker-desktop") else IN_CLUSTER_DB_HOST # Match the host (and optional :port) inside a postgres:// URL authority, after @@ -192,11 +222,11 @@ def _preflight() -> None: for tool in ("docker", "kubectl"): check_tool(tool) if run_cmd(["kubectl", "cluster-info"], check=False).returncode != 0: - log_error("No reachable cluster. Run 'make dev-cluster-up' (kind) or start your k3s cluster.") + log_error("No reachable cluster. Run 'make dev-cluster-up' to set the cluster context.") sys.exit(1) -def _guard_context(skip: bool, cluster_target: str = "kind") -> str: +def _guard_context(skip: bool, cluster_target: str = "docker-desktop") -> str: ctx = current_kube_context() log_info(f"kubectl context: {ctx or '(none)'} namespace: {NS}") if not ctx: @@ -316,13 +346,14 @@ def unstage_tmi_client(created: bool) -> None: # Image build + push # --------------------------------------------------------------------------- -def build_and_push(db: str, cluster_target: str = "kind") -> None: - """Build all images and push them to the target cluster's registry. +def build_and_push(db: str, cluster_target: str = "docker-desktop") -> None: + """Build all images and deliver them to the target cluster. - kind -> localhost:5000; k3s -> the in-cluster registry at rp2:30500. The Mac - and the k3s nodes are both arm64, so a plain host-arch `docker build` already - produces arm64 images — no buildx/--platform is needed; only the registry the - ref points at differs. + k3s -> push to the in-cluster registry at rp2:30500; docker-desktop -> + import straight into the DD node's containerd via `docker save | ctr import` + (no registry, no push). The Mac and the k3s nodes are both arm64, so a plain + host-arch `docker build` already produces arm64 images — no buildx/--platform + is needed; only the delivery step differs. 
All four Dockerfiles require the tmi-client staged in .docker-deps/. Stage once before the first build and clean up in a try/finally block @@ -345,10 +376,16 @@ def build_and_push(db: str, cluster_target: str = "kind") -> None: run_cmd(cmd) - log_info(f"Pushing {ref}") - run_cmd(["docker", "push", ref]) + if cluster_target == "docker-desktop": + import_image_to_node(ref, cluster.DD_NODE) + else: + log_info(f"Pushing {ref}") + run_cmd(["docker", "push", ref]) - log_success("All images built and pushed to local registry") + if cluster_target == "docker-desktop": + log_success("All images built and imported into the docker-desktop node") + else: + log_success("All images built and pushed to local registry") finally: unstage_tmi_client(created) @@ -382,20 +419,20 @@ def ensure_k3s_registry() -> None: log_success("In-cluster registry ready (rp2:30500)") -def apply_k3s_postgres() -> None: - """Apply the in-cluster Postgres and wait for it (k3s prerequisite before the - server, mirroring how the kind path brings the host DB up first).""" +def apply_incluster_postgres(cluster_target: str) -> None: + """Apply the in-cluster Postgres (k3s and docker-desktop) and wait for it — + a prerequisite before the server starts AutoMigrate.""" project_root = get_project_root() - kubectl(["apply", "-f", str(project_root / DEV_DIR / "k3s" / "postgres.yml")]) + subdir = "k3s" if cluster_target == "k3s" else "docker-desktop" + kubectl(["apply", "-f", str(project_root / DEV_DIR / subdir / "postgres.yml")]) kubectl(["-n", NS, "rollout", "status", "statefulset/postgres", "--timeout=180s"]) log_success("In-cluster Postgres ready (svc/postgres:5432)") -def deliver_config(cluster_target: str = "kind") -> None: +def deliver_config(cluster_target: str = "docker-desktop") -> None: content = (get_project_root() / CONFIG_FILE).read_text() # The on-disk config points the DB at localhost (for host-side tools); rewrite - # it to the host the in-cluster server uses: host.docker.internal (kind) or the - # 
in-cluster `postgres` Service (k3s). + # it to the in-cluster `postgres` Service (k3s and docker-desktop). content = rewrite_db_host_for_incluster(content, db_host=in_cluster_db_host(cluster_target)) manifest = render_configmap_yaml( name=CONFIGMAP_NAME, namespace=NS, file_key="config.yml", content=content, @@ -466,7 +503,7 @@ def create_oracle_db_secret() -> None: log_success("oracle DB connection delivered as Secret/tmi-oracle-db") -def apply_overlay(no_workers: bool, db: str, cluster_target: str = "kind") -> None: +def apply_overlay(no_workers: bool, db: str, cluster_target: str = "docker-desktop") -> None: """Apply the dev overlay. When --no-workers: apply the three core manifests individually to avoid @@ -475,9 +512,7 @@ def apply_overlay(no_workers: bool, db: str, cluster_target: str = "kind") -> No Otherwise: render the full kustomize overlay with --load-restrictor LoadRestrictionsNone (needed because the overlay references files outside - its own directory tree, i.e. ../platform/components/; the oracle overlay at - deployments/k8s/dev/oracle references ../../platform/components/ so the flag - is equally required for both flavors). + its own directory tree, e.g. ../../platform/components/). """ project_root = get_project_root() if no_workers: @@ -506,26 +541,25 @@ def server_rollout_timeout(db: str) -> str: return "1200s" if db == "oracle" else "180s" -def wait_and_forward(db: str = "postgres", cluster_target: str = "kind") -> None: +def wait_and_forward(db: str = "postgres", cluster_target: str = "docker-desktop") -> None: kubectl(["-n", NS, "rollout", "status", "deploy/tmi-component-controller", "--timeout=120s"]) kubectl(["-n", NS, "rollout", "status", "deploy/tmi-server", f"--timeout={server_rollout_timeout(db)}"]) start_redis_port_forward() - # k3s has no extraPortMappings, so preserve localhost:8080 with a server - # port-forward. Start it AFTER the redis forward, whose stop_port_forward() - # clears both pidfiles, so this one survives. 
- if cluster_target == "k3s": + # k3s and docker-desktop have no extraPortMappings, so preserve localhost:8080 + # with a server port-forward. Start it AFTER the redis forward, whose + # stop_port_forward() clears both pidfiles, so this one survives. + if cluster_target in ("k3s", "docker-desktop"): start_server_port_forward() wait_for_server() log_success(f"Dev environment ready at {SERVER_URL}") def wait_for_server(*, attempts: int = 30, delay_s: float = 1.0) -> None: - """Poll the server NodePort until it answers (or give up after attempts). + """Poll the server until it answers (or give up after attempts). - The Deployment rollout completing means the pod is Ready, but kube-proxy - programming the NodePort DNAT and the kind extraPortMapping becoming live on - the host can lag a beat. Poll localhost:8080 so callers (and CATS) don't race - a not-yet-reachable NodePort. + The Deployment rollout completing means the pod is Ready, but the kubectl + port-forward establishing the localhost:8080 path can lag a beat. Poll + localhost:8080 so callers (and CATS) don't race a not-yet-reachable server. """ for i in range(1, attempts + 1): reachable, code = server_http_status() @@ -536,18 +570,15 @@ def wait_for_server(*, attempts: int = 30, delay_s: float = 1.0) -> None: time.sleep(delay_s) log_error( f"Server not reachable at {SERVER_URL} after {attempts} attempts. " - f"If you upgraded an existing cluster, recreate it so the NodePort " - f"mapping takes effect: 'make dev-cluster-down && make dev-cluster-up' " - f"(or 'make dev-nuke')." + f"If the port-forward is not running, try 'make dev-nuke' for a clean restart." ) def start_redis_port_forward() -> None: """Forward the in-cluster Redis to localhost:6379 for host integration tests. - The server itself is NOT forwarded — it is reached via the NodePort published - on localhost:8080 by the kind extraPortMapping. Only Redis needs a host - forward, and only for test setup (low throughput), so a port-forward is fine. 
+ Redis is low-throughput from the host (test setup only), so a port-forward + is fine here. """ stop_port_forward() redis_proc = subprocess.Popen( @@ -560,14 +591,14 @@ def start_redis_port_forward() -> None: def start_server_port_forward() -> None: - """Forward the in-cluster server to localhost:8080 (k3s only). + """Forward the in-cluster server to localhost:8080 (k3s and docker-desktop). - kind publishes the server NodePort on localhost:8080 via extraPortMappings, so - no forward is needed there. A remote k3s cluster has no such mapping, so we + Neither target publishes the server NodePort directly on the host, so we preserve the localhost:8080 contract with a port-forward. (For CATS/high- - throughput, hit the NodePort at rp2:30080 directly — the userspace forward - throttles under load, the #463 problem.) Stops only a prior SERVER forward so - it does not disturb the redis forward started just before it.""" + throughput against k3s, hit the NodePort at rp2:30080 directly — the + userspace forward throttles under load, the #463 problem.) 
Stops only a + prior SERVER forward so it does not disturb the redis forward started + just before it.""" _stop_port_forward_pidfile(PORT_FORWARD_PID) srv_proc = subprocess.Popen( ["kubectl", "-n", NS, "port-forward", "svc/tmi-server", f"{HOST_PORT}:{HOST_PORT}"], @@ -604,7 +635,7 @@ def tail_server_logs() -> None: kubectl(["-n", NS, "logs", "-f", "deploy/tmi-server", "--tail=200"], check=False) -def remove_local_images(db: str, cluster_target: str = "kind") -> None: +def remove_local_images(db: str, cluster_target: str = "docker-desktop") -> None: """Remove the locally-built dev images (used by `devenv.py nuke`).""" for name, _df, _args in image_builds_for(db): run_cmd(["docker", "rmi", "-f", cluster.local_image_ref(name, cluster=cluster_target)], @@ -612,7 +643,7 @@ def remove_local_images(db: str, cluster_target: str = "kind") -> None: def server_http_status() -> tuple[bool, str]: - """Return (reachable, http_code) for the server NodePort at localhost:8080.""" + """Return (reachable, http_code) for the server at localhost:8080 (via port-forward).""" r = subprocess.run( ["curl", "-s", "--connect-timeout", "2", "--max-time", "5", "-o", "/dev/null", "-w", "%{http_code}", SERVER_URL], @@ -626,21 +657,22 @@ def server_http_status() -> tuple[bool, str]: # Orchestration entry points # --------------------------------------------------------------------------- -def start(*, db: str, cluster_target: str = "kind", no_workers: bool = False, +def start(*, db: str, cluster_target: str = "docker-desktop", no_workers: bool = False, skip_context_guard: bool = False) -> None: """Build images, deploy all components, wait for readiness, and start port-forwards.""" _preflight() _guard_context(skip_context_guard, cluster_target) + if db == "oracle" and cluster_target == "k3s": + log_error("DB=oracle is not supported on CLUSTER=k3s. 
Use CLUSTER=docker-desktop for local Oracle dev.") + sys.exit(1) if cluster_target == "k3s": ensure_k3s_registry() # in-cluster registry must be up before push - else: - cluster.ensure_registry() - cluster.connect_registry_to_kind() + # docker-desktop: no registry — build_and_push imports the images directly. build_and_push(db, cluster_target) ensure_namespace() apply_platform_base() - if cluster_target == "k3s": - apply_k3s_postgres() # in-cluster DB up before the server (AutoMigrate) + if cluster_target in ("k3s", "docker-desktop") and db != "oracle": + apply_incluster_postgres(cluster_target) # in-cluster DB up before the server (AutoMigrate) deliver_config(cluster_target) create_embedding_secret() if db == "oracle": @@ -659,16 +691,14 @@ def start(*, db: str, cluster_target: str = "kind", no_workers: bool = False, wait_and_forward(db, cluster_target) -def restart(*, db: str, cluster_target: str = "kind", no_workers: bool = False, +def restart(*, db: str, cluster_target: str = "docker-desktop", no_workers: bool = False, skip_context_guard: bool = False) -> None: """Rebuild the server image, re-deliver config, and roll the server deployment.""" _preflight() _guard_context(skip_context_guard, cluster_target) if cluster_target == "k3s": ensure_k3s_registry() - else: - cluster.ensure_registry() - cluster.connect_registry_to_kind() + # docker-desktop: no registry — build_and_push imports the images directly. 
build_and_push(db, cluster_target) deliver_config(cluster_target) if db == "oracle": @@ -678,7 +708,7 @@ def restart(*, db: str, cluster_target: str = "kind", no_workers: bool = False, kubectl(["-n", NS, "rollout", "restart", "deploy/tmi-server"]) kubectl(["-n", NS, "rollout", "status", "deploy/tmi-server", f"--timeout={server_rollout_timeout(db)}"]) start_redis_port_forward() - if cluster_target == "k3s": + if cluster_target in ("k3s", "docker-desktop"): start_server_port_forward() wait_for_server() log_success(f"Server restarted; {SERVER_URL}") @@ -697,7 +727,6 @@ def teardown(*, db: str = "postgres") -> None: - Secret tmi-embedding - Secret tmi-oracle-wallet (defensive; no-op if never created) - Secret tmi-oracle-db (defensive; no-op if never created) - - local registry container (docker stop) """ stop_port_forward() @@ -748,16 +777,13 @@ def teardown(*, db: str = "postgres") -> None: check=False, ) - # Stop the local registry container - run_cmd(["docker", "stop", cluster.REGISTRY_CONTAINER], check=False) - log_success("Dev environment torn down (cluster left intact)") -def teardown_k3s_namespace() -> None: - """Hard reset for k3s: delete the entire tmi-platform namespace (all workloads, - the in-cluster registry, and the Postgres PVC/data). Never touches the k3s - cluster itself — we do not own it.""" +def teardown_namespace() -> None: + """Hard reset for a cluster we don't own (k3s, docker-desktop): delete the entire + tmi-platform namespace (all workloads, the in-cluster registry, and the Postgres + PVC/data). 
Never touches the cluster itself.""" stop_port_forward() kubectl(["delete", "namespace", NS, "--ignore-not-found", "--wait=true"]) - log_success(f"Namespace {NS} deleted (k3s hard reset)") + log_success(f"Namespace {NS} deleted (hard reset)") diff --git a/scripts/lib/devstatus.py b/scripts/lib/devstatus.py index 74d5c506..4160be3a 100644 --- a/scripts/lib/devstatus.py +++ b/scripts/lib/devstatus.py @@ -1,16 +1,12 @@ -"""kind-aware status dashboard for the TMI dev environment. +"""Status dashboard for the TMI dev environment (k3s and docker-desktop). -Replaces the old host-process scan. Reports the kind cluster, local registry, -external db container, in-cluster Deployments, server reachability, and the -optional OAuth stub. Pure parser deployment_readiness() is unit-tested. +Reports the active kube context, in-cluster Deployments, server reachability, +and the optional OAuth stub. Pure parser deployment_readiness() is unit-tested. """ from __future__ import annotations import json -import sys from pathlib import Path -import cluster -import database import deploy from tmi_common import GREEN, NC, RED, YELLOW, run_cmd @@ -29,11 +25,6 @@ def deployment_readiness(json_text: str) -> list[tuple[str, int, int]]: return rows -def _cluster_present() -> bool: - r = run_cmd(["kind", "get", "clusters"], check=False, capture=True) - return cluster.CLUSTER_NAME in r.stdout.split() - - def _row(ok: bool, label: str, detail: str) -> None: mark = f"{GREEN}✓{NC}" if ok else f"{RED}✗{NC}" print(f"{mark} {label:<26} {detail}") @@ -43,13 +34,8 @@ def print_dashboard() -> None: print("TMI Dev Environment Status") print("==========================\n") - _row(_cluster_present(), f"kind cluster ({cluster.CLUSTER_NAME})", - "present" if _cluster_present() else "absent — run 'make dev-up'") - _row(cluster.is_registry_running(), "local registry", cluster.REGISTRY_CONTAINER) - - db = database.dev_profile() - _row(database.is_running(db), "database (postgres)", - f"container: {db.container}" 
if database.is_running(db) else "stopped") + ctx = deploy.current_kube_context() + _row(bool(ctx), "kube context", ctx or "none — run 'make dev-cluster-up'") # In-cluster deployments r = run_cmd(["kubectl", "get", "deploy", "-n", deploy.NS, "-o", "json"], diff --git a/scripts/lib/tests/test_cluster.py b/scripts/lib/tests/test_cluster.py index c0a344f1..708f5811 100644 --- a/scripts/lib/tests/test_cluster.py +++ b/scripts/lib/tests/test_cluster.py @@ -7,18 +7,6 @@ class TestLocalImageRef(unittest.TestCase): - def test_local_image_ref_default_tag(self): - self.assertEqual(cluster.local_image_ref("tmi-server"), "localhost:5000/tmi-server:dev") - - def test_local_image_ref_custom_tag(self): - self.assertEqual(cluster.local_image_ref("tmi-server", tag="x"), "localhost:5000/tmi-server:x") - - def test_local_image_ref_kind_explicit(self): - self.assertEqual( - cluster.local_image_ref("tmi-server", cluster="kind"), - "localhost:5000/tmi-server:dev", - ) - def test_local_image_ref_k3s(self): self.assertEqual( cluster.local_image_ref("tmi-server", cluster="k3s"), @@ -27,26 +15,45 @@ def test_local_image_ref_k3s(self): class TestRegistryFor(unittest.TestCase): - def test_registry_for_default_is_kind(self): - self.assertEqual(cluster.registry_for(), "localhost:5000") - - def test_registry_for_kind(self): - self.assertEqual(cluster.registry_for("kind"), "localhost:5000") - def test_registry_for_k3s(self): self.assertEqual(cluster.registry_for("k3s"), "rp2:30500") + def test_registry_for_unknown_raises(self): + with self.assertRaises(ValueError): + cluster.registry_for("kind") -class TestExpectedContext(unittest.TestCase): - def test_expected_context_default_is_kind(self): - self.assertEqual(cluster.expected_context(), "kind-tmi-dev") - def test_expected_context_kind(self): - self.assertEqual(cluster.expected_context("kind"), "kind-tmi-dev") +class TestExpectedContext(unittest.TestCase): + def test_expected_context_default_is_docker_desktop(self): + 
self.assertEqual(cluster.expected_context(), "docker-desktop") def test_expected_context_k3s(self): self.assertEqual(cluster.expected_context("k3s"), "k3s-rp") + def test_expected_context_unknown_raises(self): + with self.assertRaises(ValueError): + cluster.expected_context("kind") + + +class TestDockerDesktopIdentity(unittest.TestCase): + def test_registry_for_docker_desktop_is_none(self): + self.assertIsNone(cluster.registry_for("docker-desktop")) + + def test_local_image_ref_docker_desktop_is_bare(self): + # No registry prefix — the image is imported straight into the node's containerd. + self.assertEqual(cluster.local_image_ref("tmi-server", cluster="docker-desktop"), "tmi-server:dev") + + def test_local_image_ref_default_is_docker_desktop(self): + # Default cluster is docker-desktop — no registry prefix. + self.assertEqual(cluster.local_image_ref("tmi-server"), "tmi-server:dev") + + def test_expected_context_docker_desktop(self): + self.assertEqual(cluster.expected_context("docker-desktop"), "docker-desktop") + + def test_constants(self): + self.assertEqual(cluster.DD_CONTEXT, "docker-desktop") + self.assertEqual(cluster.DD_NODE, "desktop-control-plane") + class TestIsLocalKubeContext(unittest.TestCase): def test_is_local_kube_context_kind_prefix(self): diff --git a/scripts/lib/tests/test_deploy.py b/scripts/lib/tests/test_deploy.py index 77abe687..27103270 100644 --- a/scripts/lib/tests/test_deploy.py +++ b/scripts/lib/tests/test_deploy.py @@ -33,26 +33,41 @@ def test_image_builds_oracle_includes_workers(self): class TestOverlayDirFor(unittest.TestCase): - def test_overlay_dir_oracle(self): - self.assertTrue(deploy.overlay_dir_for("oracle").endswith("/oracle")) + def test_overlay_dir_oracle_docker_desktop(self): + # docker-desktop + oracle uses the dedicated docker-desktop-oracle overlay. 
+ self.assertTrue(deploy.overlay_dir_for("oracle", "docker-desktop").endswith("/docker-desktop-oracle")) - def test_overlay_dir_postgres(self): - self.assertFalse(deploy.overlay_dir_for("postgres").endswith("/oracle")) + def test_overlay_dir_postgres_docker_desktop(self): + self.assertTrue(deploy.overlay_dir_for("postgres", "docker-desktop").endswith("/docker-desktop")) def test_overlay_dir_k3s(self): # CLUSTER=k3s uses the k3s overlay regardless of DB flavor. self.assertTrue(deploy.overlay_dir_for("postgres", "k3s").endswith("/k3s")) self.assertTrue(deploy.overlay_dir_for("oracle", "k3s").endswith("/k3s")) + def test_overlay_dir_docker_desktop(self): + self.assertTrue(deploy.overlay_dir_for("postgres", "docker-desktop").endswith("/docker-desktop")) + + def test_overlay_dir_docker_desktop_oracle(self): + self.assertTrue(deploy.overlay_dir_for("oracle", "docker-desktop").endswith("/docker-desktop-oracle")) + + def test_overlay_dir_docker_desktop_postgres_not_oracle(self): + p = deploy.overlay_dir_for("postgres", "docker-desktop") + self.assertTrue(p.endswith("/docker-desktop")) + self.assertFalse(p.endswith("-oracle")) + class TestInClusterDbHost(unittest.TestCase): - def test_kind_uses_host_docker_internal(self): - self.assertEqual(deploy.in_cluster_db_host(), "host.docker.internal") - self.assertEqual(deploy.in_cluster_db_host("kind"), "host.docker.internal") + def test_default_uses_postgres_service(self): + # docker-desktop is the default cluster target + self.assertEqual(deploy.in_cluster_db_host(), "postgres") def test_k3s_uses_postgres_service(self): self.assertEqual(deploy.in_cluster_db_host("k3s"), "postgres") + def test_docker_desktop_uses_postgres_service(self): + self.assertEqual(deploy.in_cluster_db_host("docker-desktop"), "postgres") + def test_k3s_rewrites_url_host_to_postgres_service(self): src = 'url: "postgres://tmi_dev:dev123@localhost:5432/tmi_dev?sslmode=disable"' out = deploy.rewrite_db_host_for_incluster(src, 
db_host=deploy.in_cluster_db_host("k3s")) @@ -133,41 +148,36 @@ def test_server_service_is_nodeport(self): def test_server_oracle_service_is_nodeport(self): self._assert_service_is_nodeport("server-oracle.yml") - def test_kind_cluster_maps_host_to_nodeport(self): - text = (_DEV_DIR / "kind-cluster.yml").read_text() - # extraPortMappings entry must map hostPort HOST_PORT -> containerPort NODE_PORT. - self.assertRegex( - text, r"(?m)^\s*extraPortMappings:", - "kind-cluster.yml: missing extraPortMappings", - ) - self.assertRegex( - text, rf"(?m)^\s*-?\s*containerPort:\s*{deploy.NODE_PORT}\b", - "kind-cluster.yml: extraPortMappings containerPort must equal deploy.NODE_PORT", - ) - self.assertRegex( - text, rf"(?m)^\s*-?\s*hostPort:\s*{deploy.HOST_PORT}\b", - "kind-cluster.yml: extraPortMappings hostPort must equal deploy.HOST_PORT", - ) - def test_server_port_forward_is_k3s_only(self): """#463: the KIND server is reached via the NodePort, never a port-forward (the userspace proxy collapsed under CATS load). A server port-forward - exists ONLY for the remote k3s target — which has no extraPortMappings, and - where CATS uses the NodePort at rp2:30080 directly — and every invocation is - gated on cluster_target == 'k3s'.""" + exists ONLY for no-own-cluster targets (k3s, docker-desktop) — which have + no extraPortMappings — and every invocation is gated on the tuple check + cluster_target in ('k3s', 'docker-desktop').""" src = (Path(deploy.__file__)).read_text() # Exactly one server port-forward command, inside start_server_port_forward. cmd_lines = re.findall(r'port-forward".*svc/tmi-server', src) self.assertEqual(len(cmd_lines), 1, - "exactly one server port-forward command (the k3s helper)") - # Every call site (excluding the def) must be immediately gated on k3s. + "exactly one server port-forward command (the no-own-cluster helper)") + # Every call site (excluding the def) must be immediately gated on the tuple. call_sites = re.findall(r"(?