diff --git a/.github/workflows/docker-publish-storefront.yml b/.github/workflows/docker-publish-storefront.yml
index f78721fc..54ddc5de 100644
--- a/.github/workflows/docker-publish-storefront.yml
+++ b/.github/workflows/docker-publish-storefront.yml
@@ -6,6 +6,11 @@ on:
       - main
     tags:
       - 'v*'
+    # NOTE(review): keeping this path filter saves CI cost, but it can skip
+    # publishing storefront images for some main commits. Since runtime resolves
+    # storefront refs from obol's GitCommit, missing short-SHA tags can cause
+    # ImagePullBackOff. If the team wants per-commit immutable availability,
+    # remove this paths filter.
     paths:
       - 'web/public-storefront/**'
       - 'Dockerfile.public-storefront'
diff --git a/.github/workflows/docker-publish-x402.yml b/.github/workflows/docker-publish-x402.yml
index f66e7b46..490bb48a 100644
--- a/.github/workflows/docker-publish-x402.yml
+++ b/.github/workflows/docker-publish-x402.yml
@@ -6,6 +6,11 @@ on:
       - main
     tags:
       - 'v*'
+    # NOTE: this path filter saves CI cost, but it also means some main commits
+    # won't publish matching short-SHA tags for these images. Any runtime path
+    # that resolves image refs from obol's GitCommit (images.Resolve) can fail to
+    # pull if that SHA tag was never published. If per-commit immutability is
+    # required for all main commits, remove this filter (as with the storefront workflow).
     paths:
       - 'internal/x402/**'
      - 'internal/serviceoffercontroller/**'
diff --git a/cmd/obol/model.go b/cmd/obol/model.go
index f7c402e8..2d4b0670 100644
--- a/cmd/obol/model.go
+++ b/cmd/obol/model.go
@@ -233,6 +233,15 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m
         return err
     }
 
+    // User expectation: rerunning `obol model setup` with a new provider/model
+    // should make that model active immediately. ConfigureLiteLLM merges entries
+    // but does not guarantee head-of-list ordering, so explicitly promote the
+    // selected model (when one was provided) to become the default.
+    if len(models) > 0 && strings.TrimSpace(models[0]) != "" {
+        if err := model.PreferModels(cfg, u, []string{models[0]}); err != nil {
+            return fmt.Errorf("set default model to %s: %w", models[0], err)
+        }
+    }
     u.Print("")
     u.Successf("Model configured. To change later, run: obol model setup (or obol model remove <alias>)")
 
diff --git a/docs/guides/stale-resources-troubleshooting.md b/docs/guides/stale-resources-troubleshooting.md
new file mode 100644
index 00000000..3371a4ea
--- /dev/null
+++ b/docs/guides/stale-resources-troubleshooting.md
@@ -0,0 +1,248 @@
+# Stale Resource Troubleshooting Runbook
+
+This runbook helps operators and agents diagnose stale Kubernetes resources in Obol Stack and recover cleanly when `obol stack up` appears healthy but routes or images are stale.
+
+## Scope
+
+Use this when you see issues such as:
+
+- `obol.stack` serves an old UI or the wrong backend.
+- `503 Service Temporarily Unavailable` from nginx.
+- tunnel root (`https://<tunnel-host>/`) returns "no available server".
+- pods stuck in `ImagePullBackOff` for Obol-managed images.
+- `obol sell demo` times out while waiting for pod readiness.
+
+---
+
+## 1) Quick triage commands
+
+Run these first:
+
+```bash
+obol kubectl get pods -A -o wide
+obol kubectl get deploy -A
+obol kubectl get svc -A
+obol kubectl get httproutes.gateway.networking.k8s.io -A
+obol kubectl get ingress -A
+```
+
+If a pod is not ready:
+
+```bash
+obol kubectl describe pod <pod-name> -n <namespace>
+obol kubectl get events -n <namespace> --sort-by=.lastTimestamp
+```
+
+---
+
+## 2) Detect legacy ingress-nginx conflicts
+
+Symptoms:
+
+- `curl -I http://obol.stack` returns nginx 503.
+- old `Ingress` objects still exist in `default`.
+
+Check:
+
+```bash
+obol kubectl get ingress -A
+obol kubectl get deploy,svc -n default | rg "ingress-nginx|obol-frontend-obol-app|erpc"
+```
+
+Expected on a modern stack:
+
+- `HTTPRoute` resources (Gateway API) are present.
+- no legacy nginx ingress objects for `obol.stack`.
+
+If legacy resources exist, remove them:
+
+```bash
+obol kubectl delete ingress obol-frontend-obol-app -n default --ignore-not-found
+obol kubectl delete ingress erpc -n default --ignore-not-found
+obol kubectl delete deployment ingress-nginx-controller -n default --ignore-not-found
+obol kubectl delete service ingress-nginx-controller -n default --ignore-not-found
+```
+
+Verify:
+
+```bash
+curl -I http://obol.stack
+curl -I http://obol.stack:8080
+```
+
+Both should return `200` from the current frontend (Traefik path).
+
+---
+
+## 3) Fix tunnel storefront failures
+
+Symptoms:
+
+- tunnel root returns "no available server".
+- `tunnel-storefront` pod in `ImagePullBackOff`.
+
+Check:
+
+```bash
+obol kubectl get pods -n traefik
+obol kubectl describe pod <tunnel-storefront-pod> -n traefik
+```
+
+If the error is an image tag that cannot be found:
+
+```bash
+obol kubectl set image deployment/tunnel-storefront -n traefik \
+  storefront=ghcr.io/obolnetwork/obol-stack-public-storefront:latest
+obol kubectl rollout status deployment/tunnel-storefront -n traefik --timeout=180s
+```
+
+Verify:
+
+```bash
+curl -I https://<tunnel-host>/
+```
+
+### Investigation: quick tunnel hostname drift causing storefront 404
+
+Observed pattern during the incident:
+
+- `obol tunnel status` reported an active quick URL.
+- `https://<tunnel-host>/skill.md` returned `200`.
+- `https://<tunnel-host>/` returned `404 page not found` (or `530` during reconnect).
+
+Root cause:
+
+- `traefik/tunnel-storefront` `HTTPRoute` was pinned to an older quick tunnel
+  hostname in `spec.hostnames`.
+- `cloudflared` rotated to a new `*.trycloudflare.com` hostname.
+- The root storefront route missed due to the hostname mismatch, while other public
+  routes without hostname pinning (for example `x402/obol-skill-md-route`) kept
+  working.
+
+How to confirm quickly:
+
+```bash
+# 1) Current quick tunnel URL from status/logs
+obol tunnel status
+obol kubectl logs -n traefik deploy/cloudflared --since=10m \
+  | rg -o 'https://[a-z0-9-]+\.trycloudflare\.com' | tail -1
+
+# 2) What hostname the storefront route is pinned to
+obol kubectl get httproute tunnel-storefront -n traefik -o jsonpath='{.spec.hostnames[0]}'
+
+# 3) Reproduce the mismatch behavior
+curl -i https://<tunnel-host>/
+curl -i https://<tunnel-host>/skill.md
+```
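+
+The two hostname checks above (1 and 2) can be rolled into a single drift check. This is a sketch, not a built-in obol command: it assumes the `traefik` namespace and `tunnel-storefront` route name from this incident, and that `rg` is available.
+
+```bash
+# Hostname cloudflared is currently serving (scheme stripped for comparison)
+live="$(obol kubectl logs -n traefik deploy/cloudflared --since=10m \
+  | rg -o 'https://[a-z0-9-]+\.trycloudflare\.com' | tail -1 | sed 's#https://##')"
+
+# Hostname the storefront route is pinned to (empty if not host-pinned)
+pinned="$(obol kubectl get httproute tunnel-storefront -n traefik \
+  -o jsonpath='{.spec.hostnames[0]}')"
+
+echo "live hostname:   ${live:-<none found>}"
+echo "pinned hostname: ${pinned:-<route not host-pinned>}"
+
+if [ -n "$pinned" ] && [ "$live" != "$pinned" ]; then
+  echo "MISMATCH: tunnel-storefront is pinned to a stale hostname"
+fi
+```
+
+If `pinned` comes back empty, the route is not host-pinned and hostname rotation alone does not explain the 404.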
+
+Immediate recovery options:
+
+```bash
+# Option A: repin to the current quick hostname
+obol kubectl patch httproute tunnel-storefront -n traefik --type merge \
+  -p '{"spec":{"hostnames":["<tunnel-host>"]}}'
+
+# Option B: remove the host pin (works across quick hostname rotations)
+obol kubectl patch httproute tunnel-storefront -n traefik --type json \
+  -p='[{"op":"remove","path":"/spec/hostnames"}]'
+```
+
+Post-fix verification:
+
+```bash
+curl -i https://<tunnel-host>/
+curl -i https://<tunnel-host>/skill.md
+obol kubectl run -n traefik tmp-curl --rm -i --restart=Never --image=curlimages/curl -- \
+  sh -lc "curl -i -H 'Host: <tunnel-host>' http://traefik.traefik.svc.cluster.local/ | sed -n '1,12p'"
+```
+
+Expected:
+
+- storefront `/` returns `200`.
+- `/skill.md` continues to return `200`.
+- internal Traefik check with an explicit `Host` header returns `200`.
+
+---
+
+## 4) Fix demo pod scheduling timeouts
+
+Symptoms:
+
+- `obol sell demo` fails waiting for pod readiness.
+- pod remains `Pending`.
+
+Check the scheduler reason:
+
+```bash
+obol kubectl describe pod -n demo -l app=demo-hello
+obol kubectl describe node
+```
+
+Common cause: `Insufficient memory`.
+
+Free memory by removing high-request workloads not needed for this test, then wait for the rollout:
+
+```bash
+obol kubectl rollout status deployment/demo-hello -n demo --timeout=180s
+```
+
+---
+
+## 5) Image freshness model (important)
+
+`obol stack up` is deterministic and does not always "pull latest":
+
+- Some images are pinned by version tag in values files.
+- Some are digest-pinned.
+- Some are commit-derived via `images.Resolve(...)` (using the obol binary's `GitCommit`).
+
+If commit-derived image tags are not published for that commit SHA, runtime pull failures occur.
+
+---
+
+## 5b) LiteLLM 401 due to accidental OpenAI placeholders
+
+Observed failure pattern:
+
+- chat fails with `Incorrect API key provided: <placeholder-key>`
+- LiteLLM config contains a placeholder-style model alias and a matching placeholder `OPENAI_API_KEY`
+
+Why this happens:
+
+- auto cloud-provider detection can import a stale/default agent model from `~/.openclaw/openclaw.json`
+- if the shell environment has a placeholder OpenAI key, LiteLLM can be patched with invalid credentials
+
+Mitigations:
+
+- set the intended provider key (for Anthropic: `ANTHROPIC_API_KEY`) before `obol stack up`
+- run `obol model prefer <alias>` then `obol model sync`
+- remove wrong aliases with `obol model remove <alias>`
+
+---
+
+## 6) Agent checklist for stale resources
+
+Use this checklist in order:
+
+1. `obol kubectl get ingress -A` -> must be empty (or contain only expected non-Obol custom ingresses).
+2. `obol kubectl get httproutes.gateway.networking.k8s.io -A` -> confirm `obol-frontend` and service routes exist.
+3. `obol kubectl get pods -n traefik` -> `traefik`, `cloudflared`, and `tunnel-storefront` healthy.
+4. `obol kubectl describe pod ...` for any `Pending`/`ImagePullBackOff`.
+5. `curl -I http://obol.stack` and `curl -I http://obol.stack:8080` -> expect `200`.
+6. `curl -I https://<tunnel-host>/` -> expect `200`.
+7. If an image pull fails, verify the referenced tag exists in the registry and compare it to the `Git Commit` reported by `obol version` (see the sketch below).
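+
+A sketch for checklist item 7. Assumptions: `obol version` prints a `Git Commit` field, commit-derived refs use the `sha-<short-commit>` tag convention seen elsewhere in the stack, and the storefront repository below is only an example; adjust the parsing and image name to your case.
+
+```bash
+# 1) Image ref the failing pod is actually trying to pull
+obol kubectl get pod <pod-name> -n <namespace> \
+  -o jsonpath='{.spec.containers[*].image}{"\n"}'
+
+# 2) Commit the installed obol binary reports
+obol version | grep -i 'git commit'
+
+# 3) Does the registry have a matching short-SHA tag?
+#    (docker needs network access to ghcr.io, and auth for private images)
+docker manifest inspect ghcr.io/obolnetwork/obol-stack-public-storefront:sha-<short-commit> \
+  >/dev/null && echo "tag exists" || echo "tag missing: publish it or repin the deployment"
+```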
+
+---
+
+## 7) Code-level hardening recommendations
+
+Current hardening implemented:
+
+- `stack up` now checks for known stale ingress conflicts on every run.
+- Default mode is non-destructive (warn only). Auto-clean is opt-in with `OBOL_STACK_AUTO_CLEAN_LEGACY=true`.
+
+Additional recommended hardening:
+
+- Ensure publish workflows produce short-SHA images for all commits that can be deployed by commit-derived refs.
+- Prefer immutable pins (digest or explicit version tags) over `:latest` for client reproducibility.
+- Add CI checks that verify deploy-time image refs exist in the registry before release (sketched below).
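+
+For the last recommendation, a minimal sketch of what such a CI gate could look like. Nothing here exists in the current workflows: `SHORT_SHA`, the image list, and the tag format are illustrative assumptions, and `docker manifest inspect` needs registry access (plus auth for private images).
+
+```bash
+#!/usr/bin/env bash
+# Fail the job if any deploy-time image ref is missing from the registry.
+set -euo pipefail
+
+SHORT_SHA="${SHORT_SHA:?set to the short commit SHA being released}"
+
+refs=(
+  "ghcr.io/obolnetwork/obol-stack-public-storefront:sha-${SHORT_SHA}"
+)
+
+for ref in "${refs[@]}"; do
+  if ! docker manifest inspect "$ref" >/dev/null 2>&1; then
+    echo "missing image ref: $ref" >&2
+    exit 1
+  fi
+done
+echo "all deploy-time image refs exist"
+```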
diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml
index 4e70ec1e..8627852e 100644
--- a/internal/embed/infrastructure/base/templates/llm.yaml
+++ b/internal/embed/infrastructure/base/templates/llm.yaml
@@ -148,6 +148,8 @@ spec:
           # No Postgres required — /model/new and /model/delete work via
           # in-memory router + config.yaml persistence.
           # Source: https://github.com/ObolNetwork/litellm
+          # Pinned to a specific build for reproducibility; bump this ref to roll
+          # newer LiteLLM behavior into stack up.
           image: ghcr.io/obolnetwork/litellm:sha-c16b156
           imagePullPolicy: IfNotPresent
           args:
diff --git a/internal/embed/infrastructure/cloudflared/values.yaml b/internal/embed/infrastructure/cloudflared/values.yaml
index a41a4715..16ac7cec 100644
--- a/internal/embed/infrastructure/cloudflared/values.yaml
+++ b/internal/embed/infrastructure/cloudflared/values.yaml
@@ -5,6 +5,9 @@ transport:
 
 image:
   repository: cloudflare/cloudflared
+  # Keep this pinned to a reviewed release for reproducible tunnel behavior.
+  # If you want "latest" cadence with safety, automate bumping this value
+  # to newer immutable versions in PRs rather than using :latest directly.
   tag: "2026.3.0"
 
 metrics:
diff --git a/internal/stack/stack.go b/internal/stack/stack.go
index f5bae1a5..abe11560 100644
--- a/internal/stack/stack.go
+++ b/internal/stack/stack.go
@@ -429,6 +429,13 @@ func syncDefaults(cfg *config.Config, u *ui.UI, kubeconfigPath string, dataDir s
         return fmt.Errorf("failed to apply defaults helmfile: %w", err)
     }
 
+    // Non-destructive stale-resource guard.
+    // By default this only warns. Set OBOL_STACK_AUTO_CLEAN_LEGACY=true to
+    // explicitly opt in to automatic cleanup of known legacy ingress resources.
+    if err := reconcileStackInvariants(cfg, u); err != nil {
+        u.Warnf("Stack reconciliation encountered errors: %v", err)
+    }
+
     u.Success("Default infrastructure deployed")
 
     if previousLiteLLMConfig != "" {
@@ -597,6 +604,81 @@ func obolPluginInstalled(marketplaceName string) bool {
     return false
 }
 
+func reconcileStackInvariants(cfg *config.Config, u *ui.UI) error {
+    hasLegacy, err := hasLegacyIngressResources(cfg)
+    if err != nil {
+        return err
+    }
+    if hasLegacy {
+        if strings.EqualFold(strings.TrimSpace(os.Getenv("OBOL_STACK_AUTO_CLEAN_LEGACY")), "true") {
+            u.Dim("Found legacy ingress-nginx/default resources; auto-clean enabled")
+            if err := cleanupLegacyIngressResources(cfg); err != nil {
+                return err
+            }
+            u.Dim("Legacy ingress conflicts removed")
+            return nil
+        }
+        u.Warn("Legacy ingress resources detected from older stack layouts.")
+        u.Dim("  No resources were deleted (safe default).")
+        u.Dim("  To auto-clean on next run: OBOL_STACK_AUTO_CLEAN_LEGACY=true obol stack up")
+        u.Dim("  Or clean manually:")
+        u.Dim("    obol kubectl delete ingress obol-frontend-obol-app -n default --ignore-not-found")
+        u.Dim("    obol kubectl delete ingress erpc -n default --ignore-not-found")
+        u.Dim("    obol kubectl delete deployment ingress-nginx-controller -n default --ignore-not-found")
+        u.Dim("    obol kubectl delete service ingress-nginx-controller -n default --ignore-not-found")
+    }
+    return nil
+}
+
+// cleanupLegacyIngressResources removes obsolete ingress-nginx and default
+// namespace frontend/eRPC resources created by old stack layouts.
+// Safe/idempotent: deletes use --ignore-not-found.
+func cleanupLegacyIngressResources(cfg *config.Config) error {
+    bin, kc := kubectl.Paths(cfg)
+    resources := [][]string{
+        {"ingress", "obol-frontend-obol-app", "-n", "default"},
+        {"ingress", "erpc", "-n", "default"},
+        {"deployment", "ingress-nginx-controller", "-n", "default"},
+        {"service", "ingress-nginx-controller", "-n", "default"},
+    }
+
+    var errs []string
+    for _, r := range resources {
+        args := append([]string{"delete"}, r...)
+        args = append(args, "--ignore-not-found")
+        if err := kubectl.RunSilent(bin, kc, args...); err != nil {
+            errs = append(errs, err.Error())
+        }
+    }
+    if len(errs) > 0 {
+        return errors.New(strings.Join(errs, "; "))
+    }
+    return nil
+}
+
+func hasLegacyIngressResources(cfg *config.Config) (bool, error) {
+    bin, kc := kubectl.Paths(cfg)
+    resources := [][]string{
+        {"ingress", "obol-frontend-obol-app", "-n", "default"},
+        {"ingress", "erpc", "-n", "default"},
+        {"deployment", "ingress-nginx-controller", "-n", "default"},
+        {"service", "ingress-nginx-controller", "-n", "default"},
+    }
+    for _, r := range resources {
+        args := append([]string{"get"}, r...)
+        args = append(args, "-o", "name")
+        _, err := kubectl.Output(bin, kc, args...)
+        if err == nil {
+            return true, nil
+        }
+        if strings.Contains(strings.ToLower(err.Error()), "notfound") {
+            continue
+        }
+        return false, err
+    }
+    return false, nil
+}
+
 // autoConfigureLLM detects host Ollama and imported cloud providers, then
 // auto-configures LiteLLM so inference works out of the box.
 // Patches all providers first, then does a single restart.
diff --git a/internal/tunnel/tunnel.go b/internal/tunnel/tunnel.go
index 367ad877..30decc9d 100644
--- a/internal/tunnel/tunnel.go
+++ b/internal/tunnel/tunnel.go
@@ -922,7 +922,13 @@ func CreateStorefront(cfg *config.Config, tunnelURL string) error {
                 },
             },
         },
-        // HTTPRoute: tunnel hostname → storefront (more specific than frontend catch-all).
+        // HTTPRoute: tunnel hostname -> storefront (more specific than frontend catch-all).
+        //
+        // NOTE: quick tunnels rotate hostnames frequently and can leave stale
+        // host-pinned routes behind during restart races. If storefront "/" starts
+        // returning 404 while "/skill.md" still works, stale host pinning is the
+        // first thing to check. A robust fix is to avoid host pinning for quick
+        // tunnels (or update hostnames from the active cloudflared pod logs).
         {
             "apiVersion": "gateway.networking.k8s.io/v1",
             "kind": "HTTPRoute",