diff --git a/.gitignore b/.gitignore
index aedee04af7..cdd3039551 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,7 +39,6 @@ __pycache__
 
 # node env (used by devcontainers cli)
 node_modules
-package.json
 package-lock.json
 !docs/package.json
 !docs/package-lock.json
diff --git a/DOG-PHONE-INTERFACE.md b/DOG-PHONE-INTERFACE.md
new file mode 100644
index 0000000000..157044bf09
--- /dev/null
+++ b/DOG-PHONE-INTERFACE.md
@@ -0,0 +1,340 @@
+This plan hinges on one critical thing — whether the Unitree WebRTC library supports remote (non-LAN) mode. Option B depends on this.
+
+It does — **`WebRTCConnectionMethod.Remote` exists** in `unitree_webrtc_connect/webrtc_driver.py:29`. It routes signaling through Unitree's own cloud using `send_sdp_to_remote_peer(sn, token, public_key)`. This is how the official Unitree Go mobile app talks to your dog from anywhere. Option B is fully viable.
+
+The credentials Remote mode needs are listed in Phase 1.2 below.
+
+The implementation plan follows. It is specific about what to change in code, what to provision, and what iPhone gotchas to watch for.
+
+## System diagram (the whole shape)
+
+```
+                     INTERNET
+   ┌─────────────────────────────────────────────────────────────────┐
+   │                                                                  │
+   │   Unitree cloud                                                  │
+   │   (signaling + TURN relay)                                       │
+   │           ▲                                                      │
+   │  ┌────────┼──────────┐                                          │
+   │  │ SDP    │   TURN   │                                          │
+   │  │ offer  │   relay  │                                          │
+   │  ▼        │          ▼                                          │
+   │ ┌──────────────┐    ┌─────────────────────┐                     │
+   │ │ Cloud dimos  │◄──►│  iPhone (Safari)    │                     │
+   │ │ (small VPS)  │    │  - Vercel PWA       │                     │
+   │ │              │    │  - cellular up      │                     │
+   │ │ Caddy+TLS    │    │  - hotspot ON       │                     │
+   │ │ FastAPI:5555 │    └─────────┬───────────┘                     │
+   │ │ agent loop   │              │                                  │
+   │ │ skills       │              │ WiFi 2.4GHz                      │
+   │ │ WebRTC peer  │              │ 172.20.10.0/28                   │
+   │ └──────────────┘              ▼                                  │
+   │        ▲           ┌──────────────────┐                          │
+   │        │           │     Go2 (STA)    │                          │
+   │        └───────────│  - joined iPhone │                          │
+   │       WebRTC data  │    hotspot       │                          │
+   │       channel via  │  - WebRTC peer   │                          │
+   │       Unitree TURN │  - Unitree SDK   │                          │
+   │                    └──────────────────┘                          │
+   └─────────────────────────────────────────────────────────────────┘
+
+   Three legs, all over public internet:
+   1. iPhone → Vercel webapp        (HTTPS, cellular)
+   2. iPhone → Cloud dimos API      (HTTPS POST + SSE, cellular)
+   3. Cloud dimos ↔ Go2             (WebRTC data channel, TURN-relayed)
+```
+
+The dog never touches your VPS directly — both ends connect to Unitree's signaling, exchange SDP, then a TURN-relayed data channel forms over Unitree's relay. You don't run a STUN/TURN server.
+
+## Phase 1 — Pre-work (do this first, before any coding)
+
+**1.1 Register the dog with Unitree's cloud** (one-time, ~10 minutes)
+- Install the official **Unitree Go** app on a phone, create an account
+- Pair the dog through the app (Bluetooth then WiFi setup)
+- Once paired, your Unitree account "owns" this dog's serial number
+- Verify by opening the app remotely (on cellular only) and confirming you can see the dog status
+
+**1.2 Get credentials from your Unitree account**
+- Note the **serial number** (printed on the dog, e.g. `B42D...`)
+- Your Unitree account **email** and **password** (used for OAuth-like token exchange via `fetch_token`)
+
+**1.3 Provision the VPS**
+- Any provider: DigitalOcean, Hetzner, Fly.io, Railway. ~$10/mo box is plenty
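+- Ubuntu 24.04 preferred (ships Python 3.12); 22.04 also works, but `python3.12` is not in its default repositories, so add a source such as the deadsnakes PPA before Phase 3.1. 2 vCPU, 4 GB RAM, public IPv4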
+- Open ports 80, 443 (Caddy will handle TLS)
+- Buy a domain or use a subdomain you control. e.g. `dog.yourname.dev`
+
+## Phase 2 — Patch dimos for Remote mode
+
+The existing `UnitreeWebRTCConnection` hard-codes `LocalSTA`. Add Remote support.
+
+**File**: [dimos/robot/unitree/connection.py](dimos/robot/unitree/connection.py), around lines 93–101
+
+```python
+import os
+
+class UnitreeWebRTCConnection(Resource):
+    def __init__(
+        self,
+        ip: str | None = None,
+        mode: str = "ai",
+        connection_method: str = "LocalSTA",   # or "Remote"
+        serial_number: str | None = None,
+        username: str | None = None,
+        password: str | None = None,
+    ) -> None:
+        super().__init__()
+        self.ip = ip
+        self.mode = mode
+
+        if connection_method == "Remote":
+            sn = serial_number or os.environ["GO2_SERIAL_NUMBER"]
+            user = username or os.environ["UNITREE_USERNAME"]
+            pwd = password or os.environ["UNITREE_PASSWORD"]
+            self.conn = LegionConnection(
+                WebRTCConnectionMethod.Remote,
+                serialNumber=sn,
+                username=user,
+                password=pwd,
+            )
+        else:
+            self.conn = LegionConnection(WebRTCConnectionMethod.LocalSTA, ip=self.ip)
+```
+
+Then in the blueprint that boots GO2Connection, pass `connection_method="Remote"`. Easiest: read it from an env var:
+
+```python
+GO2Connection.blueprint(
+    connection_method=os.environ.get("GO2_CONNECTION_METHOD", "LocalSTA"),
+)
+```
+
+## Phase 3 — Deploy dimos to the VPS
+
+**3.1 Install**
+```bash
+ssh root@your-vps
+apt update && apt install -y python3.12 python3.12-venv git caddy
+git clone https://github.com/your-fork/dimos /opt/dimos
+cd /opt/dimos
+python3.12 -m venv .venv && source .venv/bin/activate
+pip install -e .
+```
+
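+**3.2 Environment file** `/opt/dimos/.env` — systemd's `EnvironmentFile` reads values literally (no `$(...)` expansion, and trailing `# comments` become part of the value). Run `openssl rand -hex 16` yourself and paste the output as `DIMOS_API_TOKEN` (auth for the webapp); `OPENAI_API_KEY` is for the agent + TTS.
+```bash
+GO2_CONNECTION_METHOD=Remote
+GO2_SERIAL_NUMBER=B42D...your-dog-sn...
+UNITREE_USERNAME=your.email@example.com
+UNITREE_PASSWORD=your-unitree-password
+OPENAI_API_KEY=sk-...
+DIMOS_API_TOKEN=<paste-openssl-output-here>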
+HOST=0.0.0.0
+PORT=5555
+```
+
+**3.3 Add token auth** — anyone can hit your VPS otherwise. In [dimos/web/dimos_interface/api/server.py](dimos/web/dimos_interface/api/server.py) before the `/submit_query` and `/text_stream/*` handlers, add:
+
+```python
+from fastapi import Header, HTTPException
+import os
+
+EXPECTED_TOKEN = os.environ.get("DIMOS_API_TOKEN", "")
+
+def require_token(authorization: str = Header(None)):
+    if not EXPECTED_TOKEN or authorization != f"Bearer {EXPECTED_TOKEN}":
+        raise HTTPException(status_code=401, detail="invalid token")
+```
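+Then add `Depends(require_token)` to your routes. Note that this version only checks the `Authorization` header; the SSE and `/upload_audio` calls in Phase 4 pass the token as `?token=` instead, so extend `require_token` to also accept a `token` query parameter before protecting those routes.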
+
+**3.4 Caddy reverse proxy** — TLS termination + the right CORS so Vercel can call it.
+
+`/etc/caddy/Caddyfile`:
+```
+dog.yourname.dev {
+    @cors_preflight method OPTIONS
+    header Access-Control-Allow-Origin "https://your-app.vercel.app"
+    header Access-Control-Allow-Headers "Authorization, Content-Type"
+    header Access-Control-Allow-Methods "GET, POST, OPTIONS"
+    respond @cors_preflight 204
+
+    reverse_proxy localhost:5555 {
+        flush_interval -1     # critical for SSE — disables buffering
+    }
+}
+```
+Then: `systemctl reload caddy`. Caddy auto-provisions a Let's Encrypt cert.
+
+**3.5 systemd service** `/etc/systemd/system/dimos.service`
+```ini
+[Unit]
+After=network.target
+
+[Service]
+WorkingDirectory=/opt/dimos
+EnvironmentFile=/opt/dimos/.env
+ExecStart=/opt/dimos/.venv/bin/dimos run unitree-go2-agentic
+Restart=on-failure
+RestartSec=5
+
+[Install]
+WantedBy=multi-user.target
+```
+`systemctl enable --now dimos`. Tail with `journalctl -u dimos -f`.
+
+## Phase 4 — Vercel webapp (iPhone-tuned)
+
+Minimum surface: one page, voice button, status display. SSR not required — a static SPA is fine. Skeleton in Next.js App Router:
+
+`app/page.tsx`:
+```tsx
+"use client";
+import { useState, useEffect, useRef } from "react";
+
+const API = process.env.NEXT_PUBLIC_DIMOS_API!;       // https://dog.yourname.dev
+const TOKEN = process.env.NEXT_PUBLIC_DIMOS_TOKEN!;   // public for hackathon; secure later
+
+export default function Page() {
+  const [state, setState] = useState<any>({});
+  const [recording, setRecording] = useState(false);
+  const recRef = useRef<MediaRecorder | null>(null);
+  const chunksRef = useRef<Blob[]>([]);
+
+  // SSE for agent_state JSON snapshots
+  useEffect(() => {
+    const es = new EventSource(
+      `${API}/text_stream/agent_state?token=${TOKEN}`  // EventSource can't set headers, use query
+    );
+    es.onmessage = (e) => setState(JSON.parse(e.data));
+    es.onerror = () => console.log("SSE dropped, browser will reconnect");
+    return () => es.close();
+  }, []);
+
+  async function sendQuery(text: string) {
+    await fetch(`${API}/submit_query`, {
+      method: "POST",
+      headers: { "Authorization": `Bearer ${TOKEN}`, "Content-Type": "application/json" },
+      body: JSON.stringify({ query: `<user_speech>${text}</user_speech>` }),
+    });
+  }
+
+  async function startRecording() {
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+    const rec = new MediaRecorder(stream, { mimeType: "audio/mp4" });   // iOS Safari needs mp4
+    chunksRef.current = [];
+    rec.ondataavailable = (e) => chunksRef.current.push(e.data);
+    rec.onstop = async () => {
+      const blob = new Blob(chunksRef.current, { type: "audio/mp4" });
+      const fd = new FormData(); fd.append("audio", blob, "rec.mp4");
+      const r = await fetch(`${API}/upload_audio?token=${TOKEN}`, { method: "POST", body: fd });
+      const { text } = await r.json();
+      if (text) sendQuery(text);
+    };
+    rec.start();
+    recRef.current = rec;
+    setRecording(true);
+  }
+  function stopRecording() {
+    recRef.current?.stop();
+    setRecording(false);
+  }
+
+  return (
+    <main>
+      <button
+        onTouchStart={startRecording}
+        onTouchEnd={stopRecording}
+        style={{ width: 200, height: 200, borderRadius: 100,
+                 background: recording ? "red" : "navy", color: "white" }}>
+        {recording ? "Listening…" : "Hold to speak"}
+      </button>
+      <pre>{JSON.stringify(state, null, 2)}</pre>
+    </main>
+  );
+}
+```
+
+**iPhone-specific webapp notes** (read every line, these are landmines):
+
+1. **`MediaRecorder` on iOS Safari requires `audio/mp4`**, not `audio/webm`. Most tutorials get this wrong.
+2. **`getUserMedia` requires HTTPS + a user gesture** (tap). The button's `onTouchStart` works; auto-starting on page load won't.
+3. **`EventSource` can't send custom headers** — that's why the token goes in the query string. Add `?token=` handling on the server.
+4. **Safari pauses SSE when the tab is backgrounded.** The connection auto-reconnects on resume, but expect a ~1s gap. Don't rely on state snapshots arriving during background. Show "reconnecting" if `es.onerror` fires.
+5. **No Service Worker for offline** unless you really want PWA. For a hackathon, just open Safari and use it as a normal web page. If you want "Add to Home Screen" feel, add a minimal `manifest.json`.
+6. **iOS audio echo cancellation is aggressive** — if the dog's TTS comes back through the phone mic somehow, Safari may suppress your speech. Since TTS comes out the dog's speaker (not the phone), this won't happen — but if you also play state audio on the phone, watch for it.
+7. **Tap-to-unlock audio**: if you ever play audio in the webapp, the first play must be inside a user gesture handler. Otherwise iOS silently ignores it.
+
+`.env.local`:
+```
+NEXT_PUBLIC_DIMOS_API=https://dog.yourname.dev
+NEXT_PUBLIC_DIMOS_TOKEN=<same hex string as VPS>
+```
+
+Deploy: `vercel --prod`. Done.
+
+## Phase 5 — iPhone hotspot configuration
+
+This is the single most fiddly part of the demo. Practice it before showtime.
+
+**Settings → Personal Hotspot:**
+- **Allow Others to Join: ON**
+- **Maximize Compatibility: ON** ← critical. Forces 2.4 GHz. Go2's WiFi only does 2.4 GHz.
+- Set a memorable password — the dog will need it
+- Note the network name (matches your iPhone name in Settings → General → About → Name)
+
+**Pair the dog to the hotspot:**
+- Open the Unitree Go app on a *second* phone (or same phone if you can briefly disable hotspot)
+- Connect that phone to the **dog's AP** (`UnitreeGo2-XXXX`)
+- In the app: device WiFi settings → enter your iPhone's hotspot SSID and password
+- Dog reboots, joins iPhone hotspot
+- Verify: dog's status LED shows connected. In your VPS dimos logs, `WebRTC connection 🟢` should appear.
+
+**Keep the iPhone hotspot alive during demo:**
+- Personal Hotspot drops if no clients are connected for ~90s. Once the dog connects this isn't an issue.
+- Don't lock the screen during demo. Or: Settings → Display & Brightness → Auto-Lock → Never.
+- Don't switch to a non-Safari app — iOS may pause Safari's SSE. If you must, keep Safari foreground.
+- Low Power Mode disables the hotspot. Turn it off.
+
+## Phase 6 — End-to-end smoke test
+
+In this order, with each step verified before moving on:
+
+1. **VPS**: `curl https://dog.yourname.dev/text_streams -H "Authorization: Bearer $TOKEN"` returns the stream list.
+2. **WebRTC**: `journalctl -u dimos -f` shows `WebRTC connection 🟢 connected` after dog joins iPhone hotspot.
+3. **Audio upload**: from Mac, `curl -F audio=@test.m4a "https://dog.yourname.dev/upload_audio?token=$TOKEN"` returns transcript.
+4. **Query**: `curl -X POST https://dog.yourname.dev/submit_query -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" -d '{"query":"<user_speech>say hello</user_speech>"}'` → dog speaks via TTS.
+5. **SSE**: `curl -N "https://dog.yourname.dev/text_stream/agent_state?token=$TOKEN"` streams JSON snapshots.
+6. **iPhone**: open Vercel URL in Safari, push the button, say "find the bathroom." Watch the dog move + the state JSON update.
+
+## What can break on demo day (and the fix)
+
+| Failure | Likely cause | Fix |
+|---|---|---|
+| Dog won't join hotspot | iPhone on 5GHz | Maximize Compatibility ON |
+| Dog connects but commands don't work | Unitree token expired | Restart dimos service; tokens refresh on boot |
+| State JSON stops updating | Safari backgrounded | Bring Safari foreground; SSE auto-reconnects |
+| Mic button does nothing | First tap didn't grant mic permission | Reload page, tap once to trigger permission dialog |
+| Dog moves the wrong way | `lx/ly` axis flip in `move()` | Already handled in [connection.py:182](dimos/robot/unitree/connection.py) but verify with replay first |
+| WebRTC reconnect loop | Cellular drops while dog was connected | Dog auto-reconnects through Unitree's signaling; just wait 10s |
+| ~500ms command latency | TURN relay path is long | Acceptable for nav; do *not* use this for real-time teleop |
+| `503` from `/submit_query` | Agent locked on previous query | Restart dimos; for the demo, queue queries client-side |
+
+## Latency expectations on cellular
+
+- HTTP POST iPhone → VPS: 50-120 ms
+- Agent thinks (LLM call): 800-2500 ms
+- Skill dispatches Twist → VPS publishes via WebRTC → Unitree TURN → dog: 100-250 ms
+- Dog responds physically: another ~200 ms
+
+**Total user-perceived: ~1.5-3 seconds from voice command to motion.** Fine for "find the bathroom," not for precise teleop. Plan the demo accordingly.
+
+## What I would build first
+
+Don't try the whole stack at once. Order of risk:
+
+1. **Day 1**: VPS + Caddy + dimos in replay mode + Vercel skeleton with SSE. Verify the JSON state shows up on iPhone Safari. No dog yet.
+2. **Day 2**: Patch Remote mode, pair dog to iPhone hotspot, get the dog talking via TTS over the cloud connection. No agent skills yet.
+3. **Day 3**: Wire the blind-assistant skills, system prompt, run a real "find the X" trip.
+4. **Day 4**: Iron out the failure modes from the table above. Practice the hotspot setup until it's muscle memory.
+
+Possible next steps: write the actual `connection.py` patch so it can be dropped in, or the Vercel project files in full.
\ No newline at end of file
diff --git a/README.md b/README.md
index e1cdb5623a..caee9d6fac 100644
--- a/README.md
+++ b/README.md
@@ -1,329 +1,158 @@
 <div align="center">
 
-<img width="1000" alt="banner_bordered_trimmed" src="https://github.com/user-attachments/assets/64f13b39-da06-4f58-add0-cfc44f04db4e" />
+### Team Perception
 
-<h2>The Agentive Operating System for Physical Space</h2>
+# 🦮 Goldie
 
-[![Discord](https://img.shields.io/discord/1341146487186391173?style=flat-square&logo=discord&logoColor=white&label=Discord&color=5865F2)](https://discord.gg/dimos)
-[![Stars](https://img.shields.io/github/stars/dimensionalOS/dimos?style=flat-square)](https://github.com/dimensionalOS/dimos/stargazers)
-[![Forks](https://img.shields.io/github/forks/dimensionalOS/dimos?style=flat-square)](https://github.com/dimensionalOS/dimos/fork)
-[![Contributors](https://img.shields.io/github/contributors/dimensionalOS/dimos?style=flat-square)](https://github.com/dimensionalOS/dimos/graphs/contributors)
-![Nix](https://img.shields.io/badge/Nix-flakes-5277C3?style=flat-square&logo=NixOS&logoColor=white)
-![NixOS](https://img.shields.io/badge/NixOS-supported-5277C3?style=flat-square&logo=NixOS&logoColor=white)
-![CUDA](https://img.shields.io/badge/CUDA-supported-76B900?style=flat-square&logo=nvidia&logoColor=white)
-[![Docker](https://img.shields.io/badge/Docker-ready-2496ED?style=flat-square&logo=docker&logoColor=white)](https://www.docker.com/)
+### A phone-first guide-dog interface for the Unitree Go2
 
-<a href="https://trendshift.io/repositories/23169" target="_blank"><img src="https://trendshift.io/api/badge/repositories/23169" alt="dimensionalOS%2Fdimos | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+**Goldie** lets a low-vision or blind user point a real quadruped robot at a destination, hear it confirm out loud, and follow it there — using only their phone.
 
-<big><big>
+It's an iPhone-tuned PWA wired to an LLM-driven agent stack built on top of [DimOS](https://github.com/dimensionalOS/dimos), with real-time teleop fallback and a voice loop tuned end-to-end for accessibility.
 
-[Hardware](#hardware) •
-[Installation](#installation) •
-[Agent CLI & MCP](#agent-cli-and-mcp) •
-[Blueprints](#blueprints) •
-[Development](#development)
+### [Demo Video →](https://canva.link/74vvhbiw7jwr4as) · [Presentation →](https://canva.link/perception-goldie)
 
-⚠️ **Pre-Release Beta** ⚠️
+</div>
 
-</big></big>
+---
 
-</div>
+## The story
 
-# Intro
+People who are blind or low-vision rely on guide dogs to navigate the world. Guide dogs work — but they're scarce, expensive, take years to train, and can't be summoned on demand. We wanted to see how close we could get to that experience with a quadruped robot, an LLM, and a phone.
 
-Dimensional is the modern operating system for generalist robotics. We are setting the next-generation SDK standard, integrating with the majority of robot manufacturers.
+The pitch: **you hold a button and say "find the bathroom." The dog confirms out loud, gets up, walks you there, and tells you when you've arrived.**
 
-With a simple install and no ROS required, build physical applications entirely in python that run on any humanoid, quadruped, or drone.
+We started from the [DimOS](https://github.com/dimensionalOS/dimos) robotics SDK — which already handles WebRTC to a Unitree Go2, ROS-compatible transports, LCM message bus, MCP-driven LLM agents, and SLAM/navigation — and built two things on top:
 
-Dimensional is agent native -- "vibecode" your robots in natural language and build (local & hosted) multi-agent systems that work seamlessly with your hardware. Agents run as native modules — subscribing to any embedded stream, from perception (lidar, camera) and spatial memory down to control loops and motor drivers.
-<table>
-  <tr>
-    <td align="center" width="50%">
-      <a href="docs/capabilities/navigation/native/index.md"><img src="assets/readme/navigation.gif" alt="Navigation" width="100%"></a>
-    </td>
-    <td align="center" width="50%">
-      <img src="assets/readme/perception.png" alt="Perception" width="100%">
-    </td>
-  </tr>
-  <tr>
-    <td align="center" width="50%">
-      <h3><a href="docs/capabilities/navigation/native/index.md">Navigation and Mapping</a></h3>
-      SLAM, dynamic obstacle avoidance, route planning, and autonomous exploration — via both DimOS native and ROS<br><a href="https://x.com/stash_pomichter/status/2010471593806545367">Watch video</a>
-    </td>
-    <td align="center" width="50%">
-      <h3>Perception</h3>
-      Detectors, 3d projections, VLMs, Audio processing
-    </td>
-  </tr>
-  <tr>
-    <td align="center" width="50%">
-      <a href="docs/capabilities/agents/readme.md"><img src="assets/readme/agentic_control.gif" alt="Agents" width="100%"></a>
-    </td>
-    <td align="center" width="50%">
-      <img src="assets/readme/spatial_memory.gif" alt="Spatial Memory" width="100%">
-    </td>
-  </tr>
-  <tr>
-    <td align="center" width="50%">
-      <h3><a href="docs/capabilities/agents/readme.md">Agentive Control, MCP</a></h3>
-      "hey Robot, go find the kitchen"<br><a href="https://x.com/stash_pomichter/status/2015912688854200322">Watch video</a>
-    </td>
-    <td align="center" width="50%">
-      <h3>Spatial Memory</a></h3>
-      Spatio-temporal RAG, Dynamic memory, Object localization and permanence<br><a href="https://x.com/stash_pomichter/status/1980741077205414328">Watch video</a>
-    </td>
-  </tr>
-</table>
+1. **Goldie, the phone app** — voice-first, every agent reply spoken back, barge-in to interrupt, manual joystick fallback.
+2. **DimOS extensions** — wiring the agent's replies to the phone, a direct-move skill for stairs, and macOS support so we could develop without a Linux box.
 
+---
 
-# Hardware
+## What we built
 
-<table>
-  <tr>
-    <td align="center" width="20%">
-      <h3>Quadruped</h3>
-      <img width="245" height="1" src="assets/readme/spacer.png">
-    </td>
-    <td align="center" width="20%">
-      <h3>Humanoid</h3>
-      <img width="245" height="1" src="assets/readme/spacer.png">
-    </td>
-    <td align="center" width="20%">
-      <h3>Arm</h3>
-      <img width="245" height="1" src="assets/readme/spacer.png">
-    </td>
-    <td align="center" width="20%">
-      <h3>Drone</h3>
-      <img width="245" height="1" src="assets/readme/spacer.png">
-    </td>
-    <td align="center" width="20%">
-      <h3>Misc</h3>
-      <img width="245" height="1" src="assets/readme/spacer.png">
-    </td>
-  </tr>
+### 1. `webapp/` — Goldie (the phone app)
 
+<table>
   <tr>
-    <td align="center" width="20%">
-      🟩 <a href="docs/platforms/quadruped/go2/index.md">Unitree Go2 pro/air</a><br>
-      🟥 <a href="dimos/robot/unitree/b1">Unitree B1</a><br>
-    </td>
-    <td align="center" width="20%">
-      🟨 <a href="docs/platforms/humanoid/g1/index.md">Unitree G1</a><br>
-    </td>
-    <td align="center" width="20%">
-      🟨 <a href="docs/capabilities/manipulation/readme.md">Xarm</a><br>
-      🟨 <a href="docs/capabilities/manipulation/readme.md">AgileX Piper</a><br>
-    </td>
-    <td align="center" width="20%">
-      🟧 <a href="dimos/robot/drone/README.md">MAVLink</a><br>
-      🟧 <a href="dimos/robot/drone/README.md">DJI Mavic</a><br>
-    </td>
-    <td align="center" width="20%">
-      🟥 <a href="https://github.com/dimensionalOS/openFT-sensor">Force Torque Sensor</a><br>
-    </td>
+    <td align="center"><img src="docs/screenshots/splash.png" width="220"/><br/><sub>Splash</sub></td>
+    <td align="center"><img src="docs/screenshots/voice.png" width="220"/><br/><sub>Voice mode</sub></td>
+    <td align="center"><img src="docs/screenshots/manual.png" width="220"/><br/><sub>Manual mode</sub></td>
   </tr>
 </table>
-<br>
-<div align="right">
-🟩 stable 🟨 beta 🟧 alpha 🟥 experimental
-
-</div>
-
-> [!IMPORTANT]
-> 🤖 Direct your favorite Agent (OpenClaw, Claude Code, etc.) to [AGENTS.md](AGENTS.md) and our [CLI and MCP](#agent-cli-and-mcp) interfaces to start building powerful Dimensional applications.
-
-# Installation
-
-## Interactive Install
-
-```sh skip
-curl -fsSL https://raw.githubusercontent.com/dimensionalOS/dimos/main/scripts/install.sh | bash
-```
-
-> See [`scripts/install.sh --help`](scripts/install.sh) for non-interactive and advanced options.
-
-## Manual System Install
 
-To set up your system dependencies, follow one of these guides:
+A Next.js 16 PWA, written from the ground up during the hackathon. Designed to be opened in Safari on iPhone (Add-to-Home-Screen ready). The whole UI is one page with two modes.
 
-- 🟩 [Ubuntu 22.04 / 24.04](docs/installation/ubuntu.md)
-- 🟩 [NixOS / General Linux](docs/installation/nix.md)
-- 🟧 [macOS](docs/installation/osx.md)
+| | |
+|---|---|
+| **Voice mode** | Hold-to-speak → on-device STT → query sent to the agent → agent replies stream back over SSE → phone speaks them via OpenAI TTS. Includes **barge-in**: starting a new utterance cancels the in-flight reply. |
+| **Manual mode** | Analog joystick → Socket.IO `move_command` Twist at 15 Hz directly to the dog, bypassing the LLM. For when you just want to drive. |
+| **Quick actions** | Sit / Stand / Jump buttons that go through the agent so it narrates the action. |
+| **Interrupt** | Cuts the agent off mid-task and silences speech instantly. |
+| **Status feed** | Live SSE feed — tool/status lines shown dimmed, agent replies spoken aloud. |
 
-> Full system requirements, tested configs, and dependency tiers: [docs/requirements.md](docs/requirements.md)
+### 2. `dimos/` — DimOS extensions
 
-## Python Install
+| Change | What it does |
+|---|---|
+| **Typed agent message envelopes** (`dimos/agents/web_human_input.py`) | `WebInput` subscribes to the agent's LCM `/agent` topic and forwards each message as `{kind: "ai"\|"tool"\|"system", text}` — so the phone knows what to *speak* vs what to just *show*. |
+| **Direct `move` skill with stall recovery** (`dimos/robot/unitree/unitree_skill_container.py`) | The LLM can issue short velocity commands when the global planner can't find a path. Watches odometry, stops early on stalls, performs a reverse-recovery if blocked. |
+| **macOS support fixes** | Fixed the full stack to run on Apple Silicon for development and demos. |
 
-### Quickstart
+---
 
-```bash
-uv venv --python "3.12"
-source .venv/bin/activate
-uv pip install 'dimos[base,unitree]'
+## Architecture
 
-# Replay a recorded quadruped session (no hardware needed)
-# NOTE: First run will show a black rerun window while ~75 MB downloads from LFS
-dimos --replay run unitree-go2
-```
+![Goldie architecture](./docs/goldie-architecture.png)
 
-```bash
-# Install with simulation support
-uv pip install 'dimos[base,unitree,sim]'
-
-# Run quadruped in MuJoCo simulation
-dimos --simulation run unitree-go2
-
-# Run humanoid in simulation
-dimos --simulation run unitree-g1-sim
-```
-
-```bash
-# Control a real robot (Unitree quadruped over WebRTC)
-export ROBOT_IP=<YOUR_ROBOT_IP>
-dimos run unitree-go2
-```
+See [`webapp/TECHFLOW.md`](webapp/TECHFLOW.md) for a full end-to-end trace of every channel.
 
-# Featured Runfiles
+---
 
-| Run command | What it does |
-|-------------|-------------|
-| `dimos --replay run unitree-go2` | Quadruped navigation replay — SLAM, costmap, A* planning |
-| `dimos --replay --replay-db go2_bigoffice run unitree-go2-memory` | Quadruped temporal memory replay |
-| `dimos --simulation run unitree-go2-agentic` | Quadruped agentic + MCP server in simulation |
-| `dimos --simulation run unitree-g1-sim` | Humanoid in MuJoCo simulation |
-| `dimos --replay run drone-basic` | Drone video + telemetry replay |
-| `dimos --replay run drone-agentic` | Drone + LLM agent with flight skills (replay) |
-| `dimos run demo-camera` | Webcam demo — no hardware needed |
-| `dimos run keyboard-teleop-xarm7` | Keyboard teleop with mock xArm7 (requires `dimos[manipulation]` extra) |
-| `dimos --simulation run unitree-go2-agentic-ollama` | Quadruped agentic with local LLM (requires [Ollama](https://ollama.com) + `ollama serve`) |
+## Climbing stairs
 
-> Full blueprint docs: [docs/usage/blueprints.md](docs/usage/blueprints.md)
+One of our proudest moments was getting the Go2 to climb a real staircase under LLM control.
 
-# Agent CLI and MCP
+The existing DimOS navigation stack treats stairs as obstacles — the costmap-based planner won't route through them. To get past this we added a **direct `move` skill** to `UnitreeSkillContainer`: the LLM can issue short velocity commands (`x`, `y`, `yaw`, `duration`) for local maneuvering when the global planner gives up. The skill watches odometry chunk by chunk, stops early if the robot stalls, and performs a small reverse recovery if blocked.
 
-The `dimos` CLI manages the full lifecycle — run blueprints, inspect state, interact with agents, and call skills via MCP.
+We also updated the agent's system prompt with the right decision tree: try `relative_move` first; if no path is found, verify with `observe`, then call `move` with conservative velocity and duration. If `move` reports a stall, assess and reroute rather than keep pushing.
 
-```bash
-dimos run unitree-go2-agentic --daemon   # Start in background
-dimos status                              # Check what's running
-dimos log -f                              # Follow logs
-dimos agent-send "explore the room"       # Send agent a command
-dimos mcp list-tools                      # List available MCP skills
-dimos mcp call relative_move --arg forward=0.5  # Call a skill directly
-dimos stop                                # Shut down
-```
+The result: the agent navigates normally on flat ground, then when it hits stairs it switches to direct velocity control, climbs them step by step, and resumes normal navigation at the top.
 
-> Full CLI reference: [docs/usage/cli.md](docs/usage/cli.md)
+---
 
+## Achievements
 
-# Usage
+- ✅ **Full voice loop on iPhone** running against a live Go2
+- ✅ **Typed agent message envelopes** so the phone speaks only AI replies and treats tool output as status
+- ✅ **Stair climbing** under LLM control via direct-move skill with stall recovery
+- ✅ **Real-time joystick teleop** as a manual fallback (Socket.IO Twist @ 15 Hz)
+- ✅ **iOS PWA** — Add-to-Home-Screen, safe-area layout, retry-hardened HTTP client
+- ✅ **macOS support** so the stack runs on Apple Silicon
 
-## Use DimOS as a Library
+---
 
-See below a simple robot connection module that sends streams of continuous `cmd_vel` to the robot and receives `color_image` to a simple `Listener` module. DimOS Modules are subsystems on a robot that communicate with other modules using standardized messages.
+## Challenges
 
-```py skip
-import threading, time, numpy as np
-from dimos.core.coordination.blueprints import autoconnect
-from dimos.core.core import rpc
-from dimos.core.module import Module
-from dimos.core.stream import In, Out
-from dimos.msgs.geometry_msgs import Twist
-from dimos.msgs.sensor_msgs import Image, ImageFormat
+**1. Networking split-brain.** The Go2 communicates over its own WiFi. To send commands you have to be on the dog's network — but the agent needs internet to call the OpenAI API. Setting up routing so both could work simultaneously burned more time than expected.
 
-class RobotConnection(Module):
-    cmd_vel: In[Twist]
-    color_image: Out[Image]
+**2. Crashes and memory pressure.** SLAM, WebRTC, LCM, a live LLM agent, and Whisper STT were all running in the same process on development hardware. We regularly hit memory limits, causing silent freezes mid-session with no clean error — debugging often came down to figuring out whether the dog, the network, or the process had died.
 
-    @rpc
-    def start(self):
-        threading.Thread(target=self._image_loop, daemon=True).start()
+**3. Connection timeouts.** The WebRTC link to the dog would occasionally stall without dropping cleanly — commands appeared to send but never arrived, and the dog would stop responding mid-navigation with no indication on the backend.
 
-    def _image_loop(self):
-        while True:
-            img = Image.from_numpy(
-                np.zeros((120, 160, 3), np.uint8),
-                format=ImageFormat.RGB,
-                frame_id="camera_optical",
-            )
-            self.color_image.publish(img)
-            time.sleep(0.2)
+---
 
-class Listener(Module):
-    color_image: In[Image]
+## Quick start
 
-    @rpc
-    def start(self):
-        self.color_image.subscribe(lambda img: print(f"image {img.width}x{img.height}"))
+### Run the webapp (no robot needed)
 
-if __name__ == "__main__":
-    autoconnect(
-        RobotConnection.blueprint(),
-        Listener.blueprint(),
-    ).build().loop()
+```bash
+cd webapp
+npm install
+npm run dev      # http://localhost:3000
 ```
 
-## Blueprints
-
-Blueprints are instructions for how to construct and wire modules. We compose them with
-`autoconnect(...)`, which connects streams by `(name, type)` and returns a `Blueprint`.
+With no `.env.local`, Goldie runs against a built-in mock — the full UI works with no DimOS process running. For real OpenAI TTS and a live backend, create `webapp/.env.local` containing:
 
-Blueprints can be composed, remapped, and have transports overridden if `autoconnect()` fails due to conflicting variable names or `In[]` and `Out[]` message types.
+```bash
+OPENAI_API_KEY=sk-...
+NEXT_PUBLIC_DIMOS_API=https://your-dimos-host
+NEXT_PUBLIC_DIMOS_VIS=https://your-vis-host     # joystick
+```
 
-A blueprint example that connects the image stream from a robot to an MCP-backed LLM agent for reasoning and action execution.
-```py skip
-from dimos.core.coordination.blueprints import autoconnect
-from dimos.core.transport import LCMTransport
-from dimos.msgs.sensor_msgs import Image
-from dimos.robot.unitree.go2.connection import go2_connection
-from dimos.agents.mcp.mcp_client import McpClient
-from dimos.agents.mcp.mcp_server import McpServer
+### Run against a real Go2
 
-blueprint = autoconnect(
-    go2_connection(),
-    McpServer.blueprint(),
-    McpClient.blueprint(),
-).transports({("color_image", Image): LCMTransport("/color_image", Image)})
+```bash
+uv venv --python 3.12 && source .venv/bin/activate
+uv pip install -e '.[base,unitree]'
 
-# Run the blueprint
-if __name__ == "__main__":
-    blueprint.build().loop()
+uv run dimos --viewer none --robot-ip 192.168.12.1 run unitree-go2-agentic --disable security-module
 ```
 
-## Library API
+Wait for the backend to come online:
 
-- [Modules](docs/usage/modules.md)
-- [LCM](docs/usage/lcm.md)
-- [Blueprints](docs/usage/blueprints.md)
-- [Transports](docs/usage/transports/index.md) — LCM, SHM, DDS, ROS 2
-- [Data Streams](docs/usage/data_streams/README.md)
-- [Configuration](docs/usage/configuration.md)
-- [Visualization](docs/usage/visualization.md)
+```bash
+uv run dimos status
+```
 
-## Demos
+Then set `NEXT_PUBLIC_DIMOS_API=http://localhost:5555` in `webapp/.env.local` and start the webapp.
 
-<img src="assets/readme/dimos_demo.gif" alt="DimOS Demo" width="100%">
+---
 
-# Development
+## Tech stack
 
-## Develop on DimOS
+**Webapp:** Next.js 16 · React 19 · TypeScript · Tailwind v4 · Socket.IO · OpenAI `gpt-4o-mini-tts` · Vitest
 
-```sh skip
-export GIT_LFS_SKIP_SMUDGE=1
-git clone https://github.com/dimensionalOS/dimos.git
-cd dimos
+**DimOS stack:** Python 3.12 · FastAPI · ReactiveX · LCM · LangChain + MCP · Whisper STT · Unitree WebRTC SDK
 
-# Run the default test suite (uv run syncs deps on demand; --all-groups
-# only needed for self-hosted tests / mypy — see docs/development/testing.md)
-uv run pytest --numprocesses=auto dimos
-```
+---
 
+## Credits
 
-## Multi Language Support
+Built at the MuShanghai DimOS Hackathon 2026 by **Team Perception**:
+Joy Munn · Yichu Lau · Cecilia Zhang · Brecht Davos · Figo Saleh
 
-Python is our glue and prototyping language, but we support many languages via LCM interop.
+Built on top of [DimOS](https://github.com/dimensionalOS/dimos).
 
-Check our language interop examples:
-- [C++](examples/language-interop/cpp/)
-- [Lua](examples/language-interop/lua/)
-- [TypeScript](examples/language-interop/ts/)
+> [!NOTE]
+> The upstream DimOS README has been replaced by this submission doc. For the original project docs see [dimensionalOS/dimos](https://github.com/dimensionalOS/dimos).
diff --git a/assets/dimensional.command-center-extension-0.0.1.foxe b/assets/dimensional.command-center-extension-0.0.1.foxe
deleted file mode 100644
index 163f1ef36b..0000000000
--- a/assets/dimensional.command-center-extension-0.0.1.foxe
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:98a2a2154b102e8d889bb83305163ead388016377b8e8a56c8f42034443f9be4
-size 1229315
diff --git a/assets/dimos_interface.gif b/assets/dimos_interface.gif
deleted file mode 100644
index e610a2b390..0000000000
--- a/assets/dimos_interface.gif
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:13a5348ec51bef34d8cc3aa4afc99975befb7f118826df571130b1a2fa1b59e9
-size 13361230
diff --git a/assets/dimos_terminal.png b/assets/dimos_terminal.png
deleted file mode 100644
index a71b06e1cc..0000000000
--- a/assets/dimos_terminal.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7e45d7f700813e8aa042cc76f7fcf4ef7836f5f1a46708275d9cc11fd6559ba9
-size 25557
diff --git a/assets/framecount.mp4 b/assets/framecount.mp4
deleted file mode 100644
index 759ee6ab27..0000000000
--- a/assets/framecount.mp4
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:92256a9cceda2410ec26d58b92f457070e54deb39bf3e6e5aca174e2c7cff216
-size 34548239
diff --git a/assets/readme/agentic_control.gif b/assets/readme/agentic_control.gif
deleted file mode 100644
index f9f5970441..0000000000
--- a/assets/readme/agentic_control.gif
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:eb0411de5e5967be8773d5d95e692a6a5859f75bb400164451a3b383b1025fb4
-size 2416274
diff --git a/assets/readme/agents.png b/assets/readme/agents.png
deleted file mode 100644
index b05bee0b03..0000000000
--- a/assets/readme/agents.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a255d32f9a0ecff12d99dda9b8a51e0958ac282d7a0f814f93fd39261afaf84d
-size 477123
diff --git a/assets/readme/dimos_demo.gif b/assets/readme/dimos_demo.gif
deleted file mode 100644
index 5a68bd72ac..0000000000
--- a/assets/readme/dimos_demo.gif
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:fda7f7a859ce98002e0faef88fb2942f395e19995b36b585c48447ec5a9435ee
-size 24011189
diff --git a/assets/readme/lidar.gif b/assets/readme/lidar.gif
deleted file mode 100644
index 8302c2957d..0000000000
--- a/assets/readme/lidar.gif
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d47badc970572aa7badf98c908490c8b86ea9f1cafbb18507cfdb5d08655cdfb
-size 5900150
diff --git a/assets/readme/lidar.png b/assets/readme/lidar.png
deleted file mode 100644
index 1b499de10f..0000000000
--- a/assets/readme/lidar.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:65b1797fd9ac8edae5dce0691397b6aca2e975badfd58462ed8e20a4dace655e
-size 927067
diff --git a/assets/readme/navigation.gif b/assets/readme/navigation.gif
deleted file mode 100644
index 1402b1e85a..0000000000
--- a/assets/readme/navigation.gif
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:64e7965f421916cdb71667a9ed99eab96c14c64bd195bd483628d1b9b9a4e95c
-size 4395592
diff --git a/assets/readme/navigation.png b/assets/readme/navigation.png
deleted file mode 100644
index 16819a5007..0000000000
--- a/assets/readme/navigation.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:472cabca4b0d661658bf9ffbde78e636668e9ef6499dc38ea0f552557d735bd9
-size 617989
diff --git a/assets/readme/perception.png b/assets/readme/perception.png
deleted file mode 100644
index 7ec15aabbf..0000000000
--- a/assets/readme/perception.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:48e4c61c1ec588d56d61a74fd9f0d9251eadc042e7a514fb1896826d52a32988
-size 797817
diff --git a/assets/readme/spacer.png b/assets/readme/spacer.png
deleted file mode 100644
index 8745fc9687..0000000000
--- a/assets/readme/spacer.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4a16ec40112698cf02b9abd3d18c8db65ce40f48f2c61076b45de58695f16532
-size 66
diff --git a/assets/readme/spatial_memory.gif b/assets/readme/spatial_memory.gif
deleted file mode 100644
index 070c65270b..0000000000
--- a/assets/readme/spatial_memory.gif
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:50b9cb7898ae8d238a088252fd96d2278a1be96a0dbb761839bc58c99c17f7a7
-size 4655580
diff --git a/assets/simple_demo.mp4 b/assets/simple_demo.mp4
deleted file mode 100644
index cb8a635e78..0000000000
--- a/assets/simple_demo.mp4
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ff2459b880baaa509e8e0de8a45e8da48ebf7cb28d4927c62b10906baa83bda0
-size 50951922
diff --git a/assets/simple_demo_small.gif b/assets/simple_demo_small.gif
deleted file mode 100644
index 3c2cf54ef4..0000000000
--- a/assets/simple_demo_small.gif
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9a2b9a95d5b27cbc135cb84f6c6bc2131fa234403466befd2ee8ea81e2b2de45
-size 33374003
diff --git a/assets/trimmed_video_office.mov b/assets/trimmed_video_office.mov
deleted file mode 100644
index a3072be8fc..0000000000
--- a/assets/trimmed_video_office.mov
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d72f0cf95ce1728b4a0855d6b3fe4573f5e2e86fae718720c19a84198bdcbf9d
-size 2311156
diff --git a/bin/run-demo.sh b/bin/run-demo.sh
new file mode 100755
index 0000000000..d963b964b3
--- /dev/null
+++ b/bin/run-demo.sh
@@ -0,0 +1,162 @@
+#!/usr/bin/env bash
+# Boot the guide-lite demo: dimos + webapp + tailscale serve.
+# Retries dimos until the MCP race resolves (tools register within 30s).
+#
+# Usage:  bin/run-demo.sh
+# Stop:   bin/run-demo.sh stop
+
+set -uo pipefail  # note: no -e, so a failing command does not abort the script by itself
+cd "$(git rev-parse --show-toplevel)"  # run from the repo root so relative paths (webapp/, .venv/) resolve
+
+BLUEPRINT="${BLUEPRINT:-unitree-go2-guide-lite}"  # blueprint given to `dimos run`; like every setting here, overridable via the environment
+DIMOS_PORT="${DIMOS_PORT:-5555}"  # local port handed to `tailscale serve` for the dimos backend
+WEBAPP_PORT="${WEBAPP_PORT:-3000}"  # local port checked for (and served as) the webapp dev server
+TAILNET_HTTPS_DIMOS="${TAILNET_HTTPS_DIMOS:-8443}"  # tailnet HTTPS port for the backend
+TAILNET_HTTPS_WEBAPP="${TAILNET_HTTPS_WEBAPP:-443}"  # tailnet HTTPS port for the webapp
+TOOL_REGISTRATION_TIMEOUT="${TOOL_REGISTRATION_TIMEOUT:-30}"  # seconds to wait per attempt for the tool-discovery log line
+MAX_RETRIES="${MAX_RETRIES:-10}"  # upper bound on dimos launch attempts
+
+RUN_DIR="${RUN_DIR:-/tmp/dimos_run}"  # holds logs, pid files and the API token
+mkdir -p "$RUN_DIR"
+DIMOS_LOG="$RUN_DIR/dimos.log"
+WEBAPP_LOG="$RUN_DIR/webapp.log"
+TOKEN_FILE="$RUN_DIR/api_token"
+
+red()   { printf '\033[31m%s\033[0m\n' "$*"; }  # errors
+green() { printf '\033[32m%s\033[0m\n' "$*"; }  # success
+amber() { printf '\033[33m%s\033[0m\n' "$*"; }  # warnings / progress
+blue()  { printf '\033[34m%s\033[0m\n' "$*"; }  # step headers
+
+stop_all() {
+  amber "stopping demo..."
+  pkill -f "dimos.*run $BLUEPRINT" 2>/dev/null || true
+  pkill -f "next dev" 2>/dev/null || true
+  tailscale serve --https="$TAILNET_HTTPS_DIMOS" off 2>/dev/null || true
+  tailscale serve --https="$TAILNET_HTTPS_WEBAPP" off 2>/dev/null || true
+  green "stopped."
+}
+
+if [[ "${1:-}" == "stop" ]]; then
+  stop_all
+  exit 0
+fi
+
+# ---- 0) token: reuse the saved bearer token; on first run mint one, owner-readable only (umask scoped to a subshell)
+if [[ ! -s "$TOKEN_FILE" ]]; then
+  ( umask 077; openssl rand -hex 16 > "$TOKEN_FILE" ) || { red "could not generate API token (is openssl installed?)"; exit 1; }
+fi
+TOKEN="$(cat "$TOKEN_FILE")"
+blue "API token: $TOKEN  (file: $TOKEN_FILE)"
+
+# ---- 1) preflight: dimos CLI resolvable, OPENAI_API_KEY set, sudo state for route
+# Prefer dimos from PATH; otherwise fall back to the repo-local virtualenv copy.
+DIMOS_BIN="$(command -v dimos 2>/dev/null || command -v .venv/bin/dimos 2>/dev/null)" \
+  || { red "dimos CLI not found — activate venv or install"; exit 1; }
+
+if [[ -z "${OPENAI_API_KEY:-}" ]]; then
+  red "OPENAI_API_KEY not set"
+  exit 1
+fi
+
+# Warn only when neither a NOPASSWD rule for route nor a cached sudo ticket is available.
+if ! sudo -n route -h >/dev/null 2>&1 && ! sudo -n true 2>/dev/null; then
+  amber "sudo may prompt for route configuration"
+fi
+
+# ---- 2) boot dimos in a retry loop until tools register
+blue "booting dimos ($BLUEPRINT)..."
+ok=""  # set before the loop so the final check is safe under `set -u` even if no attempt runs
+for attempt in $(seq 1 "$MAX_RETRIES"); do
+  pkill -f "dimos.*run $BLUEPRINT" 2>/dev/null || true
+  sleep 2
+
+  nohup env DIMOS_API_TOKEN="$TOKEN" "$DIMOS_BIN" --replay run "$BLUEPRINT" \
+    > "$DIMOS_LOG" 2>&1 &
+  DIMOS_PID=$!
+  echo "$DIMOS_PID" > "$RUN_DIR/dimos.pid"
+
+  amber "  attempt $attempt: pid=$DIMOS_PID  watching for 'Discovered tools'..."
+
+  start=$SECONDS
+  # The log is truncated on every launch, so any match below belongs to this attempt.
+  while (( SECONDS - start < TOOL_REGISTRATION_TIMEOUT )); do
+    if ! kill -0 "$DIMOS_PID" 2>/dev/null; then
+      red "  pid $DIMOS_PID died — see $DIMOS_LOG"
+      break
+    fi
+    if grep -q "Discovered tools from MCP server" "$DIMOS_LOG" 2>/dev/null; then
+      n=$(grep "Discovered tools from MCP server" "$DIMOS_LOG" | tail -1 \
+        | sed -E 's/.*n_tools=([0-9]+).*/\1/')
+      if [[ "$n" =~ ^[0-9]+$ && "$n" -gt 0 ]]; then  # sed echoes the whole line when n_tools= is absent
+        green "  ✓ tools registered (n_tools=$n) on attempt $attempt"
+        ok=1
+        break
+      fi
+    fi
+    sleep 1
+  done
+
+  [[ "$ok" == "1" ]] && break
+  amber "  attempt $attempt failed — restarting"
+done
+
+if [[ "$ok" != "1" ]]; then
+  pkill -f "dimos.*run $BLUEPRINT" 2>/dev/null || true  # don't leave the last failed attempt running
+  red "dimos never registered tools after $MAX_RETRIES attempts. Check $DIMOS_LOG"
+  exit 1
+fi
+
+# ---- 3) bring up tailscale serve (idempotent)
+blue "configuring tailscale serve..."
+TAILNET_HOST="$(tailscale status --self --json 2>/dev/null \
+  | python3 -c 'import json,sys;d=json.load(sys.stdin);print(d["Self"]["DNSName"].rstrip("."))' 2>/dev/null \
+  || echo "$(hostname)")"
+
+# dimos backend on :8443, webapp on :443 (defaults). --bg only: a foreground serve would block this script.
+tailscale serve --bg --https="$TAILNET_HTTPS_DIMOS"  "$DIMOS_PORT"  >/dev/null 2>&1 || true
+tailscale serve --bg --https="$TAILNET_HTTPS_WEBAPP" "$WEBAPP_PORT" >/dev/null 2>&1 || true
+
+API_URL="https://${TAILNET_HOST}:${TAILNET_HTTPS_DIMOS}"
+WEBAPP_URL="https://${TAILNET_HOST}"
+# Keep the advertised URL correct when TAILNET_HTTPS_WEBAPP is overridden away from 443.
+[[ "$TAILNET_HTTPS_WEBAPP" == "443" ]] || WEBAPP_URL+=":${TAILNET_HTTPS_WEBAPP}"
+
+# ---- 4) write webapp env + boot dev server if not already running (nothing is rewritten when the port is already bound)
+if ! lsof -nP -iTCP:"$WEBAPP_PORT" -sTCP:LISTEN >/dev/null 2>&1; then
+  blue "starting webapp dev server..."  # the heredoc below replaces any existing webapp/.env.local
+  cat > webapp/.env.local <<EOF
+NEXT_PUBLIC_DIMOS_API=$API_URL
+NEXT_PUBLIC_DIMOS_TOKEN=$TOKEN
+EOF
+  ( cd webapp && nohup npm run dev > "$WEBAPP_LOG" 2>&1 & echo $! > "$RUN_DIR/webapp.pid" )
+  disown  # NOTE(review): the dev server was backgrounded inside the subshell above, so it is not a job of this shell — confirm this call is still needed
+  start=$SECONDS
+  while (( SECONDS - start < 30 )); do  # wait up to 30s for the "Ready in" line; falls through silently if it never appears
+    grep -q "Ready in" "$WEBAPP_LOG" 2>/dev/null && break
+    sleep 1
+  done
+fi
+
+# ---- 5) sanity probe: hit the backend through the tailnet URL directly (no proxy), bounded to 10s
+TOKEN="$(cat "$TOKEN_FILE")"
+unset HTTP_PROXY HTTPS_PROXY ALL_PROXY http_proxy https_proxy all_proxy  # curl honours the lowercase names too
+streams=$(curl -fsS --max-time 10 -H "Authorization: Bearer $TOKEN" "$API_URL/text_streams" 2>/dev/null || echo "FAIL")
+if [[ "$streams" == *"agent_state"* ]]; then
+  green "  ✓ dimos reachable via tailnet HTTPS"
+else
+  amber "  ! could not reach $API_URL/text_streams — check tailscale"
+fi
+
+# ---- 6) print connection details (one printf, one line per argument)
+echo
+green "===================== READY ====================="
+printf '%s\n' "  webapp:  $WEBAPP_URL" \
+  "  api:     $API_URL" \
+  "  token:   $TOKEN" \
+  "  log:     $DIMOS_LOG" \
+  "  pid:     $(cat "$RUN_DIR/dimos.pid")" \
+  "" \
+  "  open the webapp URL in iPhone Safari (Tailscale ON)" \
+  "  stop with: bin/run-demo.sh stop" \
+  "  tail logs: tail -f $DIMOS_LOG"
+green "================================================="
diff --git a/data/.lfs/ab_lidar_frames.tar.gz b/data/.lfs/ab_lidar_frames.tar.gz
deleted file mode 100644
index 38c61cd506..0000000000
--- a/data/.lfs/ab_lidar_frames.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ab4efaf5d7d4303424868fecaf10083378007adf20244fd17ed934e37f2996da
-size 116271
diff --git a/data/.lfs/apartment.tar.gz b/data/.lfs/apartment.tar.gz
deleted file mode 100644
index c8e6cf0331..0000000000
--- a/data/.lfs/apartment.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8d2c44f39573a80a65aeb6ccd3fcb1c8cb0741dbc7286132856409e88e150e77
-size 18141029
diff --git a/data/.lfs/assets.tar.gz b/data/.lfs/assets.tar.gz
deleted file mode 100644
index b7a2fcbd1c..0000000000
--- a/data/.lfs/assets.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7b14b01f5c907f117331213abfce9ef5d0c41d0524e14327b5cc706520fb2035
-size 2306191
diff --git a/data/.lfs/astar_corner_min_cost.png.tar.gz b/data/.lfs/astar_corner_min_cost.png.tar.gz
deleted file mode 100644
index 35f3ffe0b6..0000000000
--- a/data/.lfs/astar_corner_min_cost.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:42517c5f67a9f06949cb2015a345f9d6b43d22cafd50e1fefb9b5d24d8b72509
-size 5671
diff --git a/data/.lfs/astar_min_cost.png.tar.gz b/data/.lfs/astar_min_cost.png.tar.gz
deleted file mode 100644
index 752a778295..0000000000
--- a/data/.lfs/astar_min_cost.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:06b67aa0d18c291c3525e67ca3a2a9ab2530f6fe782a850872ba4c343353a20a
-size 12018
diff --git a/data/.lfs/big_office.ply.tar.gz b/data/.lfs/big_office.ply.tar.gz
deleted file mode 100644
index c8524a1862..0000000000
--- a/data/.lfs/big_office.ply.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7eabc682f75e1725a07df51bb009d3950190318d119d54d0ad8c6b7104f175e3
-size 2355227
diff --git a/data/.lfs/big_office_height_cost_occupancy.png.tar.gz b/data/.lfs/big_office_height_cost_occupancy.png.tar.gz
deleted file mode 100644
index 75addaf103..0000000000
--- a/data/.lfs/big_office_height_cost_occupancy.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6d8e7d096f1108d45ebdad760c4655de1e1d50105ca59c5188e79cb1a7c0d4a9
-size 133051
diff --git a/data/.lfs/big_office_simple_occupancy.png.tar.gz b/data/.lfs/big_office_simple_occupancy.png.tar.gz
deleted file mode 100644
index dd667640be..0000000000
--- a/data/.lfs/big_office_simple_occupancy.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:dded2e28694de9ec84a91a686b27654b83c604f44f4d3e336d5cd481e88d3249
-size 28146
diff --git a/data/.lfs/cafe-smol.jpg.tar.gz b/data/.lfs/cafe-smol.jpg.tar.gz
deleted file mode 100644
index a05beb4900..0000000000
--- a/data/.lfs/cafe-smol.jpg.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:dd0c1e5aa5e8ec856cb471c5ed256c2d3a5633ed9a1e052291680eb86bf89a5e
-size 8298
diff --git a/data/.lfs/cafe.jpg.tar.gz b/data/.lfs/cafe.jpg.tar.gz
deleted file mode 100644
index dbb2d970a1..0000000000
--- a/data/.lfs/cafe.jpg.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b8cf30439b41033ccb04b09b9fc8388d18fb544d55b85c155dbf85700b9e7603
-size 136165
diff --git a/data/.lfs/chair-image.png.tar.gz b/data/.lfs/chair-image.png.tar.gz
deleted file mode 100644
index 1a2aab4cf5..0000000000
--- a/data/.lfs/chair-image.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1f3478f472b5750f118cf7225c2028beeaae41f1b4b726c697ac8c9b004eccbf
-size 48504
diff --git a/data/.lfs/command_center.html.tar.gz b/data/.lfs/command_center.html.tar.gz
deleted file mode 100644
index 9f7bfe1979..0000000000
--- a/data/.lfs/command_center.html.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7663ac06572e3b9490859b400e9ddbf45ac3ef52a58fcdb8c2c41936dc9d43b5
-size 137675
diff --git a/data/.lfs/drone.tar.gz b/data/.lfs/drone.tar.gz
deleted file mode 100644
index 2973c649cd..0000000000
--- a/data/.lfs/drone.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:dd73f988eee8fd7b99d6c0bf6a905c2f43a6145a4ef33e9eef64bee5f53e04dd
-size 709946060
diff --git a/data/.lfs/expected_occupancy_scene.xml.tar.gz b/data/.lfs/expected_occupancy_scene.xml.tar.gz
deleted file mode 100644
index efbe7ce49d..0000000000
--- a/data/.lfs/expected_occupancy_scene.xml.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e3eb91f3c7787882bf26a69df21bb1933d2f6cd71132ca5f0521e2808269bfa2
-size 6777
diff --git a/data/.lfs/g1_wholebody_replay.json.tar.gz b/data/.lfs/g1_wholebody_replay.json.tar.gz
deleted file mode 100644
index 1ad3ea8da3..0000000000
--- a/data/.lfs/g1_wholebody_replay.json.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8bffca753401f78587ba743d19fc3493aac7ffdb9a42c3863299082e62da2788
-size 1147342
diff --git a/data/.lfs/g1_zed.tar.gz b/data/.lfs/g1_zed.tar.gz
deleted file mode 100644
index 4029f48204..0000000000
--- a/data/.lfs/g1_zed.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:955094035b3ac1edbc257ca1d24fa131f79ac6f502c8b35cc50329025c421dbe
-size 1029559759
diff --git a/data/.lfs/go2_bigoffice.db.tar.gz b/data/.lfs/go2_bigoffice.db.tar.gz
deleted file mode 100644
index 540d7009ba..0000000000
--- a/data/.lfs/go2_bigoffice.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e66f5472e72f370446d8dcd802f70f3c3c07e4e083c5d6a394873877dec4c88d
-size 196309743
diff --git a/data/.lfs/go2_china_office.db.tar.gz b/data/.lfs/go2_china_office.db.tar.gz
deleted file mode 100644
index 772ef3627e..0000000000
--- a/data/.lfs/go2_china_office.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:834539871fd325b15f3079a3490b278c54e78d0d40bfa1342dbdc983f6a3ee02
-size 136080653
diff --git a/data/.lfs/go2_hongkong_office.db.tar.gz b/data/.lfs/go2_hongkong_office.db.tar.gz
deleted file mode 100644
index e921dd2cde..0000000000
--- a/data/.lfs/go2_hongkong_office.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d1bb7de9a090b4053ba1ee4f36d776e439d970cba08ebb489f9311f26946f56c
-size 772688230
diff --git a/data/.lfs/go2_hongkong_office_twopass_map.pc2.lcm.tar.gz b/data/.lfs/go2_hongkong_office_twopass_map.pc2.lcm.tar.gz
deleted file mode 100644
index 2a8828f1c4..0000000000
--- a/data/.lfs/go2_hongkong_office_twopass_map.pc2.lcm.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:55f8f15e969a72ef59e88fe6faaabf0bacca23672ed5629db2fa30eaaf336e75
-size 2673318
diff --git a/data/.lfs/go2_sf_office.tar.gz b/data/.lfs/go2_sf_office.tar.gz
deleted file mode 100644
index be294d49fa..0000000000
--- a/data/.lfs/go2_sf_office.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:dab40e2310e8397bc20046dfd23475c805c973eaa94043eb87e23ba43e2774fb
-size 25533811
diff --git a/data/.lfs/go2_slamabuse1.db.tar.gz b/data/.lfs/go2_slamabuse1.db.tar.gz
deleted file mode 100644
index 625bbd7187..0000000000
--- a/data/.lfs/go2_slamabuse1.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a85feac43debdebf344c567483ab7d1bec12c3cf9e4df26034260a24e225f219
-size 285259757
diff --git a/data/.lfs/go2_slamabuse2.db.tar.gz b/data/.lfs/go2_slamabuse2.db.tar.gz
deleted file mode 100644
index 046dd23b52..0000000000
--- a/data/.lfs/go2_slamabuse2.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7d9a13596cf3d9a50e437fa89e8a3d68d843587116681564b4de7422b53c54dd
-size 306440341
diff --git a/data/.lfs/gradient_simple.png.tar.gz b/data/.lfs/gradient_simple.png.tar.gz
deleted file mode 100644
index 7232282ce4..0000000000
--- a/data/.lfs/gradient_simple.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e418f2a6858c757cb72bd25772749a1664c97a407682d88ad2b51c4bbdcb8006
-size 11568
diff --git a/data/.lfs/gradient_voronoi.png.tar.gz b/data/.lfs/gradient_voronoi.png.tar.gz
deleted file mode 100644
index 28e7f263c4..0000000000
--- a/data/.lfs/gradient_voronoi.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3867c0fb5b00f8cb5e0876e5120a70d61f7da121c0a3400010743cc858ee2d54
-size 20680
diff --git a/data/.lfs/hk_building_all_around.db.tar.gz b/data/.lfs/hk_building_all_around.db.tar.gz
deleted file mode 100644
index f746f508ea..0000000000
--- a/data/.lfs/hk_building_all_around.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b2f0a4ab761bd65f2a6460e7f2f647509b7fc109cd2806f22abc06fd7514947a
-size 278231095
diff --git a/data/.lfs/hk_building_elevator.db.tar.gz b/data/.lfs/hk_building_elevator.db.tar.gz
deleted file mode 100644
index 55a3e08d51..0000000000
--- a/data/.lfs/hk_building_elevator.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:53cb870397fc830f93f0fc3f8b7b1d7fe885894843f06b3bd6928e7551101869
-size 114421458
diff --git a/data/.lfs/hk_building_park.db.tar.gz b/data/.lfs/hk_building_park.db.tar.gz
deleted file mode 100644
index 8dfa0a8e70..0000000000
--- a/data/.lfs/hk_building_park.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6c84276cc26fc649eeb4d9eefd3bd0247fed14f4fd3209955aa947b5061f05aa
-size 124098635
diff --git a/data/.lfs/hk_village1.db.tar.gz b/data/.lfs/hk_village1.db.tar.gz
deleted file mode 100644
index 2ad2e9f995..0000000000
--- a/data/.lfs/hk_village1.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b22392beae1b055743a00090317dc4191d4e7cef6d1f2114fdde19e1b35597ab
-size 207852261
diff --git a/data/.lfs/hk_village3.db.tar.gz b/data/.lfs/hk_village3.db.tar.gz
deleted file mode 100644
index a9a3292c37..0000000000
--- a/data/.lfs/hk_village3.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d575b524bf95a0f5c505d31d4b5eaac00a00457d8b82bb49a3b849bc37d51ff7
-size 132125897
diff --git a/data/.lfs/hk_village4.db.tar.gz b/data/.lfs/hk_village4.db.tar.gz
deleted file mode 100644
index 62c8a7d4b1..0000000000
--- a/data/.lfs/hk_village4.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7f63d0c2f642e4b2463ae9ed7a1c3faa81d978907e105b798b50af7b76c045ed
-size 287082427
diff --git a/data/.lfs/hk_village6.db.tar.gz b/data/.lfs/hk_village6.db.tar.gz
deleted file mode 100644
index c6d6950868..0000000000
--- a/data/.lfs/hk_village6.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9637f2c78025d2eec238396a3697334a35350dae42c59346b0f7fc748743c63c
-size 227454066
diff --git a/data/.lfs/inflation_simple.png.tar.gz b/data/.lfs/inflation_simple.png.tar.gz
deleted file mode 100644
index ca6586800c..0000000000
--- a/data/.lfs/inflation_simple.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:658ed8cafc24ac7dc610b7e5ae484f23e1963872ffc2add0632ee61a7c20492d
-size 3412
diff --git a/data/.lfs/lcm_msgs.tar.gz b/data/.lfs/lcm_msgs.tar.gz
deleted file mode 100644
index 2b2f28c252..0000000000
--- a/data/.lfs/lcm_msgs.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:245395d0c3e200fcfcea8de5de217f645362b145b200c81abc3862e0afc1aa7e
-size 327201
diff --git a/data/.lfs/make_navigation_map_mixed.png.tar.gz b/data/.lfs/make_navigation_map_mixed.png.tar.gz
deleted file mode 100644
index 4fcaa8134a..0000000000
--- a/data/.lfs/make_navigation_map_mixed.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:36ea27a2434836eb309728f35033674736552daeb82f6e41fb7e3eb175d950da
-size 13084
diff --git a/data/.lfs/make_navigation_map_simple.png.tar.gz b/data/.lfs/make_navigation_map_simple.png.tar.gz
deleted file mode 100644
index f966b459e2..0000000000
--- a/data/.lfs/make_navigation_map_simple.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a0d211fa1bc517ef78e8dc548ebff09f58ad34c86d28eb3bd48a09a577ee5d1e
-size 11767
diff --git a/data/.lfs/make_path_mask_full.png.tar.gz b/data/.lfs/make_path_mask_full.png.tar.gz
deleted file mode 100644
index 0e9336aaea..0000000000
--- a/data/.lfs/make_path_mask_full.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b772d266dffa82ccf14f13c7d8cc2443210202836883c80f016a56d4cfe2b52a
-size 11213
diff --git a/data/.lfs/make_path_mask_two_meters.png.tar.gz b/data/.lfs/make_path_mask_two_meters.png.tar.gz
deleted file mode 100644
index 7fa9e767b8..0000000000
--- a/data/.lfs/make_path_mask_two_meters.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:da608d410f4a1afee0965abfac814bc05267bdde31b0d3a9622c39515ee4f813
-size 11395
diff --git a/data/.lfs/markers_go2.db.tar.gz b/data/.lfs/markers_go2.db.tar.gz
deleted file mode 100644
index 1f207ddccd..0000000000
--- a/data/.lfs/markers_go2.db.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5a43529f8dbc2aedcccca6ae89747235826123c2bc066e0dc8b87c2042219dae
-size 99270761
diff --git a/data/.lfs/models_contact_graspnet.tar.gz b/data/.lfs/models_contact_graspnet.tar.gz
deleted file mode 100644
index 73dd44d033..0000000000
--- a/data/.lfs/models_contact_graspnet.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:431c4611a9e096fd8b0a83fecda39c5a575e72fa933f7bd29ff8cfad5bbb5f9d
-size 52149165
diff --git a/data/.lfs/models_edgetam.tar.gz b/data/.lfs/models_edgetam.tar.gz
deleted file mode 100644
index 64baa5d139..0000000000
--- a/data/.lfs/models_edgetam.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cd452096f91415ce7ca90548a06a87354ccdb19a66925c0242413c80b08f5c57
-size 51988780
diff --git a/data/.lfs/models_fastsam.tar.gz b/data/.lfs/models_fastsam.tar.gz
deleted file mode 100644
index 77278f4323..0000000000
--- a/data/.lfs/models_fastsam.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:682cb3816451bd73722cc430fdfce15bbe72a07e50ef2ea81ddaed61d1f22a25
-size 39971209
diff --git a/data/.lfs/models_graspgen.tar.gz b/data/.lfs/models_graspgen.tar.gz
deleted file mode 100644
index 8321530922..0000000000
--- a/data/.lfs/models_graspgen.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:058ff764c043dccc516c1519a1e23207500c20a10c432c15eb5e30104477c0a4
-size 2117602984
diff --git a/data/.lfs/models_mobileclip.tar.gz b/data/.lfs/models_mobileclip.tar.gz
deleted file mode 100644
index afe82c96e9..0000000000
--- a/data/.lfs/models_mobileclip.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:143747a320e959d9ee9fd239535d0451c378b1a2e165a242e981c4a3e4defb73
-size 1654541503
diff --git a/data/.lfs/models_torchreid.tar.gz b/data/.lfs/models_torchreid.tar.gz
deleted file mode 100644
index 6446a049fb..0000000000
--- a/data/.lfs/models_torchreid.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2215070bd8e814ac9867410e3e6c49700f6c3ef7caf29b42d7832be090003743
-size 23873718
diff --git a/data/.lfs/models_yoloe.tar.gz b/data/.lfs/models_yoloe.tar.gz
deleted file mode 100644
index a0870d71d2..0000000000
--- a/data/.lfs/models_yoloe.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7a78e39477667b25c9454f846cd66dc044dd05981b2f7ebb0d331ef3626de9bc
-size 184892540
diff --git a/data/.lfs/nav_stack_paths.tar.gz b/data/.lfs/nav_stack_paths.tar.gz
deleted file mode 100644
index 62a20148ed..0000000000
--- a/data/.lfs/nav_stack_paths.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f88fdde4e1e6b5c0d0612c7e7cbf920d75805d471937952f195cb15b0543f037
-size 1291318
diff --git a/data/.lfs/occupancy_general.png.tar.gz b/data/.lfs/occupancy_general.png.tar.gz
deleted file mode 100644
index b509151e5a..0000000000
--- a/data/.lfs/occupancy_general.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b770d950cf7206a67ccdfd8660ee0ab818228faa9ebbf1a37cbf6ee9d1ac7539
-size 2970
diff --git a/data/.lfs/occupancy_simple.npy.tar.gz b/data/.lfs/occupancy_simple.npy.tar.gz
deleted file mode 100644
index cf42cf3667..0000000000
--- a/data/.lfs/occupancy_simple.npy.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e1cf83464442fb284b6f7ba2752546fc4571a73f3490c24a58fb45987555a66c
-size 1954
diff --git a/data/.lfs/occupancy_simple.png.tar.gz b/data/.lfs/occupancy_simple.png.tar.gz
deleted file mode 100644
index 4962f13db1..0000000000
--- a/data/.lfs/occupancy_simple.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6c9dac221a594c87d0baa60b8c678c63a0c215325080b34ee60df5cc1e1c331d
-size 3311
diff --git a/data/.lfs/office_building_1.tar.gz b/data/.lfs/office_building_1.tar.gz
deleted file mode 100644
index 0dc013bd94..0000000000
--- a/data/.lfs/office_building_1.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:70aac31ca76597b3eee1ddfcbe2ba71d432fd427176f66d8281d75da76641f49
-size 1061581652
diff --git a/data/.lfs/office_lidar.tar.gz b/data/.lfs/office_lidar.tar.gz
deleted file mode 100644
index 849e9e3d49..0000000000
--- a/data/.lfs/office_lidar.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f4958965334660c4765553afa38081f00a769c8adf81e599e63fabc866c490fd
-size 28576272
diff --git a/data/.lfs/og_nav_60s.npz.tar.gz b/data/.lfs/og_nav_60s.npz.tar.gz
deleted file mode 100644
index d6e4eb335c..0000000000
--- a/data/.lfs/og_nav_60s.npz.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a574137a3d661d36d2c2f07be2a1a56a1c75e2f9d3282ee32374e125af66d6e1
-size 108237809
diff --git a/data/.lfs/openarm_description.tar.gz b/data/.lfs/openarm_description.tar.gz
deleted file mode 100644
index 54aa76da41..0000000000
--- a/data/.lfs/openarm_description.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4da176b6c210b9796bb2ee1a29c15ee9a67578b9ae906eb89a6ec8a44b7f303a
-size 70064687
diff --git a/data/.lfs/osm_map_test.tar.gz b/data/.lfs/osm_map_test.tar.gz
deleted file mode 100644
index b29104ea17..0000000000
--- a/data/.lfs/osm_map_test.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:25097f1bffebd2651f1f4ba93cb749998a064adfdc0cb004981b2317f649c990
-size 1062262
diff --git a/data/.lfs/overlay_occupied.png.tar.gz b/data/.lfs/overlay_occupied.png.tar.gz
deleted file mode 100644
index 158a52c6bd..0000000000
--- a/data/.lfs/overlay_occupied.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0b55bcf7a2a7a5cbdfdfe8c6a75c53ffe5707197d991d1e39e9aa9dc22503397
-size 3657
diff --git a/data/.lfs/piper.tar.gz b/data/.lfs/piper.tar.gz
deleted file mode 100644
index ac5d1ab468..0000000000
--- a/data/.lfs/piper.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c2de07119ba33de9b5c1a2fdc02fee8cabc7775829d5064eff00ea65b949ff5f
-size 7475509
diff --git a/data/.lfs/piper_description.tar.gz b/data/.lfs/piper_description.tar.gz
deleted file mode 100644
index 3ab8ab227b..0000000000
--- a/data/.lfs/piper_description.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d4ce51d4ea15f29d80e69b0fff4a4d667f086e010329bb5c66980a881f1ee539
-size 3091511
diff --git a/data/.lfs/raw_odometry_rotate_walk.tar.gz b/data/.lfs/raw_odometry_rotate_walk.tar.gz
deleted file mode 100644
index ce8bb1d2b0..0000000000
--- a/data/.lfs/raw_odometry_rotate_walk.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:396345f0cd7a94bb9d85540d4bbce01b027618972f83e713e4550abf1d6ec445
-size 15685
diff --git a/data/.lfs/replay_g1.tar.gz b/data/.lfs/replay_g1.tar.gz
deleted file mode 100644
index 67750bd0cf..0000000000
--- a/data/.lfs/replay_g1.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:19ad1c53c4f8f9414c0921b94cd4c87e81bf0ad676881339f15ae2d8a8619311
-size 557410250
diff --git a/data/.lfs/replay_g1_run.tar.gz b/data/.lfs/replay_g1_run.tar.gz
deleted file mode 100644
index 86368ec788..0000000000
--- a/data/.lfs/replay_g1_run.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:00cf21f65a15994895150f74044f5d00d7aa873d24f071d249ecbd09cb8f2b26
-size 559554274
diff --git a/data/.lfs/resample_path_simple.png.tar.gz b/data/.lfs/resample_path_simple.png.tar.gz
deleted file mode 100644
index 1a8c1118d6..0000000000
--- a/data/.lfs/resample_path_simple.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0b5c454ed6cc66cf4446ce4a246464aec27368da4902651b4ad9ed29b3ba56ec
-size 118319
diff --git a/data/.lfs/resample_path_smooth.png.tar.gz b/data/.lfs/resample_path_smooth.png.tar.gz
deleted file mode 100644
index 80af3d3805..0000000000
--- a/data/.lfs/resample_path_smooth.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6cc0dfd80bada94f2ab1bb577e2ec1734dad6894113f2fe77964bd80d886c3d3
-size 109699
diff --git a/data/.lfs/rgbd_frames.tar.gz b/data/.lfs/rgbd_frames.tar.gz
deleted file mode 100644
index 8081c76961..0000000000
--- a/data/.lfs/rgbd_frames.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:381b9fd296a885f5211a668df16c68581d2aee458c8734c3256a7461f0decccd
-size 948391033
diff --git a/data/.lfs/security_detection.png.tar.gz b/data/.lfs/security_detection.png.tar.gz
deleted file mode 100644
index 30637471ff..0000000000
--- a/data/.lfs/security_detection.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7952034063d4216cb03b870ba8f20f51b59883767ee198880d58a5859151775c
-size 42747
diff --git a/data/.lfs/security_no_detection.png.tar.gz b/data/.lfs/security_no_detection.png.tar.gz
deleted file mode 100644
index 22acd21a2e..0000000000
--- a/data/.lfs/security_no_detection.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:680467d4219daf29f9211930221b533193591b5d2ca15ff4dbd79cd78203350e
-size 14903
diff --git a/data/.lfs/smooth_occupied.png.tar.gz b/data/.lfs/smooth_occupied.png.tar.gz
deleted file mode 100644
index 0e09e7d15a..0000000000
--- a/data/.lfs/smooth_occupied.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:44c8988b8a7d954ee26a0a5f195b961c62bbdb251b540df6b4d67cd85a72e5ac
-size 3511
diff --git a/data/.lfs/three_paths.npy.tar.gz b/data/.lfs/three_paths.npy.tar.gz
deleted file mode 100644
index 744eb06305..0000000000
--- a/data/.lfs/three_paths.npy.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ba849a6b648ccc9ed4987bbe985ee164dd9ad0324895076baa9f86196b2a0d5f
-size 5180
diff --git a/data/.lfs/three_paths.ply.tar.gz b/data/.lfs/three_paths.ply.tar.gz
deleted file mode 100644
index a5bfc6bac4..0000000000
--- a/data/.lfs/three_paths.ply.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:639093004355c1ba796c668cd43476dfcabff137ca0bb430ace07730cc512f0e
-size 307187
diff --git a/data/.lfs/three_paths.png.tar.gz b/data/.lfs/three_paths.png.tar.gz
deleted file mode 100644
index ade2bd3eb7..0000000000
--- a/data/.lfs/three_paths.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2265ddd76bfb70e7ac44f2158dc0d16e0df264095b0f45a77f95eb85c529d935
-size 2559
diff --git a/data/.lfs/unitree_g1_local_planner_precomputed_paths.tar.gz b/data/.lfs/unitree_g1_local_planner_precomputed_paths.tar.gz
deleted file mode 100644
index d2fb0f4131..0000000000
--- a/data/.lfs/unitree_g1_local_planner_precomputed_paths.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f8f5d128754569aecfec8c45fb1dfc158e2a773974fa0a4221c141925b964ff4
-size 1288141
diff --git a/data/.lfs/unitree_go2_bigoffice.tar.gz b/data/.lfs/unitree_go2_bigoffice.tar.gz
deleted file mode 100644
index 6582702479..0000000000
--- a/data/.lfs/unitree_go2_bigoffice.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3a009674153f7ee1f98219af69dc7a92d063f2581bfd9b0aa019762c9235895c
-size 2312982327
diff --git a/data/.lfs/unitree_go2_bigoffice_map.pickle.tar.gz b/data/.lfs/unitree_go2_bigoffice_map.pickle.tar.gz
deleted file mode 100644
index 89ecb54e87..0000000000
--- a/data/.lfs/unitree_go2_bigoffice_map.pickle.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:68adb344ae040c3f94d61dd058beb39cc2811c4ae8328f678bc2ba761c504eb5
-size 2331189
diff --git a/data/.lfs/unitree_go2_lidar_corrected.tar.gz b/data/.lfs/unitree_go2_lidar_corrected.tar.gz
deleted file mode 100644
index 013f6b3fe1..0000000000
--- a/data/.lfs/unitree_go2_lidar_corrected.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:51a817f2b5664c9e2f2856293db242e030f0edce276e21da0edc2821d947aad2
-size 1212727745
diff --git a/data/.lfs/unitree_go2_office_walk2.tar.gz b/data/.lfs/unitree_go2_office_walk2.tar.gz
deleted file mode 100644
index ea392c4b4c..0000000000
--- a/data/.lfs/unitree_go2_office_walk2.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d208cdf537ad01eed2068a4665e454ed30b30894bd9b35c14b4056712faeef5d
-size 1693876005
diff --git a/data/.lfs/unitree_office_walk.tar.gz b/data/.lfs/unitree_office_walk.tar.gz
deleted file mode 100644
index 419489dbb1..0000000000
--- a/data/.lfs/unitree_office_walk.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:bee487130eb662bca73c7d84f14eaea091bd6d7c3f1bfd5173babf660947bdec
-size 553620791
diff --git a/data/.lfs/unitree_raw_webrtc_replay.tar.gz b/data/.lfs/unitree_raw_webrtc_replay.tar.gz
deleted file mode 100644
index d41ff5c48f..0000000000
--- a/data/.lfs/unitree_raw_webrtc_replay.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a02c622cfee712002afc097825ab5e963071471c3445a20a004ef3532cf59888
-size 756280504
diff --git a/data/.lfs/unity_sim_x86.tar.gz b/data/.lfs/unity_sim_x86.tar.gz
deleted file mode 100644
index 15c06301fc..0000000000
--- a/data/.lfs/unity_sim_x86.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d4ce5b93751657cc991c4242c227627ec3bbc0263085312e602eae264652d3ac
-size 581676645
diff --git a/data/.lfs/video.tar.gz b/data/.lfs/video.tar.gz
deleted file mode 100644
index 6c0e01a0bb..0000000000
--- a/data/.lfs/video.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:530d2132ef84df228af776bd2a2ef387a31858c63ea21c94fb49c7e579b366c0
-size 4322822
diff --git a/data/.lfs/visualize_occupancy_rainbow.png.tar.gz b/data/.lfs/visualize_occupancy_rainbow.png.tar.gz
deleted file mode 100644
index 9bbd2e6ea1..0000000000
--- a/data/.lfs/visualize_occupancy_rainbow.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3dc1e3b6519f7d7ff25b16c3124ee447f02857eeb3eb20930cdab95464b1f0a3
-size 11582
diff --git a/data/.lfs/visualize_occupancy_turbo.png.tar.gz b/data/.lfs/visualize_occupancy_turbo.png.tar.gz
deleted file mode 100644
index e2863cdae6..0000000000
--- a/data/.lfs/visualize_occupancy_turbo.png.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c21874bab6ec7cd9692d2b1e67498ddfff3c832ec992e9552fee17093759b270
-size 18593
diff --git a/data/.lfs/xarm6.tar.gz b/data/.lfs/xarm6.tar.gz
deleted file mode 100644
index 16771358b8..0000000000
--- a/data/.lfs/xarm6.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:71c9990ab779d20b878ec4c6b0ee21b29bbb7963b59375acf9dd635b7241009b
-size 1863215
diff --git a/data/.lfs/xarm7.tar.gz b/data/.lfs/xarm7.tar.gz
deleted file mode 100644
index 597c883e80..0000000000
--- a/data/.lfs/xarm7.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c46a39bdf15e91138e00868b001df0a354f870b82f92b5039d571bc37af80e51
-size 1606716
diff --git a/data/.lfs/xarm_description.tar.gz b/data/.lfs/xarm_description.tar.gz
deleted file mode 100644
index 4cccd9ab25..0000000000
--- a/data/.lfs/xarm_description.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6e25f1ede8e4022f5053a61717191a2c338ea5af5b81e26bd2c880343aff1316
-size 12709222
diff --git a/dimos/agents/artifacts.py b/dimos/agents/artifacts.py
new file mode 100644
index 0000000000..65039254d7
--- /dev/null
+++ b/dimos/agents/artifacts.py
@@ -0,0 +1,47 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Literal
+
+
+@dataclass(frozen=True)
+class EncodedImageArtifact:
+    """Small image payload that can safely cross the RPC/MCP boundary.
+
+    `data` is expected to be base64 text; `str()` reports only its length.
+    """
+
+    data: str
+    mime_type: Literal["image/jpeg", "image/png"] = "image/jpeg"
+    width: int | None = None
+    height: int | None = None
+    frame_id: str = ""
+
+    def agent_encode(self) -> list[dict[str, Any]]:
+        """Return a one-element list with an `image_url` entry holding a data URI."""
+        data_uri = f"data:{self.mime_type};base64,{self.data}"
+        return [{"type": "image_url", "image_url": {"url": data_uri}}]
+
+    def __str__(self) -> str:
+        """Summarize the artifact without including the encoded payload itself."""
+        payload_len = len(self.data)
+        known = self.width is not None and self.height is not None
+        dims = f"{self.width}x{self.height}" if known else "unknown"
+        return (
+            f"EncodedImageArtifact(mime_type={self.mime_type}, "
+            f"dims={dims}, base64_chars={payload_len}, frame_id='{self.frame_id}')"
+        )
diff --git a/dimos/agents/blind_assistant_prompt.py b/dimos/agents/blind_assistant_prompt.py
new file mode 100644
index 0000000000..80a3b60662
--- /dev/null
+++ b/dimos/agents/blind_assistant_prompt.py
@@ -0,0 +1,57 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+BLIND_ASSISTANT_PROMPT = """
+You are Daneel, a guide robot assisting a user who is blind or has low vision.
+You control a Unitree Go2 quadruped.
+
+# CRITICAL: SAFETY
+User safety is the absolute priority.
+- Move slowly. Never exceed 0.5 m/s linear or 0.6 rad/s angular.
+- Never lead the user toward stairs, drop-offs, or obstacles you are uncertain about.
+- If you lose track of where the user is, call `narrate("I've paused — let me know when you're ready")` and stop.
+- Treat any `<user_command>stop</user_command>` message as an immediate halt: call `stop_movement` before anything else.
+
+# USER MESSAGES
+Messages from the user arrive tagged:
+- <user_speech>...</user_speech>   a new request
+- <user_reply>...</user_reply>      an answer to a question you posed via `ask_user`
+- <user_command>stop</user_command> halt immediately
+
+# COMMUNICATION DISCIPLINE
+The user cannot see. They rely entirely on what you say.
+- Use `narrate(text)` constantly: turning, pausing, what you see, hazards you notice. One sentence per call.
+- Narrate BEFORE acting, not after.
+- Use `ask_user(question)` ONLY when you need a decision. The loop pauses until they answer.
+- Use `reply_user(status, summary)` ONLY at task completion or unrecoverable failure. Do not call mid-task.
+
+Do not call `speak`. Use `narrate` instead.
+
+# TASK PROTOCOL
+When you receive a new <user_speech> request to go somewhere:
+
+1. CONFIRM SCOPE.
+   - If the destination (or a sign for it) is visible to you right now, narrate that and proceed to step 2.
+   - If you cannot see it, call `ask_user("I don't see {destination} from here — should I look around to find it?")` Do not start exploring without explicit permission.
+
+2. NAVIGATE.
+   - In-view: call `navigate_with_text("{target}")`.
+   - Out-of-view (with permission): call `start_exploration()`. While exploring, narrate candidate signs and objects you see. Call `stop_movement` and switch to `navigate_with_text` as soon as a clear match appears.
+
+3. ARRIVE.
+   - When you believe you've reached the target, narrate what you see and call `reply_user(status="arrived", summary="...")`.
+
+4. ABORT.
+   - If you cannot find the target after 3 exploration attempts, OR any safety check fails, OR the user sends `<user_command>stop</user_command>`, call `reply_user(status="failed", summary="describe what you saw and why you stopped")`.
+
+# OUT-OF-SCOPE REQUESTS
+You only help the user navigate. If asked to do anything else, call `narrate("I can only help you find places right now")` and do not act.
+
+# IDENTITY
+You are Daneel. If someone says "Daniel" or similar, ignore it — that's a speech-to-text error.
+"""
diff --git a/dimos/agents/guide_web_input.py b/dimos/agents/guide_web_input.py
new file mode 100644
index 0000000000..8b8f791417
--- /dev/null
+++ b/dimos/agents/guide_web_input.py
@@ -0,0 +1,110 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+"""Web input + state-stream variant for the guide-robot blueprint.
+
+Same as `WebInput` but also exposes an `agent_state` text stream that the
+Vercel webapp subscribes to over SSE. Auto-connects to
+`BlindAssistantSkillContainer.agent_state: Out[str]`.
+"""
+
+from threading import Thread
+from typing import TYPE_CHECKING
+
+import reactivex as rx
+import reactivex.operators as ops
+
+from dimos.constants import DEFAULT_THREAD_JOIN_TIMEOUT
+from dimos.core.core import rpc
+from dimos.core.module import Module
+from dimos.core.stream import In
+from dimos.core.transport import pLCMTransport
+from dimos.stream.audio.node_normalizer import AudioNormalizer
+from dimos.utils.logging_config import setup_logger
+from dimos.web.robot_web_interface import RobotWebInterface
+
+if TYPE_CHECKING:
+    from dimos.stream.audio.base import AudioEvent
+
+logger = setup_logger()
+
+
+class GuideWebInput(Module):
+    """Web input + agent_state SSE for the guide blueprint.
+
+    Set `enable_stt=False` to skip Whisper initialization (and its model
+    download). Without STT, voice via `/upload_audio` is disabled, but text
+    queries via `/submit_query` still work — useful for testing the network
+    path without waiting for the model to land.
+    """
+
+    agent_state: In[str]
+
+    _web_interface: RobotWebInterface | None = None
+    _thread: Thread | None = None
+    _human_transport: pLCMTransport[str] | None = None
+    _agent_state_subject: rx.subject.Subject[str] | None = None
+
+    def __init__(self, enable_stt: bool = True, **kwargs):  # type: ignore[no-untyped-def]
+        super().__init__(**kwargs)
+        self.enable_stt = enable_stt
+
+    @rpc
+    def start(self) -> None:
+        """Create the web interface, wire its streams, and run it on a daemon thread."""
+        super().start()
+        self._human_transport = pLCMTransport("/human_input")
+        self._agent_state_subject = rx.subject.Subject()
+
+        audio_subject: rx.subject.Subject[AudioEvent] | None = (
+            rx.subject.Subject() if self.enable_stt else None
+        )
+        self._web_interface = RobotWebInterface(
+            port=5555,
+            text_streams={
+                "agent_responses": rx.subject.Subject(),
+                "agent_state": self._agent_state_subject,
+            },
+            audio_subject=audio_subject,
+        )
+
+        if self.enable_stt and audio_subject is not None:
+            normalizer = AudioNormalizer()
+            from dimos.stream.audio.stt.node_whisper import WhisperNode
+
+            stt_node = WhisperNode()
+            normalizer.consume_audio(audio_subject.pipe(ops.share()))
+            stt_node.consume_audio(normalizer.emit_audio())
+            unsub = stt_node.emit_text().subscribe(self._human_transport.publish)
+            self.register_disposable(unsub)
+        else:
+            logger.info("STT disabled — voice uploads will be ignored, text only.")
+
+        # Browser → /human_input
+        unsub = self._web_interface.query_stream.subscribe(self._human_transport.publish)
+        self.register_disposable(unsub)
+
+        # BlindAssistant.agent_state → SSE text_stream. An In-port subscribe result is
+        # wrapped in Disposable before registering, as BlindAssistantSkillContainer does.
+        unsub = self.agent_state.subscribe(self._agent_state_subject.on_next)
+        self.register_disposable(rx.disposable.Disposable(unsub))
+
+        self._thread = Thread(target=self._web_interface.run, daemon=True)
+        self._thread.start()
+        logger.info("Guide web interface started at http://localhost:5555")
+
+    @rpc
+    def stop(self) -> None:
+        """Shut down the web interface, join its thread, and stop the input transport."""
+        if self._web_interface:
+            self._web_interface.shutdown()
+        if self._thread:
+            self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+        if self._human_transport:
+            self._human_transport.lcm.stop()
+        super().stop()
diff --git a/dimos/agents/mcp/mcp_client.py b/dimos/agents/mcp/mcp_client.py
index 75b532e9cc..e94ace0c7e 100644
--- a/dimos/agents/mcp/mcp_client.py
+++ b/dimos/agents/mcp/mcp_client.py
@@ -210,8 +210,39 @@ def _on_human_input(string: str) -> None:
 
     @rpc
     def on_system_modules(self, _modules: list[RPCClient]) -> None:
-        tools = self._fetch_tools()
+        """Build the agent from registered MCP tools; retry in the background if none yet."""
+        # Quick initial attempt — if McpServer.on_system_modules already ran,
+        # this returns the registered tools and we're done.
+        tools = self._fetch_tools_safe(timeout=5.0, interval=0.5)
+        self._rebuild_agent(tools)
+        if not self._thread.is_alive():
+            self._thread.start()
+
+        # With tools in hand there is nothing left to do. Otherwise McpServer's
+        # on_system_modules hasn't populated app.state.skills yet (lifecycle
+        # race — both modules receive on_system_modules but the order is
+        # non-deterministic), so keep polling in the background and rebuild
+        # the agent once tools register.
+        if tools:
+            return
+
+        logger.warning("MCP tools empty after initial fetch — will retry in background")
+        Thread(
+            target=self._background_retry_tools,
+            name=f"{self.__class__.__name__}-tool-refetch",
+            daemon=True,
+        ).start()
 
+    def _fetch_tools_safe(
+        self, timeout: float = 5.0, interval: float = 0.5
+    ) -> list[StructuredTool]:
+        try:
+            return self._fetch_tools(timeout=timeout, interval=interval)
+        except Exception as exc:  # best-effort: log and fall through to "no tools"
+            logger.warning("MCP tool fetch failed: %s", exc)
+        return []
+
+    def _rebuild_agent(self, tools: list[StructuredTool]) -> None:
         model: str | Any = self.config.model
         if self.config.model_fixture is not None:
             from dimos.agents.testing import MockModel
@@ -224,8 +255,24 @@ def on_system_modules(self, _modules: list[RPCClient]) -> None:
                 tools=tools,
                 system_prompt=self.config.system_prompt,
             )
-            if not self._thread.is_alive():
-                self._thread.start()
+
+    def _background_retry_tools(self) -> None:
+        """Poll for MCP tools and rebuild the agent once any register (10 min cap)."""
+        deadline = time.monotonic() + 600.0  # 10 min ceiling
+        interval = 2.0
+        # wait() is an interruptible sleep (assumes a threading.Event): True once set.
+        while time.monotonic() < deadline and not self._stop_event.wait(interval):
+            tools = self._fetch_tools_safe(timeout=2.0, interval=0.5)
+            if tools:
+                self._rebuild_agent(tools)
+                logger.info(
+                    "Background MCP tool refetch succeeded.",
+                    n_tools=len(tools),
+                    tools=[t.name for t in tools],
+                )
+                return
+        if not self._stop_event.is_set():
+            logger.error("Background MCP tool refetch gave up after 10 minutes.")
 
     @rpc
     def stop(self) -> None:
diff --git a/dimos/agents/mcp/test_mcp_server.py b/dimos/agents/mcp/test_mcp_server.py
index fd514b0643..3b633ee33c 100644
--- a/dimos/agents/mcp/test_mcp_server.py
+++ b/dimos/agents/mcp/test_mcp_server.py
@@ -18,6 +18,7 @@
 import json
 from unittest.mock import MagicMock
 
+from dimos.agents.artifacts import EncodedImageArtifact
 from dimos.agents.mcp.mcp_server import handle_request
 from dimos.core.module import SkillInfo
 
@@ -155,6 +156,25 @@ def test_mcp_module_handles_errors() -> None:
     assert "not found" in response["result"]["content"][0]["text"].lower()
 
 
+def test_mcp_module_returns_agent_encoded_artifacts() -> None:
+    """A skill result with `agent_encode()` comes back as the tools/call content."""
+    schema = json.dumps({"type": "object", "properties": {}})
+    skills = [SkillInfo(class_name="TestSkills", func_name="observe", args_schema=schema)]
+    artifact = EncodedImageArtifact(data="abc123", width=320, height=240, frame_id="camera")
+    rpc_calls = _make_rpc_calls(skills, {"observe": artifact})
+    request = {
+        "method": "tools/call",
+        "id": 9,
+        "params": {"name": "observe", "arguments": {}},
+    }
+
+    response = asyncio.run(handle_request(request, skills, rpc_calls))
+
+    assert response is not None
+    assert response["result"]["content"] == artifact.agent_encode()
+    assert "abc123" not in str(artifact)
+
+
 def test_mcp_module_initialize_and_unknown() -> None:
     response = asyncio.run(handle_request({"method": "initialize", "id": 1}, [], {}))
     assert response["result"]["serverInfo"]["name"] == "dimensional"
diff --git a/dimos/agents/skills/blind_assistant_skills.py b/dimos/agents/skills/blind_assistant_skills.py
new file mode 100644
index 0000000000..12aa903ce6
--- /dev/null
+++ b/dimos/agents/skills/blind_assistant_skills.py
@@ -0,0 +1,230 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+"""Skills for guiding a blind / low-vision user.
+
+Exposes three agent-callable skills:
+    - narrate(text):         continuous spoken updates (the user can't see)
+    - ask_user(question):    blocks the agent loop until any user reply arrives
+    - reply_user(status, summary): task termination signal
+
+Maintains a JSON state snapshot published to the `agent_state` output stream
+on every change, for display in the debug web UI.
+
+Wiring (in your blueprint):
+    - `user_reply` <- the same text channel `submit_query` already feeds
+                      (e.g. LCM /human_input). Both initial requests and
+                      replies arrive here; ask_user just unblocks on the
+                      next message.
+    - `agent_state` -> a text_stream named "agent_state" on the web server,
+                       which the Vercel app subscribes to over SSE.
+
+This module owns its own TTS. Remove SpeakSkill from the blueprint when
+using BlindAssistant to avoid two pipelines fighting over the audio device.
+"""
+
+import json
+import threading
+import time
+
+from reactivex import Subject
+from reactivex.disposable import Disposable
+
+from dimos.agents.annotation import skill
+from dimos.core.core import rpc
+from dimos.core.module import Module
+from dimos.core.stream import In, Out
+from dimos.stream.audio.node_output import SounddeviceAudioOutput
+from dimos.stream.audio.tts.node_openai import OpenAITTSNode, Voice
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+VALID_STATUSES = ("arrived", "failed", "stopped")
+
+
+class BlindAssistantSkillContainer(Module):
+    user_reply: In[str]
+    agent_state: Out[str]
+
+    _tts_node: OpenAITTSNode | None = None
+    _audio_output: SounddeviceAudioOutput | None = None
+    _audio_lock: threading.Lock = threading.Lock()
+    _reply_event: threading.Event = threading.Event()
+    _latest_reply: str = ""
+
+    _intent: str = ""
+    _phase: str = "idle"
+    _current_skill: dict | None = None
+    _last_observation: str = ""
+    _last_narration: str = ""
+    _awaiting_user: str | None = None
+
+    @rpc
+    def start(self) -> None:
+        """Build the TTS -> audio output chain, subscribe to user replies, publish state."""
+        super().start()
+        self._tts_node = OpenAITTSNode(speed=1.2, voice=Voice.ONYX)
+        self._audio_output = SounddeviceAudioOutput(sample_rate=24000)
+        self._audio_output.consume_audio(self._tts_node.emit_audio())
+
+        unsubscribe = self.user_reply.subscribe(self._on_user_reply)
+        self.register_disposable(Disposable(unsubscribe))
+        self._publish_state()
+
+    @rpc
+    def stop(self) -> None:  # release the TTS node and audio output, then stop the module
+        if self._tts_node:
+            self._tts_node.dispose()
+            self._tts_node = None  # `_tts` logs "TTS not initialized" once this is None
+        if self._audio_output:
+            self._audio_output.stop()
+            self._audio_output = None
+        super().stop()
+
+    def _on_user_reply(self, text: str) -> None:
+        """Record an incoming user message and wake any `ask_user` call waiting on it."""
+        self._latest_reply, self._awaiting_user = text, None
+        if self._phase == "awaiting_user":
+            self._phase = "searching"
+        self._reply_event.set()
+        self._publish_state()
+
+    def _publish_state(self) -> None:
+        """Publish a JSON snapshot of the current state on `agent_state`.
+
+        Serialization and publish failures are logged as warnings, not raised.
+        """
+        snapshot = dict(
+            ts=time.time(), intent=self._intent, phase=self._phase,
+            current_skill=self._current_skill, last_observation=self._last_observation,
+            last_narration=self._last_narration, awaiting_user=self._awaiting_user,
+        )
+        try:
+            self.agent_state.publish(json.dumps(snapshot))
+        except Exception as exc:
+            logger.warning("agent_state publish failed: %s", exc)
+
+    def _tts(self, text: str, timeout: float | None = None) -> bool:
+        """Speak `text`; True once the TTS text stream emits or errors before the timeout."""
+        if self._tts_node is None:
+            logger.error("TTS not initialized")
+            return False
+        wait = timeout if timeout is not None else max(5, len(text) * 0.1)
+        with self._audio_lock:
+            text_subject: Subject[str] = Subject()
+            done = threading.Event()
+            self._tts_node.consume_text(text_subject)
+            subscription = self._tts_node.emit_text().subscribe(
+                on_next=lambda _: done.set(), on_error=lambda _: done.set()
+            )
+            try:  # always drop the subscription, even if pushing the text raises
+                text_subject.on_next(text)
+                text_subject.on_completed()
+                ok = done.wait(timeout=wait)
+            finally:
+                subscription.dispose()
+            if ok:
+                time.sleep(0.3)
+            return ok
+
+    @skill
+    def narrate(self, text: str) -> str:
+        """Speak a short status update to the user.
+
+        The user is blind. Narrate BEFORE acting, and whenever anything
+        noteworthy happens (turning, pausing, seeing a sign, approaching an
+        obstacle, losing track of the destination). One sentence per call.
+
+        Example:
+            narrate("I see a hallway with two doors, looking for a bathroom sign.")
+
+        Args:
+            text: one short sentence describing the current action or observation.
+        """
+        self._last_narration = text
+        self._publish_state()
+        ok = self._tts(text)
+        return f"Narrated: {text}" if ok else f"TTS timeout: {text}"
+
+    @skill
+    def ask_user(self, question: str, timeout_s: float = 30.0) -> str:
+        """Ask the user a question and BLOCK until they reply.
+
+        Use this when you need a decision (e.g. 'should I look around?').
+        The question is spoken aloud, the state phase becomes
+        'awaiting_user', and this call returns only when a new user message
+        arrives or the timeout expires.
+
+        Args:
+            question: one sentence, yes/no preferred.
+            timeout_s: seconds to wait before giving up. Default 30.
+
+        Returns:
+            The text of the user's reply, or 'TIMEOUT' if none arrived.
+
+        Example:
+            answer = ask_user("I don't see a bathroom from here. Should I look around?")
+        """
+        self._awaiting_user = question
+        self._phase = "awaiting_user"
+        self._reply_event.clear()
+        self._latest_reply = ""
+        self._publish_state()
+
+        self._tts(question)
+
+        got_reply = self._reply_event.wait(timeout=timeout_s)
+        if not got_reply:
+            self._awaiting_user = None
+            self._phase = "searching"
+            self._publish_state()
+            return "TIMEOUT"
+        return self._latest_reply
+
+    @skill
+    def reply_user(self, status: str, summary: str) -> str:
+        """Terminate the current task and report its outcome.
+
+        Call ONLY when the task is fully complete or has unrecoverably failed.
+        Do not call mid-task — use `narrate` for progress updates.
+
+        Args:
+            status: one of 'arrived', 'failed', 'stopped'.
+            summary: one sentence describing the outcome.
+
+        Example:
+            reply_user(status="arrived", summary="We're at the bathroom entrance.")
+        """
+        if status not in VALID_STATUSES:
+            return f"Error: invalid status '{status}'. Use one of {VALID_STATUSES}."
+
+        self._phase = {"arrived": "done", "failed": "failed", "stopped": "idle"}[status]
+        self._last_narration = summary
+        self._publish_state()
+        self._tts(summary)
+        return f"Task ended: status={status}"
+
+    @rpc
+    def set_intent(self, intent: str) -> None:
+        """Called by the planner / agent at the start of a new task."""
+        self._intent = intent
+        self._phase = "searching"
+        self._publish_state()
+
+    @rpc
+    def set_observation(self, observation: str) -> None:
+        """Called by the VLM loop with a one-line scene description."""
+        self._last_observation = observation
+        self._publish_state()
+
+    @rpc
+    def set_current_skill(self, name: str, args: dict, state: str) -> None:
+        """Called from the agent loop when a skill is dispatched / completes."""
+        self._current_skill = {"name": name, "args": args, "state": state}
+        self._publish_state()
diff --git a/dimos/agents/skills/navigation.py b/dimos/agents/skills/navigation.py
index d88bec452e..f7c70b0614 100644
--- a/dimos/agents/skills/navigation.py
+++ b/dimos/agents/skills/navigation.py
@@ -25,6 +25,8 @@
 from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
 from dimos.msgs.geometry_msgs.Quaternion import Quaternion
 from dimos.msgs.geometry_msgs.Vector3 import Vector3, make_vector3
+from dimos.msgs.nav_msgs.OccupancyGrid import OccupancyGrid
+from dimos.msgs.nav_msgs.Path import Path
 from dimos.msgs.sensor_msgs.Image import Image
 from dimos.navigation.base import NavigationState
 from dimos.navigation.navigation_spec import NavigationInterfaceSpec
@@ -40,6 +42,8 @@
 class NavigationSkillContainer(Module):
     _latest_image: Image | None = None
     _latest_odom: PoseStamped | None = None
+    _latest_path: Path | None = None
+    _latest_terrain_classmap: OccupancyGrid | None = None
     _skill_started: bool = False
     _similarity_threshold: float = 0.23
 
@@ -49,6 +53,8 @@ class NavigationSkillContainer(Module):
 
     color_image: In[Image]
     odom: In[PoseStamped]
+    path: In[Path]
+    terrain_classmap: In[OccupancyGrid]
 
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
@@ -64,6 +70,12 @@ def start(self) -> None:
         super().start()
         self.register_disposable(Disposable(self.color_image.subscribe(self._on_color_image)))
         self.register_disposable(Disposable(self.odom.subscribe(self._on_odom)))
+        if self._can_subscribe(self.path):
+            self.register_disposable(Disposable(self.path.subscribe(self._on_path)))
+        if self._can_subscribe(self.terrain_classmap):
+            self.register_disposable(
+                Disposable(self.terrain_classmap.subscribe(self._on_terrain_classmap))
+            )
         self._skill_started = True
 
     @rpc
@@ -76,6 +88,22 @@ def _on_color_image(self, image: Image) -> None:
     def _on_odom(self, odom: PoseStamped) -> None:
         self._latest_odom = odom
 
+    def _on_path(self, path: Path) -> None:
+        """Cache the most recent path message received on the `path` stream."""
+        self._latest_path = path
+
+    def _on_terrain_classmap(self, terrain_classmap: OccupancyGrid) -> None:
+        """Cache the most recent grid received on the `terrain_classmap` stream."""
+        self._latest_terrain_classmap = terrain_classmap
+
+    def _can_subscribe(self, stream: Any) -> bool:
+        """Return True when `stream` has a connection or a `_transport` set.
+
+        `start` uses this to skip subscribing to the `path` and
+        `terrain_classmap` inputs when neither is present.
+        """
+        return stream.connection is not None or getattr(stream, "_transport", None) is not None
+
     @skill
     def tag_location(self, location_name: str) -> str:
         """Tag this location in the spatial memory with a name.
     @skill
     def tag_location(self, location_name: str) -> str:
         """Tag this location in the spatial memory with a name.
@@ -162,13 +183,68 @@ def _navigate_to(self, pose: PoseStamped, message: str) -> str:
         logger.info(
             f"Navigating to pose: ({pose.position.x:.2f}, {pose.position.y:.2f}, {pose.position.z:.2f})"
         )
+        request_time = time.time()
         self._navigation.set_goal(pose)
+        stair_notice = self._planned_stair_route_notice(request_time)
 
         return (
             f"{message}. Started navigating to that position. "
             f"To cancel movement call the 'stop_navigation' tool."
+            f"{stair_notice}"
         )
 
+    def _planned_stair_route_notice(self, request_time: float, timeout: float = 1.0) -> str:
+        """Return a notice when the route planned for the latest goal crosses stairs.
+
+        Waits up to `timeout` seconds for a path stamped at or after
+        `request_time`. The first such path decides the result, so a route
+        without stairs does not hold the caller for the whole timeout.
+
+        Args:
+            request_time: `time.time()` value captured just before the goal was set.
+            timeout: maximum time in seconds to wait for a fresh path.
+
+        Returns:
+            A sentence with a leading space, ready to append to the skill
+            result, or "" when no fresh path arrived or it avoids stairs.
+        """
+        # No terrain classmap means no path can be classified; don't block the skill.
+        if self._latest_terrain_classmap is None:
+            return ""
+
+        deadline = time.monotonic() + timeout
+        while time.monotonic() < deadline:
+            path = self._latest_path
+            if path is not None and path.ts >= request_time:
+                if self._path_crosses_stairs(path):
+                    return " The planned route includes stairs; treating them as traversable terrain."
+                return ""
+            time.sleep(0.05)
+        return ""
+
+    def _path_crosses_stairs(self, path: Path) -> bool:
+        """Return True if any pose of `path` lies on a stair cell of the latest classmap.
+
+        Poses that fall outside the grid are ignored. Returns False when no
+        classmap has been received or the path has no poses.
+        """
+        terrain_classmap = self._latest_terrain_classmap
+        if terrain_classmap is None or not path.poses:
+            return False
+
+        # NOTE(review): 50 is assumed to be the value the terrain classmap uses
+        # for stair cells; confirm against the classmap producer.
+        stair_value = 50
+        for pose in path.poses:
+            grid_position = terrain_classmap.world_to_grid(pose.position)
+            x = round(grid_position.x)
+            y = round(grid_position.y)
+            in_bounds = 0 <= x < terrain_classmap.width and 0 <= y < terrain_classmap.height
+            if in_bounds and terrain_classmap.grid[y, x] == stair_value:
+                return True
+
+        return False
+
     def _navigate_to_object(self, query: str) -> str | None:
         if self._object_tracking is None:
             return None
     def _navigate_to_object(self, query: str) -> str | None:
         if self._object_tracking is None:
             return None
diff --git a/dimos/agents/skills/test_navigation.py b/dimos/agents/skills/test_navigation.py
index f206d63ba0..2a3bd851af 100644
--- a/dimos/agents/skills/test_navigation.py
+++ b/dimos/agents/skills/test_navigation.py
@@ -21,6 +21,8 @@
 from dimos.core.module import Module
 from dimos.core.stream import Out
 from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
+from dimos.msgs.nav_msgs.OccupancyGrid import OccupancyGrid
+from dimos.msgs.nav_msgs.Path import Path
 from dimos.msgs.sensor_msgs.Image import Image
 from dimos.navigation.base import NavigationState
 from dimos.types.robot_location import RobotLocation
@@ -159,3 +161,17 @@ def test_go_to_semantic_location(agent_setup) -> None:
     )
 
     assert "success" in history[-1].content.lower()
+
+
+def test_stair_route_notice_for_path_crossing_terrain_classmap() -> None:
+    """A fresh path through a cell valued 50 in the classmap yields the stair notice."""
+    # object.__new__ bypasses __init__, so only the attributes set below exist.
+    skill = object.__new__(NavigationSkillContainer)
+    grid = OccupancyGrid(width=10, height=10, resolution=1.0)
+    grid.grid[5, 5] = 50
+    skill._latest_terrain_classmap = grid
+    skill._latest_path = Path(poses=[PoseStamped(position=[5.0, 5.0, 0.0])])
+
+    notice = skill._planned_stair_route_notice(skill._latest_path.ts - 0.1, timeout=0.01)
+
+    assert "stairs" in notice.lower()
diff --git a/dimos/agents/skills/test_unitree_skill_container.py b/dimos/agents/skills/test_unitree_skill_container.py
index 30bf6139e8..7fa484f8da 100644
--- a/dimos/agents/skills/test_unitree_skill_container.py
+++ b/dimos/agents/skills/test_unitree_skill_container.py
@@ -20,6 +20,7 @@
 from dimos.core.core import rpc
 from dimos.core.module import Module
 from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
+from dimos.msgs.geometry_msgs.Twist import Twist
 from dimos.navigation.base import NavigationState
 from dimos.robot.unitree.unitree_skill_container import _UNITREE_COMMANDS, UnitreeSkillContainer
 
@@ -43,15 +44,69 @@ def cancel_goal(self) -> bool:
 
 
 class StubGO2Connection(Module):
+    last_twist: Twist | None = None
+    last_duration: float | None = None
+
+    @rpc
+    def move(self, twist: Twist, duration: float = 0.0) -> bool:
+        self.last_twist = twist
+        self.last_duration = duration
+        return True
+
     @rpc
     def publish_request(self, topic: str, data: dict[str, Any]) -> dict[Any, Any]:
         return {}
 
 
+class PlainGO2Connection:
+    """Connection double, not a Module, that records every `move` call in `calls`."""
+
+    last_twist: Twist | None = None
+    last_duration: float | None = None
+
+    def __init__(self) -> None:
+        self.calls: list[tuple[Twist, float]] = []
+
+    def move(self, twist: Twist, duration: float = 0.0) -> bool:
+        self.last_twist = twist
+        self.last_duration = duration
+        self.calls.append((twist, duration))
+        return True
+
+
+class FakeTransform:
+    """Transform double that wraps a pose and exposes that pose's timestamp as `ts`."""
+
+    def __init__(self, pose: PoseStamped) -> None:
+        self._pose = pose
+        self.ts = pose.ts
+
+    def to_pose(self) -> PoseStamped:
+        return self._pose
+
+
+class FakeTF:
+    """TF double whose `get` returns the given poses in order, repeating the last one."""
+
+    def __init__(self, poses: list[PoseStamped]) -> None:
+        self._poses = poses
+        self._index = 0
+
+    def get(self, _source: str, _target: str) -> FakeTransform:
+        pose = self._poses[min(self._index, len(self._poses) - 1)]
+        self._index += 1
+        return FakeTransform(pose)
+
+
 class MockedUnitreeSkill(UnitreeSkillContainer):
     pass
 
 
+def _pose(x: float, y: float = 0.0, ts: float = 1.0) -> PoseStamped:
+    """Build a PoseStamped at (x, y, 0.0) with timestamp `ts`."""
+    return PoseStamped(ts=ts, position=[x, y, 0.0])
+
+
 def test_pounce(agent_setup) -> None:
     history = agent_setup(
         blueprints=[
@@ -70,3 +118,60 @@ def test_did_you_mean() -> None:
     suggestions = difflib.get_close_matches("Pounce", _UNITREE_COMMANDS.keys(), n=3, cutoff=0.6)
     assert "FrontPounce" in suggestions
     assert "Pose" in suggestions
+
+
+def test_move_clamps_direct_velocity_without_tf_monitoring() -> None:
+    """With no `_tf` set, `move` clamps the requested velocity and ends with a zero twist."""
+    skill = object.__new__(MockedUnitreeSkill)
+    connection = PlainGO2Connection()
+    skill._connection = connection  # type: ignore[assignment]
+
+    result = skill.move(x=2.0, y=-2.0, yaw=3.0, duration=30.0)
+
+    assert "velocity=(0.50, -0.40, 1.00)" in result
+    assert len(connection.calls) == 11
+    first_twist, first_duration = connection.calls[0]
+    assert first_duration == 1.0
+    assert first_twist.linear.x == 0.5
+    assert first_twist.linear.y == -0.4
+    assert first_twist.angular.z == 1.0
+    assert connection.calls[-1][0].is_zero()
+    assert connection.calls[-1][1] == 0.0
+
+
+def test_move_stops_and_recovers_when_tf_progress_stalls() -> None:
+    """When successive TF poses show no progress, `move` stops and then backs up."""
+    skill = object.__new__(MockedUnitreeSkill)
+    connection = PlainGO2Connection()
+    skill._connection = connection  # type: ignore[assignment]
+    skill._tf = FakeTF([_pose(0.0, ts=1.0), _pose(0.0, ts=2.0), _pose(0.0, ts=3.0)])  # type: ignore[assignment]
+
+    result = skill.move(x=0.5, duration=5.0)
+
+    assert "appears blocked" in result
+    assert "reverse recovery" in result
+    assert len(connection.calls) == 5
+    assert connection.calls[0][0].linear.x == 0.5
+    assert connection.calls[0][1] == 1.0
+    assert connection.calls[1][0].linear.x == 0.5
+    assert connection.calls[1][1] == 1.0
+    assert connection.calls[2][0].is_zero()
+    assert connection.calls[3][0].linear.x == -0.2
+    assert connection.calls[3][1] == 0.8
+    assert connection.calls[4][0].is_zero()
+
+
+def test_move_does_not_report_blocked_when_tf_timestamp_is_stale() -> None:
+    """TF poses whose timestamp never changes must not be read as a blocked robot."""
+    skill = object.__new__(MockedUnitreeSkill)
+    connection = PlainGO2Connection()
+    skill._connection = connection  # type: ignore[assignment]
+    skill._tf = FakeTF([_pose(0.0, ts=1.0)] * 5)  # type: ignore[assignment]
+
+    result = skill.move(x=0.5, duration=3.0)
+
+    assert "Completed direct movement" in result
+    assert "appears blocked" not in result
+    assert len(connection.calls) == 4
+    assert all(call[0].linear.x == 0.5 for call in connection.calls[:-1])
+    assert connection.calls[-1][0].is_zero()
diff --git a/dimos/agents/system_prompt.py b/dimos/agents/system_prompt.py
index 54f713f538..43d518e237 100644
--- a/dimos/agents/system_prompt.py
+++ b/dimos/agents/system_prompt.py
@@ -28,7 +28,11 @@
 
 ## Navigation Flow
 - Use `navigate_with_text` for most navigation. It searches tagged locations first, then visible objects, then the semantic map.
+- Use `relative_move` for map-based local moves. If it reports that no path was found, do not assume there is an obstacle; the costmap may be stale or too conservative.
+- Use `move` for short direct velocity movement only when the user indicates the path is clear or after `observe` confirms the path is clear. Convert distance requests into conservative velocity and duration.
+- If `move` reports that the robot appears blocked, do not keep moving forward. Use `observe`, choose a short clear sidestep/turn/retry route if visible, or tell the user you are blocked if no safe bypass is visible.
 - Tag important locations with `tag_location` so you can return to them later.
+- If navigation reports that the planned route includes stairs, treat that as normal traversable terrain and continue unless the user asks you to stop.
 - During `start_exploration`, avoid calling other skills except `stop_movement`.
 - Always run `execute_sport_command("RecoveryStand")` after dynamic movements (flips, jumps, sit) before navigating.
 
diff --git a/dimos/agents/web_human_input.py b/dimos/agents/web_human_input.py
index 0a4fe7c3f3..ae1adb74c6 100644
--- a/dimos/agents/web_human_input.py
+++ b/dimos/agents/web_human_input.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
 from threading import Thread
 from typing import TYPE_CHECKING
 
@@ -36,6 +37,7 @@ class WebInput(Module):
     _web_interface: RobotWebInterface | None = None
     _thread: Thread | None = None
     _human_transport: pLCMTransport[str] | None = None
+    _agent_transport: pLCMTransport | None = None
 
     @rpc
     def start(self) -> None:
@@ -45,12 +47,34 @@ def start(self) -> None:
 
         audio_subject: rx.subject.Subject[AudioEvent] = rx.subject.Subject()
 
+        agent_responses: rx.subject.Subject[str] = rx.subject.Subject()
         self._web_interface = RobotWebInterface(
             port=5555,
-            text_streams={"agent_responses": rx.subject.Subject()},
+            text_streams={"agent_responses": agent_responses},
             audio_subject=audio_subject,
         )
 
+        # Forward the LLM agent's replies (published on LCM "/agent") to the
+        # agent_responses SSE stream so the web UI can display them.
+        self._agent_transport = pLCMTransport("/agent")
+
+        def _on_agent_message(msg: object) -> None:
+            kind = getattr(msg, "type", None)  # "human" | "ai" | "tool" | "system"
+            if kind == "human":
+                return  # skip the echoed user input
+            content = getattr(msg, "content", None)
+            if isinstance(content, list):
+                content = " ".join(
+                    str(part.get("text", "")) if isinstance(part, dict) else str(part)
+                    for part in content
+                )
+            if content:
+                # Emit a typed envelope so the UI can read out only the agent's
+                # spoken replies (kind == "ai") and treat tool output as status.
+                agent_responses.on_next(json.dumps({"kind": kind, "text": str(content)}))
+
+        self._agent_transport.subscribe(_on_agent_message)
+
         normalizer = AudioNormalizer()
 
         # Here to prevent unwanted imports in the file.
@@ -84,4 +108,6 @@ def stop(self) -> None:
             self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
         if self._human_transport:
             self._human_transport.lcm.stop()
+        if self._agent_transport:
+            self._agent_transport.lcm.stop()
         super().stop()
diff --git a/dimos/agents_deprecated/memory/image_embedding.py b/dimos/agents_deprecated/memory/image_embedding.py
index 373f8c5663..79e6b3441d 100644
--- a/dimos/agents_deprecated/memory/image_embedding.py
+++ b/dimos/agents_deprecated/memory/image_embedding.py
@@ -22,7 +22,9 @@
 import base64
 import io
 import os
+from pathlib import Path
 import sys
+from typing import cast
 
 import cv2
 import numpy as np
@@ -58,15 +60,99 @@ def __init__(self, model_name: str = "clip", dimensions: int = 512) -> None:
         self.model: ort.InferenceSession | PreTrainedModel | None = None
         self.processor: ProcessorMixin | None = None
         self.model_path: str | None = None
+        self._onnx_providers: list[str] = []
 
         self._initialize_model()  # type: ignore[no-untyped-call]
 
         logger.info(f"ImageEmbeddingProvider initialized with model {model_name}")
 
+    def _preferred_onnx_providers(self) -> list[str]:
+        """Return the ONNX Runtime execution providers to request, in priority order.
+
+        CoreML is requested only when `DIMOS_ENABLE_COREML_CLIP` is set to
+        "1", "true" or "yes" (case-insensitive); CUDA is requested only off
+        macOS; CPU follows when available. If none of these is available,
+        every available provider is returned in the order ONNX Runtime
+        reports them, so the choice stays deterministic.
+        """
+        # Keep ONNX Runtime's own ordering: the fallback below must not depend
+        # on set iteration order.
+        available_providers = list(ort.get_available_providers())
+        providers: list[str] = []
+
+        coreml_enabled = os.getenv("DIMOS_ENABLE_COREML_CLIP", "").lower() in {
+            "1",
+            "true",
+            "yes",
+        }
+        if coreml_enabled and "CoreMLExecutionProvider" in available_providers:
+            providers.append("CoreMLExecutionProvider")
+
+        if sys.platform != "darwin" and "CUDAExecutionProvider" in available_providers:
+            providers.append("CUDAExecutionProvider")
+
+        if "CPUExecutionProvider" in available_providers:
+            providers.append("CPUExecutionProvider")
+
+        if not providers:
+            providers = list(available_providers)
+
+        return providers
+
+    def _load_onnx_session(
+        self, model_path: str | Path, providers: list[str]
+    ) -> ort.InferenceSession:
+        """Create an ONNX session for `model_path` and record its active providers."""
+        session = ort.InferenceSession(str(model_path), providers=providers)
+        self._onnx_providers = list(session.get_providers())
+        return session
+
+    def _reload_clip_session_on_cpu(self) -> bool:
+        """Swap the CLIP session for a CPU-only one after a provider failure.
+
+        Returns:
+            True if the session was reloaded; False when there is no model
+            path, the session already runs on CPU only, or CPU is unavailable.
+        """
+        if self.model_path is None:
+            return False
+        if self._onnx_providers == ["CPUExecutionProvider"]:
+            return False
+        if "CPUExecutionProvider" not in ort.get_available_providers():
+            return False
+
+        logger.warning(
+            f"Reloading CLIP ONNX model on CPU after provider failure. Previous providers: {self._onnx_providers}"
+        )
+        self.model = self._load_onnx_session(self.model_path, ["CPUExecutionProvider"])
+        return True
+
+    def _run_onnx_with_cpu_retry(self, ort_inputs: dict[str, np.ndarray]) -> list[np.ndarray]:
+        """Run inference, retrying once on a CPU-only session if the first run fails.
+
+        The original exception is re-raised when no CPU reload is possible.
+        """
+        session = cast("ort.InferenceSession", self.model)
+        try:
+            return session.run(None, ort_inputs)
+        except Exception:
+            if not self._reload_clip_session_on_cpu():
+                raise
+            session = cast("ort.InferenceSession", self.model)
+            return session.run(None, ort_inputs)
+
+    def _fallback_embedding(self) -> np.ndarray:
+        """Return a random unit-length vector with `self.dimensions` entries."""
+        embedding = np.random.randn(self.dimensions).astype(np.float32)
+        norm = np.linalg.norm(embedding)
+        if norm == 0:
+            embedding[0] = 1.0
+            return embedding
+        return embedding / norm
+
     def _initialize_model(self):  # type: ignore[no-untyped-def]
         """Initialize the specified embedding model."""
         try:
-            import onnxruntime as ort  # type: ignore[import-untyped]
             import torch  # noqa: F401
             from transformers import (
                 AutoFeatureExtractor,
@@ -79,19 +143,11 @@ def _initialize_model(self):  # type: ignore[no-untyped-def]
                 self.model_path = str(model_id)  # type: ignore[assignment]  # Store for pickling
                 processor_id = "openai/clip-vit-base-patch32"
 
-                providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
-                if sys.platform == "darwin":
-                    # 2025-11-17 12:36:47.877215 [W:onnxruntime:, helper.cc:82 IsInputSupported] CoreML does not support input dim > 16384. Input:text_model.embeddings.token_embedding.weight, shape: {49408,512}
-                    # 2025-11-17 12:36:47.878496 [W:onnxruntime:, coreml_execution_provider.cc:107 GetCapability] CoreMLExecutionProvider::GetCapability, number of partitions supported by CoreML: 88 number of nodes in the graph: 1504 number of nodes supported by CoreML: 933
-                    providers = ["CoreMLExecutionProvider"] + [
-                        each for each in providers if each != "CUDAExecutionProvider"
-                    ]
-
-                self.model = ort.InferenceSession(str(model_id), providers=providers)
+                providers = self._preferred_onnx_providers()
+                self.model = self._load_onnx_session(model_id, providers)
 
-                actual_providers = self.model.get_providers()  # type: ignore[attr-defined]
                 self.processor = CLIPProcessor.from_pretrained(processor_id)
-                logger.info(f"Loaded CLIP model: {model_id} with providers: {actual_providers}")
+                logger.info(f"Loaded CLIP model: {model_id} with providers: {self._onnx_providers}")
             elif self.model_name == "resnet":
                 model_id = "microsoft/resnet-50"  # type: ignore[assignment]
                 self.model = AutoModel.from_pretrained(model_id)
@@ -120,7 +176,7 @@ def get_embedding(self, image: np.ndarray | str | bytes) -> np.ndarray:
         """
         if self.model is None or self.processor is None:
             logger.error("Model not initialized. Using fallback random embedding.")
-            return np.random.randn(self.dimensions).astype(np.float32)
+            return self._fallback_embedding()
 
         pil_image = self._prepare_image(image)
 
@@ -148,7 +204,7 @@ def get_embedding(self, image: np.ndarray | str | bytes) -> np.ndarray:
                         ort_inputs["attention_mask"] = np.ones((batch_size, 1), dtype=np.int64)
 
                     # Run inference
-                    ort_outputs = self.model.run(None, ort_inputs)
+                    ort_outputs = self._run_onnx_with_cpu_retry(ort_inputs)
 
                     # Look up correct output name
                     output_names = [o.name for o in self.model.get_outputs()]
@@ -171,7 +227,7 @@ def get_embedding(self, image: np.ndarray | str | bytes) -> np.ndarray:
                 embedding = outputs.last_hidden_state[:, 0, :].numpy()[0]
             else:
                 logger.warning(f"Unsupported model: {self.model_name}. Using random embedding.")
-                embedding = np.random.randn(self.dimensions).astype(np.float32)
+                embedding = self._fallback_embedding()
 
             # Normalize and ensure correct dimensions
             embedding = embedding / np.linalg.norm(embedding)
@@ -181,7 +237,7 @@ def get_embedding(self, image: np.ndarray | str | bytes) -> np.ndarray:
 
         except Exception as e:
             logger.error(f"Error generating embedding: {e}")
-            return np.random.randn(self.dimensions).astype(np.float32)
+            return self._fallback_embedding()
 
     def get_text_embedding(self, text: str) -> np.ndarray:
         """
@@ -195,13 +251,13 @@ def get_text_embedding(self, text: str) -> np.ndarray:
         """
         if self.model is None or self.processor is None:
             logger.error("Model not initialized. Using fallback random embedding.")
-            return np.random.randn(self.dimensions).astype(np.float32)
+            return self._fallback_embedding()
 
         if self.model_name != "clip":
             logger.warning(
                 f"Text embeddings are only supported with CLIP model, not {self.model_name}. Using random embedding."
             )
-            return np.random.randn(self.dimensions).astype(np.float32)
+            return self._fallback_embedding()
 
         try:
             import torch
@@ -227,7 +283,7 @@ def get_text_embedding(self, text: str) -> np.ndarray:
                     )
 
                 # Run inference
-                ort_outputs = self.model.run(None, ort_inputs)
+                ort_outputs = self._run_onnx_with_cpu_retry(ort_inputs)
 
                 # Determine correct output (usually 'last_hidden_state' or 'text_embeds')
                 output_names = [o.name for o in self.model.get_outputs()]
@@ -249,7 +305,7 @@ def get_text_embedding(self, text: str) -> np.ndarray:
 
         except Exception as e:
             logger.error(f"Error generating text embedding: {e}")
-            return np.random.randn(self.dimensions).astype(np.float32)
+            return self._fallback_embedding()
 
     def _prepare_image(self, image: np.ndarray | str | bytes) -> Image.Image:
         """
diff --git a/dimos/agents_deprecated/memory/test_image_embedding.py b/dimos/agents_deprecated/memory/test_image_embedding.py
index 61daf8c399..c8d0479d4e 100644
--- a/dimos/agents_deprecated/memory/test_image_embedding.py
+++ b/dimos/agents_deprecated/memory/test_image_embedding.py
@@ -28,6 +28,58 @@
 from dimos.stream.video_provider import VideoProvider
 
 
+def test_clip_provider_prefers_cpu_on_macos(monkeypatch) -> None:
+    """On macOS without the CoreML opt-in, only the CPU provider is requested."""
+    provider = object.__new__(ImageEmbeddingProvider)
+
+    monkeypatch.setattr(
+        "dimos.agents_deprecated.memory.image_embedding.ort.get_available_providers",
+        lambda: ["CoreMLExecutionProvider", "CPUExecutionProvider"],
+    )
+    monkeypatch.setattr("dimos.agents_deprecated.memory.image_embedding.sys.platform", "darwin")
+    monkeypatch.delenv("DIMOS_ENABLE_COREML_CLIP", raising=False)
+
+    assert provider._preferred_onnx_providers() == ["CPUExecutionProvider"]
+
+
+def test_clip_provider_allows_coreml_opt_in_on_macos(monkeypatch) -> None:
+    """Setting DIMOS_ENABLE_COREML_CLIP=1 puts CoreML ahead of CPU on macOS."""
+    provider = object.__new__(ImageEmbeddingProvider)
+
+    monkeypatch.setattr(
+        "dimos.agents_deprecated.memory.image_embedding.ort.get_available_providers",
+        lambda: ["CoreMLExecutionProvider", "CPUExecutionProvider"],
+    )
+    monkeypatch.setattr("dimos.agents_deprecated.memory.image_embedding.sys.platform", "darwin")
+    monkeypatch.setenv("DIMOS_ENABLE_COREML_CLIP", "1")
+
+    assert provider._preferred_onnx_providers() == [
+        "CoreMLExecutionProvider",
+        "CPUExecutionProvider",
+    ]
+
+
+def test_clip_provider_retries_failed_onnx_inference_on_cpu() -> None:
+    """A failing session is replaced by the reloaded one and inference is retried."""
+    class FailingSession:
+        def run(self, _outputs, _inputs):
+            raise RuntimeError("provider failed")
+
+    class CpuSession:
+        def run(self, _outputs, _inputs):
+            return [np.array([[1.0, 2.0]], dtype=np.float32)]
+
+    provider = object.__new__(ImageEmbeddingProvider)
+    provider.model = FailingSession()
+    provider.model_path = "/tmp/model.onnx"
+    provider._onnx_providers = ["CoreMLExecutionProvider", "CPUExecutionProvider"]
+    provider._load_onnx_session = lambda _model_path, _providers: CpuSession()
+
+    outputs = provider._run_onnx_with_cpu_retry({})
+
+    assert outputs[0].tolist() == [[1.0, 2.0]]
+
+
 @pytest.mark.self_hosted
 class TestImageEmbedding:
     """Test class for CLIP image embedding functionality."""
diff --git a/dimos/experimental/security_demo/security_module.py b/dimos/experimental/security_demo/security_module.py
index 9569227805..1b611ce808 100644
--- a/dimos/experimental/security_demo/security_module.py
+++ b/dimos/experimental/security_demo/security_module.py
@@ -156,7 +156,8 @@ def __init__(self, **kwargs: Any) -> None:
         self._router: PatrolRouter = _create_router(self.config.g)
         self._visual_servo = _create_visual_servo(self.config, self.config.g)
         self._detector = YoloPersonDetector()
-        self._tracker = EdgeTAMProcessor()
+        self._tracker: EdgeTAMProcessor | None = None
+        self._tracker_unavailable: str | None = None
 
         self._depth_estimator = DepthEstimator(self.depth_image.publish)
 
@@ -185,7 +186,9 @@ def stop(self) -> None:
         self._stop_security_patrol_internal()
         self._depth_estimator.stop()
         self._detector.stop()
-        self._tracker.stop()
+        if self._tracker is not None:
+            self._tracker.stop()
+            self._tracker = None
         super().stop()
 
     @skill
@@ -310,7 +313,11 @@ def _patrol_step(self) -> None:
 
         # Init EdgeTAM with YOLO bbox for continuous tracking
         box = np.array(list(best.bbox), dtype=np.float32)
-        self._tracker.init_track(image=image, box=box, obj_id=1)
+        tracker = self._ensure_tracker()
+        if tracker is None:
+            self._stop_event.wait(timeout=2.0)
+            return
+        tracker.init_track(image=image, box=box, obj_id=1)
 
         self._cancel_current_goal()
         self._has_active_goal = False
@@ -326,7 +333,14 @@ def _follow_step(self) -> None:
             self._stop_event.wait(timeout=_ANTI_BUSY_LOOP_TIMEOUT)
             return
 
-        detections = self._tracker.process_image(latest_image)
+        tracker = self._ensure_tracker()
+        if tracker is None:
+            self.cmd_vel.publish(Twist.zero())
+            self._router.reset()
+            self._has_active_goal = False
+            self._transition_to("PATROLLING")
+            return
+        detections = tracker.process_image(latest_image)
 
         if len(detections) == 0:
             self.cmd_vel.publish(Twist.zero())
@@ -400,3 +414,21 @@ def _stop_security_patrol_internal(self) -> None:
             thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
             with self._lock:
                 self._main_thread = None
+
+    def _ensure_tracker(self) -> EdgeTAMProcessor | None:
+        """Return the EdgeTAM tracker, creating it on first use.
+
+        A `RuntimeError` during construction is remembered, so later calls
+        return None without retrying.
+        """
+        with self._lock:
+            if self._tracker_unavailable is not None:
+                return None
+            if self._tracker is None:
+                try:
+                    self._tracker = EdgeTAMProcessor()
+                except RuntimeError as exc:
+                    self._tracker_unavailable = str(exc)
+                    logger.warning("EdgeTAM tracking disabled", error=str(exc))
+                    return None
+            return self._tracker
diff --git a/dimos/mapping/costmapper.py b/dimos/mapping/costmapper.py
index 2c374bdf91..453bc65f2d 100644
--- a/dimos/mapping/costmapper.py
+++ b/dimos/mapping/costmapper.py
@@ -25,6 +25,7 @@
     OCCUPANCY_ALGOS,
     HeightCostConfig,
     OccupancyConfig,
+    height_cost_and_terrain_class_occupancy,
 )
 from dimos.msgs.nav_msgs.OccupancyGrid import OccupancyGrid
 from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
@@ -43,6 +44,7 @@ class CostMapper(Module):
     global_map: In[PointCloud2]
     merged_map: In[PointCloud2]
     global_costmap: Out[OccupancyGrid]
+    terrain_classmap: Out[OccupancyGrid]
 
     @rpc
     def start(self) -> None:
@@ -54,17 +56,24 @@ def _select_map(
             gmap, merged = pair
             return merged if merged is not None else gmap
 
-        def _publish_costmap(grid: OccupancyGrid, calc_time_ms: float, rx_monotonic: float) -> None:
-            self.global_costmap.publish(grid)
+        def _publish_costmap(
+            maps: tuple[OccupancyGrid, OccupancyGrid | None],
+            calc_time_ms: float,
+            rx_monotonic: float,
+        ) -> None:
+            costmap, terrain_classmap = maps
+            self.global_costmap.publish(costmap)
+            if terrain_classmap is not None:
+                self.terrain_classmap.publish(terrain_classmap)
 
         def _calculate_and_time(
             msg: PointCloud2,
-        ) -> tuple[OccupancyGrid, float, float]:
+        ) -> tuple[tuple[OccupancyGrid, OccupancyGrid | None], float, float]:
             rx_monotonic = time.monotonic()  # Capture receipt time
             start = time.perf_counter()
-            grid = self._calculate_costmap(msg)
+            maps = self._calculate_maps(msg)
             elapsed_ms = (time.perf_counter() - start) * 1000
-            return grid, elapsed_ms, rx_monotonic
+            return maps, elapsed_ms, rx_monotonic
 
         self.register_disposable(
             combine_latest(
@@ -82,5 +91,20 @@ def stop(self) -> None:
 
     # @timed()  # TODO: fix thread leak in timed decorator
     def _calculate_costmap(self, msg: PointCloud2) -> OccupancyGrid:
+        """Return only the navigation costmap computed from ``msg``."""
+        return self._calculate_maps(msg)[0]
+
+    def _calculate_maps(self, msg: PointCloud2) -> tuple[OccupancyGrid, OccupancyGrid | None]:
+        """Compute the costmap and, when available, the terrain class map.
+
+        Returns a ``(costmap, terrain_classmap)`` pair. The terrain class map is
+        only produced for the ``"height_cost"`` algorithm; every other entry of
+        ``OCCUPANCY_ALGOS`` yields ``None`` in the second slot.
+        """
+        if self.config.algo == "height_cost":
+            return height_cost_and_terrain_class_occupancy(
+                msg, **asdict(self.config.config)
+            )
+
         fn = OCCUPANCY_ALGOS[self.config.algo]
-        return fn(msg, **asdict(self.config.config))
+        return fn(msg, **asdict(self.config.config)), None
diff --git a/dimos/mapping/pointclouds/occupancy.py b/dimos/mapping/pointclouds/occupancy.py
index 2884cacec3..3c8908ee30 100644
--- a/dimos/mapping/pointclouds/occupancy.py
+++ b/dimos/mapping/pointclouds/occupancy.py
@@ -138,6 +138,16 @@ class HeightCostConfig(OccupancyConfig):
     can_climb: float = 0.15
     ignore_noise: float = 0.05
     smoothing: float = 1.0
+    enable_stair_classification: bool = True
+    stair_min_rise: float = 0.08
+    stair_max_rise: float = 0.25
+    stair_max_cost: int = 70
+
+
+TERRAIN_CLASS_UNKNOWN = -1
+TERRAIN_CLASS_FLAT = 0
+TERRAIN_CLASS_STAIRS = 50
+TERRAIN_CLASS_OBSTACLE = 100
 
 
 def height_cost_occupancy(cloud: PointCloud2, **kwargs: Any) -> OccupancyGrid:
@@ -155,18 +165,40 @@ def height_cost_occupancy(cloud: PointCloud2, **kwargs: Any) -> OccupancyGrid:
     Returns:
         OccupancyGrid with costs 0-100 based on terrain slope, -1 for unknown
     """
+    costmap, _ = height_cost_and_terrain_class_occupancy(cloud, **kwargs)
+    return costmap
+
+
+def height_cost_and_terrain_class_occupancy(
+    cloud: PointCloud2, **kwargs: Any
+) -> tuple[OccupancyGrid, OccupancyGrid]:
+    """Create navigation cost and terrain class maps from a point cloud.
+
+    Terrain class values:
+    - -1: unknown
+    - 0: flat/free
+    - 50: stairs/traversable repeated height change
+    - 100: obstacle/lethal terrain
+    """
     cfg = HeightCostConfig(**kwargs)
     points, _ = cloud.as_numpy()
     points = points.astype(np.float64)  # Upcast to avoid float32 rounding
     ts = cloud.ts if hasattr(cloud, "ts") and cloud.ts is not None else 0.0
 
     if len(points) == 0:
-        return OccupancyGrid(
+        costmap = OccupancyGrid(
+            width=1,
+            height=1,
+            resolution=cfg.resolution,
+            frame_id=cfg.frame_id or cloud.frame_id,
+        )
+        terrain_classmap = OccupancyGrid(
             width=1,
             height=1,
             resolution=cfg.resolution,
             frame_id=cfg.frame_id or cloud.frame_id,
         )
+        return costmap, terrain_classmap
 
     # Find bounds of the point cloud in X-Y plane (use all points)
     min_x = np.min(points[:, 0])
@@ -270,18 +302,92 @@ def height_cost_occupancy(cloud: PointCloud2, **kwargs: Any) -> OccupancyGrid:
         structure = ndimage.generate_binary_structure(2, 1)  # 4-connectivity
         valid_gradient_mask = ndimage.binary_erosion(observed_mask, structure=structure)
 
+        terrain_class = _classify_height_cost_terrain(
+            height_map=height_map,
+            observed_mask=observed_mask,
+            valid_gradient_mask=valid_gradient_mask,
+            cost_float=cost_float,
+            cfg=cfg,
+        )
+        if cfg.enable_stair_classification:
+            stair_mask = terrain_class == TERRAIN_CLASS_STAIRS
+            cost_float = np.where(stair_mask, np.minimum(cost_float, cfg.stair_max_cost), cost_float)
+
         # Convert to int8, marking cells without valid gradients as -1
         cost = np.where(valid_gradient_mask, cost_float.astype(np.int8), -1)
     else:
         cost = np.full((height, width), -1, dtype=np.int8)
+        terrain_class = np.full((height, width), TERRAIN_CLASS_UNKNOWN, dtype=np.int8)
 
-    return OccupancyGrid(
+    costmap = OccupancyGrid(
         grid=cost,
         resolution=cfg.resolution,
         origin=origin,
         frame_id=cfg.frame_id or cloud.frame_id,
         ts=ts,
     )
+    terrain_classmap = OccupancyGrid(
+        grid=terrain_class,
+        resolution=cfg.resolution,
+        origin=origin,
+        frame_id=cfg.frame_id or cloud.frame_id,
+        ts=ts,
+    )
+    return costmap, terrain_classmap
+
+
+def _classify_height_cost_terrain(
+    *,
+    height_map: NDArray[np.floating[Any]],
+    observed_mask: NDArray[np.bool_],
+    valid_gradient_mask: NDArray[np.bool_],
+    cost_float: NDArray[np.floating[Any]],
+    cfg: HeightCostConfig,
+) -> NDArray[np.int8]:
+    """Label each grid cell with one of the ``TERRAIN_CLASS_*`` values.
+
+    Cells outside ``valid_gradient_mask`` stay ``TERRAIN_CLASS_UNKNOWN``. Cells
+    inside it start as ``TERRAIN_CLASS_FLAT`` and become
+    ``TERRAIN_CLASS_OBSTACLE`` where ``cost_float`` is at least 100. With
+    ``cfg.enable_stair_classification`` set, a valid cell is relabelled
+    ``TERRAIN_CLASS_STAIRS`` (replacing either earlier label) when its height
+    differs from an adjacent cell along either grid axis by an amount within
+    ``[cfg.stair_min_rise, cfg.stair_max_rise]`` and both cells are set in
+    ``observed_mask``.
+    """
+    terrain_class = np.full(height_map.shape, TERRAIN_CLASS_UNKNOWN, dtype=np.int8)
+    terrain_class[valid_gradient_mask] = TERRAIN_CLASS_FLAT
+    terrain_class[valid_gradient_mask & (cost_float >= 100.0)] = TERRAIN_CLASS_OBSTACLE
+
+    if not cfg.enable_stair_classification:
+        return terrain_class
+
+    # Collects both cells of every adjacent pair whose height step is stair-sized.
+    stair_transition = np.zeros(height_map.shape, dtype=bool)
+
+    for dy, dx in ((0, 1), (1, 0)):
+        # `-0 or None` evaluates to None, so a zero offset keeps the whole axis
+        # while an offset of 1 drops its last row/column to align with `neighbor`.
+        current = height_map[:-dy or None, :-dx or None]
+        neighbor = height_map[dy:, dx:]
+        current_observed = observed_mask[:-dy or None, :-dx or None]
+        neighbor_observed = observed_mask[dy:, dx:]
+
+        diff = np.abs(neighbor - current)
+        transition = (
+            current_observed
+            & neighbor_observed
+            & (diff >= cfg.stair_min_rise)
+            & (diff <= cfg.stair_max_rise)
+        )
+
+        stair_transition[:-dy or None, :-dx or None] |= transition
+        stair_transition[dy:, dx:] |= transition
+
+    # Only cells with a valid gradient may be labelled as stairs.
+    stair_mask = stair_transition & valid_gradient_mask
+    terrain_class[stair_mask] = TERRAIN_CLASS_STAIRS
+    return terrain_class
 
 
 @dataclass(frozen=True)
diff --git a/dimos/mapping/pointclouds/test_occupancy.py b/dimos/mapping/pointclouds/test_occupancy.py
index 1ad3ef684d..789db26802 100644
--- a/dimos/mapping/pointclouds/test_occupancy.py
+++ b/dimos/mapping/pointclouds/test_occupancy.py
@@ -21,6 +21,10 @@
 from dimos.core.transport import LCMTransport
 from dimos.mapping.occupancy.visualizations import visualize_occupancy_grid
 from dimos.mapping.pointclouds.occupancy import (
+    TERRAIN_CLASS_FLAT,
+    TERRAIN_CLASS_OBSTACLE,
+    TERRAIN_CLASS_STAIRS,
+    height_cost_and_terrain_class_occupancy,
     height_cost_occupancy,
     simple_occupancy,
 )
@@ -128,3 +132,65 @@ def test_height_cost_occupancy_from_lidar(height_cost_moment) -> None:
     known_mask = costmap.grid >= 0
     assert known_mask.sum() > 0, "Expected some known cells"
     assert (~known_mask).sum() > 0, "Expected some unknown cells"
+
+
+def _synthetic_terrain_cloud(kind: str) -> PointCloud2:
+    xs = np.arange(0.0, 1.0, 0.05)
+    ys = np.arange(0.0, 0.8, 0.05)
+    points: list[tuple[float, float, float]] = []
+
+    for x in xs:
+        for y in ys:
+            if kind == "flat":
+                z = 0.0
+            elif kind == "stairs":
+                z = np.floor(x / 0.2) * 0.1
+            elif kind == "wall":
+                z = 0.0 if x < 0.5 else 0.5
+            else:
+                raise ValueError(kind)
+            points.append((x, y, z))
+
+    return PointCloud2.from_numpy(np.asarray(points, dtype=np.float32), frame_id="map")
+
+
+def test_height_cost_classifies_flat_floor() -> None:
+    costmap, terrain_classmap = height_cost_and_terrain_class_occupancy(
+        _synthetic_terrain_cloud("flat"),
+        resolution=0.05,
+        smoothing=0,
+    )
+
+    known = terrain_classmap.grid >= 0
+    assert np.any(known)
+    assert np.all(terrain_classmap.grid[known] == TERRAIN_CLASS_FLAT)
+    assert costmap.grid[known].max() == 0
+
+
+def test_height_cost_classifies_stairs_as_traversable() -> None:
+    costmap, terrain_classmap = height_cost_and_terrain_class_occupancy(
+        _synthetic_terrain_cloud("stairs"),
+        resolution=0.05,
+        smoothing=0,
+        stair_min_rise=0.08,
+        stair_max_rise=0.25,
+        stair_max_cost=70,
+    )
+
+    stair_mask = terrain_classmap.grid == TERRAIN_CLASS_STAIRS
+    assert np.any(stair_mask)
+    assert costmap.grid[stair_mask].max() <= 70
+
+
+def test_height_cost_keeps_wall_as_obstacle() -> None:
+    costmap, terrain_classmap = height_cost_and_terrain_class_occupancy(
+        _synthetic_terrain_cloud("wall"),
+        resolution=0.05,
+        smoothing=0,
+        stair_min_rise=0.08,
+        stair_max_rise=0.25,
+    )
+
+    assert not np.any(terrain_classmap.grid == TERRAIN_CLASS_STAIRS)
+    assert np.any(terrain_classmap.grid == TERRAIN_CLASS_OBSTACLE)
+    assert costmap.grid.max() == 100
diff --git a/dimos/navigation/replanning_a_star/test_min_cost_astar.py b/dimos/navigation/replanning_a_star/test_min_cost_astar.py
index d9b140b7fb..bcad89dd05 100644
--- a/dimos/navigation/replanning_a_star/test_min_cost_astar.py
+++ b/dimos/navigation/replanning_a_star/test_min_cost_astar.py
@@ -109,6 +109,40 @@ def test_astar_unknown_penalty_allows_with_low_penalty(costmap) -> None:
         assert len(path.poses) > 0
 
 
+def test_astar_crosses_stair_cost_when_only_viable_route() -> None:
+    grid = np.full((20, 20), 100, dtype=np.int8)
+    grid[10, 1:19] = 50
+    grid[10, 1] = 0
+    grid[10, 18] = 0
+    og = OccupancyGrid(grid, resolution=0.1)
+
+    start = og.grid_to_world((1, 10))
+    goal = og.grid_to_world((18, 10))
+
+    path = min_cost_astar(og, goal, start, use_cpp=False)
+    assert path is not None
+    crossed_stairs = False
+    for pose in path.poses:
+        gp = og.world_to_grid((pose.position.x, pose.position.y))
+        gx, gy = round(gp.x), round(gp.y)
+        assert grid[gy, gx] != 100
+        crossed_stairs = crossed_stairs or grid[gy, gx] == 50
+    assert crossed_stairs
+
+
+def test_astar_does_not_cross_lethal_obstacles() -> None:
+    grid = np.full((20, 20), 100, dtype=np.int8)
+    grid[10, 1] = 0
+    grid[10, 18] = 0
+    og = OccupancyGrid(grid, resolution=0.1)
+
+    start = og.grid_to_world((1, 10))
+    goal = og.grid_to_world((18, 10))
+
+    path = min_cost_astar(og, goal, start, use_cpp=False)
+    assert path is None
+
+
 def test_astar_python_and_cpp(costmap) -> None:
     start = Vector3(4.0, 2.0, 0)
     goal = Vector3(6.15, 10.0)
diff --git a/dimos/protocol/service/system_configurator/lcm.py b/dimos/protocol/service/system_configurator/lcm.py
index 13538c5419..035b248372 100644
--- a/dimos/protocol/service/system_configurator/lcm.py
+++ b/dimos/protocol/service/system_configurator/lcm.py
@@ -185,18 +185,20 @@ def explanation(self) -> str | None:
         return f"Multicast: - sudo {' '.join(self.add_route_cmd)}"
 
     def fix(self) -> None:
-        # Delete any existing 224.0.0.0/4 route (e.g. on en0) before adding on lo0,
-        # otherwise `route add` fails with "route already in use"
-        prompt.sudo_run(
-            "route",
-            "delete",
-            "-net",
-            "224.0.0.0/4",
-            check=False,
-            text=True,
-            capture_output=True,
-        )
-        prompt.sudo_run(*self.add_route_cmd, check=True, text=True, capture_output=True)
+        # macOS can hold multiple 224.0.0.0/4 routes simultaneously (e.g. en0
+        # and utun6 from Tailscale). `route delete` removes only one at a time
+        # — loop until none remain, then add ours.
+        for _ in range(8):
+            r = prompt.sudo_run(
+                "route", "delete", "-net", "224.0.0.0/4",
+                check=False, text=True, capture_output=True,
+            )
+            if r.returncode != 0:
+                break
+        prompt.sudo_run(*self.add_route_cmd, check=False, text=True, capture_output=True)
+        if not self.check():
+            # As a last resort, re-run with check=True so the error surfaces.
+            prompt.sudo_run(*self.add_route_cmd, check=True, text=True, capture_output=True)
 
 
 # specific checks: buffers
diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py
index 3d101cca79..c53b16638f 100644
--- a/dimos/robot/all_blueprints.py
+++ b/dimos/robot/all_blueprints.py
@@ -100,6 +100,8 @@
     "unitree-go2-coordinator": "dimos.robot.unitree.go2.blueprints.basic.unitree_go2_coordinator:unitree_go2_coordinator",
     "unitree-go2-detection": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2_detection:unitree_go2_detection",
     "unitree-go2-fleet": "dimos.robot.unitree.go2.blueprints.basic.unitree_go2_fleet:unitree_go2_fleet",
+    "unitree-go2-guide": "dimos.robot.unitree.go2.blueprints.agentic.unitree_go2_guide:unitree_go2_guide",
+    "unitree-go2-guide-lite": "dimos.robot.unitree.go2.blueprints.agentic.unitree_go2_guide_lite:unitree_go2_guide_lite",
     "unitree-go2-keyboard-teleop": "dimos.robot.unitree.go2.blueprints.basic.unitree_go2_keyboard_teleop:unitree_go2_keyboard_teleop",
     "unitree-go2-markers": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2:unitree_go2_markers",
     "unitree-go2-memory": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2:unitree_go2_memory",
diff --git a/dimos/robot/unitree/connection.py b/dimos/robot/unitree/connection.py
index 44101cc19d..161f93b92c 100644
--- a/dimos/robot/unitree/connection.py
+++ b/dimos/robot/unitree/connection.py
@@ -15,6 +15,7 @@
 import asyncio
 from dataclasses import dataclass
 import functools
+import os
 import threading
 import time
 from typing import Any, TypeAlias, TypeVar
@@ -93,12 +94,64 @@ def to_ndarray(self, format=None):  # type: ignore[no-untyped-def]
 class UnitreeWebRTCConnection(Resource):
     _SPORT_API_ID_RAGEMODE: int = 2059
 
-    def __init__(self, ip: str, mode: str = "ai") -> None:
+    def __init__(
+        self,
+        ip: str | None = None,
+        mode: str = "ai",
+        connection_method: str | None = None,
+        serial_number: str | None = None,
+        username: str | None = None,
+        password: str | None = None,
+    ) -> None:
+        """Create the WebRTC driver for the selected method and connect.
+
+        Args:
+            ip: Robot address; required when the method is ``"LocalSTA"``.
+            mode: Stored as ``self.mode``.
+            connection_method: ``"LocalSTA"``, ``"LocalAP"`` or ``"Remote"``.
+                Defaults to ``GO2_CONNECTION_METHOD``, then ``"LocalSTA"``.
+            serial_number: Remote only; defaults to ``GO2_SERIAL_NUMBER``.
+            username: Remote only; defaults to ``UNITREE_USERNAME``.
+            password: Remote only; defaults to ``UNITREE_PASSWORD``.
+
+        Raises:
+            ValueError: Unknown method, incomplete Remote credentials, or
+                ``"LocalSTA"`` selected without ``ip``.
+        """
         self.ip = ip
         self.mode = mode
         self.stop_timer: threading.Timer | None = None
         self.cmd_vel_timeout = 0.2
-        self.conn = LegionConnection(WebRTCConnectionMethod.LocalSTA, ip=self.ip)
+
+        # Env-var fallback so deployments can configure without code changes.
+        method = (connection_method or os.environ.get("GO2_CONNECTION_METHOD", "LocalSTA")).strip()
+
+        if method == "Remote":
+            sn = serial_number or os.environ.get("GO2_SERIAL_NUMBER")
+            user = username or os.environ.get("UNITREE_USERNAME")
+            pwd = password or os.environ.get("UNITREE_PASSWORD")
+            if not (sn and user and pwd):
+                raise ValueError(
+                    "Remote WebRTC mode requires GO2_SERIAL_NUMBER, UNITREE_USERNAME, "
+                    "UNITREE_PASSWORD (env or constructor args)."
+                )
+            self.conn = LegionConnection(
+                WebRTCConnectionMethod.Remote,
+                serialNumber=sn,
+                username=user,
+                password=pwd,
+            )
+        elif method == "LocalSTA":
+            # `ip` is optional for the other methods, but LocalSTA is given
+            # nothing else to locate the robot; fail here with a clear message.
+            if not self.ip:
+                raise ValueError("LocalSTA WebRTC mode requires the robot IP address (ip argument).")
+            self.conn = LegionConnection(WebRTCConnectionMethod.LocalSTA, ip=self.ip)
+        elif method == "LocalAP":
+            self.conn = LegionConnection(WebRTCConnectionMethod.LocalAP)
+        else:
+            raise ValueError(f"Unknown GO2_CONNECTION_METHOD: {method!r}")
+
         self.connect()
 
     def connect(self) -> None:
diff --git a/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_guide.py b/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_guide.py
new file mode 100644
index 0000000000..bf1a901ac7
--- /dev/null
+++ b/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_guide.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+"""Guide-robot blueprint: Unitree Go2 assisting a blind user via a Vercel webapp.
+
+Differs from `unitree_go2_agentic`:
+  - Uses `BlindAssistantSkillContainer` (narrate / ask_user / reply_user) instead of `SpeakSkill`.
+  - Replaces `WebInput` with `GuideWebInput`, which exposes an `agent_state` SSE stream.
+  - Loads `BLIND_ASSISTANT_PROMPT` into the MCP client so the agent obeys the guide protocol.
+
+Usage:
+    dimos run unitree-go2-guide
+"""
+
+from dimos.agents.blind_assistant_prompt import BLIND_ASSISTANT_PROMPT
+from dimos.agents.guide_web_input import GuideWebInput
+from dimos.agents.mcp.mcp_client import McpClient
+from dimos.agents.mcp.mcp_server import McpServer
+from dimos.agents.skills.blind_assistant_skills import BlindAssistantSkillContainer
+from dimos.agents.skills.navigation import NavigationSkillContainer
+from dimos.agents.skills.person_follow import PersonFollowSkillContainer
+from dimos.core.coordination.blueprints import autoconnect
+from dimos.robot.unitree.go2.blueprints.smart.unitree_go2_spatial import unitree_go2_spatial
+from dimos.robot.unitree.go2.connection import GO2Connection
+from dimos.robot.unitree.unitree_skill_container import UnitreeSkillContainer
+
+unitree_go2_guide = autoconnect(
+    unitree_go2_spatial,
+    NavigationSkillContainer.blueprint(),
+    PersonFollowSkillContainer.blueprint(camera_info=GO2Connection.camera_info_static),
+    UnitreeSkillContainer.blueprint(),
+    BlindAssistantSkillContainer.blueprint(),
+    GuideWebInput.blueprint(),
+    McpServer.blueprint(),
+    McpClient.blueprint(system_prompt=BLIND_ASSISTANT_PROMPT),
+).global_config(obstacle_avoidance=True)
+
+__all__ = ["unitree_go2_guide"]
diff --git a/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_guide_lite.py b/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_guide_lite.py
new file mode 100644
index 0000000000..c39e255762
--- /dev/null
+++ b/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_guide_lite.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+"""Lightweight variant of unitree-go2-guide for network-pipe testing.
+
+Same agent + skills + web stack, but without spatial perception (CLIP) or
+person tracking (YOLO). The robot can speak / ask / reply but cannot navigate.
+Use this to verify the Vercel <-> Tailscale <-> dimos HTTPS path end to end
+before pulling the heavy ML model archives.
+
+Usage:
+    dimos --replay run unitree-go2-guide-lite
+"""
+
+from dimos.agents.blind_assistant_prompt import BLIND_ASSISTANT_PROMPT
+from dimos.agents.guide_web_input import GuideWebInput
+from dimos.agents.mcp.mcp_client import McpClient
+from dimos.agents.mcp.mcp_server import McpServer
+from dimos.agents.skills.blind_assistant_skills import BlindAssistantSkillContainer
+from dimos.core.coordination.blueprints import autoconnect
+from dimos.robot.unitree.go2.connection import GO2Connection
+
+unitree_go2_guide_lite = autoconnect(
+    GO2Connection.blueprint(),
+    BlindAssistantSkillContainer.blueprint(),
+    GuideWebInput.blueprint(enable_stt=False),
+    McpServer.blueprint(),
+    McpClient.blueprint(system_prompt=BLIND_ASSISTANT_PROMPT),
+)
+
+__all__ = ["unitree_go2_guide_lite"]
diff --git a/dimos/robot/unitree/go2/blueprints/basic/unitree_go2_basic.py b/dimos/robot/unitree/go2/blueprints/basic/unitree_go2_basic.py
index 96a291163d..2223e3edcf 100644
--- a/dimos/robot/unitree/go2/blueprints/basic/unitree_go2_basic.py
+++ b/dimos/robot/unitree/go2/blueprints/basic/unitree_go2_basic.py
@@ -59,6 +59,15 @@ def _convert_navigation_costmap(grid: Any) -> Any:
     )
 
 
+def _convert_terrain_classmap(grid: Any) -> Any:
+    return grid.to_rerun(
+        colormap="Set1",
+        z_offset=0.03,
+        opacity=0.45,
+        background="#1f2330",
+    )
+
+
 def _static_base_link(rr: Any) -> list[Any]:
     return [
         rr.Boxes3D(
@@ -107,11 +116,13 @@ def _go2_rerun_blueprint() -> Any:
         "world/global_map": _convert_global_map,
         "world/merged_map": _convert_global_map,
         "world/navigation_costmap": _convert_navigation_costmap,
+        "world/terrain_classmap": _convert_terrain_classmap,
     },
     "max_hz": {
         "world/global_map": 0,  # publishes at ~7.8 Hz
         "world/color_image": 0,  # publishes at ~14 Hz
         "world/global_costmap": 0,  # publishes at ~7.6 Hz
+        "world/terrain_classmap": 0,
     },
     # slapping a go2 shaped box on top of tf/base_link
     "static": {
diff --git a/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py b/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py
index 365a4e3f88..5eee64adf1 100644
--- a/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py
+++ b/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py
@@ -18,6 +18,7 @@
 from dimos.core.coordination.blueprints import autoconnect
 from dimos.core.stream import In
 from dimos.mapping.costmapper import CostMapper
+from dimos.mapping.pointclouds.occupancy import HeightCostConfig
 from dimos.mapping.relocalization.module import RelocalizationModule
 from dimos.mapping.voxels import VoxelGridMapper
 from dimos.memory2.module import Recorder, RecorderConfig
@@ -33,15 +34,21 @@
 from dimos.perception.fiducial.marker_tf_module import MarkerTfModule
 from dimos.robot.unitree.go2.blueprints.basic.unitree_go2_basic import unitree_go2_basic
 
+_go2_height_cost_config = HeightCostConfig(
+    # Standard stairs are often 0.17-0.20 m. A slightly higher climb tolerance
+    # prevents the 2D costmap from treating every stair riser as a hard obstacle.
+    can_climb=0.25,
+)
+
 unitree_go2 = autoconnect(
     unitree_go2_basic,
     VoxelGridMapper.blueprint(emit_every=5),
-    CostMapper.blueprint(),
+    CostMapper.blueprint(config=_go2_height_cost_config),
     ReplanningAStarPlanner.blueprint(),
     WavefrontFrontierExplorer.blueprint(),
     PatrollingModule.blueprint(),
     MovementManager.blueprint(),
-).global_config(n_workers=10, robot_model="unitree_go2")
+).global_config(n_workers=10, robot_model="unitree_go2", obstacle_avoidance=False)
 
 
 class Go2MemoryConfig(RecorderConfig):
diff --git a/dimos/robot/unitree/go2/connection.py b/dimos/robot/unitree/go2/connection.py
index 5568a473ef..ccf4a1f773 100644
--- a/dimos/robot/unitree/go2/connection.py
+++ b/dimos/robot/unitree/go2/connection.py
@@ -25,6 +25,7 @@
 import rerun.blueprint as rrb
 
 from dimos.agents.annotation import skill
+from dimos.agents.artifacts import EncodedImageArtifact
 from dimos.constants import DEFAULT_THREAD_JOIN_TIMEOUT
 from dimos.core.coordination.module_coordinator import ModuleCoordinator
 from dimos.core.core import rpc
@@ -353,13 +354,26 @@ def publish_request(self, topic: str, data: dict[str, Any]) -> dict[Any, Any]:
         return self.connection.publish_request(topic, data)
 
     @skill
-    def observe(self) -> Image | None:
+    def observe(self) -> EncodedImageArtifact | str:
         """Returns the latest video frame from the robot camera. Use this skill for any visual world queries.
 
         This skill provides the current camera view for perception tasks.
-        Returns None if no frame has been captured yet.
+        Returns a status string if no frame has been captured yet.
         """
-        return self._latest_video_frame
+        frame = self._latest_video_frame
+        if frame is None:
+            return "no camera frame available yet — try again in a moment"
+        # Shrink factor that fits the frame inside 320x320 without upscaling.
+        # NOTE(review): width/height below are computed here, not read back from
+        # the encoder; assumes to_base64 resizes with the same aspect-preserving
+        # rule and rounding. Confirm against Image.to_base64.
+        scale = min(1.0, 320 / frame.width, 320 / frame.height)
+        return EncodedImageArtifact(
+            data=frame.to_base64(quality=70, max_width=320, max_height=320),
+            width=max(1, round(frame.width * scale)),
+            height=max(1, round(frame.height * scale)),
+            frame_id=frame.frame_id,
+        )
 
 
 def deploy(dimos: ModuleCoordinator, ip: str, prefix: str = "") -> "ModuleProxy":
diff --git a/dimos/robot/unitree/go2/connection_spec.py b/dimos/robot/unitree/go2/connection_spec.py
index dd6aab9c40..0a2614a95f 100644
--- a/dimos/robot/unitree/go2/connection_spec.py
+++ b/dimos/robot/unitree/go2/connection_spec.py
@@ -14,8 +14,10 @@
 
 from typing import Any, Protocol
 
+from dimos.msgs.geometry_msgs.Twist import Twist
 from dimos.spec.utils import Spec
 
 
 class GO2ConnectionSpec(Spec, Protocol):
+    def move(self, twist: Twist, duration: float = 0.0) -> bool: ...
     def publish_request(self, topic: str, data: dict[str, Any]) -> dict[Any, Any]: ...
diff --git a/dimos/robot/unitree/mujoco_connection.py b/dimos/robot/unitree/mujoco_connection.py
index 4c455899e8..839db8e5b7 100644
--- a/dimos/robot/unitree/mujoco_connection.py
+++ b/dimos/robot/unitree/mujoco_connection.py
@@ -77,11 +77,11 @@ def __init__(self, global_config: GlobalConfig) -> None:
         # Pre-download the mujoco_sim data.
         get_data("mujoco_sim")
 
-        # Trigger the download of the mujoco_menagerie package. This is so it
-        # doesn't trigger in the mujoco process where it can time out.
-        from mujoco_playground._src import mjx_env
+        # Trigger the download of the robot assets. This is so it doesn't
+        # trigger in the MuJoCo process where it can time out.
+        from dimos.simulation.mujoco.model import ensure_menagerie_exists
 
-        mjx_env.ensure_menagerie_exists()
+        ensure_menagerie_exists()
 
         self.global_config = global_config
         self.process: subprocess.Popen[bytes] | None = None
@@ -126,6 +126,7 @@ def start(self) -> None:
 
             self.process = subprocess.Popen(
                 [executable, str(LAUNCHER_PATH), config_pickle, shm_names_json],
+                stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
                 env=env,
             )
@@ -141,8 +142,13 @@ def start(self) -> None:
         while time.time() - start_time < ready_timeout:
             if self.process.poll() is not None:
                 exit_code = self.process.returncode
+                stdout = self._read_process_pipe(self.process.stdout)
+                stderr = self._read_process_pipe(self.process.stderr)
                 self.stop()
-                raise RuntimeError(f"MuJoCo process failed to start (exit code {exit_code})")
+                output = self._format_process_output(stdout, stderr)
+                raise RuntimeError(
+                    f"MuJoCo process failed to start (exit code {exit_code}){output}"
+                )
             if self.shm_data.is_ready():
                 logger.info("MuJoCo process started successfully")
                 # Register atexit handler to ensure subprocess is cleaned up
@@ -225,6 +231,34 @@ def stop(self) -> None:
         self.odom_stream.cache_clear()
         self.video_stream.cache_clear()
 
+    @staticmethod
+    def _read_process_pipe(pipe: Any) -> str:
+        """Read everything left on ``pipe`` and decode it as UTF-8.
+
+        Returns an empty string when ``pipe`` is ``None``; undecodable bytes are
+        substituted rather than raising.
+        """
+        raw = b"" if pipe is None else pipe.read()
+        return raw.decode("utf-8", errors="replace")
+
+    @staticmethod
+    def _format_process_output(stdout: str, stderr: str) -> str:
+        """Format captured output as labelled sections for an error message.
+
+        Each non-empty stream contributes a label line followed by its last
+        4000 characters. The result starts with a newline, or is empty when
+        both streams are empty.
+        """
+        parts = [
+            section
+            for section, present in (
+                (f"stdout:\n{stdout[-4000:]}", stdout),
+                (f"stderr:\n{stderr[-4000:]}", stderr),
+            )
+            if present
+        ]
+        return ("\n" + "\n".join(parts)) if parts else ""
+
     def standup(self) -> bool:
         return True
 
diff --git a/dimos/robot/unitree/unitree_skill_container.py b/dimos/robot/unitree/unitree_skill_container.py
index 88194473e6..7369b2bf39 100644
--- a/dimos/robot/unitree/unitree_skill_container.py
+++ b/dimos/robot/unitree/unitree_skill_container.py
@@ -18,6 +18,7 @@
 import difflib
 import math
 import time
+from typing import cast
 
 from unitree_webrtc_connect.constants import RTC_TOPIC
 
@@ -26,6 +27,7 @@
 from dimos.core.module import Module
 from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
 from dimos.msgs.geometry_msgs.Quaternion import Quaternion
+from dimos.msgs.geometry_msgs.Twist import Twist
 from dimos.msgs.geometry_msgs.Vector3 import Vector3
 from dimos.navigation.base import NavigationState
 from dimos.navigation.navigation_spec import NavigationInterfaceSpec
@@ -35,6 +37,10 @@
 logger = setup_logger()
 
 
+def _clamp(value: float, min_value: float, max_value: float) -> float:
+    return min(max(value, min_value), max_value)  # clip to the range; NaN passes through
+
+
 UNITREE_WEBRTC_CONTROLS: list[tuple[str, int, str]] = [
     # ("Damp", 1001, "Lowers the robot to the ground fully."),
     (
@@ -207,10 +213,126 @@ def start(self) -> None:
     def stop(self) -> None:
         super().stop()
 
+    @skill
+    def move(self, x: float, y: float = 0.0, yaw: float = 0.0, duration: float = 0.0) -> str:
+        """Move the robot using direct velocity commands.
+
+        Use this for short local movement when the user has indicated the path is clear,
+        or when map-based `relative_move` reports that it cannot find a path but you
+        have visually confirmed there is no obstacle. The command stops early if odometry
+        shows the robot is not making progress, then performs a small reverse recovery.
+        For distance requests, choose a conservative speed and duration.
+        Velocities are clamped to |x| <= 0.5 m/s, |y| <= 0.4 m/s and |yaw| <= 1.0 rad/s.
+
+        Args:
+            x: Forward velocity in m/s. Positive = forward, negative = backward.
+            y: Left/right velocity in m/s. Positive = left, negative = right.
+            yaw: Rotational velocity in rad/s. Positive = turn left, negative = turn right.
+            duration: How long to move in seconds (0 = 0.5 second pulse). Maximum is 10 seconds.
+        """
+        x = _clamp(float(x), -0.5, 0.5)
+        y = _clamp(float(y), -0.4, 0.4)
+        yaw = _clamp(float(yaw), -1.0, 1.0)
+        duration = _clamp(float(duration), 0.0, 10.0)
+
+        twist = Twist(linear=Vector3(x, y, 0.0), angular=Vector3(0.0, 0.0, yaw))
+
+        command_duration = duration if duration > 0.0 else 0.5
+        linear_speed = math.hypot(x, y)
+        chunk_duration = 0.5 if command_duration <= 1.0 else 1.0
+        start_pose = self._get_base_pose()
+        previous_pose = start_pose
+        stalled_chunks = 0
+        elapsed = 0.0
+
+        while elapsed < command_duration:
+            this_chunk = min(chunk_duration, command_duration - elapsed)
+            if not self._connection.move(twist, duration=this_chunk):
+                self._stop_direct_movement()
+                return "Direct movement command failed before completion."
+
+            elapsed += this_chunk
+
+            # Pure rotations can legitimately have near-zero translation.
+            if linear_speed < 0.05:
+                continue
+
+            current_pose = self._get_base_pose()
+            if previous_pose is None or current_pose is None:
+                previous_pose = current_pose
+                continue
+
+            if current_pose.ts <= previous_pose.ts:
+                logger.debug("TF pose timestamp did not advance; skipping direct-move stall check")
+                continue
+
+            moved = previous_pose.position.distance(current_pose.position)
+            # Expect 20% of the commanded travel, floored at 4 cm and capped at 12 cm, but never
+            # more than half of it: a floor above the commanded travel marked slow moves blocked.
+            commanded = linear_speed * this_chunk
+            min_expected_progress = min(0.12, commanded * 0.5, max(0.04, commanded * 0.2))
+            stalled_chunks = stalled_chunks + 1 if moved < min_expected_progress else 0
+            previous_pose = current_pose
+
+            if stalled_chunks >= 2:
+                self._stop_direct_movement()
+                recovered = self._recover_from_blocked_direct_move(x, y)
+                total_moved = self._distance_between(start_pose, current_pose)
+                recovery_text = " Performed a small reverse recovery." if recovered else ""
+                return (
+                    "Direct movement stopped early because the robot appears blocked: "
+                    f"after {elapsed:.1f}s it only moved {total_moved:.2f}m."
+                    f"{recovery_text} Use `observe` before trying a different route."
+                )
+
+        self._stop_direct_movement()
+        end_pose = self._get_base_pose()
+        total_moved = self._distance_between(start_pose, end_pose)
+        return (
+            f"Completed direct movement with velocity=({x:.2f}, {y:.2f}, {yaw:.2f}) "
+            f"for {command_duration:.1f} seconds; estimated displacement={total_moved:.2f}m"
+        )
+
+    def _get_base_pose(self) -> PoseStamped | None:
+        """Return the ("world", "base_link") transform as a timestamped pose, if available."""
+        provider = getattr(self, "_tf", None)
+        transform = None if provider is None else provider.get("world", "base_link")
+        if transform is None:
+            return None
+        base_pose = cast("PoseStamped", transform.to_pose())
+        # Copy the transform's timestamp onto the pose before handing it back.
+        base_pose.ts = transform.ts
+        return base_pose
+
+    def _distance_between(self, start: PoseStamped | None, end: PoseStamped | None) -> float:
+        if start is not None and end is not None:
+            return float(start.position.distance(end.position))
+        return 0.0  # at least one pose is missing, so no distance can be measured
+
+    def _stop_direct_movement(self) -> None:
+        self._connection.move(Twist.zero(), duration=0.0)  # zero twist; result is ignored
+
+    def _recover_from_blocked_direct_move(self, x: float, y: float) -> bool:
+        """Back away opposite to (x, y) for 0.8 s; True only if that command ran and succeeded."""
+        # Each axis reverses at a fixed 0.2 m/s; components within +/-0.05 stay at zero.
+        back_x, back_y = (-0.2 if v > 0.05 else (0.2 if v < -0.05 else 0.0) for v in (x, y))
+        if back_x == 0.0 and back_y == 0.0:
+            return False
+
+        retreat = Twist(linear=Vector3(back_x, back_y, 0.0), angular=Vector3(0.0, 0.0, 0.0))
+        succeeded = self._connection.move(retreat, duration=0.8)
+        # Halt again regardless of whether the recovery command reported success.
+        self._stop_direct_movement()
+        return bool(succeeded)
+
     @skill
     def relative_move(self, forward: float = 0.0, left: float = 0.0, degrees: float = 0.0) -> str:
         """Move the robot relative to its current position.
 
+        This uses map-based planning. If it fails with no path found, that means the
+        planner could not find a route through the current costmap; it does not prove
+        there is a physical obstacle directly in front of the robot.
+
         The `degrees` arguments refers to the rotation the robot should be at the end, relative to its current rotation.
 
         Example calls:
@@ -250,7 +372,11 @@ def relative_move(self, forward: float = 0.0, left: float = 0.0, degrees: float
         time.sleep(1.0)
 
         if not self._navigation.is_goal_reached():
-            return "Navigation was cancelled or failed"
+            return (
+                "Map-based navigation was cancelled or failed. This does not prove there is "
+                "a physical obstacle; the costmap may be stale or too conservative. Use "
+                "`observe` to inspect the path, then use direct `move` only if the path is clear."
+            )
         else:
             return "Navigation goal reached"
 
diff --git a/dimos/simulation/mujoco/model.py b/dimos/simulation/mujoco/model.py
index bc309b7307..cf8cf2e392 100644
--- a/dimos/simulation/mujoco/model.py
+++ b/dimos/simulation/mujoco/model.py
@@ -16,11 +16,12 @@
 
 
 from pathlib import Path
+import shutil
+import subprocess
 import xml.etree.ElementTree as ET
 
 from etils import epath
 import mujoco
-from mujoco_playground._src import mjx_env
 import numpy as np
 
 from dimos.core.global_config import GlobalConfig
@@ -30,27 +31,83 @@
 from dimos.simulation.mujoco.policy import G1OnnxController, Go1OnnxController, OnnxController
 from dimos.utils.data import get_data
 
+_MENAGERIE_COMMIT_SHA = "1b86ece576591213e2b666ebf59508454200ca97"
+
 
 def _get_data_dir() -> epath.Path:
     return epath.Path(str(get_data("mujoco_sim")))
 
 
+def _get_menagerie_path() -> epath.Path:
+    return _get_data_dir() / "mujoco_menagerie"  # where ensure_menagerie_exists() puts the clone
+
+
+def _update_assets(
+    assets: dict[str, bytes],  # mapping updated in place: file name -> file contents
+    path: str | epath.Path,  # directory to scan
+    glob: str = "*",  # pattern passed to Path.glob at every level
+    recursive: bool = False,  # also descend into directories matched by `glob`
+) -> None:
+    for f in epath.Path(path).glob(glob):
+        if f.is_file():
+            assets[f.name] = f.read_bytes()  # keyed by bare name, so duplicates overwrite
+        elif f.is_dir() and recursive:
+            _update_assets(assets, f, glob, recursive)
+
+
+def ensure_menagerie_exists() -> None:
+    """Fetch the Unitree Go1/G1 models from mujoco_menagerie unless both are already present.
+
+    The repository is sparse-cloned into a sibling ``.tmp`` directory, checked out at
+    ``_MENAGERIE_COMMIT_SHA``, and only then moved over the final location. Every git
+    call runs with ``check=True``, so a failing command raises
+    ``subprocess.CalledProcessError``.
+    """
+    target = Path(str(_get_menagerie_path()))
+    robots = ("unitree_go1", "unitree_g1")
+    # Both asset folders present: nothing to do.
+    if all((target / robot / "assets").exists() for robot in robots):
+        return
+
+    # Remove any leftover staging directory before cloning into it.
+    staging = target.with_name(f"{target.name}.tmp")
+    if staging.exists():
+        shutil.rmtree(staging)
+
+    # Clone without blobs, restrict the checkout to the robot folders, then pin the commit.
+    repo_url = "https://github.com/google-deepmind/mujoco_menagerie.git"
+    in_staging = ["git", "-C", str(staging)]
+    git_commands = [
+        ["git", "clone", "--filter=blob:none", "--sparse", repo_url, str(staging)],
+        [*in_staging, "sparse-checkout", "set", *robots],
+        [*in_staging, "checkout", _MENAGERIE_COMMIT_SHA],
+    ]
+    for command in git_commands:
+        subprocess.run(command, check=True)
+
+    # Replace whatever is at the final location with the prepared checkout.
+    if target.exists():
+        shutil.rmtree(target)
+    shutil.move(str(staging), str(target))
+
+
 def get_assets() -> dict[str, bytes]:
     data_dir = _get_data_dir()
+    menagerie_path = _get_menagerie_path()
     assets: dict[str, bytes] = {}
 
     # Assets used from https://sketchfab.com/3d-models/mersus-office-8714be387bcd406898b2615f7dae3a47
     # Created by Ryan Cassidy and Coleman Costello
-    mjx_env.update_assets(assets, data_dir, "*.xml")
-    mjx_env.update_assets(assets, data_dir / "scene_office1/textures", "*.png")
-    mjx_env.update_assets(assets, data_dir / "scene_office1/office_split", "*.obj")
-    mjx_env.update_assets(assets, mjx_env.MENAGERIE_PATH / "unitree_go1" / "assets")
-    mjx_env.update_assets(assets, mjx_env.MENAGERIE_PATH / "unitree_g1" / "assets")
+    _update_assets(assets, data_dir, "*.xml")
+    _update_assets(assets, data_dir / "scene_office1/textures", "*.png")
+    _update_assets(assets, data_dir / "scene_office1/office_split", "*.obj")
+    _update_assets(assets, menagerie_path / "unitree_go1" / "assets")
+    _update_assets(assets, menagerie_path / "unitree_g1" / "assets")
 
     # From: https://sketchfab.com/3d-models/jeong-seun-34-42956ca979404a038b8e0d3e496160fd
     person_dir = epath.Path(str(get_data("person")))
-    mjx_env.update_assets(assets, person_dir, "*.obj")
-    mjx_env.update_assets(assets, person_dir, "*.png")
+    _update_assets(assets, person_dir, "*.obj")
+    _update_assets(assets, person_dir, "*.png")
 
     return assets
 
diff --git a/dimos/web/dimos_interface/api/server.py b/dimos/web/dimos_interface/api/server.py
index b73a1e5fdb..e6e402064d 100644
--- a/dimos/web/dimos_interface/api/server.py
+++ b/dimos/web/dimos_interface/api/server.py
@@ -29,14 +29,16 @@
 
 # For audio processing
 import io
+import os
 from pathlib import Path
 from queue import Empty, Queue
+import secrets
 import subprocess
 from threading import Lock
 import time
 
 import cv2
-from fastapi import FastAPI, File, Form, HTTPException, Request, UploadFile
+from fastapi import Depends, FastAPI, File, Form, Header, HTTPException, Query, Request, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
 from fastapi.templating import Jinja2Templates
@@ -240,6 +242,22 @@ def _decode_audio(raw: bytes) -> tuple[np.ndarray, int]:
     def setup_routes(self) -> None:
         """Set up FastAPI routes."""
 
+        expected_token = os.environ.get("DIMOS_API_TOKEN", "")
+
+        def require_token(
+            authorization: str | None = Header(default=None),
+            token: str | None = Query(default=None),
+        ) -> None:
+            """Raise 401 unless the supplied token matches (byte-wise, so non-ASCII is safe)."""
+            if not expected_token:
+                return  # auth disabled when no token configured (local dev)
+            if authorization and authorization.startswith("Bearer "):
+                supplied = authorization[len("Bearer ") :]
+            else:
+                supplied = token or ""  # fall back to the ?token= query parameter
+            if not secrets.compare_digest(supplied.encode(), expected_token.encode()):
+                raise HTTPException(status_code=401, detail="invalid token")
+
         @self.app.get("/streams")
         async def get_streams():  # type: ignore[no-untyped-def]
             """Get list of available video streams"""
@@ -266,7 +284,7 @@ async def index(request: Request):  # type: ignore[no-untyped-def]
             )
 
         @self.app.post("/submit_query")
-        async def submit_query(query: str = Form(...)):  # type: ignore[no-untyped-def]
+        async def submit_query(query: str = Form(...), _: None = Depends(require_token)):  # type: ignore[no-untyped-def]
             # Using Form directly as a dependency ensures proper form handling
             try:
                 if query:
@@ -282,7 +300,7 @@ async def submit_query(query: str = Form(...)):  # type: ignore[no-untyped-def]
                 )
 
         @self.app.post("/upload_audio")
-        async def upload_audio(file: UploadFile = File(...)):  # type: ignore[no-untyped-def]
+        async def upload_audio(file: UploadFile = File(...), _: None = Depends(require_token)):  # type: ignore[no-untyped-def]
             """Handle audio upload from the browser."""
             if self.audio_subject is None:
                 return JSONResponse(
@@ -321,7 +339,7 @@ async def unitree_status():  # type: ignore[no-untyped-def]
             return JSONResponse({"status": "online", "service": "unitree"})
 
         @self.app.post("/unitree/command")
-        async def unitree_command(request: Request):  # type: ignore[no-untyped-def]
+        async def unitree_command(request: Request, _: None = Depends(require_token)):  # type: ignore[no-untyped-def]
             """Process commands sent from the terminal frontend"""
             try:
                 data = await request.json()
@@ -345,7 +363,7 @@ async def unitree_command(request: Request):  # type: ignore[no-untyped-def]
                 )
 
         @self.app.get("/text_stream/{key}")
-        async def text_stream(key: str):  # type: ignore[no-untyped-def]
+        async def text_stream(key: str, _: None = Depends(require_token)):  # type: ignore[no-untyped-def]
             if key not in self.text_streams:
                 raise HTTPException(status_code=404, detail=f"Text stream '{key}' not found")
             return EventSourceResponse(self.text_stream_generator(key))  # type: ignore[no-untyped-call]
diff --git a/dimos/web/dimos_interface/public/icon.png b/dimos/web/dimos_interface/public/icon.png
deleted file mode 100644
index 4b0b2f153a..0000000000
--- a/dimos/web/dimos_interface/public/icon.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:92d072d6c735eb45c8c8004b99d825c39bd0d21b62d8b2115ef18a55b8a16cab
-size 2147
diff --git a/docs/assets/dimensional-logo-master-transparent.png b/docs/assets/dimensional-logo-master-transparent.png
deleted file mode 100644
index fd42835ede..0000000000
--- a/docs/assets/dimensional-logo-master-transparent.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8e3aa356739d10d839093e32bf7f78cad274e283ecb667a4e670256a7448aaf3
-size 18536
diff --git a/docs/assets/favicon.png b/docs/assets/favicon.png
deleted file mode 100644
index db864bd8da..0000000000
--- a/docs/assets/favicon.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:951874cdcd06bca6d23f3c1976ebf16e6d0681112c73782d93e222c7049edb22
-size 15578
diff --git a/docs/capabilities/mapping/assets/reloc_and_nav_to.webp b/docs/capabilities/mapping/assets/reloc_and_nav_to.webp
deleted file mode 100644
index 65f9d43f76..0000000000
--- a/docs/capabilities/mapping/assets/reloc_and_nav_to.webp
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2d1536e49f730e1521b579a0ca8f8ab0f73dac6544f54eb2eb671ec440b86e29
-size 1014488
diff --git a/docs/capabilities/memory/assets/all_images.png b/docs/capabilities/memory/assets/all_images.png
deleted file mode 100644
index 721fea793b..0000000000
--- a/docs/capabilities/memory/assets/all_images.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a873f01d1d9cf853cbadff9f63c49310817c126ca2249c165f857084227058b7
-size 7355457
diff --git a/docs/capabilities/memory/assets/brightness.svg b/docs/capabilities/memory/assets/brightness.svg
deleted file mode 100644
index 0523c3839a..0000000000
--- a/docs/capabilities/memory/assets/brightness.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:566d6c7009d0e3d2e8a6da49bdaa4e865c44a5b6e3dc10082bf1ac200bb9681b
-size 333223
diff --git a/docs/capabilities/memory/assets/color_image.svg b/docs/capabilities/memory/assets/color_image.svg
deleted file mode 100644
index 6f62941d85..0000000000
--- a/docs/capabilities/memory/assets/color_image.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5e605e92f886497e6fc18c050799a3757566033a5f6d86424ee773f71d4ecd86
-size 828448
diff --git a/docs/capabilities/memory/assets/embedding.svg b/docs/capabilities/memory/assets/embedding.svg
deleted file mode 100644
index 63236672e1..0000000000
--- a/docs/capabilities/memory/assets/embedding.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:220331fd87d2b21b5c7bce6da84f427c97bd6637ed6509ec6a288e4764c2b013
-size 210887
diff --git a/docs/capabilities/memory/assets/embedding_focused.svg b/docs/capabilities/memory/assets/embedding_focused.svg
deleted file mode 100644
index ea4927c727..0000000000
--- a/docs/capabilities/memory/assets/embedding_focused.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:15ebaad56a73c5ebbfc8d78b5715099187fc245d96e81852288f3c16334a407e
-size 34641
diff --git a/docs/capabilities/memory/assets/grid.png b/docs/capabilities/memory/assets/grid.png
deleted file mode 100644
index 27d7dd939f..0000000000
--- a/docs/capabilities/memory/assets/grid.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:fa3a8d7ab0b11f50b08377a005ec9797f35283ac8fb53f5d0dde88cda23aeb62
-size 1858645
diff --git a/docs/capabilities/memory/assets/peak_detections.svg b/docs/capabilities/memory/assets/peak_detections.svg
deleted file mode 100644
index 255d4fc70a..0000000000
--- a/docs/capabilities/memory/assets/peak_detections.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:522ec0640c685f5637743ee4a4e9b6691be7838d6a1d40d9295e4ffde8c961f1
-size 35245
diff --git a/docs/capabilities/memory/assets/peak_space.svg b/docs/capabilities/memory/assets/peak_space.svg
deleted file mode 100644
index 7a6e729166..0000000000
--- a/docs/capabilities/memory/assets/peak_space.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9b7e8ea30b99cf2a86da4b9c25bba67de9f7d2e23e854ea347af4625b63f2231
-size 33190
diff --git a/docs/capabilities/memory/assets/plants.png b/docs/capabilities/memory/assets/plants.png
deleted file mode 100644
index 4edff3ffd8..0000000000
--- a/docs/capabilities/memory/assets/plants.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4732b965e06e47aa9b1f8e8cfdca4aa859832442719f46c9a7d77b5e202dcfc4
-size 173223
diff --git a/docs/capabilities/memory/assets/plants_auto.png b/docs/capabilities/memory/assets/plants_auto.png
deleted file mode 100644
index 8647a3458f..0000000000
--- a/docs/capabilities/memory/assets/plants_auto.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:744d9661017781864661621af6aab25625408b602b84a5eec228ada445587d59
-size 1011358
diff --git a/docs/capabilities/memory/assets/plants_meaningful.png b/docs/capabilities/memory/assets/plants_meaningful.png
deleted file mode 100644
index 77b864981d..0000000000
--- a/docs/capabilities/memory/assets/plants_meaningful.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:02a9b707e75f17b9557f004f77a578d2a3010f6402d609dcfd5985da9082e490
-size 132383
diff --git a/docs/capabilities/memory/assets/plants_peak_detections.png b/docs/capabilities/memory/assets/plants_peak_detections.png
deleted file mode 100644
index 06193bccfd..0000000000
--- a/docs/capabilities/memory/assets/plants_peak_detections.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cc9fcf23e5b72e4d0005e54487d52860460662c82a08a9d9582286349016d98c
-size 1564191
diff --git a/docs/capabilities/memory/assets/plot_brightness_algo.svg b/docs/capabilities/memory/assets/plot_brightness_algo.svg
deleted file mode 100644
index 11942261a1..0000000000
--- a/docs/capabilities/memory/assets/plot_brightness_algo.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c738e3425aded30ed0c0af864837d6a6a526c4abf9655e426b9cc96eeed10520
-size 80187
diff --git a/docs/capabilities/memory/assets/plot_brightness_algo_delta.svg b/docs/capabilities/memory/assets/plot_brightness_algo_delta.svg
deleted file mode 100644
index f6719d0beb..0000000000
--- a/docs/capabilities/memory/assets/plot_brightness_algo_delta.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5032a62e9891e849a568389b1be253c895ee399fec3f6010e0339bfd6faee5e5
-size 35161
diff --git a/docs/capabilities/memory/assets/plot_colors.svg b/docs/capabilities/memory/assets/plot_colors.svg
deleted file mode 100644
index fc194cc8ca..0000000000
--- a/docs/capabilities/memory/assets/plot_colors.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:802a0e541d26b370cb5eacec25461d1c708330ceff973f54ed4330cba6441db9
-size 76264
diff --git a/docs/capabilities/memory/assets/plot_named.svg b/docs/capabilities/memory/assets/plot_named.svg
deleted file mode 100644
index 00986f6d33..0000000000
--- a/docs/capabilities/memory/assets/plot_named.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1d48cf86d1faf94abed5aa5271b9a42feb099960f5a71ce5bcd0f5a5e4b68471
-size 31180
diff --git a/docs/capabilities/memory/assets/plot_plantness.svg b/docs/capabilities/memory/assets/plot_plantness.svg
deleted file mode 100644
index c3d3b529a9..0000000000
--- a/docs/capabilities/memory/assets/plot_plantness.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:058b8f99cb88b11fbd77fbbaa36c104a78ec6868b983f2499cda9f13e83b13f8
-size 28561
diff --git a/docs/capabilities/memory/assets/plot_plantness_autopeaks.svg b/docs/capabilities/memory/assets/plot_plantness_autopeaks.svg
deleted file mode 100644
index a33a048c70..0000000000
--- a/docs/capabilities/memory/assets/plot_plantness_autopeaks.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2502a08c864b23ba933adc4cd06a721f4d0aaa0631e6b73d346c7fa5d7ad5965
-size 32085
diff --git a/docs/capabilities/memory/assets/plot_plantness_autopeaks2.svg b/docs/capabilities/memory/assets/plot_plantness_autopeaks2.svg
deleted file mode 100644
index 811f432047..0000000000
--- a/docs/capabilities/memory/assets/plot_plantness_autopeaks2.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:660595c2d7bf72df91c62a22f4f7a9ff42274debbf21a2d62047d389ede5b4e5
-size 35079
diff --git a/docs/capabilities/memory/assets/plot_plantness_autopeaks_map.svg b/docs/capabilities/memory/assets/plot_plantness_autopeaks_map.svg
deleted file mode 100644
index a0a5552381..0000000000
--- a/docs/capabilities/memory/assets/plot_plantness_autopeaks_map.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c8b6e17ad03a5202097b1d1eacbc24ecaa2ee5d1ff39f1b5404d8cb6d09ef584
-size 66566
diff --git a/docs/capabilities/memory/assets/plot_plantness_brightness.svg b/docs/capabilities/memory/assets/plot_plantness_brightness.svg
deleted file mode 100644
index 95525661f8..0000000000
--- a/docs/capabilities/memory/assets/plot_plantness_brightness.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:19bf4a9034729e87056d17d3c8afb34e3e3ee57a27829b4157f3547a7e56995e
-size 48452
diff --git a/docs/capabilities/memory/assets/plot_plantness_gap_fill.svg b/docs/capabilities/memory/assets/plot_plantness_gap_fill.svg
deleted file mode 100644
index 119647b9db..0000000000
--- a/docs/capabilities/memory/assets/plot_plantness_gap_fill.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:70898fb90c688b522cbb4cfc4288e024656ecdf4392b1876acf3afb0da92b968
-size 29091
diff --git a/docs/capabilities/memory/assets/plot_plantness_marked.svg b/docs/capabilities/memory/assets/plot_plantness_marked.svg
deleted file mode 100644
index 4df7057395..0000000000
--- a/docs/capabilities/memory/assets/plot_plantness_marked.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c09717f67461b9b1424a68244f20a7c954368a048370c146b4038a4908dccf31
-size 29818
diff --git a/docs/capabilities/memory/assets/plot_plantness_significant.svg b/docs/capabilities/memory/assets/plot_plantness_significant.svg
deleted file mode 100644
index d298fbbd92..0000000000
--- a/docs/capabilities/memory/assets/plot_plantness_significant.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5039f3ae678ef9c5986d1e93d54b12cc1d268278b2bd8037e02c11ea7f7848ce
-size 29491
diff --git a/docs/capabilities/memory/assets/plot_robot_data.svg b/docs/capabilities/memory/assets/plot_robot_data.svg
deleted file mode 100644
index ff8b3d33e1..0000000000
--- a/docs/capabilities/memory/assets/plot_robot_data.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d95d619243b3636c4c8a92aaa39d648b68b967f1e2fc5d487db385cf6f3151c0
-size 109884
diff --git a/docs/capabilities/memory/assets/speed.svg b/docs/capabilities/memory/assets/speed.svg
deleted file mode 100644
index 814d65d1be..0000000000
--- a/docs/capabilities/memory/assets/speed.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:68634cf9f8e888c94e335a9044cb827abb55f4a6e65c4e2315a1e0a037c43cee
-size 828283
diff --git a/docs/capabilities/navigation/native/assets/1-lidar.png b/docs/capabilities/navigation/native/assets/1-lidar.png
deleted file mode 100644
index 6584ee90cb..0000000000
--- a/docs/capabilities/navigation/native/assets/1-lidar.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2d76742ada18d20dc0e3a3be04159d3412e7df6acee8596ff37916f0f269d3e0
-size 597386
diff --git a/docs/capabilities/navigation/native/assets/2-globalmap.png b/docs/capabilities/navigation/native/assets/2-globalmap.png
deleted file mode 100644
index 55541a8fcb..0000000000
--- a/docs/capabilities/navigation/native/assets/2-globalmap.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:bc2f27ec2dcc4048acde6b53229c7596b3a7f6ed6afad30c4cd062cf5751bd24
-size 1104485
diff --git a/docs/capabilities/navigation/native/assets/3-globalcostmap.png b/docs/capabilities/navigation/native/assets/3-globalcostmap.png
deleted file mode 100644
index 907d0b0448..0000000000
--- a/docs/capabilities/navigation/native/assets/3-globalcostmap.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d1f9e6c142b220f1a4be7b08950f628a2d34e26caba8a1f5c100726bec6c88ef
-size 793366
diff --git a/docs/capabilities/navigation/native/assets/4-navcostmap.png b/docs/capabilities/navigation/native/assets/4-navcostmap.png
deleted file mode 100644
index 6c40bce0e0..0000000000
--- a/docs/capabilities/navigation/native/assets/4-navcostmap.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9ee4332e3d92162ddf41a0137c2ab5b6a885d758aa5a27037e413cdd4d946436
-size 741912
diff --git a/docs/capabilities/navigation/native/assets/5-all.png b/docs/capabilities/navigation/native/assets/5-all.png
deleted file mode 100644
index 655be72c1c..0000000000
--- a/docs/capabilities/navigation/native/assets/5-all.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1a777d315beac6f4773adcb5c27384fd983720083941b4f62060958ddf6c16d2
-size 1209867
diff --git a/docs/capabilities/navigation/native/assets/coverage.png b/docs/capabilities/navigation/native/assets/coverage.png
deleted file mode 100644
index 2ad2112071..0000000000
--- a/docs/capabilities/navigation/native/assets/coverage.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7c5ef9943e14c2d02fa2e19032ffeb2fc79f927c903e552e7c0db01b858f5297
-size 256502
diff --git a/docs/capabilities/navigation/native/assets/frontier.png b/docs/capabilities/navigation/native/assets/frontier.png
deleted file mode 100644
index 97089338f5..0000000000
--- a/docs/capabilities/navigation/native/assets/frontier.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1f2e35b3a6cc1e82667958f6bb3120a5f0bb5bba99f156df7283b774559168b5
-size 251903
diff --git a/docs/capabilities/navigation/native/assets/noros_nav.gif b/docs/capabilities/navigation/native/assets/noros_nav.gif
deleted file mode 100644
index ab47bb9cb5..0000000000
--- a/docs/capabilities/navigation/native/assets/noros_nav.gif
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:60f842cd2fda539338443b3c501197fbb875f5c5f3883ba3ffdd17005e9bd786
-size 612786
diff --git a/docs/capabilities/navigation/native/assets/patrol_path.png b/docs/capabilities/navigation/native/assets/patrol_path.png
deleted file mode 100644
index 4d53c29409..0000000000
--- a/docs/capabilities/navigation/native/assets/patrol_path.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0cecf773affedca3d14d781e956d20ec9b396df53b5473e41fb7a182d700bef2
-size 476239
diff --git a/docs/capabilities/navigation/native/assets/random.png b/docs/capabilities/navigation/native/assets/random.png
deleted file mode 100644
index b407034eb6..0000000000
--- a/docs/capabilities/navigation/native/assets/random.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:18ab48a549d02d1cd63c8c21b4294acc9c235e8c8f704e2c6ee71d0399ca4aa0
-size 260526
diff --git a/docs/capabilities/navigation/native/index.md b/docs/capabilities/navigation/native/index.md
index 43abbb568b..31166c3768 100644
--- a/docs/capabilities/navigation/native/index.md
+++ b/docs/capabilities/navigation/native/index.md
@@ -76,6 +76,7 @@ We don't have proper loop closure and stable odometry, we trust the data go2 odo
 ### 3. Global Costmap — [`CostMapper`](/dimos/mapping/costmapper.py)
 
 The [`CostMapper`](/dimos/mapping/costmapper.py) converts the 3D voxel map into a 2D occupancy grid. The default algorithm (`height_cost`) maps rate of change of Z, with some smoothing.
+It also publishes a terrain classmap so visualization and agentic navigation can distinguish flat ground, stairs, obstacles, and unknown space.
 
 algo settings are in [`occupancy.py`](/dimos/mapping/pointclouds/occupancy.py) and can be configured per robot
 
@@ -90,15 +91,26 @@ class HeightCostConfig(OccupancyConfig):
     can_climb: float = 0.15
     ignore_noise: float = 0.05
     smoothing: float = 1.0
+    enable_stair_classification: bool = True
+    stair_min_rise: float = 0.08
+    stair_max_rise: float = 0.25
+    stair_max_cost: int = 70
 ```
 
-| Cost | Meaning                                                  |
+| Cost | Meaning in `global_costmap`                              |
 |------|----------------------------------------------------------|
 | 0    | Flat, easy to traverse                                   |
 | 50   | Moderate slope (~7.5cm rise per cell in case of go2)     |
 | 100  | Steep or impassable (≥15cm rise per cell in case of go2) |
 | -1   | Unknown (no observations)                                |
 
+| Class | Meaning in `terrain_classmap` |
+|-------|-------------------------------|
+| 0     | Flat/free terrain             |
+| 50    | Stairs/traversable steps      |
+| 100   | Obstacle/lethal terrain       |
+| -1    | Unknown                       |
+
 ![Global costmap](assets/3-globalcostmap.png)
 
 ### 4. Navigation Costmap — [`ReplanningAStarPlanner`](/dimos/navigation/replanning_a_star/module.py)
diff --git a/docs/goldie-architecture.png b/docs/goldie-architecture.png
new file mode 100644
index 0000000000..ca84a23bdf
--- /dev/null
+++ b/docs/goldie-architecture.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0742fa7bad54a9e66fe386f551c24bc17643ff70f4bb021cb3bbc5d7518bfbf2
+size 326752
diff --git a/docs/platforms/quadruped/go2/index.md b/docs/platforms/quadruped/go2/index.md
index 9d99ca3732..c34dcaba2b 100644
--- a/docs/platforms/quadruped/go2/index.md
+++ b/docs/platforms/quadruped/go2/index.md
@@ -76,7 +76,7 @@ export ROBOT_IP=<discovered_ip>
 ping $ROBOT_IP
 ```
 
-2. Built-in obstacle avoidance is on. (DimOS handles path planning, but the onboard obstacle avoidance provides an extra safety layer around tight spots)
+2. For basic teleop, built-in obstacle avoidance can stay on. For the full `unitree-go2` navigation stack, DimOS disables onboard obstacle avoidance so its own planner can handle traversable terrain like stair risers instead of having the robot stop early.
 
 ### Ready to run DimOS
 
@@ -93,7 +93,7 @@ That's it. DimOS connects via WebRTC (no jailbreak required), starts the full na
 |--------|-------------|
 | **GO2Connection** | WebRTC connection to the robot — streams LiDAR, video, odometry |
 | **VoxelGridMapper** | Builds a 3D voxel map using column-carving (CUDA accelerated) |
-| **CostMapper** | Converts 3D map → 2D costmap via terrain slope analysis |
+| **CostMapper** | Converts 3D map → 2D costmap and terrain classmap, including traversable stair cells |
 | **ReplanningAStarPlanner** | Continuous A* path planning with dynamic replanning |
 | **WavefrontFrontierExplorer** | Autonomous exploration of unmapped areas |
 | **RerunBridge** | 3D visualization in browser |
diff --git a/docs/screenshots/manual.png b/docs/screenshots/manual.png
new file mode 100644
index 0000000000..2fbeadda27
--- /dev/null
+++ b/docs/screenshots/manual.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32e09a072797aca3100e948d6dd2f8399a707533b8adce62b78560a59e69c54b
+size 225674
diff --git a/docs/screenshots/splash.png b/docs/screenshots/splash.png
new file mode 100644
index 0000000000..cefe8d7277
--- /dev/null
+++ b/docs/screenshots/splash.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2de96052293df35a179820cf320b13caadf709dcec3dfbc53ae71e6b6cbba006
+size 50068
diff --git a/docs/screenshots/voice.png b/docs/screenshots/voice.png
new file mode 100644
index 0000000000..6e7f2890ba
--- /dev/null
+++ b/docs/screenshots/voice.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0db941d1cb6da26d6a1ba762af6054d37ee69cd9b8cbba7c7343791646889ee
+size 201698
diff --git a/docs/usage/assets/lcmspy.png b/docs/usage/assets/lcmspy.png
deleted file mode 100644
index 6e68fde03a..0000000000
--- a/docs/usage/assets/lcmspy.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:91da9ef9f7797cce332da448739e28591f7ecfc0fd674e8b4be973cf28331438
-size 7118
diff --git a/docs/usage/assets/pubsub_benchmark.png b/docs/usage/assets/pubsub_benchmark.png
deleted file mode 100644
index 759a8b3977..0000000000
--- a/docs/usage/assets/pubsub_benchmark.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:728484a4358df18ced7b5763a88a962701c2b02b5d319eb9a8b28c6c72d009fe
-size 23946
diff --git a/docs/usage/assets/transforms.png b/docs/usage/assets/transforms.png
deleted file mode 100644
index 49dba4ab9a..0000000000
--- a/docs/usage/assets/transforms.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6597e0008197902e321a3ad3dfb1e838f860fa7ca1277c369ed6ff7da8bf757d
-size 101102
diff --git a/docs/usage/data_streams/assets/alignment_timeline.png b/docs/usage/data_streams/assets/alignment_timeline.png
deleted file mode 100644
index 235ddd7be0..0000000000
--- a/docs/usage/data_streams/assets/alignment_timeline.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cfea5a6aac40182b25decb9ddaeb387ed97a7708e2c51a48f47453c8df7adf57
-size 16136
diff --git a/docs/usage/data_streams/assets/alignment_timeline2.png b/docs/usage/data_streams/assets/alignment_timeline2.png
deleted file mode 100644
index 2bf8ec5eef..0000000000
--- a/docs/usage/data_streams/assets/alignment_timeline2.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:22b64923637d05f8f40c9f7c0f0597ee894dc4f31a0f10674aeb809101b54765
-size 23471
diff --git a/docs/usage/data_streams/assets/alignment_timeline3.png b/docs/usage/data_streams/assets/alignment_timeline3.png
deleted file mode 100644
index 61ddc3b54b..0000000000
--- a/docs/usage/data_streams/assets/alignment_timeline3.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b8e9589dcd5308f511a2ec7d41bd36978204ccfe1441907bd139029b0489d605
-size 9969
diff --git a/docs/usage/data_streams/assets/frame_mosaic.jpg b/docs/usage/data_streams/assets/frame_mosaic.jpg
deleted file mode 100644
index 5c3fbf8350..0000000000
--- a/docs/usage/data_streams/assets/frame_mosaic.jpg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e83934e1179651fbca6c9b62cceb7425d1b2f0e8da18a63d4d95bcb4e6ac33ca
-size 88206
diff --git a/docs/usage/data_streams/assets/frame_mosaic2.jpg b/docs/usage/data_streams/assets/frame_mosaic2.jpg
deleted file mode 100644
index 5e3032acf2..0000000000
--- a/docs/usage/data_streams/assets/frame_mosaic2.jpg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2d73f683e92fda39bac9d1bb840f1fc375c821b4099714829e81f3e739f4d602
-size 91036
diff --git a/docs/usage/sensor_streams/assets/alignment_timeline.png b/docs/usage/sensor_streams/assets/alignment_timeline.png
deleted file mode 100644
index 235ddd7be0..0000000000
--- a/docs/usage/sensor_streams/assets/alignment_timeline.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cfea5a6aac40182b25decb9ddaeb387ed97a7708e2c51a48f47453c8df7adf57
-size 16136
diff --git a/docs/usage/sensor_streams/assets/alignment_timeline2.png b/docs/usage/sensor_streams/assets/alignment_timeline2.png
deleted file mode 100644
index 2bf8ec5eef..0000000000
--- a/docs/usage/sensor_streams/assets/alignment_timeline2.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:22b64923637d05f8f40c9f7c0f0597ee894dc4f31a0f10674aeb809101b54765
-size 23471
diff --git a/docs/usage/sensor_streams/assets/alignment_timeline3.png b/docs/usage/sensor_streams/assets/alignment_timeline3.png
deleted file mode 100644
index 61ddc3b54b..0000000000
--- a/docs/usage/sensor_streams/assets/alignment_timeline3.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b8e9589dcd5308f511a2ec7d41bd36978204ccfe1441907bd139029b0489d605
-size 9969
diff --git a/docs/usage/sensor_streams/assets/frame_mosaic.jpg b/docs/usage/sensor_streams/assets/frame_mosaic.jpg
deleted file mode 100644
index 5c3fbf8350..0000000000
--- a/docs/usage/sensor_streams/assets/frame_mosaic.jpg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e83934e1179651fbca6c9b62cceb7425d1b2f0e8da18a63d4d95bcb4e6ac33ca
-size 88206
diff --git a/docs/usage/sensor_streams/assets/frame_mosaic2.jpg b/docs/usage/sensor_streams/assets/frame_mosaic2.jpg
deleted file mode 100644
index 5e3032acf2..0000000000
--- a/docs/usage/sensor_streams/assets/frame_mosaic2.jpg
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2d73f683e92fda39bac9d1bb840f1fc375c821b4099714829e81f3e739f4d602
-size 91036
diff --git a/examples/language-interop/assets/lcmspy.png b/examples/language-interop/assets/lcmspy.png
deleted file mode 100644
index dc6a824f69..0000000000
--- a/examples/language-interop/assets/lcmspy.png
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:87d3af9d9105d048c3e55faff52981c15cc1bfd8168c58a3d8c8f603aa8b7769
-size 5110
diff --git a/ops/Caddyfile b/ops/Caddyfile
new file mode 100644
index 0000000000..1eea73a220
--- /dev/null
+++ b/ops/Caddyfile
@@ -0,0 +1,22 @@
+# Replace dog.yourname.dev with your actual hostname.
+# Replace https://your-app.vercel.app with your Vercel deployment URL.
+dog.yourname.dev {
+    @options method OPTIONS
+    handle @options {
+        header Access-Control-Allow-Origin "https://your-app.vercel.app"
+        header Access-Control-Allow-Headers "Authorization, Content-Type"
+        header Access-Control-Allow-Methods "GET, POST, OPTIONS"
+        header Access-Control-Max-Age "86400"
+        respond 204
+    }
+
+    header Access-Control-Allow-Origin "https://your-app.vercel.app"
+    header Access-Control-Allow-Credentials "true"
+
+    reverse_proxy localhost:5555 {
+        flush_interval -1
+        transport http {
+            read_timeout 0
+        }
+    }
+}
diff --git a/ops/dimos.env.example b/ops/dimos.env.example
new file mode 100644
index 0000000000..44d7395a3a
--- /dev/null
+++ b/ops/dimos.env.example
@@ -0,0 +1,14 @@
+# WebRTC mode for cloud deployment behind Unitree's signaling/TURN.
+GO2_CONNECTION_METHOD=Remote
+GO2_SERIAL_NUMBER=
+UNITREE_USERNAME=
+UNITREE_PASSWORD=
+
+# Bearer token shared with the Vercel webapp (any random hex string).
+# Generate: openssl rand -hex 16
+DIMOS_API_TOKEN=
+
+OPENAI_API_KEY=
+
+HOST=0.0.0.0
+PORT=5555
diff --git a/ops/dimos.service b/ops/dimos.service
new file mode 100644
index 0000000000..f6abb5d5fc
--- /dev/null
+++ b/ops/dimos.service
@@ -0,0 +1,17 @@
+[Unit]
+Description=Dimos guide-robot agent
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=simple
+WorkingDirectory=/opt/dimos
+EnvironmentFile=/opt/dimos/.env
+ExecStart=/opt/dimos/.venv/bin/dimos run unitree-go2-agentic
+Restart=on-failure
+RestartSec=5
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target
diff --git a/webapp/.env.example b/webapp/.env.example
new file mode 100644
index 0000000000..05f10fde26
--- /dev/null
+++ b/webapp/.env.example
@@ -0,0 +1,29 @@
+# Copy to .env.local. NEXT_PUBLIC_* vars are read in the browser; the rest are
+# server-only (used by Next API routes) and never reach the client.
+
+# Voice/agent API + SSE (DimOS port 5555). Leave unset to use the built-in mock.
+#   Local DimOS:   http://localhost:5555
+#   Via ngrok:     https://api-xxxx.ngrok-free.app
+# NEXT_PUBLIC_DIMOS_API=http://localhost:5555
+
+# Visualization/teleop server for the joystick (Socket.IO, DimOS port 7779).
+# Used by the manual-mode joystick (Integration Brick B).
+#   Local DimOS:   http://localhost:7779
+#   Via ngrok:     https://vis-xxxx.ngrok-free.app
+# NEXT_PUBLIC_DIMOS_VIS=http://localhost:7779
+
+# Speech-to-text provider: "webspeech" (default, on-device) or "upload"
+# (MediaRecorder -> /upload_audio; needs the 5555 backend reachable).
+# NEXT_PUBLIC_STT=webspeech
+
+# Bearer token (production only; empty in dev). Like all NEXT_PUBLIC_* vars it is readable in the browser — not a secret.
+# NEXT_PUBLIC_DIMOS_TOKEN=
+
+# OpenAI text-to-speech: agent replies are spoken on the phone via gpt-4o-mini-tts
+# through the /api/tts route. Server-only — do NOT prefix with NEXT_PUBLIC_ (that
+# would leak the key to the browser). Required for spoken responses.
+# OPENAI_API_KEY=sk-...
+# Optional overrides:
+# OPENAI_TTS_VOICE=coral       # coral, nova, shimmer, sage, alloy, ash, ballad, echo, onyx, ...
+# OPENAI_TTS_MODEL=gpt-4o-mini-tts
+# OPENAI_TTS_INSTRUCTIONS="Speak in a warm, friendly, upbeat female voice at a brisk pace."
diff --git a/webapp/.gitignore b/webapp/.gitignore
new file mode 100644
index 0000000000..1de9c210f6
--- /dev/null
+++ b/webapp/.gitignore
@@ -0,0 +1,45 @@
+# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
+
+# dependencies
+/node_modules
+/.pnp
+.pnp.*
+.yarn/*
+!.yarn/patches
+!.yarn/plugins
+!.yarn/releases
+!.yarn/versions
+
+# testing
+/coverage
+
+# next.js
+/.next/
+/out/
+
+# production
+/build
+
+# misc
+.DS_Store
+*.pem
+
+# debug
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+.pnpm-debug.log*
+
+# env files (can opt-in for committing if needed)
+.env*
+!.env.example
+
+# brainstorming scratch (visual companion mockups)
+.superpowers/
+
+# vercel
+.vercel
+
+# typescript
+*.tsbuildinfo
+next-env.d.ts
diff --git a/webapp/AGENTS.md b/webapp/AGENTS.md
new file mode 100644
index 0000000000..83ab9583be
--- /dev/null
+++ b/webapp/AGENTS.md
@@ -0,0 +1,121 @@
+# Goldie Webapp — briefing
+
+The phone-facing control app for the Unitree Go2, **built for low-vision users**:
+hold-to-speak voice commands, a manual joystick, live agent responses, and
+**on-device text-to-speech** of what the robot says. It is the only piece in
+`/webapp`; it talks to a DimOS backend over HTTP + SSE (and Socket.IO for the
+joystick). It never touches the robot directly.
+
+> This replaced an earlier App-Router stub. That stub is preserved verbatim in
+> [`SCAFFOLD-REFERENCE.md`](./SCAFFOLD-REFERENCE.md) because it was wired to the
+> monorepo backend's contract — see **Backend contracts** below.
+
+## Quick start
+
+```bash
+cd webapp
+npm install
+cp .env.example .env.local   # then edit (see Environment)
+npm run dev                  # http://localhost:3000
+```
+
+With **no** `.env.local`, it runs fully against a built-in **mock** (`/api/mock`)
+— so the UI works with no backend at all. For a real iPhone, expose `:3000`
+through a tunnel (ngrok/Tailscale) and open it in Safari.
+
+## Stack
+
+Next.js 16 (**Pages Router**, `src/pages`), React 19, TypeScript (strict),
+**Tailwind v4** (CSS-first `@theme` in `src/styles/globals.css`), Vitest.
+Self-contained — it does not depend on the rest of the monorepo at build time.
+
+## Structure
+
+```
+src/
+  pages/
+    index.tsx              # the one screen: orchestrates everything
+    _app.tsx, _document.tsx # viewport + iOS/PWA meta
+    api/mock/[...path].ts  # mock backend (SSE + endpoints) for offline dev
+    api/log.ts             # dev: prints speech/tts events to the `npm run dev` terminal
+  components/              # Header, ModeToggle, VoiceButton, StatusCard,
+                           # QuickActions, InterruptButton, Joystick, Voice/ManualPanel
+  hooks/                   # useStt, useAgentFeed, useStatus, useTeleop
+  lib/                     # dimos.ts (API client), stt.ts, speech.ts (TTS),
+                           # agentMessage.ts, joystick.ts, types.ts
+public/                    # manifest.json + icons (Add-to-Home-Screen / PWA)
+```
+
+## Backend contracts ⚠️ (read before integrating)
+
+There are **two** DimOS contracts in play. Goldie currently targets the first;
+the monorepo backend uses the second.
+
+| | Goldie targets now | Monorepo backend (Tailscale :8443) |
+|---|---|---|
+| SSE stream key | `agent_responses` (plain text) | `agent_state` (structured JSON) |
+| Auth | `ngrok-skip-browser-warning` header | `Authorization: Bearer` + `?token=` on SSE |
+| State shape | text lines | `{ phase, current_skill, last_observation, last_narration, awaiting_user, intent }` |
+| Input tags | `<user_speech>` | `<user_speech>` / `<user_reply>` (when `awaiting_user`) / `<user_command>` |
+
+**Common to both:** `POST /submit_query` as **multipart form-data**, field
+`query` (never JSON). Voice transcripts are wrapped in `<user_speech>…</user_speech>`.
+
+**To run Goldie against the monorepo backend** (see `SCAFFOLD-REFERENCE.md` for
+the working example): in `src/lib/dimos.ts` + `src/hooks/useAgentFeed.ts`, switch
+the stream key to `agent_state`, send the token (Bearer + `?token=`), map
+`last_narration`/`awaiting_user` into the feed, add the `awaiting_user →
+<user_reply>` flow, and speak `last_narration`/`awaiting_user` via TTS
+(`src/lib/speech.ts`). The mock currently emulates the `agent_responses` shape.
+
+## Environment
+
+| Var | Purpose |
+|---|---|
+| `NEXT_PUBLIC_DIMOS_API` | Voice/agent API + SSE base. Unset → built-in mock. |
+| `NEXT_PUBLIC_DIMOS_VIS` | Socket.IO vis/teleop server (joystick, `move_command`). |
+| `NEXT_PUBLIC_DIMOS_TOKEN` | Bearer token (monorepo backend). |
+| `NEXT_PUBLIC_STT` | `webspeech` (default, on-device) or `upload` (MediaRecorder → `/upload_audio`). |
+
+## Features
+
+- **Voice mode** — hold-to-speak. STT is the on-device Web Speech API by default
+  (`lib/stt.ts`, behind a swappable provider; an `upload` provider posts
+  `audio/mp4` to `/upload_audio` for server-side Whisper).
+- **Manual mode** — analog joystick → Socket.IO `move_command` Twist on the vis
+  server at ~15 Hz, zero-twist on release (`hooks/useTeleop.ts`, `lib/joystick.ts`).
+- **Agent feed** — classified, de-duplicated, noise-filtered messages
+  (`lib/agentMessage.ts`, `hooks/useAgentFeed.ts`).
+- **Text-to-speech** — speaks agent replies via browser `SpeechSynthesis`
+  (`lib/speech.ts`), on by default, with a header mute toggle.
+- **PWA** — manifest + apple-touch-icon + iOS meta (Add-to-Home-Screen).
+
+## iOS Safari caveats
+
+- `MediaRecorder` needs `audio/mp4` (not webm); `getUserMedia` needs HTTPS + a tap.
+- `EventSource` can't set headers → we use `@microsoft/fetch-event-source`.
+- **TTS limitation (open issue):** iOS only allows `speechSynthesis` inside a user
+  gesture; speech triggered by an incoming SSE message is silently dropped (works
+  on desktop). We unlock on the talk-button tap, but auto-speaking replies is
+  unreliable on iPhone. Reliable fallback for blind users: **VoiceOver** reads the
+  `aria-live` response feed automatically.
+
+## Accessibility
+
+`aria-live` on the response feed, `aria-label`/`aria-pressed` on the talk button
+and speech toggle. TODO: spoken cues for state changes (listening/sent/connected),
+logical focus order, and resolving the iOS auto-TTS issue above.
+
+## Testing
+
+```bash
+npm run test     # vitest: payload wrapping, message classification, joystick math
+npm run build    # production build (also type-checks)
+```
+
+## Known gaps / deferred
+
+- **Quick actions** (Sit/Jump/Lie down → `/unitree/command`) and **Interrupt**
+  (`/interrupt`) are not on the verified backend — wired to the mock, route TBD.
+- iOS auto-TTS (above).
+- Wiring to the monorepo `agent_state` backend (above).
diff --git a/webapp/SCAFFOLD-REFERENCE.md b/webapp/SCAFFOLD-REFERENCE.md
new file mode 100644
index 0000000000..6e9705b229
--- /dev/null
+++ b/webapp/SCAFFOLD-REFERENCE.md
@@ -0,0 +1,336 @@
+# Scaffold reference — original /webapp (teammate's App Router stub)
+
+Preserved for merging. Goldie (the current /webapp) replaced this stub. The stub
+was wired to the **monorepo DimOS backend** (Tailscale, port 8443), which uses a
+DIFFERENT contract than Goldie currently targets — keep this to merge later:
+
+- SSE stream key: `agent_state` (structured JSON), NOT `agent_responses` (plain text).
+- Auth: `Authorization: Bearer <token>` on fetch + `?token=<token>` on the SSE URL.
+- Structured state: phase, current_skill{name,args,state}, last_observation,
+  last_narration ("robot said"), awaiting_user ("robot is asking"), intent.
+- Input tags: `<user_speech>`, `<user_reply>` (when `awaiting_user` is set), `<user_command>` (e.g. "stop").
+
+TODO to run Goldie against the monorepo backend: re-point `src/lib/dimos.ts` + the
+agent feed to `agent_state` + token, add the `awaiting_user` → `<user_reply>` flow, and
+speak `last_narration` / `awaiting_user` via TTS. See `webapp/AGENTS.md`.
+
+## app/page.tsx
+```tsx
+"use client";
+
+import { useCallback, useEffect, useRef, useState } from "react";
+
+const API = process.env.NEXT_PUBLIC_DIMOS_API ?? "";
+const TOKEN = process.env.NEXT_PUBLIC_DIMOS_TOKEN ?? "";
+
+type AgentState = {
+  ts?: number;
+  intent?: string;
+  phase?: string;
+  current_skill?: { name?: string; args?: unknown; state?: string } | null;
+  last_observation?: string;
+  last_narration?: string;
+  awaiting_user?: string | null;
+};
+
+export default function Page() {
+  const [state, setState] = useState<AgentState>({});
+  const [recording, setRecording] = useState(false);
+  const [connected, setConnected] = useState(false);
+  const [lastError, setLastError] = useState<string>("");
+  const [partialTranscript, setPartialTranscript] = useState<string>("");
+
+  const recognitionRef = useRef<any>(null);
+  const finalTranscriptRef = useRef<string>("");
+
+  useEffect(() => {
+    if (!API) return;
+    const url = `${API}/text_stream/agent_state?token=${encodeURIComponent(TOKEN)}`;
+    const es = new EventSource(url);
+    es.onopen = () => setConnected(true);
+    es.onmessage = (e) => {
+      try {
+        setState(JSON.parse(e.data));
+      } catch {
+        // ignore non-JSON keepalives
+      }
+    };
+    es.onerror = () => setConnected(false);
+    return () => es.close();
+  }, []);
+
+  const sendText = useCallback(async (text: string, tag: "user_speech" | "user_reply" | "user_command") => {
+    setLastError("");
+    try {
+      const fd = new FormData();
+      fd.append("query", `<${tag}>${text}</${tag}>`);
+      const r = await fetch(`${API}/submit_query`, {
+        method: "POST",
+        headers: { Authorization: `Bearer ${TOKEN}` },
+        body: fd,
+      });
+      if (!r.ok) setLastError(`submit_query ${r.status}`);
+    } catch (e) {
+      setLastError(String(e));
+    }
+  }, []);
+
+  const startRecording = useCallback(async () => {
+    setLastError("");
+    setPartialTranscript("");
+    finalTranscriptRef.current = "";
+
+    // Browser-side STT via Web Speech API (free, fast, runs on the phone).
+    // iOS Safari exposes it as webkitSpeechRecognition; Chrome/Edge also have it.
+    const SR: any =
+      (typeof window !== "undefined" && (window as any).SpeechRecognition) ||
+      (typeof window !== "undefined" && (window as any).webkitSpeechRecognition);
+
+    if (!SR) {
+      setLastError("speech recognition not supported in this browser");
+      return;
+    }
+
+    try {
+      const rec = new SR();
+      rec.continuous = false;
+      rec.interimResults = true;
+      rec.lang = navigator.language || "en-US";
+      rec.maxAlternatives = 1;
+
+      rec.onresult = (event: any) => {
+        let final = "";
+        let interim = "";
+        for (let i = event.resultIndex; i < event.results.length; i++) {
+          const r = event.results[i];
+          if (r.isFinal) final += r[0].transcript;
+          else interim += r[0].transcript;
+        }
+        if (final) finalTranscriptRef.current += final;
+        setPartialTranscript(finalTranscriptRef.current + interim);
+      };
+
+      rec.onerror = (event: any) => {
+        setLastError(`stt: ${event.error || "unknown"}`);
+        setRecording(false);
+      };
+
+      rec.onend = () => {
+        setRecording(false);
+        const text = finalTranscriptRef.current.trim();
+        setPartialTranscript("");
+        if (text) {
+          const tag = state.awaiting_user ? "user_reply" : "user_speech";
+          sendText(text, tag);
+        }
+      };
+
+      rec.start();
+      recognitionRef.current = rec;
+      setRecording(true);
+    } catch (e) {
+      setLastError(`mic: ${String(e)}`);
+    }
+  }, [sendText, state.awaiting_user]);
+
+  const stopRecording = useCallback(() => {
+    try {
+      recognitionRef.current?.stop();
+    } catch {
+      // already stopped
+    }
+  }, []);
+
+  const stopButton = useCallback(() => {
+    sendText("stop", "user_command");
+  }, [sendText]);
+
+  const [textDraft, setTextDraft] = useState("");
+  const sendDraft = useCallback(() => {
+    if (!textDraft.trim()) return;
+    const tag = state.awaiting_user ? "user_reply" : "user_speech";
+    sendText(textDraft.trim(), tag);
+    setTextDraft("");
+  }, [textDraft, sendText, state.awaiting_user]);
+
+  return (
+    <main style={{ minHeight: "100vh", padding: 24, display: "flex", flexDirection: "column", gap: 16 }}>
+      <header style={{ display: "flex", justifyContent: "space-between", alignItems: "center" }}>
+        <h1 style={{ fontSize: 18, fontWeight: 600 }}>Dimos Guide</h1>
+        <span style={{ fontSize: 12, color: connected ? "#4ade80" : "#f87171" }}>
+          {connected ? "● connected" : "○ reconnecting"}
+        </span>
+      </header>
+
+      <section style={{ display: "flex", flexDirection: "column", alignItems: "center", gap: 12, marginTop: 24 }}>
+        <button
+          onTouchStart={(e) => {
+            e.preventDefault();
+            startRecording();
+          }}
+          onTouchEnd={(e) => {
+            e.preventDefault();
+            stopRecording();
+          }}
+          onMouseDown={startRecording}
+          onMouseUp={stopRecording}
+          onMouseLeave={() => recording && stopRecording()}
+          style={{
+            width: 220,
+            height: 220,
+            borderRadius: "50%",
+            border: "none",
+            background: recording ? "#dc2626" : state.awaiting_user ? "#f59e0b" : "#1e3a8a",
+            color: "white",
+            fontSize: 18,
+            fontWeight: 600,
+            touchAction: "none",
+            transition: "background 0.1s",
+          }}
+          aria-label={recording ? "Recording — release to send" : "Hold to speak"}
+        >
+          {recording ? "Listening…" : state.awaiting_user ? "Tap to reply" : "Hold to speak"}
+        </button>
+
+        <button
+          onClick={stopButton}
+          style={{
+            marginTop: 8,
+            padding: "12px 24px",
+            borderRadius: 8,
+            border: "1px solid #6b7280",
+            background: "transparent",
+            color: "#e5e5e5",
+          }}
+        >
+          Stop
+        </button>
+
+        <div style={{ display: "flex", gap: 8, width: "100%", marginTop: 16 }}>
+          <input
+            type="text"
+            inputMode="text"
+            autoComplete="off"
+            autoCapitalize="off"
+            placeholder="Or type a query…"
+            value={textDraft}
+            onChange={(e) => setTextDraft(e.target.value)}
+            onKeyDown={(e) => { if (e.key === "Enter") sendDraft(); }}
+            style={{
+              flex: 1,
+              padding: "12px 14px",
+              borderRadius: 8,
+              border: "1px solid #374151",
+              background: "#111827",
+              color: "#e5e5e5",
+              fontSize: 16,
+              minWidth: 0,
+            }}
+          />
+          <button
+            onClick={sendDraft}
+            disabled={!textDraft.trim()}
+            style={{
+              padding: "12px 18px",
+              borderRadius: 8,
+              border: "1px solid #2563eb",
+              background: textDraft.trim() ? "#2563eb" : "#1e3a8a",
+              color: "white",
+              opacity: textDraft.trim() ? 1 : 0.6,
+            }}
+          >
+            Send
+          </button>
+        </div>
+      </section>
+
+      {partialTranscript ? (
+        <section style={{ padding: 12, background: "#1f1f1f", borderRadius: 8, border: "1px solid #2563eb" }}>
+          <div style={{ fontSize: 11, color: "#60a5fa", textTransform: "uppercase", marginBottom: 4 }}>
+            Hearing
+          </div>
+          <div style={{ fontStyle: "italic" }}>{partialTranscript}</div>
+        </section>
+      ) : null}
+
+      {state.awaiting_user ? (
+        <section style={{ padding: 12, background: "#1f1f1f", borderRadius: 8, border: "1px solid #f59e0b" }}>
+          <div style={{ fontSize: 11, color: "#f59e0b", textTransform: "uppercase", marginBottom: 4 }}>
+            Robot is asking
+          </div>
+          <div>{state.awaiting_user}</div>
+        </section>
+      ) : null}
+
+      {state.last_narration ? (
+        <section style={{ padding: 12, background: "#1f1f1f", borderRadius: 8 }}>
+          <div style={{ fontSize: 11, color: "#9ca3af", textTransform: "uppercase", marginBottom: 4 }}>
+            Robot said
+          </div>
+          <div>{state.last_narration}</div>
+        </section>
+      ) : null}
+
+      <section style={{ marginTop: "auto", fontSize: 11, color: "#6b7280" }}>
+        <div>phase: {state.phase ?? "—"}</div>
+        <div>skill: {state.current_skill?.name ?? "—"} ({state.current_skill?.state ?? "—"})</div>
+        <div>sees: {state.last_observation ?? "—"}</div>
+        {lastError && <div style={{ color: "#f87171", marginTop: 6 }}>error: {lastError}</div>}
+      </section>
+
+      <details style={{ fontSize: 11, color: "#6b7280" }}>
+        <summary>Raw state JSON</summary>
+        <pre>{JSON.stringify(state, null, 2)}</pre>
+      </details>
+    </main>
+  );
+}
+```
+
+## app/layout.tsx
+```tsx
+import "./globals.css";
+import type { Metadata, Viewport } from "next";
+
+export const metadata: Metadata = {
+  title: "Dimos Guide",
+  description: "Voice-driven robot guide for low-vision users",
+};
+
+export const viewport: Viewport = {
+  width: "device-width",
+  initialScale: 1,
+  maximumScale: 1,
+  userScalable: false,
+  themeColor: "#0a0a0a",
+};
+
+export default function RootLayout({ children }: { children: React.ReactNode }) {
+  return (
+    <html lang="en">
+      <body>{children}</body>
+    </html>
+  );
+}
+```
+
+## app/globals.css
+```css
+* { box-sizing: border-box; margin: 0; padding: 0; }
+html, body {
+  height: 100%;
+  background: #0a0a0a;
+  color: #e5e5e5;
+  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+  -webkit-tap-highlight-color: transparent;
+  overscroll-behavior: none;
+}
+button { font: inherit; -webkit-user-select: none; user-select: none; }
+pre { white-space: pre-wrap; word-break: break-word; }
+```
+
+## .env.local.example
+```
+NEXT_PUBLIC_DIMOS_API=https://dog.yourname.dev
+NEXT_PUBLIC_DIMOS_TOKEN=replace-with-DIMOS_API_TOKEN-from-vps
+```
diff --git a/webapp/TECHFLOW.md b/webapp/TECHFLOW.md
new file mode 100644
index 0000000000..6b257973f6
--- /dev/null
+++ b/webapp/TECHFLOW.md
@@ -0,0 +1,350 @@
+# Goldie — full techflow
+
+The app is called **Goldie**: a phone-tuned PWA that controls a Unitree Go2 robot dog through DimOS. It has two modes — **voice** (the main path, designed for blind users — every agent reply is spoken back) and **manual** (joystick + buttons, bypasses the LLM). The whole stack has three independent network channels that flow simultaneously, plus an internal LCM message bus on the backend.
+
+## 1. Webapp side (Next.js 16, React 19, TS, Tailwind v4)
+
+### 1a. Page structure
+
+`webapp/src/pages/index.tsx` is the only page. It composes:
+
+- `<Header>` — Goldie badge, **TTS toggle**, **connection chip** (`useStatus` polls `/unitree/status` every 5s, `useStatus.ts:5-22`)
+- `<ModeToggle>` — `voice` vs `manual`
+- Either `<VoicePanel>` or `<ManualPanel>` depending on mode
+
+Mode-independent state in `index.tsx`:
+
+- `ttsEnabled` (mute switch for spoken replies)
+- `recordingRef` (used for barge-in — suppresses incoming TTS while the user is mid-utterance, `index.tsx:32-40`)
+- `useAgentFeed` — SSE subscription to agent replies (always on, both modes)
+
+### 1b. Voice flow (the golden path)
+
+This is the most interesting one. Step by step:
+
+**(1) Hold-to-speak** — `<VoiceButton>` uses pointer events (`VoiceButton.tsx:45-50`). On `pointerdown`:
+
+```
+handleStart() in index.tsx:72
+  → setActionError(null)
+  → cancelSpeech()       (kill any reply still playing so it doesn't overlap)
+  → unlockSpeech()       (CRITICAL: plays a silent WAV inside the user gesture
+                          so iOS will later allow async <audio> playback)
+  → start() from useStt
+```
+
+**(2) STT** — `useStt` is a thin shell around two providers selected by `NEXT_PUBLIC_STT` (`stt.ts:199-204`):
+
+- **`WebSpeechStt`** (default) — uses the browser's `webkitSpeechRecognition` API. Live partial transcripts via `onInterim`, final transcript on `onend`. Runs entirely in the browser and never hits the DimOS backend until the query is submitted (note that some browsers send the audio to their vendor's speech service for recognition).
+- **`UploadStt`** (production iOS path) — records `audio/mp4` via `MediaRecorder`, POSTs to backend `/upload_audio`, gets the transcript back. iOS requires mp4, not webm (`stt.ts:166-169`).
+
+**(3) Submit** — once we have final text, `submitSpeech` in `index.tsx:53-63`:
+
+```
+text = "stand up"
+  → wrapUserSpeech(text)   → "<user_speech>stand up</user_speech>"
+  → dimos.submitQuery(payload)
+       → multipart/form-data POST to <API>/submit_query
+       → field name: "query" (NOT JSON — the FastAPI server uses Form(...))
+       → Authorization: Bearer $NEXT_PUBLIC_DIMOS_TOKEN
+       → header `ngrok-skip-browser-warning: true` (avoids ngrok's interstitial)
+       → fetchWithRetry: 2 retries, 8s timeout, retries on 5xx + network errors
+```
+
+The `<user_speech>…</user_speech>` wrapper is a signal to the backend agent's system prompt that this came from voice (vs typed text).
+
+**(4) Reply stream** — separately, `useAgentFeed` is already listening on `/text_stream/agent_responses` via Server-Sent Events. The client uses `@microsoft/fetch-event-source` (not the native `EventSource`) specifically so it can attach the `Authorization` header (`dimos.ts:145-172`). Each SSE `data:` frame is a JSON envelope:
+
+```json
+{"kind": "ai" | "tool" | "system", "text": "..."}
+```
+
+`classifyAgentMessage` in `agentMessage.ts:26-49` parses each frame:
+- Dedups consecutive identical text
+- Drops empty/ping frames
+- Has a legacy fallback that treats plain-text as `kind: "ai"`
+
+For each new message:
+- Always appended to the on-screen `messages` list (last 5, `useAgentFeed.ts:40`)
+- Triggers `onMessage` callback in `index.tsx:34-40`
+- `<StatusCard>` shows `tool`/`system` messages in a faint color (status only); `ai` messages bold
+
+**(5) TTS playback** — In `index.tsx:34-40`, only `kind === "ai"` messages get spoken, AND only if `recordingRef.current === false` (barge-in suppression). Then:
+
+```
+speak(text) in lib/speech.ts
+  → enqueue text
+  → pump():
+       POST /api/tts {text}                  ← LOCAL Next.js API route
+         → server uses OPENAI_API_KEY (never on client)
+         → OpenAI gpt-4o-mini-tts, voice "coral", with steering instructions
+           "warm, friendly, upbeat female voice, brisk pace"
+         → returns audio/mpeg MP3
+       → URL.createObjectURL(blob)
+       → audioEl.src = url; audioEl.play()
+       → wait for `onended`, then pump next item in queue
+```
+
+Why this complexity? `speech.ts:1-10` explains it: iOS Safari's `SpeechSynthesis` API silently drops anything not initiated inside a user gesture, and agent replies arrive **async** over SSE — so they were never spoken. The fix is a single `<audio>` element that gets "unlocked" by playing a tiny silent WAV in the tap handler (`unlockSpeech`), after which iOS will play later async audio. The queue ensures multiple AI messages don't talk over each other.
+
+`cancelSpeech` aborts both the in-flight fetch and the playing audio (used on barge-in and on the Interrupt button).
+
+### 1c. Quick action flow
+
+Sit / Jump / Stand buttons (`QuickActions.tsx`). These send **natural-language commands through the same `/submit_query` path** — not the direct `/unitree/command` sport endpoint:
+
+```
+handleAction({label:"Sit", command:"sit"})
+  → dimos.submitQuery("sit")     (no <user_speech> wrapper)
+```
+
+The design intent is so the agent can narrate what it's doing ("Okay, sitting now…") through the same SSE → TTS pipeline as voice. Direct `/unitree/command` would skip the agent and skip narration.
+
+### 1d. Manual / joystick flow (totally separate channel)
+
+`useTeleop` in `useTeleop.ts` opens a **Socket.IO** connection (not HTTP) to a **second backend port (7779)** at `NEXT_PUBLIC_DIMOS_VIS`. This is DimOS's visualization server (`WebsocketVisModule`), and it accepts a `move_command` event with ROS-style Twist payloads:
+
+```js
+socket.emit("move_command", {
+  linear:  { x: vx * 0.6,    y: 0, z: 0 },  // Go2 max forward: 0.6 m/s
+  angular: { x: 0, y: 0, z: -turn * 1.0 },  // negative z = clockwise
+});
+```
+
+In `index.tsx:118-141`:
+- On joystick drag: emit immediately, then `setInterval` re-emit every 66ms (~15Hz)
+- On release: emit a zero Twist (`teleop.stop()`)
+
+`Joystick.tsx` uses pointer capture and `computeDrive` (`joystick.ts`) which clamps to the pad rim, applies a 12% dead zone, maps screen-down to negative `vx`, etc. This path is real-time teleop — it never touches the LLM agent.
+
+### 1e. Status & connection
+
+- `useStatus` polls `GET /unitree/status` every 5s. Accepts either the real backend's `{status:"online"}` or the mock's `{connected:true}` (`dimos.ts:103-118`).
+- `useTeleop` exposes `configured` (env var set) and `connected` (Socket.IO actually connected) so the manual panel can show "teleop link down" (`ManualPanel.tsx:33-37`).
+
+### 1f. Next.js API routes (run on the webapp server, NOT the dimos backend)
+
+- **`/api/tts`** — OpenAI proxy described above. Keeps `OPENAI_API_KEY` server-side. Model and voice are env-overridable (`tts.ts:21-27`).
+- **`/api/log`** — Dev-only sink. Every `devLog({event:"agent-msg"|"tts"|"stt-error"|...})` from the frontend POSTs here and gets pretty-printed to the `npm run dev` terminal — so you can see speech transcripts and TTS decisions on your laptop while testing from a phone over ngrok (`log.ts`).
+- **`/api/mock/[...path]`** — A complete in-memory mock of the dimos backend (`mock/[...path].ts`). When `NEXT_PUBLIC_DIMOS_API` is unset, the client defaults to `/api/mock`, and you get a scripted SSE sequence ("On it." → "Navigation goal reached" → "Done — what would you like next?") so the UI is fully runnable with no robot. The mock also handles `submit_query`, `upload_audio`, `unitree/status`, `unitree/command`, `interrupt`.
+
+### 1g. Cross-cutting iOS / PWA details
+
+- `_app.tsx` viewport meta has `maximum-scale=1, viewport-fit=cover` — stops Safari auto-zoom and lets the gradient extend under the notch
+- `_document.tsx` declares `apple-mobile-web-app-capable`, manifest, apple-touch-icon — Add to Home Screen (A2HS) works without a service worker
+- `ngrok-skip-browser-warning` header on every request (`dimos.ts:19-28`) — avoids the free-tier ngrok HTML interstitial that otherwise breaks fetch/SSE through the tunnel
+
+---
+
+## 2. Backend side (Python, DimOS)
+
+The backend is **two HTTP servers and an internal message bus**, plus the dog connection.
+
+### 2a. Server topology
+
+```
+        ┌─────────────────────────────────────────────────┐
+        │   DimOS backend process (a single Python proc)  │
+        │                                                 │
+        │   ┌─────────────────────┐                       │
+        │   │ FastAPI/uvicorn     │  port 5555            │
+        │   │ (RobotWebInterface) │                       │
+        │   │  • /submit_query    │                       │
+        │   │  • /upload_audio    │                       │
+        │   │  • /text_stream/*   │  ← SSE                │
+        │   │  • /unitree/*       │                       │
+        │   │  • /video_feed/*    │                       │
+        │   └──────────┬──────────┘                       │
+        │              │                                  │
+        │              ▼ rx Subjects                      │
+        │        ┌─────────────────┐                      │
+        │        │ WebInput module │                      │
+        │        │ • query_subject │                      │
+        │        │ • audio_subject │──┐                   │
+        │        └────────┬────────┘  │                   │
+        │                 │           ▼                   │
+        │                 │      AudioNormalizer          │
+        │                 │      → WhisperNode (STT)      │
+        │                 │           │                   │
+        │                 ▼           ▼                   │
+        │        ┌──────────────────────────┐             │
+        │        │ LCM bus (pLCMTransport)  │             │
+        │        │ topic: "/human_input"    │             │
+        │        │ topic: "/agent"          │             │
+        │        └──────────┬───────────────┘             │
+        │                   │                             │
+        │                   ▼                             │
+        │        ┌────────────────────┐                   │
+        │        │ LLM Agent process  │                   │
+        │        │ (LangChain + MCP   │                   │
+        │        │  skills, GPT)      │                   │
+        │        └──────────┬─────────┘                   │
+        │                   │ ROS Twist                   │
+        │                   ▼                             │
+        │   ┌─────────────────────┐                       │
+        │   │ Socket.IO + uvicorn │  port 7779            │
+        │   │ (WebsocketVisModule)│                       │
+        │   │  • move_command     │  ← teleop in          │
+        │   │  • robot_pose, etc. │  ← vis out            │
+        │   └──────────┬──────────┘                       │
+        │              ▼                                  │
+        │     ROS-style transport (cmd_vel, ...)          │
+        │              │                                  │
+        └──────────────┼──────────────────────────────────┘
+                       ▼
+              UnitreeWebRTCConnection
+              (LocalSTA or Remote mode via Unitree cloud)
+                       │
+                       ▼
+                    Go2 dog
+```
+
+### 2b. FastAPI server (port 5555)
+
+File: `dimos/web/dimos_interface/api/server.py`. Wrapped as `RobotWebInterface` (`robot_web_interface.py`) when the agent uses it.
+
+Key routes (`server.py:242-372`):
+
+| Route | What it does |
+|---|---|
+| `POST /submit_query` | Reads `query` from form data → pushes onto `query_subject` (an rx Subject). That's it — async. Returns `{success: true}` (`server.py:286-300`). |
+| `POST /upload_audio` | Reads the multipart file → `_decode_audio` runs ffmpeg pipe to convert webm/opus → 16kHz mono PCM → builds an `AudioEvent` and pushes onto `audio_subject` (`server.py:302-333`). |
+| `GET /text_stream/{key}` | `EventSourceResponse` that pulls from `self.text_queues[key]` and emits SSE frames. Sends a `ping` event every 100ms when the queue is empty (`server.py:191-213, 365-369`). |
+| `GET /text_streams` | List of available text streams (used by the client to discover `agent_responses`). |
+| `POST /unitree/command` | Reads `{command}` JSON → pushes onto the same `query_subject` (same path as a typed query, no agent bypass). |
+| `GET /unitree/status` | Just returns `{status:"online"}`. The connection chip works off this. |
+| `GET /video_feed/{key}` | MJPEG over `multipart/x-mixed-replace` for the dog's cameras. Currently unused by the webapp. |
+
+**Auth** (`server.py:245-259`): `DIMOS_API_TOKEN` env var. If set, all protected endpoints require `Authorization: Bearer <token>` OR `?token=<token>` (the latter is for browsers that can't set headers on `EventSource`). If the env is empty, auth is disabled — that's the local-dev mode.
+
+**CORS** is wide open (`allow_origins=["*"]`).
+
+The text-stream plumbing is `rx → Queue → SSE`: each named text stream is an rx Subject; on init the server subscribes each one and puts every emission into a per-stream `Queue` (`server.py:114-123`); the SSE handler pops from that queue.
+
+### 2c. WebInput module — the bridge
+
+File: `dimos/agents/web_human_input.py`. This is what wires the FastAPI server to the LLM agent. On `start()`:
+
+1. **Creates** the `RobotWebInterface` with `text_streams={"agent_responses": Subject()}` and `audio_subject`.
+2. **Audio pipeline** (`web_human_input.py:78-87`):
+   ```
+   audio_subject → AudioNormalizer → WhisperNode (Whisper STT) → text
+   ```
+   So if you use the upload-STT path on the webapp, it's actually Whisper running on the backend.
+3. **Text out** (`web_human_input.py:91-96`):
+   ```
+   query_subject (typed/voice queries from /submit_query)    ─┐
+                                                              ├─→ LCM publish on "/human_input"
+   WhisperNode.emit_text() (transcripts from /upload_audio)  ─┘
+   ```
+4. **Replies in** (`web_human_input.py:57-76`) — this is the part the recent backend commit added:
+   ```
+   LCM subscribe "/agent"
+     for each BaseMessage:
+       kind = msg.type   # "human" | "ai" | "tool" | "system"
+       if kind == "human": skip   # echo of what the user said
+       content = msg.content (flatten list-of-parts if needed)
+       agent_responses.on_next(json.dumps({"kind": kind, "text": content}))
+   ```
+   This is what produces the typed JSON envelope the webapp parses in `classifyAgentMessage`. `tool` messages (e.g. "Navigation goal reached") get rendered as faint status on the phone; `ai` messages get spoken aloud.
+
+### 2d. The LLM agent (separate concern)
+
+The actual agent isn't in this commit set — it lives elsewhere in the codebase and is launched by the DimOS CLI. From the LCM topology you can derive its contract:
+
+- **Subscribes**: LCM `/human_input` (string, e.g. `<user_speech>stand up</user_speech>` or `sit`)
+- **Publishes**: LCM `/agent` (LangChain `BaseMessage` objects with `.type` ∈ `{human, ai, tool, system}`)
+- **Internally**: runs a LangChain MCP loop over a skill catalog (the `dimos/agents/skills/` tree has e.g. `blind_assistant_skills.py`). When it needs to move the dog, it calls a skill that publishes a Twist on the same transport the teleop endpoint publishes to.
+
+LCM (`pLCMTransport`) is just an in-process pub/sub bus on top of the LCM library — decouples the web layer from the agent layer.
+
+### 2e. Socket.IO vis server (port 7779)
+
+File: `dimos/web/websocket_vis/websocket_vis_module.py`. Separate ASGI app on a separate port. Two purposes:
+
+- **Inbound `move_command`** (`websocket_vis_module.py:332-353`): when the webapp's joystick fires, this handler builds a `Twist` (and `TwistStamped`) and publishes on `tele_cmd_vel` / `movecmd_stamped` — which are wired into the same Twist topic the agent's skills publish to. So manual joystick and the LLM both ultimately push the same Twist messages to the dog.
+- **Outbound vis** (`_emit` calls): `robot_pose`, `gps_location`, `path`, `costmap`, etc. — used by a debug visualization the webapp doesn't currently consume.
+
+### 2f. Dog connection (the last leg)
+
+Twist commands flow through DimOS's transport layer down to `UnitreeWebRTCConnection`, which holds a WebRTC data channel to the Go2. From `DOG-PHONE-INTERFACE.md` it's clear there are two modes:
+
+- **`LocalSTA`** — direct WebRTC on the same LAN
+- **`Remote`** — signaling routed through Unitree's cloud + TURN-relayed data channel, so the dog and the dimos backend can be on different networks (used in the "iPhone hotspot, VPS in the cloud, dog on the hotspot" demo topology)
+
+The dog returns telemetry (pose, etc.) the same way back into the rx streams.
+
+---
+
+## 3. End-to-end golden path (voice → motion → spoken reply)
+
+For your diagram, this is the sequence to trace:
+
+```
+[Phone Safari]  user holds button
+              → unlockSpeech (silent WAV) + start STT
+              → "find the bathroom"   (Web Speech API, in-browser)
+              → wrapUserSpeech → "<user_speech>find the bathroom</user_speech>"
+              → POST /submit_query (multipart, Bearer token)
+
+[FastAPI :5555]  Form("query") → query_subject.on_next(text)
+
+[WebInput rx]   query_subject → pLCMTransport("/human_input").publish(text)
+
+[LCM bus]       /human_input → LLM Agent
+
+[LLM Agent]     LangChain MCP loop:
+                  emits "ai" message: "Heading to the bathroom now."
+                  calls nav skill → publishes Twist on cmd_vel
+                  emits "tool" message: "Navigation goal reached"
+                  emits "ai" message: "We're there."
+
+[LCM bus]       /agent ─┬─→ WebInput._on_agent_message
+                        │     → json.dumps({kind, text})
+                        │     → agent_responses Subject.on_next(...)
+                        │
+                        └─→ (Twist for dog flows separately
+                             through cmd_vel transport → WebRTC → Go2)
+
+[FastAPI :5555]  text_queues["agent_responses"] ← Subject
+                 → SSE: data: {"kind":"ai","text":"Heading…"}\n\n
+                 → SSE: data: {"kind":"tool","text":"Navigation goal reached"}\n\n
+                 → SSE: data: {"kind":"ai","text":"We're there."}\n\n
+
+[Phone Safari]  useAgentFeed → classifyAgentMessage → setMessages
+                onMessage callback:
+                  if kind=="ai" && !recording  → speak(text)
+                    → POST /api/tts {text}
+                    → /api/tts (Next API) → OpenAI gpt-4o-mini-tts → MP3
+                    → audio element plays (unlocked earlier)
+                  if kind=="tool" → render only, don't speak
+
+[Phone]         User hears "Heading to the bathroom now… We're there."
+                Dog has physically moved during this.
+```
+
+And the **parallel manual channel** for comparison:
+
+```
+[Phone Safari]  joystick drag
+              → useTeleop.drive({vx, vy, turn}) @ 15Hz
+              → socket.emit("move_command", Twist)  via Socket.IO
+
+[Socket.IO :7779]  move_command handler
+              → builds Twist
+              → tele_cmd_vel.publish(twist)
+
+[transport]   cmd_vel topic → UnitreeWebRTCConnection → WebRTC data channel → Go2
+
+(No LLM, no SSE, no TTS — pure teleop loop.)
+```
+
+---
+
+The three independent channels for the diagram:
+
+1. **HTTP form POST + SSE (port 5555)** — voice & quick actions → agent → spoken reply
+2. **Socket.IO (port 7779)** — joystick → Twist → dog
+3. **OpenAI TTS (via local Next.js /api/tts)** — agent text → speech in the phone
+
+…and the internal LCM bus (`/human_input` ↔ `/agent`) is what decouples the web layer from the agent layer on the backend.
diff --git a/webapp/eslint.config.mjs b/webapp/eslint.config.mjs
new file mode 100644
index 0000000000..05e726d1b4
--- /dev/null
+++ b/webapp/eslint.config.mjs
@@ -0,0 +1,18 @@
+import { defineConfig, globalIgnores } from "eslint/config";
+import nextVitals from "eslint-config-next/core-web-vitals";
+import nextTs from "eslint-config-next/typescript";
+
+const eslintConfig = defineConfig([
+  ...nextVitals,
+  ...nextTs,
+  // Override default ignores of eslint-config-next.
+  globalIgnores([
+    // Default ignores of eslint-config-next:
+    ".next/**",
+    "out/**",
+    "build/**",
+    "next-env.d.ts",
+  ]),
+]);
+
+export default eslintConfig;
diff --git a/webapp/next-env.d.ts b/webapp/next-env.d.ts
new file mode 100644
index 0000000000..19709046af
--- /dev/null
+++ b/webapp/next-env.d.ts
@@ -0,0 +1,6 @@
+/// <reference types="next" />
+/// <reference types="next/image-types/global" />
+import "./.next/types/routes.d.ts";
+
+// NOTE: This file should not be edited
+// see https://nextjs.org/docs/pages/api-reference/config/typescript for more information.
diff --git a/webapp/next.config.ts b/webapp/next.config.ts
new file mode 100644
index 0000000000..6e46d5194c
--- /dev/null
+++ b/webapp/next.config.ts
@@ -0,0 +1,10 @@
+import type { NextConfig } from "next";
+
+const nextConfig: NextConfig = {
+  reactStrictMode: true,
+  // Allow any ngrok tunnel to hit the dev server (real-iPhone testing).
+  // Wildcard so you never have to update this when the tunnel URL changes.
+  allowedDevOrigins: ["*.ngrok-free.app"],
+};
+
+export default nextConfig;
diff --git a/webapp/package.json b/webapp/package.json
new file mode 100644
index 0000000000..31532f2afb
--- /dev/null
+++ b/webapp/package.json
@@ -0,0 +1,31 @@
+{
+  "name": "my-app",
+  "version": "0.1.0",
+  "private": true,
+  "scripts": {
+    "dev": "next dev",
+    "build": "next build",
+    "start": "next start",
+    "lint": "eslint",
+    "test": "vitest run"
+  },
+  "dependencies": {
+    "@microsoft/fetch-event-source": "^2.0.1",
+    "next": "16.2.6",
+    "openai": "^6.39.0",
+    "react": "19.2.4",
+    "react-dom": "19.2.4",
+    "socket.io-client": "^4.8.3"
+  },
+  "devDependencies": {
+    "@tailwindcss/postcss": "^4",
+    "@types/node": "^20",
+    "@types/react": "^19",
+    "@types/react-dom": "^19",
+    "eslint": "^9",
+    "eslint-config-next": "16.2.6",
+    "tailwindcss": "^4",
+    "typescript": "^5",
+    "vitest": "^3.2.4"
+  }
+}
diff --git a/webapp/postcss.config.mjs b/webapp/postcss.config.mjs
new file mode 100644
index 0000000000..61e36849cf
--- /dev/null
+++ b/webapp/postcss.config.mjs
@@ -0,0 +1,7 @@
+const config = {
+  plugins: {
+    "@tailwindcss/postcss": {},
+  },
+};
+
+export default config;
diff --git a/webapp/public/favicon.ico b/webapp/public/favicon.ico
new file mode 100644
index 0000000000..718d6fea48
Binary files /dev/null and b/webapp/public/favicon.ico differ
diff --git a/webapp/public/file.svg b/webapp/public/file.svg
new file mode 100644
index 0000000000..004145cddf
--- /dev/null
+++ b/webapp/public/file.svg
@@ -0,0 +1 @@
+<svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>
\ No newline at end of file
diff --git a/webapp/public/globe.svg b/webapp/public/globe.svg
new file mode 100644
index 0000000000..567f17b0d7
--- /dev/null
+++ b/webapp/public/globe.svg
@@ -0,0 +1 @@
+<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>
\ No newline at end of file
diff --git a/webapp/public/icons/apple-touch-icon.png b/webapp/public/icons/apple-touch-icon.png
new file mode 100644
index 0000000000..3e6e3093b4
--- /dev/null
+++ b/webapp/public/icons/apple-touch-icon.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:979cfe71d1d082f1ae6d13f68820a2a71d0d143c839fd3186fba6722ffce6fea
+size 5064
diff --git a/webapp/public/icons/icon-192.png b/webapp/public/icons/icon-192.png
new file mode 100644
index 0000000000..0b9ae33fd7
--- /dev/null
+++ b/webapp/public/icons/icon-192.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9ded871c983144d33f043b42350b5ede95470e27b1dfe7c0d8af84c9bcb20da
+size 5427
diff --git a/webapp/public/icons/icon-512.png b/webapp/public/icons/icon-512.png
new file mode 100644
index 0000000000..e733596e80
--- /dev/null
+++ b/webapp/public/icons/icon-512.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98a03a908821d278809569eaa087d00b179c30c2286f018120c8cb2cfb4b2611
+size 15467
diff --git a/webapp/public/icons/icon.svg b/webapp/public/icons/icon.svg
new file mode 100644
index 0000000000..3ac305a9a0
--- /dev/null
+++ b/webapp/public/icons/icon.svg
@@ -0,0 +1,12 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512">
+  <defs>
+    <linearGradient id="g" x1="0" y1="0" x2="0" y2="1">
+      <stop offset="0" stop-color="#f0cd86"/>
+      <stop offset="0.6" stop-color="#e3b15e"/>
+      <stop offset="1" stop-color="#c79236"/>
+    </linearGradient>
+  </defs>
+  <rect width="512" height="512" rx="112" fill="#0a0b0e"/>
+  <text x="256" y="356" font-family="-apple-system, BlinkMacSystemFont, system-ui, sans-serif"
+        font-size="320" font-weight="700" text-anchor="middle" fill="url(#g)">G</text>
+</svg>
diff --git a/webapp/public/manifest.json b/webapp/public/manifest.json
new file mode 100644
index 0000000000..5701e7c5e9
--- /dev/null
+++ b/webapp/public/manifest.json
@@ -0,0 +1,15 @@
+{
+  "name": "Goldie",
+  "short_name": "Goldie",
+  "description": "Voice and joystick control for Goldie.",
+  "start_url": "/",
+  "display": "standalone",
+  "orientation": "portrait",
+  "background_color": "#0a0b0e",
+  "theme_color": "#0a0b0e",
+  "icons": [
+    { "src": "/icons/icon.svg", "sizes": "any", "type": "image/svg+xml" },
+    { "src": "/icons/icon-192.png", "sizes": "192x192", "type": "image/png" },
+    { "src": "/icons/icon-512.png", "sizes": "512x512", "type": "image/png" }
+  ]
+}
diff --git a/webapp/public/next.svg b/webapp/public/next.svg
new file mode 100644
index 0000000000..5174b28c56
--- /dev/null
+++ b/webapp/public/next.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>
\ No newline at end of file
diff --git a/webapp/public/vercel.svg b/webapp/public/vercel.svg
new file mode 100644
index 0000000000..7705396033
--- /dev/null
+++ b/webapp/public/vercel.svg
@@ -0,0 +1 @@
+<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>
\ No newline at end of file
diff --git a/webapp/public/window.svg b/webapp/public/window.svg
new file mode 100644
index 0000000000..b2b2a44f6e
--- /dev/null
+++ b/webapp/public/window.svg
@@ -0,0 +1 @@
+<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.5 2.5h13v10a1 1 0 0 1-1 1h-11a1 1 0 0 1-1-1zM0 1h16v11.5a2.5 2.5 0 0 1-2.5 2.5h-11A2.5 2.5 0 0 1 0 12.5zm3.75 4.5a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5M7 4.75a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0m1.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5" fill="#666"/></svg>
\ No newline at end of file
diff --git a/webapp/src/components/ConnectionChip.tsx b/webapp/src/components/ConnectionChip.tsx
new file mode 100644
index 0000000000..a94a151686
--- /dev/null
+++ b/webapp/src/components/ConnectionChip.tsx
@@ -0,0 +1,10 @@
+export default function ConnectionChip({ connected }: { connected: boolean }) {
+  return (
+    <span className="flex items-center gap-1.5 rounded-full border border-line bg-surface px-2.5 py-1 text-[11px] text-muted">
+      <span
+        className={`h-1.5 w-1.5 rounded-full ${connected ? "bg-green" : "bg-faint"}`}
+      />
+      {connected ? "connected" : "offline"}
+    </span>
+  );
+}
diff --git a/webapp/src/components/Header.tsx b/webapp/src/components/Header.tsx
new file mode 100644
index 0000000000..516ee4f9b0
--- /dev/null
+++ b/webapp/src/components/Header.tsx
@@ -0,0 +1,37 @@
+import ConnectionChip from "./ConnectionChip";
+
+export default function Header({
+  connected,
+  ttsEnabled,
+  onToggleTts,
+}: {
+  connected: boolean;
+  ttsEnabled: boolean;
+  onToggleTts: () => void;
+}) {
+  return (
+    <header className="flex items-center justify-between px-5 pt-7">
+      <span className="flex items-center gap-2 text-[17px] font-semibold tracking-tight text-fg">
+        <span className="h-[7px] w-[7px] rounded-full bg-gold shadow-[0_0_7px_var(--color-gold)]" />
+        Goldie
+      </span>
+      <div className="flex items-center gap-2">
+        <button
+          type="button"
+          onClick={onToggleTts}
+          aria-pressed={ttsEnabled}
+          aria-label={
+            ttsEnabled ? "Mute spoken responses" : "Unmute spoken responses"
+          }
+          className="flex items-center gap-1.5 rounded-full border border-line bg-surface px-2.5 py-1 text-[11px] text-muted"
+        >
+          <span
+            className={`h-1.5 w-1.5 rounded-full ${ttsEnabled ? "bg-gold" : "bg-faint"}`}
+          />
+          speech
+        </button>
+        <ConnectionChip connected={connected} />
+      </div>
+    </header>
+  );
+}
diff --git a/webapp/src/components/InterruptButton.tsx b/webapp/src/components/InterruptButton.tsx
new file mode 100644
index 0000000000..212674ae79
--- /dev/null
+++ b/webapp/src/components/InterruptButton.tsx
@@ -0,0 +1,19 @@
+export default function InterruptButton({
+  onInterrupt,
+  disabled,
+}: {
+  onInterrupt: () => void;
+  disabled?: boolean;
+}) {
+  // Red stop-style control: a small square glyph followed by the label.
+  // What "interrupt" actually does is left to the parent's `onInterrupt` handler.
+  const glyph = <span className="inline-block h-[11px] w-[11px] rounded-[2.5px] bg-red" />;
+  const look =
+    "mt-3 flex items-center justify-center gap-2.5 rounded-[14px] border border-red/30 bg-red-soft py-4 text-sm font-bold tracking-wide text-red transition active:scale-[0.99] disabled:opacity-50";
+  return (
+    <button type="button" disabled={disabled} onClick={onInterrupt} className={look}>
+      {glyph}
+      Interrupt
+    </button>
+  );
+}
diff --git a/webapp/src/components/Joystick.tsx b/webapp/src/components/Joystick.tsx
new file mode 100644
index 0000000000..a93d6b8808
--- /dev/null
+++ b/webapp/src/components/Joystick.tsx
@@ -0,0 +1,99 @@
+import { useRef, useState } from "react";
+import type { CSSProperties, PointerEvent } from "react";
+import type { MoveCommand } from "@/lib/types";
+import { computeDrive } from "@/lib/joystick";
+
+const KNOB = 84; // px, must match the knob element size below
+
+const padStyle: CSSProperties = {
+  background:
+    "radial-gradient(circle at 50% 42%, #16191f 0%, #0e1014 72%, #0a0c0f 100%)",
+  boxShadow:
+    "inset 0 2px 6px rgba(0,0,0,.6), inset 0 -2px 10px rgba(255,255,255,.02), 0 0 0 1px var(--color-line)",
+};
+
+const knobStyle: CSSProperties = {
+  background: "radial-gradient(circle at 50% 34%, #f0cd86, #e3b15e 60%, #c79236)",
+  boxShadow:
+    "0 8px 18px rgba(0,0,0,.55), inset 0 2px 2px rgba(255,255,255,.5), inset 0 -6px 12px rgba(0,0,0,.25)",
+};
+
+function Tick({ className }: { className: string }) {
+  // Small decorative marker: the caller's `className` is appended to the shared
+  // base classes, which is how each tick gets its own position.
+  const base =
+    "pointer-events-none absolute h-[11px] w-[3px] rounded bg-faint opacity-50";
+  return <span className={`${base} ${className}`} />;
+}
+
+export default function Joystick({
+  onMove,
+  onEnd,
+}: { onMove?: (m: MoveCommand) => void; onEnd?: () => void }) {
+  // Drag pad: `onMove` receives the drive vector while a pointer is held; `onEnd` fires on release.
+  const padRef = useRef<HTMLDivElement>(null);
+  const activeId = useRef<number | null>(null);
+  const [knob, setKnob] = useState({ x: 0, y: 0 });
+
+  function update(e: PointerEvent) {
+    const el = padRef.current;
+    if (!el) return;
+    const r = el.getBoundingClientRect();
+    const travel = r.width / 2 - KNOB / 2; // keep knob inside the pad
+    const dx = e.clientX - (r.left + r.width / 2);
+    const dy = e.clientY - (r.top + r.height / 2);
+    const d = computeDrive(dx, dy, travel);
+    setKnob({ x: d.knobX, y: d.knobY });
+    onMove?.({ vx: d.vx, vy: 0, turn: d.turn });
+  }
+
+  function handleDown(e: PointerEvent) {
+    e.preventDefault();
+    // Only the first pointer steers: a second finger must not hijack the drive vector.
+    if (activeId.current !== null) return;
+    activeId.current = e.pointerId;
+    padRef.current?.setPointerCapture(e.pointerId);
+    update(e);
+  }
+
+  function handleMove(e: PointerEvent) {
+    if (activeId.current === e.pointerId) update(e);
+  }
+
+  // Shared by up / cancel / lost-capture; the id check turns repeat calls into no-ops.
+  function handleUp(e: PointerEvent) {
+    if (activeId.current !== e.pointerId) return;
+    activeId.current = null;
+    setKnob({ x: 0, y: 0 });
+    onEnd?.();
+  }
+
+  return (
+    <div className="flex flex-1 flex-col items-center justify-center gap-3.5">
+      <div
+        ref={padRef}
+        className="relative h-[218px] w-[218px] touch-none rounded-full"
+        style={padStyle}
+        onPointerDown={handleDown}
+        onPointerMove={handleMove}
+        onPointerUp={handleUp}
+        onPointerCancel={handleUp}
+        onLostPointerCapture={handleUp}
+      >
+        <Tick className="left-1/2 top-[14px] -translate-x-1/2" />
+        <Tick className="bottom-[14px] left-1/2 -translate-x-1/2" />
+        <Tick className="left-[14px] top-1/2 -translate-y-1/2 rotate-90" />
+        <Tick className="right-[14px] top-1/2 -translate-y-1/2 rotate-90" />
+        <div
+          className="absolute left-1/2 top-1/2 h-[84px] w-[84px] rounded-full"
+          style={{
+            ...knobStyle,
+            transform: `translate(calc(-50% + ${knob.x}px), calc(-50% + ${knob.y}px))`,
+            transition: activeId.current === null ? "transform 0.15s ease-out" : "none",
+          }}
+        />
+      </div>
+      <div className="text-[13px] text-faint">Drag to drive · release to stop</div>
+    </div>
+  );
+}
diff --git a/webapp/src/components/ManualPanel.tsx b/webapp/src/components/ManualPanel.tsx
new file mode 100644
index 0000000000..b95ff7a964
--- /dev/null
+++ b/webapp/src/components/ManualPanel.tsx
@@ -0,0 +1,45 @@
+import type { MoveCommand, QuickAction } from "@/lib/types";
+import Joystick from "./Joystick";
+import QuickActions from "./QuickActions";
+
+export default function ManualPanel({
+  drive,
+  busy,
+  linkConnected,
+  linkConfigured,
+  onMove,
+  onEnd,
+  onAction,
+}: {
+  drive: MoveCommand;
+  busy?: boolean;
+  linkConnected?: boolean;
+  linkConfigured?: boolean;
+  onMove: (m: MoveCommand) => void;
+  onEnd: () => void;
+  onAction: (a: QuickAction) => void;
+}) {
+  // "Driving" whenever the current drive has any forward or turn component.
+  const heading = drive.vx !== 0 || drive.turn !== 0 ? "Driving" : "Manual";
+  // Only a configured link can be reported as down; otherwise show the live readout.
+  const readout =
+    linkConfigured && !linkConnected ? (
+      <span className="font-mono text-[12px] text-red">teleop link down</span>
+    ) : (
+      <span className="font-mono text-[12px] text-[#c4c8d0]">
+        vx {drive.vx.toFixed(2)} · turn {drive.turn.toFixed(2)}
+      </span>
+    );
+  return (
+    <div className="flex flex-1 flex-col">
+      <Joystick onMove={onMove} onEnd={onEnd} />
+      <div className="rounded-2xl border border-line bg-surface px-4 py-3.5">
+        <div className="flex items-baseline justify-between">
+          <span className="text-[9.5px] uppercase tracking-[1.5px] text-faint">{heading}</span>
+          {readout}
+        </div>
+      </div>
+      <QuickActions onAction={onAction} disabled={busy} />
+    </div>
+  );
+}
diff --git a/webapp/src/components/ModeToggle.tsx b/webapp/src/components/ModeToggle.tsx
new file mode 100644
index 0000000000..11b8e9848b
--- /dev/null
+++ b/webapp/src/components/ModeToggle.tsx
@@ -0,0 +1,36 @@
+import type { Mode } from "@/lib/types";
+
+const MODES: Mode[] = ["voice", "manual"];
+
+export default function ModeToggle({
+  mode,
+  onChange,
+}: {
+  mode: Mode;
+  onChange: (m: Mode) => void;
+}) {
+  // Segmented control with one button per entry in MODES. The selected segment
+  // is highlighted; a selected "voice" segment is gold, any other one neutral.
+  const tone = (m: Mode): string => {
+    if (mode !== m) return "text-muted";
+    return m === "voice" ? "bg-gold text-[#1a1500]" : "bg-surface-2 text-fg";
+  };
+  // Classes every segment shares, whatever its state.
+  const base =
+    "flex-1 rounded-[10px] py-2 text-[13px] font-semibold capitalize transition-colors";
+
+  return (
+    <div className="mx-5 mt-4 flex rounded-[13px] border border-line bg-[#0c0d11] p-[3px]">
+      {MODES.map((m) => (
+        <button
+          key={m}
+          type="button"
+          onClick={() => onChange(m)}
+          className={`${base} ${tone(m)}`}
+        >
+          {m}
+        </button>
+      ))}
+    </div>
+  );
+}
diff --git a/webapp/src/components/QuickActions.tsx b/webapp/src/components/QuickActions.tsx
new file mode 100644
index 0000000000..569c9b3f1b
--- /dev/null
+++ b/webapp/src/components/QuickActions.tsx
@@ -0,0 +1,35 @@
+import type { QuickAction } from "@/lib/types";
+
+// Natural-language commands sent through /submit_query (the agent), not the
+// direct sport endpoint — so the agent can narrate the action like a voice turn.
+const DEFAULT_ACTIONS: QuickAction[] = [
+  { label: "Sit", command: "sit" },
+  { label: "Jump", command: "jump" },
+  { label: "Stand", command: "stand up" },
+];
+
+export default function QuickActions({
+  onAction,
+  disabled,
+  actions = DEFAULT_ACTIONS,
+}: {
+  onAction: (a: QuickAction) => void;
+  disabled?: boolean;
+  actions?: QuickAction[];
+}) {
+  // One equal-width button per action, keyed by label. A click hands the whole
+  // action object to the parent; `disabled` applies to every button at once.
+  const buttons = actions.map((action) => (
+    <button
+      key={action.label}
+      type="button"
+      disabled={disabled}
+      onClick={() => onAction(action)}
+      className="flex-1 rounded-[13px] border border-line bg-surface py-3 text-[13px] font-semibold text-[#cfd3da] transition-colors active:bg-surface-2 disabled:opacity-40"
+    >
+      {action.label}
+    </button>
+  ));
+
+  return <div className="mt-3 flex gap-2.5">{buttons}</div>;
+}
diff --git a/webapp/src/components/StatusCard.tsx b/webapp/src/components/StatusCard.tsx
new file mode 100644
index 0000000000..e7ec4fed5c
--- /dev/null
+++ b/webapp/src/components/StatusCard.tsx
@@ -0,0 +1,64 @@
+import { useEffect, useRef } from "react";
+import type { AgentMessage } from "@/lib/agentMessage";
+
+export default function StatusCard({
+  messages,
+  active,
+}: {
+  messages: AgentMessage[];
+  active: boolean;
+}) {
+  // Recent agent messages (or a ready hint when empty) plus a "thinking…" badge while `active`.
+  const endRef = useRef<HTMLDivElement>(null);
+  useEffect(() => {
+    endRef.current?.scrollIntoView({ block: "end" }); // keep the newest line in view
+  }, [messages]);
+
+  const has = messages.length > 0;
+
+  return (
+    <div
+      className="rounded-2xl border border-line bg-surface px-4 py-3"
+      aria-live="polite"
+      aria-label="Goldie responses"
+    >
+      <div className="flex items-center justify-between">
+        <span className="text-[9.5px] uppercase tracking-[1.5px] text-faint">
+          Goldie
+        </span>
+        {active && (
+          <span className="flex items-center gap-1.5 text-[10px] text-gold">
+            <span className="h-1.5 w-1.5 animate-pulse rounded-full bg-gold" />
+            thinking…
+          </span>
+        )}
+      </div>
+
+      {has ? (
+        <div className="mt-2 flex max-h-32 flex-col gap-1.5 overflow-y-auto">
+          {messages.map((m, i) => {
+            const latest = i === messages.length - 1;
+            const status = m.kind === "tool" || m.kind === "system";
+            // Exactly one font-size utility per line: emitting text-[13px] and
+            // text-[12px] together leaves the winner to stylesheet order.
+            const size = status ? "text-[12px]" : "text-[13px]";
+            const color = status ? "text-faint" : latest ? "text-fg" : "text-muted";
+            return (
+              <p
+                key={i}
+                className={`${size} leading-snug ${color} ${latest ? "" : "opacity-80"}`}
+              >
+                {m.text}
+              </p>
+            );
+          })}
+          <div ref={endRef} />
+        </div>
+      ) : (
+        <p className="mt-1.5 text-[13px] text-muted">
+          Ready — hold to give a command
+        </p>
+      )}
+    </div>
+  );
+}
diff --git a/webapp/src/components/VoiceButton.tsx b/webapp/src/components/VoiceButton.tsx
new file mode 100644
index 0000000000..73049306bc
--- /dev/null
+++ b/webapp/src/components/VoiceButton.tsx
@@ -0,0 +1,76 @@
+import type { CSSProperties, PointerEvent } from "react";
+
+const idleStyle: CSSProperties = {
+  background: "radial-gradient(circle at 50% 36%, #1f232b, #15181e)",
+  boxShadow:
+    "0 14px 30px rgba(0,0,0,.5), inset 0 1px 1px rgba(255,255,255,.06), inset 0 -10px 20px rgba(0,0,0,.45), 0 0 0 1px var(--color-line-2)",
+  color: "#cfd3da",
+};
+
+const recStyle: CSSProperties = {
+  background: "radial-gradient(circle at 50% 40%, #ff5a4f, #c4271d)",
+  boxShadow:
+    "0 0 34px rgba(255,69,58,.45), inset 0 1px 2px rgba(255,255,255,.25), 0 0 0 1px #ff6b61",
+  color: "#fff",
+};
+
+export default function VoiceButton({
+  recording,
+  disabled,
+  transcript,
+  error,
+  onStart,
+  onStop,
+}: {
+  recording: boolean;
+  disabled?: boolean;
+  transcript?: string;
+  error?: string;
+  onStart: () => void;
+  onStop: () => void;
+}) {
+  const start = (e: PointerEvent) => {
+    e.preventDefault();
+    if (!disabled) onStart();
+  };
+  const stop = (e: PointerEvent) => {
+    e.preventDefault();
+    onStop();
+  };
+  return (
+    <div className="flex flex-1 flex-col items-center justify-center gap-3.5">
+      <button
+        type="button"
+        disabled={disabled}
+        onPointerDown={start}
+        onPointerUp={stop}
+        onPointerCancel={stop}
+        onPointerLeave={(e) => {
+          if (recording) stop(e);
+        }}
+        onContextMenu={(e) => e.preventDefault()}
+        aria-label="Hold to speak a command"
+        aria-pressed={recording}
+        style={recording ? recStyle : idleStyle}
+        className="flex h-[184px] w-[184px] items-center justify-center rounded-full text-center text-sm font-semibold leading-tight transition-transform active:scale-[0.98] disabled:opacity-40"
+      >
+        {recording ? (
+          "Listening…"
+        ) : (
+          <span>
+            Hold
+            <br />
+            to speak
+          </span>
+        )}
+      </button>
+      <div
+        className={`min-h-[18px] px-6 text-center text-[13px] italic ${
+          error ? "text-red" : transcript ? "text-gold" : "text-faint"
+        }`}
+      >
+        {error || transcript ||" "}
+      </div>
+    </div>
+  );
+}
diff --git a/webapp/src/components/VoicePanel.tsx b/webapp/src/components/VoicePanel.tsx
new file mode 100644
index 0000000000..129616450e
--- /dev/null
+++ b/webapp/src/components/VoicePanel.tsx
@@ -0,0 +1,48 @@
+import type { AgentMessage } from "@/lib/agentMessage";
+import type { QuickAction } from "@/lib/types";
+import VoiceButton from "./VoiceButton";
+import StatusCard from "./StatusCard";
+import QuickActions from "./QuickActions";
+import InterruptButton from "./InterruptButton";
+
+export default function VoicePanel({
+  messages,
+  active,
+  recording,
+  transcript,
+  error,
+  busy,
+  onStart,
+  onStop,
+  onAction,
+  onInterrupt,
+}: {
+  messages: AgentMessage[];
+  active: boolean;
+  recording: boolean;
+  transcript?: string;
+  error?: string;
+  busy?: boolean;
+  onStart: () => void;
+  onStop: () => void;
+  onAction: (a: QuickAction) => void;
+  onInterrupt: () => void;
+}) {
+  // While `active`, the quick actions give way to the interrupt button.
+  const footer = active
+    ? <InterruptButton onInterrupt={onInterrupt} />
+    : <QuickActions onAction={onAction} disabled={busy} />;
+  return (
+    <div className="flex flex-1 flex-col">
+      <VoiceButton
+        recording={recording}
+        transcript={transcript}
+        error={error}
+        onStart={onStart}
+        onStop={onStop}
+      />
+      <StatusCard messages={messages} active={active} />
+      {footer}
+    </div>
+  );
+}
diff --git a/webapp/src/hooks/useAgentFeed.ts b/webapp/src/hooks/useAgentFeed.ts
new file mode 100644
index 0000000000..daf3b3c96c
--- /dev/null
+++ b/webapp/src/hooks/useAgentFeed.ts
@@ -0,0 +1,60 @@
+import { useEffect, useRef, useState } from "react";
+import * as dimos from "@/lib/dimos";
+import type { StreamStatus } from "@/lib/dimos";
+import { classifyAgentMessage, type AgentMessage } from "@/lib/agentMessage";
+import { devLog } from "@/lib/devlog";
+
+/**
+ * Subscribe to the `agent_responses` stream and expose a clean, de-duplicated
+ * feed of the last few messages (warnings/keepalives filtered out), an `active`
+ * flag while the agent is replying, and the connection status. `onMessage`
+ * fires once per new message (used to speak it aloud).
+ */
+export function useAgentFeed(opts?: {
+  key?: string;
+  limit?: number;
+  onMessage?: (m: AgentMessage) => void;
+}) {
+  const key = opts?.key ?? "agent_responses";
+  const limit = opts?.limit ?? 5;
+  // The latest callback lives in a ref so a new closure never forces a resubscribe.
+  const onMessageRef = useRef(opts?.onMessage);
+  onMessageRef.current = opts?.onMessage;
+
+  const [messages, setMessages] = useState<AgentMessage[]>([]);
+  const [status, setStatus] = useState<StreamStatus>("connecting");
+  const [active, setActive] = useState(false);
+  const lastTextRef = useRef<string | null>(null);
+  const idleTimer = useRef<number | null>(null);
+
+  useEffect(() => {
+    const handleFrame = (data: string) => {
+      const msg = classifyAgentMessage(data);
+      if (!msg) return; // empty / keepalive frame
+      devLog({ event: "agent-msg", kind: msg.kind, text: msg.text }); // trace the raw stream
+      if (lastTextRef.current === msg.text) return; // drop echoes
+      lastTextRef.current = msg.text;
+
+      onMessageRef.current?.(msg); // side effect (speak) — outside the updater
+      setMessages((prev) => [...prev, msg].slice(-limit));
+
+      // Each accepted message (re)starts a 5 s window in which `active` is true.
+      setActive(true);
+      if (idleTimer.current) window.clearTimeout(idleTimer.current);
+      idleTimer.current = window.setTimeout(() => setActive(false), 5000);
+    };
+    const unsubscribe = dimos.subscribeStream(key, handleFrame, setStatus);
+    return () => {
+      unsubscribe();
+      if (idleTimer.current) window.clearTimeout(idleTimer.current);
+    };
+  }, [key, limit]);
+
+  // Lets the caller end the active window early; the pending timer is dropped.
+  const markIdle = () => {
+    setActive(false);
+    if (idleTimer.current) window.clearTimeout(idleTimer.current);
+  };
+
+  return { messages, active, status, markIdle };
+}
diff --git a/webapp/src/hooks/useStatus.ts b/webapp/src/hooks/useStatus.ts
new file mode 100644
index 0000000000..535755b3c0
--- /dev/null
+++ b/webapp/src/hooks/useStatus.ts
@@ -0,0 +1,23 @@
+import { useEffect, useState } from "react";
+import * as dimos from "@/lib/dimos";
+
+/** Poll /unitree/status for the connection indicator. */
+export function useStatus(intervalMs = 5000) {
+  const [connected, setConnected] = useState(false);
+
+  useEffect(() => {
+    let cancelled = false; // set by the cleanup so a late reply is discarded
+    const poll = () =>
+      dimos.getStatus().then((status) => {
+        if (!cancelled) setConnected(status.connected);
+      });
+    poll(); // first reading right away, then once per interval
+    const timer = setInterval(poll, intervalMs);
+    return () => {
+      cancelled = true;
+      clearInterval(timer);
+    };
+  }, [intervalMs]);
+
+  return connected;
+}
diff --git a/webapp/src/hooks/useStt.ts b/webapp/src/hooks/useStt.ts
new file mode 100644
index 0000000000..9303c5af6e
--- /dev/null
+++ b/webapp/src/hooks/useStt.ts
@@ -0,0 +1,79 @@
+import { useCallback, useRef, useState } from "react";
+import { createSttProvider, type SttProvider } from "@/lib/stt";
+import { devLog } from "@/lib/devlog";
+import * as dimos from "@/lib/dimos";
+
+/** Map a SpeechRecognition error code to a user-facing message, or null to stay silent. */
+function sttErrorMessage(code: string): string | null {
+  // "aborted" means the user released or interrupted; that is not a failure,
+  // so it is the one code that produces no message at all.
+  if (code === "aborted") return null;
+
+  if (code === "no-speech") return "Didn't catch that — try again.";
+  if (code === "audio-capture") return "No microphone found.";
+  if (code === "network") return "Network hiccup — try again.";
+
+  // Both permission-style codes share one message.
+  if (code === "not-allowed" || code === "service-not-allowed") {
+    return "Microphone blocked — allow mic access for this site.";
+  }
+
+  // Any other code falls back to a generic retry prompt.
+  return "Couldn't capture speech — try again.";
+}
+
+/**
+ * Hold-to-speak controller. `onResult` fires with the final transcript when
+ * recording ends (used to submit the query).
+ */
+export function useStt(onResult: (finalText: string) => void) {
+  const [recording, setRecording] = useState(false);
+  const [transcript, setTranscript] = useState("");
+  const [error, setError] = useState<string | null>(null);
+  const providerRef = useRef<SttProvider | null>(null);
+
+  const start = useCallback(() => {
+    if (recording) return;
+    if (providerRef.current === null) { // created on first use, then reused
+      providerRef.current = createSttProvider(dimos.uploadAudio);
+    }
+    const provider = providerRef.current;
+    if (!provider.isSupported()) {
+      setError("Speech input isn't supported in this browser.");
+      return;
+    }
+    setError(null);
+    setTranscript("");
+    setRecording(true);
+    const handleFinal = (t: string) => {
+      setRecording(false);
+      setTranscript(t);
+      const text = t.trim();
+      if (text) onResult(text); // blank transcripts are never submitted
+    };
+    const handleError = (e: unknown) => {
+      setRecording(false);
+      const code =
+        e && typeof e === "object" && "error" in e
+          ? String((e as { error: unknown }).error)
+          : "unknown";
+      devLog({ event: "stt-error", code });
+      setError(sttErrorMessage(code)); // null (no message) when the code is "aborted"
+    };
+    Promise.resolve(
+      provider.start({ onInterim: (t) => setTranscript(t), onFinal: handleFinal, onError: handleError }),
+    ).catch((e) => {
+      setRecording(false);
+      setError("Couldn't start recording.");
+      console.error("stt start failed", e);
+    });
+  }, [recording, onResult]);
+
+  const stop = useCallback(() => {
+    if (!recording) return;
+    providerRef.current?.stop();
+    // onFinal flips `recording` to false
+  }, [recording]);
+
+  return { recording, transcript, error, start, stop };
+}
diff --git a/webapp/src/hooks/useTeleop.ts b/webapp/src/hooks/useTeleop.ts
new file mode 100644
index 0000000000..a7016fa0e4
--- /dev/null
+++ b/webapp/src/hooks/useTeleop.ts
@@ -0,0 +1,52 @@
+import { useCallback, useEffect, useRef, useState } from "react";
+import { io, type Socket } from "socket.io-client";
+import type { MoveCommand } from "@/lib/types";
+
+const VIS = (process.env.NEXT_PUBLIC_DIMOS_VIS ?? "").replace(/\/+$/, "");
+
+// Go2 velocity ranges (WEBAPP-INTEGRATION.md §3).
+const MAX_VX = 0.6; // m/s forward/back
+const MAX_VY = 0.4; // m/s strafe (unused — joystick maps L/R to yaw)
+const MAX_YAW = 1.0; // rad/s turn
+
+/**
+ * Socket.IO teleop link to the DimOS visualization server (port 7779) for
+ * direct, no-LLM joystick driving. Emits `move_command` Twist payloads.
+ * If NEXT_PUBLIC_DIMOS_VIS is unset, drive()/stop() are no-ops.
+ */
+export function useTeleop() {
+  const socketRef = useRef<Socket | null>(null);
+  const [connected, setConnected] = useState(false);
+
+  useEffect(() => {
+    if (!VIS) return; // no link configured: never open a socket
+    const socket = io(VIS, {
+      transports: ["websocket", "polling"],
+      extraHeaders: { "ngrok-skip-browser-warning": "true" },
+    });
+    socketRef.current = socket;
+    socket.on("connect", () => setConnected(true));
+    socket.on("disconnect", () => setConnected(false));
+    return () => {
+      socket.disconnect();
+      socketRef.current = null;
+    };
+  }, []);
+
+  // Single exit point for every Twist; silently dropped while no socket exists.
+  const sendTwist = useCallback((x: number, y: number, yaw: number) => {
+    socketRef.current?.emit("move_command", {
+      linear: { x, y, z: 0 },
+      angular: { x: 0, y: 0, z: yaw },
+    });
+  }, []);
+
+  // stick right (turn > 0) = clockwise = negative angular.z (CCW is positive).
+  const drive = useCallback(
+    (m: MoveCommand) => sendTwist(m.vx * MAX_VX, m.vy * MAX_VY, -m.turn * MAX_YAW),
+    [sendTwist],
+  );
+  const stop = useCallback(() => sendTwist(0, 0, 0), [sendTwist]);
+
+  return { connected, configured: !!VIS, drive, stop };
+}
diff --git a/webapp/src/lib/agentMessage.test.ts b/webapp/src/lib/agentMessage.test.ts
new file mode 100644
index 0000000000..e9a7a58090
--- /dev/null
+++ b/webapp/src/lib/agentMessage.test.ts
@@ -0,0 +1,53 @@
+import { describe, it, expect } from "vitest";
+import { classifyAgentMessage } from "./agentMessage";
+
+describe("classifyAgentMessage", () => {
+  it("drops empty / whitespace frames", () => {
+    expect(classifyAgentMessage("")).toBeNull();
+    expect(classifyAgentMessage("   ")).toBeNull();
+  });
+
+  it("parses an ai envelope (the spoken reply)", () => {
+    expect(
+      classifyAgentMessage('{"kind":"ai","text":"I moved forward."}'),
+    ).toEqual({ kind: "ai", text: "I moved forward." });
+  });
+
+  it("parses a tool envelope (status — not spoken)", () => {
+    expect(
+      classifyAgentMessage('{"kind":"tool","text":"Navigation goal reached"}'),
+    ).toEqual({ kind: "tool", text: "Navigation goal reached" });
+  });
+
+  it("parses a system envelope", () => {
+    expect(
+      classifyAgentMessage('{"kind":"system","text":"reconnected"}'),
+    ).toEqual({ kind: "system", text: "reconnected" });
+  });
+
+  it("defaults missing or unknown kind to ai", () => {
+    expect(classifyAgentMessage('{"text":"hi there"}')).toEqual({
+      kind: "ai",
+      text: "hi there",
+    });
+    expect(classifyAgentMessage('{"kind":"weird","text":"hey"}')).toEqual({
+      kind: "ai",
+      text: "hey",
+    });
+  });
+
+  it("drops envelopes with empty text", () => {
+    expect(classifyAgentMessage('{"kind":"tool","text":""}')).toBeNull();
+  });
+
+  it("treats legacy plain text as an ai reply", () => {
+    expect(classifyAgentMessage("Hello! I am Goldie.")).toEqual({
+      kind: "ai",
+      text: "Hello! I am Goldie.",
+    });
+  });
+
+  it("strips wrapping quotes on legacy plain text", () => {
+    expect(classifyAgentMessage("'Hello there'")!.text).toBe("Hello there");
+  });
+});
diff --git a/webapp/src/lib/agentMessage.ts b/webapp/src/lib/agentMessage.ts
new file mode 100644
index 0000000000..cfb4645a97
--- /dev/null
+++ b/webapp/src/lib/agentMessage.ts
@@ -0,0 +1,49 @@
+export type AgentMsgKind = "ai" | "tool" | "system";
+
+export interface AgentMessage {
+  kind: AgentMsgKind;
+  text: string;
+}
+
+function normalizeKind(k: unknown): AgentMsgKind {
+  return k !== "tool" && k !== "system" ? "ai" : k; // any other value falls back to "ai"
+}
+
+function stripWrappingQuotes(t: string): string {
+  const wrapped = /^(['"])([\s\S]*)\1$/.exec(t); // the same quote char at both ends
+  return wrapped === null ? t : wrapped[2].trim();
+}
+
+/**
+ * Turn one raw `agent_responses` SSE frame into a classified message, or null
+ * for empty/keepalive frames.
+ *
+ * The backend tags each message as a typed JSON envelope
+ * `{ kind: "ai" | "tool" | "system", text }`. Only `ai` messages are the
+ * agent's spoken replies; `tool`/`system` are status (shown, never spoken —
+ * see index.tsx). Plain-text frames from the legacy backend are treated as `ai`.
+ */
+export function classifyAgentMessage(raw: string): AgentMessage | null {
+  const frame = (raw ?? "").trim();
+  if (frame === "") return null;
+
+  // Frames that start like a JSON object, array or string literal are parsed first.
+  if ('{["'.includes(frame[0])) {
+    try {
+      const parsed = JSON.parse(frame);
+      if (typeof parsed === "string") {
+        const text = parsed.trim();
+        return text ? { kind: "ai", text } : null;
+      }
+      if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
+        const text = String(parsed.text ?? parsed.message ?? parsed.content ?? "").trim();
+        return text ? { kind: normalizeKind(parsed.kind), text } : null;
+      }
+    } catch {
+      /* not JSON — fall through to legacy plain-text handling */
+    }
+  }
+
+  // Legacy plain-text frame (pre-envelope backend): treat as a spoken reply.
+  return { kind: "ai", text: stripWrappingQuotes(frame) };
+}
diff --git a/webapp/src/lib/devlog.ts b/webapp/src/lib/devlog.ts
new file mode 100644
index 0000000000..0a90eb8a9d
--- /dev/null
+++ b/webapp/src/lib/devlog.ts
@@ -0,0 +1,13 @@
+/**
+ * Fire-and-forget log to the Next dev server terminal (same-origin /api/log),
+ * independent of the DimOS backend. See src/pages/api/log.ts.
+ */
+export function devLog(entry: Record<string, unknown>): void {
+  const body = JSON.stringify(entry);
+  const headers = { "Content-Type": "application/json" };
+  // Deliberately not awaited: the request is started and its outcome ignored,
+  // so callers neither wait on logging nor see its network errors.
+  fetch("/api/log", { method: "POST", headers, body }).catch(() => {
+    // logging must never break the app
+  });
+}
diff --git a/webapp/src/lib/dimos.ts b/webapp/src/lib/dimos.ts
new file mode 100644
index 0000000000..4bbc8e54e2
--- /dev/null
+++ b/webapp/src/lib/dimos.ts
@@ -0,0 +1,172 @@
+import { fetchEventSource } from "@microsoft/fetch-event-source";
+import type { MoveCommand } from "./types";
+
+/**
+ * Single client for the DimOS backend (WEBAPP-BRIEF §5).
+ * Defaults to the local mock (`/api/mock`) so the UI runs with no backend;
+ * point NEXT_PUBLIC_DIMOS_API at the real VPS to switch.
+ *
+ * NOTE (monorepo merge): this client currently targets the `agent_responses`
+ * (plain-text) + ngrok-header contract. The monorepo DimOS backend instead
+ * streams structured `agent_state` JSON and uses token auth (Bearer + ?token=).
+ * To run against it: switch the stream key to `agent_state`, send the token,
+ * and map last_narration/awaiting_user in the feed. See ../../SCAFFOLD-REFERENCE.md
+ * and webapp/AGENTS.md ("Backend contracts").
+ */
+const API = (process.env.NEXT_PUBLIC_DIMOS_API ?? "/api/mock").replace(/\/+$/, "");
+const TOKEN = process.env.NEXT_PUBLIC_DIMOS_TOKEN ?? "";
+
+function headers(extra: Record<string, string> = {}): Record<string, string> {
+  // ngrok-skip-browser-warning avoids the free-tier HTML interstitial that
+  // otherwise breaks fetch/SSE through the tunnel (WEBAPP-INTEGRATION.md).
+  const auth: Record<string, string> = TOKEN ? { Authorization: `Bearer ${TOKEN}` } : {}; // only with a token
+  return {
+    "ngrok-skip-browser-warning": "true",
+    ...extra,
+    ...auth,
+  };
+}
+
+/**
+ * fetch with a timeout and a few retries — rides out the transient connection
+ * drops you get through an ngrok free tunnel on mobile (which otherwise surface
+ * as "TypeError: Load failed" and silently lose the command). Retries network
+ * failures, timeouts, and 5xx; returns 4xx as-is for the caller to handle.
+ */
+async function fetchWithRetry(
+  url: string,
+  init: RequestInit = {},
+  { retries = 2, timeoutMs = 8000 }: { retries?: number; timeoutMs?: number } = {},
+): Promise<Response> {
+  let failure: unknown;
+  for (let attempt = 0; attempt <= retries; attempt += 1) {
+    const ctrl = new AbortController();
+    const timer = setTimeout(() => ctrl.abort(), timeoutMs);
+    try {
+      const res = await fetch(url, { ...init, signal: ctrl.signal });
+      if (res.status >= 500) throw new Error(`HTTP ${res.status}`);
+      return res;
+    } catch (err) {
+      failure = err; // kept so the last failure is what the caller finally sees
+    } finally {
+      clearTimeout(timer);
+    }
+    if (attempt < retries) await new Promise((r) => setTimeout(r, 400 * (attempt + 1)));
+  }
+  throw failure;
+}
+
+/** Text query → agent. multipart/form-data, field `query` (NOT JSON). */
+export async function submitQuery(query: string): Promise<void> {
+  // The query travels as a form field named `query`; no explicit Content-Type
+  // header is passed alongside the FormData body.
+  const form = new FormData();
+  form.append("query", query);
+  const init: RequestInit = { method: "POST", body: form, headers: headers() };
+  const res = await fetchWithRetry(`${API}/submit_query`, init);
+  if (res.ok) return;
+  throw new Error(`submit_query failed: ${res.status}`);
+}
+
+/** Upload a recorded clip; field name MUST be `file`. Returns the transcript; throws on a non-2xx reply. */
+export async function uploadAudio(
+  blob: Blob,
+  filename = "recording.mp4",
+): Promise<string> {
+  const fd = new FormData();
+  fd.append("file", blob, filename);
+  const init: RequestInit = { method: "POST", body: fd, headers: headers() };
+  const res = await fetch(`${API}/upload_audio`, init);
+  // Fail loudly like submitQuery/unitreeCommand: an error page is not JSON, and
+  // an error payload without `text` would otherwise read as an empty transcript.
+  if (!res.ok) throw new Error(`upload_audio failed: ${res.status}`);
+  const data = await res.json();
+  return (data?.text as string) ?? "";
+}
+
+export async function listStreams(): Promise<string[]> {
+  const res = await fetch(`${API}/text_streams`, { headers: headers() });
+  if (!res.ok) throw new Error(`text_streams failed: ${res.status}`); // an error reply is not a stream list
+  const data = await res.json();
+  return data?.streams ?? []; // empty list when the reply carries no `streams` field
+}
+
+/** Direct sport command, bypassing the LLM. JSON body. */
+export async function unitreeCommand(command: string): Promise<void> {
+  const body = JSON.stringify({ command });
+  const jsonHeaders = headers({ "Content-Type": "application/json" });
+  const init: RequestInit = { method: "POST", headers: jsonHeaders, body };
+  const res = await fetchWithRetry(`${API}/unitree/command`, init);
+  // Any non-2xx status that comes back is surfaced to the caller as an Error.
+  if (!res.ok) throw new Error(`unitree/command failed: ${res.status}`);
+}
+
+export async function getStatus(): Promise<{ connected: boolean }> {
+  const offline = { connected: false };
+  try {
+    // timeout only (no retries) — a poll that hangs shouldn't pile up.
+    const opts = { retries: 0, timeoutMs: 4000 };
+    const res = await fetchWithRetry(`${API}/unitree/status`, { headers: headers() }, opts);
+    if (!res.ok) return offline;
+    const data = await res.json();
+    // Real backend: {status:"online",...}; mock: {connected:true}. Accept both.
+    const online = data?.status === "online" || data?.connected === true;
+    return { connected: online };
+  } catch {
+    // a failed request or an unparsable body both read as offline
+    return offline;
+  }
+}
+
+/** TBD: joystick drive — endpoint not yet defined in the brief; stubbed in the mock. */
+export async function move(cmd: MoveCommand): Promise<void> {
+  const body = JSON.stringify(cmd);
+  const jsonHeaders = headers({ "Content-Type": "application/json" });
+  // A single plain fetch: no retry and no timeout wrapper, and the response
+  // (status included) is not inspected.
+  await fetch(`${API}/unitree/move`, { method: "POST", headers: jsonHeaders, body });
+}
+
+/** TBD: interrupt the running agent — endpoint not yet defined; stubbed in the mock. */
+export async function interrupt(): Promise<void> {
+  // One retry at most. The response itself is not inspected here, so this
+  // only rejects when fetchWithRetry gives up and throws.
+  const init: RequestInit = { method: "POST", headers: headers() };
+  const policy = { retries: 1 };
+  await fetchWithRetry(`${API}/interrupt`, init, policy);
+}
+
+export type StreamStatus = "connecting" | "open" | "error";
+
+/**
+ * Subscribe to a named SSE stream. Calls `onData` with each frame's raw `data`
+ * string (caller parses defensively). Returns an unsubscribe function.
+ * Uses fetch-event-source so it works with auth headers in production.
+ */
+export function subscribeStream(
+  key: string,
+  onData: (data: string) => void,
+  onStatus?: (s: StreamStatus) => void,
+): () => void {
+  const ctrl = new AbortController();
+  onStatus?.("connecting");
+  fetchEventSource(`${API}/text_stream/${key}`, {
+    signal: ctrl.signal,
+    headers: headers(),
+    openWhenHidden: true,
+    onopen: async (res) => {
+      // A custom onopen replaces the library's default check; throwing hands over to onerror.
+      if (!res.ok) throw new Error(`text_stream/${key} failed: ${res.status}`);
+      onStatus?.("open");
+    },
+    onmessage: (ev) => {
+      if (ev.data) onData(ev.data);
+    },
+    onclose: () => {
+      throw new Error(`text_stream/${key} closed by server`); // a clean close would otherwise end the feed for good
+    },
+    onerror: () => {
+      onStatus?.("error"); // returning undefined lets fetch-event-source retry with backoff
+    },
+  }).catch(() => undefined); // aborted on unsubscribe — ignore
+  return () => ctrl.abort();
+}
diff --git a/webapp/src/lib/joystick.test.ts b/webapp/src/lib/joystick.test.ts
new file mode 100644
index 0000000000..dcfc114ee1
--- /dev/null
+++ b/webapp/src/lib/joystick.test.ts
@@ -0,0 +1,48 @@
+import { describe, it, expect } from "vitest";
+import { computeDrive } from "./joystick";
+
+const R = 100;
+
+describe("computeDrive", () => {
+  it("returns zero at center", () => {
+    expect(computeDrive(0, 0, R)).toEqual({ vx: 0, turn: 0, knobX: 0, knobY: 0 });
+  });
+
+  it("full up = full forward", () => {
+    const d = computeDrive(0, -R, R); // negative dy = pointer above the pad center
+    expect(d.vx).toBeCloseTo(1, 5);
+    expect(d.turn).toBeCloseTo(0, 5);
+  });
+
+  it("full down = full backward", () => {
+    expect(computeDrive(0, R, R).vx).toBeCloseTo(-1, 5);
+  });
+
+  it("right = turn right, no forward", () => {
+    const d = computeDrive(R, 0, R);
+    expect(d.turn).toBeCloseTo(1, 5);
+    expect(d.vx).toBeCloseTo(0, 5);
+  });
+
+  it("left = turn left", () => {
+    expect(computeDrive(-R, 0, R).turn).toBeCloseTo(-1, 5);
+  });
+
+  it("clamps magnitude beyond the radius", () => {
+    const d = computeDrive(0, -2 * R, R); // pointer dragged to twice the pad radius
+    expect(d.vx).toBeCloseTo(1, 5); // not 2
+    expect(d.knobY).toBeCloseTo(-R, 5); // knob pinned to the rim
+  });
+
+  it("applies a dead zone near center", () => {
+    const d = computeDrive(0, -0.05 * R, R); // 5% < 12% dead zone
+    expect(d.vx).toBe(0);
+    expect(d.turn).toBe(0);
+  });
+
+  it("normalizes a diagonal to the rim", () => {
+    const d = computeDrive(R, -R, R); // distance = R*sqrt(2)
+    expect(d.vx).toBeCloseTo(Math.SQRT1_2, 3); // both axes scale by 1/sqrt(2)
+    expect(d.turn).toBeCloseTo(Math.SQRT1_2, 3);
+  });
+});
diff --git a/webapp/src/lib/joystick.ts b/webapp/src/lib/joystick.ts
new file mode 100644
index 0000000000..ed51686d4c
--- /dev/null
+++ b/webapp/src/lib/joystick.ts
@@ -0,0 +1,39 @@
+export interface DriveVector {
+  /** forward (+) / backward (−), in [-1, 1] */
+  vx: number;
+  /** turn right (+) / left (−), in [-1, 1] */
+  turn: number;
+  /** clamped knob offset from center, in px (for rendering); knobX is horizontal, knobY vertical */
+  knobX: number;
+  knobY: number; // screen coordinates: positive is below the pad center
+}
+
+/**
+ * Map a pointer offset (dx, dy from the pad center, in px) to a drive vector: up = forward,
+ * right = turn right, clamped to the pad rim, zero inside `deadZone` (fraction of the radius).
+ * A radius that is not > 0, or a non-finite offset, yields the all-zero vector rather than NaN.
+ */
+export function computeDrive(
+  dx: number,
+  dy: number,
+  radius: number,
+  deadZone = 0.12,
+): DriveVector {
+  const dist = Math.hypot(dx, dy);
+  // Pad not measured yet (radius 0) or garbage input: command a stop, never NaN/Infinity.
+  if (!(radius > 0) || !Number.isFinite(dist)) {
+    return { vx: 0, turn: 0, knobX: 0, knobY: 0 };
+  }
+  const scale = dist > radius ? radius / dist : 1; // clamp the knob to the rim
+  const knobX = dx * scale;
+  const knobY = dy * scale;
+  if (Math.min(dist / radius, 1) < deadZone) {
+    return { vx: 0, turn: 0, knobX, knobY };
+  }
+  return {
+    vx: -knobY / radius, // screen y grows downward, so up is forward
+    turn: knobX / radius,
+    knobX,
+    knobY,
+  };
+}
diff --git a/webapp/src/lib/speech.ts b/webapp/src/lib/speech.ts
new file mode 100644
index 0000000000..c763ffd17d
--- /dev/null
+++ b/webapp/src/lib/speech.ts
@@ -0,0 +1,147 @@
+/**
+ * Speaks the agent's replies on the phone via OpenAI TTS (/api/tts) played
+ * through a single <audio> element. Primary feedback channel for blind users.
+ *
+ * Why not the browser SpeechSynthesis API: iOS silently drops speech that isn't
+ * started inside a user gesture, and agent replies arrive asynchronously over
+ * SSE — so they never spoke. An <audio> element instead can be "unlocked" by
+ * one tap (unlockSpeech) and then play network audio asynchronously, which iOS
+ * allows. unlockSpeech MUST run synchronously inside a tap handler.
+ */
+
+let audioEl: HTMLAudioElement | null = null;
+
+// Pending utterances, played one at a time so replies don't cut each other off.
+let queue: string[] = [];
+let playing = false;
+let inflight: AbortController | null = null;
+let endCurrent: (() => void) | null = null;
+
+export function isSpeechSupported(): boolean {
+  return ![typeof window, typeof Audio].includes("undefined"); // needs both a window and the Audio constructor
+}
+
+function getAudio(): HTMLAudioElement {
+  if (audioEl) return audioEl; // created on first use, then shared by every caller
+  const el = new Audio();
+  el.preload = "auto";
+  audioEl = el;
+  return el;
+}
+
+/** Short, valid, silent 8-bit mono WAV data URIs, memoized per (ms, sampleRate) — played in-gesture to unlock iOS audio. */
+const silenceUris = new Map<string, string>();
+function silentWav(ms = 60, sampleRate = 8000): string {
+  const cacheKey = `${ms}@${sampleRate}`; // one shared slot would return a stale clip for other arguments
+  const cached = silenceUris.get(cacheKey);
+  if (cached !== undefined) return cached;
+  const dataSize = Math.max(0, Math.floor((sampleRate * ms) / 1000)); // 8-bit mono: 1 byte per sample
+  const bytes = new Uint8Array(44 + dataSize).fill(128, 44); // 44-byte header, then 8-bit silence (128)
+  const view = new DataView(bytes.buffer);
+  const str = (off: number, s: string) => {
+    for (let i = 0; i < s.length; i++) view.setUint8(off + i, s.charCodeAt(i));
+  };
+  str(0, "RIFF");
+  view.setUint32(4, 36 + dataSize, true);
+  str(8, "WAVE");
+  str(12, "fmt ");
+  view.setUint32(16, 16, true); // PCM header size
+  view.setUint16(20, 1, true); // PCM
+  view.setUint16(22, 1, true); // mono
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, sampleRate, true); // byte rate (1 byte/sample)
+  view.setUint16(32, 1, true); // block align
+  view.setUint16(34, 8, true); // bits per sample
+  str(36, "data");
+  view.setUint32(40, dataSize, true);
+  let binary = "";
+  for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]);
+  const uri = "data:audio/wav;base64," + btoa(binary);
+  silenceUris.set(cacheKey, uri);
+  return uri;
+}
+
+/**
+ * Prime the shared <audio> element by starting a brief silent clip. Call it synchronously from a
+ * user-gesture (tap) handler so later async play() calls (the agent's replies) make sound on iOS.
+ */
+export function unlockSpeech(): void {
+  if (isSpeechSupported()) {
+    try {
+      const el = getAudio();
+      el.src = silentWav();
+      const started = el.play();
+      if (started && typeof started.then === "function") started.then(() => {}, () => {});
+    } catch {
+      /* ignore */
+    }
+  }
+}
+
+export function speak(text: string): void {
+  const utterance = text.trim(); // blank input is dropped, never queued
+  if (!isSpeechSupported() || !utterance) return;
+  queue.push(utterance);
+  void pump(); // returns immediately when an utterance is already playing
+}
+
+/** Play queued utterances one at a time: fetch the TTS audio, play it, then move on to the next item. */
+async function pump(): Promise<void> {
+  if (playing) return;
+  const text = queue.shift();
+  if (text === undefined) return;
+  playing = true;
+
+  const ac = new AbortController();
+  let url: string | null = null;
+  try {
+    inflight = ac;
+    const res = await fetch("/api/tts", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ text }),
+      signal: ac.signal,
+    });
+    if (!res.ok) throw new Error(`tts ${res.status}`);
+    // Stay abortable while the body downloads, so a cancelSpeech() landing here can't be followed by a stale reply.
+    const blob = await res.blob();
+    if (ac.signal.aborted) return;
+    inflight = null;
+    url = URL.createObjectURL(blob);
+    const a = getAudio();
+    a.src = url;
+    await new Promise<void>((resolve) => {
+      const finish = () => {
+        endCurrent = a.onended = a.onerror = null;
+        resolve();
+      };
+      endCurrent = finish; // lets cancelSpeech() end this wait early
+      a.onended = finish;
+      a.onerror = finish;
+      const p = a.play();
+      if (p && typeof p.then === "function") p.then(() => {}, finish);
+    });
+  } catch {
+    /* aborted or failed — fall through to the next item */
+  } finally {
+    inflight = null;
+    if (url) URL.revokeObjectURL(url); // runs on every exit path, so the blob URL never leaks
+    playing = false;
+    void pump();
+  }
+}
+
+/** Drop everything queued, abort any in-flight TTS fetch, end the current playback wait and reset the player. */
+export function cancelSpeech(): void {
+  queue = [];
+  if (inflight) inflight.abort();
+  inflight = null;
+  if (endCurrent) endCurrent(); // resolve the in-progress pump so it stops cleanly
+  if (!audioEl) return;
+  try {
+    audioEl.pause();
+    audioEl.removeAttribute("src");
+    audioEl.load();
+  } catch {
+    /* ignore */
+  }
+}
diff --git a/webapp/src/lib/stt.test.ts b/webapp/src/lib/stt.test.ts
new file mode 100644
index 0000000000..02d002e210
--- /dev/null
+++ b/webapp/src/lib/stt.test.ts
@@ -0,0 +1,16 @@
+import { describe, it, expect } from "vitest";
+import { wrapUserSpeech } from "./stt";
+
+describe("wrapUserSpeech", () => {
+  it("wraps the transcript in <user_speech> tags", () => {
+    expect(wrapUserSpeech("stand up")).toBe(
+      "<user_speech>stand up</user_speech>",
+    );
+  });
+
+  it("trims surrounding whitespace before wrapping", () => { // inner spaces are kept as-is
+    expect(wrapUserSpeech("  go to the kitchen \n")).toBe(
+      "<user_speech>go to the kitchen</user_speech>",
+    );
+  });
+});
diff --git a/webapp/src/lib/stt.ts b/webapp/src/lib/stt.ts
new file mode 100644
index 0000000000..1c76630733
--- /dev/null
+++ b/webapp/src/lib/stt.ts
@@ -0,0 +1,204 @@
+/** Tag a transcript as voice input for the agent (WEBAPP-BRIEF §5); surrounding whitespace is trimmed first. */
+export function wrapUserSpeech(text: string): string {
+  return ["<user_speech>", text.trim(), "</user_speech>"].join("");
+}
+
+export interface SttCallbacks {
+  /** live partial transcript (Web Speech); the upload provider only sends a "…" placeholder */
+  onInterim?: (text: string) => void;
+  /** final transcript to submit */
+  onFinal: (text: string) => void;
+  onError?: (err: unknown) => void; // payload varies: an Error, a caught exception, or the recognition error event
+}
+
+export interface SttProvider {
+  readonly mode: "webspeech" | "upload"; // which implementation this is
+  isSupported(): boolean; // feature-detects the browser APIs the provider needs
+  start(cb: SttCallbacks): void | Promise<void>; // the upload provider's start is async (it awaits mic access)
+  stop(): void; // end capture; the final transcript arrives later through cb.onFinal
+}
+
+/* ---- minimal Web Speech API typings (not in lib.dom for all targets) ---- */
+interface SpeechRecognitionAlternativeLike {
+  transcript: string;
+}
+interface SpeechRecognitionResultLike
+  extends ArrayLike<SpeechRecognitionAlternativeLike> {
+  isFinal: boolean;
+}
+interface SpeechRecognitionEventLike {
+  results: ArrayLike<SpeechRecognitionResultLike>;
+}
+/** onerror payload — `.error` is a code like "no-speech" | "network" | "not-allowed". */
+export interface SpeechRecognitionErrorLike {
+  error: string;
+  message?: string;
+}
+interface SpeechRecognitionLike {
+  lang: string;
+  continuous: boolean;
+  interimResults: boolean;
+  onresult: ((e: SpeechRecognitionEventLike) => void) | null;
+  onerror: ((e: SpeechRecognitionErrorLike) => void) | null;
+  onend: (() => void) | null;
+  start(): void;
+  stop(): void;
+  abort(): void;
+}
+type SpeechRecognitionCtor = new () => SpeechRecognitionLike;
+
+declare global {
+  interface Window {
+    SpeechRecognition?: SpeechRecognitionCtor;
+    webkitSpeechRecognition?: SpeechRecognitionCtor;
+  }
+}
+
+/** Live, on-device transcription. The active provider for the design-first build. */
+class WebSpeechStt implements SttProvider {
+  readonly mode = "webspeech" as const;
+  private rec: SpeechRecognitionLike | null = null; // the live session, or null when idle
+  private latest = ""; // most recent full transcript seen in onresult
+
+  private ctor(): SpeechRecognitionCtor | undefined {
+    if (typeof window === "undefined") return undefined;
+    return window.SpeechRecognition ?? window.webkitSpeechRecognition;
+  }
+
+  isSupported() {
+    return !!this.ctor();
+  }
+
+  /**
+   * Detach handlers and abort any live session so it can't keep the mic open
+   * or fire stale callbacks. Prevents overlapping sessions when the button is
+   * pressed again before the previous recognition has fully ended.
+   */
+  private teardown() {
+    const rec = this.rec;
+    if (!rec) return;
+    this.rec = null;
+    rec.onresult = null;
+    rec.onerror = null;
+    rec.onend = null; // detached before abort(), so the old session never reports onFinal
+    try {
+      rec.abort();
+    } catch {
+      /* already stopped */
+    }
+  }
+
+  start(cb: SttCallbacks) {
+    const Ctor = this.ctor();
+    if (!Ctor) {
+      cb.onError?.(new Error("SpeechRecognition unsupported"));
+      return;
+    }
+    this.teardown(); // replace any previous session cleanly
+
+    const rec = new Ctor();
+    this.rec = rec;
+    this.latest = "";
+    rec.lang = "en-US";
+    rec.continuous = true;
+    rec.interimResults = true;
+    rec.onresult = (e) => {
+      let full = ""; // rebuilt from every result so far, not just the newest one
+      for (let i = 0; i < e.results.length; i++) {
+        full += e.results[i][0]?.transcript ?? "";
+      }
+      this.latest = full;
+      cb.onInterim?.(full.trim());
+    };
+    rec.onerror = (ev) => cb.onError?.(ev); // forwards the raw event; callers read ev.error
+    rec.onend = () => {
+      if (this.rec !== rec) return; // superseded by a newer session — ignore
+      this.rec = null;
+      cb.onFinal(this.latest.trim()); // may be "" when nothing was recognised
+    };
+    try {
+      rec.start();
+    } catch (e) {
+      // e.g. iOS throws if a prior session is still tearing down
+      this.teardown();
+      cb.onError?.(e);
+    }
+  }
+
+  stop() {
+    try {
+      this.rec?.stop(); // handlers stay attached, so onend can still deliver onFinal
+    } catch {
+      /* not running */
+    }
+  }
+}
+
+/**
+ * Production path: record audio/mp4 and POST to /upload_audio for transcription.
+ * Written and ready; enable with NEXT_PUBLIC_STT=upload for the real iPhone demo.
+ * Every start() works on its own stream/chunks and frees the mic if setup fails.
+ */
+class UploadStt implements SttProvider {
+  readonly mode = "upload" as const;
+  private mr: MediaRecorder | null = null;
+  private stream: MediaStream | null = null;
+  private chunks: Blob[] = [];
+
+  constructor(
+    private upload: (blob: Blob, filename?: string) => Promise<string>,
+  ) {}
+
+  isSupported() {
+    return (
+      typeof window !== "undefined" &&
+      typeof MediaRecorder !== "undefined" &&
+      !!navigator.mediaDevices?.getUserMedia
+    );
+  }
+
+  async start(cb: SttCallbacks) {
+    let stream: MediaStream | null = null;
+    const release = () => stream?.getTracks().forEach((t) => t.stop());
+    try {
+      this.stop(); // let a recording that is still running finish instead of orphaning it
+      stream = this.stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      // iOS Safari requires audio/mp4, not audio/webm (WEBAPP-BRIEF §6).
+      const mime = MediaRecorder.isTypeSupported("audio/mp4") ? "audio/mp4" : "audio/webm";
+      const mr = new MediaRecorder(stream, { mimeType: mime }); // can throw for an unsupported type
+      const chunks: Blob[] = [];
+      this.chunks = chunks;
+      mr.ondataavailable = (e) => {
+        if (e.data.size) chunks.push(e.data);
+      };
+      mr.onstop = async () => {
+        release();
+        try {
+          const name = mime.includes("mp4") ? "recording.mp4" : "recording.webm";
+          const text = await this.upload(new Blob(chunks, { type: mime }), name);
+          cb.onFinal(text.trim());
+        } catch (e) {
+          cb.onError?.(e);
+        }
+      };
+      this.mr = mr;
+      cb.onInterim?.("…");
+      mr.start();
+    } catch (e) {
+      release(); // never leave the mic open after a failed start
+      cb.onError?.(e);
+    }
+  }
+
+  stop() {
+    if (this.mr && this.mr.state !== "inactive") this.mr.stop();
+  }
+}
+
+/** Build the STT provider named by NEXT_PUBLIC_STT: "upload" selects UploadStt, anything else (or unset) WebSpeechStt. */
+export function createSttProvider(
+  upload: (blob: Blob, filename?: string) => Promise<string>,
+): SttProvider {
+  if ((process.env.NEXT_PUBLIC_STT ?? "webspeech") === "upload") return new UploadStt(upload);
+  return new WebSpeechStt();
+}
diff --git a/webapp/src/lib/types.ts b/webapp/src/lib/types.ts
new file mode 100644
index 0000000000..55ec34e40d
--- /dev/null
+++ b/webapp/src/lib/types.ts
@@ -0,0 +1,42 @@
+export type Mode = "voice" | "manual";
+
+/** Documented Unitree sport commands (WEBAPP-BRIEF §5). */
+export type UnitreeCommand =
+  | "StandUp"
+  | "StandDown"
+  | "Sit"
+  | "Hello"
+  | "Stretch"
+  | "Dance1"
+  | "Dance2";
+
+/** A quick-action button. `command` is the natural-language query sent to the
+ *  agent via /submit_query (e.g. "sit", "stand up") — NOT the direct sport command. */
+export interface QuickAction {
+  label: string;
+  command: string;
+}
+
+/** Continuous drive vector from the joystick, components roughly in [-1, 1]. */
+export interface MoveCommand {
+  /** forward (+) / backward (−) */
+  vx: number;
+  /** strafe — reserved, unused for now */
+  vy: number;
+  /** yaw: turn left (−) / right (+) */
+  turn: number;
+}
+
+export type AgentPhase = "idle" | "running" | "error";
+
+/** Snapshot pushed over the `agent_state` SSE stream. */
+export interface AgentState {
+  phase: AgentPhase;
+  task?: string;
+  skill?: string;
+  /** 0..1 */
+  progress?: number;
+  message?: string;
+  /** set when a stream frame couldn't be parsed as JSON */
+  raw?: string;
+}
diff --git a/webapp/src/pages/_app.tsx b/webapp/src/pages/_app.tsx
new file mode 100644
index 0000000000..2c206495d9
--- /dev/null
+++ b/webapp/src/pages/_app.tsx
@@ -0,0 +1,20 @@
+import "@/styles/globals.css";
+import type { AppProps } from "next/app";
+import Head from "next/head";
+
+export default function App({ Component, pageProps }: AppProps) { // wraps every page with the shared <Head> tags
+  return (
+    <>
+      <Head>
+        {/* viewport-fit=cover lets the gradient extend under the notch/home bar.
+            maximum-scale=1 stops iOS auto-zoom when focusing the (hidden) text input. */}
+        <meta
+          name="viewport"
+          content="width=device-width, initial-scale=1, maximum-scale=1, viewport-fit=cover"
+        />
+        <meta name="theme-color" content="#0a0b0e" />
+      </Head>
+      <Component {...pageProps} />
+    </>
+  );
+}
diff --git a/webapp/src/pages/_document.tsx b/webapp/src/pages/_document.tsx
new file mode 100644
index 0000000000..7d9f77e698
--- /dev/null
+++ b/webapp/src/pages/_document.tsx
@@ -0,0 +1,26 @@
+import { Html, Head, Main, NextScript } from "next/document";
+
+export default function Document() { // static HTML shell: manifest, icons and home-screen meta tags
+  return (
+    <Html lang="en">
+      <Head>
+        {/* PWA / Add-to-Home-Screen (no service worker — iOS doesn't need one for A2HS) */}
+        <link rel="manifest" href="/manifest.json" />
+        <link rel="apple-touch-icon" href="/icons/apple-touch-icon.png" />
+        <link rel="icon" href="/icons/icon.svg" type="image/svg+xml" />
+        <meta name="apple-mobile-web-app-capable" content="yes" />
+        <meta name="mobile-web-app-capable" content="yes" />
+        <meta name="apple-mobile-web-app-title" content="Goldie" />
+        <meta
+          name="apple-mobile-web-app-status-bar-style"
+          content="black-translucent"
+        />
+        <meta name="description" content="Voice and joystick control for Goldie." />
+      </Head>
+      <body>
+        <Main />
+        <NextScript />
+      </body>
+    </Html>
+  );
+}
diff --git a/webapp/src/pages/api/hello.ts b/webapp/src/pages/api/hello.ts
new file mode 100644
index 0000000000..ea77e8f35b
--- /dev/null
+++ b/webapp/src/pages/api/hello.ts
@@ -0,0 +1,13 @@
+// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
+import type { NextApiRequest, NextApiResponse } from "next";
+
+type Data = {
+  name: string;
+};
+
+export default function handler(
+  req: NextApiRequest, // unused: every request gets the same reply
+  res: NextApiResponse<Data>,
+) {
+  res.status(200).json({ name: "John Doe" }); // fixed demo payload
+}
diff --git a/webapp/src/pages/api/log.ts b/webapp/src/pages/api/log.ts
new file mode 100644
index 0000000000..653c3fef13
--- /dev/null
+++ b/webapp/src/pages/api/log.ts
@@ -0,0 +1,30 @@
+import type { NextApiRequest, NextApiResponse } from "next";
+
+/**
+ * Dev-only sink for speech input. The browser POSTs here so the entry prints
+ * server-side — i.e. in the terminal where you run `yarn dev` / `npm run dev`
+ * (works even when speaking on the phone over an ngrok tunnel). Values that are
+ * printed unquoted are flattened first so a request cannot forge extra log lines.
+ */
+export default function handler(req: NextApiRequest, res: NextApiResponse) {
+  if (req.method !== "POST") {
+    res.status(405).end();
+    return;
+  }
+  const body = req.body ?? {};
+  // kind/code/payload come straight from the client and are interpolated raw:
+  // collapse control characters (newlines included) into a single space.
+  const flat = (v: unknown) => String(v).replace(/[\u0000-\u001f\u007f]+/g, " ");
+  if (body.event === "agent-msg") {
+    // Every frame the frontend parses, before any speak/dedup decision.
+    console.log(`[goldie] 📩 agent-msg [kind=${flat(body.kind ?? "?")}]: ${JSON.stringify(body.text ?? "")}`);
+  } else if (body.event === "tts") {
+    console.log(`[goldie] 🔊 tts(${body.spoke ? "SPOKEN" : "muted"}, kind=${flat(body.kind ?? "?")}): ${JSON.stringify(body.text ?? "")}`);
+  } else if (body.event === "stt-error") {
+    console.log(`[goldie] 🎤⚠️  stt error: ${flat(body.code ?? "unknown")}`);
+  } else {
+    console.log(`\n[goldie] 🎤 speech: ${JSON.stringify(body.transcript ?? "")}`);
+    if (body.payload) console.log(`[goldie]    → submit_query  query=${flat(body.payload)}`);
+  }
+  res.status(204).end();
+}
diff --git a/webapp/src/pages/api/mock/[...path].ts b/webapp/src/pages/api/mock/[...path].ts
new file mode 100644
index 0000000000..2b23dac6bd
--- /dev/null
+++ b/webapp/src/pages/api/mock/[...path].ts
@@ -0,0 +1,101 @@
+import type { NextApiRequest, NextApiResponse } from "next";
+
+// SSE must not be size-limited or buffered.
+export const config = {
+  api: { bodyParser: { sizeLimit: "1mb" }, responseLimit: false },
+};
+
+type Run = { startedAt: number } | null;
+
+// Module-level state persists across requests within the single `next dev`
+// process — enough to simulate "after a query, the agent replies for a bit".
+let run: Run = null;
+
+// Scripted agent_responses (mirrors the real backend's typed JSON envelopes:
+// kind "ai" = spoken reply, kind "tool" = status that is shown but not spoken).
+const SCRIPT: { at: number; data: string }[] = [
+  { at: 300, data: JSON.stringify({ kind: "ai", text: "On it." }) },
+  { at: 1400, data: JSON.stringify({ kind: "tool", text: "Navigation goal reached" }) },
+  { at: 2400, data: JSON.stringify({ kind: "ai", text: "Done — what would you like next?" }) },
+];
+
+const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms));
+
+export default async function handler(
+  req: NextApiRequest,
+  res: NextApiResponse,
+) {
+  const path = (Array.isArray(req.query.path) ? req.query.path : [req.query.path])
+    .filter(Boolean)
+    .join("/"); // catch-all segments re-joined, e.g. "unitree/status"
+
+  // --- SSE: agent_responses stream (matches the real backend key) ---
+  if (path === "text_stream/agent_responses" && req.method === "GET") {
+    res.writeHead(200, {
+      "Content-Type": "text/event-stream",
+      "Cache-Control": "no-store, no-transform",
+      Connection: "keep-alive",
+    });
+    let closed = false;
+    req.on("close", () => {
+      closed = true;
+    });
+    let seenStart = 0; // startedAt of the run this connection is currently replaying
+    let idx = 0; // next SCRIPT entry to send for that run
+    while (!closed && !res.writableEnded) {
+      if (run) {
+        if (run.startedAt !== seenStart) {
+          seenStart = run.startedAt;
+          idx = 0;
+        }
+        const elapsed = Date.now() - run.startedAt;
+        while (idx < SCRIPT.length && SCRIPT[idx].at <= elapsed) {
+          res.write(`data: ${SCRIPT[idx].data}\n\n`);
+          idx++;
+        }
+        if (idx >= SCRIPT.length) run = null; // `run` is module-level: this clears it for every open stream
+      } else {
+        res.write(`event: ping\ndata: \n\n`); // idle ping frame with empty data
+      }
+      await sleep(400); // poll interval, so frames can land up to ~400 ms after their `at`
+    }
+    res.end();
+    return;
+  }
+
+  if (path === "text_streams" && req.method === "GET") {
+    return res.status(200).json({ streams: ["agent_responses"] });
+  }
+
+  if (path === "unitree/status" && req.method === "GET") {
+    return res.status(200).json({ status: "online", service: "unitree" });
+  }
+
+  if (path === "submit_query" && req.method === "POST") {
+    run = { startedAt: Date.now() }; // (re)starts the scripted reply for the SSE loop above
+    return res.status(200).json({ ok: true });
+  }
+
+  if (path === "upload_audio" && req.method === "POST") {
+    return res.status(200).json({ text: "stand up please" }); // canned transcript; the upload itself is ignored
+  }
+
+  if (path === "unitree/command" && req.method === "POST") {
+    return res
+      .status(200)
+      .json({ ok: true, command: req.body?.command ?? null });
+  }
+
+  if (path === "unitree/move" && req.method === "POST") {
+    return res.status(200).json({ ok: true });
+  }
+
+  if (path === "interrupt" && req.method === "POST") {
+    run = null; // drops any reply still being scripted
+    return res.status(200).json({ ok: true });
+  }
+
+  return res
+    .status(404)
+    .json({ error: `mock: no route for ${req.method} /${path}` });
+}
diff --git a/webapp/src/pages/api/tts.ts b/webapp/src/pages/api/tts.ts
new file mode 100644
index 0000000000..f273afb208
--- /dev/null
+++ b/webapp/src/pages/api/tts.ts
@@ -0,0 +1,76 @@
+import type { NextApiRequest, NextApiResponse } from "next";
+import OpenAI from "openai";
+
+/**
+ * Server-side proxy to OpenAI text-to-speech (gpt-4o-mini-tts).
+ *
+ * The browser POSTs `{ text }` and gets back an MP3, which the phone plays via
+ * an <audio> element (see src/lib/speech.ts). This runs server-side so the
+ * OPENAI_API_KEY never reaches the client — keep it as OPENAI_API_KEY (NOT
+ * NEXT_PUBLIC_*) in .env.local.
+ *
+ * We use cloud TTS instead of the browser SpeechSynthesis API because iOS
+ * silently drops speech that isn't started inside a user gesture; agent replies
+ * arrive asynchronously over SSE, so they never spoke. An <audio> element,
+ * once unlocked by one tap, plays async audio reliably on iOS.
+ */
+
+// MP3 for a sentence is tiny, but don't let Next cap the response.
+export const config = { api: { responseLimit: false } };
+
+const MODEL = process.env.OPENAI_TTS_MODEL ?? "gpt-4o-mini-tts";
+const VOICE = process.env.OPENAI_TTS_VOICE ?? "coral"; // warm, pleasant female voice
+// gpt-4o-mini-tts is steerable via `instructions` (the numeric `speed` param is
+// not honored by this model), so we set the pace/tone here.
+const INSTRUCTIONS =
+  process.env.OPENAI_TTS_INSTRUCTIONS ??
+  "Speak in a warm, friendly, upbeat female voice at a brisk, slightly faster-than-normal pace. Sound natural and clear.";
+
+let client: OpenAI | null = null;
+function getClient(apiKey: string): OpenAI {
+  client ??= new OpenAI({ apiKey }); // built once; later calls reuse it even if apiKey differs
+  return client;
+}
+
+/** POST `{ text }` → MP3 bytes from OpenAI TTS; any failure is answered with a JSON `{ error }` body. */
+export default async function handler(
+  req: NextApiRequest,
+  res: NextApiResponse,
+) {
+  if (req.method !== "POST") {
+    res.status(405).json({ error: "method not allowed" });
+    return;
+  }
+  const apiKey = process.env.OPENAI_API_KEY;
+  if (!apiKey) {
+    res.status(500).json({ error: "OPENAI_API_KEY is not set" });
+    return;
+  }
+
+  const text = String(req.body?.text ?? "").trim();
+  if (!text) {
+    res.status(400).json({ error: "missing text" });
+    return;
+  }
+  // OpenAI documents a 4096-character cap on `input`; reject here instead of making a call that must fail.
+  if (text.length > 4096) return res.status(413).json({ error: "text too long" });
+
+  try {
+    const speech = await getClient(apiKey).audio.speech.create({
+      model: MODEL,
+      voice: VOICE,
+      input: text,
+      instructions: INSTRUCTIONS,
+      response_format: "mp3",
+    });
+    const audio = Buffer.from(await speech.arrayBuffer());
+    res.setHeader("Content-Type", "audio/mpeg");
+    res.setHeader("Cache-Control", "no-store");
+    res.status(200).send(audio);
+  } catch (err) {
+    // Forward only genuine upstream 4xx/5xx codes; a missing or odd status becomes 502.
+    const upstream = Number((err as { status?: unknown } | null)?.status);
+    const status = upstream >= 400 && upstream <= 599 ? upstream : 502;
+    res.status(status).json({ error: "tts failed", detail: String(err) });
+  }
+}
diff --git a/webapp/src/pages/index.tsx b/webapp/src/pages/index.tsx
new file mode 100644
index 0000000000..0e5ff80f33
--- /dev/null
+++ b/webapp/src/pages/index.tsx
@@ -0,0 +1,191 @@
+import { useCallback, useEffect, useRef, useState } from "react";
+import Head from "next/head";
+import type { Mode, MoveCommand, QuickAction } from "@/lib/types";
+import type { AgentMessage } from "@/lib/agentMessage";
+import * as dimos from "@/lib/dimos";
+import { wrapUserSpeech } from "@/lib/stt";
+import { devLog } from "@/lib/devlog";
+import { speak, cancelSpeech, unlockSpeech } from "@/lib/speech";
+import { useAgentFeed } from "@/hooks/useAgentFeed";
+import { useStt } from "@/hooks/useStt";
+import { useStatus } from "@/hooks/useStatus";
+import { useTeleop } from "@/hooks/useTeleop";
+import Header from "@/components/Header";
+import ModeToggle from "@/components/ModeToggle";
+import VoicePanel from "@/components/VoicePanel";
+import ManualPanel from "@/components/ManualPanel";
+
+const ZERO: MoveCommand = { vx: 0, vy: 0, turn: 0 };
+
+export default function Home() {
+  const [mode, setMode] = useState<Mode>("voice");
+
+  // Speak the agent's replies on the phone (primary feedback for blind users).
+  const [ttsEnabled, setTtsEnabled] = useState(true);
+  const ttsEnabledRef = useRef(true); // mirror of ttsEnabled, readable from stable callbacks
+  useEffect(() => {
+    ttsEnabledRef.current = ttsEnabled;
+  }, [ttsEnabled]);
+
+  // Barge-in: while the user is holding to speak, suppress incoming TTS so the
+  // previous turn's reply doesn't talk over them. Synced from `recording` below.
+  const recordingRef = useRef(false);
+
+  const handleAgentMessage = useCallback((m: AgentMessage) => {
+    // Speak only final `ai` replies, and not while the user is mid-utterance.
+    const willSpeak =
+      ttsEnabledRef.current && m.kind === "ai" && !recordingRef.current;
+    devLog({ event: "tts", spoke: willSpeak, kind: m.kind, text: m.text });
+    if (willSpeak) speak(m.text);
+  }, []);
+
+  const { messages, active, markIdle } = useAgentFeed({
+    onMessage: handleAgentMessage,
+  });
+  const teleop = useTeleop();
+  const [drive, setDrive] = useState<MoveCommand>(ZERO);
+  const driveRef = useRef<MoveCommand>(ZERO); // latest joystick vector, read by the re-emit timer
+  const tickRef = useRef<number | null>(null); // id of the re-emit interval; null while the stick is idle
+  const [busy, setBusy] = useState(false);
+  const [actionError, setActionError] = useState<string | null>(null);
+  const connected = useStatus();
+
+  const submitSpeech = useCallback(async (text: string) => {
+    const payload = wrapUserSpeech(text);
+    devLog({ transcript: text, payload });
+    setActionError(null);
+    try {
+      await dimos.submitQuery(payload);
+    } catch (err) {
+      console.error("submit failed", err);
+      setActionError("Couldn't reach the robot — try again.");
+    }
+  }, []);
+
+  const { recording, transcript, error, start, stop } = useStt(submitSpeech);
+  useEffect(() => {
+    recordingRef.current = recording;
+  }, [recording]);
+
+  // Pressing the talk button is a user gesture: unlock iOS speech and stop any
+  // current playback so it doesn't talk over the new command.
+  const handleStart = useCallback(() => {
+    setActionError(null); // clear any stale "couldn't reach the robot" notice
+    cancelSpeech(); // stop any current playback first…
+    unlockSpeech(); // …then prime within this gesture
+    start();
+  }, [start]);
+
+  const toggleTts = useCallback(() => {
+    // side effects outside the state updater (StrictMode double-invokes updaters)
+    const next = !ttsEnabledRef.current;
+    setTtsEnabled(next);
+    if (next) {
+      unlockSpeech();
+      speak("Speech on"); // audible confirmation, spoken inside this tap (gesture)
+    } else {
+      cancelSpeech();
+    }
+  }, []);
+
+  async function handleAction(a: QuickAction) {
+    setBusy(true);
+    setActionError(null);
+    try {
+      // Route through the agent (so it narrates "I'm standing up…"), same path
+      // as a voice command — but no <user_speech> wrap since this is a button.
+      await dimos.submitQuery(a.command);
+    } catch (err) {
+      console.error("command failed", err);
+      setActionError("Couldn't reach the robot — try again.");
+    } finally {
+      setBusy(false);
+    }
+  }
+
+  async function handleInterrupt() {
+    markIdle(); // local state and speech are reset before the network call
+    cancelSpeech();
+    try {
+      await dimos.interrupt();
+    } catch (err) {
+      console.error("interrupt failed", err);
+    }
+  }
+
+  // Joystick: reflect every frame in the readout, emit move_command over the
+  // teleop socket, re-emit at ~15Hz while held, zero twist on release.
+  const handleMove = useCallback(
+    (m: MoveCommand) => {
+      driveRef.current = m;
+      setDrive(m);
+      if (tickRef.current === null) {
+        teleop.drive(m); // first frame is sent at once; the timer below covers the rest
+        tickRef.current = window.setInterval(
+          () => teleop.drive(driveRef.current),
+          66, // ms between re-emits (~15 Hz)
+        );
+      }
+    },
+    [teleop.drive],
+  );
+
+  const handleDriveEnd = useCallback(() => {
+    if (tickRef.current !== null) {
+      clearInterval(tickRef.current);
+      tickRef.current = null;
+    }
+    driveRef.current = ZERO;
+    setDrive(ZERO);
+    teleop.stop();
+  }, [teleop.stop]);
+
+  useEffect(
+    () => () => {
+      if (tickRef.current !== null) clearInterval(tickRef.current); // NOTE(review): timer only, no teleop.stop() here — confirm a stop is sent elsewhere on unmount
+    },
+    [],
+  );
+
+  return (
+    <>
+      <Head>
+        <title>Goldie</title>
+      </Head>
+      <main className="mx-auto flex min-h-screen w-full max-w-[440px] flex-col">
+        <Header
+          connected={connected}
+          ttsEnabled={ttsEnabled}
+          onToggleTts={toggleTts}
+        />
+        <ModeToggle mode={mode} onChange={setMode} />
+        <div className="flex flex-1 flex-col px-5 pb-6 pt-3">
+          {mode === "voice" ? (
+            <VoicePanel
+              messages={messages}
+              active={active}
+              recording={recording}
+              transcript={transcript || undefined}
+              error={error || actionError || undefined}
+              busy={busy}
+              onStart={handleStart}
+              onStop={stop}
+              onAction={handleAction}
+              onInterrupt={handleInterrupt}
+            />
+          ) : (
+            <ManualPanel
+              drive={drive}
+              busy={busy}
+              linkConnected={teleop.connected}
+              linkConfigured={teleop.configured}
+              onMove={handleMove}
+              onEnd={handleDriveEnd}
+              onAction={handleAction}
+            />
+          )}
+        </div>
+      </main>
+    </>
+  );
+}
diff --git a/webapp/src/styles/globals.css b/webapp/src/styles/globals.css
new file mode 100644
index 0000000000..eefa01dfdf
--- /dev/null
+++ b/webapp/src/styles/globals.css
@@ -0,0 +1,57 @@
+@import "tailwindcss";
+
+/*
+  Goldie design tokens — refined dark system, declared as Tailwind theme variables.
+  One warm gold accent; green reserved for connection, red for Interrupt only.
+  See approved mockup: .superpowers/brainstorm/.../goldie-refined.html
+*/
+@theme { /* tokens are read back below through var(--…) in the body rule */
+  --color-bg-1: #0a0b0e; /* top stop of the page gradient */
+  --color-bg-2: #06070a; /* bottom stop of the page gradient */
+  --color-surface: #14161b;
+  --color-surface-2: #1b1e25;
+
+  --color-line: rgba(255, 255, 255, 0.07); /* white at 7% alpha */
+  --color-line-2: rgba(255, 255, 255, 0.11); /* white at 11% alpha */
+
+  --color-fg: #f3f4f6; /* default text color, applied on body */
+  --color-muted: #969ba5;
+  --color-faint: #595e68;
+
+  --color-gold: #e3b15e; /* the single accent color */
+  --color-gold-soft: rgba(227, 177, 94, 0.13); /* same RGB as --color-gold at 13% alpha */
+  --color-green: #3ecf8e; /* connection status only */
+  --color-red: #e15b57; /* Interrupt only */
+  --color-red-soft: rgba(225, 91, 87, 0.1); /* same RGB as --color-red at 10% alpha */
+
+  --font-sans: -apple-system, BlinkMacSystemFont, system-ui, "Segoe UI", Roboto,
+    sans-serif; /* system UI font stack, applied on body */
+  --font-mono: ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;
+}
+
+html,
+body {
+  height: 100%; /* html and body both span the full viewport height */
+}
+
+body {
+  margin: 0;
+  background: linear-gradient(180deg, var(--color-bg-1), var(--color-bg-2)); /* top-to-bottom dark gradient */
+  background-attachment: fixed; /* gradient stays pinned to the viewport instead of scrolling with the page */
+  color: var(--color-fg);
+  font-family: var(--font-sans);
+  -webkit-font-smoothing: antialiased;
+  /* respect iOS safe areas (notch / home indicator); NOTE(review): insets are usually 0 unless the viewport meta sets viewport-fit=cover — confirm */
+  padding: env(safe-area-inset-top) env(safe-area-inset-right)
+    env(safe-area-inset-bottom) env(safe-area-inset-left);
+}
+
+/* Touch ergonomics: kill tap-highlight + text selection on controls */
+button,
+[role="button"],
+.no-select { /* opt-in class for elements that are neither <button> nor role="button" */
+  -webkit-tap-highlight-color: transparent; /* remove the highlight WebKit paints on tap */
+  -webkit-user-select: none; /* prefixed form, kept alongside the standard property */
+  user-select: none;
+  touch-action: manipulation; /* keep panning and pinch-zoom, drop double-tap-to-zoom */
+}
diff --git a/webapp/tsconfig.json b/webapp/tsconfig.json
new file mode 100644
index 0000000000..10ba618043
--- /dev/null
+++ b/webapp/tsconfig.json
@@ -0,0 +1,29 @@
+{
+  "compilerOptions": {
+    "target": "ES2017",
+    "lib": ["dom", "dom.iterable", "esnext"],
+    "allowJs": true,
+    "skipLibCheck": true,
+    "strict": true,
+    "noEmit": true,
+    "esModuleInterop": true,
+    "module": "esnext",
+    "moduleResolution": "bundler",
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "jsx": "react-jsx",
+    "incremental": true,
+    "paths": {
+      "@/*": ["./src/*"]
+    }
+  },
+  "include": [
+    "next-env.d.ts",
+    "**/*.ts",
+    "**/*.tsx",
+    ".next/types/**/*.ts",
+    ".next/dev/types/**/*.ts",
+    "**/*.mts"
+  ],
+  "exclude": ["node_modules"]
+}
diff --git a/webapp/vitest.config.ts b/webapp/vitest.config.ts
new file mode 100644
index 0000000000..b5310573e6
--- /dev/null
+++ b/webapp/vitest.config.ts
@@ -0,0 +1,14 @@
+import { defineConfig } from "vitest/config";
+import { fileURLToPath } from "node:url";
+
+export default defineConfig({ // Vitest configuration for the webapp
+  test: {
+    environment: "node", // tests run in the plain Node environment
+    include: ["src/**/*.test.ts"], // only *.test.ts under src/ is collected; *.test.tsx does not match this glob
+  },
+  resolve: {
+    alias: {
+      "@": fileURLToPath(new URL("./src", import.meta.url)), // "@" -> <this file's dir>/src, matching the "@/*" paths entry in tsconfig.json
+    },
+  },
+});