diff --git a/.gitignore b/.gitignore
index aedee04af7..a3c0a05a4a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -86,3 +86,8 @@ htmlcov/
 
 # Memory2 autorecord
 recording*.db
+recording*.db-wal
+recording*.db-shm
+
+# MuJoCo runtime log
+MUJOCO_LOG.TXT
diff --git a/dimos/agents/skills/gemini_speak_skill.py b/dimos/agents/skills/gemini_speak_skill.py
new file mode 100644
index 0000000000..aecdad9cbe
--- /dev/null
+++ b/dimos/agents/skills/gemini_speak_skill.py
@@ -0,0 +1,145 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A speak skill backed by Google's Gemini TTS API.
+
+Drop-in replacement for ``SpeakSkill`` that reuses ``GOOGLE_API_KEY`` (already
+used for the LLM and embeddings) instead of an OpenAI key, and works on any
+platform with an audio output device. Satisfies ``SpeakSkillSpec``
+(``speak(text, blocking=True) -> str``).
+"""
+
+import threading
+import time
+
+from reactivex import Subject
+
+from dimos.agents.annotation import skill
+from dimos.core.core import rpc
+from dimos.core.module import Module, ModuleConfig
+from dimos.stream.audio.node_output import SounddeviceAudioOutput
+from dimos.stream.audio.tts.node_gemini import GeminiTTSNode
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+
+class GeminiSpeakSkillConfig(ModuleConfig):
+    # Prebuilt Gemini voice name (e.g. "Kore", "Puck", "Charon", "Aoede").
+    voice: str = "Kore"
+    # Gemini TTS model; must be a `*-preview-tts` model to emit audio.
+    model: str = "gemini-2.5-flash-preview-tts"
+
+
+class GeminiSpeakSkill(Module):
+    config: GeminiSpeakSkillConfig
+    _tts_node: GeminiTTSNode | None = None
+    _audio_output: SounddeviceAudioOutput | None = None
+    _audio_lock: threading.Lock = threading.Lock()
+    _text_subject: "Subject[str] | None" = None
+
+    @rpc
+    def start(self) -> None:
+        super().start()
+        self._tts_node = GeminiTTSNode(voice=self.config.voice, model=self.config.model)
+        self._audio_output = SounddeviceAudioOutput(sample_rate=24000)
+        self._audio_output.consume_audio(self._tts_node.emit_audio())
+        # Wire the text pipeline ONCE. Each speak() just pushes onto this subject;
+        # the TTS node's own worker drains it FIFO. (Previously consume_text was
+        # called per speak(), spawning a fresh worker thread + subscription every
+        # call — a leak, and the source of the repeated "Starting GeminiTTSNode".)
+        self._text_subject = Subject()
+        self._tts_node.consume_text(self._text_subject)
+
+    @rpc
+    def stop(self) -> None:
+        if self._tts_node:
+            # dispose() clears the queue and joins the worker, so in-flight/queued
+            # speech is torn down here — no separate bg-thread bookkeeping needed.
+            self._tts_node.dispose()
+            self._tts_node = None
+        if self._audio_output:
+            self._audio_output.stop()
+            self._audio_output = None
+        self._text_subject = None
+        super().stop()
+
+    @skill
+    def speak(self, text: str, blocking: bool = False) -> str:
+        """Speak text out loud through the robot's speakers.
+
+        USE THIS TOOL AS OFTEN AS NEEDED. People can't normally see what you say in text, but can hear what you speak.
+
+        Try to be as concise as possible. Remember that speaking takes time, so get to the point quickly.
+
+        Returns immediately by default (the audio plays in the background); pass
+        ``blocking=True`` only when you must wait until the utterance finishes.
+
+        Example usage:
+
+            speak("Hello, I am your robot assistant.")
+        """
+        if self._tts_node is None or self._text_subject is None:
+            return "Error: TTS not initialized"
+
+        if not blocking:
+            # Fire-and-forget: enqueue on the shared pipeline and return now.
+            self._text_subject.on_next(text)
+            return f"Speaking (non-blocking): {text}"
+
+        return self._speak_blocking(text)
+
+    def _speak_blocking(self, text: str) -> str:
+        # Serialize blocking speech so utterances don't overlap on the speaker.
+        with self._audio_lock:
+            if self._tts_node is None or self._text_subject is None:
+                return "Error: TTS not initialized"
+
+            audio_complete = threading.Event()
+
+            # emit_text() re-emits the exact utterance once its synthesis finishes;
+            # match on the text so a concurrent non-blocking speak can't trip us.
+            def on_text(t: str) -> None:
+                if t == text:
+                    audio_complete.set()
+
+            def on_error(_e: Exception) -> None:
+                audio_complete.set()
+
+            subscription = self._tts_node.emit_text().subscribe(
+                on_next=on_text,
+                on_error=on_error,
+            )
+
+            self._text_subject.on_next(text)
+
+            # Gemini synthesis is a network round-trip; allow more headroom than
+            # the local-output time so first-token latency doesn't trip the wait.
+            timeout = max(15, len(text) * 0.1)
+            try:
+                if not audio_complete.wait(timeout=timeout):
+                    logger.warning(f"TTS timeout reached for: {text}")
+                    return f"Warning: TTS timeout while speaking: {text}"
+                # Small delay to ensure buffers flush.
+                time.sleep(0.3)
+                return f"Spoke: {text}"
+            finally:
+                subscription.dispose()
+
+
+if __name__ == "__main__":
+    skill_module = GeminiSpeakSkill()
+    skill_module.start()
+    print(skill_module.speak("Hello, I am your robot assistant, powered by Gemini.", blocking=True))
+    skill_module.stop()
diff --git a/dimos/agents/skills/local_speak_skill.py b/dimos/agents/skills/local_speak_skill.py
new file mode 100644
index 0000000000..1af0e8666a
--- /dev/null
+++ b/dimos/agents/skills/local_speak_skill.py
@@ -0,0 +1,122 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A speak skill backed by the macOS ``say`` command.
+
+Drop-in replacement for ``SpeakSkill`` that requires no OpenAI key and no
+audio pipeline: ``say`` synthesizes and plays the audio itself. macOS only.
+Satisfies ``SpeakSkillSpec`` (``speak(text, blocking=True) -> str``).
+"""
+
+import shutil
+import subprocess
+import threading
+
+from dimos.agents.annotation import skill
+from dimos.constants import DEFAULT_THREAD_JOIN_TIMEOUT
+from dimos.core.core import rpc
+from dimos.core.module import Module, ModuleConfig
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+
+class LocalSpeakSkillConfig(ModuleConfig):
+    # macOS `say` voice name (e.g. "Daniel", "Samantha"); None = system default.
+    voice: str | None = None
+    # Speech rate in words per minute; None = `say` default (~175).
+    rate: int | None = None
+
+
+class LocalSpeakSkill(Module):
+    """Speak text out loud through the local macOS ``say`` command."""
+
+    config: LocalSpeakSkillConfig
+    _bg_threads: list[threading.Thread] = []
+    _bg_threads_lock: threading.Lock = threading.Lock()
+
+    @rpc
+    def start(self) -> None:
+        super().start()
+        if shutil.which("say") is None:
+            logger.warning(
+                "LocalSpeakSkill: `say` not found on PATH; speak() will no-op. "
+                "This skill is macOS-only."
+            )
+
+    @rpc
+    def stop(self) -> None:
+        with self._bg_threads_lock:
+            threads = list(self._bg_threads)
+        for t in threads:
+            t.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+        super().stop()
+
+    def _command(self, text: str) -> list[str]:
+        cmd = ["say"]
+        if self.config.voice:
+            cmd += ["-v", self.config.voice]
+        if self.config.rate:
+            cmd += ["-r", str(self.config.rate)]
+        cmd.append(text)
+        return cmd
+
+    @skill
+    def speak(self, text: str, blocking: bool = True) -> str:
+        """Speak text out loud through the robot's speakers.
+
+        USE THIS TOOL AS OFTEN AS NEEDED. People can't normally see what you say in text, but can hear what you speak.
+
+        Try to be as concise as possible. Remember that speaking takes time, so get to the point quickly.
+
+        Example usage:
+
+            speak("Hello, I am your robot assistant.")
+        """
+        if shutil.which("say") is None:
+            return "Error: `say` command not available (LocalSpeakSkill is macOS-only)"
+
+        if not text.strip():
+            return "Error: nothing to speak"
+
+        if not blocking:
+            thread = threading.Thread(
+                target=self._speak_blocking, args=(text,), daemon=True, name="LocalSpeakSkill-bg"
+            )
+            with self._bg_threads_lock:
+                self._bg_threads.append(thread)
+            thread.start()
+            return f"Speaking (non-blocking): {text}"
+
+        return self._speak_blocking(text)
+
+    def _speak_blocking(self, text: str) -> str:
+        try:
+            subprocess.run(self._command(text), check=True)
+        except subprocess.CalledProcessError as e:
+            logger.error(f"`say` failed: {e}")
+            return f"Error: failed to speak: {text}"
+        finally:
+            with self._bg_threads_lock:
+                self._bg_threads = [
+                    t for t in self._bg_threads if t is not threading.current_thread()
+                ]
+        return f"Spoke: {text}"
+
+
+if __name__ == "__main__":
+    skill_module = LocalSpeakSkill()
+    skill_module.start()
+    print(skill_module.speak("Hello, I am your robot assistant, powered by Gemini."))
+    skill_module.stop()
diff --git a/dimos/agents/skills/map_uploader.py b/dimos/agents/skills/map_uploader.py
new file mode 100644
index 0000000000..9cbcde99f1
--- /dev/null
+++ b/dimos/agents/skills/map_uploader.py
@@ -0,0 +1,91 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Periodically upload the 2D occupancy map (global_costmap) to the robomoo app.
+
+Subscribes to `global_costmap`, encodes it as a *value-preserving* grayscale PNG
+(free=0, occupied=1..100, unknown=255 — NOT a pre-colored image), throttles, and
+POSTs it plus grid metadata (resolution, origin, width, height) to robomoo's
+`/api/robot/map`. The web app reads the raw cell values back and applies its own
+colormap + overlays, mapping world→pixel via
+`col = (x - originX) / resolution`, `row = (y - originY) / resolution`.
+
+Env: ROBOMOO_URL, ROBOT_INGEST_TOKEN.
+"""
+
+import os
+import time
+
+import cv2
+import numpy as np
+import httpx
+
+from dimos.core.core import rpc
+from dimos.core.module import Module, ModuleConfig
+from dimos.core.stream import In
+from dimos.msgs.nav_msgs.OccupancyGrid import OccupancyGrid
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+
+class MapUploaderConfig(ModuleConfig):
+    robomoo_url: str = os.getenv("ROBOMOO_URL", "")
+    ingest_token: str = os.getenv("ROBOT_INGEST_TOKEN", "")
+    min_period_s: float = 5.0  # throttle: at most one upload every N seconds
+
+
+class MapUploader(Module):
+    config: MapUploaderConfig
+    global_costmap: In[OccupancyGrid]
+
+    @rpc
+    def start(self) -> None:
+        super().start()
+        self._last = 0.0
+        self.global_costmap.subscribe(self._on_costmap)
+
+    def _on_costmap(self, grid: OccupancyGrid) -> None:
+        now = time.monotonic()
+        if now - self._last < self.config.min_period_s:
+            return
+        url = self.config.robomoo_url
+        token = self.config.ingest_token
+        if not url or not token:
+            return
+        self._last = now
+
+        try:
+            # Value-preserving encoding: free/occupied 0..100 stay as-is, unknown
+            # (-1) → 255. The web recolors from these raw values, so we never bake
+            # a colormap into the upload. (H, W) uint8 → grayscale PNG.
+            enc = np.where(grid.grid == -1, 255, np.clip(grid.grid, 0, 100)).astype(np.uint8)
+            ok, buf = cv2.imencode(".png", enc)
+            if not ok:
+                return
+            httpx.post(
+                f"{url.rstrip('/')}/api/robot/map",
+                headers={"Authorization": f"Bearer {token}"},
+                files={"file": ("map.png", buf.tobytes(), "image/png")},
+                data={
+                    "resolution": str(grid.resolution),
+                    "originX": str(grid.origin.position.x),
+                    "originY": str(grid.origin.position.y),
+                    "width": str(grid.width),
+                    "height": str(grid.height),
+                },
+                timeout=30.0,
+            ).raise_for_status()
+        except Exception as e:  # noqa: BLE001 — best-effort; never break the stream
+            logger.warning("map upload failed: %s", e)
diff --git a/dimos/agents/skills/navigation.py b/dimos/agents/skills/navigation.py
index d88bec452e..111ba27700 100644
--- a/dimos/agents/skills/navigation.py
+++ b/dimos/agents/skills/navigation.py
@@ -19,9 +19,11 @@
 
 from dimos.agents.annotation import skill
 from dimos.core.core import rpc
-from dimos.core.module import Module
+from dimos.core.module import Module, ModuleConfig
 from dimos.core.stream import In
 from dimos.models.qwen.bbox import BBox
+from dimos.models.vl.create import create
+from dimos.models.vl.types import VlModelName
 from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
 from dimos.msgs.geometry_msgs.Quaternion import Quaternion
 from dimos.msgs.geometry_msgs.Vector3 import Vector3, make_vector3
@@ -37,7 +39,14 @@
 logger = setup_logger()
 
 
+class Config(ModuleConfig):
+    # VL model used to detect objects in view for semantic navigation.
+    vl_model_name: VlModelName = "qwen"
+
+
 class NavigationSkillContainer(Module):
+    config: Config
+
     _latest_image: Image | None = None
     _latest_odom: PoseStamped | None = None
     _skill_started: bool = False
@@ -54,10 +63,7 @@ def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
         self._skill_started = False
 
-        # Here to prevent unwanted imports in the file.
-        from dimos.models.vl.qwen import QwenVlModel
-
-        self._vl_model = QwenVlModel()
+        self._vl_model = create(self.config.vl_model_name)
 
     @rpc
     def start(self) -> None:
diff --git a/dimos/agents/skills/person_follow.py b/dimos/agents/skills/person_follow.py
index 4175898e45..35ec768fa2 100644
--- a/dimos/agents/skills/person_follow.py
+++ b/dimos/agents/skills/person_follow.py
@@ -15,7 +15,7 @@
 import base64
 from threading import Event, RLock, Thread
 import time
-from typing import Any
+from typing import Any, Literal
 
 import numpy as np
 from reactivex.disposable import Disposable
@@ -30,6 +30,7 @@
 from dimos.models.segmentation.edge_tam import EdgeTAMProcessor
 from dimos.models.vl.base import VlModel
 from dimos.models.vl.create import create
+from dimos.models.vl.types import VlModelName
 from dimos.msgs.geometry_msgs.Twist import Twist
 from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo
 from dimos.msgs.sensor_msgs.Image import Image, ImageFormat
@@ -46,6 +47,12 @@
 class Config(ModuleConfig):
     camera_info: CameraInfo
     use_3d_navigation: bool = False
+    # VL model used for the initial (and, in "redetect" mode, continuous) detection.
+    vl_model_name: VlModelName = "qwen"
+    # "edgetam": GPU frame-to-frame tracking (default where CUDA exists).
+    # "redetect": periodic re-detection with the VL model (no GPU, runs on Mac).
+    # "auto": pick "edgetam" if CUDA is available, else "redetect".
+    tracking_mode: Literal["auto", "edgetam", "redetect"] = "auto"
 
 
 class PersonFollowSkillContainer(Module):
@@ -66,13 +73,15 @@ class PersonFollowSkillContainer(Module):
 
     _frequency: float = 20.0  # Hz - control loop frequency
     _max_lost_frames: int = 15  # number of frames to wait before declaring person lost
+    _lost_timeout: float = 5.0  # seconds without a re-detection before declaring person lost
+    _redetect_period: float = 0.8  # seconds between VL re-anchors in "redetect" mode
     _patrolling_module_spec: PatrollingModuleSpec
 
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
         self._latest_image: Image | None = None
         self._latest_pointcloud: PointCloud2 | None = None
-        self._vl_model: VlModel = create("qwen")
+        self._vl_model: VlModel = create(self.config.vl_model_name)
         self._tracker: EdgeTAMProcessor | None = None
         self._thread: Thread | None = None
         self._should_stop: Event = Event()
@@ -206,9 +215,24 @@ def _on_pointcloud(self, pointcloud: PointCloud2) -> None:
         with self._lock:
             self._latest_pointcloud = pointcloud
 
+    def _resolve_tracking_mode(self) -> Literal["edgetam", "redetect"]:
+        """Resolve the effective tracking mode, auto-selecting by hardware."""
+        mode = self.config.tracking_mode
+        if mode != "auto":
+            return mode
+        try:
+            import torch
+
+            return "edgetam" if torch.cuda.is_available() else "redetect"
+        except Exception:
+            return "redetect"
+
     def _follow_person(
         self, query: str, initial_bbox: BBox, detection_image: Image | None = None
     ) -> str:
+        if self._resolve_tracking_mode() == "redetect":
+            return self._follow_person_redetect(query)
+
         x1, y1, x2, y2 = initial_bbox
         box = np.array([x1, y1, x2, y2], dtype=np.float32)
 
@@ -310,6 +334,145 @@ def _follow_loop(self, tracker: "EdgeTAMProcessor", query: str) -> None:
 
         self._send_stop_reason(query, "it was requested to stop following")
 
+    def _follow_person_redetect(self, query: str) -> str:
+        """Start following using periodic VL re-detection (no GPU tracker).
+
+        Used when no CUDA GPU is available (e.g. on a Mac). The initial detection
+        has already succeeded by the time this is called; the loop keeps
+        re-detecting the person with the VL model and servoing toward the bbox.
+        """
+        self.start_tool("follow_person")
+        self._thread = Thread(target=self._follow_loop_redetect, args=(query,), daemon=True)
+        self._thread.start()
+
+        message = (
+            "Found the person. Starting to follow. You can stop following by calling "
+            "the 'stop_following' tool. You will receive streaming updates."
+        )
+
+        if self._patrolling_module_spec.is_patrolling():
+            message += (
+                " Note: since the robot was patrolling, this has been stopped automatically "
+                "(the equivalent of calling the `stop_patrol` tool call) so you don't have "
+                "to do it. "
+            )
+            self._patrolling_module_spec.stop_patrol()
+
+        return message
+
+    @staticmethod
+    def _create_tracker() -> Any:
+        """Best available OpenCV single-object tracker, or None.
+
+        Prefers CSRT > KCF > MIL and checks both the main and legacy namespaces,
+        so it auto-upgrades to CSRT where opencv-contrib is installed and falls
+        back to MIL (present in base OpenCV) otherwise.
+        """
+        import cv2
+
+        for ns in (cv2, getattr(cv2, "legacy", None)):
+            if ns is None:
+                continue
+            for name in ("TrackerCSRT_create", "TrackerKCF_create", "TrackerMIL_create"):
+                ctor = getattr(ns, name, None)
+                if ctor is not None:
+                    return ctor()
+        return None
+
+    def _follow_loop_redetect(self, query: str) -> None:
+        """Follow without a GPU tracker (e.g. on a Mac).
+
+        A single Gemini detection takes ~1-2s, so running it inline would cap the
+        control loop at ~0.5-1 Hz and the robot would act on stale commands. Instead
+        a background thread re-detects every ``_redetect_period`` seconds to *anchor*
+        a cheap local OpenCV tracker, while this control loop runs at ``_frequency``
+        Hz: each cycle it updates the tracker locally and publishes a fresh twist.
+        """
+        det_lock = RLock()
+        # Latest VL detection handed from the detect thread to the control loop.
+        det: dict[str, Any] = {"bbox": None, "image": None, "seq": 0}
+
+        def _detect_worker() -> None:
+            while not self._should_stop.is_set():
+                with self._lock:
+                    img = self._latest_image
+                if img is None:
+                    self._should_stop.wait(0.05)
+                    continue
+                bbox = get_object_bbox_from_image(self._vl_model, img, query)
+                if bbox is not None:
+                    with det_lock:
+                        det["bbox"] = bbox
+                        det["image"] = img
+                        det["seq"] += 1
+                # The call itself already takes ~1-2s; pace re-anchors on top.
+                self._should_stop.wait(self._redetect_period)
+
+        det_thread = Thread(target=_detect_worker, daemon=True, name="follow-redetect")
+        det_thread.start()
+
+        period = 1.0 / self._frequency
+        next_time = time.monotonic()
+        last_good_time = time.monotonic()
+        tracker: Any = None
+        last_seq = -1
+
+        try:
+            while not self._should_stop.is_set():
+                next_time += period
+
+                with self._lock:
+                    latest_image = self._latest_image
+                if latest_image is None:
+                    self._sleep_until(next_time)
+                    continue
+
+                with det_lock:
+                    seq, det_bbox, det_image = det["seq"], det["bbox"], det["image"]
+
+                bbox = None
+                if seq != last_seq and det_bbox is not None:
+                    # Fresh detection: (re)anchor the local tracker on it.
+                    last_seq = seq
+                    tracker = self._create_tracker()
+                    if tracker is not None:
+                        anchor = det_image.data if det_image is not None else latest_image.data
+                        x1, y1, x2, y2 = det_bbox
+                        try:
+                            tracker.init(anchor, (int(x1), int(y1), int(x2 - x1), int(y2 - y1)))
+                        except Exception as e:  # noqa: BLE001 — tracker init is best-effort
+                            logger.warning("tracker init failed, detection-only: %s", e)
+                            tracker = None
+                    bbox = det_bbox
+                    last_good_time = time.monotonic()
+                elif tracker is not None:
+                    # Between detections: cheap local update keeps the loop at rate.
+                    ok, box = tracker.update(latest_image.data)
+                    if ok:
+                        x, y, w, h = box
+                        bbox = (float(x), float(y), float(x + w), float(y + h))
+                        last_good_time = time.monotonic()
+
+                if bbox is not None:
+                    twist = self._visual_servo.compute_twist(bbox, latest_image.width)
+                    self.cmd_vel.publish(twist)
+                elif time.monotonic() - last_good_time > self._lost_timeout:
+                    self.cmd_vel.publish(Twist.zero())
+                    self._send_stop_reason(query, "lost track of the person")
+                    return
+                # else: no bbox yet but within timeout — hold the last command.
+
+                self._sleep_until(next_time)
+        finally:
+            det_thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+
+        self._send_stop_reason(query, "it was requested to stop following")
+
+    def _sleep_until(self, next_time: float) -> None:
+        sleep_duration = next_time - time.monotonic()
+        if sleep_duration > 0:
+            time.sleep(sleep_duration)
+
     def _stop_following(self) -> None:
         self._should_stop.set()
 
diff --git a/dimos/agents/skills/take_picture_skill.py b/dimos/agents/skills/take_picture_skill.py
new file mode 100644
index 0000000000..1920254987
--- /dev/null
+++ b/dimos/agents/skills/take_picture_skill.py
@@ -0,0 +1,312 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Capture robot camera frames and upload them to the robomoo app.
+
+Skills:
+  - take_picture: one-shot capture of the current frame.
+  - explore_and_capture: start autonomous exploration and take a photo every few
+    seconds as the robot moves (deterministic — drives the cadence itself rather
+    than relying on the agent to keep calling take_picture).
+
+Each frame is JPEG-encoded and POSTed (with the robot's odom pose) to robomoo's
+`/api/robot/frame` (shared-secret bearer token). Configure via env:
+
+    ROBOMOO_URL=https://gateway-...up.railway.app
+    ROBOT_INGEST_TOKEN=<secret matching the server>
+"""
+
+import os
+import threading
+import time
+
+import cv2
+import httpx
+from dimos_lcm.std_msgs import Bool
+
+from dimos.agents.annotation import skill
+from dimos.agents.skill_result import SkillResult
+from dimos.core.core import rpc
+from dimos.core.module import Module, ModuleConfig
+from dimos.core.stream import In, Out
+from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
+from dimos.msgs.sensor_msgs.Image import Image
+from dimos.navigation.frontier_exploration.frontier_explorer_spec import (
+    FrontierExplorerSpec,
+)
+from dimos.robot.unitree.tilt_spec import TiltSpec
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+
+class TakePictureSkillConfig(ModuleConfig):
+    robomoo_url: str = os.getenv("ROBOMOO_URL", "")
+    ingest_token: str = os.getenv("ROBOT_INGEST_TOKEN", "")
+
+
+class TakePictureSkill(Module):
+    config: TakePictureSkillConfig
+    color_image: In[Image]
+    odom: In[PoseStamped]
+    # Drive frontier exploration (consumed by WavefrontFrontierExplorer).
+    explore_cmd: Out[Bool]
+    stop_explore_cmd: Out[Bool]
+    # Auto-wired (structurally) to WavefrontFrontierExplorer — lets us gate the
+    # capture loop on whether exploration is still running.
+    _explorer: FrontierExplorerSpec
+    # Auto-wired (structurally) to UnitreeSkillContainer.tilt_body — lets
+    # tilt_and_capture aim the body-fixed camera without owning the connection.
+    _tilt: TiltSpec
+
+    @rpc
+    def start(self) -> None:
+        super().start()
+        self._latest: Image | None = None
+        self._pose: PoseStamped | None = None
+        self._capture_thread: threading.Thread | None = None
+        self._capture_stop = threading.Event()
+        # Outstanding fire-and-forget upload threads from take_picture().
+        self._uploads: list[threading.Thread] = []
+        self._uploads_lock = threading.Lock()
+        self.color_image.subscribe(self._on_image)
+        self.odom.subscribe(self._on_odom)
+
+    @rpc
+    def stop(self) -> None:
+        self._capture_stop.set()
+        thread = getattr(self, "_capture_thread", None)
+        if thread is not None and thread.is_alive():
+            thread.join(timeout=5.0)
+        with self._uploads_lock:
+            uploads = list(self._uploads)
+        for t in uploads:
+            if t.is_alive():
+                t.join(timeout=5.0)
+        super().stop()
+
+    def _on_image(self, image: Image) -> None:
+        self._latest = image
+
+    def _on_odom(self, pose: PoseStamped) -> None:
+        self._pose = pose
+
+    def _configured(self) -> bool:
+        return bool(self.config.robomoo_url and self.config.ingest_token)
+
+    # Encode a given frame and POST it (with pose) to robomoo. Returns the stored
+    # key, or None if there's no frame / encode failed. Raises on HTTP error.
+    def _upload_frame(
+        self,
+        frame: Image | None,
+        pose: PoseStamped | None,
+        note: str = "",
+        label: str = "",
+    ) -> str | None:
+        if frame is None:
+            return None
+        ok, buf = cv2.imencode(".jpg", frame.data)
+        if not ok:
+            return None
+
+        data: dict[str, str] = {}
+        if note:
+            data["note"] = note
+        if label:
+            data["label"] = label
+        if pose is not None:
+            data["poseX"] = str(pose.position.x)
+            data["poseY"] = str(pose.position.y)
+
+        resp = httpx.post(
+            f"{self.config.robomoo_url.rstrip('/')}/api/robot/frame",
+            headers={"Authorization": f"Bearer {self.config.ingest_token}"},
+            files={"file": ("frame.jpg", buf.tobytes(), "image/jpeg")},
+            data=data,
+            timeout=30.0,
+        )
+        resp.raise_for_status()
+        return resp.json().get("key", "")
+
+    # Thin wrapper used by the explore capture loop: upload the latest frame/pose.
+    def _upload_current(self, note: str = "", label: str = "") -> str | None:
+        return self._upload_frame(
+            getattr(self, "_latest", None),
+            getattr(self, "_pose", None),
+            note=note,
+            label=label,
+        )
+
+    @skill
+    def take_picture(self, note: str = "") -> SkillResult:
+        """Capture a photo from the robot's camera and upload it.
+
+        Use whenever the user asks the robot to take or capture a single picture
+        or photo of what it currently sees. `note` is an optional short caption
+        to tag the image with (e.g. "kitchen", "plant"). Returns immediately; the
+        encode + upload happen in the background.
+        """
+        frame = getattr(self, "_latest", None)
+        if frame is None:
+            return SkillResult.fail("NO_FRAME", "No camera frame received yet")
+        if not self._configured():
+            return SkillResult.fail(
+                "NOT_CONFIGURED", "ROBOMOO_URL / ROBOT_INGEST_TOKEN not set"
+            )
+
+        # Snapshot the frame + pose now so the background upload sends exactly what
+        # the robot saw at call time, not a later frame.
+        pose = getattr(self, "_pose", None)
+
+        def _bg() -> None:
+            try:
+                key = self._upload_frame(frame, pose, note=note, label=note)
+                logger.info("take_picture uploaded frame key=%s", key)
+            except Exception:  # noqa: BLE001 — fire-and-forget: failures only logged
+                logger.exception("take_picture upload failed")
+
+        t = threading.Thread(target=_bg, daemon=True, name="take-picture-upload")
+        with self._uploads_lock:
+            # Drop finished threads so the list doesn't grow unbounded.
+            self._uploads = [u for u in self._uploads if u.is_alive()]
+            self._uploads.append(t)
+        t.start()
+        return SkillResult.ok("Picture captured; uploading in the background.")
+
+    @skill
+    def tilt_and_capture(
+        self,
+        pitch_deg: float = -20.0,
+        note: str = "",
+        settle_s: float = 1.0,
+    ) -> SkillResult:
+        """Tilt the body to aim the camera, photograph that view, then re-level.
+
+        Use to photograph things above or below the robot's straight-ahead view
+        (the camera is body-fixed). NEGATIVE pitch_deg looks UP, positive looks
+        DOWN. Runs in the background: tilts, waits `settle_s` for the body to
+        settle, captures + uploads the tilted view, then returns the body to
+        level. Returns immediately. The robot should be standing first.
+        """
+        if getattr(self, "_latest", None) is None:
+            return SkillResult.fail("NO_FRAME", "No camera frame received yet")
+        if not self._configured():
+            return SkillResult.fail(
+                "NOT_CONFIGURED", "ROBOMOO_URL / ROBOT_INGEST_TOKEN not set"
+            )
+
+        def _bg() -> None:
+            try:
+                self._tilt.tilt_body(pitch_deg=pitch_deg)
+                # Wait for the body to physically reach the pose and a fresh
+                # camera frame to arrive before snapshotting.
+                time.sleep(settle_s)
+                key = self._upload_frame(
+                    getattr(self, "_latest", None),
+                    getattr(self, "_pose", None),
+                    note=note,
+                    label=note,
+                )
+                logger.info("tilt_and_capture uploaded frame key=%s", key)
+            except Exception:  # noqa: BLE001 — fire-and-forget: failures only logged
+                logger.exception("tilt_and_capture failed")
+            finally:
+                # Always return the body to level, even if capture failed.
+                try:
+                    self._tilt.tilt_body()
+                except Exception:  # noqa: BLE001 — best effort
+                    logger.exception("tilt_and_capture re-level failed")
+
+        t = threading.Thread(target=_bg, daemon=True, name="tilt-and-capture")
+        with self._uploads_lock:
+            self._uploads = [u for u in self._uploads if u.is_alive()]
+            self._uploads.append(t)
+        t.start()
+        return SkillResult.ok(
+            f"Tilting to pitch={pitch_deg} deg, capturing, then re-leveling (background)."
+        )
+
+    @skill
+    def explore_and_capture(
+        self,
+        interval_s: float = 4.0,
+        max_duration_s: float = 600.0,
+        note: str = "exploring",
+    ) -> SkillResult:
+        """Explore the room and keep taking photos until exploration is complete.
+
+        Use when the user asks the robot to explore / wander / look around AND
+        take pictures (or capture/photograph) as it goes. Starts autonomous
+        frontier exploration, then captures and uploads a frame every
+        `interval_s` seconds for as long as the robot is still exploring — it
+        stops on its own once the room is fully explored. `max_duration_s` is
+        only a safety cap. Returns immediately; capturing runs in the background.
+        """
+        if not self._configured():
+            return SkillResult.fail(
+                "NOT_CONFIGURED", "ROBOMOO_URL / ROBOT_INGEST_TOKEN not set"
+            )
+
+        # Cancel any in-flight run before starting a new one.
+        self._capture_stop.set()
+        if self._capture_thread is not None and self._capture_thread.is_alive():
+            self._capture_thread.join(timeout=5.0)
+        self._capture_stop = threading.Event()
+
+        self.explore_cmd.publish(Bool(data=True))
+        self._capture_thread = threading.Thread(
+            target=self._capture_loop,
+            args=(interval_s, max_duration_s, note),
+            daemon=True,
+            name="explore-and-capture",
+        )
+        self._capture_thread.start()
+
+        return SkillResult.ok(
+            f"Exploring and capturing a photo every {interval_s:.0f}s until the "
+            "room is fully explored."
+        )
+
+    def _exploring(self) -> bool:
+        try:
+            return bool(self._explorer.is_exploration_active())
+        except Exception as e:  # noqa: BLE001 — if the ref errors, keep capturing
+            logger.warning("is_exploration_active() failed: %s", e)
+            return True
+
+    def _capture_loop(self, interval_s: float, max_duration_s: float, note: str) -> None:
+        start = time.monotonic()
+        grace_s = 6.0  # let exploration spin up before trusting the active flag
+        count = 0
+        try:
+            while not self._capture_stop.is_set():
+                elapsed = time.monotonic() - start
+                if elapsed > max_duration_s:
+                    logger.info("explore_and_capture hit max_duration_s cap")
+                    break
+                if elapsed > grace_s and not self._exploring():
+                    logger.info("exploration finished — stopping capture loop")
+                    break
+                try:
+                    if self._upload_current(note=note, label=note):
+                        count += 1
+                except Exception as e:  # noqa: BLE001 — keep going on transient errors
+                    logger.warning("explore_and_capture upload failed: %s", e)
+                self._capture_stop.wait(interval_s)
+        finally:
+            try:
+                self.stop_explore_cmd.publish(Bool(data=True))
+            except Exception:  # noqa: BLE001 — best effort on shutdown
+                pass
+            logger.info("explore_and_capture finished: uploaded %d photos", count)
diff --git a/dimos/agents_deprecated/memory/image_embedding.py b/dimos/agents_deprecated/memory/image_embedding.py
index 373f8c5663..939159cca8 100644
--- a/dimos/agents_deprecated/memory/image_embedding.py
+++ b/dimos/agents_deprecated/memory/image_embedding.py
@@ -45,12 +45,18 @@ class ImageEmbeddingProvider:
     that can be stored in a vector database and used for similarity search.
     """
 
+    # Remote multimodal embedding model used when model_name == "gemini".
+    GEMINI_EMBED_MODEL = "gemini-embedding-2"
+
     def __init__(self, model_name: str = "clip", dimensions: int = 512) -> None:
         """
         Initialize the image embedding provider.
 
         Args:
-            model_name: Name of the embedding model to use ("clip", "resnet").
+            model_name: Name of the embedding model to use ("clip", "resnet",
+                "gemini"). "gemini" uses the remote Gemini Embedding API
+                (multimodal: image + text in one space) and needs
+                GEMINI_API_KEY or GOOGLE_API_KEY.
             dimensions: Dimensions of the embedding vectors
         """
         self.model_name = model_name
@@ -65,6 +71,23 @@ def __init__(self, model_name: str = "clip", dimensions: int = 512) -> None:
 
     def _initialize_model(self):  # type: ignore[no-untyped-def]
         """Initialize the specified embedding model."""
+        if self.model_name == "gemini":
+            from google import genai
+
+            api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
+            if not api_key:
+                raise ValueError(
+                    "Gemini embeddings require GEMINI_API_KEY or GOOGLE_API_KEY to be set"
+                )
+            # Client doubles as the "model"; sentinel processor keeps the
+            # None-guards in get_embedding / get_text_embedding happy.
+            self.model = genai.Client(api_key=api_key)  # type: ignore[assignment]
+            self.processor = "gemini"  # type: ignore[assignment]
+            logger.info(
+                f"Using remote Gemini embeddings: {self.GEMINI_EMBED_MODEL} "
+                f"({self.dimensions}d)"
+            )
+            return
         try:
             import onnxruntime as ort  # type: ignore[import-untyped]
             import torch  # noqa: F401
@@ -119,11 +142,23 @@ def get_embedding(self, image: np.ndarray | str | bytes) -> np.ndarray:
             A numpy array containing the embedding vector
         """
         if self.model is None or self.processor is None:
-            logger.error("Model not initialized. Using fallback random embedding.")
-            return np.random.randn(self.dimensions).astype(np.float32)
+            raise RuntimeError(
+                f"Image embedding model '{self.model_name}' is not initialized; refusing to "
+                "return a random vector that would poison the vector store."
+            )
 
         pil_image = self._prepare_image(image)
 
+        if self.model_name == "gemini":
+            # No silent fallback: a failed embedding must abort loudly rather than
+            # writing a random vector into the spatial DB.
+            from google.genai import types
+
+            buf = io.BytesIO()
+            pil_image.convert("RGB").save(buf, format="PNG")
+            part = types.Part.from_bytes(data=buf.getvalue(), mime_type="image/png")
+            return self._gemini_embed_content([part])
+
         embedding: np.ndarray
         try:
             import torch
@@ -194,8 +229,14 @@ def get_text_embedding(self, text: str) -> np.ndarray:
             A numpy array containing the embedding vector
         """
         if self.model is None or self.processor is None:
-            logger.error("Model not initialized. Using fallback random embedding.")
-            return np.random.randn(self.dimensions).astype(np.float32)
+            raise RuntimeError(
+                f"Text embedding model '{self.model_name}' is not initialized; refusing to "
+                "return a random vector that would poison the vector store."
+            )
+
+        if self.model_name == "gemini":
+            # No silent fallback: surface the failure instead of corrupting the store.
+            return self._gemini_embed_content(text)
 
         if self.model_name != "clip":
             logger.warning(
@@ -251,6 +292,24 @@ def get_text_embedding(self, text: str) -> np.ndarray:
             logger.error(f"Error generating text embedding: {e}")
             return np.random.randn(self.dimensions).astype(np.float32)
 
+    def _gemini_embed_content(self, contents: object) -> np.ndarray:
+        """Call the Gemini multimodal embedding API and return a normalized vector.
+
+        `contents` may be a text string or a list of `types.Part` (e.g. an image).
+        Image and text land in the same space, so semantic text search over
+        stored frames keeps working.
+        """
+        from google.genai import types
+
+        cfg = types.EmbedContentConfig(output_dimensionality=self.dimensions)
+        result = self.model.models.embed_content(  # type: ignore[union-attr]
+            model=self.GEMINI_EMBED_MODEL, contents=contents, config=cfg
+        )
+        vec = np.array(result.embeddings[0].values, dtype=np.float32)
+        # Truncated (<3072d) Gemini embeddings are not pre-normalized.
+        norm = np.linalg.norm(vec)
+        return vec / norm if norm > 0 else vec
+
     def _prepare_image(self, image: np.ndarray | str | bytes) -> Image.Image:
         """
         Convert the input image to PIL format required by the models.
diff --git a/dimos/constants.py b/dimos/constants.py
index d849f4aaf3..5b83bce636 100644
--- a/dimos/constants.py
+++ b/dimos/constants.py
@@ -45,6 +45,11 @@
 DEFAULT_CAPACITY_COLOR_IMAGE = 1920 * 1080 * 3
 # Default depth image size: 1280x720 frame * 4 (float32 size)
 DEFAULT_CAPACITY_DEPTH_IMAGE = 1280 * 720 * 4
+# Fixed-capacity SHM channels must be sized before the first message arrives.
+# These defaults cover current Go2 replay and navigation payloads while keeping
+# large local streams off UDP multicast.
+DEFAULT_CAPACITY_POINTCLOUD = 64 * 1024 * 1024
+DEFAULT_CAPACITY_OCCUPANCY_GRID = 16 * 1024 * 1024
 
 # From https://github.com/lcm-proj/lcm.git
 LCM_MAX_CHANNEL_NAME_LENGTH = 63
diff --git a/dimos/core/coordination/module_coordinator.py b/dimos/core/coordination/module_coordinator.py
index 60c41a6ba1..d0593b8e02 100644
--- a/dimos/core/coordination/module_coordinator.py
+++ b/dimos/core/coordination/module_coordinator.py
@@ -30,7 +30,14 @@
 from dimos.core.global_config import GlobalConfig, global_config
 from dimos.core.module import ModuleBase, ModuleSpec
 from dimos.core.resource import Resource
-from dimos.core.transport import LCMTransport, PubSubTransport, pLCMTransport
+from dimos.core.transport import (
+    JpegShmTransport,
+    LCMTransport,
+    PubSubTransport,
+    SHMTransport,
+    pLCMTransport,
+    pSHMTransport,
+)
 from dimos.spec.utils import is_spec, spec_annotation_compliance, spec_structural_compliance
 from dimos.utils.generic import short_id
 from dimos.utils.logging_config import setup_logger
@@ -279,6 +286,9 @@ def _connect_streams(self, blueprint: Blueprint) -> None:
                     module=module.__name__,
                     transport=transport.__class__.__name__,
                 )
+        # SHM streams are concrete transport objects, not LCM topics. Forward
+        # them to Rerun after stream wiring has resolved the transport registry.
+        _configure_rerun_bridge_visual_transports(self)
 
     @classmethod
     def build(
@@ -584,6 +594,31 @@ def _get_transport_for(blueprint: Blueprint, name: str, stream_type: type) -> Pu
     return transport
 
 
+def _configure_rerun_bridge_visual_transports(coordinator: ModuleCoordinator) -> None:
+    """Send resolved SHM transports to an active Rerun bridge.
+
+    RerunBridgeModule subscribes to configured pubsubs directly. For SHM
+    streams, the coordinator forwards the concrete transport objects after
+    stream wiring has selected them.
+    """
+    from dimos.visualization.rerun.bridge import RerunBridgeModule
+
+    if RerunBridgeModule not in coordinator._deployed_modules:
+        return
+
+    # LCM transports are already visible through RerunBridgeModule.config.pubsubs.
+    transports = [
+        transport
+        for transport in coordinator._transport_registry.values()
+        if isinstance(transport, SHMTransport | pSHMTransport | JpegShmTransport)
+    ]
+    if not transports:
+        return
+
+    bridge = coordinator.get_instance(RerunBridgeModule)
+    bridge.set_visual_transports(transports)
+
+
 def _verify_no_name_conflicts(blueprint: Blueprint) -> None:
     name_to_types: dict[Any, set[type]] = defaultdict(set)
     name_to_modules: dict[Any, list[tuple[type, type]]] = defaultdict(list)
diff --git a/dimos/core/transport.py b/dimos/core/transport.py
index 6435003758..de2ced9cbb 100644
--- a/dimos/core/transport.py
+++ b/dimos/core/transport.py
@@ -163,14 +163,23 @@ def stop(self) -> None:
 
 
 class pSHMTransport(PubSubTransport[T]):
+    """Pickled shared-memory transport for local Python object streams."""
+
     _started: bool = False
 
     def __init__(self, topic: str, **kwargs) -> None:  # type: ignore[no-untyped-def]
         super().__init__(topic)
+        self._kwargs = kwargs
         self.shm = PickleSharedMemory(**kwargs)
 
     def __reduce__(self):  # type: ignore[no-untyped-def]
-        return (pSHMTransport, (self.topic,))
+        # Preserve sizing options such as default_capacity when the coordinator
+        # sends this transport to workers or to Rerun.
+        return (pSHMTransport, (self.topic,), self._kwargs)
+
+    def __setstate__(self, state: dict[str, Any]) -> None:
+        self._kwargs = state
+        self.shm = PickleSharedMemory(**state)
 
     def broadcast(self, _, msg) -> None:  # type: ignore[no-untyped-def]
         if not self._started:
@@ -193,14 +202,23 @@ def stop(self) -> None:
 
 
 class SHMTransport(PubSubTransport[T]):
+    """Raw bytes shared-memory transport for local fixed-size payloads."""
+
     _started: bool = False
 
     def __init__(self, topic: str, **kwargs) -> None:  # type: ignore[no-untyped-def]
         super().__init__(topic)
+        self._kwargs = kwargs
         self.shm = BytesSharedMemory(**kwargs)
 
     def __reduce__(self):  # type: ignore[no-untyped-def]
-        return (SHMTransport, (self.topic,))
+        # Preserve sizing options such as default_capacity when the coordinator
+        # sends this transport to workers or to Rerun.
+        return (SHMTransport, (self.topic,), self._kwargs)
+
+    def __setstate__(self, state: dict[str, Any]) -> None:
+        self._kwargs = state
+        self.shm = BytesSharedMemory(**state)
 
     def broadcast(self, _, msg) -> None:  # type: ignore[no-untyped-def]
         if not self._started:
@@ -223,6 +241,8 @@ def stop(self) -> None:
 
 
 class JpegShmTransport(PubSubTransport[T]):
+    """JPEG-compressed shared-memory transport for local image streams."""
+
     _started: bool = False
 
     def __init__(self, topic: str, quality: int = 75, **kwargs) -> None:  # type: ignore[no-untyped-def]
@@ -233,9 +253,19 @@ def __init__(self, topic: str, quality: int = 75, **kwargs) -> None:  # type: ig
 
         self.shm = JpegSharedMemory(quality=quality, **kwargs)
         self.quality = quality
+        self._kwargs = kwargs
 
     def __reduce__(self):  # type: ignore[no-untyped-def]
-        return (JpegShmTransport, (self.topic, self.quality))
+        # Preserve quality and sizing options when crossing worker boundaries.
+        return (JpegShmTransport, (self.topic, self.quality), self._kwargs)
+
+    def __setstate__(self, state: dict[str, Any]) -> None:
+        from dimos.protocol.pubsub.impl.jpeg_shm import (
+            JpegSharedMemory,
+        )  # deferred to avoid pulling in Image/cv2/rerun
+
+        self._kwargs = state
+        self.shm = JpegSharedMemory(quality=self.quality, **state)
 
     def broadcast(self, _, msg) -> None:  # type: ignore[no-untyped-def]
         if not self._started:
diff --git a/dimos/models/vl/create.py b/dimos/models/vl/create.py
index 362a95b000..f57a113b02 100644
--- a/dimos/models/vl/create.py
+++ b/dimos/models/vl/create.py
@@ -29,3 +29,7 @@ def create(name: VlModelName) -> VlModel:
             from dimos.models.vl.moondream import MoondreamVlModel
 
             return MoondreamVlModel()
+        case "gemini":
+            from dimos.models.vl.gemini import GeminiVlModel
+
+            return GeminiVlModel()
diff --git a/dimos/models/vl/gemini.py b/dimos/models/vl/gemini.py
new file mode 100644
index 0000000000..0aecf6e1c7
--- /dev/null
+++ b/dimos/models/vl/gemini.py
@@ -0,0 +1,87 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import cached_property
+import os
+from typing import TYPE_CHECKING, Any
+import warnings
+
+import numpy as np
+from PIL import Image as PILImage
+
+from dimos.models.vl.base import VlModel, VlModelConfig
+from dimos.msgs.sensor_msgs.Image import Image
+
+if TYPE_CHECKING:
+    from google import genai
+
+
+class GeminiVlModelConfig(VlModelConfig):
+    """Configuration for the Gemini VL model."""
+
+    model_name: str = "gemini-2.5-flash"
+    api_key: str | None = None
+
+
+class GeminiVlModel(VlModel):
+    """VL model backed by Google's Gemini API (google-genai).
+
+    Reuses the same ``GOOGLE_API_KEY`` / ``GEMINI_API_KEY`` auth as the Gemini
+    TTS and embedding components, so no extra key or dependency is required.
+    """
+
+    config: GeminiVlModelConfig
+
+    @cached_property
+    def _client(self) -> "genai.Client":
+        from google import genai
+
+        api_key = (
+            self.config.api_key or os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
+        )
+        if not api_key:
+            raise ValueError(
+                "Gemini VL model requires GOOGLE_API_KEY or GEMINI_API_KEY to be set"
+            )
+
+        return genai.Client(api_key=api_key)
+
+    def _to_pil(self, image: Image | np.ndarray[Any, Any]) -> PILImage.Image:
+        """Convert dimos Image or numpy array to PIL Image, applying auto_resize."""
+        if isinstance(image, np.ndarray):
+            warnings.warn(
+                "GeminiVlModel.query should receive standard dimos Image type, not a numpy array",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            image = Image.from_numpy(image)
+
+        image, _ = self._prepare_image(image)
+        rgb_image = image.to_rgb()
+        return PILImage.fromarray(rgb_image.data)
+
+    def query(self, image: Image | np.ndarray, query: str) -> str:  # type: ignore[override]
+        pil_image = self._to_pil(image)
+
+        response = self._client.models.generate_content(
+            model=self.config.model_name,
+            contents=[pil_image, query],
+        )
+
+        return response.text or ""
+
+    def stop(self) -> None:
+        """Release the Gemini client."""
+        if "_client" in self.__dict__:
+            del self.__dict__["_client"]
diff --git a/dimos/models/vl/types.py b/dimos/models/vl/types.py
index d20a61ae37..7aa42762fe 100644
--- a/dimos/models/vl/types.py
+++ b/dimos/models/vl/types.py
@@ -14,4 +14,4 @@
 
 from typing import Literal
 
-VlModelName = Literal["qwen", "moondream"]
+VlModelName = Literal["qwen", "moondream", "gemini"]
diff --git a/dimos/navigation/frontier_exploration/frontier_explorer_spec.py b/dimos/navigation/frontier_exploration/frontier_explorer_spec.py
new file mode 100644
index 0000000000..b08cfcb1cd
--- /dev/null
+++ b/dimos/navigation/frontier_exploration/frontier_explorer_spec.py
@@ -0,0 +1,24 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Protocol
+
+from dimos.spec.utils import Spec
+
+
+# Module-ref spec for querying frontier-exploration state. Resolves (structurally)
+# to WavefrontFrontierExplorer, the only deployed module exposing
+# is_exploration_active(). Lets a skill gate work on whether exploration is running.
+class FrontierExplorerSpec(Spec, Protocol):
+    def is_exploration_active(self) -> bool: ...
diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py
index 3d101cca79..196c84dd94 100644
--- a/dimos/robot/all_blueprints.py
+++ b/dimos/robot/all_blueprints.py
@@ -94,6 +94,7 @@
     "unitree-g1-sim": "dimos.robot.unitree.g1.blueprints.perceptive.unitree_g1_sim:unitree_g1_sim",
     "unitree-go2": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2:unitree_go2",
     "unitree-go2-agentic": "dimos.robot.unitree.go2.blueprints.agentic.unitree_go2_agentic:unitree_go2_agentic",
+    "unitree-go2-agentic-gemini": "dimos.robot.unitree.go2.blueprints.agentic.unitree_go2_agentic_gemini:unitree_go2_agentic_gemini",
     "unitree-go2-agentic-huggingface": "dimos.robot.unitree.go2.blueprints.agentic.unitree_go2_agentic_huggingface:unitree_go2_agentic_huggingface",
     "unitree-go2-agentic-ollama": "dimos.robot.unitree.go2.blueprints.agentic.unitree_go2_agentic_ollama:unitree_go2_agentic_ollama",
     "unitree-go2-basic": "dimos.robot.unitree.go2.blueprints.basic.unitree_go2_basic:unitree_go2_basic",
@@ -125,10 +126,13 @@
 all_modules = {
     "alfred-high-level": "dimos.robot.diy.alfred.effector_high_level.AlfredHighLevel",
     "arm-teleop-module": "dimos.teleop.quest.quest_extensions.ArmTeleopModule",
+    "audio-ws-module": "dimos.web.audio_ws_module.AudioWsModule",
     "b-box-navigation-module": "dimos.navigation.bbox_navigation.BBoxNavigationModule",
     "b1-connection-module": "dimos.robot.unitree.b1.connection.B1ConnectionModule",
+    "camera-mjpeg-module": "dimos.web.mjpeg_module.CameraMjpegModule",
     "camera-module": "dimos.hardware.sensors.camera.module.CameraModule",
     "cartesian-motion-controller": "dimos.manipulation.control.servo_control.cartesian_motion_controller.CartesianMotionController",
+    "cmd-bridge-module": "dimos.web.cmd_bridge_module.CmdBridgeModule",
     "control-coordinator": "dimos.control.coordinator.ControlCoordinator",
     "cost-mapper": "dimos.mapping.costmapper.CostMapper",
     "demo-calculator-skill": "dimos.agents.skills.demo_calculator_skill.DemoCalculatorSkill",
@@ -149,6 +153,7 @@
     "g1-high-level-web-rtc": "dimos.robot.unitree.g1.effectors.high_level.webrtc.G1HighLevelWebRtc",
     "g1-sim-connection": "dimos.robot.unitree.g1.mujoco_sim.G1SimConnection",
     "g1-whole-body-connection": "dimos.robot.unitree.g1.wholebody_connection.G1WholeBodyConnection",
+    "gemini-speak-skill": "dimos.agents.skills.gemini_speak_skill.GeminiSpeakSkill",
     "go2-connection": "dimos.robot.unitree.go2.connection.GO2Connection",
     "go2-fleet-connection": "dimos.robot.unitree.go2.fleet_connection.Go2FleetConnection",
     "go2-memory": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2.Go2Memory",
@@ -162,8 +167,10 @@
     "keyboard-teleop": "dimos.robot.unitree.keyboard_teleop.KeyboardTeleop",
     "keyboard-teleop-module": "dimos.teleop.keyboard.keyboard_teleop_module.KeyboardTeleopModule",
     "local-planner": "dimos.navigation.nav_stack.modules.local_planner.local_planner.LocalPlanner",
+    "local-speak-skill": "dimos.agents.skills.local_speak_skill.LocalSpeakSkill",
     "manipulation-module": "dimos.manipulation.manipulation_module.ManipulationModule",
     "map": "dimos.robot.unitree.type.map.Map",
+    "map-uploader": "dimos.agents.skills.map_uploader.MapUploader",
     "marker-tf-module": "dimos.perception.fiducial.marker_tf_module.MarkerTfModule",
     "mcp-client": "dimos.agents.mcp.mcp_client.McpClient",
     "mcp-server": "dimos.agents.mcp.mcp_server.McpServer",
@@ -205,6 +212,7 @@
     "simple-planner": "dimos.navigation.nav_stack.modules.simple_planner.simple_planner.SimplePlanner",
     "spatial-memory": "dimos.perception.spatial_perception.SpatialMemory",
     "speak-skill": "dimos.agents.skills.speak_skill.SpeakSkill",
+    "take-picture-skill": "dimos.agents.skills.take_picture_skill.TakePictureSkill",
     "tare-planner": "dimos.navigation.nav_stack.modules.tare_planner.tare_planner.TarePlanner",
     "temporal-memory": "dimos.perception.experimental.temporal_memory.temporal_memory.TemporalMemory",
     "terrain-analysis": "dimos.navigation.nav_stack.modules.terrain_analysis.terrain_analysis.TerrainAnalysis",
diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py
index 4cbb8e1153..8a8504f95a 100644
--- a/dimos/robot/cli/dimos.py
+++ b/dimos/robot/cli/dimos.py
@@ -422,6 +422,75 @@ def _on_sigint(_sig: int, _frame: object) -> None:
             typer.echo(format_line(line, json_output=json_output))
 
 
+@main.command("prefetch-models")
+def prefetch_models(
+    skip_lfs: bool = typer.Option(False, "--skip-lfs", help="Skip git-LFS model weight pulls"),
+    skip_whisper: bool = typer.Option(False, "--skip-whisper", help="Skip the Whisper STT model"),
+) -> None:
+    """Pre-download local ML models so they don't cold-download mid-mission.
+
+    Warms the HuggingFace cache (Moondream, CLIP), pulls git-LFS model weights
+    (YOLO, EdgeTAM, etc.) and the Whisper STT model. Safe to re-run — already
+    cached models are quick no-ops. Run this once after `uv sync` or before a
+    mission to avoid the 120s tool-call timeout caused by a 3.85GB Moondream
+    download happening lazily inside `look_out_for`.
+    """
+    results: list[tuple[str, bool]] = []
+
+    def _run(label: str, fn: Any) -> None:
+        typer.echo(f"→ {label} ...")
+        try:
+            fn()
+            results.append((label, True))
+            typer.echo(f"  ✓ {label}")
+        except Exception as e:  # noqa: BLE001 - report and continue per model
+            results.append((label, False))
+            typer.echo(f"  ✗ {label}: {e}", err=True)
+
+    # HuggingFace models: instantiate + start() forces from_pretrained() to download.
+    def _warm_vl(name: str) -> None:
+        from dimos.models.vl.create import create
+
+        create(name).start()
+
+    def _warm_clip() -> None:
+        from dimos.models.embedding.clip import CLIPModel
+
+        CLIPModel().start()
+
+    _run("VL: moondream (vikhyatk/moondream2)", lambda: _warm_vl("moondream"))
+    _run("Embedding: CLIP (openai/clip-vit-base-patch32)", _warm_clip)
+
+    # git-LFS model weights (pulled into the repo data dir on first access).
+    if not skip_lfs:
+        from dimos.utils.data import get_data
+
+        for category in (
+            "models_yolo",
+            "models_yoloe",
+            "models_edgetam",
+            "models_mobileclip",
+            "models_torchreid",
+            "models_clip",
+        ):
+            _run(f"LFS: {category}", lambda c=category: get_data(c))
+
+    # Whisper STT (matches node_whisper's preferred openai-whisper backend).
+    if not skip_whisper:
+
+        def _warm_whisper() -> None:
+            import whisper
+
+            whisper.load_model("base")
+
+        _run("STT: whisper base", _warm_whisper)
+
+    ok = sum(1 for _, success in results if success)
+    typer.echo(f"\nPrefetch complete: {ok}/{len(results)} succeeded.")
+    if ok != len(results):
+        raise typer.Exit(1)
+
+
 mcp_app = typer.Typer(help="Interact with the running MCP server")
 main.add_typer(mcp_app, name="mcp")
 
diff --git a/dimos/robot/test_all_blueprints_generation.py b/dimos/robot/test_all_blueprints_generation.py
index d8b0081d7f..d141f4e3ed 100644
--- a/dimos/robot/test_all_blueprints_generation.py
+++ b/dimos/robot/test_all_blueprints_generation.py
@@ -32,7 +32,14 @@
     "dimos/core/blueprints.py",
     "dimos/core/test_blueprints.py",
 }
-BLUEPRINT_METHODS = {"transports", "global_config", "remappings", "requirements", "configurators"}
+BLUEPRINT_METHODS = {
+    "transports",
+    "global_config",
+    "remappings",
+    "requirements",
+    "configurators",
+    "disabled_modules",
+}
 _EXCLUDED_MODULE_NAMES = {"Module", "ModuleBase", "StreamModule"}
 
 
diff --git a/dimos/robot/unitree/connection.py b/dimos/robot/unitree/connection.py
index 44101cc19d..a1fb5e55a7 100644
--- a/dimos/robot/unitree/connection.py
+++ b/dimos/robot/unitree/connection.py
@@ -15,9 +15,13 @@
 import asyncio
 from dataclasses import dataclass
 import functools
+import io
+import os
+import tempfile
 import threading
 import time
 from typing import Any, TypeAlias, TypeVar
+import wave
 
 import numpy as np
 from numpy.typing import NDArray
@@ -54,6 +58,15 @@
 VideoMessage: TypeAlias = NDArray[np.uint8]  # Shape: (height, width, 3)
 
 
+@dataclass
+class AudioMessage:
+    """PCM audio chunk received from the robot mic."""
+
+    data: bytes  # int16 little-endian PCM
+    sample_rate: int
+    channels: int
+
+
 _T = TypeVar("_T", bound=Timestamped)
 
 
@@ -405,6 +418,102 @@ def switch_video_channel_off() -> None:
 
         return subject.pipe(ops.finally_action(stop))
 
+    @simple_mcache
+    def audio_stream(self) -> Observable[AudioMessage]:
+        """Subscribe to the robot's mic via WebRTC.
+
+        Emits int16 PCM chunks. The Go2's WebRTC audio transceiver is sendrecv;
+        first frame triggers channel activation.
+        """
+        subject: Subject[AudioMessage] = Subject()
+        stop_event = threading.Event()
+
+        async def accept_track(frame: Any) -> None:
+            if stop_event.is_set():
+                return
+            try:
+                arr = frame.to_ndarray()
+                rate = getattr(frame, "sample_rate", 48000)
+                # aiortc returns shape (channels, samples) for planar; assume mono if 1-D.
+                channels = arr.shape[0] if arr.ndim == 2 else 1
+                if np.issubdtype(arr.dtype, np.floating):
+                    arr = np.clip(arr * 32767.0, -32768, 32767).astype(np.int16)
+                elif arr.dtype != np.int16:
+                    arr = arr.astype(np.int16)
+                subject.on_next(
+                    AudioMessage(data=arr.tobytes(), sample_rate=rate, channels=channels)
+                )
+            except Exception:
+                pass
+
+        self.conn.audio.add_track_callback(accept_track)
+
+        def switch_on() -> None:
+            try:
+                self.conn.audio.switchAudioChannel(True)
+            except Exception:
+                pass
+
+        self.loop.call_soon_threadsafe(switch_on)
+
+        def stop() -> None:
+            stop_event.set()
+            try:
+                self.conn.audio.track_callbacks.remove(accept_track)
+            except ValueError:
+                pass
+
+            def switch_off() -> None:
+                try:
+                    self.conn.audio.switchAudioChannel(False)
+                except Exception:
+                    pass
+
+            self.loop.call_soon_threadsafe(switch_off)
+
+        return subject.pipe(ops.finally_action(stop))
+
+    def play_wav_bytes(self, wav: bytes) -> None:
+        """Play a WAV through the Go2 speaker via the megaphone path.
+
+        Fire-and-forget — returns immediately. The Go2 enters megaphone mode
+        for the duration of the clip then exits, so subsequent commands are
+        unaffected.
+        """
+        if not wav:
+            return
+
+        try:
+            with wave.open(io.BytesIO(wav), "rb") as wf:
+                duration = wf.getnframes() / float(wf.getframerate())
+        except (wave.Error, EOFError):
+            duration = 5.0  # unknown format; bail out conservatively
+
+        async def _upload_play_exit() -> None:
+            from unitree_webrtc_connect.webrtc_audiohub import WebRTCAudioHub
+
+            tmp_path: str | None = None
+            try:
+                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+                    fp.write(wav)
+                    tmp_path = fp.name
+
+                hub = WebRTCAudioHub(self.conn)
+                await hub.upload_megaphone(tmp_path)
+                await hub.enter_megaphone()
+                # Hold megaphone for the clip's duration, plus a small flush margin,
+                # then release so other commands work normally.
+                await asyncio.sleep(duration + 0.5)
+                await hub.exit_megaphone()
+            finally:
+                if tmp_path is not None:
+                    try:
+                        os.unlink(tmp_path)
+                    except OSError:
+                        pass
+
+        asyncio.run_coroutine_threadsafe(_upload_play_exit(), self.loop)
+
     def get_video_stream(self, fps: int = 30) -> Observable[Image]:
         """Get the video stream from the robot's camera.
 
diff --git a/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_agentic_gemini.py b/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_agentic_gemini.py
new file mode 100644
index 0000000000..1a5e435223
--- /dev/null
+++ b/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_agentic_gemini.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dimos.agents.mcp.mcp_client import McpClient
+from dimos.agents.mcp.mcp_server import McpServer
+from dimos.agents.skills.gemini_speak_skill import GeminiSpeakSkill
+from dimos.agents.skills.navigation import NavigationSkillContainer
+from dimos.agents.skills.person_follow import PersonFollowSkillContainer
+from dimos.agents.skills.map_uploader import MapUploader
+from dimos.agents.skills.speak_skill import SpeakSkill
+from dimos.agents.skills.take_picture_skill import TakePictureSkill
+from dimos.core.coordination.blueprints import autoconnect
+from dimos.experimental.security_demo.security_module import SecurityModule
+from dimos.perception.spatial_perception import SpatialMemory
+from dimos.robot.unitree.go2.blueprints.agentic._common_agentic import _common_agentic
+from dimos.robot.unitree.go2.blueprints.smart.unitree_go2_spatial import unitree_go2_spatial
+from dimos.robot.unitree.go2.connection import GO2Connection
+
+# Disabled for a no-OpenAI / non-CUDA setup:
+#  - SecurityModule needs a CUDA GPU (EdgeTAM) -> won't boot on Apple Silicon.
+#  - SpeakSkill (TTS) hardcodes OpenAI -> needs OPENAI_API_KEY; replaced below
+#    by GeminiSpeakSkill, which uses the Gemini TTS API (reuses GOOGLE_API_KEY,
+#    no GPU, cross-platform so it can also run on the robot).
+# SpatialMemory is re-declared to use remote Gemini embeddings instead of the
+# local CLIP model (which fails on Apple CoreML); the later atom overrides the
+# one inside unitree_go2_spatial.
+# ALL compute runs on the Mac (the dog has no onboard brain), so VL detection is
+# ALSO Gemini — no local Moondream (~6 min/inference + crashes Metal on Apple
+# Silicon, which aborted the blueprint at startup):
+#  - .global_config(detection_model="gemini") drives look_out_for / PerceiveLoop.
+#  - NavigationSkillContainer + PersonFollowSkillContainer use vl_model_name="gemini"
+#    (these read the per-skill name, not the global knob).
+unitree_go2_agentic_gemini = (
+    autoconnect(
+        unitree_go2_spatial,
+        SpatialMemory.blueprint(embedding_model="gemini", embedding_dimensions=768),
+        McpServer.blueprint(),
+        McpClient.blueprint(model="google_genai:gemini-2.5-flash"),
+        _common_agentic,
+        NavigationSkillContainer.blueprint(vl_model_name="gemini"),
+        PersonFollowSkillContainer.blueprint(
+            camera_info=GO2Connection.camera_info_static,
+            vl_model_name="gemini",
+        ),
+        GeminiSpeakSkill.blueprint(),
+        TakePictureSkill.blueprint(),
+        MapUploader.blueprint(),
+    )
+    .global_config(detection_model="gemini")
+    .disabled_modules(SecurityModule, SpeakSkill)
+)
+
+__all__ = ["unitree_go2_agentic_gemini"]
diff --git a/dimos/robot/unitree/go2/blueprints/basic/unitree_go2_basic.py b/dimos/robot/unitree/go2/blueprints/basic/unitree_go2_basic.py
index 96a291163d..ea315119c5 100644
--- a/dimos/robot/unitree/go2/blueprints/basic/unitree_go2_basic.py
+++ b/dimos/robot/unitree/go2/blueprints/basic/unitree_go2_basic.py
@@ -14,29 +14,47 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import platform
 from typing import Any
 
-from dimos.constants import DEFAULT_CAPACITY_COLOR_IMAGE
+from dimos.constants import (
+    DEFAULT_CAPACITY_COLOR_IMAGE,
+    DEFAULT_CAPACITY_OCCUPANCY_GRID,
+    DEFAULT_CAPACITY_POINTCLOUD,
+)
 from dimos.core.coordination.blueprints import autoconnect
 from dimos.core.global_config import global_config
 from dimos.core.transport import pSHMTransport
+from dimos.msgs.nav_msgs.OccupancyGrid import OccupancyGrid
 from dimos.msgs.sensor_msgs.Image import Image
+from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
 from dimos.robot.unitree.go2.connection import GO2Connection
 from dimos.visualization.vis_module import vis_module
 
-# Mac has some issue with high bandwidth UDP, so we use pSHMTransport for color_image
-# actually we can use pSHMTransport for all platforms, and for all streams
-# TODO need a global transport toggle on blueprints/global config
-_mac_transports: dict[tuple[str, type], pSHMTransport[Image]] = {
+# Route large local replay and mapping streams through SHM on every platform.
+# Small control/status streams continue to use the default LCM transport.
+_local_high_bandwidth_transports: dict[tuple[str, type], pSHMTransport[Any]] = {
     ("color_image", Image): pSHMTransport(
-        "color_image", default_capacity=DEFAULT_CAPACITY_COLOR_IMAGE
+        "/color_image", default_capacity=DEFAULT_CAPACITY_COLOR_IMAGE
+    ),
+    ("lidar", PointCloud2): pSHMTransport("/lidar", default_capacity=DEFAULT_CAPACITY_POINTCLOUD),
+    ("pointcloud", PointCloud2): pSHMTransport(
+        "/pointcloud", default_capacity=DEFAULT_CAPACITY_POINTCLOUD
+    ),
+    ("global_map", PointCloud2): pSHMTransport(
+        "/global_map", default_capacity=DEFAULT_CAPACITY_POINTCLOUD
+    ),
+    ("merged_map", PointCloud2): pSHMTransport(
+        "/merged_map", default_capacity=DEFAULT_CAPACITY_POINTCLOUD
+    ),
+    ("global_costmap", OccupancyGrid): pSHMTransport(
+        "/global_costmap", default_capacity=DEFAULT_CAPACITY_OCCUPANCY_GRID
+    ),
+    ("navigation_costmap", OccupancyGrid): pSHMTransport(
+        "/navigation_costmap", default_capacity=DEFAULT_CAPACITY_OCCUPANCY_GRID
     ),
 }
 
-_transports_base = (
-    autoconnect() if platform.system() == "Linux" else autoconnect().transports(_mac_transports)
-)
+_transports_base = autoconnect().transports(_local_high_bandwidth_transports)
 
 
 def _convert_camera_info(camera_info: Any) -> Any:
diff --git a/dimos/robot/unitree/go2/connection.py b/dimos/robot/unitree/go2/connection.py
index 5568a473ef..e472c39bdb 100644
--- a/dimos/robot/unitree/go2/connection.py
+++ b/dimos/robot/unitree/go2/connection.py
@@ -48,7 +48,7 @@
 from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo
 from dimos.msgs.sensor_msgs.Image import Image
 from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
-from dimos.robot.unitree.connection import UnitreeWebRTCConnection
+from dimos.robot.unitree.connection import AudioMessage, UnitreeWebRTCConnection
 from dimos.utils.decorators.decorators import cached_property, simple_mcache
 
 if sys.version_info < (3, 13):
@@ -207,6 +207,8 @@ class GO2Connection(Module, Camera, Pointcloud):
     lidar: Out[PointCloud2]
     color_image: Out[Image]
     camera_info: Out[CameraInfo]
+    audio: Out[AudioMessage]
+    audio_in: In[bytes]
 
     connection: Go2ConnectionProtocol
     camera_info_static: CameraInfo = _camera_info_static()
@@ -246,6 +248,19 @@ def onimage(image: Image) -> None:
         self.register_disposable(self.connection.video_stream().subscribe(onimage))
         self.register_disposable(Disposable(self.cmd_vel.subscribe(self.move)))
 
+        if hasattr(self.connection, "audio_stream"):
+            try:
+                self.register_disposable(
+                    self.connection.audio_stream().subscribe(self.audio.publish)
+                )
+            except Exception as e:
+                logger.warning(f"audio_stream not started: {e}")
+
+        if hasattr(self.connection, "play_wav_bytes"):
+            self.register_disposable(
+                Disposable(self.audio_in.subscribe(self.connection.play_wav_bytes))
+            )
+
         self._camera_info_thread = Thread(
             target=self.publish_camera_info,
             daemon=True,
@@ -352,6 +367,35 @@ def publish_request(self, topic: str, data: dict[str, Any]) -> dict[Any, Any]:
         """
         return self.connection.publish_request(topic, data)
 
+    @skill
+    def play_wav(self, wav_path: str) -> str:
+        """Play a WAV file through the robot's speaker (megaphone mode).
+
+        Args:
+            wav_path: Path on the dimos host filesystem. Use `play_wav_b64`
+                instead when the agent runs on a different machine.
+        """
+        if not hasattr(self.connection, "play_wav_bytes"):
+            return "play_wav unavailable on this connection (simulation?)"
+        with open(wav_path, "rb") as f:
+            self.connection.play_wav_bytes(f.read())
+        return f"queued {wav_path} for playback"
+
+    @skill
+    def play_wav_b64(self, wav_b64: str) -> str:
+        """Play a base64-encoded WAV through the robot's speaker.
+
+        Use this from remote MCP clients that don't share the dimos host's
+        filesystem. 44.1 kHz mono WAVs play most reliably.
+        """
+        import base64
+
+        if not hasattr(self.connection, "play_wav_bytes"):
+            return "play_wav unavailable on this connection (simulation?)"
+        wav = base64.b64decode(wav_b64)
+        self.connection.play_wav_bytes(wav)
+        return f"queued {len(wav)} bytes for playback"
+
     @skill
     def observe(self) -> Image | None:
         """Returns the latest video frame from the robot camera. Use this skill for any visual world queries.
diff --git a/dimos/robot/unitree/tilt_spec.py b/dimos/robot/unitree/tilt_spec.py
new file mode 100644
index 0000000000..a1ef528c74
--- /dev/null
+++ b/dimos/robot/unitree/tilt_spec.py
@@ -0,0 +1,26 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Protocol
+
+from dimos.spec.utils import Spec
+
+
+# Module-ref spec for tilting the robot body. Resolves (structurally) to
+# UnitreeSkillContainer.tilt_body, letting another module (e.g. TakePictureSkill)
+# aim the body-fixed camera without owning the WebRTC connection.
+class TiltSpec(Spec, Protocol):
+    def tilt_body(
+        self, pitch_deg: float = 0.0, roll_deg: float = 0.0, yaw_deg: float = 0.0
+    ) -> str: ...
diff --git a/dimos/robot/unitree/unitree_skill_container.py b/dimos/robot/unitree/unitree_skill_container.py
index 88194473e6..160a1b268a 100644
--- a/dimos/robot/unitree/unitree_skill_container.py
+++ b/dimos/robot/unitree/unitree_skill_container.py
@@ -19,7 +19,7 @@
 import math
 import time
 
-from unitree_webrtc_connect.constants import RTC_TOPIC
+from unitree_webrtc_connect.constants import RTC_TOPIC, SPORT_CMD
 
 from dimos.agents.annotation import skill
 from dimos.core.core import rpc
@@ -300,6 +300,37 @@ def execute_sport_command(self, command_name: str) -> str:
             logger.error(f"Failed to execute {command_name}: {e}")
             return "Failed to execute the command."
 
+    @skill
+    def tilt_body(
+        self, pitch_deg: float = 0.0, roll_deg: float = 0.0, yaw_deg: float = 0.0
+    ) -> str:
+        """Tilt the robot's body to aim its (fixed) camera up or down.
+
+        The Go2's camera is body-mounted, so "looking up/down" means pitching the
+        whole body. NEGATIVE pitch_deg looks UP, positive looks DOWN. Useful range
+        is about -40..40 degrees; roll/yaw are optional. The robot must be standing
+        first (run StandUp or BalanceStand). This is a held pose — call tilt_body()
+        with no args to return level.
+
+        Example: tilt_body(pitch_deg=-20)   # look up at e.g. a tabletop
+        """
+        # Clamp to the Go2's safe standing envelope (radians).
+        roll = max(-0.75, min(0.75, math.radians(float(roll_deg))))
+        pitch = max(-0.75, min(0.75, math.radians(float(pitch_deg))))
+        yaw = max(-0.6, min(0.6, math.radians(float(yaw_deg))))
+        try:
+            self._connection.publish_request(
+                RTC_TOPIC["SPORT_MOD"],
+                {
+                    "api_id": SPORT_CMD["Euler"],
+                    "parameter": {"data": {"x": roll, "y": pitch, "z": yaw}},
+                },
+            )
+            return f"Body tilted (pitch={pitch_deg}, roll={roll_deg}, yaw={yaw_deg} deg)."
+        except Exception as e:
+            logger.error(f"Failed to tilt body: {e}")
+            return "Failed to tilt the body."
+
 
 _commands = "\n".join(
     [f'- "{name}": {description}' for name, (_, description) in _UNITREE_COMMANDS.items()]
diff --git a/dimos/stream/audio/tts/node_gemini.py b/dimos/stream/audio/tts/node_gemini.py
new file mode 100644
index 0000000000..6563fb6f9d
--- /dev/null
+++ b/dimos/stream/audio/tts/node_gemini.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import threading
+import time
+
+from google import genai
+from google.genai import types
+import numpy as np
+from reactivex import Observable, Subject
+
+from dimos.stream.audio.base import (
+    AbstractAudioEmitter,
+    AudioEvent,
+)
+from dimos.stream.audio.text.base import AbstractTextConsumer, AbstractTextEmitter
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+# Gemini TTS returns single-channel 16-bit PCM at 24 kHz, little-endian.
+_SAMPLE_RATE = 24000
+
+
+class GeminiTTSNode(AbstractTextConsumer, AbstractAudioEmitter, AbstractTextEmitter):
+    """
+    A text-to-speech node that consumes text, emits audio using Google's Gemini TTS API, and passes through text.
+
+    This node implements AbstractTextConsumer to receive text input, AbstractAudioEmitter
+    to provide audio output, and AbstractTextEmitter to pass through the text being spoken,
+    allowing it to be inserted into a text-to-audio pipeline with text passthrough capabilities.
+
+    Mirrors OpenAITTSNode but uses the Gemini API directly (google-genai), so it
+    needs no OpenAI key and reuses the GOOGLE_API_KEY already used for the LLM
+    and embeddings. Only the ``*-preview-tts`` models support audio output.
+    """
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        voice: str = "Kore",
+        model: str = "gemini-2.5-flash-preview-tts",
+        instruction: str = "Read the following text aloud verbatim",
+    ) -> None:
+        """
+        Initialize GeminiTTSNode.
+
+        Args:
+            api_key: Gemini API key (if None, reads GOOGLE_API_KEY / GEMINI_API_KEY)
+            voice: Prebuilt Gemini voice name (e.g. "Kore", "Puck", "Charon")
+            model: Gemini TTS model (must be a ``*-preview-tts`` model)
+            instruction: Style directive prepended to each utterance. Gemini
+                narrates only the text after it (not the directive itself); it
+                also stops the model from "answering" short prompts instead of
+                reading them. Set to "" to send the raw text unmodified.
+        """
+        self.voice = voice
+        self.model = model
+        self.instruction = instruction
+
+        api_key = api_key or os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
+        if not api_key:
+            raise ValueError("Gemini TTS requires GEMINI_API_KEY or GOOGLE_API_KEY to be set")
+        self.client = genai.Client(api_key=api_key)
+
+        # Initialize state
+        self.audio_subject = Subject()  # type: ignore[var-annotated]
+        self.text_subject = Subject()  # type: ignore[var-annotated]
+        self.subscription = None
+        self.processing_thread = None
+        self.is_running = True
+        self.text_queue = []  # type: ignore[var-annotated]
+        self.queue_lock = threading.Lock()
+
+    def emit_audio(self) -> Observable:  # type: ignore[type-arg]
+        """
+        Returns an observable that emits audio frames.
+
+        Returns:
+            Observable emitting AudioEvent objects
+        """
+        return self.audio_subject
+
+    def emit_text(self) -> Observable:  # type: ignore[type-arg]
+        """
+        Returns an observable that emits the text being spoken.
+
+        Returns:
+            Observable emitting text strings
+        """
+        return self.text_subject
+
+    def consume_text(self, text_observable: Observable) -> "AbstractTextConsumer":  # type: ignore[type-arg]
+        """
+        Start consuming text from the observable source.
+
+        Args:
+            text_observable: Observable source of text strings
+
+        Returns:
+            Self for method chaining
+        """
+        # Idempotent: start the worker + subscription only once. This used to be
+        # called per utterance, leaking a new thread + subscription each time
+        # (and re-logging "Starting GeminiTTSNode" on every speak). If called
+        # again with a new source, just re-point the subscription.
+        if self.processing_thread is not None and self.processing_thread.is_alive():
+            if self.subscription is not None:
+                self.subscription.dispose()
+            self.subscription = text_observable.subscribe(  # type: ignore[assignment]
+                on_next=self._queue_text,
+                on_error=lambda e: logger.error(f"Error in GeminiTTSNode: {e}"),
+            )
+            return self
+
+        logger.info("Starting GeminiTTSNode")
+
+        # Start the processing thread
+        self.processing_thread = threading.Thread(target=self._process_queue, daemon=True)  # type: ignore[assignment]
+        self.processing_thread.start()  # type: ignore[attr-defined]
+
+        # Subscribe to the text observable
+        self.subscription = text_observable.subscribe(  # type: ignore[assignment]
+            on_next=self._queue_text,
+            on_error=lambda e: logger.error(f"Error in GeminiTTSNode: {e}"),
+        )
+
+        return self
+
+    def _queue_text(self, text: str) -> None:
+        """
+        Add text to the processing queue.
+
+        Args:
+            text: The text to synthesize
+        """
+        if not text.strip():
+            return
+
+        with self.queue_lock:
+            self.text_queue.append(text)
+
+    def _process_queue(self) -> None:
+        """Background thread to process the text queue."""
+        while self.is_running:
+            # Check if there's text to process
+            text_to_process = None
+            with self.queue_lock:
+                if self.text_queue:
+                    text_to_process = self.text_queue.pop(0)
+
+            if text_to_process:
+                self._synthesize_speech(text_to_process)
+            else:
+                # Sleep a bit to avoid busy-waiting
+                time.sleep(0.1)
+
+    def _synthesize_speech(self, text: str) -> None:
+        """
+        Convert text to speech using the Gemini TTS API.
+
+        Args:
+            text: The text to synthesize
+        """
+        try:
+            contents = f"{self.instruction}: {text}" if self.instruction else text
+            response = self.client.models.generate_content(
+                model=self.model,
+                contents=contents,
+                config=types.GenerateContentConfig(
+                    response_modalities=["AUDIO"],
+                    speech_config=types.SpeechConfig(
+                        voice_config=types.VoiceConfig(
+                            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=self.voice)
+                        )
+                    ),
+                ),
+            )
+            self.text_subject.on_next(text)
+
+            # Gemini returns raw 16-bit PCM bytes (24 kHz, mono, little-endian).
+            pcm_bytes = response.candidates[0].content.parts[0].inline_data.data
+            audio_array = np.frombuffer(pcm_bytes, dtype=np.int16)
+
+            audio_event = AudioEvent(
+                data=audio_array,
+                sample_rate=_SAMPLE_RATE,
+                timestamp=time.time(),
+                channels=1,
+            )
+
+            self.audio_subject.on_next(audio_event)
+
+        except Exception as e:
+            logger.error(f"Error synthesizing speech: {e}")
+
+    def dispose(self) -> None:
+        """Clean up resources."""
+        logger.info("Disposing GeminiTTSNode")
+
+        self.is_running = False
+
+        # Clear pending items so the thread doesn't start new synthesis.
+        with self.queue_lock:
+            self.text_queue.clear()
+
+        if self.processing_thread and self.processing_thread.is_alive():
+            self.processing_thread.join(timeout=2.0)
+
+        if self.subscription:
+            self.subscription.dispose()
+            self.subscription = None
+
+        # Complete the subjects
+        self.audio_subject.on_completed()
+        self.text_subject.on_completed()
+
+
+if __name__ == "__main__":
+    from dimos.stream.audio.node_output import SounddeviceAudioOutput
+    from dimos.stream.audio.text.node_stdout import TextPrinterNode
+    from dimos.stream.audio.utils import keepalive
+
+    # Create a simple text subject that we can push values to
+    text_subject = Subject()  # type: ignore[var-annotated]
+
+    tts_node = GeminiTTSNode(voice="Kore")
+    tts_node.consume_text(text_subject)
+
+    # Create and connect an audio output node - match Gemini's 24 kHz output
+    audio_output = SounddeviceAudioOutput(sample_rate=_SAMPLE_RATE)
+    audio_output.consume_audio(tts_node.emit_audio())
+
+    stdout = TextPrinterNode(prefix="[Spoken Text] ")
+    stdout.consume_text(tts_node.emit_text())
+
+    test_messages = [
+        "Hello!",
+        "This is a test of the Gemini text to speech system.",
+    ]
+
+    print("Starting Gemini TTS test...")
+    print("-" * 60)
+
+    for message in test_messages:
+        text_subject.on_next(message)
+
+    keepalive()
diff --git a/dimos/visualization/rerun/bridge.py b/dimos/visualization/rerun/bridge.py
index 2f5fb1efa9..eb00f23730 100644
--- a/dimos/visualization/rerun/bridge.py
+++ b/dimos/visualization/rerun/bridge.py
@@ -43,6 +43,7 @@
 
 from dimos.core.core import rpc
 from dimos.core.module import Module, ModuleConfig
+from dimos.core.transport import PubSubTransport
 from dimos.protocol.pubsub.impl.lcmpubsub import LCM
 from dimos.protocol.pubsub.patterns import Glob, pattern_matches
 from dimos.protocol.pubsub.spec import SubscribeAllCapable
@@ -164,7 +165,10 @@ def _default_blueprint() -> Blueprint:
 
 
 class Config(ModuleConfig):
+    # Pubsubs cover discoverable sources such as LCM. visual_transports is
+    # populated by the coordinator for concrete local streams such as SHM.
     pubsubs: list[SubscribeAllCapable[Any, Any]] = field(default_factory=lambda: [LCM()])
+    visual_transports: list[PubSubTransport[Any]] = field(default_factory=list)
 
     visual_override: dict[Glob | str, Callable[[Any], Archetype] | None] = field(
         default_factory=dict
@@ -186,10 +190,12 @@ class Config(ModuleConfig):
 
 
 class RerunBridgeModule(Module):
-    """Bridge that logs messages from pubsubs to Rerun.
+    """Bridge that logs transport messages to Rerun.
 
-    Spawns its own Rerun viewer and subscribes to all topics on each provided
-    pubsub. Any message that has a to_rerun() method is automatically logged.
+    Spawns its own Rerun viewer and subscribes to configured pubsubs and
+    explicit visual transports. Pubsubs cover discoverable transports such as
+    LCM; visual_transports covers concrete local transports such as SHM.
+    Any message that has a to_rerun() method is automatically logged.
 
     Example:
         from dimos.protocol.pubsub.impl.lcmpubsub import LCM
@@ -215,6 +221,8 @@ def __init__(self, **kwargs: Any) -> None:
         self._last_log = {}
         self._override_cache: dict[str, Callable[[Any], RerunData | None]] = {}
         self._frame_attached: dict[str, str] = {}
+        self._subscribed_visual_transport_topics: set[str] = set()
+        self._started = False
 
     @property
     def host(self) -> str:
@@ -265,13 +273,56 @@ def composed(msg: Any) -> RerunData | None:
         return composed
 
     def _get_entity_path(self, topic: Any) -> str:
+        """Map a transport topic to a Rerun entity path.
+
+        LCM topics usually already include a leading slash and a type suffix.
+        SHM topics are plain strings. Normalize both forms so visual overrides
+        such as "world/color_image" match consistently.
+        """
         if self.config.topic_to_entity:
             return self.config.topic_to_entity(topic)
 
         topic_str = getattr(topic, "name", None) or str(topic)
         topic_str = topic_str.split("#")[0]  # strip LCM topic suffix
+        if not topic_str.startswith("/"):
+            topic_str = f"/{topic_str}"
         return f"{self.config.entity_prefix}{topic_str}"
 
+    @rpc
+    def set_visual_transports(self, transports: list[PubSubTransport[Any]]) -> None:
+        """Replace explicit visual transports and subscribe when running.
+
+        The coordinator calls this after stream wiring and after loading
+        additional blueprints into an existing coordinator.
+        """
+        self.config.visual_transports = transports
+        if self._started:
+            self._subscribe_visual_transports()
+
+    def _subscribe_visual_transports(self) -> None:
+        """Attach to configured SHM streams once per topic."""
+        for transport in self.config.visual_transports:
+            topic = str(getattr(transport, "topic", ""))
+            if not topic or topic in self._subscribed_visual_transport_topics:
+                continue
+            self._subscribed_visual_transport_topics.add(topic)
+            if hasattr(transport, "start"):
+                transport.start()
+            # If subscribe raises, the bridge still owns cleanup for the transport it started.
+            self.register_disposable(Disposable(transport.stop))
+            transport_topic = getattr(transport, "topic", topic)
+
+            def on_visual_message(msg: Any, transport_topic: Any = transport_topic) -> None:
+                self._on_message(msg, transport_topic)
+
+            unsub = transport.subscribe(
+                # Capture the current topic so callbacks keep the correct
+                # entity path even as this loop advances to the next transport.
+                on_visual_message
+            )
+            if unsub is not None:
+                self.register_disposable(Disposable(unsub))
+
     def _on_message(self, msg: Any, topic: Any) -> None:
         """Handle incoming message - log to rerun."""
 
@@ -306,6 +357,7 @@ def _on_message(self, msg: Any, topic: Any) -> None:
     @rpc
     def start(self) -> None:
         super().start()
+        self._started = True
 
         logger.info("Rerun bridge starting")
 
@@ -397,6 +449,8 @@ def start(self) -> None:
             unsub = pubsub.subscribe_all(self._on_message)
             self.register_disposable(Disposable(unsub))
 
+        self._subscribe_visual_transports()
+
         for pubsub in self.config.pubsubs:
             if hasattr(pubsub, "stop"):
                 self.register_disposable(Disposable(pubsub.stop))  # type: ignore[union-attr]
@@ -506,8 +560,10 @@ def log_blueprint_graph(self, dot_code: str, module_names: list[str]) -> None:
 
     @rpc
     def stop(self) -> None:
+        self._started = False
         self._override_cache.clear()
         self._frame_attached.clear()
+        self._subscribed_visual_transport_topics.clear()
         super().stop()
 
 
diff --git a/dimos/web/audio_ws_module.py b/dimos/web/audio_ws_module.py
new file mode 100644
index 0000000000..8f54d0a57f
--- /dev/null
+++ b/dimos/web/audio_ws_module.py
@@ -0,0 +1,169 @@
+"""Bridge robot audio to a web client.
+
+  - WebSocket /audio_out  -> sends int16 PCM frames from the Go2 mic
+  - GET       /audio_info -> reports the current sample_rate + channels (so
+                              consumers know how to decode /audio_out frames)
+  - POST      /play       -> uploads a WAV body, plays it on the Go2 speaker
+
+Compose with `unitree-go2-basic`:
+
+    dimos run unitree-go2-basic audio-ws-module
+
+Then:
+    ws://127.0.0.1:7781/audio_out                      (mic, binary frames)
+    curl http://127.0.0.1:7781/audio_info              (sample_rate, channels)
+    curl --data-binary @clip.wav http://127.0.0.1:7781/play
+"""
+
+import asyncio
+import queue
+import threading
+from typing import Any
+
+from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect
+from fastapi.middleware.cors import CORSMiddleware
+import uvicorn
+
+from dimos.core.core import rpc
+from dimos.core.module import Module, ModuleConfig
+from dimos.core.stream import In, Out
+from dimos.robot.unitree.connection import AudioMessage
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+
+class AudioWsConfig(ModuleConfig):
+    port: int = 7781
+
+
+class AudioWsModule(Module):
+    config: AudioWsConfig
+    audio: In[AudioMessage]   # robot mic (from GO2Connection.audio)
+    audio_in: Out[bytes]      # WAV bytes for robot speaker (-> GO2Connection.audio_in)
+
+    def __init__(self, **kwargs: Any) -> None:
+        super().__init__(**kwargs)
+        self._app = FastAPI()
+        self._app.add_middleware(
+            CORSMiddleware,
+            allow_origins=["*"],
+            allow_methods=["GET", "POST"],
+            allow_headers=["*"],
+        )
+        # Cross-thread handoff: subscriber thread enqueues, broadcast task drains.
+        self._frame_queue: queue.Queue[bytes] = queue.Queue(maxsize=200)
+        self._clients: set[WebSocket] = set()
+        self._server: uvicorn.Server | None = None
+        self._thread: threading.Thread | None = None
+        self._last_format: dict[str, int] | None = None
+        self._stop_event = threading.Event()
+        self._setup_routes()
+
+    def _setup_routes(self) -> None:
+        @self._app.get("/audio_info")
+        def audio_info() -> dict[str, Any]:
+            if self._last_format is None:
+                return {"status": "no frame yet"}
+            return {"status": "ok", **self._last_format}
+
+        @self._app.websocket("/audio_out")
+        async def audio_out(ws: WebSocket) -> None:
+            await ws.accept()
+            if self._last_format is not None:
+                await ws.send_json({"event": "format", **self._last_format})
+            self._clients.add(ws)
+            try:
+                while True:
+                    # Server -> client only; sink any inbound messages.
+                    await ws.receive()
+            except WebSocketDisconnect:
+                pass
+            finally:
+                self._clients.discard(ws)
+
+        @self._app.post("/play")
+        async def play(req: Request) -> dict[str, str]:
+            wav = await req.body()
+            if not wav:
+                return {"status": "empty"}
+            self.audio_in.publish(wav)
+            return {"status": "queued", "bytes": str(len(wav))}
+
+        @self._app.on_event("startup")
+        async def _spawn_broadcaster() -> None:
+            asyncio.create_task(self._broadcast_loop())
+
+    async def _broadcast_loop(self) -> None:
+        """Drain the cross-thread queue and fan out to WebSocket clients."""
+        loop = asyncio.get_running_loop()
+        while not self._stop_event.is_set():
+            try:
+                chunk = await loop.run_in_executor(None, self._frame_queue.get, True, 0.5)
+            except queue.Empty:
+                continue
+            if not self._clients:
+                continue
+            stale: list[WebSocket] = []
+            for ws in list(self._clients):
+                try:
+                    await ws.send_bytes(chunk)
+                except Exception:
+                    stale.append(ws)
+            for ws in stale:
+                self._clients.discard(ws)
+
+    @rpc
+    def start(self) -> None:
+        super().start()
+
+        self.audio.subscribe(self._on_audio)
+
+        def run() -> None:
+            config = uvicorn.Config(
+                self._app,
+                host="127.0.0.1",
+                port=self.config.port,
+                log_level="warning",
+                lifespan="on",
+            )
+            self._server = uvicorn.Server(config)
+            try:
+                # Server.run() owns its own asyncio loop and cleanup; this
+                # avoids the run_until_complete/run_coroutine_threadsafe race
+                # that produced "Event loop stopped before Future completed".
+                self._server.run()
+            except OSError as e:
+                logger.error(
+                    f"audio-ws failed to bind :{self.config.port} ({e}); "
+                    f"is another instance running? `lsof -ti :{self.config.port} | xargs kill -9`"
+                )
+            except Exception:
+                logger.exception("audio-ws server crashed")
+
+        self._thread = threading.Thread(target=run, daemon=True, name="audio-ws-uvicorn")
+        self._thread.start()
+        logger.info(
+            f"audio-ws-module: ws://127.0.0.1:{self.config.port}/audio_out  "
+            f"| GET http://127.0.0.1:{self.config.port}/audio_info  "
+            f"| POST http://127.0.0.1:{self.config.port}/play"
+        )
+
+    def _on_audio(self, msg: AudioMessage) -> None:
+        self._last_format = {"sample_rate": msg.sample_rate, "channels": msg.channels}
+        try:
+            self._frame_queue.put_nowait(msg.data)
+        except queue.Full:
+            # Drop oldest to bound latency.
+            try:
+                self._frame_queue.get_nowait()
+                self._frame_queue.put_nowait(msg.data)
+            except queue.Empty:
+                pass
+
+    @rpc
+    def stop(self) -> None:
+        self._stop_event.set()
+        if self._server is not None:
+            self._server.should_exit = True
+        super().stop()
diff --git a/dimos/web/cmd_bridge_module.py b/dimos/web/cmd_bridge_module.py
new file mode 100644
index 0000000000..266808bfa9
--- /dev/null
+++ b/dimos/web/cmd_bridge_module.py
@@ -0,0 +1,222 @@
+"""HTTP bridge for driving the robot from an external process.
+
+  - POST /cmd_vel  -> publish a single Twist for `duration` seconds (then 0)
+  - POST /path     -> execute a sequence of relative moves in order
+  - POST /stop     -> emergency zero-Twist
+  - GET  /pose     -> latest base_link pose in the world frame
+
+Compose with any blueprint that has GO2Connection (sim or real robot):
+
+    dimos --simulation run unitree-go2-basic cmd-bridge-module
+
+The "path" is a list of relative steps that are sent as raw cmd_vel
+Twists for a per-step duration, then a stop. This is open-loop — no
+SLAM, no obstacle avoidance — but works in sim and on the bare-metal
+robot without requiring the nav stack. Good fit for a VLM that decides
+the next short step from a camera frame and re-plans every iteration.
+"""
+
+import asyncio
+import math
+import queue
+import threading
+import time
+from typing import Any
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, Field
+import uvicorn
+
+from dimos.core.core import rpc
+from dimos.core.module import Module, ModuleConfig
+from dimos.core.stream import In, Out
+from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
+from dimos.msgs.geometry_msgs.Twist import Twist
+from dimos.msgs.geometry_msgs.Vector3 import Vector3
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+
+class CmdBridgeConfig(ModuleConfig):
+    port: int = 7782
+
+
+class TwistRequest(BaseModel):
+    linear: list[float] = Field(default=[0.0, 0.0, 0.0], min_length=3, max_length=3)
+    angular: list[float] = Field(default=[0.0, 0.0, 0.0], min_length=3, max_length=3)
+    duration: float = 0.5  # seconds to hold the command before stopping
+
+
+class PathStep(BaseModel):
+    """One step of an open-loop path.
+
+    `linear`/`angular` are the same Twist components as `/cmd_vel`. The step
+    runs for `duration` seconds, then the next step starts. If the model
+    prefers semantic steps, use `forward`/`left`/`degrees` and let the bridge
+    convert.
+    """
+
+    linear: list[float] | None = None
+    angular: list[float] | None = None
+    # Semantic alternative: distances in meters, rotation in degrees,
+    # executed over `duration` seconds. Converted to Twist by dividing.
+    forward: float | None = None
+    left: float | None = None
+    degrees: float | None = None
+    duration: float = 1.0
+
+
+class PathRequest(BaseModel):
+    steps: list[PathStep]
+
+
+class CmdBridgeModule(Module):
+    """Open-loop HTTP -> cmd_vel bridge."""
+
+    config: CmdBridgeConfig
+    cmd_vel: Out[Twist]
+    odom: In[PoseStamped]
+
+    def __init__(self, **kwargs: Any) -> None:
+        super().__init__(**kwargs)
+        self._app = FastAPI()
+        self._app.add_middleware(
+            CORSMiddleware,
+            allow_origins=["*"],
+            allow_methods=["GET", "POST"],
+            allow_headers=["*"],
+        )
+        self._latest_pose: PoseStamped | None = None
+        self._server: uvicorn.Server | None = None
+        self._thread: threading.Thread | None = None
+        # Serialize POSTs so /path doesn't interleave with /cmd_vel.
+        self._drive_lock = threading.Lock()
+        # Cooperative cancel for an in-flight /path when /stop arrives.
+        self._cancel_event = threading.Event()
+        self._setup_routes()
+
+    def _setup_routes(self) -> None:
+        @self._app.get("/pose")
+        def pose() -> dict[str, Any]:
+            if self._latest_pose is None:
+                return {"status": "no pose yet"}
+            p = self._latest_pose
+            q = p.orientation
+            # Yaw from quaternion (assumes near-zero roll/pitch, fine for a quadruped).
+            siny = 2.0 * (q.w * q.z + q.x * q.y)
+            cosy = 1.0 - 2.0 * (q.y * q.y + q.z * q.z)
+            theta = math.atan2(siny, cosy)
+            return {
+                "status": "ok",
+                "x": p.position.x,
+                "y": p.position.y,
+                "z": p.position.z,
+                "theta": theta,
+                "ts": p.ts,
+            }
+
+        @self._app.post("/cmd_vel")
+        def cmd_vel(req: TwistRequest) -> dict[str, Any]:
+            with self._drive_lock:
+                self._cancel_event.clear()
+                self._publish_twist(req.linear, req.angular)
+                self._sleep_or_cancel(req.duration)
+                self._publish_twist([0, 0, 0], [0, 0, 0])
+            return {"status": "ok"}
+
+        @self._app.post("/path")
+        def path(req: PathRequest) -> dict[str, Any]:
+            with self._drive_lock:
+                self._cancel_event.clear()
+                executed = 0
+                for step in req.steps:
+                    if self._cancel_event.is_set():
+                        break
+                    linear, angular = _step_to_twist(step)
+                    self._publish_twist(linear, angular)
+                    if self._sleep_or_cancel(step.duration):
+                        break
+                    executed += 1
+                self._publish_twist([0, 0, 0], [0, 0, 0])
+            return {
+                "status": "cancelled" if self._cancel_event.is_set() else "ok",
+                "executed": executed,
+                "total": len(req.steps),
+            }
+
+        @self._app.post("/stop")
+        def stop() -> dict[str, str]:
+            self._cancel_event.set()
+            self._publish_twist([0, 0, 0], [0, 0, 0])
+            return {"status": "stopped"}
+
+    def _publish_twist(self, linear: list[float], angular: list[float]) -> None:
+        self.cmd_vel.publish(
+            Twist(
+                linear=Vector3(linear[0], linear[1], linear[2]),
+                angular=Vector3(angular[0], angular[1], angular[2]),
+                ts=time.time(),
+            )
+        )
+
+    def _sleep_or_cancel(self, duration: float) -> bool:
+        """Sleep up to `duration`, returns True if cancelled mid-sleep."""
+        return self._cancel_event.wait(timeout=max(0.0, duration))
+
+    def _on_odom(self, pose: PoseStamped) -> None:
+        self._latest_pose = pose
+
+    @rpc
+    def start(self) -> None:
+        super().start()
+        self.register_disposable(self.odom.subscribe(self._on_odom))
+
+        def run() -> None:
+            config = uvicorn.Config(
+                self._app,
+                host="127.0.0.1",
+                port=self.config.port,
+                log_level="warning",
+                lifespan="on",
+            )
+            self._server = uvicorn.Server(config)
+            try:
+                self._server.run()
+            except OSError as e:
+                logger.error(
+                    f"cmd-bridge failed to bind :{self.config.port} ({e}); "
+                    f"another instance? `lsof -ti :{self.config.port} | xargs kill -9`"
+                )
+            except Exception:
+                logger.exception("cmd-bridge server crashed")
+
+        self._thread = threading.Thread(target=run, daemon=True, name="cmd-bridge-uvicorn")
+        self._thread.start()
+        logger.info(
+            f"cmd-bridge-module: POST http://127.0.0.1:{self.config.port}/cmd_vel  "
+            f"| POST /path | POST /stop | GET /pose"
+        )
+
+    @rpc
+    def stop(self) -> None:
+        self._cancel_event.set()
+        self._publish_twist([0, 0, 0], [0, 0, 0])
+        if self._server is not None:
+            self._server.should_exit = True
+        super().stop()
+
+
+def _step_to_twist(step: PathStep) -> tuple[list[float], list[float]]:
+    """Convert a PathStep to (linear, angular) Twist components."""
+    if step.linear is not None or step.angular is not None:
+        return (
+            step.linear or [0.0, 0.0, 0.0],
+            step.angular or [0.0, 0.0, 0.0],
+        )
+    dur = max(step.duration, 1e-3)
+    fwd = (step.forward or 0.0) / dur
+    lat = (step.left or 0.0) / dur
+    yaw = math.radians(step.degrees or 0.0) / dur
+    return [fwd, lat, 0.0], [0.0, 0.0, yaw]
diff --git a/dimos/web/mjpeg_module.py b/dimos/web/mjpeg_module.py
new file mode 100644
index 0000000000..ccac9200f8
--- /dev/null
+++ b/dimos/web/mjpeg_module.py
@@ -0,0 +1,97 @@
+"""Expose color_image as an MJPEG HTTP stream + single-frame snapshot.
+
+Usage:
+    dimos --simulation run unitree-go2-basic camera-mjpeg-module
+    # MJPEG:    http://127.0.0.1:7780/video_feed/color_image
+    # snapshot: http://127.0.0.1:7780/snapshot/color_image
+"""
+
+import threading
+from typing import Any
+
+import cv2
+from fastapi import HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import Response
+import numpy as np
+import reactivex as rx
+from reactivex import operators as ops
+
+from dimos.core.core import rpc
+from dimos.core.module import Module, ModuleConfig
+from dimos.core.stream import In
+from dimos.msgs.sensor_msgs.Image import Image
+from dimos.utils.logging_config import setup_logger
+from dimos.web.robot_web_interface import RobotWebInterface
+
+logger = setup_logger()
+
+
+class CameraMjpegConfig(ModuleConfig):
+    port: int = 7780
+    stream_key: str = "color_image"
+
+
+class CameraMjpegModule(Module):
+    config: CameraMjpegConfig
+    color_image: In[Image]
+
+    def __init__(self, **kwargs: Any) -> None:
+        super().__init__(**kwargs)
+        self._subject: rx.subject.Subject = rx.subject.Subject()
+        self._web: RobotWebInterface | None = None
+        self._thread: threading.Thread | None = None
+        self._latest_jpeg: bytes | None = None
+        self._latest_lock = threading.Lock()
+
+    @rpc
+    def start(self) -> None:
+        super().start()
+        self._web = RobotWebInterface(
+            port=self.config.port,
+            **{self.config.stream_key: self._subject.pipe(ops.share())},
+        )
+        self._web.app.add_middleware(
+            CORSMiddleware,
+            allow_origins=["*"],
+            allow_methods=["GET"],
+            allow_headers=["*"],
+        )
+
+        stream_key = self.config.stream_key
+
+        @self._web.app.get(f"/snapshot/{stream_key}")
+        def snapshot() -> Response:
+            with self._latest_lock:
+                buf = self._latest_jpeg
+            if buf is None:
+                raise HTTPException(status_code=503, detail="no frame yet")
+            return Response(content=buf, media_type="image/jpeg")
+
+        self._thread = threading.Thread(target=self._web.run, daemon=True)
+        self._thread.start()
+        self.color_image.subscribe(self._on_image)
+        logger.info(
+            f"MJPEG: http://127.0.0.1:{self.config.port}/video_feed/{stream_key}  "
+            f"| snapshot: http://127.0.0.1:{self.config.port}/snapshot/{stream_key}"
+        )
+
+    def _on_image(self, img: Image) -> None:
+        arr = img.as_numpy()
+        if arr.ndim == 3 and arr.shape[2] == 3:
+            arr = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
+        self._subject.on_next(arr)
+
+        ok, jpg = cv2.imencode(".jpg", arr)
+        if ok:
+            with self._latest_lock:
+                self._latest_jpeg = np.asarray(jpg).tobytes()
+
+    @rpc
+    def stop(self) -> None:
+        if self._web is not None:
+            try:
+                self._web.shutdown()
+            except Exception:
+                pass
+        super().stop()
diff --git a/go2-start.sh b/go2-start.sh
new file mode 100755
index 0000000000..180cd6a77c
--- /dev/null
+++ b/go2-start.sh
@@ -0,0 +1,235 @@
+#!/usr/bin/env bash
+# go2-start.sh — hackathon quickstart for the dimairos05 Go2
+#
+# Usage:
+#   ./go2-start.sh                                   # unitree-go2-basic + web bridges
+#   ./go2-start.sh unitree-go2-agentic-gemini        # any blueprint
+#   ROBOT_IP=10.0.0.42 ./go2-start.sh                # override IP
+#   BRIDGES=0 ./go2-start.sh                         # skip camera-mjpeg + audio-ws
+#   EXTRA="foo-module bar-module" ./go2-start.sh     # tack on more modules
+#   SIMULATION=1 ./go2-start.sh                      # MuJoCo sim (camera only, no audio)
+#
+# Local LLM presets — pick exactly one (default model can be overridden):
+#   LMSTUDIO=1 ./go2-start.sh unitree-go2-agentic
+#     -> OpenAI-compat at http://127.0.0.1:1234/v1
+#     -> LMSTUDIO_MODEL=qwen/qwen3-8b (override if a different model is loaded)
+#
+#   MLXVLM=1 ./go2-start.sh unitree-go2-agentic
+#     -> OpenAI-compat at http://127.0.0.1:8080/v1  (mlxvlm Gemma-4 server)
+#     -> MLXVLM_MODEL=lmstudio-community/gemma-4-E4B-it-MLX-4bit
+#
+# Default extras (with BRIDGES=1):
+#   camera-mjpeg-module     http://127.0.0.1:7780/video_feed/color_image
+#                           http://127.0.0.1:7780/snapshot/color_image
+#   audio-ws-module         ws://127.0.0.1:7781/audio_out   (robot only)
+#                           POST http://127.0.0.1:7781/play (robot only)
+
+set -euo pipefail
+
+# ---- config ----------------------------------------------------------------
+ROBOT_IP="${ROBOT_IP:-192.168.12.1}"
+EXPECTED_SSID="${EXPECTED_SSID:-dimairos05}"
+BLUEPRINT="${1:-unitree-go2-basic}"
+VENV_DIR="${VENV_DIR:-.venv}"
+BRIDGES="${BRIDGES:-1}"
+EXTRA="${EXTRA:-}"
+SIMULATION="${SIMULATION:-0}"
+LMSTUDIO="${LMSTUDIO:-0}"
+MLXVLM="${MLXVLM:-0}"
+LMSTUDIO_MODEL="${LMSTUDIO_MODEL:-qwen/qwen3-8b}"
+MLXVLM_MODEL="${MLXVLM_MODEL:-lmstudio-community/gemma-4-E4B-it-MLX-4bit}"
+# ----------------------------------------------------------------------------
+
+c_red()   { printf '\033[31m%s\033[0m\n' "$*"; }
+c_green() { printf '\033[32m%s\033[0m\n' "$*"; }
+c_yellow(){ printf '\033[33m%s\033[0m\n' "$*"; }
+c_bold()  { printf '\033[1m%s\033[0m\n'  "$*"; }
+
+# Sanity: only one LLM preset at a time.
+if [[ "$LMSTUDIO" == "1" && "$MLXVLM" == "1" ]]; then
+  c_red "✗ pick one: LMSTUDIO=1 or MLXVLM=1, not both"
+  exit 1
+fi
+
+# Optional LLM endpoint override + model selection for the McpClient.
+LLM_ARGS=()
+LLM_NAME=""
+LLM_URL=""
+LLM_MODEL=""
+if [[ "$LMSTUDIO" == "1" ]]; then
+  LLM_NAME="LM Studio"
+  LLM_URL="http://127.0.0.1:1234/v1"
+  LLM_MODEL="$LMSTUDIO_MODEL"
+elif [[ "$MLXVLM" == "1" ]]; then
+  LLM_NAME="mlxvlm (Gemma 4)"
+  LLM_URL="http://127.0.0.1:8080/v1"
+  LLM_MODEL="$MLXVLM_MODEL"
+fi
+
+# When an LLM preset is set but the chosen blueprint has no McpClient,
+# auto-inject mcp-server + mcp-client so the override actually binds.
+# (Mirrors what sim-with-llm.sh does, so real-robot LLM mode behaves the
+# same as sim.)
+if [[ -n "$LLM_URL" && "$BLUEPRINT" != *"agentic"* ]]; then
+  if [[ -z "$EXTRA" ]]; then
+    EXTRA="mcp-server mcp-client"
+  elif [[ "$EXTRA" != *"mcp-client"* ]]; then
+    EXTRA="$EXTRA mcp-server mcp-client"
+  fi
+fi
+
+# Assemble the module list once so it's reused for echo + exec.
+MODULES=("$BLUEPRINT")
+if [[ "$BRIDGES" == "1" ]]; then
+  MODULES+=("camera-mjpeg-module" "audio-ws-module" "cmd-bridge-module")
+fi
+if [[ -n "$EXTRA" ]]; then
+  # shellcheck disable=SC2206
+  MODULES+=( $EXTRA )
+fi
+
+if [[ -n "$LLM_URL" ]]; then
+  export OPENAI_BASE_URL="$LLM_URL"
+  export OPENAI_API_KEY="${OPENAI_API_KEY:-local-llm}"  # placeholder; servers ignore it
+  LLM_ARGS=( -o "mcpclient.model=openai:$LLM_MODEL" )
+fi
+
+c_bold "▶ Go2 hackathon quickstart"
+echo "  modules   : ${MODULES[*]}"
+if [[ "$SIMULATION" == "1" ]]; then
+  echo "  mode      : simulation (no robot)"
+else
+  echo "  robot ip  : $ROBOT_IP"
+fi
+if [[ -n "$LLM_NAME" ]]; then
+  echo "  LLM       : $LLM_NAME → $LLM_URL"
+  echo "  model     : $LLM_MODEL"
+fi
+echo
+
+if [[ "$SIMULATION" != "1" ]]; then
+  # 1. Wifi check (macOS) ----------------------------------------------------
+  if [[ "$(uname)" == "Darwin" ]]; then
+    SSID="$(ipconfig getsummary en0 2>/dev/null | awk -F' SSID : ' '/ SSID : /{print $2; exit}')"
+    if [[ -z "$SSID" ]]; then
+      SSID="$(ipconfig getsummary en1 2>/dev/null | awk -F' SSID : ' '/ SSID : /{print $2; exit}')"
+    fi
+    if [[ "$SSID" == "$EXPECTED_SSID" ]]; then
+      c_green "✓ on wifi $SSID"
+    else
+      c_yellow "⚠ current wifi is '${SSID:-unknown}', expected '$EXPECTED_SSID'"
+      c_yellow "  if the robot is on a different network, set ROBOT_IP and ignore this"
+    fi
+  else
+    c_yellow "⚠ non-macOS host, skipping wifi check"
+  fi
+
+  # 2. Reachability ----------------------------------------------------------
+  echo
+  echo "→ pinging $ROBOT_IP …"
+  if ping -c 3 -W 1000 "$ROBOT_IP" >/dev/null 2>&1; then
+    c_green "✓ robot reachable"
+  else
+    c_red "✗ cannot reach $ROBOT_IP"
+    c_red "  check: joined dimairos05? robot powered on? sport mode on in the Unitree app?"
+    c_red "  (use SIMULATION=1 ./go2-start.sh to run MuJoCo instead)"
+    exit 1
+  fi
+
+  # 3. Clock sync (Unitree video desyncs without this) -----------------------
+  echo
+  echo "→ syncing clock (sudo, may prompt for password) …"
+  if command -v sntp >/dev/null 2>&1; then
+    sudo sntp -sS pool.ntp.org >/dev/null 2>&1 \
+      && c_green "✓ clock synced" \
+      || c_yellow "⚠ clock sync failed (non-fatal, but video may lag lidar)"
+  elif command -v ntpdate >/dev/null 2>&1; then
+    sudo ntpdate pool.ntp.org >/dev/null 2>&1 \
+      && c_green "✓ clock synced" \
+      || c_yellow "⚠ clock sync failed (non-fatal)"
+  else
+    c_yellow "⚠ no sntp/ntpdate found, skipping clock sync"
+  fi
+fi
+
+# 4. LLM endpoint reachability check ----------------------------------------
+if [[ -n "$LLM_URL" ]]; then
+  echo
+  echo "→ probing $LLM_NAME at $LLM_URL/models …"
+  if curl -fsS -m 3 "$LLM_URL/models" >/dev/null 2>&1; then
+    c_green "✓ $LLM_NAME reachable"
+  else
+    c_red "✗ cannot reach $LLM_NAME at $LLM_URL"
+    if [[ "$LMSTUDIO" == "1" ]]; then
+      c_red "  start LM Studio's Local Server (Cmd-Shift-2) and load a tool-capable model"
+    else
+      c_red "  start mlxvlm: cd /Users/tex/repos/ai/mlx/mlxvlm && scripts/start-all.sh"
+    fi
+    exit 1
+  fi
+fi
+
+# 5. Venv --------------------------------------------------------------------
+echo
+if [[ -d "$VENV_DIR" ]]; then
+  c_green "✓ using existing venv $VENV_DIR"
+else
+  c_yellow "no venv at $VENV_DIR — creating one"
+  if ! command -v uv >/dev/null 2>&1; then
+    c_red "✗ uv is not installed. install: brew install uv  (or curl -LsSf https://astral.sh/uv/install.sh | sh)"
+    exit 1
+  fi
+  uv venv --python 3.12 "$VENV_DIR"
+  # shellcheck disable=SC1091
+  source "$VENV_DIR/bin/activate"
+  if [[ "$SIMULATION" == "1" ]]; then
+    uv pip install 'dimos[base,unitree,sim]'
+  else
+    uv pip install 'dimos[base,unitree]'
+  fi
+fi
+# shellcheck disable=SC1091
+source "$VENV_DIR/bin/activate"
+
+# 6. Env vars ----------------------------------------------------------------
+export ROBOT_IP
+# Agentic-gemini blueprint needs these; warn if missing for that path
+if [[ "$BLUEPRINT" == *"gemini"* && -z "${GOOGLE_API_KEY:-}" ]]; then
+  c_yellow "⚠ GOOGLE_API_KEY not set — gemini blueprint will fail when the agent runs"
+fi
+if [[ -z "$LLM_URL" && "$BLUEPRINT" == *"agentic"* && "$BLUEPRINT" != *"ollama"* && "$BLUEPRINT" != *"gemini"* ]]; then
+  if [[ -z "${OPENAI_API_KEY:-}" ]]; then
+    c_yellow "⚠ OPENAI_API_KEY not set — agentic blueprint will fail when the agent runs"
+    c_yellow "  (or set LMSTUDIO=1 / MLXVLM=1 to use a local LLM)"
+  fi
+fi
+
+# 7. Launch ------------------------------------------------------------------
+echo
+c_bold "▶ endpoints"
+echo "  command center : http://localhost:7779"
+if [[ "$BRIDGES" == "1" ]]; then
+  echo "  camera MJPEG   : http://127.0.0.1:7780/video_feed/color_image"
+  echo "  camera snapshot: http://127.0.0.1:7780/snapshot/color_image"
+  if [[ "$SIMULATION" == "1" ]]; then
+    echo "  audio          : (robot only — sim has no audio)"
+  else
+    echo "  audio out (ws) : ws://127.0.0.1:7781/audio_out"
+    echo "  audio info     : http://127.0.0.1:7781/audio_info"
+    echo "  audio play     : POST http://127.0.0.1:7781/play"
+  fi
+  echo "  cmd_vel        : POST http://127.0.0.1:7782/cmd_vel"
+  echo "  path           : POST http://127.0.0.1:7782/path"
+  echo "  stop           : POST http://127.0.0.1:7782/stop"
+  echo "  pose           : http://127.0.0.1:7782/pose"
+fi
+echo
+c_bold "▶ launching: dimos $([[ "$SIMULATION" == "1" ]] && echo "--simulation ")run ${MODULES[*]} ${LLM_ARGS[*]:-}"
+echo "  ctrl-c to stop"
+echo
+
+if [[ "$SIMULATION" == "1" ]]; then
+  exec dimos --simulation run "${MODULES[@]}" "${LLM_ARGS[@]}"
+else
+  exec dimos run "${MODULES[@]}" "${LLM_ARGS[@]}"
+fi
diff --git a/journal/2026-05-27-camera-audio-bridges.md b/journal/2026-05-27-camera-audio-bridges.md
new file mode 100644
index 0000000000..9c03d04122
--- /dev/null
+++ b/journal/2026-05-27-camera-audio-bridges.md
@@ -0,0 +1,163 @@
+# 2026-05-27 — camera + audio web bridges, Go2 audio scaffold
+
+Branch: `feat/gemini-go2-2245`
+
+## Goal
+Stream the Go2 (sim or real) camera into a web page consumable by mlxvlm,
+then add the same shape for robot audio (mic out, speaker in).
+
+## Camera
+
+### `dimos/web/mjpeg_module.py` (new) — `camera-mjpeg-module`
+- Module subscribes to `color_image: In[Image]` and republishes via FastAPI on
+  `:7780`.
+- Endpoints:
+  - `GET /video_feed/color_image` — MJPEG (`multipart/x-mixed-replace`)
+  - `GET /snapshot/color_image` — single JPEG (for backend pull, e.g. mlxvlm
+    `/api/analyze` server-side)
+- CORS allow-all so a browser at a different origin can `drawImage` onto a
+  canvas without tainting.
+- Default port moved from 5555 → 7780 to avoid common collisions.
+
+### `dimos/robot/all_blueprints.py`
+- Registered `"camera-mjpeg-module": "dimos.web.mjpeg_module.CameraMjpegModule"`.
+
+### Use
+```
+dimos --simulation run unitree-go2-basic camera-mjpeg-module
+# http://127.0.0.1:7780/video_feed/color_image
+# http://127.0.0.1:7780/snapshot/color_image
+```
+Same URLs work against the real Go2 — drop `--simulation`, set `ROBOT_IP`.
+
+## Audio (real-robot only — sim has no audio)
+
+### `dimos/robot/unitree/connection.py` (modified)
+- Added `AudioMessage(data: bytes, sample_rate: int, channels: int)` dataclass.
+- `UnitreeWebRTCConnection.audio_stream()` — subscribes to
+  `LegionConnection.audio` via `add_track_callback`, calls
+  `switchAudioChannel(True)`, emits `AudioMessage` (int16 PCM at the frame's
+  native rate, usually 48 kHz mono).
+- `UnitreeWebRTCConnection.play_wav_bytes(wav: bytes)` — writes a tempfile,
+  uploads via `WebRTCAudioHub.upload_megaphone`, then `enter_megaphone`. Fires
+  on the connection's asyncio loop; returns immediately.
+
+### `dimos/robot/unitree/go2/connection.py` (modified)
+- New streams on `GO2Connection`:
+  - `audio: Out[bytes]` — mic data (currently only the PCM bytes — see
+    review issue #1, metadata is dropped).
+  - `audio_in: In[bytes]` — WAV bytes to play through the speaker.
+- `start()` wires both if the connection supports them (`hasattr` guard for
+  sim).
+- New skill `play_wav(wav_path: str)` — agent-facing, reads local WAV and
+  invokes `play_wav_bytes`.
+
+### `dimos/web/audio_ws_module.py` (new) — `audio-ws-module`
+- Module with `audio: In[bytes]` (mic) and `audio_in: Out[bytes]` (speaker).
+- FastAPI on `:7781`:
+  - `WebSocket /audio_out` — pushes binary PCM frames to connected clients.
+  - `POST /play` — body = WAV bytes; publishes to `audio_in` for the robot
+    speaker.
+- CORS open on the HTTP routes.
+
+### `dimos/robot/all_blueprints.py`
+- Registered `"audio-ws-module": "dimos.web.audio_ws_module.AudioWsModule"`.
+
+### Use (real Go2 only)
+```
+dimos run unitree-go2-basic audio-ws-module
+# ws://127.0.0.1:7781/audio_out         (mic out, binary)
+# curl --data-binary @clip.wav http://127.0.0.1:7781/play
+```
+
+## Other changes
+
+### `go2-start.sh`
+- Quickstart script for the dimairos05 Go2 — wifi check, ping, NTP sync, venv
+  bootstrap, env-var sanity, `exec dimos run <blueprint>`. Useful at the
+  hackathon table.
+
+### Editable install
+- Switched the venv from a one-shot `uv pip install 'dimos[sim] @ .'` to
+  `uv pip install -e '.[sim]'` so source edits apply without reinstall.
+
+## Review fixes applied
+- `GO2Connection.audio` retyped `Out[AudioMessage]`; rate + channels carry
+  through to `audio-ws-module`, which now exposes them via `GET /audio_info` and
+  sends an initial `{"event":"format", ...}` JSON frame on WS connect.
+- `play_wav_bytes` reads WAV duration via `wave`, sleeps for it, then calls
+  `exit_megaphone` so the robot doesn't stay in megaphone mode.
+- Float frames from aiortc are now scaled `(arr * 32767).clip(...)` before
+  the int16 cast instead of being silently truncated.
+- Added `play_wav_b64` skill alongside `play_wav` for remote MCP clients
+  that don't share the dimos host's filesystem.
+- Moved `os` / `tempfile` / `wave` imports to module top.
+
+## Known issues / follow-ups
+1. Stereo packed-vs-planar layout — `arr.shape[0]` assumes planar; Go2 mic
+   is mono so OK in practice. Document, don't fix yet.
+2. `audio-ws-module` alone (without `unitree-go2-basic`) has an unbound `In` —
+   autoconnect doesn't warn loudly enough.
+3. Untested: no real Go2 available during this session. Sim has no audio.
+
+## Mac + local-LLM agentic — what works, what doesn't
+
+Goal: drive an MCP agent on Apple Silicon with LM Studio (:1234) or the
+mlxvlm Gemma-4 server (:8080), no CUDA, no Google services.
+
+### Blueprints
+
+| Blueprint | Boots on Mac? | Notes |
+|---|---|---|
+| `unitree-go2-agentic` | ✗ | imports SecurityModule (EdgeTAM/CUDA) and Moondream VL (crashes Metal) |
+| `unitree-go2-agentic-gemini` | ✗ as-is | imports `GeminiSpeakSkill` → `from google import genai` at module load. Run `uv pip install google-genai` first, even if you don't call any Gemini APIs. |
+| `unitree-go2-agentic-ollama` | ✓ | clean compose; `./sim-with-llm.sh ollama` uses it |
+| `unitree-go2-basic + mcp-server + mcp-client` | ✓ | minimal but reliable; default for `./sim-with-llm.sh lmstudio` and `mlxvlm` |
+
+### Skill availability by compose
+
+| Compose | Skills the agent gets |
+|---|---|
+| `unitree-go2-basic + mcp-server + mcp-client` | `observe`, `play_wav`, `play_wav_b64` (last two no-op in sim) |
+| `… + unitree-skill-container + replanning-a-star-planner` | adds `relative_move`, `wait`, `current_time`, `tilt_body`, `execute_sport_command` — heavier startup (full nav stack) |
+
+`unitree-skill-container` alone fails build:
+`"No module met NavigationInterfaceSpec spec"`. The implementor is
+`replanning-a-star-planner`.
+
+### VL backends — Mac gotcha
+
+`dimos/models/vl/types.py:17` literal: `qwen | moondream | gemini`.
+
+- `qwen` — **not local**: hits Alibaba DashScope cloud (`dashscope-intl.aliyuncs.com`), needs `ALIBABA_API_KEY`.
+- `moondream` — local; crashes Metal on Apple Silicon (per the `-gemini` blueprint docstring).
+- `gemini` — Google cloud, needs `GOOGLE_API_KEY`.
+
+There is currently **no Mac-local VL backend**. The user has mlxvlm on :8080
+(Gemma-4-E4B + Falcon-Perception, OpenAI-compatible) — natural place to plug
+a new `mlxvlm` / `openai_compat` backend. Not implemented yet; needs a
+new file under `dimos/models/vl/`.
+
+### LLM chat backends
+
+- **LM Studio at :1234** and **mlxvlm at :8080** both speak `/v1/chat/completions`
+  with proper OpenAI tool-call passthrough — `langchain-openai` (already
+  installed) handles them via `OPENAI_BASE_URL`. No new dimos module
+  needed for chat.
+- McpClient model override via `-o mcpclient.model=openai:<id>` works.
+  Wired up in `go2-start.sh` LMSTUDIO/MLXVLM presets.
+
+### Next iteration
+Add `mlxvlm` to `VlModelName` so `NavigationSkillContainer` and
+`PersonFollowSkillContainer` can run with a local VL backend. With that
++ `google-genai` installed, `unitree-go2-agentic-gemini` becomes the
+richest Mac agentic compose if we also `--disable` the Gemini-runtime
+modules (`gemini-speak-skill`, `map-uploader`, `spatial-memory`).
+
+## Composition cheatsheet
+| Run | Endpoints |
+|---|---|
+| `unitree-go2-basic camera-mjpeg-module` | `:7780/video_feed/color_image`, `:7780/snapshot/color_image` |
+| `unitree-go2-basic audio-ws-module` | `ws://:7781/audio_out`, `POST :7781/play` |
+| `unitree-go2-basic camera-mjpeg-module audio-ws-module` | both |
+| `unitree-go2-agentic camera-mjpeg-module audio-ws-module` | MCP + agent + both bridges |
diff --git a/journal/2026-05-27-mlxvlm-robot-integration-prompt.md b/journal/2026-05-27-mlxvlm-robot-integration-prompt.md
new file mode 100644
index 0000000000..e5b0c343c1
--- /dev/null
+++ b/journal/2026-05-27-mlxvlm-robot-integration-prompt.md
@@ -0,0 +1,123 @@
+# mlxvlm → dimos: drive the Go2 from a VLM
+
+Use this as the prompt for the Claude working on `/Users/tex/repos/ai/mlx/mlxvlm`.
+
+## Context
+
+dimos is the robot OS. It runs as a separate process on the same Mac and
+exposes three OpenAI/HTTP-shaped bridges on localhost so mlxvlm can plug in
+without speaking any internal dimos APIs:
+
+| URL | Direction | Format |
+|---|---|---|
+| `GET http://127.0.0.1:7780/video_feed/color_image` | dimos → you | MJPEG (`multipart/x-mixed-replace; boundary=frame`) |
+| `GET http://127.0.0.1:7780/snapshot/color_image` | dimos → you | one JPEG |
+| `GET ws://127.0.0.1:7781/audio_out` | dimos → you | binary int16 PCM frames; first WS message is `{"event":"format","sample_rate":N,"channels":N}` |
+| `GET http://127.0.0.1:7781/audio_info` | dimos → you | `{sample_rate,channels}` JSON |
+| `POST http://127.0.0.1:7781/play` | you → dimos | body = raw WAV bytes; plays on robot speaker (no-op in sim) |
+| `POST http://127.0.0.1:7782/cmd_vel` | you → dimos | one Twist step |
+| `POST http://127.0.0.1:7782/path` | you → dimos | sequence of Twist steps, executed in order |
+| `POST http://127.0.0.1:7782/stop` | you → dimos | zero-Twist emergency stop |
+| `GET http://127.0.0.1:7782/pose` | dimos → you | `{x, y, z, theta, ts}` of base_link in world frame |
+
+All endpoints are CORS-open. Endpoints stay the same whether dimos is in
+MuJoCo sim or driving a real Go2 — only audio is robot-only.
+
+## Twist + Path semantics
+
+`cmd_vel` is a standard ROS-style Twist: linear `[x_forward, y_left, z]` in
+m/s, angular `[x_roll, y_pitch, z_yaw]` in rad/s. Open-loop: the bridge
+publishes the Twist, sleeps for `duration` seconds, then publishes zero.
+
+`/path` takes a list of steps. Two ways to spell a step:
+
+1. **Raw Twist** — the model emits velocity components:
+   ```json
+   {"linear":[0.3,0,0],"angular":[0,0,0],"duration":1.0}
+   ```
+
+2. **Semantic** — the model emits distances in m / rotation in deg, and the
+   bridge divides by `duration` to make a Twist:
+   ```json
+   {"forward":0.5,"left":0,"degrees":0,"duration":2.0}
+   {"forward":0,"left":0,"degrees":-90,"duration":1.5}
+   ```
+
+Steps run sequentially. `/stop` cancels an in-flight `/path` cooperatively.
+
+No SLAM, no obstacle avoidance — for closed-loop, **re-plan** every iteration
+from a fresh camera frame instead of sending long paths.
+
+## What to implement in mlxvlm
+
+Add a `POST /api/robot/navigate` route to `app.py` that runs a perceive-act
+loop until the goal is reached, the user cancels, or `max_steps` is hit:
+
+```
+input:  { "goal": "find the red chair and stop in front of it",
+          "max_steps": 12,
+          "step_seconds": 1.0 }
+
+loop:
+  1. frame = httpx.get(DIMOS_CAMERA_URL).content
+  2. pose  = httpx.get(DIMOS_POSE_URL).json()
+  3. ask Gemma-4-Falcon-Perception via the existing /api/analyze pipeline:
+       "Camera frame attached. Robot pose: {pose}. Goal: {goal}.
+        Decide the next ≤2 second movement. Reply ONLY with strict JSON:
+          { 'action':'move'|'stop'|'arrived',
+            'forward': float meters,
+            'left':    float meters,
+            'degrees': float rotation,
+            'reason':  '<short>' }
+        Positive forward=ahead, positive left=left, positive degrees=CCW.
+        Keep |forward|≤1.0, |left|≤0.5, |degrees|≤45 per step."
+  4. parse JSON; if action=='arrived' or 'stop', POST /stop and break
+  5. POST /api/robot/navigate/steps a single PathStep with the chosen
+     values + duration=step_seconds, wait for response
+  6. SSE-stream {step, action, reason, pose} so the UI shows progress
+
+env config (read at startup):
+  DIMOS_CAMERA_URL  = http://127.0.0.1:7780/snapshot/color_image
+  DIMOS_POSE_URL    = http://127.0.0.1:7782/pose
+  DIMOS_PATH_URL    = http://127.0.0.1:7782/path
+  DIMOS_STOP_URL    = http://127.0.0.1:7782/stop
+  DIMOS_PLAY_URL    = http://127.0.0.1:7781/play     (optional, for spoken feedback)
+```
+
+Failure modes the loop must handle:
+- camera/pose endpoint 5xx or timeout (skip the step, log, retry once)
+- JSON parse failure from the model (treat as `stop`)
+- per-step Twist limits exceeded by the model (clamp, log)
+- max_steps hit without `arrived` (return `{status: "exhausted", history: […]}`)
+
+UI (optional, in `static/`):
+- "Goal" text input + "Go" button → POST /api/robot/navigate
+- live event stream → render the latest camera frame, current pose, last
+  reason, step counter
+- big red "Stop" button → POST `DIMOS_STOP_URL` directly (don't wait for
+  the agent)
+
+## Audio loop (optional follow-up)
+If you want the robot to speak / listen:
+- Mic: open `ws://127.0.0.1:7781/audio_out`, parse the first JSON
+  `format` frame, buffer ~1s of PCM, wrap in a WAV header, POST to the
+  existing mlx-audio whisper server on `:8000`.
+- Speaker: TTS output → POST raw WAV to `http://127.0.0.1:7781/play`.
+
+## Things NOT to do
+- Don't try to import anything from dimos. All integration is over HTTP/WS.
+- Don't long-poll `/path` — it blocks until the steps finish. Use shorter
+  paths (1–3 steps) per loop iteration so the VLM can re-evaluate.
+- Don't assume metric scale from the camera — the VLM reasons about
+  "ahead", "left", "approaching", not centimeters. Conservative step sizes
+  (0.3–0.6 m forward, ≤30° turns) are safer.
+
+## Testing without a real robot
+The dimos sim publishes the same endpoints. Start it with:
+```
+cd ~/repos/robotics/dimos
+./sim-with-llm.sh mlxvlm    # routes the in-sim agent's LLM to your :8080
+```
+Then in another terminal, your mlxvlm `POST /api/robot/navigate` should
+drive the sim Go2 in the MuJoCo viewer, and you'll see its 3D position
+change via `GET /pose`.
diff --git a/pyproject.toml b/pyproject.toml
index 7bc41d2cd3..ef6e6126e9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -201,6 +201,8 @@ agents = [
     "langchain-text-splitters>=1,<2",
     "langchain-huggingface>=1,<2",
     "langchain-ollama>=1,<2",
+    "langchain-google-genai>=2,<3",
+    "google-genai>=2,<3",
     "ollama>=0.6.0",
     "anthropic>=0.19.0",
 
diff --git a/scripts/export_recording.py b/scripts/export_recording.py
new file mode 100644
index 0000000000..7ab2bf1876
--- /dev/null
+++ b/scripts/export_recording.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+
+"""Export a memory2 recording into the robomoo web app.
+
+Reads a SqliteStore `.db` and pushes three structured artifacts over the same
+token-guarded robot-ingest endpoints the live skills use (`map_uploader.py`,
+`take_picture_skill.py`):
+
+  1. occupancy map   — top-down lidar histogram → value-encoded grayscale PNG
+                       → POST /api/robot/map
+  2. trajectory      — downsampled odom path [{ts,x,y,theta}] (JSON)
+                       → POST /api/robot/trajectory
+  3. embedded frames — throttled color_image keyframes, each CLIP-embedded
+                       (same model the web searches with) + thumbnail + pose
+                       → POST /api/robot/frame (with an `embedding` field)
+
+The web then renders an interactive vector map (pan/zoom), the driven path, and
+in-browser CLIP semantic search over the frames.
+
+Usage:
+  ROBOMOO_URL=http://localhost:4470 ROBOT_INGEST_TOKEN=... \
+    uv run python scripts/export_recording.py recording_go2.db
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import math
+import os
+
+import cv2
+import httpx
+import numpy as np
+
+from dimos.memory2.store.sqlite import SqliteStore
+from dimos.memory2.transform import downsample, throttle
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+
+def quat_to_yaw(qx: float, qy: float, qz: float, qw: float) -> float:
+    """Heading (yaw) about world +z, radians."""
+    return math.atan2(2.0 * (qw * qz + qx * qy), 1.0 - 2.0 * (qy * qy + qz * qz))
+
+
+def quat_to_matrix(qx: float, qy: float, qz: float, qw: float) -> np.ndarray:
+    """3x3 rotation matrix from a (x, y, z, w) quaternion."""
+    return np.array(
+        [
+            [1 - 2 * (qy * qy + qz * qz), 2 * (qx * qy - qz * qw), 2 * (qx * qz + qy * qw)],
+            [2 * (qx * qy + qz * qw), 1 - 2 * (qx * qx + qz * qz), 2 * (qy * qz - qx * qw)],
+            [2 * (qx * qz - qy * qw), 2 * (qy * qz + qx * qw), 1 - 2 * (qx * qx + qy * qy)],
+        ],
+        dtype=np.float64,
+    )
+
+
+def post(url: str, token: str, path: str, **kwargs) -> None:
+    resp = httpx.post(
+        f"{url.rstrip('/')}{path}",
+        headers={"Authorization": f"Bearer {token}"},
+        timeout=60.0,
+        **kwargs,
+    )
+    resp.raise_for_status()
+
+
+def build_and_push_map(
+    store: SqliteStore, url: str, token: str, resolution: float, every: int
+) -> None:
+    """Accumulate lidar into a world-frame top-down occupancy and upload it."""
+    if "lidar" not in store.list_streams():
+        logger.warning("no lidar stream — skipping map")
+        return
+
+    pts_world: list[np.ndarray] = []
+    for obs in store.streams.lidar.transform(downsample(every)):
+        if obs.pose is None:
+            continue
+        x, y, z, qx, qy, qz, qw = obs.pose
+        pts = np.asarray(obs.data.points_f32(), dtype=np.float64)
+        if pts.size == 0:
+            continue
+        world = pts @ quat_to_matrix(qx, qy, qz, qw).T + np.array([x, y, z])
+        pts_world.append(world[:, :2])  # XY only
+    if not pts_world:
+        logger.warning("no lidar points with pose — skipping map")
+        return
+
+    xy = np.concatenate(pts_world, axis=0)
+    min_x, min_y = xy.min(axis=0)
+    max_x, max_y = xy.max(axis=0)
+    width = max(1, int(math.ceil((max_x - min_x) / resolution)))
+    height = max(1, int(math.ceil((max_y - min_y) / resolution)))
+
+    cols = np.clip(((xy[:, 0] - min_x) / resolution).astype(int), 0, width - 1)
+    rows = np.clip(((xy[:, 1] - min_y) / resolution).astype(int), 0, height - 1)
+    counts = np.zeros((height, width), dtype=np.int32)
+    np.add.at(counts, (rows, cols), 1)
+
+    # value-encoded grayscale (matches map_uploader.py): unknown=255, occupied=100,
+    # lightly-hit cells scaled, free space left unknown (transparent on the web).
+    enc = np.full((height, width), 255, dtype=np.uint8)
+    hit = counts > 0
+    enc[hit] = np.clip(counts[hit] * 25, 1, 100).astype(np.uint8)
+
+    ok, buf = cv2.imencode(".png", enc)
+    if not ok:
+        logger.warning("map PNG encode failed")
+        return
+    post(
+        url,
+        token,
+        "/api/robot/map",
+        files={"file": ("map.png", buf.tobytes(), "image/png")},
+        data={
+            "resolution": str(resolution),
+            "originX": str(min_x),
+            "originY": str(min_y),
+            "width": str(width),
+            "height": str(height),
+        },
+    )
+    logger.info("pushed map %dx%d (res %.3f)", width, height, resolution)
+
+
+def push_trajectory(store: SqliteStore, url: str, token: str, interval: float) -> None:
+    if "odom" not in store.list_streams():
+        logger.warning("no odom stream — skipping trajectory")
+        return
+    points = []
+    for obs in store.streams.odom.transform(throttle(interval)):
+        if obs.pose is None:
+            continue
+        x, y, _z, qx, qy, qz, qw = obs.pose
+        points.append({"ts": obs.ts, "x": x, "y": y, "theta": quat_to_yaw(qx, qy, qz, qw)})
+    if not points:
+        logger.warning("no odom poses — skipping trajectory")
+        return
+    blob = json.dumps(points).encode("utf-8")
+    post(
+        url,
+        token,
+        "/api/robot/trajectory",
+        files={"file": ("trajectory.json", blob, "application/json")},
+    )
+    logger.info("pushed trajectory (%d points)", len(points))
+
+
+def push_frames(
+    store: SqliteStore, url: str, token: str, interval: float, max_frames: int, thumb_w: int
+) -> None:
+    if "color_image" not in store.list_streams():
+        logger.warning("no color_image stream — skipping frames")
+        return
+    from dimos.models.embedding.clip import CLIPModel
+
+    clip = CLIPModel()
+    n = 0
+    for obs in store.streams.color_image.transform(throttle(interval)):
+        if n >= max_frames:
+            break
+        img = obs.data
+        emb = clip.embed(img)
+        vec = emb.to_numpy().astype(float).ravel().tolist()
+
+        bgr = img.to_opencv()
+        h, w = bgr.shape[:2]
+        if w > thumb_w:
+            bgr = cv2.resize(bgr, (thumb_w, int(h * thumb_w / w)))
+        ok, buf = cv2.imencode(".jpg", bgr, [cv2.IMWRITE_JPEG_QUALITY, 80])
+        if not ok:
+            continue
+
+        data = {"label": "frame", "embedding": json.dumps(vec)}
+        if obs.pose is not None:
+            data["poseX"] = str(obs.pose[0])
+            data["poseY"] = str(obs.pose[1])
+        post(
+            url,
+            token,
+            "/api/robot/frame",
+            files={"file": ("frame.jpg", buf.tobytes(), "image/jpeg")},
+            data=data,
+        )
+        n += 1
+    logger.info("pushed %d embedded frames", n)
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser(description=__doc__)
+    ap.add_argument("db", help="path to the recording .db")
+    ap.add_argument("--robomoo-url", default=os.getenv("ROBOMOO_URL", ""))
+    ap.add_argument("--token", default=os.getenv("ROBOT_INGEST_TOKEN", ""))
+    ap.add_argument("--map-resolution", type=float, default=0.05)
+    ap.add_argument("--lidar-every", type=int, default=10, help="use every Nth lidar scan")
+    ap.add_argument("--traj-interval", type=float, default=0.5, help="seconds between path points")
+    ap.add_argument("--frame-interval", type=float, default=2.0, help="seconds between keyframes")
+    ap.add_argument("--max-frames", type=int, default=120)
+    ap.add_argument("--thumb-width", type=int, default=384)
+    ap.add_argument("--no-map", action="store_true")
+    ap.add_argument("--no-trajectory", action="store_true")
+    ap.add_argument("--no-frames", action="store_true")
+    args = ap.parse_args()
+
+    if not args.robomoo_url or not args.token:
+        raise SystemExit("set ROBOMOO_URL and ROBOT_INGEST_TOKEN (or pass --robomoo-url/--token)")
+
+    store = SqliteStore(path=args.db)
+    logger.info("opened %s — streams: %s", args.db, store.list_streams())
+
+    if not args.no_map:
+        build_and_push_map(store, args.robomoo_url, args.token, args.map_resolution, args.lidar_every)
+    if not args.no_trajectory:
+        push_trajectory(store, args.robomoo_url, args.token, args.traj_interval)
+    if not args.no_frames:
+        push_frames(
+            store,
+            args.robomoo_url,
+            args.token,
+            args.frame_interval,
+            args.max_frames,
+            args.thumb_width,
+        )
+    logger.info("done")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sim-with-llm.sh b/sim-with-llm.sh
new file mode 100755
index 0000000000..0f30e3394e
--- /dev/null
+++ b/sim-with-llm.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# sim-with-llm.sh — sim + local LLM, one command.
+#
+# Thin wrapper around go2-start.sh: sets SIMULATION=1 and the backend preset.
+# go2-start.sh handles the rest (auto-injects mcp-server/mcp-client when an
+# LLM preset is set against a non-agentic blueprint).
+#
+# Usage:
+#   ./sim-with-llm.sh                   # mlxvlm Gemma-4 (default chat LLM)
+#   ./sim-with-llm.sh lmstudio          # LM Studio
+#   ./sim-with-llm.sh mlxvlm qwen3      # mlxvlm with a specific loaded model
+#   ./sim-with-llm.sh ollama            # local ollama (uses native ollama blueprint)
+#
+# Override the base blueprint if you want a different shape:
+#   BLUEPRINT=unitree-go2-basic ./sim-with-llm.sh lmstudio   # default
+#   BLUEPRINT=unitree-go2-agentic-ollama ./sim-with-llm.sh   # full agentic compose
+
+set -euo pipefail
+
+BACKEND="${1:-mlxvlm}"
+shift || true
+
+BLUEPRINT="${BLUEPRINT:-unitree-go2-basic}"
+
+case "$BACKEND" in
+  mlxvlm)
+    export MLXVLM=1
+    if [[ -n "${1:-}" ]]; then export MLXVLM_MODEL="$1"; fi
+    ;;
+  lmstudio)
+    export LMSTUDIO=1
+    if [[ -n "${1:-}" ]]; then export LMSTUDIO_MODEL="$1"; fi
+    ;;
+  ollama)
+    # The ollama agentic blueprint already composes mcp-server/mcp-client.
+    BLUEPRINT="unitree-go2-agentic-ollama"
+    ;;
+  *)
+    echo "unknown backend: $BACKEND  (use mlxvlm | lmstudio | ollama)" >&2
+    exit 2
+    ;;
+esac
+
+exec env SIMULATION=1 ./go2-start.sh "$BLUEPRINT"
diff --git a/uv.lock b/uv.lock
index 55a2997df3..01c45ff589 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1883,9 +1883,11 @@ dependencies = [
 agents = [
     { name = "anthropic" },
     { name = "faster-whisper" },
+    { name = "google-genai" },
     { name = "langchain" },
     { name = "langchain-chroma" },
     { name = "langchain-core" },
+    { name = "langchain-google-genai" },
     { name = "langchain-huggingface" },
     { name = "langchain-ollama" },
     { name = "langchain-openai" },
@@ -1913,6 +1915,7 @@ all = [
     { name = "ffmpeg-python" },
     { name = "filterpy" },
     { name = "gdown" },
+    { name = "google-genai" },
     { name = "googlemaps" },
     { name = "gtsam-extended" },
     { name = "hydra-core" },
@@ -1922,6 +1925,7 @@ all = [
     { name = "langchain" },
     { name = "langchain-chroma" },
     { name = "langchain-core" },
+    { name = "langchain-google-genai" },
     { name = "langchain-huggingface" },
     { name = "langchain-ollama" },
     { name = "langchain-openai" },
@@ -2004,11 +2008,13 @@ base = [
     { name = "faster-whisper" },
     { name = "ffmpeg-python" },
     { name = "filterpy" },
+    { name = "google-genai" },
     { name = "hydra-core" },
     { name = "jinja2" },
     { name = "langchain" },
     { name = "langchain-chroma" },
     { name = "langchain-core" },
+    { name = "langchain-google-genai" },
     { name = "langchain-huggingface" },
     { name = "langchain-ollama" },
     { name = "langchain-openai" },
@@ -2133,12 +2139,14 @@ unitree = [
     { name = "faster-whisper" },
     { name = "ffmpeg-python" },
     { name = "filterpy" },
+    { name = "google-genai" },
     { name = "gtsam-extended" },
     { name = "hydra-core" },
     { name = "jinja2" },
     { name = "langchain" },
     { name = "langchain-chroma" },
     { name = "langchain-core" },
+    { name = "langchain-google-genai" },
     { name = "langchain-huggingface" },
     { name = "langchain-ollama" },
     { name = "langchain-openai" },
@@ -2166,12 +2174,14 @@ unitree-dds = [
     { name = "faster-whisper" },
     { name = "ffmpeg-python" },
     { name = "filterpy" },
+    { name = "google-genai" },
     { name = "gtsam-extended" },
     { name = "hydra-core" },
     { name = "jinja2" },
     { name = "langchain" },
     { name = "langchain-chroma" },
     { name = "langchain-core" },
+    { name = "langchain-google-genai" },
     { name = "langchain-huggingface" },
     { name = "langchain-ollama" },
     { name = "langchain-openai" },
@@ -2384,6 +2394,7 @@ requires-dist = [
     { name = "ffmpeg-python", marker = "extra == 'web'" },
     { name = "filterpy", marker = "extra == 'perception'", specifier = ">=1.4.5" },
     { name = "gdown", marker = "extra == 'misc'", specifier = ">=5.2.2" },
+    { name = "google-genai", marker = "extra == 'agents'", specifier = ">=2,<3" },
     { name = "googlemaps", marker = "extra == 'misc'", specifier = ">=4.10.0" },
     { name = "gtsam-extended", marker = "extra == 'mapping'", specifier = ">=4.3a1.post1" },
     { name = "hydra-core", marker = "extra == 'perception'", specifier = ">=1.3.0" },
@@ -2393,6 +2404,7 @@ requires-dist = [
     { name = "langchain", marker = "extra == 'agents'", specifier = ">=1.2.3,<2" },
     { name = "langchain-chroma", marker = "extra == 'agents'", specifier = ">=1,<2" },
     { name = "langchain-core", marker = "extra == 'agents'", specifier = ">=1.2.22,<2" },
+    { name = "langchain-google-genai", marker = "extra == 'agents'", specifier = ">=2,<3" },
     { name = "langchain-huggingface", marker = "extra == 'agents'", specifier = ">=1,<2" },
     { name = "langchain-ollama", marker = "extra == 'agents'", specifier = ">=1,<2" },
     { name = "langchain-openai", marker = "extra == 'agents'", specifier = ">=1,<2" },
@@ -3142,6 +3154,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/0b/10/da216e25ef2f3c9dfa75574aa27f5f4c7e5fb5540308f04e4d8c4d834ecb/filelock-3.23.0-py3-none-any.whl", hash = "sha256:4203c3f43983c7c95e4bbb68786f184f6acb7300899bf99d686bb82d526bdf62", size = 22227, upload-time = "2026-02-14T02:53:56.122Z" },
 ]
 
+[[package]]
+name = "filetype"
+version = "1.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/bb/29/745f7d30d47fe0f251d3ad3dc2978a23141917661998763bebb6da007eb1/filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb", size = 998020, upload-time = "2022-11-02T17:34:04.141Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/18/79/1b8fa1bb3568781e84c9200f951c735f3f157429f44be0495da55894d620/filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25", size = 19970, upload-time = "2022-11-02T17:34:01.425Z" },
+]
+
 [[package]]
 name = "filterpy"
 version = "1.4.5"
@@ -3449,6 +3470,62 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cc/9b/4366ad3e1c0688146c70aa6143584d6a8d88583b9390f106250e25a3d5cd/glfw-2.10.0-py2.py3-none-win_amd64.whl", hash = "sha256:7f787ee8645781f10e8800438ce4357ab38c573ffb191aba380c1e72eba6311c", size = 559423, upload-time = "2026-03-10T17:21:34.766Z" },
 ]
 
+[[package]]
+name = "google-ai-generativelanguage"
+version = "0.11.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "google-api-core", extra = ["grpc"] },
+    { name = "google-auth" },
+    { name = "grpcio" },
+    { name = "proto-plus" },
+    { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b3/36/9365ca196c4753d0d22951ee0147ecb2124382a010d5dbfb9f3ecd57f2cf/google_ai_generativelanguage-0.11.0.tar.gz", hash = "sha256:d9e24e9836e894a85b52ca03d03530988aeb492d48df71cd1573dc1c3b6d81fc", size = 1539696, upload-time = "2026-03-30T22:51:43.143Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/06/8c/dc1faf90d7a6ec77e09a6706abefabb41ae5b8d838e5e5b16914da26ee1e/google_ai_generativelanguage-0.11.0-py3-none-any.whl", hash = "sha256:f797f307f0969ae49622e09c1d1a23aa86c7538ffae881279506548166e91d45", size = 1432183, upload-time = "2026-03-30T22:47:56.02Z" },
+]
+
+[[package]]
+name = "google-api-core"
+version = "2.30.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "google-auth" },
+    { name = "googleapis-common-protos" },
+    { name = "proto-plus" },
+    { name = "protobuf" },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/16/ce/502a57fb0ec752026d24df1280b162294b22a0afb98a326084f9a979138b/google_api_core-2.30.3.tar.gz", hash = "sha256:e601a37f148585319b26db36e219df68c5d07b6382cff2d580e83404e44d641b", size = 177001, upload-time = "2026-04-10T00:41:28.035Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/03/15/e56f351cf6ef1cfea58e6ac226a7318ed1deb2218c4b3cc9bd9e4b786c5a/google_api_core-2.30.3-py3-none-any.whl", hash = "sha256:a85761ba72c444dad5d611c2220633480b2b6be2521eca69cca2dbb3ffd6bfe8", size = 173274, upload-time = "2026-04-09T22:57:16.198Z" },
+]
+
+[package.optional-dependencies]
+grpc = [
+    { name = "grpcio" },
+    { name = "grpcio-status" },
+]
+
+[[package]]
+name = "google-auth"
+version = "2.53.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography" },
+    { name = "pyasn1-modules" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c6/ad/ff781329bbbdc0974a098d996e89c9e1f7024262f9e3eec442fbb9ad1ac6/google_auth-2.53.0.tar.gz", hash = "sha256:e7e6aa16f6bee7b2b264830fd04f08087a1d5a836df516251a5d15327b246c9c", size = 335844, upload-time = "2026-05-15T20:53:07.928Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4a/c9/db44165ba7c581268c6d46017ef63339110378305062830104fc7fa144cb/google_auth-2.53.0-py3-none-any.whl", hash = "sha256:6e7449917c599b35126a99ec268ec6880301f2fea41dce198fe8fd83ff642b68", size = 246071, upload-time = "2026-05-15T20:53:05.609Z" },
+]
+
+[package.optional-dependencies]
+requests = [
+    { name = "requests" },
+]
+
 [[package]]
 name = "google-crc32c"
 version = "1.8.0"
@@ -3484,6 +3561,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9c/97/7d75fe37a7a6ed171a2cf17117177e7aab7e6e0d115858741b41e9dd4254/google_crc32c-1.8.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f639065ea2042d5c034bf258a9f085eaa7af0cd250667c0635a3118e8f92c69c", size = 28800, upload-time = "2025-12-16T00:40:30.322Z" },
 ]
 
+[[package]]
+name = "google-genai"
+version = "2.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "distro" },
+    { name = "google-auth", extra = ["requests"] },
+    { name = "httpx" },
+    { name = "pydantic" },
+    { name = "requests" },
+    { name = "sniffio" },
+    { name = "tenacity" },
+    { name = "typing-extensions" },
+    { name = "websockets" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b6/91/15fff1b7c22dc0b5b44fa348f151379721353d06a3da22dbbe15bf2c4dd9/google_genai-2.4.0.tar.gz", hash = "sha256:3f8d4bd618be2801e805dc698726731a70f34b438ea25a6c92800eef5b1f513e", size = 552025, upload-time = "2026-05-18T00:25:14.556Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ed/c8/1b49f6bb69dc7e291ba5d14926937bec4dffcc09ee875822636bd5ef3cf4/google_genai-2.4.0-py3-none-any.whl", hash = "sha256:48df1c44190b05b834fee9cffd360af6d6fd7f68164588e7ebd670fa34f71ee1", size = 818207, upload-time = "2026-05-18T00:25:12.426Z" },
+]
+
 [[package]]
 name = "googleapis-common-protos"
 version = "1.72.0"
@@ -3566,6 +3664,20 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/48/b2/b096ccce418882fbfda4f7496f9357aaa9a5af1896a9a7f60d9f2b275a06/grpcio-1.78.0-cp314-cp314-win_amd64.whl", hash = "sha256:dce09d6116df20a96acfdbf85e4866258c3758180e8c49845d6ba8248b6d0bbb", size = 4929852, upload-time = "2026-02-06T09:56:45.885Z" },
 ]
 
+[[package]]
+name = "grpcio-status"
+version = "1.78.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "googleapis-common-protos" },
+    { name = "grpcio" },
+    { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/8a/cd/89ce482a931b543b92cdd9b2888805518c4620e0094409acb8c81dd4610a/grpcio_status-1.78.0.tar.gz", hash = "sha256:a34cfd28101bfea84b5aa0f936b4b423019e9213882907166af6b3bddc59e189", size = 13808, upload-time = "2026-02-06T10:01:48.034Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/83/8a/1241ec22c41028bddd4a052ae9369267b4475265ad0ce7140974548dc3fa/grpcio_status-1.78.0-py3-none-any.whl", hash = "sha256:b492b693d4bf27b47a6c32590701724f1d3b9444b36491878fb71f6208857f34", size = 14523, upload-time = "2026-02-06T10:01:32.584Z" },
+]
+
 [[package]]
 name = "gtsam-extended"
 version = "4.3a1.post1"
@@ -4590,6 +4702,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/1f/01/4771b7ab2af1d1aba5b710bd8f13d9225c609425214b357590a17b01be77/langchain_core-1.3.3-py3-none-any.whl", hash = "sha256:18aae8506f37da7f74398492279a7d6efcee4f8e23c4c41c7af080eeb7ef7bd1", size = 543857, upload-time = "2026-05-05T19:02:34.52Z" },
 ]
 
+[[package]]
+name = "langchain-google-genai"
+version = "2.1.12"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "filetype" },
+    { name = "google-ai-generativelanguage" },
+    { name = "langchain-core" },
+    { name = "pydantic" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/09/38/8b3a71c729bd03e9eb0fd8bdb19e06a074c35bc2eaa61b1b9edfa863f38d/langchain_google_genai-2.1.12.tar.gz", hash = "sha256:4a98371e545eb97fcdf483086a4aebbb8eceeb9597ca5a9c4c35e92f4fbbd271", size = 77566, upload-time = "2025-09-17T01:27:11.747Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e1/8d/9dd9653e5414e73cae3480e5947bbbbd94ba7fa824efdf46e7ff2c0faef2/langchain_google_genai-2.1.12-py3-none-any.whl", hash = "sha256:4c07630419a8fbe7a2ec512c6dea68289663bfe7d5fae0ba431d2cd59a0d0880", size = 50746, upload-time = "2025-09-17T01:27:10.653Z" },
+]
+
 [[package]]
 name = "langchain-huggingface"
 version = "1.2.0"
@@ -7637,6 +7764,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" },
 ]
 
+[[package]]
+name = "proto-plus"
+version = "1.28.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c9/56/e647b0c675392d2da368da7b6f158f7368b18542fd6f7d7400a2f39de000/proto_plus-1.28.0.tar.gz", hash = "sha256:38e5696342835b08fc116f30a25665b29531cda9d5d5643e9b81fc312385abd9", size = 57221, upload-time = "2026-05-07T08:04:50.811Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7c/20/b122d4626976acb81132036d2ad1bb35a1a8775fceb837ec30964622516a/proto_plus-1.28.0-py3-none-any.whl", hash = "sha256:a630604310899e73c59ec302e5765c058d412b2f090b9c79c8822589f14955b8", size = 50410, upload-time = "2026-05-07T08:03:31.962Z" },
+]
+
 [[package]]
 name = "protobuf"
 version = "6.33.5"
@@ -7842,6 +7981,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/72/9c/47693463894b610f8439b2e970b82ef81e9599c757bf2049365e40ff963c/pyarrow-23.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0", size = 28338905, upload-time = "2026-01-18T16:19:32.93Z" },
 ]
 
+[[package]]
+name = "pyasn1"
+version = "0.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" },
+]
+
+[[package]]
+name = "pyasn1-modules"
+version = "0.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pyasn1" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" },
+]
+
 [[package]]
 name = "pyaudio"
 version = "0.2.14"