diff --git a/dimos/robot/unitree/go2/blueprints/hackathon/__init__.py b/dimos/robot/unitree/go2/blueprints/hackathon/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/dimos/robot/unitree/go2/blueprints/hackathon/blueprint.py b/dimos/robot/unitree/go2/blueprints/hackathon/blueprint.py
new file mode 100644
index 0000000000..a69e8457b3
--- /dev/null
+++ b/dimos/robot/unitree/go2/blueprints/hackathon/blueprint.py
@@ -0,0 +1,50 @@
+"""unitree-go2-hackathon blueprint.
+
+Build chain (avoids SecurityModule which requires CUDA/EdgeTAM):
+  unitree_go2_basic → unitree_go2 → +SpatialMemory +PerceiveLoopSkill
+                                    (SecurityModule intentionally excluded)
+Then adds: McpServer + hackathon skill containers.
+"""
+
+from dimos.agents.mcp.mcp_client import McpClient
+from dimos.agents.mcp.mcp_server import McpServer
+from dimos.agents.skills.navigation import NavigationSkillContainer
+from dimos.agents.skills.speak_skill import SpeakSkill
+from dimos.agents.web_human_input import WebInput
+from dimos.core.coordination.blueprints import autoconnect
+from dimos.perception.perceive_loop_skill import PerceiveLoopSkill
+from dimos.perception.spatial_perception import SpatialMemory
+from dimos.robot.unitree.go2.blueprints.smart.unitree_go2 import unitree_go2
+from dimos.robot.unitree.go2.connection import GO2Connection
+from dimos.robot.unitree.unitree_skill_container import UnitreeSkillContainer
+from dimos.robot.unitree.go2.blueprints.hackathon.dog_mode import DogModeModule
+from dimos.robot.unitree.go2.blueprints.hackathon.find import FindSkillContainer
+from dimos.robot.unitree.go2.blueprints.hackathon.perception_loop import PerceptionLoopModule
+from dimos.robot.unitree.go2.blueprints.hackathon.smart_follow import SmartFollowSkillContainer
+
+# Spatial layer without SecurityModule (no CUDA required)
+_unitree_go2_spatial_no_security = autoconnect(
+    unitree_go2,
+    SpatialMemory.blueprint(),
+    PerceiveLoopSkill.blueprint(),
+).global_config(n_workers=8)
+
+_hackathon_agentic = autoconnect(
+    NavigationSkillContainer.blueprint(),
+    PerceptionLoopModule.blueprint(camera_info=GO2Connection.camera_info_static),
+    SmartFollowSkillContainer.blueprint(camera_info=GO2Connection.camera_info_static),
+    FindSkillContainer.blueprint(),
+    DogModeModule.blueprint(camera_info=GO2Connection.camera_info_static),
+    UnitreeSkillContainer.blueprint(),
+    WebInput.blueprint(),
+    SpeakSkill.blueprint(),
+)
+
+unitree_go2_hackathon = autoconnect(
+    _unitree_go2_spatial_no_security,
+    McpServer.blueprint(),
+    McpClient.blueprint(),
+    _hackathon_agentic,
+)
+
+__all__ = ["unitree_go2_hackathon"]
diff --git a/dimos/robot/unitree/go2/blueprints/hackathon/dog_mode.py b/dimos/robot/unitree/go2/blueprints/hackathon/dog_mode.py
new file mode 100644
index 0000000000..d66135f075
--- /dev/null
+++ b/dimos/robot/unitree/go2/blueprints/hackathon/dog_mode.py
@@ -0,0 +1,353 @@
+"""DogModeModule — autonomous dog behavior for Unitree Go2.
+
+State machine:
+  WANDERING  — explore environment, periodic sniff animation
+  ALERT      — person detected, face them, bark randomly based on threat level
+  CLOSE      — person within growl range, track and growl, stop moving
+
+Threat levels (assigned per track_id on first detection, random):
+  0.0–0.4  curious:    occasional soft bark, no growl
+  0.4–0.7  cautious:   bark after short delay, growl at 2.0m
+  0.7–1.0  aggressive: bark immediately, growl at 2.5m
+"""
+
+from __future__ import annotations
+
+import random
+import time
+from pathlib import Path
+from threading import Event, RLock, Thread
+from typing import Any
+
+from reactivex.disposable import Disposable
+from unitree_webrtc_connect.constants import RTC_TOPIC, SPORT_CMD
+
+from dimos.agents.annotation import skill
+from dimos.constants import DEFAULT_THREAD_JOIN_TIMEOUT
+from dimos.core.core import rpc
+from dimos.core.module import Module, ModuleConfig
+from dimos.core.stream import In, Out
+from dimos.msgs.geometry_msgs.Twist import Twist
+from dimos.msgs.geometry_msgs.Vector3 import Vector3
+from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo
+from dimos.msgs.sensor_msgs.Image import Image
+from dimos.navigation.visual_servoing.visual_servoing_2d import VisualServoing2D
+from dimos.perception.detection.detectors.person.yolo import YoloPersonDetector
+from dimos.robot.unitree.go2.connection_spec import GO2ConnectionSpec
+from dimos.utils.logging_config import setup_logger
+from dimos_lcm.std_msgs import Bool
+from dimos.robot.unitree.go2.blueprints.hackathon.frame_writer import write_annotated_frame, write_state
+from dimos.robot.unitree.go2.blueprints.hackathon.perception_loop import get_shared_detections, set_active_mode, shared_detections_fresh
+
+logger = setup_logger()
+
+SOUNDS_DIR = Path(__file__).parent / "sounds"
+SOUND_FILES = {
+    "bark":    SOUNDS_DIR / "minecraft-dog-bark.mp3",
+    "deepbark": SOUNDS_DIR / "deepbark.mp3",
+    "growl":   SOUNDS_DIR / "dog-growling-and-barking.mp3",
+    "perry":   SOUNDS_DIR / "perry-the-platypuss-growl.mp3",
+}
+
+# Threat thresholds
+_AGGRESSIVE_THRESHOLD = 0.7
+_CAUTIOUS_THRESHOLD   = 0.4
+
+# Distance thresholds (meters, estimated from bbox width)
+_CLOSE_AGGRESSIVE = 2.5   # growl distance for aggressive
+_CLOSE_CAUTIOUS   = 2.0   # growl distance for cautious
+_FACE_RANGE       = 5.0   # start facing at this distance
+
+# Timing
+_SNIFF_INTERVAL_MIN = 20.0   # minimum seconds between sniffs
+_SNIFF_INTERVAL_MAX = 45.0
+_BARK_COOLDOWN      = 4.0    # minimum seconds between barks
+_LOOP_HZ            = 10.0   # main loop frequency
+
+
+def _play_sound(name: str) -> None:
+    path = SOUND_FILES.get(name)
+    if path and path.exists():
+        try:
+            import sounddevice as sd
+            import soundfile as sf
+            data, sr = sf.read(str(path))
+            sd.play(data.astype("float32"), sr)
+            sd.wait()
+        except Exception as e:
+            logger.warning(f"Sound playback failed: {e}")
+
+
+class Config(ModuleConfig):
+    camera_info: CameraInfo
+
+
+class DogModeModule(Module):
+    """Autonomous dog behavior module."""
+
+    config: Config
+
+    color_image: In[Image]
+    cmd_vel: Out[Twist]
+    explore_cmd: Out[Bool]
+    stop_explore_cmd: Out[Bool]
+
+    _connection: GO2ConnectionSpec  # injected by dimOS — used for sport commands
+
+    def __init__(self, **kwargs: Any) -> None:
+        super().__init__(**kwargs)
+        self._detector = YoloPersonDetector()  # fallback only
+        self._servo = VisualServoing2D(self.config.camera_info)
+
+        self._latest_image: Image | None = None
+        self._thread: Thread | None = None
+        self._should_stop = Event()
+        self._lock = RLock()
+
+        # Threat memory: track_id → threat float [0,1]
+        self._threats: dict[int, float] = {}
+        self._state = "IDLE"
+
+    @rpc
+    def start(self) -> None:
+        super().start()
+        self.register_disposable(Disposable(self.color_image.subscribe(self._on_image)))
+
+    @rpc
+    def stop(self) -> None:
+        self._should_stop.set()
+        if self._thread is not None:
+            self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+            self._thread = None
+        self._detector.stop()
+        super().stop()
+
+    def _on_image(self, image: Image) -> None:
+        with self._lock:
+            self._latest_image = image
+
+    @skill
+    def start_dog_mode(self) -> str:
+        """Start autonomous dog mode.
+
+        The robot will:
+        - Roam and explore the environment
+        - Periodically sniff the ground (StandDown animation)
+        - Detect people and assess threat level (random, per person)
+        - Bark at people based on threat level
+        - Growl and track anyone who gets close
+        - Face people when they move toward it
+        """
+        if self._thread and self._thread.is_alive():
+            return "Dog mode is already running."
+
+        self._should_stop.clear()
+        self._threats.clear()
+        self._state = "WANDERING"
+        set_active_mode("dog")
+
+        self._thread = Thread(target=self._dog_loop, daemon=True, name="DogMode")
+        self._thread.start()
+
+        return "Woof. Dog mode activated. I'll explore, sniff around, and keep an eye on people."
+
+    @skill
+    def stop_dog_mode(self) -> str:
+        """Stop dog mode and return the robot to idle."""
+        self._should_stop.set()
+        set_active_mode(None)
+        self._send_stop_explore()
+        self.cmd_vel.publish(Twist.zero())
+        if self._thread is not None:
+            self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+            self._thread = None
+        self._state = "IDLE"
+        return "Dog mode stopped."
+
+    # ──────────────────────────────────────────────
+    # Internal helpers
+    # ──────────────────────────────────────────────
+
+    def _sport(self, name: str) -> None:
+        try:
+            cmd_id = SPORT_CMD[name]
+            self._connection.publish_request(RTC_TOPIC["SPORT_MOD"], {"api_id": cmd_id})
+        except Exception as e:
+            logger.warning(f"Sport command '{name}' failed: {e}")
+
+    def _send_explore(self) -> None:
+        msg = Bool(); msg.data = True
+        self.explore_cmd.publish(msg)
+
+    def _send_stop_explore(self) -> None:
+        msg = Bool(); msg.data = True
+        self.stop_explore_cmd.publish(msg)
+
+    def _threat_for(self, track_id: int) -> float:
+        if track_id not in self._threats:
+            self._threats[track_id] = random.random()
+            logger.info(f"DogMode: new person track_id={track_id} threat={self._threats[track_id]:.2f}")
+        return self._threats[track_id]
+
+    def _growl_distance(self, threat: float) -> float:
+        if threat >= _AGGRESSIVE_THRESHOLD:
+            return _CLOSE_AGGRESSIVE
+        if threat >= _CAUTIOUS_THRESHOLD:
+            return _CLOSE_CAUTIOUS
+        return 0.0  # curious — no growl
+
+    def _face_person(self, bbox: tuple, image_width: int) -> None:
+        """Publish angular-only twist to face person (no linear movement)."""
+        x1, _, x2, _ = bbox
+        cx = (x1 + x2) / 2.0
+        fx = self.config.camera_info.K[0]
+        optical_cx = self.config.camera_info.K[2]
+        x_norm = (cx - optical_cx) / fx
+        angular_z = float(-x_norm * 1.2)
+        angular_z = max(-0.6, min(0.6, angular_z))
+        if abs(angular_z) > 0.05:
+            self.cmd_vel.publish(
+                Twist(linear=Vector3(0.0, 0.0, 0.0), angular=Vector3(0.0, 0.0, angular_z))
+            )
+        else:
+            self.cmd_vel.publish(Twist.zero())
+
+    def _do_sniff(self) -> None:
+        """StandDown → pause → RecoveryStand in a background thread."""
+        def _sniff() -> None:
+            self.cmd_vel.publish(Twist.zero())
+            self._sport("StandDown")
+            time.sleep(2.0)
+            self._sport("RecoveryStand")
+            time.sleep(1.0)
+        Thread(target=_sniff, daemon=True).start()
+
+    # ──────────────────────────────────────────────
+    # Main loop
+    # ──────────────────────────────────────────────
+
+    def _dog_loop(self) -> None:
+        last_sniff = time.monotonic() - random.uniform(0, _SNIFF_INTERVAL_MIN)
+        last_bark  = 0.0
+        next_sniff_in = random.uniform(_SNIFF_INTERVAL_MIN, _SNIFF_INTERVAL_MAX)
+        sniffing   = False
+        frame_count = 0
+
+        # Start exploring
+        self._send_explore()
+
+        period = 1.0 / _LOOP_HZ
+        next_t = time.monotonic()
+
+        while not self._should_stop.is_set():
+            next_t += period
+            now = time.monotonic()
+
+            with self._lock:
+                image = self._latest_image
+
+            if image is None:
+                time.sleep(period)
+                continue
+
+            frame_count += 1
+
+            # ── Detect people — use shared YOLO pass when fresh ──
+            if shared_detections_fresh():
+                all_dets, _ = get_shared_detections()
+                persons = [d for d in all_dets if getattr(d, "name", "") == "person"]
+            else:
+                dets = self._detector.process_image(image)
+                persons = [d for d in dets.detections if d.is_valid()]
+
+            if not persons:
+                # Nobody around — wander
+                if self._state != "WANDERING":
+                    self._state = "WANDERING"
+                    self._send_explore()
+
+                # Sniff periodically
+                if not sniffing and (now - last_sniff) >= next_sniff_in:
+                    sniffing = True
+                    last_sniff = now
+                    next_sniff_in = random.uniform(_SNIFF_INTERVAL_MIN, _SNIFF_INTERVAL_MAX)
+                    self._do_sniff()
+                    self._should_stop.wait(timeout=3.2)
+                    sniffing = False
+
+            else:
+                # ── Person(s) detected ──
+                # Pick the most prominent (nearest)
+                target = max(persons, key=lambda d: d.bbox_2d_volume())
+                tid    = target.track_id
+                threat = self._threat_for(tid)
+                dist   = self._servo._estimate_distance(target.bbox)
+                growl_dist = self._growl_distance(threat)
+
+                # Stop exploration while attending to person
+                if self._state == "WANDERING":
+                    self._send_stop_explore()
+                    self._state = "ALERT"
+
+                is_close = dist is not None and growl_dist > 0 and dist <= growl_dist
+
+                if is_close:
+                    # ── CLOSE: growl and face, stop moving ──
+                    if self._state != "CLOSE":
+                        self._state = "CLOSE"
+                        Thread(target=_play_sound, args=("growl",), daemon=True).start()
+                    self._face_person(target.bbox, image.width)
+
+                else:
+                    # ── ALERT: face person, decide whether to bark ──
+                    self._state = "ALERT"
+
+                    # Face if within range
+                    if dist is None or dist <= _FACE_RANGE:
+                        self._face_person(target.bbox, image.width)
+                    else:
+                        self.cmd_vel.publish(Twist.zero())
+
+                    # Bark logic by threat level
+                    if now - last_bark >= _BARK_COOLDOWN:
+                        if threat >= _AGGRESSIVE_THRESHOLD:
+                            # Aggressive: always bark on sight
+                            last_bark = now
+                            sound = "deepbark" if random.random() < 0.5 else "bark"
+                            Thread(target=_play_sound, args=(sound,), daemon=True).start()
+                        elif threat >= _CAUTIOUS_THRESHOLD:
+                            # Cautious: 40% chance each cycle (at 10Hz → ~0.5Hz effective)
+                            if random.random() < 0.04:
+                                last_bark = now
+                                Thread(target=_play_sound, args=("bark",), daemon=True).start()
+                        else:
+                            # Curious: occasional soft bark (10% per cycle)
+                            if random.random() < 0.01:
+                                last_bark = now
+                                Thread(target=_play_sound, args=("bark",), daemon=True).start()
+
+            # ── Annotate frame every 3 ticks for dashboard (~3fps) ──
+            if frame_count % 3 == 0:
+                try:
+                    bgr = image.to_opencv()
+                    det_list = []
+                    for p in persons:
+                        tid = p.track_id
+                        det_list.append({
+                            "bbox": p.bbox,
+                            "track_id": tid,
+                            "threat": self._threats.get(tid),
+                        })
+                    write_annotated_frame(bgr, det_list, overlay_text=f"DOG MODE\n{self._state}")
+                    write_state({"mode": "dog", "state": self._state, "persons": len(persons),
+                                 "threats": {str(k): round(v, 2) for k, v in self._threats.items()}})
+                except Exception:
+                    pass
+
+            # Pace loop
+            sleep_dur = next_t - time.monotonic()
+            if sleep_dur > 0:
+                time.sleep(sleep_dur)
+
+        self.cmd_vel.publish(Twist.zero())
+        self._send_stop_explore()
diff --git a/dimos/robot/unitree/go2/blueprints/hackathon/find.py b/dimos/robot/unitree/go2/blueprints/hackathon/find.py
new file mode 100644
index 0000000000..f26b99d818
--- /dev/null
+++ b/dimos/robot/unitree/go2/blueprints/hackathon/find.py
@@ -0,0 +1,299 @@
+"""FindSkillContainer — explore + find anything using YOLO+CLIP in the background.
+
+Architecture:
+  - Declares explore_cmd / stop_explore_cmd Out streams → auto-wires to WavefrontFrontierExplorer
+  - Declares odom In stream → knows where it found the target
+  - YOLO+CLIP runs in background while the robot is moving
+  - Found → stops exploration, reports position, optionally hands off to SmartFollow
+"""
+
+from __future__ import annotations
+
+import time
+from threading import Event, RLock, Thread
+from typing import Any
+
+from reactivex.disposable import Disposable
+
+from dimos.agents.annotation import skill
+from dimos.constants import DEFAULT_THREAD_JOIN_TIMEOUT
+from dimos.core.core import rpc
+from dimos.core.module import Module, ModuleConfig
+from dimos.core.stream import In, Out
+from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
+from dimos.msgs.sensor_msgs.Image import Image
+from dimos.utils.logging_config import setup_logger
+from dimos_lcm.std_msgs import Bool
+from dimos.robot.unitree.go2.blueprints.hackathon.frame_writer import write_annotated_frame, write_state
+from dimos.robot.unitree.go2.blueprints.hackathon.perception_loop import PerceptionLoopModule, set_active_mode
+
+logger = setup_logger()
+
+_MATCH_THRESHOLD = 0.22
+
+# Full YOLO11 COCO class list — these skip CLIP, pure class filter
+_YOLO_CLASSES = {
+    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
+    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
+    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
+    "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
+    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
+    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
+    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
+    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
+    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
+    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
+    "toothbrush",
+}
+
+
+class Config(ModuleConfig):
+    pass
+
+
+class _Det:
+    """Wrap dict-returned detections from PerceptionLoop with the small attr
+    surface the rest of this module needs (.track_id / .bbox / .name)."""
+    __slots__ = ("track_id", "bbox", "name", "confidence")
+    def __init__(self, m: dict) -> None:
+        self.track_id = m.get("track_id", -1)
+        self.bbox = tuple(m["bbox"])
+        self.name = m.get("name", "")
+        self.confidence = m.get("score", m.get("confidence", 0.0))
+    def bbox_2d_volume(self) -> float:
+        x1, y1, x2, y2 = self.bbox
+        return max(0.0, x2 - x1) * max(0.0, y2 - y1)
+
+
+class FindSkillContainer(Module):
+    """Find + explore skill.
+
+    Starts autonomous frontier exploration while watching the camera for a target.
+    When found, stops exploration and reports position to Claude via tool_update.
+    Shares CLIP + YOLO with PerceptionLoop via RPC (no local model loads).
+    """
+
+    color_image: In[Image]
+    odom: In[PoseStamped]
+    # Shared inference layer — ONE MobileCLIP + ONE YOLO across all skills.
+    _perception: PerceptionLoopModule
+
+    # These wire directly to WavefrontFrontierExplorer via name+type matching
+    explore_cmd: Out[Bool]
+    stop_explore_cmd: Out[Bool]
+
+    def __init__(self, **kwargs: Any) -> None:
+        super().__init__(**kwargs)
+        # No own MobileCLIP / YOLO — both come from PerceptionLoop via RPC.
+        # Saves ~1.85 GB of duplicated model weights per skill module.
+        self._latest_image: Image | None = None
+        self._latest_odom: PoseStamped | None = None
+        self._thread: Thread | None = None
+        self._should_stop = Event()
+        self._lock = RLock()
+
+    @rpc
+    def start(self) -> None:
+        super().start()
+        # CLIP loads lazily on first embed — eager start() caused concurrent
+        # multi-process model loads to stall startup.
+        self.register_disposable(Disposable(self.color_image.subscribe(self._on_image)))
+        self.register_disposable(Disposable(self.odom.subscribe(self._on_odom)))
+
+    @rpc
+    def stop(self) -> None:
+        self._should_stop.set()
+        if self._thread is not None:
+            self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+            self._thread = None
+        super().stop()
+
+    def _on_image(self, image: Image) -> None:
+        with self._lock:
+            self._latest_image = image
+
+    def _on_odom(self, odom: PoseStamped) -> None:
+        with self._lock:
+            self._latest_odom = odom
+
+    @skill
+    def smart_find(self, query: str, explore: bool = True, then_approach: bool = False, timeout_s: float = 0.0) -> str:
+        """Explore the environment while searching for a specific person or object.
+
+        Starts autonomous frontier exploration and watches the camera for the target.
+        When found, stops exploration and reports back. If then_approach=True, the robot
+        will also move close to the object after finding it.
+
+        Works for people ("person with red shirt"), YOLO objects ("chair", "dog", "bottle"),
+        or any visual description.
+
+        Args:
+            query: What to look for, e.g. "red chair", "dog", "person with backpack"
+            explore: If True (default), start autonomous exploration while searching.
+            then_approach: If True, automatically approach and get close after finding.
+            timeout_s: Give up after this many seconds (0 = no timeout, runs until found or stopped).
+
+        Returns:
+            Immediate confirmation. Will send tool_update when found or timed out.
+        """
+        self._should_stop.set()
+        if self._thread is not None:
+            self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+            self._thread = None
+        self._should_stop.clear()
+
+        with self._lock:
+            image = self._latest_image
+
+        if image is None:
+            return "No camera image yet — is DimOS running?"
+
+        if explore:
+            cmd = Bool()
+            cmd.data = True
+            self.explore_cmd.publish(cmd)
+
+        set_active_mode("find")
+        self.start_tool("smart_find")
+        self._thread = Thread(
+            target=self._find_loop,
+            args=(query, explore, then_approach, timeout_s),
+            daemon=True,
+            name="SmartFind",
+        )
+        self._thread.start()
+
+        return (
+            f"Searching for '{query}'. "
+            + ("Exploration started. " if explore else "Watching without moving. ")
+            + f"Timeout: {int(timeout_s)}s. Will update you when found."
+        )
+
+    @skill
+    def stop_find(self) -> str:
+        """Stop an active find/explore mission and halt exploration."""
+        self._should_stop.set()
+        set_active_mode(None)
+        self._send_stop_explore()
+        if self._thread is not None:
+            self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+            self._thread = None
+        return "Find mission stopped."
+
+    def _send_stop_explore(self) -> None:
+        cmd = Bool()
+        cmd.data = True
+        self.stop_explore_cmd.publish(cmd)
+
+    def _get_detections(self, image: Image, class_filter: str | None = None) -> list:
+        """Pull detections from the shared PerceptionLoop (RPC)."""
+        try:
+            raw = self._perception.current_detections(class_filter=class_filter)
+        except Exception:
+            return []
+        return [_Det(m) for m in raw]
+
+    def _find_loop(self, query: str, exploring: bool, then_approach: bool, timeout_s: float) -> None:
+        class_mode = query.lower() in _YOLO_CLASSES
+
+        start_time  = time.monotonic()
+        frame_count = 0
+        clip_interval = 5
+
+        unbounded = timeout_s <= 0
+        while not self._should_stop.is_set():
+            elapsed = time.monotonic() - start_time
+            if not unbounded and elapsed > timeout_s:
+                if exploring:
+                    self._send_stop_explore()
+                self.tool_update("smart_find", f"Search timed out after {int(elapsed)}s. '{query}' not found.")
+                set_active_mode(None)
+                self.stop_tool("smart_find")
+                return
+
+            with self._lock:
+                image = self._latest_image
+                odom  = self._latest_odom
+
+            if image is None:
+                time.sleep(0.1)
+                continue
+
+            frame_count += 1
+
+            if class_mode:
+                matches = self._get_detections(image, class_filter=query.lower())
+                if matches:
+                    best = max(matches, key=lambda d: d.bbox_2d_volume())
+                    try:
+                        all_dets = self._get_detections(image)
+                        det_list = [{"bbox": d.bbox, "track_id": d.track_id,
+                                     "label": "FOUND" if d.track_id == best.track_id else None}
+                                    for d in all_dets]
+                        write_annotated_frame(image.to_opencv(), det_list, overlay_text=f"FIND\nFOUND\n{query}")
+                        write_state({"mode": "find", "state": "FOUND", "query": query})
+                    except Exception:
+                        pass
+                    set_active_mode(None)
+                    self._report_found(query, best, odom, exploring, then_approach)
+                    return
+
+            elif frame_count % clip_interval == 0:
+                # Shared-CLIP scoring via PerceptionLoop — no local inference,
+                # no duplicate MobileCLIP load.
+                try:
+                    ranked = self._perception.match_text(query)
+                except Exception:
+                    ranked = []
+                if ranked and ranked[0]["score"] >= _MATCH_THRESHOLD:
+                    best = _Det(ranked[0])
+                    try:
+                        all_dets = self._get_detections(image)
+                        det_list = [{"bbox": d.bbox, "track_id": d.track_id,
+                                     "label": "FOUND" if d.track_id == best.track_id else None}
+                                    for d in all_dets]
+                        write_annotated_frame(image.to_opencv(), det_list, overlay_text=f"FIND\nFOUND\n{query}")
+                        write_state({"mode": "find", "state": "FOUND", "query": query})
+                    except Exception:
+                        pass
+                    set_active_mode(None)
+                    self._report_found(query, best, odom, exploring, then_approach)
+                    return
+
+            # Annotate searching frames every 3 ticks
+            if frame_count % 3 == 0:
+                try:
+                    cur_dets = self._get_detections(image)
+                    det_list = [{"bbox": d.bbox, "track_id": d.track_id} for d in cur_dets]
+                    write_annotated_frame(image.to_opencv(), det_list, overlay_text=f"FIND\nSEARCHING\n{query}")
+                    write_state({"mode": "find", "state": "SEARCHING", "query": query})
+                except Exception:
+                    pass
+
+            time.sleep(1.0 / 15.0)
+
+        if exploring:
+            self._send_stop_explore()
+        set_active_mode(None)
+        self.tool_update("smart_find", f"Find mission for '{query}' was stopped.")
+        self.stop_tool("smart_find")
+
+    def _report_found(self, query: str, detection: Any, odom: PoseStamped | None, exploring: bool, then_approach: bool) -> None:
+        if exploring:
+            self._send_stop_explore()
+
+        pos_str = (
+            f"x={odom.x:.2f}, y={odom.y:.2f}" if odom is not None else "position unknown"
+        )
+        x1, y1, x2, y2 = detection.bbox
+        center_x = (x1 + x2) / 2
+        img_w = detection.image.width if hasattr(detection, "image") else 1280
+        side = "left" if center_x < img_w * 0.4 else "right" if center_x > img_w * 0.6 else "center"
+
+        approach_hint = f" Now call smart_approach('{query}') to move close." if then_approach else ""
+        self.tool_update(
+            "smart_find",
+            f"Found '{query}'! Robot at {pos_str}, target is {side} of frame.{approach_hint}",
+        )
+        self.stop_tool("smart_find")
diff --git a/dimos/robot/unitree/go2/blueprints/hackathon/frame_writer.py b/dimos/robot/unitree/go2/blueprints/hackathon/frame_writer.py
new file mode 100644
index 0000000000..4c43878aa6
--- /dev/null
+++ b/dimos/robot/unitree/go2/blueprints/hackathon/frame_writer.py
@@ -0,0 +1,97 @@
+"""Shared annotated frame writer for dashboard visualization.
+
+Modules call write_annotated_frame() to push an OpenCV-annotated JPEG to
+/tmp/go2_hackathon_frame.jpg, which the dashboard MJPEG stream reads.
+"""
+
+from __future__ import annotations
+
+import json
+import time
+from pathlib import Path
+from typing import Any
+
+import cv2
+import numpy as np
+
+FRAME_PATH  = Path("/tmp/go2_hackathon_frame.jpg")
+STATE_PATH  = Path("/tmp/go2_hackathon_state.json")
+
+# Threat level colors: BGR
+_COLORS = {
+    "aggressive": (0,   0,   220),   # red
+    "cautious":   (0,  160,  255),   # orange
+    "curious":    (0,  200,   80),   # green
+    "tracking":   (255, 200,   0),   # cyan
+    "approach":   (255,  80, 200),   # purple
+    "unknown":    (180, 180, 180),   # grey
+}
+
+_FONT      = cv2.FONT_HERSHEY_SIMPLEX
+_FONT_SCALE = 0.5
+_THICKNESS  = 1
+
+
+def _threat_label(threat: float | None) -> tuple[str, tuple]:
+    if threat is None:
+        return "tracking", _COLORS["tracking"]
+    if threat >= 0.7:
+        return f"THREAT {threat:.0%}", _COLORS["aggressive"]
+    if threat >= 0.4:
+        return f"cautious {threat:.0%}", _COLORS["cautious"]
+    return f"curious {threat:.0%}", _COLORS["curious"]
+
+
+def write_annotated_frame(
+    image_bgr: np.ndarray,
+    detections: list[dict],   # [{"bbox":(x1,y1,x2,y2), "track_id":int, "threat":float|None, "label":str}]
+    overlay_text: str = "",
+    quality: int = 75,
+) -> None:
+    """Draw bboxes + labels on frame and write to shared JPEG path."""
+    frame = image_bgr.copy()
+    h, w = frame.shape[:2]
+
+    for det in detections:
+        x1, y1, x2, y2 = [int(v) for v in det["bbox"]]
+        tid   = det.get("track_id", -1)
+        threat = det.get("threat")
+        label_text, color = _threat_label(threat)
+        custom = det.get("label")
+        if custom:
+            label_text = custom
+
+        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
+
+        tag = f"#{tid} {label_text}"
+        (tw, th), _ = cv2.getTextSize(tag, _FONT, _FONT_SCALE, _THICKNESS)
+        ty = max(y1 - 6, th + 4)
+        cv2.rectangle(frame, (x1, ty - th - 4), (x1 + tw + 4, ty + 2), color, -1)
+        cv2.putText(frame, tag, (x1 + 2, ty), _FONT, _FONT_SCALE, (255, 255, 255), _THICKNESS, cv2.LINE_AA)
+
+    # Overlay text (state label) in top-left
+    if overlay_text:
+        lines = overlay_text.split("\n")
+        y0 = 22
+        for line in lines:
+            (lw, lh), _ = cv2.getTextSize(line, _FONT, 0.6, 2)
+            cv2.rectangle(frame, (8, y0 - lh - 4), (14 + lw, y0 + 4), (30, 30, 30), -1)
+            cv2.putText(frame, line, (10, y0), _FONT, 0.6, (0, 255, 180), 2, cv2.LINE_AA)
+            y0 += lh + 10
+
+    ok, buf = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, quality])
+    if ok:
+        FRAME_PATH.write_bytes(buf.tobytes())
+
+
+def write_state(state: dict[str, Any]) -> None:
+    """Write current module state as JSON for dashboard polling."""
+    state["ts"] = time.time()
+    STATE_PATH.write_text(json.dumps(state))
+
+
+def read_state() -> dict[str, Any]:
+    try:
+        return json.loads(STATE_PATH.read_text())
+    except Exception:
+        return {}
diff --git a/dimos/robot/unitree/go2/blueprints/hackathon/perception_loop.py b/dimos/robot/unitree/go2/blueprints/hackathon/perception_loop.py
new file mode 100644
index 0000000000..cb5360e91c
--- /dev/null
+++ b/dimos/robot/unitree/go2/blueprints/hackathon/perception_loop.py
@@ -0,0 +1,321 @@
+"""PerceptionLoopModule — always-on L2 shared perception layer.
+
+Runs YOLO at ~15fps. Maintains a rolling SceneBuffer (2s TTL) with MobileCLIP
+embeddings. Exposes a process-singleton so smart_follow / find / dog_mode all
+read from *one* YOLO pass instead of each running their own detector.
+
+Cache hierarchy role:
+  HDD  Claude via MCP          — novel situations only
+  SSD  dimOS SQLite map        — spatial memory
+  RAM  BehaviorFSMs            — smart_follow, dog_mode recovery
+  L2   This module             — always-on YOLO+MobileCLIP, SceneBuffer <50ms
+  L1   VisualServoing2D        — servo math <1ms
+
+Module-level API (used by smart_follow / find / dog_mode):
+  get_shared_detections() → (list[Detection], timestamp_float)
+  get_shared_buffer()     → dict[track_id, TrackedObject]
+  set_active_mode(str|None) — active mode suppresses idle frame-writing
+"""
+
+from __future__ import annotations
+
+import time
+from dataclasses import dataclass, field
+from threading import Event, RLock, Thread
+from typing import Any
+
+import numpy as np
+from reactivex.disposable import Disposable
+
+from dimos.agents.annotation import skill
+from dimos.constants import DEFAULT_THREAD_JOIN_TIMEOUT
+from dimos.core.core import rpc
+from dimos.core.module import Module, ModuleConfig
+from dimos.core.stream import In
+from dimos.models.embedding.mobileclip import MobileCLIPModel
+from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo
+from dimos.msgs.sensor_msgs.Image import Image
+from dimos.perception.detection.detectors.yolo import Yolo2DDetector
+from dimos.utils.logging_config import setup_logger
+from dimos.robot.unitree.go2.blueprints.hackathon.frame_writer import write_annotated_frame, write_state
+
+logger = setup_logger()
+
+_BUFFER_TTL     = 2.0   # seconds before a track expires
+_CLIP_INTERVAL  = 5     # MobileCLIP every N frames (~3fps on CPU)
+_DET_FRESHNESS  = 0.12  # shared detection max age in seconds
+_LOOP_HZ        = 15.0
+_CLIP_THRESHOLD = 0.20
+
+# ── Process-singleton shared state ───────────────────────────────────────────
+_shared_lock       = RLock()
+_shared_detections: list = []
+_shared_det_ts: float   = 0.0
+_shared_buffer: dict    = {}   # track_id → TrackedObject
+_active_mode: str | None = None  # "follow" | "find" | "dog" | None
+
+
+def get_shared_detections() -> tuple[list, float]:
+    """Return (detections, monotonic_ts) from the shared YOLO pass."""
+    with _shared_lock:
+        return list(_shared_detections), _shared_det_ts
+
+
+def get_shared_buffer() -> dict:
+    """Return a snapshot of the current SceneBuffer."""
+    with _shared_lock:
+        return dict(_shared_buffer)
+
+
+def set_active_mode(mode: str | None) -> None:
+    """Signal that a behaviour module is active (suppresses idle frame-writing)."""
+    global _active_mode
+    with _shared_lock:
+        _active_mode = mode
+
+
+def shared_detections_fresh() -> bool:
+    """True if the shared YOLO pass ran within the freshness window."""
+    with _shared_lock:
+        return (time.monotonic() - _shared_det_ts) <= _DET_FRESHNESS
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@dataclass
+class TrackedObject:
+    track_id:   int
+    name:       str
+    bbox:       tuple
+    confidence: float
+    last_seen:  float
+    clip_emb:   Any | None = field(default=None, repr=False)  # Embedding | None
+
+
+class Config(ModuleConfig):
+    camera_info: CameraInfo
+
+
+class PerceptionLoopModule(Module):
+    """Always-on L2 perception: YOLO+MobileCLIP, shared SceneBuffer, query_scene skill."""
+
+    config: Config
+    color_image: In[Image]
+
+    def __init__(self, **kwargs: Any) -> None:
+        super().__init__(**kwargs)
+        self._detector    = Yolo2DDetector()
+        self._clip        = MobileCLIPModel()
+        self._lock        = RLock()
+        self._latest_image: Image | None = None
+        self._last_fast_write: float = 0.0  # gate for _on_image dashboard writes
+        self._should_stop = Event()
+        self._thread: Thread | None = None
+
+    @rpc
+    def start(self) -> None:
+        super().start()
+        self.register_disposable(Disposable(self.color_image.subscribe(self._on_image)))
+        self._should_stop.clear()
+        self._thread = Thread(target=self._perception_loop, daemon=True, name="PerceptionLoop")
+        self._thread.start()
+        logger.info("PerceptionLoopModule started.")
+
+    @rpc
+    def stop(self) -> None:
+        self._should_stop.set()
+        if self._thread is not None:
+            self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+            self._thread = None
+        super().stop()
+
+    def _on_image(self, image: Image) -> None:
+        with self._lock:
+            self._latest_image = image
+
+    # ── Shared-CLIP RPCs (so SmartFollow/Find don't each load their own) ──
+
+    @rpc
+    def embed_text(self, text: str) -> list[float]:
+        """Embed a text query using the SHARED MobileCLIP (no duplicate model loads).
+        Returns the embedding as a plain list[float] (RPC-serializable)."""
+        emb = self._clip.embed_text(text)
+        return emb.vector.detach().cpu().float().tolist()
+
+    @rpc
+    def match_text(self, text: str, class_filter: str | None = None) -> list[dict]:
+        """Score tracked objects against `text` using the shared CLIP.
+        Returns [{track_id, bbox, name, score}] sorted by score desc.
+        Uses cached track embeddings — zero extra inference per frame.
+        Optional class_filter narrows to a YOLO class (e.g. 'person')."""
+        text_emb = self._clip.embed_text(text)
+        now = time.monotonic()
+        out = []
+        with _shared_lock:
+            for t in _shared_buffer.values():
+                if now - t.last_seen > _BUFFER_TTL:
+                    continue
+                if class_filter and t.name.lower() != class_filter.lower():
+                    continue
+                if t.clip_emb is None:
+                    continue
+                try:
+                    score = float(text_emb @ t.clip_emb)
+                except Exception:
+                    continue
+                out.append({
+                    "track_id": int(t.track_id),
+                    "bbox": [float(v) for v in t.bbox],
+                    "name": t.name,
+                    "score": score,
+                })
+        out.sort(key=lambda m: m["score"], reverse=True)
+        return out
+
+    @rpc
+    def current_detections(self, class_filter: str | None = None) -> list[dict]:
+        """Snapshot of the live YOLO detections (fresh-window only).
+        Returns [{track_id, bbox, name, confidence}]."""
+        with _shared_lock:
+            dets = list(_shared_detections)
+            ts = _shared_det_ts
+        if (time.monotonic() - ts) > _DET_FRESHNESS:
+            return []
+        out = []
+        for d in dets:
+            name = getattr(d, "name", "")
+            if class_filter and name.lower() != class_filter.lower():
+                continue
+            bbox = getattr(d, "bbox", None)
+            if bbox is None:
+                continue
+            out.append({
+                "track_id": int(getattr(d, "track_id", -1) or -1),
+                "bbox": [float(v) for v in bbox],
+                "name": name,
+                "confidence": float(getattr(d, "confidence", 0.0) or 0.0),
+            })
+        return out
+
+    # ── Skill ─────────────────────────────────────────────────────────────────
+
+    @skill
+    def query_scene(self, description: str = "") -> str:
+        """Ask what the robot's camera currently sees — no new inference triggered.
+
+        Reads directly from the live SceneBuffer (updated at ~15fps). Use this
+        before making follow/find decisions — it's cheap and instant.
+
+        Args:
+            description: Optional text to filter by similarity (e.g. "person with
+                         red shirt", "dog", "chair"). Leave empty to list everything.
+        """
+        now = time.monotonic()
+        with _shared_lock:
+            tracks = [t for t in _shared_buffer.values() if now - t.last_seen <= _BUFFER_TTL]
+
+        if not tracks:
+            return "Nothing detected in current frame."
+
+        if description:
+            text_emb = self._clip.embed_text(description)
+            scored = []
+            for t in tracks:
+                if t.clip_emb is not None:
+                    score = float(text_emb @ t.clip_emb)
+                    scored.append((score, t))
+            scored.sort(key=lambda x: x[0], reverse=True)
+            matches = [(s, t) for s, t in scored if s >= _CLIP_THRESHOLD]
+            if not matches:
+                names = ", ".join(sorted({t.name for t in tracks}))
+                return f"No match for '{description}'. Visible: {names}."
+            lines = [f"Matches for '{description}':"]
+            for score, t in matches[:5]:
+                x1, _, x2, _ = t.bbox
+                cx = (x1 + x2) / 2
+                side = "left" if cx < 0.4 else "right" if cx > 0.6 else "center"
+                lines.append(f"  #{t.track_id} {t.name} ({score:.0%}) — {side} of frame")
+            return "\n".join(lines)
+
+        by_class: dict[str, list] = {}
+        for t in tracks:
+            by_class.setdefault(t.name, []).append(t)
+        lines = [f"{len(tracks)} object(s) visible:"]
+        for name, objs in sorted(by_class.items()):
+            ids = ", ".join(f"#{o.track_id}" for o in objs)
+            lines.append(f"  {name} × {len(objs)}  [{ids}]")
+        return "\n".join(lines)
+
+    # ── Main loop ─────────────────────────────────────────────────────────────
+
+    def _perception_loop(self) -> None:
+        global _shared_detections, _shared_det_ts, _shared_buffer
+
+        frame_count = 0
+        period = 1.0 / _LOOP_HZ
+        next_t = time.monotonic()
+
+        while not self._should_stop.is_set():
+            next_t += period
+            frame_count += 1
+            now = time.monotonic()
+
+            with self._lock:
+                image = self._latest_image
+
+            if image is None:
+                time.sleep(period)
+                continue
+
+            # YOLO every frame
+            result = self._detector.process_image(image)
+            valid  = [d for d in result.detections if d.is_valid()]
+
+            # MobileCLIP every N frames
+            embs: list = [None] * len(valid)
+            if valid and frame_count % _CLIP_INTERVAL == 0:
+                try:
+                    crops = [d.cropped_image() for d in valid]
+                    raw = self._clip.embed(*crops) if len(crops) > 1 else [self._clip.embed(crops[0])]
+                    embs = raw
+                except Exception:
+                    pass
+
+            # Write into shared singleton
+            with _shared_lock:
+                _shared_detections = valid
+                _shared_det_ts     = now
+                for det, emb in zip(valid, embs):
+                    tid      = det.track_id
+                    existing = _shared_buffer.get(tid)
+                    _shared_buffer[tid] = TrackedObject(
+                        track_id   = tid,
+                        name       = getattr(det, "name", "object"),
+                        bbox       = det.bbox,
+                        confidence = float(getattr(det, "confidence", 0.0)),
+                        last_seen  = now,
+                        clip_emb   = emb if emb is not None else (existing.clip_emb if existing else None),
+                    )
+                _shared_buffer = {k: v for k, v in _shared_buffer.items()
+                                  if now - v.last_seen <= _BUFFER_TTL}
+
+            with _shared_lock:
+                mode = _active_mode
+            if mode is None:
+                try:
+                    snap = [t for t in _shared_buffer.values() if now - t.last_seen <= _BUFFER_TTL]
+                    det_list = [{"bbox": t.bbox, "track_id": t.track_id, "label": t.name} for t in snap]
+                    write_annotated_frame(image.to_opencv(), det_list, overlay_text="PERCEPTION\nACTIVE")
+                    write_state({
+                        "mode":    "perception",
+                        "state":   "ACTIVE",
+                        "objects": len(det_list),
+                        "classes": list({t.name for t in snap}),
+                    })
+                except Exception:
+                    pass
+
+            sleep_dur = next_t - time.monotonic()
+            if sleep_dur > 0:
+                time.sleep(sleep_dur)
diff --git a/dimos/robot/unitree/go2/blueprints/hackathon/smart_follow.py b/dimos/robot/unitree/go2/blueprints/hackathon/smart_follow.py
new file mode 100644
index 0000000000..1c8eeb6c8d
--- /dev/null
+++ b/dimos/robot/unitree/go2/blueprints/hackathon/smart_follow.py
@@ -0,0 +1,428 @@
+"""SmartFollowSkillContainer — multi-target YOLO+MobileCLIP person/object follower.
+
+L2: reads shared YOLO detections from PerceptionLoopModule (zero duplicate inference).
+    Falls back to its own Yolo2DDetector if the shared pass is stale.
+RAM: active_track_ids set, SpinSearch recovery, nearest-target servo.
+L1: VisualServoing2D control loop.
+
+Design:
+- YOLO servo uses shared detections (already running in PerceptionLoopModule)
+- CLIP re-scoring reads embeddings from SceneBuffer — no redundant CLIP calls
+- Multi-target: servo toward nearest active match (largest bbox = closest)
+- Class-only fast path: if query is a YOLO class name, skip CLIP entirely
+"""
+
+from __future__ import annotations
+
+import time
+from threading import Event, RLock, Thread
+from typing import Any
+
+from reactivex.disposable import Disposable
+
+from dimos.agents.annotation import skill
+from dimos.constants import DEFAULT_THREAD_JOIN_TIMEOUT
+from dimos.core.core import rpc
+from dimos.core.module import Module, ModuleConfig
+from dimos.core.stream import In, Out
+from dimos.robot.unitree.go2.blueprints.hackathon.perception_loop import PerceptionLoopModule
+from dimos.msgs.geometry_msgs.Twist import Twist
+from dimos.msgs.geometry_msgs.Vector3 import Vector3
+from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo
+from dimos.msgs.sensor_msgs.Image import Image
+from dimos.navigation.visual_servoing.visual_servoing_2d import VisualServoing2D
+from dimos.utils.logging_config import setup_logger
+from dimos.robot.unitree.go2.blueprints.hackathon.frame_writer import write_annotated_frame, write_state
+from dimos.robot.unitree.go2.blueprints.hackathon.perception_loop import get_shared_buffer, set_active_mode
+
+logger = setup_logger()
+
+_YOLO_CLASSES = {
+    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
+    "boat", "cat", "dog", "horse", "sheep", "cow", "bear", "zebra", "giraffe",
+    "backpack", "umbrella", "handbag", "tie", "suitcase", "sports ball", "bottle",
+    "cup", "chair", "couch", "potted plant", "bed", "tv", "laptop", "phone",
+    "keyboard", "mouse", "remote", "microwave", "oven", "sink", "refrigerator", "book",
+}
+
+_MATCH_THRESHOLD    = 0.22
+_REACQUIRE_THRESHOLD = 0.20
+
+
+class Config(ModuleConfig):
+    camera_info: CameraInfo
+
+
+class _Det:
+    """Lightweight wrapper around dict-returned detections from PerceptionLoop.
+    Exposes the .track_id / .bbox / .name / .bbox_2d_volume() API the rest of
+    this module already expects, so we can swap detector source without
+    rewriting the follow loop."""
+    __slots__ = ("track_id", "bbox", "name", "confidence")
+    def __init__(self, m: dict) -> None:
+        self.track_id = m.get("track_id", -1)
+        self.bbox = tuple(m["bbox"])
+        self.name = m.get("name", "")
+        self.confidence = m.get("score", m.get("confidence", 0.0))
+    def bbox_2d_volume(self) -> float:
+        x1, y1, x2, y2 = self.bbox
+        return max(0.0, x2 - x1) * max(0.0, y2 - y1)
+    def is_valid(self) -> bool:
+        return True
+
+
+class SmartFollowSkillContainer(Module):
+    """Multi-target person/object follower using shared L2 perception."""
+
+    config: Config
+    color_image: In[Image]
+    cmd_vel: Out[Twist]
+    # Shared inference: ONE MobileCLIP + ONE YOLO lives in PerceptionLoop.
+    # We don't load our own; all detection + text matching is RPC'd.
+    _perception: PerceptionLoopModule
+
+    _frequency: float       = 15.0
+    _clip_interval: int     = 5      # re-score from SceneBuffer every N frames
+    _max_lost_frames: int   = 20
+    _spin_speed: float      = 0.45
+    _spin_timeout_s: float  = 12.0
+
+    def __init__(self, **kwargs: Any) -> None:
+        super().__init__(**kwargs)
+        # No more own MobileCLIP / Yolo2DDetector — both come from PerceptionLoop
+        # via RPC. Saves ~1.8 GB of duplicated model weights per skill module.
+        self._visual_servo = VisualServoing2D(self.config.camera_info)
+
+        self._latest_image: Image | None = None
+        self._active_ids: set[int]       = set()
+        self._query: str                 = ""
+        self._text_emb                   = None
+        self._class_mode: bool           = False
+        self._class_name: str            = ""
+
+        self._thread: Thread | None = None
+        self._should_stop = Event()
+        self._lock = RLock()
+        self._tool_name: str = "smart_follow_person"
+
+    @rpc
+    def start(self) -> None:
+        super().start()
+        # CLIP loads lazily on first embed (see _clip cached model) — eager
+        # start() here caused concurrent multi-process model loads to stall.
+        self.register_disposable(Disposable(self.color_image.subscribe(self._on_image)))
+
+    @rpc
+    def stop(self) -> None:
+        self._should_stop.set()
+        set_active_mode(None)
+        if self._thread is not None:
+            self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+            self._thread = None
+        super().stop()
+
+    def _on_image(self, image: Image) -> None:
+        with self._lock:
+            self._latest_image = image
+
+    def _get_detections(self, image: Image, class_filter: str | None = None) -> list:
+        """Pull detections from the shared PerceptionLoop (RPC). image param
+        retained for signature compatibility; not used now (PerceptionLoop has
+        its own subscription to the same color_image stream)."""
+        try:
+            raw = self._perception.current_detections(class_filter=class_filter)
+        except Exception:
+            return []
+        return [_Det(m) for m in raw]
+
+    # ── Skills ────────────────────────────────────────────────────────────────
+
+    @skill
+    def smart_follow_person(self, query: str) -> str:
+        """Follow one or more people matching a description.
+
+        YOLO+MobileCLIP instead of QwenVL — faster lock-on, auto SpinSearch recovery.
+        If multiple people match, follows the nearest. Switches target automatically.
+
+        Args:
+            query: e.g. "gray pants", "black shirt", "woman with backpack"
+        """
+        return self._start_follow(query, mode="person")
+
+    @skill
+    def smart_follow_object(self, class_name: str) -> str:
+        """Follow any YOLO-detectable object by class name (no CLIP — pure fast path).
+
+        Args:
+            class_name: YOLO class: "dog", "cat", "chair", "bottle", "person", etc.
+        """
+        return self._start_follow(class_name, mode="object")
+
+    @skill
+    def smart_approach(self, query: str, stop_distance_m: float = 0.8) -> str:
+        """Move toward a person or object and stop when close enough.
+
+        Args:
+            query: Description or YOLO class name.
+            stop_distance_m: Stop distance in meters (default 0.8m).
+        """
+        self._should_stop.set()
+        if self._thread is not None:
+            self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+        self._should_stop.clear()
+
+        with self._lock:
+            image = self._latest_image
+        if image is None:
+            return "No camera image yet."
+
+        if query.lower() in _YOLO_CLASSES:
+            self._class_mode  = True
+            self._class_name  = query.lower()
+            self._text_emb    = None
+            self._query       = query
+            matches = self._get_detections(image, class_filter=self._class_name)
+        else:
+            self._class_mode  = False
+            self._query       = query
+            self._text_emb    = None  # PerceptionLoop holds the embedding
+            ranked = self._perception.match_text(query, class_filter="person")
+            matches = [_Det(m) for m in ranked if m["score"] >= _MATCH_THRESHOLD]
+            if not matches:
+                return "No persons matching that description in current scene."
+
+        if not matches:
+            return f"Cannot see '{query}' — use smart_find first."
+
+        self._active_ids = {d.track_id for d in matches}
+        set_active_mode("follow")
+        self.start_tool("smart_approach")
+        self._thread = Thread(target=self._approach_loop, args=(stop_distance_m,), daemon=True, name="SmartApproach")
+        self._thread.start()
+        return f"Approaching '{query}', stopping at {stop_distance_m}m."
+
+    @skill
+    def stop_following(self) -> str:
+        """Stop following or approaching."""
+        self._should_stop.set()
+        set_active_mode(None)
+        self.cmd_vel.publish(Twist.zero())
+        if self._thread is not None:
+            self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+            self._thread = None
+        return "Stopped."
+
+    # ── Loops ─────────────────────────────────────────────────────────────────
+
+    def _approach_loop(self, stop_distance_m: float) -> None:
+        lost_count = 0
+        period     = 1.0 / self._frequency
+        next_time  = time.monotonic()
+
+        while not self._should_stop.is_set():
+            next_time += period
+            with self._lock:
+                image = self._latest_image
+            if image is None:
+                time.sleep(period)
+                continue
+
+            if self._class_mode:
+                candidates = self._get_detections(image, class_filter=self._class_name)
+                self._active_ids = {d.track_id for d in candidates}
+            else:
+                candidates = self._get_detections(image, class_filter="person")
+
+            active = [d for d in candidates if d.track_id in self._active_ids]
+
+            if active:
+                lost_count = 0
+                target = max(active, key=lambda d: d.bbox_2d_volume())
+                dist   = self._visual_servo._estimate_distance(target.bbox)
+                if dist is not None and dist <= stop_distance_m:
+                    self.cmd_vel.publish(Twist.zero())
+                    self.tool_update("smart_approach", f"Arrived. Distance: {dist:.2f}m.")
+                    self.stop_tool("smart_approach")
+                    set_active_mode(None)
+                    return
+                self.cmd_vel.publish(self._visual_servo.compute_twist(target.bbox, image.width))
+            else:
+                lost_count += 1
+                self.cmd_vel.publish(Twist.zero())
+                if lost_count > self._max_lost_frames * 2:
+                    self.tool_update("smart_approach", "Lost target during approach.")
+                    self.stop_tool("smart_approach")
+                    set_active_mode(None)
+                    return
+
+            sleep_dur = next_time - time.monotonic()
+            if sleep_dur > 0:
+                time.sleep(sleep_dur)
+
+        self.cmd_vel.publish(Twist.zero())
+        set_active_mode(None)
+        self.stop_tool("smart_approach")
+
+    def _start_follow(self, query: str, mode: str) -> str:
+        self._should_stop.set()
+        if self._thread is not None:
+            self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT)
+        self._should_stop.clear()
+
+        with self._lock:
+            image = self._latest_image
+        if image is None:
+            return "No camera image yet — is DimOS running?"
+
+        if mode == "object" or query.lower() in _YOLO_CLASSES:
+            self._class_mode  = True
+            self._class_name  = query.lower()
+            self._text_emb    = None
+            self._query       = query
+            # Use the SceneBuffer (2s TTL) instead of current_detections() (120ms TTL)
+            # so MCP round-trip + thread-join latency don't produce a false "not visible"
+            # result for objects that were clearly in frame when the user clicked.
+            _now = time.monotonic()
+            _buf = get_shared_buffer()
+            _cls = query.lower()
+            buf_matches = [t for t in _buf.values()
+                           if t.name.lower() == _cls and (_now - t.last_seen) <= 2.0]
+            self._active_ids = {t.track_id for t in buf_matches}
+            self._tool_name = "smart_follow_object"
+            set_active_mode("follow")
+            self.start_tool(self._tool_name)
+            self._thread = Thread(target=self._follow_loop, daemon=True, name="SmartFollow")
+            self._thread.start()
+            if not buf_matches:
+                return f"No '{query}' visible — will search when spinning."
+            return f"Tracking {len(buf_matches)} '{query}' (ids={list(self._active_ids)}). Following nearest."
+
+        # Person mode: use shared CLIP via PerceptionLoop.match_text — scores
+        # cached track embeddings (zero extra inference) and returns ranked
+        # candidates with bbox/score. Same effective semantics as the old
+        # SceneBuffer fast path, just across-process instead of broken local.
+        self._class_mode = False
+        self._query      = query
+        self._text_emb   = None
+
+        ranked = self._perception.match_text(query, class_filter="person")
+        matches = [_Det(m) for m in ranked if m["score"] >= _MATCH_THRESHOLD]
+        self._active_ids = {d.track_id for d in matches}
+
+        self._tool_name = "smart_follow_person"
+        set_active_mode("follow")
+        self.start_tool(self._tool_name)
+        self._thread = Thread(target=self._follow_loop, daemon=True, name="SmartFollow")
+        self._thread.start()
+
+        if not matches:
+            if ranked:
+                return (f"No match for '{query}' (best={ranked[0]['score']:.2f}, "
+                        f"need {_MATCH_THRESHOLD}). Spinning to search.")
+            return "No persons in frame — spinning to search."
+        return f"Locked {len(matches)} match(es) for '{query}' (ids={list(self._active_ids)})."
+
+    def _follow_loop(self) -> None:
+        lost_count  = 0
+        spin_start: float | None = None
+        frame_count = 0
+        period      = 1.0 / self._frequency
+        next_time   = time.monotonic()
+
+        while not self._should_stop.is_set():
+            next_time  += period
+            frame_count += 1
+
+            with self._lock:
+                image = self._latest_image
+            if image is None:
+                time.sleep(period)
+                continue
+
+            # Get detections — shared YOLO pass when fresh
+            if self._class_mode:
+                candidates = self._get_detections(image, class_filter=self._class_name)
+                self._active_ids = {d.track_id for d in candidates}
+            else:
+                candidates = self._get_detections(image, class_filter="person")
+                # Re-score conditions:
+                #   1) IMMEDIATE if we have candidates but none of them match our
+                #      current active_ids. This is what stops the 360 spin the
+                #      moment the target comes into view (was: only every 5
+                #      frames, so the bot would happily rotate past the target).
+                #   2) Periodic during steady tracking to keep active_ids
+                #      refreshed against shape/lighting changes.
+                _none_active_visible = candidates and not any(
+                    d.track_id in self._active_ids for d in candidates
+                )
+                _should_rescore = candidates and self._query and (
+                    _none_active_visible or frame_count % self._clip_interval == 0
+                )
+                if _should_rescore:
+                    try:
+                        ranked = self._perception.match_text(self._query, class_filter="person")
+                    except Exception:
+                        ranked = []
+                    if ranked:
+                        new_active = {m["track_id"] for m in ranked if m["score"] >= _MATCH_THRESHOLD}
+                        ranked_ids = {m["track_id"] for m in ranked}
+                        # Only swap if PerceptionLoop has scored any of our candidates.
+                        if any(d.track_id in ranked_ids for d in candidates):
+                            self._active_ids = new_active
+
+            active = [d for d in candidates if d.track_id in self._active_ids]
+            # If we just (re)acquired the target while spinning, kill the spin
+            # immediately — don't finish the rotation before reacting.
+            if active and spin_start is not None:
+                spin_start = None
+                lost_count = 0
+                self.cmd_vel.publish(Twist.zero())
+
+            if active:
+                target = max(active, key=lambda d: d.bbox_2d_volume())
+                self.cmd_vel.publish(self._visual_servo.compute_twist(target.bbox, image.width))
+                lost_count = 0
+                spin_start = None
+            else:
+                lost_count += 1
+                self.cmd_vel.publish(Twist.zero())
+
+                if lost_count > self._max_lost_frames:
+                    if spin_start is None:
+                        spin_start = time.monotonic()
+                        self.tool_update(self._tool_name, f"Lost '{self._query}' — spinning to search.")
+
+                    if time.monotonic() - spin_start > self._spin_timeout_s:
+                        self.tool_update(self._tool_name,
+                                         f"Full 360° search failed. Could not re-acquire '{self._query}'.")
+                        self.stop_tool(self._tool_name)
+                        set_active_mode(None)
+                        self.cmd_vel.publish(Twist.zero())
+                        return
+
+                    self.cmd_vel.publish(
+                        Twist(linear=Vector3(0.0, 0.0, 0.0), angular=Vector3(0.0, 0.0, self._spin_speed))
+                    )
+
+            # Dashboard frame every 3 ticks
+            if frame_count % 3 == 0:
+                try:
+                    state_label = "LOST" if lost_count > self._max_lost_frames else "FOLLOWING"
+                    det_list = [{"bbox": d.bbox, "track_id": d.track_id,
+                                 "label": "TARGET" if d.track_id in self._active_ids else None}
+                                for d in candidates]
+                    write_annotated_frame(image.to_opencv(), det_list,
+                                          overlay_text=f"FOLLOW\n{state_label}\n{self._query}")
+                    write_state({"mode": "follow", "state": state_label,
+                                 "query": self._query, "targets": len(active)})
+                except Exception:
+                    pass
+
+            sleep_dur = next_time - time.monotonic()
+            if sleep_dur > 0:
+                time.sleep(sleep_dur)
+
+        self.cmd_vel.publish(Twist.zero())
+        set_active_mode(None)
+        self.tool_update(self._tool_name, "Following stopped.")
+        self.stop_tool(self._tool_name)
diff --git a/dimos/robot/unitree/go2/blueprints/hackathon/sounds/deepbark.mp3 b/dimos/robot/unitree/go2/blueprints/hackathon/sounds/deepbark.mp3
new file mode 100644
index 0000000000..bc57834328
Binary files /dev/null and b/dimos/robot/unitree/go2/blueprints/hackathon/sounds/deepbark.mp3 differ
diff --git a/dimos/robot/unitree/go2/blueprints/hackathon/sounds/dog-growling-and-barking.mp3 b/dimos/robot/unitree/go2/blueprints/hackathon/sounds/dog-growling-and-barking.mp3
new file mode 100644
index 0000000000..e937653895
Binary files /dev/null and b/dimos/robot/unitree/go2/blueprints/hackathon/sounds/dog-growling-and-barking.mp3 differ
diff --git a/dimos/robot/unitree/go2/blueprints/hackathon/sounds/minecraft-dog-bark.mp3 b/dimos/robot/unitree/go2/blueprints/hackathon/sounds/minecraft-dog-bark.mp3
new file mode 100644
index 0000000000..6bcb504bdc
Binary files /dev/null and b/dimos/robot/unitree/go2/blueprints/hackathon/sounds/minecraft-dog-bark.mp3 differ
diff --git a/dimos/robot/unitree/go2/blueprints/hackathon/sounds/perry-the-platypuss-growl.mp3 b/dimos/robot/unitree/go2/blueprints/hackathon/sounds/perry-the-platypuss-growl.mp3
new file mode 100644
index 0000000000..efa6f0fbe5
Binary files /dev/null and b/dimos/robot/unitree/go2/blueprints/hackathon/sounds/perry-the-platypuss-growl.mp3 differ