From ceb08bacf278753497f7726b0e1957c3635dea78 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 17:26:36 +0800
Subject: [PATCH 01/36] map tool first pass

---
 dimos/mapping/relocalization/pgo.py   |  32 ++++++-
 dimos/msgs/sensor_msgs/PointCloud2.py |   5 +-
 dimos/robot/cli/dimos.py              |  20 ++++
 dimos/utils/cli/map.py                | 132 ++++++++++++++++++++++++++
 4 files changed, 183 insertions(+), 6 deletions(-)
 create mode 100644 dimos/utils/cli/map.py

diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py
index 1c2ee8aa6c..b04cb61ebc 100644
--- a/dimos/mapping/relocalization/pgo.py
+++ b/dimos/mapping/relocalization/pgo.py
@@ -18,6 +18,7 @@
 
 from __future__ import annotations
 
+from collections.abc import Callable
 from dataclasses import dataclass
 from typing import Any
 
@@ -375,6 +376,9 @@ def pgo_then_voxels(
     voxel_size: float = 0.05,
     block_count: int = 2_000_000,
     device: str = "CUDA:0",
+    corrected_path_out: list[tuple[float, float, float]] | None = None,
+    pass1_on_frame: Callable[[Any], None] | None = None,
+    pass2_on_frame: Callable[[Any], None] | None = None,
     **pgo_cfg: Any,
 ) -> PointCloud2:
     """Two-pass PGO mapping (eliminates duplicate-wall artifacts).
@@ -401,9 +405,16 @@ def pgo_then_voxels(
 
     n_frames = 0
     for obs in stream:
+        if pass1_on_frame is not None:
+            pass1_on_frame(obs)
         if obs.pose is None:
             continue
         x, y, z, qx, qy, qz, qw = obs.pose
+        # Skip placeholder poses: zero translation, or invalid (zero-norm) quaternion.
+        if x == 0 and y == 0 and z == 0:
+            continue
+        if qx == 0 and qy == 0 and qz == 0 and qw == 0:
+            continue
         r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix()
         t = np.array([x, y, z])
         points, _ = obs.data.as_numpy()
@@ -420,24 +431,35 @@ def pgo_then_voxels(
     n_kf = pgo.num_key_poses
     print(f"  Pass 1: {n_frames} frames, {n_kf} keyframes")
 
+    if corrected_path_out is not None:
+        corrected_path_out.extend(
+            (float(kp.t_global[0]), float(kp.t_global[1]), float(kp.t_global[2]))
+            for kp in pgo._key_poses
+        )
+
     grid = VoxelGrid(voxel_size=voxel_size, block_count=block_count, device=device)
     try:
         if n_kf < 2:
             for obs in stream:
+                if pass2_on_frame is not None:
+                    pass2_on_frame(obs)
                 grid.add_frame(obs.data)
             return grid.get_global_pointcloud2()
 
-        kf_ts = np.array([kp.timestamp for kp in pgo._key_poses])
+        # Sort + dedupe by timestamp: Slerp requires strictly increasing ts.
+        kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp)
+        kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp]
+        kf_ts = np.array([kp.timestamp for kp in kps])
         # Per-keyframe rigid drift correction: T_corr = T_global @ T_local.inv()
-        R_corr_list = [kp.r_global @ kp.r_local.T for kp in pgo._key_poses]
-        t_corr_list = [
-            kp.t_global - (kp.r_global @ kp.r_local.T) @ kp.t_local for kp in pgo._key_poses
-        ]
+        R_corr_list = [kp.r_global @ kp.r_local.T for kp in kps]
+        t_corr_list = [kp.t_global - (kp.r_global @ kp.r_local.T) @ kp.t_local for kp in kps]
         t_corrs = np.stack(t_corr_list)
         rot_slerp = Slerp(kf_ts, Rotation.from_matrix(np.stack(R_corr_list)))
 
         n_inserted = 0
         for obs in stream:
+            if pass2_on_frame is not None:
+                pass2_on_frame(obs)
             if obs.pose is None:
                 continue
             ts = float(np.clip(obs.ts, kf_ts[0], kf_ts[-1]))
diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py
index 20d6b09354..598cb0b094 100644
--- a/dimos/msgs/sensor_msgs/PointCloud2.py
+++ b/dimos/msgs/sensor_msgs/PointCloud2.py
@@ -729,7 +729,10 @@ def to_rerun(
             point_colors = colors
         else:
             z = points[:, 2]
-            class_ids = ((z - z.min()) / (z.max() - z.min() + 1e-8) * 255).astype(np.uint8)
+
+            # Clip to robust percentiles so a few outliers don't squash the colormap range.
+            z_lo, z_hi = np.percentile(z, [1, 99])
+            class_ids = (np.clip((z - z_lo) / (z_hi - z_lo + 1e-8), 0, 1) * 255).astype(np.uint8)
 
         if mode == "points":
             return rr.Points3D(
diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py
index 34b74b7441..b344d9be10 100644
--- a/dimos/robot/cli/dimos.py
+++ b/dimos/robot/cli/dimos.py
@@ -685,6 +685,26 @@ def send(
     topic_send(topic, message_expr)
 
 
+@main.command(name="map")
+def map_cmd(
+    dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"),
+    voxel: float = typer.Option(0.05, "--voxel", help="Voxel size for the rebuild"),
+    device: str = typer.Option(
+        "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)"
+    ),
+    pgo: bool = typer.Option(
+        False, "--pgo", help="Run pose graph optimization before rebuilding (twopass)"
+    ),
+    block_count: int = typer.Option(
+        2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)"
+    ),
+) -> None:
+    """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun."""
+    from dimos.utils.cli.map import main as map_main
+
+    map_main(dataset=dataset, voxel=voxel, device=device, pgo=pgo, block_count=block_count)
+
+
 @main.command(name="export-premap")
 def export_premap_cmd(
     dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"),
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
new file mode 100644
index 0000000000..1f82617871
--- /dev/null
+++ b/dimos/utils/cli/map.py
@@ -0,0 +1,132 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+
+import rerun as rr
+import rerun.blueprint as rrb
+import typer
+
+from dimos.mapping.voxels import VoxelMapTransformer
+from dimos.memory2.store.sqlite import SqliteStore
+from dimos.utils.data import resolve_named_path
+from dimos.visualization.rerun.init import rerun_init
+
+
+def progress(total: int, label: str = ""):
+    seen = 0
+    wall_start: float | None = None
+    last_wall: float | None = None
+    first_ts: float | None = None
+
+    def _progress(obs):
+        nonlocal seen, wall_start, last_wall, first_ts
+        now = time.monotonic()
+        if wall_start is None:
+            wall_start = now
+            first_ts = obs.ts
+        frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0
+        last_wall = now
+        seen += 1
+        pct = 100 * seen // total if total else 100
+        wall = now - wall_start
+        data = obs.ts - first_ts
+        speed = data / wall if wall > 0 else 0.0
+        end = "\n" if seen >= total else ""
+        prefix = f"{label} " if label else ""
+        print(
+            f"\r{prefix}{pct:>3}% [{seen}/{total}] {data:.1f}s ({speed:.1f} x rt) {frame_ms:.0f}ms/frame",
+            end=end,
+            flush=True,
+        )
+
+    return _progress
+
+
+def main(
+    dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"),
+    voxel: float = typer.Option(0.05, "--voxel", help="Voxel size for the rebuild"),
+    device: str = typer.Option(
+        "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)"
+    ),
+    pgo: bool = typer.Option(
+        False, "--pgo", help="Run pose graph optimization before rebuilding (twopass)"
+    ),
+    block_count: int = typer.Option(
+        2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)"
+    ),
+) -> None:
+    db_path = resolve_named_path(dataset, ".db")
+
+    store = SqliteStore(path=db_path)
+    lidar = store.streams.lidar
+
+    print(lidar.summary())
+
+    path: list[tuple[float, float, float]] = []
+
+    def collect_path(obs):
+        if obs.pose is None:
+            return
+        # Reject placeholder poses at the world origin (translation = 0,0,0).
+        if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0:
+            return
+        path.append((obs.pose[0], obs.pose[1], obs.pose[2]))
+
+    pgo_map = None
+    pgo_path: list[tuple[float, float, float]] = []
+    if pgo:
+        from dimos.mapping.relocalization.pgo import pgo_then_voxels
+
+        total = lidar.count()
+        print("running PGO twopass map...")
+        pgo_map = pgo_then_voxels(
+            lidar,
+            voxel_size=voxel,
+            block_count=block_count,
+            device=device,
+            corrected_path_out=pgo_path,
+            pass1_on_frame=progress(total, "pgo pass 1 (optimizing)"),
+            pass2_on_frame=progress(total, "pgo pass 2 (rebuilding)"),
+        )
+
+    global_map = (
+        lidar.tap(collect_path)
+        .transform(VoxelMapTransformer(voxel_size=voxel, device=device))
+        .tap(progress(lidar.count(), "reconstructing global map"))
+        .last()
+        .data
+    )
+
+    rerun_init("coloring", spawn=True)
+    rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world")))
+    rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True)
+    if path:
+        rr.log(
+            "world/raw_map/path",
+            rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[0.08]),
+            static=True,
+        )
+    if pgo_map is not None:
+        rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True)
+    if pgo_path:
+        rr.log(
+            "world/pgo_map/path",
+            rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.08]),
+            static=True,
+        )
+
+
+if __name__ == "__main__":
+    typer.run(main)

From 4e0cec34c3e31989c1fb6e3e2995130bc01564e0 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 17:45:21 +0800
Subject: [PATCH 02/36] map tool wrap

---
 dimos/mapping/relocalization/pgo.py         |  2 +-
 dimos/robot/cli/dimos.py                    | 57 +++++----------------
 dimos/utils/cli/map.py                      | 55 ++++++++++++++------
 docs/capabilities/mapping/relocalization.md | 30 +++++------
 4 files changed, 66 insertions(+), 78 deletions(-)

diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py
index b04cb61ebc..3b95ba426f 100644
--- a/dimos/mapping/relocalization/pgo.py
+++ b/dimos/mapping/relocalization/pgo.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 # NOTE: This lives under mapping/relocalization/ for now because the only
-# consumer is the premap export pipeline (`dimos export-premap`). It is
+# consumer is the premap export pipeline (`dimos map --export`). It is
 # temporary and can be moved/split out later when PGO grows other consumers.
 
 from __future__ import annotations
diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py
index b344d9be10..5607c8b1ca 100644
--- a/dimos/robot/cli/dimos.py
+++ b/dimos/robot/cli/dimos.py
@@ -698,55 +698,26 @@ def map_cmd(
     block_count: int = typer.Option(
         2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)"
     ),
+    export: bool = typer.Option(
+        False,
+        "--export",
+        help="Export PGO twopass map to ./<dataset>.pc2.lcm in cwd (implies --pgo)",
+    ),
+    no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"),
 ) -> None:
     """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun."""
     from dimos.utils.cli.map import main as map_main
 
-    map_main(dataset=dataset, voxel=voxel, device=device, pgo=pgo, block_count=block_count)
-
-
-@main.command(name="export-premap")
-def export_premap_cmd(
-    dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"),
-    output: Path | None = typer.Option(None, "-o", "--output", help="Output .pc2.lcm path"),
-    voxel_size: float = typer.Option(0.05, "--voxel-size", help="Voxel size for the rebuild"),
-    duration: float | None = typer.Option(
-        None, "--duration", help="Limit to first N seconds (default: full log)"
-    ),
-    device: str = typer.Option(
-        "CUDA:0",
-        "--device",
-        help="Open3D compute device (e.g. CUDA:0, CPU:0); fallback to CPU if unavailable",
-    ),
-    block_count: int = typer.Option(
-        2_000_000,
-        "--block-count",
-        help="VoxelBlockGrid capacity",
-    ),
-) -> None:
-    """Export a twopass relocalization premap (.pc2.lcm) from a recorded SQLite dataset."""
-    from dimos.mapping.relocalization.pgo import pgo_then_voxels
-    from dimos.memory2.store.sqlite import SqliteStore
-    from dimos.utils.data import get_data_dir, resolve_named_path
-
-    db_path = resolve_named_path(dataset, ".db")
-
-    store = SqliteStore(path=db_path)
-    lidar = store.streams.lidar
-    if duration is not None:
-        lidar = lidar.before(lidar.first().ts + duration)
-
-    typer.echo(f"computing twopass map from {db_path} (voxel_size={voxel_size})...")
-    twopass_map = pgo_then_voxels(
-        lidar, voxel_size=voxel_size, block_count=block_count, device=device
+    map_main(
+        dataset=dataset,
+        voxel=voxel,
+        device=device,
+        pgo=pgo,
+        block_count=block_count,
+        export=export,
+        no_gui=no_gui,
     )
 
-    if output is None:
-        output = get_data_dir() / f"{db_path.stem}_twopass_map.pc2.lcm"
-    output.parent.mkdir(parents=True, exist_ok=True)
-    output.write_bytes(twopass_map.lcm_encode())
-    typer.echo(f"wrote {output}")
-
 
 @main.command()
 def cameracalibrate(
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 1f82617871..9c7ac61794 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -66,8 +66,16 @@ def main(
     block_count: int = typer.Option(
         2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)"
     ),
+    export: bool = typer.Option(
+        False,
+        "--export",
+        help="Export PGO twopass map to ./<dataset>.pc2.lcm in cwd (implies --pgo)",
+    ),
+    no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"),
 ) -> None:
     db_path = resolve_named_path(dataset, ".db")
+    if export:
+        pgo = True
 
     store = SqliteStore(path=db_path)
     lidar = store.streams.lidar
@@ -109,23 +117,36 @@ def collect_path(obs):
         .data
     )
 
-    rerun_init("coloring", spawn=True)
-    rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world")))
-    rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True)
-    if path:
-        rr.log(
-            "world/raw_map/path",
-            rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[0.08]),
-            static=True,
-        )
-    if pgo_map is not None:
-        rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True)
-    if pgo_path:
-        rr.log(
-            "world/pgo_map/path",
-            rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.08]),
-            static=True,
-        )
+    if not no_gui:
+        rerun_init("coloring", spawn=True)
+        rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world")))
+        rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True)
+        if path:
+            rr.log(
+                "world/raw_map/path",
+                rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[0.05]),
+                static=True,
+            )
+        if pgo_map is not None:
+            rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True)
+        if pgo_path:
+            rr.log(
+                "world/pgo_map/path",
+                rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.05]),
+                static=True,
+            )
+
+    if export and pgo_map is not None:
+        from pathlib import Path
+
+        out_path = Path.cwd() / f"{db_path.stem}.pc2.lcm"
+        print(f"exporting PGO twopass map to {out_path}...")
+        out_path.write_bytes(pgo_map.lcm_encode())
+        print(f"wrote {out_path}")
+        print()
+        print("load back with:")
+        print("    from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2")
+        print(f'    pcd = PointCloud2.lcm_decode(open("{out_path.name}", "rb").read())')
 
 
 if __name__ == "__main__":
diff --git a/docs/capabilities/mapping/relocalization.md b/docs/capabilities/mapping/relocalization.md
index 51a966a1c6..a794f27f04 100644
--- a/docs/capabilities/mapping/relocalization.md
+++ b/docs/capabilities/mapping/relocalization.md
@@ -33,12 +33,16 @@ Convert the recording to a relocalization premap (`.pc2.lcm`):
 
 ```bash
 # default name from step 1:
-dimos export-premap recording_go2
+dimos map recording_go2 --export
 
 # renamed:
-dimos export-premap {DB_NAME}
+dimos map {DB_NAME} --export
 ```
 
+`--export` implies `--pgo` (runs pose graph optimization) and writes
+`./{DB_NAME}.pc2.lcm` to the current working directory. Add `--no-gui`
+to skip the rerun viewer for headless runs.
+
 `{DB_NAME}` can be a file name (with or without extension), or a relative / absolute path.
 
 When a bare file name is given, the tool searches in:
@@ -49,28 +53,20 @@ When a bare file name is given, the tool searches in:
 Examples:
 
 ```bash
-dimos export-premap go2_hongkong_office
-dimos export-premap ./go2_hongkong_office.db
-dimos export-premap data/go2_hongkong_office.db
-dimos export-premap /abs/path/to/scan.db
-```
-
-Output defaults to `data/{basename}_twopass_map.pc2.lcm`. Pass `-o
-<path>` to save elsewhere. Relative paths resolve against the current
-working directory; parent directories are created on demand.
-
-```bash
-dimos export-premap recording_go2 -o /tmp/scan.pc2.lcm
-dimos export-premap recording_go2 -o ./recording_go2_twopass_map.pc2.lcm
+dimos map go2_hongkong_office --export
+dimos map ./go2_hongkong_office.db --export
+dimos map data/go2_hongkong_office.db --export
+dimos map /abs/path/to/scan.db --export
 ```
 
 Sample log:
 
 ```
-computing twopass map from /Users/dimos/Desktop/dimos-reloc/recording_go2.db (voxel_size=0.05)...
+running PGO twopass map...
   Pass 1: 908 frames, 1 keyframes
 12:54:32.025[inf][dimos/mapping/voxels.py       ] VoxelGrid using device: CPU:0
-wrote /Users/dimos/Desktop/dimos-reloc/data/recording_go2_twopass_map.pc2.lcm
+exporting PGO twopass map to /Users/dimos/Desktop/dimos-reloc/recording_go2.pc2.lcm...
+wrote /Users/dimos/Desktop/dimos-reloc/recording_go2.pc2.lcm
 ```
 
 ## 3. Relocalize against replay

From 2099e006bd8b6ab2c6dd0848b6b2bbf4bc581fb7 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 18:16:05 +0800
Subject: [PATCH 03/36] renamed rerun blueprint to dimos map tool

---
 dimos/utils/cli/map.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 9c7ac61794..9ef30b24b2 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -118,7 +118,7 @@ def collect_path(obs):
     )
 
     if not no_gui:
-        rerun_init("coloring", spawn=True)
+        rerun_init("dimos map tool", spawn=True)
         rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world")))
         rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True)
         if path:

From 47848d44e881ce8779e9488743b343807275a8ba Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 18:49:39 +0800
Subject: [PATCH 04/36] readme cleanup

---
 docs/capabilities/mapping/relocalization.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/capabilities/mapping/relocalization.md b/docs/capabilities/mapping/relocalization.md
index a794f27f04..a7fef58ccd 100644
--- a/docs/capabilities/mapping/relocalization.md
+++ b/docs/capabilities/mapping/relocalization.md
@@ -77,16 +77,16 @@ standard `unitree-go2` stack plus `RelocalizationModule`:
 
 ```bash
 dimos --replay --replay-db {DB_NAME} run unitree-go2-relocalization \
-  -o relocalizationmodule.map_file={DB_NAME}_twopass_map
+  -o relocalizationmodule.map_file={DB_NAME}
 ```
 
-`{DB_NAME}_twopass_map` is resolved the same way as in section 2: cwd
+`{DB_NAME}` is resolved the same way as in section 2: cwd
 first, then `data/`.
 
 Sample log:
 
 ```
-12:58:51.469[inf][imos/mapping/relocalization.py] Relocalization module started: map_file='recording_go2_twopass_map'  loaded_map.frame_id='map'  placeholder TF 'world' -> 'map'  z_offset=20.0
+12:58:51.469[inf][imos/mapping/relocalization.py] Relocalization module started: map_file='recording_go2'  loaded_map.frame_id='map'  placeholder TF 'world' -> 'map'  z_offset=20.0
 12:58:56.528[war][imos/mapping/relocalization.py] relocalize skipped: n_pts=14198 < MIN_LOCAL_POINTS=20000
 12:59:04.777[war][imos/mapping/relocalization.py] relocalize rejected: fitness=0.466 < threshold=0.6 time_cost=5.3s n_pts=20231
 12:59:14.880[war][imos/mapping/relocalization.py] relocalize rejected: fitness=0.433 < threshold=0.6 time_cost=8.1s n_pts=37770
@@ -125,5 +125,5 @@ recorded `.db`:
 
 ```bash
 dimos --robot-ip {YOUR_ROBOT_IP} run unitree-go2-relocalization \
-  -o relocalizationmodule.map_file={DB_NAME}_twopass_map
+  -o relocalizationmodule.map_file={DB_NAME}
 ```

From d829c0067de680ddf9a613f7bd4d84b7cde43486 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 18:35:40 +0800
Subject: [PATCH 05/36] mapping/relocalization: split PGO into composable
 Stream stages (pgo2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the monolithic pgo_then_voxels with four primitives over
mem2 Streams and Transforms:

  pgo_keyframes(lidar)            -> Stream[Keyframe]
  keyframes_to_corrections(kfs)   -> Stream[Transform]   (world_corrected <- world_raw)
  make_interpolator(corrections)  -> (ts) -> Transform   (SLERP + linear, endpoint-clipped)
  apply_corrections(stream, corr) -> Stream[T]           (shuffles obs.pose)

Drift correction is now a first-class, reusable Stream[Transform] that
any pose-stamped consumer can apply — same math as before (rigid
T_corr = T_global @ T_local^-1 per keyframe, SLERP + linear interpolation
between bracketing keyframes), just composable.

dimos map --pgo migrates to the new primitives; pgo_then_voxels is
deleted. The internal _SimplePGO / PGOConfig / _KeyPose machinery stays
in pgo.py and is imported by pgo2.
---
 dimos/mapping/relocalization/pgo.py  | 126 +-------------
 dimos/mapping/relocalization/pgo2.py | 235 +++++++++++++++++++++++++++
 dimos/utils/cli/map.py               |  48 ++++--
 3 files changed, 277 insertions(+), 132 deletions(-)
 create mode 100644 dimos/mapping/relocalization/pgo2.py

diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py
index 3b95ba426f..74a48c7236 100644
--- a/dimos/mapping/relocalization/pgo.py
+++ b/dimos/mapping/relocalization/pgo.py
@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# NOTE: This lives under mapping/relocalization/ for now because the only
-# consumer is the premap export pipeline (`dimos map --export`). It is
-# temporary and can be moved/split out later when PGO grows other consumers.
+# Internal PGO/ICP machinery (gtsam-backed). Public consumers should use
+# the Stream-shaped API in pgo2.py — pgo_keyframes, keyframes_to_corrections,
+# make_interpolator, apply_corrections. The classes here (_SimplePGO, PGOConfig,
+# _KeyPose) are imported from pgo2 to do the heavy lifting.
 
 from __future__ import annotations
 
-from collections.abc import Callable
 from dataclasses import dataclass
 from typing import Any
 
@@ -30,7 +30,6 @@
 from scipy.spatial.transform import Rotation
 
 from dimos.core.module import ModuleConfig
-from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
 from dimos.utils.logging_config import setup_logger
 
 FRAME_MAP = "world"
@@ -368,120 +367,3 @@ def build_global_map(self, voxel_size: float) -> np.ndarray:
     @property
     def num_key_poses(self) -> int:
         return len(self._key_poses)
-
-
-def pgo_then_voxels(
-    stream: Any,
-    *,
-    voxel_size: float = 0.05,
-    block_count: int = 2_000_000,
-    device: str = "CUDA:0",
-    corrected_path_out: list[tuple[float, float, float]] | None = None,
-    pass1_on_frame: Callable[[Any], None] | None = None,
-    pass2_on_frame: Callable[[Any], None] | None = None,
-    **pgo_cfg: Any,
-) -> PointCloud2:
-    """Two-pass PGO mapping (eliminates duplicate-wall artifacts).
-
-    Pass 1 runs PGO over the lidar stream to build its corrected
-    keyframe trajectory.
-
-    Pass 2 re-streams every lidar frame through ``VoxelGrid``, but each
-    frame's world-frame cloud is first transformed by the rigid drift
-    correction interpolated (SLERP for rotation, linear for translation)
-    from the keyframe corrections at the frame's timestamp.
-
-    Each frame is therefore inserted exactly once at its converged
-    corrected pose, so walls collapse to a single layer instead of the
-    "smear of slightly-offset re-projections" that the single-pass
-    ``_SimplePGO.build_global_map`` produces.
-    """
-    from scipy.spatial.transform import Slerp
-
-    from dimos.mapping.voxels import VoxelGrid
-
-    cfg = PGOConfig(**pgo_cfg)
-    pgo = _SimplePGO(cfg)
-
-    n_frames = 0
-    for obs in stream:
-        if pass1_on_frame is not None:
-            pass1_on_frame(obs)
-        if obs.pose is None:
-            continue
-        x, y, z, qx, qy, qz, qw = obs.pose
-        # Skip placeholder poses: zero translation, or invalid (zero-norm) quaternion.
-        if x == 0 and y == 0 and z == 0:
-            continue
-        if qx == 0 and qy == 0 and qz == 0 and qw == 0:
-            continue
-        r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix()
-        t = np.array([x, y, z])
-        points, _ = obs.data.as_numpy()
-        if len(points) == 0:
-            continue
-        body_pts = (
-            (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3]
-        )
-        if pgo.add_key_pose(r, t, obs.ts, body_pts):
-            pgo.search_for_loops()
-            pgo.smooth_and_update()
-        n_frames += 1
-
-    n_kf = pgo.num_key_poses
-    print(f"  Pass 1: {n_frames} frames, {n_kf} keyframes")
-
-    if corrected_path_out is not None:
-        corrected_path_out.extend(
-            (float(kp.t_global[0]), float(kp.t_global[1]), float(kp.t_global[2]))
-            for kp in pgo._key_poses
-        )
-
-    grid = VoxelGrid(voxel_size=voxel_size, block_count=block_count, device=device)
-    try:
-        if n_kf < 2:
-            for obs in stream:
-                if pass2_on_frame is not None:
-                    pass2_on_frame(obs)
-                grid.add_frame(obs.data)
-            return grid.get_global_pointcloud2()
-
-        # Sort + dedupe by timestamp: Slerp requires strictly increasing ts.
-        kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp)
-        kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp]
-        kf_ts = np.array([kp.timestamp for kp in kps])
-        # Per-keyframe rigid drift correction: T_corr = T_global @ T_local.inv()
-        R_corr_list = [kp.r_global @ kp.r_local.T for kp in kps]
-        t_corr_list = [kp.t_global - (kp.r_global @ kp.r_local.T) @ kp.t_local for kp in kps]
-        t_corrs = np.stack(t_corr_list)
-        rot_slerp = Slerp(kf_ts, Rotation.from_matrix(np.stack(R_corr_list)))
-
-        n_inserted = 0
-        for obs in stream:
-            if pass2_on_frame is not None:
-                pass2_on_frame(obs)
-            if obs.pose is None:
-                continue
-            ts = float(np.clip(obs.ts, kf_ts[0], kf_ts[-1]))
-            r_correction = rot_slerp([ts])[0].as_matrix()
-            idx = int(np.searchsorted(kf_ts, ts))
-            if idx == 0:
-                t_correction = t_corrs[0]
-            elif idx >= len(kf_ts):
-                t_correction = t_corrs[-1]
-            else:
-                t_lo, t_hi = kf_ts[idx - 1], kf_ts[idx]
-                alpha = (ts - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0
-                t_correction = (1 - alpha) * t_corrs[idx - 1] + alpha * t_corrs[idx]
-
-            points, _ = obs.data.as_numpy()
-            if len(points) == 0:
-                continue
-            corrected_pts = (r_correction @ points[:, :3].T).T + t_correction
-            grid.add_frame(PointCloud2.from_numpy(corrected_pts.astype(np.float32)))
-            n_inserted += 1
-
-        print(f"  Pass 2: {n_inserted} frames inserted with PGO-corrected poses")
-        return grid.get_global_pointcloud2()
-    finally:
-        grid.dispose()
diff --git a/dimos/mapping/relocalization/pgo2.py b/dimos/mapping/relocalization/pgo2.py
new file mode 100644
index 0000000000..3aec9cb0eb
--- /dev/null
+++ b/dimos/mapping/relocalization/pgo2.py
@@ -0,0 +1,235 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""PGO drift corrections as composable Stream stages.
+
+Pipeline:
+
+    lidar: Stream[PointCloud2]
+        -> pgo_keyframes(...)            -> Stream[Keyframe]
+        -> keyframes_to_corrections(...) -> Stream[Transform]   (world_corrected <- world_raw)
+        -> apply_corrections(any_stream, corrections) -> Stream[T]   (obs.pose shuffled)
+
+The math: per keyframe, the drift correction is
+    R_corr = R_global @ R_local.T
+    t_corr = t_global - R_corr @ t_local
+and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t,
+clipping out-of-range to endpoints.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable, Iterator
+from dataclasses import dataclass
+from typing import Any, TypeVar
+
+import numpy as np
+from scipy.spatial.transform import Rotation, Slerp
+
+from dimos.memory2.store.memory import MemoryStore
+from dimos.memory2.stream import Stream
+from dimos.memory2.type.observation import Observation
+from dimos.msgs.geometry_msgs.Quaternion import Quaternion
+from dimos.msgs.geometry_msgs.Transform import Transform
+from dimos.msgs.geometry_msgs.Vector3 import Vector3
+from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
+
+T = TypeVar("T")
+
+
+@dataclass(frozen=True)
+class Keyframe:
+    ts: float
+    r_local: np.ndarray  # 3x3
+    t_local: np.ndarray  # (3,)
+    r_global: np.ndarray  # 3x3
+    t_global: np.ndarray  # (3,)
+
+
+def pgo_keyframes(
+    stream: Stream[PointCloud2],
+    *,
+    on_frame: Callable[[Any], None] | None = None,
+    **pgo_cfg: Any,
+) -> Stream[Keyframe]:
+    """Run PGO across a pose-stamped point-cloud stream; return one obs per keyframe."""
+    # Imported here to keep pgo.py the only place that imports gtsam at module scope.
+    from dimos.mapping.relocalization.pgo import PGOConfig, _SimplePGO
+
+    cfg = PGOConfig(**pgo_cfg)
+    pgo = _SimplePGO(cfg)
+
+    for obs in stream:
+        if on_frame is not None:
+            on_frame(obs)
+        if obs.pose is None:
+            continue
+        x, y, z, qx, qy, qz, qw = obs.pose
+        if x == 0 and y == 0 and z == 0:
+            continue
+        if qx == 0 and qy == 0 and qz == 0 and qw == 0:
+            continue
+        r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix()
+        t = np.array([x, y, z])
+        points, _ = obs.data.as_numpy()
+        if len(points) == 0:
+            continue
+        body_pts = (
+            (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3]
+        )
+        if pgo.add_key_pose(r, t, obs.ts, body_pts):
+            pgo.search_for_loops()
+            pgo.smooth_and_update()
+
+    kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp)
+    kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp]
+
+    mem = MemoryStore()
+    out: Stream[Keyframe] = mem.stream("keyframes", Keyframe)
+    for kp in kps:
+        out.append(
+            Keyframe(
+                ts=kp.timestamp,
+                r_local=np.ascontiguousarray(kp.r_local),
+                t_local=np.ascontiguousarray(kp.t_local),
+                r_global=np.ascontiguousarray(kp.r_global),
+                t_global=np.ascontiguousarray(kp.t_global),
+            ),
+            ts=kp.timestamp,
+        )
+    return out
+
+
+def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]:
+    """Per-keyframe drift correction as Transform(world_corrected <- world_raw)."""
+    mem = MemoryStore()
+    out: Stream[Transform] = mem.stream("corrections", Transform)
+    for obs in keyframes:
+        kp = obs.data
+        R_corr = kp.r_global @ kp.r_local.T
+        t_corr = kp.t_global - R_corr @ kp.t_local
+        tf = Transform(
+            translation=Vector3(float(t_corr[0]), float(t_corr[1]), float(t_corr[2])),
+            rotation=Quaternion.from_rotation_matrix(R_corr),
+            frame_id="world_corrected",
+            child_frame_id="world_raw",
+            ts=kp.ts,
+        )
+        out.append(tf, ts=kp.ts)
+    return out
+
+
+def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]:
+    """Materialize corrections once; return a fast ts -> Transform lookup."""
+    ts_list: list[float] = []
+    R_list: list[np.ndarray] = []
+    t_list: list[np.ndarray] = []
+    for obs in corrections:
+        tf = obs.data
+        ts_list.append(tf.ts)
+        q = tf.rotation
+        R_list.append(Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix())
+        t_list.append(np.array([tf.translation.x, tf.translation.y, tf.translation.z]))
+
+    if not ts_list:
+        raise ValueError("empty corrections stream")
+
+    if len(ts_list) == 1:
+        only_ts = ts_list[0]
+        only_R = R_list[0]
+        only_t = t_list[0]
+
+        def _const(ts: float) -> Transform:
+            return Transform(
+                translation=Vector3(float(only_t[0]), float(only_t[1]), float(only_t[2])),
+                rotation=Quaternion.from_rotation_matrix(only_R),
+                frame_id="world_corrected",
+                child_frame_id="world_raw",
+                ts=ts if ts is not None else only_ts,
+            )
+
+        return _const
+
+    ts_arr = np.array(ts_list)
+    R_stack = np.stack(R_list)
+    t_stack = np.stack(t_list)
+    slerp = Slerp(ts_arr, Rotation.from_matrix(R_stack))
+
+    def interp(ts: float) -> Transform:
+        ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1]))
+        R = slerp([ts_clip])[0].as_matrix()
+        idx = int(np.searchsorted(ts_arr, ts_clip))
+        if idx == 0:
+            t = t_stack[0]
+        elif idx >= len(ts_arr):
+            t = t_stack[-1]
+        else:
+            t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx]
+            alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0
+            t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx]
+        return Transform(
+            translation=Vector3(float(t[0]), float(t[1]), float(t[2])),
+            rotation=Quaternion.from_rotation_matrix(R),
+            frame_id="world_corrected",
+            child_frame_id="world_raw",
+            ts=float(ts),
+        )
+
+    return interp
+
+
+def correction_at(corrections: Stream[Transform], ts: float) -> Transform:
+    """One-off lookup. For hot paths build `make_interpolator` once and reuse."""
+    return make_interpolator(corrections)(ts)
+
+
+def apply_corrections(
+    stream: Stream[T],
+    corrections: Stream[Transform],
+) -> Stream[T]:
+    """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts.
+
+    `obs.data` is untouched. Frames with `obs.pose is None` pass through
+    unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped).
+    """
+    interp = make_interpolator(corrections)
+
+    def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]:
+        for obs in upstream:
+            if obs.pose is None:
+                yield obs
+                continue
+            x, y, z, qx, qy, qz, qw = obs.pose
+            tf = interp(obs.ts)
+            R_corr = Rotation.from_quat(
+                [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w]
+            ).as_matrix()
+            t_corr = np.array([tf.translation.x, tf.translation.y, tf.translation.z])
+            R_raw = Rotation.from_quat([qx, qy, qz, qw]).as_matrix()
+            t_raw = np.array([x, y, z])
+            R_new = R_corr @ R_raw
+            t_new = R_corr @ t_raw + t_corr
+            q_new = Rotation.from_matrix(R_new).as_quat()  # xyzw
+            new_pose = (
+                float(t_new[0]),
+                float(t_new[1]),
+                float(t_new[2]),
+                float(q_new[0]),
+                float(q_new[1]),
+                float(q_new[2]),
+                float(q_new[3]),
+            )
+            yield obs.derive(data=obs.data, pose=new_pose)
+
+    return stream.transform(xf)
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 9ef30b24b2..dc8f934347 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -95,19 +95,47 @@ def collect_path(obs):
     pgo_map = None
     pgo_path: list[tuple[float, float, float]] = []
     if pgo:
-        from dimos.mapping.relocalization.pgo import pgo_then_voxels
+        import numpy as np
+        from scipy.spatial.transform import Rotation
+
+        from dimos.mapping.relocalization.pgo2 import (
+            keyframes_to_corrections,
+            make_interpolator,
+            pgo_keyframes,
+        )
+        from dimos.mapping.voxels import VoxelGrid
+        from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
 
         total = lidar.count()
         print("running PGO twopass map...")
-        pgo_map = pgo_then_voxels(
-            lidar,
-            voxel_size=voxel,
-            block_count=block_count,
-            device=device,
-            corrected_path_out=pgo_path,
-            pass1_on_frame=progress(total, "pgo pass 1 (optimizing)"),
-            pass2_on_frame=progress(total, "pgo pass 2 (rebuilding)"),
-        )
+        keyframes = pgo_keyframes(lidar, on_frame=progress(total, "pgo pass 1 (optimizing)"))
+        corrections = keyframes_to_corrections(keyframes)
+        interp = make_interpolator(corrections)
+
+        for obs in keyframes:
+            kp = obs.data
+            pgo_path.append((float(kp.t_global[0]), float(kp.t_global[1]), float(kp.t_global[2])))
+
+        pass2_pb = progress(total, "pgo pass 2 (rebuilding)")
+        grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device)
+        try:
+            for obs in lidar:
+                pass2_pb(obs)
+                if obs.pose is None:
+                    continue
+                pts, _ = obs.data.as_numpy()
+                if len(pts) == 0:
+                    continue
+                tf = interp(obs.ts)
+                R = Rotation.from_quat(
+                    [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w]
+                ).as_matrix()
+                t = np.array([tf.translation.x, tf.translation.y, tf.translation.z])
+                corrected = (R @ pts[:, :3].T).T + t
+                grid.add_frame(PointCloud2.from_numpy(corrected.astype(np.float32)))
+            pgo_map = grid.get_global_pointcloud2()
+        finally:
+            grid.dispose()
 
     global_map = (
         lidar.tap(collect_path)

From ebef675f98249bc429c8c636e855042520f063aa Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 18:39:44 +0800
Subject: [PATCH 06/36] mapping/relocalization: reclaim pgo.py for the public
 API

Renames:
  pgo.py  -> pgo_internals.py   (gtsam/ICP machinery)
  pgo2.py -> pgo.py             (public Stream-shaped API)

`dimos.mapping.relocalization.pgo` is now the canonical import path
(`pgo_keyframes`, `keyframes_to_corrections`, `make_interpolator`,
`apply_corrections`, `correction_at`, `Keyframe`). The internal
_SimplePGO / PGOConfig / _KeyPose / _icp / _voxel_downsample helpers
live in pgo_internals.py and are imported lazily inside pgo.py to keep
gtsam off the public-API import path.
---
 dimos/mapping/relocalization/pgo.py           | 540 +++++++-----------
 dimos/mapping/relocalization/pgo2.py          | 235 --------
 dimos/mapping/relocalization/pgo_internals.py | 369 ++++++++++++
 dimos/utils/cli/map.py                        |   2 +-
 4 files changed, 573 insertions(+), 573 deletions(-)
 delete mode 100644 dimos/mapping/relocalization/pgo2.py
 create mode 100644 dimos/mapping/relocalization/pgo_internals.py

diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py
index 74a48c7236..653c7158d2 100644
--- a/dimos/mapping/relocalization/pgo.py
+++ b/dimos/mapping/relocalization/pgo.py
@@ -12,358 +12,224 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Internal PGO/ICP machinery (gtsam-backed). Public consumers should use
-# the Stream-shaped API in pgo2.py — pgo_keyframes, keyframes_to_corrections,
-# make_interpolator, apply_corrections. The classes here (_SimplePGO, PGOConfig,
-# _KeyPose) are imported from pgo2 to do the heavy lifting.
+"""PGO drift corrections as composable Stream stages.
+
+Pipeline:
+
+    lidar: Stream[PointCloud2]
+        -> pgo_keyframes(...)            -> Stream[Keyframe]
+        -> keyframes_to_corrections(...) -> Stream[Transform]   (world_corrected <- world_raw)
+        -> apply_corrections(any_stream, corrections) -> Stream[T]   (obs.pose shuffled)
+
+The math: per keyframe, the drift correction is
+    R_corr = R_global @ R_local.T
+    t_corr = t_global - R_corr @ t_local
+and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t,
+clipping out-of-range to endpoints.
+"""
 
 from __future__ import annotations
 
+from collections.abc import Callable, Iterator
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, TypeVar
 
-import gtsam  # type: ignore[import-not-found,import-untyped]
 import numpy as np
-import open3d as o3d  # type: ignore[import-untyped]
-import open3d.core as o3c  # type: ignore[import-untyped]
-from scipy.spatial import KDTree
-from scipy.spatial.transform import Rotation
-
-from dimos.core.module import ModuleConfig
-from dimos.utils.logging_config import setup_logger
-
-FRAME_MAP = "world"
-
-logger = setup_logger()
-
-
-class PGOConfig(ModuleConfig):
-    world_frame: str = FRAME_MAP
-
-    # Keyframe detection
-    key_pose_delta_trans: float = 0.5
-    key_pose_delta_deg: float = 10.0
-
-    # Loop closure
-    loop_search_radius: float = 2.0
-    loop_time_thresh: float = 20.0
-    loop_score_thresh: float = 0.3
-    loop_submap_half_range: int = 10
-    min_icp_inliers: int = 10
-    min_keyframes_for_loop_search: int = 10
-    loop_closure_extra_iterations: int = 4
-    submap_resolution: float = 0.2
-    min_loop_detect_duration: float = 5.0
-
-    # Input mode
-    unregister_input: bool = True  # Transform world-frame scans to body-frame using odom
-
-    # Global map
-    publish_global_map: bool = True
-    global_map_publish_rate: float = 0.5
-    global_map_voxel_size: float = 0.15
-
-    # ICP
-    max_icp_iterations: int = 50
-    max_icp_correspondence_dist: float = 1.0
-
-
-@dataclass
-class _KeyPose:
-    r_local: np.ndarray  # 3x3 rotation in local/odom frame
-    t_local: np.ndarray  # 3-vec translation in local/odom frame
-    r_global: np.ndarray  # 3x3 corrected rotation
-    t_global: np.ndarray  # 3-vec corrected translation
-    timestamp: float
-    body_cloud: np.ndarray  # Nx3 points in body frame
-
-
-def _icp(
-    source: np.ndarray,
-    target: np.ndarray,
-    max_iter: int = 50,
-    max_dist: float = 1.0,
-    tol: float = 1e-6,
-    min_inliers: int = 10,
-    init: np.ndarray | None = None,
-) -> tuple[np.ndarray, float]:
-    """Point-to-point ICP using Open3D's tensor pipeline.
-
-    Returns ``(T, fitness)`` where ``fitness`` is mean squared inlier
-    distance (m²) — same semantic as the previous SVD implementation, so
-    the GTSAM noise model in ``smooth_and_update`` keeps working.
-    """
-    if len(source) < min_inliers or len(target) < min_inliers:
-        return np.eye(4), float("inf")
-
-    cpu = o3c.Device("CPU:0")
-    src_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(source.astype(np.float32), device=cpu))
-    tgt_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(target.astype(np.float32), device=cpu))
-
-    # Normals on the target enable point-to-plane ICP, which converges
-    # tighter than point-to-point on indoor scenes (walls give unambiguous
-    # normals that resolve the slide-along-wall ambiguity).
-    tgt_pcd.estimate_normals(max_nn=30, radius=0.3)
-
-    init_T = (
-        o3c.Tensor(init.astype(np.float64), dtype=o3c.float64, device=cpu)
-        if init is not None
-        else o3c.Tensor.eye(4, dtype=o3c.float64, device=cpu)
-    )
-
-    # Silence Open3D's "0 correspondence" warning — we deliberately use a
-    # tight max_correspondence_distance and reject loops with poor fitness;
-    # the warning is informational, not an error.
-    with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error):
-        result = o3d.t.pipelines.registration.icp(
-            source=src_pcd,
-            target=tgt_pcd,
-            max_correspondence_distance=max_dist,
-            init_source_to_target=init_T,
-            estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(),
-            criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria(
-                relative_fitness=tol,
-                relative_rmse=tol,
-                max_iteration=max_iter,
+from scipy.spatial.transform import Rotation, Slerp
+
+from dimos.memory2.store.memory import MemoryStore
+from dimos.memory2.stream import Stream
+from dimos.memory2.type.observation import Observation
+from dimos.msgs.geometry_msgs.Quaternion import Quaternion
+from dimos.msgs.geometry_msgs.Transform import Transform
+from dimos.msgs.geometry_msgs.Vector3 import Vector3
+from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
+
+T = TypeVar("T")
+
+
+@dataclass(frozen=True)
+class Keyframe:
+    ts: float
+    r_local: np.ndarray  # 3x3
+    t_local: np.ndarray  # (3,)
+    r_global: np.ndarray  # 3x3
+    t_global: np.ndarray  # (3,)
+
+
+def pgo_keyframes(
+    stream: Stream[PointCloud2],
+    *,
+    on_frame: Callable[[Any], None] | None = None,
+    **pgo_cfg: Any,
+) -> Stream[Keyframe]:
+    """Run PGO across a pose-stamped point-cloud stream; return one obs per keyframe."""
+    # Imported here to keep pgo_internals.py the only place that imports gtsam at module scope.
+    from dimos.mapping.relocalization.pgo_internals import PGOConfig, _SimplePGO
+
+    cfg = PGOConfig(**pgo_cfg)
+    pgo = _SimplePGO(cfg)
+
+    for obs in stream:
+        if on_frame is not None:
+            on_frame(obs)
+        if obs.pose is None:
+            continue
+        x, y, z, qx, qy, qz, qw = obs.pose
+        if x == 0 and y == 0 and z == 0:
+            continue
+        if qx == 0 and qy == 0 and qz == 0 and qw == 0:
+            continue
+        r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix()
+        t = np.array([x, y, z])
+        points, _ = obs.data.as_numpy()
+        if len(points) == 0:
+            continue
+        body_pts = (
+            (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3]
+        )
+        if pgo.add_key_pose(r, t, obs.ts, body_pts):
+            pgo.search_for_loops()
+            pgo.smooth_and_update()
+
+    kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp)
+    kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp]
+
+    mem = MemoryStore()
+    out: Stream[Keyframe] = mem.stream("keyframes", Keyframe)
+    for kp in kps:
+        out.append(
+            Keyframe(
+                ts=kp.timestamp,
+                r_local=np.ascontiguousarray(kp.r_local),
+                t_local=np.ascontiguousarray(kp.t_local),
+                r_global=np.ascontiguousarray(kp.r_global),
+                t_global=np.ascontiguousarray(kp.t_global),
             ),
+            ts=kp.timestamp,
         )
-
-    fitness_inlier_frac = float(result.fitness)
-    if fitness_inlier_frac == 0.0:
-        return np.eye(4), float("inf")
-
-    rmse = float(result.inlier_rmse)
-    T = result.transformation.numpy()
-    # Return mean squared inlier distance (m²) to match prior _icp contract.
-    return T, rmse * rmse
-
-
-def _voxel_downsample(pts: np.ndarray, voxel_size: float) -> np.ndarray:
-    if len(pts) == 0 or voxel_size <= 0:
-        return pts
-    keys = np.floor(pts / voxel_size).astype(np.int32)
-    _, idx = np.unique(keys, axis=0, return_index=True)
-    return pts[idx]
-
-
-class _SimplePGO:
-    def __init__(self, config: PGOConfig) -> None:
-        self._cfg = config
-        self._key_poses: list[_KeyPose] = []
-        self._history_pairs: list[tuple[int, int]] = []
-        self._cache_pairs: list[dict[str, Any]] = []
-        self._r_offset = np.eye(3)
-        self._t_offset = np.zeros(3)
-
-        params = gtsam.ISAM2Params()
-        params.setRelinearizeThreshold(0.01)
-        params.relinearizeSkip = 1
-        self._isam2 = gtsam.ISAM2(params)
-        self._graph = gtsam.NonlinearFactorGraph()
-        self._values = gtsam.Values()
-
-    def is_key_pose(self, r: np.ndarray, t: np.ndarray) -> bool:
-        if not self._key_poses:
-            return True
-        last = self._key_poses[-1]
-        delta_trans = np.linalg.norm(t - last.t_local)
-        # Angular distance via quaternion dot product
-        q_cur = Rotation.from_matrix(r).as_quat()  # [x,y,z,w]
-        q_last = Rotation.from_matrix(last.r_local).as_quat()
-        dot = abs(np.dot(q_cur, q_last))
-        delta_deg = np.degrees(2.0 * np.arccos(min(dot, 1.0)))
-        return bool(
-            delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg
+    return out
+
+
+def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]:
+    """Per-keyframe drift correction as Transform(world_corrected <- world_raw)."""
+    mem = MemoryStore()
+    out: Stream[Transform] = mem.stream("corrections", Transform)
+    for obs in keyframes:
+        kp = obs.data
+        R_corr = kp.r_global @ kp.r_local.T
+        t_corr = kp.t_global - R_corr @ kp.t_local
+        tf = Transform(
+            translation=Vector3(float(t_corr[0]), float(t_corr[1]), float(t_corr[2])),
+            rotation=Quaternion.from_rotation_matrix(R_corr),
+            frame_id="world_corrected",
+            child_frame_id="world_raw",
+            ts=kp.ts,
         )
+        out.append(tf, ts=kp.ts)
+    return out
+
+
+def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]:
+    """Materialize corrections once; return a fast ts -> Transform lookup."""
+    ts_list: list[float] = []
+    R_list: list[np.ndarray] = []
+    t_list: list[np.ndarray] = []
+    for obs in corrections:
+        tf = obs.data
+        ts_list.append(tf.ts)
+        q = tf.rotation
+        R_list.append(Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix())
+        t_list.append(np.array([tf.translation.x, tf.translation.y, tf.translation.z]))
+
+    if not ts_list:
+        raise ValueError("empty corrections stream")
+
+    if len(ts_list) == 1:
+        only_ts = ts_list[0]
+        only_R = R_list[0]
+        only_t = t_list[0]
+
+        def _const(ts: float) -> Transform:
+            return Transform(
+                translation=Vector3(float(only_t[0]), float(only_t[1]), float(only_t[2])),
+                rotation=Quaternion.from_rotation_matrix(only_R),
+                frame_id="world_corrected",
+                child_frame_id="world_raw",
+                ts=ts if ts is not None else only_ts,
+            )
 
-    def add_key_pose(
-        self, r_local: np.ndarray, t_local: np.ndarray, timestamp: float, body_cloud: np.ndarray
-    ) -> bool:
-        if not self.is_key_pose(r_local, t_local):
-            return False
-
-        idx = len(self._key_poses)
-        init_r = self._r_offset @ r_local
-        init_t = self._r_offset @ t_local + self._t_offset
+        return _const
 
-        pose = gtsam.Pose3(gtsam.Rot3(init_r), gtsam.Point3(init_t))
-        self._values.insert(idx, pose)
+    ts_arr = np.array(ts_list)
+    R_stack = np.stack(R_list)
+    t_stack = np.stack(t_list)
+    slerp = Slerp(ts_arr, Rotation.from_matrix(R_stack))
 
+    def interp(ts: float) -> Transform:
+        ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1]))
+        R = slerp([ts_clip])[0].as_matrix()
+        idx = int(np.searchsorted(ts_arr, ts_clip))
         if idx == 0:
-            noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12))
-            self._graph.add(gtsam.PriorFactorPose3(idx, pose, noise))
+            t = t_stack[0]
+        elif idx >= len(ts_arr):
+            t = t_stack[-1]
         else:
-            last = self._key_poses[-1]
-            r_between = last.r_local.T @ r_local
-            t_between = last.r_local.T @ (t_local - last.t_local)
-            noise = gtsam.noiseModel.Diagonal.Variances(
-                np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6])
-            )
-            self._graph.add(
-                gtsam.BetweenFactorPose3(
-                    idx - 1, idx, gtsam.Pose3(gtsam.Rot3(r_between), gtsam.Point3(t_between)), noise
-                )
-            )
-
-        kp = _KeyPose(
-            r_local=r_local.copy(),
-            t_local=t_local.copy(),
-            r_global=init_r.copy(),
-            t_global=init_t.copy(),
-            timestamp=timestamp,
-            body_cloud=_voxel_downsample(body_cloud, self._cfg.submap_resolution),
-        )
-        self._key_poses.append(kp)
-        return True
-
-    def _get_submap(self, idx: int, half_range: int) -> np.ndarray:
-        lo = max(0, idx - half_range)
-        hi = min(len(self._key_poses) - 1, idx + half_range)
-        parts = []
-        for i in range(lo, hi + 1):
-            kp = self._key_poses[i]
-            world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global
-            parts.append(world)
-        if not parts:
-            return np.empty((0, 3))
-        cloud = np.vstack(parts)
-        return _voxel_downsample(cloud, self._cfg.submap_resolution)
-
-    def search_for_loops(self) -> None:
-        if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search:
-            return
-
-        # Rate limit
-        if self._history_pairs:
-            cur_time = self._key_poses[-1].timestamp
-            last_time = self._key_poses[self._history_pairs[-1][1]].timestamp
-            if cur_time - last_time < self._cfg.min_loop_detect_duration:
-                return
-
-        cur_idx = len(self._key_poses) - 1
-        cur_kp = self._key_poses[-1]
-
-        # Build KD-tree of previous keyframe positions
-        positions = np.array([kp.t_global for kp in self._key_poses[:-1]])
-        tree = KDTree(positions)
-
-        idxs = tree.query_ball_point(cur_kp.t_global, self._cfg.loop_search_radius)
-        if not idxs:
-            return
-
-        # Pick the spatially closest keyframe that's also old enough in time.
-        # query_ball_point doesn't sort, so we sort by distance ourselves.
-        candidates = [
-            (float(np.linalg.norm(self._key_poses[i].t_global - cur_kp.t_global)), i)
-            for i in idxs
-            if abs(cur_kp.timestamp - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh
-        ]
-        if not candidates:
-            return
-        candidates.sort()
-        loop_idx = candidates[0][1]
-
-        # ICP verification
-        target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range)
-        source = self._get_submap(cur_idx, 0)
-
-        transform, fitness = _icp(
-            source,
-            target,
-            max_iter=self._cfg.max_icp_iterations,
-            max_dist=self._cfg.max_icp_correspondence_dist,
-            min_inliers=self._cfg.min_icp_inliers,
-        )
-        if fitness > self._cfg.loop_score_thresh:
-            return
-
-        # Compute relative pose
-        R_icp = transform[:3, :3]
-        t_icp = transform[:3, 3]
-        r_refined = R_icp @ cur_kp.r_global
-        t_refined = R_icp @ cur_kp.t_global + t_icp
-        r_offset = self._key_poses[loop_idx].r_global.T @ r_refined
-        t_offset = self._key_poses[loop_idx].r_global.T @ (
-            t_refined - self._key_poses[loop_idx].t_global
+            t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx]
+            alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0
+            t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx]
+        return Transform(
+            translation=Vector3(float(t[0]), float(t[1]), float(t[2])),
+            rotation=Quaternion.from_rotation_matrix(R),
+            frame_id="world_corrected",
+            child_frame_id="world_raw",
+            ts=float(ts),
         )
 
-        self._cache_pairs.append(
-            {
-                "source": cur_idx,
-                "target": loop_idx,
-                "r_offset": r_offset,
-                "t_offset": t_offset,
-                "score": fitness,
-            }
-        )
-        self._history_pairs.append((loop_idx, cur_idx))
-        logger.info(
-            "Loop closure detected",
-            source=cur_idx,
-            target=loop_idx,
-            score=round(fitness, 4),
-        )
+    return interp
 
-    def smooth_and_update(self) -> None:
-        has_loop = bool(self._cache_pairs)
-
-        for pair in self._cache_pairs:
-            # Pose3 noise model is [rx, ry, rz, x, y, z]. The two halves
-            # have different units (rad² vs m²), so a uniform variance —
-            # the original behaviour — silently makes one half pathological
-            # (e.g. score=0.07 → σ_rot ≈ 15° AND σ_trans ≈ 26 cm; one of
-            # those is too tight, one is too loose, depending on the loop).
-            # Use ICP fitness as the *translation* variance and a
-            # generous fixed rotation variance — loops shouldn't be
-            # trusted to fix rotation tightly without normals + p2plane.
-            trans_var = max(0.01, float(pair["score"]))  # ≥ σ_trans = 10 cm
-            rot_var = 0.05  # σ_rot ≈ 13°
-            noise = gtsam.noiseModel.Diagonal.Variances(
-                np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var])
-            )
-            self._graph.add(
-                gtsam.BetweenFactorPose3(
-                    pair["target"],
-                    pair["source"],
-                    gtsam.Pose3(gtsam.Rot3(pair["r_offset"]), gtsam.Point3(pair["t_offset"])),
-                    noise,
-                )
+
+def correction_at(corrections: Stream[Transform], ts: float) -> Transform:
+    """One-off lookup. For hot paths build `make_interpolator` once and reuse."""
+    return make_interpolator(corrections)(ts)
+
+
+def apply_corrections(
+    stream: Stream[T],
+    corrections: Stream[Transform],
+) -> Stream[T]:
+    """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts.
+
+    `obs.data` is untouched. Frames with `obs.pose is None` pass through
+    unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped).
+    """
+    interp = make_interpolator(corrections)
+
+    def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]:
+        for obs in upstream:
+            if obs.pose is None:
+                yield obs
+                continue
+            x, y, z, qx, qy, qz, qw = obs.pose
+            tf = interp(obs.ts)
+            R_corr = Rotation.from_quat(
+                [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w]
+            ).as_matrix()
+            t_corr = np.array([tf.translation.x, tf.translation.y, tf.translation.z])
+            R_raw = Rotation.from_quat([qx, qy, qz, qw]).as_matrix()
+            t_raw = np.array([x, y, z])
+            R_new = R_corr @ R_raw
+            t_new = R_corr @ t_raw + t_corr
+            q_new = Rotation.from_matrix(R_new).as_quat()  # xyzw
+            new_pose = (
+                float(t_new[0]),
+                float(t_new[1]),
+                float(t_new[2]),
+                float(q_new[0]),
+                float(q_new[1]),
+                float(q_new[2]),
+                float(q_new[3]),
             )
-        self._cache_pairs.clear()
-
-        self._isam2.update(self._graph, self._values)
-        self._isam2.update()
-        if has_loop:
-            for _ in range(self._cfg.loop_closure_extra_iterations):
-                self._isam2.update()
-        self._graph = gtsam.NonlinearFactorGraph()
-        self._values = gtsam.Values()
-
-        estimates = self._isam2.calculateBestEstimate()
-        for i in range(len(self._key_poses)):
-            pose = estimates.atPose3(i)
-            self._key_poses[i].r_global = pose.rotation().matrix()
-            self._key_poses[i].t_global = pose.translation()
-
-        last = self._key_poses[-1]
-        self._r_offset = last.r_global @ last.r_local.T
-        self._t_offset = last.t_global - self._r_offset @ last.t_local
-
-    def get_corrected_pose(
-        self, r_local: np.ndarray, t_local: np.ndarray
-    ) -> tuple[np.ndarray, np.ndarray]:
-        return self._r_offset @ r_local, self._r_offset @ t_local + self._t_offset
-
-    def build_global_map(self, voxel_size: float) -> np.ndarray:
-        if not self._key_poses:
-            return np.empty((0, 3), dtype=np.float32)
-        parts = []
-        for kp in self._key_poses:
-            world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global
-            parts.append(world)
-        cloud = np.vstack(parts).astype(np.float32)
-        return _voxel_downsample(cloud, voxel_size)
-
-    @property
-    def num_key_poses(self) -> int:
-        return len(self._key_poses)
+            yield obs.derive(data=obs.data, pose=new_pose)
+
+    return stream.transform(xf)
diff --git a/dimos/mapping/relocalization/pgo2.py b/dimos/mapping/relocalization/pgo2.py
deleted file mode 100644
index 3aec9cb0eb..0000000000
--- a/dimos/mapping/relocalization/pgo2.py
+++ /dev/null
@@ -1,235 +0,0 @@
-# Copyright 2026 Dimensional Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""PGO drift corrections as composable Stream stages.
-
-Pipeline:
-
-    lidar: Stream[PointCloud2]
-        -> pgo_keyframes(...)            -> Stream[Keyframe]
-        -> keyframes_to_corrections(...) -> Stream[Transform]   (world_corrected <- world_raw)
-        -> apply_corrections(any_stream, corrections) -> Stream[T]   (obs.pose shuffled)
-
-The math: per keyframe, the drift correction is
-    R_corr = R_global @ R_local.T
-    t_corr = t_global - R_corr @ t_local
-and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t,
-clipping out-of-range to endpoints.
-"""
-
-from __future__ import annotations
-
-from collections.abc import Callable, Iterator
-from dataclasses import dataclass
-from typing import Any, TypeVar
-
-import numpy as np
-from scipy.spatial.transform import Rotation, Slerp
-
-from dimos.memory2.store.memory import MemoryStore
-from dimos.memory2.stream import Stream
-from dimos.memory2.type.observation import Observation
-from dimos.msgs.geometry_msgs.Quaternion import Quaternion
-from dimos.msgs.geometry_msgs.Transform import Transform
-from dimos.msgs.geometry_msgs.Vector3 import Vector3
-from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
-
-T = TypeVar("T")
-
-
-@dataclass(frozen=True)
-class Keyframe:
-    ts: float
-    r_local: np.ndarray  # 3x3
-    t_local: np.ndarray  # (3,)
-    r_global: np.ndarray  # 3x3
-    t_global: np.ndarray  # (3,)
-
-
-def pgo_keyframes(
-    stream: Stream[PointCloud2],
-    *,
-    on_frame: Callable[[Any], None] | None = None,
-    **pgo_cfg: Any,
-) -> Stream[Keyframe]:
-    """Run PGO across a pose-stamped point-cloud stream; return one obs per keyframe."""
-    # Imported here to keep pgo.py the only place that imports gtsam at module scope.
-    from dimos.mapping.relocalization.pgo import PGOConfig, _SimplePGO
-
-    cfg = PGOConfig(**pgo_cfg)
-    pgo = _SimplePGO(cfg)
-
-    for obs in stream:
-        if on_frame is not None:
-            on_frame(obs)
-        if obs.pose is None:
-            continue
-        x, y, z, qx, qy, qz, qw = obs.pose
-        if x == 0 and y == 0 and z == 0:
-            continue
-        if qx == 0 and qy == 0 and qz == 0 and qw == 0:
-            continue
-        r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix()
-        t = np.array([x, y, z])
-        points, _ = obs.data.as_numpy()
-        if len(points) == 0:
-            continue
-        body_pts = (
-            (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3]
-        )
-        if pgo.add_key_pose(r, t, obs.ts, body_pts):
-            pgo.search_for_loops()
-            pgo.smooth_and_update()
-
-    kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp)
-    kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp]
-
-    mem = MemoryStore()
-    out: Stream[Keyframe] = mem.stream("keyframes", Keyframe)
-    for kp in kps:
-        out.append(
-            Keyframe(
-                ts=kp.timestamp,
-                r_local=np.ascontiguousarray(kp.r_local),
-                t_local=np.ascontiguousarray(kp.t_local),
-                r_global=np.ascontiguousarray(kp.r_global),
-                t_global=np.ascontiguousarray(kp.t_global),
-            ),
-            ts=kp.timestamp,
-        )
-    return out
-
-
-def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]:
-    """Per-keyframe drift correction as Transform(world_corrected <- world_raw)."""
-    mem = MemoryStore()
-    out: Stream[Transform] = mem.stream("corrections", Transform)
-    for obs in keyframes:
-        kp = obs.data
-        R_corr = kp.r_global @ kp.r_local.T
-        t_corr = kp.t_global - R_corr @ kp.t_local
-        tf = Transform(
-            translation=Vector3(float(t_corr[0]), float(t_corr[1]), float(t_corr[2])),
-            rotation=Quaternion.from_rotation_matrix(R_corr),
-            frame_id="world_corrected",
-            child_frame_id="world_raw",
-            ts=kp.ts,
-        )
-        out.append(tf, ts=kp.ts)
-    return out
-
-
-def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]:
-    """Materialize corrections once; return a fast ts -> Transform lookup."""
-    ts_list: list[float] = []
-    R_list: list[np.ndarray] = []
-    t_list: list[np.ndarray] = []
-    for obs in corrections:
-        tf = obs.data
-        ts_list.append(tf.ts)
-        q = tf.rotation
-        R_list.append(Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix())
-        t_list.append(np.array([tf.translation.x, tf.translation.y, tf.translation.z]))
-
-    if not ts_list:
-        raise ValueError("empty corrections stream")
-
-    if len(ts_list) == 1:
-        only_ts = ts_list[0]
-        only_R = R_list[0]
-        only_t = t_list[0]
-
-        def _const(ts: float) -> Transform:
-            return Transform(
-                translation=Vector3(float(only_t[0]), float(only_t[1]), float(only_t[2])),
-                rotation=Quaternion.from_rotation_matrix(only_R),
-                frame_id="world_corrected",
-                child_frame_id="world_raw",
-                ts=ts if ts is not None else only_ts,
-            )
-
-        return _const
-
-    ts_arr = np.array(ts_list)
-    R_stack = np.stack(R_list)
-    t_stack = np.stack(t_list)
-    slerp = Slerp(ts_arr, Rotation.from_matrix(R_stack))
-
-    def interp(ts: float) -> Transform:
-        ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1]))
-        R = slerp([ts_clip])[0].as_matrix()
-        idx = int(np.searchsorted(ts_arr, ts_clip))
-        if idx == 0:
-            t = t_stack[0]
-        elif idx >= len(ts_arr):
-            t = t_stack[-1]
-        else:
-            t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx]
-            alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0
-            t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx]
-        return Transform(
-            translation=Vector3(float(t[0]), float(t[1]), float(t[2])),
-            rotation=Quaternion.from_rotation_matrix(R),
-            frame_id="world_corrected",
-            child_frame_id="world_raw",
-            ts=float(ts),
-        )
-
-    return interp
-
-
-def correction_at(corrections: Stream[Transform], ts: float) -> Transform:
-    """One-off lookup. For hot paths build `make_interpolator` once and reuse."""
-    return make_interpolator(corrections)(ts)
-
-
-def apply_corrections(
-    stream: Stream[T],
-    corrections: Stream[Transform],
-) -> Stream[T]:
-    """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts.
-
-    `obs.data` is untouched. Frames with `obs.pose is None` pass through
-    unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped).
-    """
-    interp = make_interpolator(corrections)
-
-    def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]:
-        for obs in upstream:
-            if obs.pose is None:
-                yield obs
-                continue
-            x, y, z, qx, qy, qz, qw = obs.pose
-            tf = interp(obs.ts)
-            R_corr = Rotation.from_quat(
-                [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w]
-            ).as_matrix()
-            t_corr = np.array([tf.translation.x, tf.translation.y, tf.translation.z])
-            R_raw = Rotation.from_quat([qx, qy, qz, qw]).as_matrix()
-            t_raw = np.array([x, y, z])
-            R_new = R_corr @ R_raw
-            t_new = R_corr @ t_raw + t_corr
-            q_new = Rotation.from_matrix(R_new).as_quat()  # xyzw
-            new_pose = (
-                float(t_new[0]),
-                float(t_new[1]),
-                float(t_new[2]),
-                float(q_new[0]),
-                float(q_new[1]),
-                float(q_new[2]),
-                float(q_new[3]),
-            )
-            yield obs.derive(data=obs.data, pose=new_pose)
-
-    return stream.transform(xf)
diff --git a/dimos/mapping/relocalization/pgo_internals.py b/dimos/mapping/relocalization/pgo_internals.py
new file mode 100644
index 0000000000..c3465d8ee5
--- /dev/null
+++ b/dimos/mapping/relocalization/pgo_internals.py
@@ -0,0 +1,369 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Internal PGO/ICP machinery (gtsam-backed). Public consumers should use
+# the Stream-shaped API in pgo.py — pgo_keyframes, keyframes_to_corrections,
+# make_interpolator, apply_corrections. The classes here (_SimplePGO,
+# PGOConfig, _KeyPose) are imported from pgo to do the heavy lifting.
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+import gtsam  # type: ignore[import-not-found,import-untyped]
+import numpy as np
+import open3d as o3d  # type: ignore[import-untyped]
+import open3d.core as o3c  # type: ignore[import-untyped]
+from scipy.spatial import KDTree
+from scipy.spatial.transform import Rotation
+
+from dimos.core.module import ModuleConfig
+from dimos.utils.logging_config import setup_logger
+
+FRAME_MAP = "world"
+
+logger = setup_logger()
+
+
+class PGOConfig(ModuleConfig):
+    world_frame: str = FRAME_MAP
+
+    # Keyframe detection
+    key_pose_delta_trans: float = 0.5
+    key_pose_delta_deg: float = 10.0
+
+    # Loop closure
+    loop_search_radius: float = 2.0
+    loop_time_thresh: float = 20.0
+    loop_score_thresh: float = 0.3
+    loop_submap_half_range: int = 10
+    min_icp_inliers: int = 10
+    min_keyframes_for_loop_search: int = 10
+    loop_closure_extra_iterations: int = 4
+    submap_resolution: float = 0.2
+    min_loop_detect_duration: float = 5.0
+
+    # Input mode
+    unregister_input: bool = True  # Transform world-frame scans to body-frame using odom
+
+    # Global map
+    publish_global_map: bool = True
+    global_map_publish_rate: float = 0.5
+    global_map_voxel_size: float = 0.15
+
+    # ICP
+    max_icp_iterations: int = 50
+    max_icp_correspondence_dist: float = 1.0
+
+
+@dataclass
+class _KeyPose:
+    r_local: np.ndarray  # 3x3 rotation in local/odom frame
+    t_local: np.ndarray  # 3-vec translation in local/odom frame
+    r_global: np.ndarray  # 3x3 corrected rotation
+    t_global: np.ndarray  # 3-vec corrected translation
+    timestamp: float
+    body_cloud: np.ndarray  # Nx3 points in body frame
+
+
+def _icp(
+    source: np.ndarray,
+    target: np.ndarray,
+    max_iter: int = 50,
+    max_dist: float = 1.0,
+    tol: float = 1e-6,
+    min_inliers: int = 10,
+    init: np.ndarray | None = None,
+) -> tuple[np.ndarray, float]:
+    """Point-to-point ICP using Open3D's tensor pipeline.
+
+    Returns ``(T, fitness)`` where ``fitness`` is mean squared inlier
+    distance (m²) — same semantic as the previous SVD implementation, so
+    the GTSAM noise model in ``smooth_and_update`` keeps working.
+    """
+    if len(source) < min_inliers or len(target) < min_inliers:
+        return np.eye(4), float("inf")
+
+    cpu = o3c.Device("CPU:0")
+    src_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(source.astype(np.float32), device=cpu))
+    tgt_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(target.astype(np.float32), device=cpu))
+
+    # Normals on the target enable point-to-plane ICP, which converges
+    # tighter than point-to-point on indoor scenes (walls give unambiguous
+    # normals that resolve the slide-along-wall ambiguity).
+    tgt_pcd.estimate_normals(max_nn=30, radius=0.3)
+
+    init_T = (
+        o3c.Tensor(init.astype(np.float64), dtype=o3c.float64, device=cpu)
+        if init is not None
+        else o3c.Tensor.eye(4, dtype=o3c.float64, device=cpu)
+    )
+
+    # Silence Open3D's "0 correspondence" warning — we deliberately use a
+    # tight max_correspondence_distance and reject loops with poor fitness;
+    # the warning is informational, not an error.
+    with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error):
+        result = o3d.t.pipelines.registration.icp(
+            source=src_pcd,
+            target=tgt_pcd,
+            max_correspondence_distance=max_dist,
+            init_source_to_target=init_T,
+            estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(),
+            criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria(
+                relative_fitness=tol,
+                relative_rmse=tol,
+                max_iteration=max_iter,
+            ),
+        )
+
+    fitness_inlier_frac = float(result.fitness)
+    if fitness_inlier_frac == 0.0:
+        return np.eye(4), float("inf")
+
+    rmse = float(result.inlier_rmse)
+    T = result.transformation.numpy()
+    # Return mean squared inlier distance (m²) to match prior _icp contract.
+    return T, rmse * rmse
+
+
+def _voxel_downsample(pts: np.ndarray, voxel_size: float) -> np.ndarray:
+    if len(pts) == 0 or voxel_size <= 0:
+        return pts
+    keys = np.floor(pts / voxel_size).astype(np.int32)
+    _, idx = np.unique(keys, axis=0, return_index=True)
+    return pts[idx]
+
+
+class _SimplePGO:
+    def __init__(self, config: PGOConfig) -> None:
+        self._cfg = config
+        self._key_poses: list[_KeyPose] = []
+        self._history_pairs: list[tuple[int, int]] = []
+        self._cache_pairs: list[dict[str, Any]] = []
+        self._r_offset = np.eye(3)
+        self._t_offset = np.zeros(3)
+
+        params = gtsam.ISAM2Params()
+        params.setRelinearizeThreshold(0.01)
+        params.relinearizeSkip = 1
+        self._isam2 = gtsam.ISAM2(params)
+        self._graph = gtsam.NonlinearFactorGraph()
+        self._values = gtsam.Values()
+
+    def is_key_pose(self, r: np.ndarray, t: np.ndarray) -> bool:
+        if not self._key_poses:
+            return True
+        last = self._key_poses[-1]
+        delta_trans = np.linalg.norm(t - last.t_local)
+        # Angular distance via quaternion dot product
+        q_cur = Rotation.from_matrix(r).as_quat()  # [x,y,z,w]
+        q_last = Rotation.from_matrix(last.r_local).as_quat()
+        dot = abs(np.dot(q_cur, q_last))
+        delta_deg = np.degrees(2.0 * np.arccos(min(dot, 1.0)))
+        return bool(
+            delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg
+        )
+
+    def add_key_pose(
+        self, r_local: np.ndarray, t_local: np.ndarray, timestamp: float, body_cloud: np.ndarray
+    ) -> bool:
+        if not self.is_key_pose(r_local, t_local):
+            return False
+
+        idx = len(self._key_poses)
+        init_r = self._r_offset @ r_local
+        init_t = self._r_offset @ t_local + self._t_offset
+
+        pose = gtsam.Pose3(gtsam.Rot3(init_r), gtsam.Point3(init_t))
+        self._values.insert(idx, pose)
+
+        if idx == 0:
+            noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12))
+            self._graph.add(gtsam.PriorFactorPose3(idx, pose, noise))
+        else:
+            last = self._key_poses[-1]
+            r_between = last.r_local.T @ r_local
+            t_between = last.r_local.T @ (t_local - last.t_local)
+            noise = gtsam.noiseModel.Diagonal.Variances(
+                np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6])
+            )
+            self._graph.add(
+                gtsam.BetweenFactorPose3(
+                    idx - 1, idx, gtsam.Pose3(gtsam.Rot3(r_between), gtsam.Point3(t_between)), noise
+                )
+            )
+
+        kp = _KeyPose(
+            r_local=r_local.copy(),
+            t_local=t_local.copy(),
+            r_global=init_r.copy(),
+            t_global=init_t.copy(),
+            timestamp=timestamp,
+            body_cloud=_voxel_downsample(body_cloud, self._cfg.submap_resolution),
+        )
+        self._key_poses.append(kp)
+        return True
+
+    def _get_submap(self, idx: int, half_range: int) -> np.ndarray:
+        lo = max(0, idx - half_range)
+        hi = min(len(self._key_poses) - 1, idx + half_range)
+        parts = []
+        for i in range(lo, hi + 1):
+            kp = self._key_poses[i]
+            world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global
+            parts.append(world)
+        if not parts:
+            return np.empty((0, 3))
+        cloud = np.vstack(parts)
+        return _voxel_downsample(cloud, self._cfg.submap_resolution)
+
+    def search_for_loops(self) -> None:
+        if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search:
+            return
+
+        # Rate limit
+        if self._history_pairs:
+            cur_time = self._key_poses[-1].timestamp
+            last_time = self._key_poses[self._history_pairs[-1][1]].timestamp
+            if cur_time - last_time < self._cfg.min_loop_detect_duration:
+                return
+
+        cur_idx = len(self._key_poses) - 1
+        cur_kp = self._key_poses[-1]
+
+        # Build KD-tree of previous keyframe positions
+        positions = np.array([kp.t_global for kp in self._key_poses[:-1]])
+        tree = KDTree(positions)
+
+        idxs = tree.query_ball_point(cur_kp.t_global, self._cfg.loop_search_radius)
+        if not idxs:
+            return
+
+        # Pick the spatially closest keyframe that's also old enough in time.
+        # query_ball_point doesn't sort, so we sort by distance ourselves.
+        candidates = [
+            (float(np.linalg.norm(self._key_poses[i].t_global - cur_kp.t_global)), i)
+            for i in idxs
+            if abs(cur_kp.timestamp - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh
+        ]
+        if not candidates:
+            return
+        candidates.sort()
+        loop_idx = candidates[0][1]
+
+        # ICP verification
+        target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range)
+        source = self._get_submap(cur_idx, 0)
+
+        transform, fitness = _icp(
+            source,
+            target,
+            max_iter=self._cfg.max_icp_iterations,
+            max_dist=self._cfg.max_icp_correspondence_dist,
+            min_inliers=self._cfg.min_icp_inliers,
+        )
+        if fitness > self._cfg.loop_score_thresh:
+            return
+
+        # Compute relative pose
+        R_icp = transform[:3, :3]
+        t_icp = transform[:3, 3]
+        r_refined = R_icp @ cur_kp.r_global
+        t_refined = R_icp @ cur_kp.t_global + t_icp
+        r_offset = self._key_poses[loop_idx].r_global.T @ r_refined
+        t_offset = self._key_poses[loop_idx].r_global.T @ (
+            t_refined - self._key_poses[loop_idx].t_global
+        )
+
+        self._cache_pairs.append(
+            {
+                "source": cur_idx,
+                "target": loop_idx,
+                "r_offset": r_offset,
+                "t_offset": t_offset,
+                "score": fitness,
+            }
+        )
+        self._history_pairs.append((loop_idx, cur_idx))
+        logger.info(
+            "Loop closure detected",
+            source=cur_idx,
+            target=loop_idx,
+            score=round(fitness, 4),
+        )
+
+    def smooth_and_update(self) -> None:
+        has_loop = bool(self._cache_pairs)
+
+        for pair in self._cache_pairs:
+            # Pose3 noise model is [rx, ry, rz, x, y, z]. The two halves
+            # have different units (rad² vs m²), so a uniform variance —
+            # the original behaviour — silently makes one half pathological
+            # (e.g. score=0.07 → σ_rot ≈ 15° AND σ_trans ≈ 26 cm; one of
+            # those is too tight, one is too loose, depending on the loop).
+            # Use ICP fitness as the *translation* variance and a
+            # generous fixed rotation variance — loops shouldn't be
+            # trusted to fix rotation tightly without normals + p2plane.
+            trans_var = max(0.01, float(pair["score"]))  # ≥ σ_trans = 10 cm
+            rot_var = 0.05  # σ_rot ≈ 13°
+            noise = gtsam.noiseModel.Diagonal.Variances(
+                np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var])
+            )
+            self._graph.add(
+                gtsam.BetweenFactorPose3(
+                    pair["target"],
+                    pair["source"],
+                    gtsam.Pose3(gtsam.Rot3(pair["r_offset"]), gtsam.Point3(pair["t_offset"])),
+                    noise,
+                )
+            )
+        self._cache_pairs.clear()
+
+        self._isam2.update(self._graph, self._values)
+        self._isam2.update()
+        if has_loop:
+            for _ in range(self._cfg.loop_closure_extra_iterations):
+                self._isam2.update()
+        self._graph = gtsam.NonlinearFactorGraph()
+        self._values = gtsam.Values()
+
+        estimates = self._isam2.calculateBestEstimate()
+        for i in range(len(self._key_poses)):
+            pose = estimates.atPose3(i)
+            self._key_poses[i].r_global = pose.rotation().matrix()
+            self._key_poses[i].t_global = pose.translation()
+
+        last = self._key_poses[-1]
+        self._r_offset = last.r_global @ last.r_local.T
+        self._t_offset = last.t_global - self._r_offset @ last.t_local
+
+    def get_corrected_pose(
+        self, r_local: np.ndarray, t_local: np.ndarray
+    ) -> tuple[np.ndarray, np.ndarray]:
+        return self._r_offset @ r_local, self._r_offset @ t_local + self._t_offset
+
+    def build_global_map(self, voxel_size: float) -> np.ndarray:
+        if not self._key_poses:
+            return np.empty((0, 3), dtype=np.float32)
+        parts = []
+        for kp in self._key_poses:
+            world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global
+            parts.append(world)
+        cloud = np.vstack(parts).astype(np.float32)
+        return _voxel_downsample(cloud, voxel_size)
+
+    @property
+    def num_key_poses(self) -> int:
+        return len(self._key_poses)
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index dc8f934347..645844b735 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -98,7 +98,7 @@ def collect_path(obs):
         import numpy as np
         from scipy.spatial.transform import Rotation
 
-        from dimos.mapping.relocalization.pgo2 import (
+        from dimos.mapping.relocalization.pgo import (
             keyframes_to_corrections,
             make_interpolator,
             pgo_keyframes,

From 25cf609bdc4dd56e8e207c37744507b49ccef86b Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 19:58:19 +0800
Subject: [PATCH 07/36] pgo rewrite

---
 dimos/mapping/relocalization/pgo.py           | 558 +++++++++++++++---
 dimos/mapping/relocalization/pgo_internals.py | 369 ------------
 dimos/memory2/type/observation.py             |  38 +-
 dimos/utils/cli/map.py                        |   6 +-
 4 files changed, 502 insertions(+), 469 deletions(-)
 delete mode 100644 dimos/mapping/relocalization/pgo_internals.py

diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py
index 653c7158d2..e006cc1b7b 100644
--- a/dimos/mapping/relocalization/pgo.py
+++ b/dimos/mapping/relocalization/pgo.py
@@ -26,15 +26,20 @@
     t_corr = t_global - R_corr @ t_local
 and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t,
 clipping out-of-range to endpoints.
+
+`gtsam` and `open3d` are imported lazily inside hot helpers so importing this module
+stays cheap and gtsam-free for consumers that only need `Keyframe` / `apply_corrections`.
 """
 
 from __future__ import annotations
 
 from collections.abc import Callable, Iterator
 from dataclasses import dataclass
-from typing import Any, TypeVar
+from typing import TYPE_CHECKING, Any, TypeVar
 
 import numpy as np
+import open3d as o3d  # type: ignore[import-untyped]
+import open3d.core as o3c  # type: ignore[import-untyped]
 from scipy.spatial.transform import Rotation, Slerp
 
 from dimos.memory2.store.memory import MemoryStore
@@ -44,17 +49,53 @@
 from dimos.msgs.geometry_msgs.Transform import Transform
 from dimos.msgs.geometry_msgs.Vector3 import Vector3
 from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
+from dimos.protocol.service.spec import BaseConfig
+from dimos.utils.logging_config import setup_logger
+
+if TYPE_CHECKING:
+    import gtsam  # type: ignore[import-not-found,import-untyped]
 
 T = TypeVar("T")
 
+FRAME_WORLD_CORRECTED = "world_corrected"
+FRAME_WORLD_RAW = "world_raw"
+FRAME_BODY = "body"
+
+logger = setup_logger()
+
+
+class PGOConfig(BaseConfig):
+    # Keyframe detection
+    key_pose_delta_trans: float = 0.5
+    key_pose_delta_deg: float = 10.0
+
+    # Loop closure
+    loop_search_radius: float = 2.0
+    loop_time_thresh: float = 20.0
+    loop_score_thresh: float = 0.3
+    loop_submap_half_range: int = 10
+    min_icp_inliers: int = 10
+    min_keyframes_for_loop_search: int = 10
+    loop_closure_extra_iterations: int = 4
+    submap_resolution: float = 0.2
+    min_loop_detect_duration: float = 5.0
+
+    # ICP
+    max_icp_iterations: int = 50
+    max_icp_correspondence_dist: float = 1.0
+
 
 @dataclass(frozen=True)
 class Keyframe:
+    """Keyframe emitted by `pgo_keyframes`.
+
+    `local` is the body pose in the odom (`world_raw`) frame, `optimized` is
+    the body pose in the drift-corrected (`world_corrected`) frame.
+    """
+
     ts: float
-    r_local: np.ndarray  # 3x3
-    t_local: np.ndarray  # (3,)
-    r_global: np.ndarray  # 3x3
-    t_global: np.ndarray  # (3,)
+    local: Transform
+    optimized: Transform
 
 
 def pgo_keyframes(
@@ -63,51 +104,32 @@ def pgo_keyframes(
     on_frame: Callable[[Any], None] | None = None,
     **pgo_cfg: Any,
 ) -> Stream[Keyframe]:
-    """Run PGO across a pose-stamped point-cloud stream; return one obs per keyframe."""
-    # Imported here to keep pgo_internals.py the only place that imports gtsam at module scope.
-    from dimos.mapping.relocalization.pgo_internals import PGOConfig, _SimplePGO
-
+    """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe."""
     cfg = PGOConfig(**pgo_cfg)
-    pgo = _SimplePGO(cfg)
+    pgo = _PGO(cfg)
 
     for obs in stream:
         if on_frame is not None:
             on_frame(obs)
         if obs.pose is None:
             continue
-        x, y, z, qx, qy, qz, qw = obs.pose
-        if x == 0 and y == 0 and z == 0:
-            continue
-        if qx == 0 and qy == 0 and qz == 0 and qw == 0:
+        # Skip placeholder poses (origin position OR zero quaternion).
+        if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0:
             continue
-        r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix()
-        t = np.array([x, y, z])
-        points, _ = obs.data.as_numpy()
-        if len(points) == 0:
+        if (
+            obs.pose[3] == 0
+            and obs.pose[4] == 0
+            and obs.pose[5] == 0
+            and (obs.pose[6] == 0 or obs.pose[6] == 1)
+        ):
             continue
-        body_pts = (
-            (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3]
-        )
-        if pgo.add_key_pose(r, t, obs.ts, body_pts):
-            pgo.search_for_loops()
-            pgo.smooth_and_update()
-
-    kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp)
-    kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp]
+        local_pose = _obs_to_pose3(obs)
+        pgo.process(local_pose, obs.ts, obs.data)
 
     mem = MemoryStore()
     out: Stream[Keyframe] = mem.stream("keyframes", Keyframe)
-    for kp in kps:
-        out.append(
-            Keyframe(
-                ts=kp.timestamp,
-                r_local=np.ascontiguousarray(kp.r_local),
-                t_local=np.ascontiguousarray(kp.t_local),
-                r_global=np.ascontiguousarray(kp.r_global),
-                t_global=np.ascontiguousarray(kp.t_global),
-            ),
-            ts=kp.timestamp,
-        )
+    for kf in pgo.finalize():
+        out.append(kf, ts=kf.ts)
     return out
 
 
@@ -116,17 +138,10 @@ def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]:
     mem = MemoryStore()
     out: Stream[Transform] = mem.stream("corrections", Transform)
     for obs in keyframes:
-        kp = obs.data
-        R_corr = kp.r_global @ kp.r_local.T
-        t_corr = kp.t_global - R_corr @ kp.t_local
-        tf = Transform(
-            translation=Vector3(float(t_corr[0]), float(t_corr[1]), float(t_corr[2])),
-            rotation=Quaternion.from_rotation_matrix(R_corr),
-            frame_id="world_corrected",
-            child_frame_id="world_raw",
-            ts=kp.ts,
-        )
-        out.append(tf, ts=kp.ts)
+        kf = obs.data
+        # world_corrected <- body <- world_raw composes to world_corrected <- world_raw.
+        drift = kf.optimized + kf.local.inverse()
+        out.append(drift, ts=kf.ts)
     return out
 
 
@@ -136,30 +151,22 @@ def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Trans
     R_list: list[np.ndarray] = []
     t_list: list[np.ndarray] = []
     for obs in corrections:
-        tf = obs.data
-        ts_list.append(tf.ts)
-        q = tf.rotation
-        R_list.append(Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix())
-        t_list.append(np.array([tf.translation.x, tf.translation.y, tf.translation.z]))
+        R, t = _r_t_from_transform(obs.data)
+        # obs.ts is authoritative; obs.data.ts can be mutated by Transform's
+        # ts=0.0 -> time.time() fallback in its constructor.
+        ts_list.append(obs.ts)
+        R_list.append(R)
+        t_list.append(t)
 
     if not ts_list:
         raise ValueError("empty corrections stream")
 
+    # Slerp needs ≥2 keyframes. Pad len==1 with a duplicate so the
+    # general path handles it; clip-to-endpoints behavior is unchanged.
     if len(ts_list) == 1:
-        only_ts = ts_list[0]
-        only_R = R_list[0]
-        only_t = t_list[0]
-
-        def _const(ts: float) -> Transform:
-            return Transform(
-                translation=Vector3(float(only_t[0]), float(only_t[1]), float(only_t[2])),
-                rotation=Quaternion.from_rotation_matrix(only_R),
-                frame_id="world_corrected",
-                child_frame_id="world_raw",
-                ts=ts if ts is not None else only_ts,
-            )
-
-        return _const
+        ts_list.append(ts_list[0] + 1e-6)
+        R_list.append(R_list[0])
+        t_list.append(t_list[0])
 
     ts_arr = np.array(ts_list)
     R_stack = np.stack(R_list)
@@ -178,13 +185,7 @@ def interp(ts: float) -> Transform:
             t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx]
             alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0
             t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx]
-        return Transform(
-            translation=Vector3(float(t[0]), float(t[1]), float(t[2])),
-            rotation=Quaternion.from_rotation_matrix(R),
-            frame_id="world_corrected",
-            child_frame_id="world_raw",
-            ts=float(ts),
-        )
+        return _transform_from_r_t(R, t, ts=float(ts))
 
     return interp
 
@@ -210,26 +211,391 @@ def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]:
             if obs.pose is None:
                 yield obs
                 continue
-            x, y, z, qx, qy, qz, qw = obs.pose
-            tf = interp(obs.ts)
-            R_corr = Rotation.from_quat(
-                [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w]
-            ).as_matrix()
-            t_corr = np.array([tf.translation.x, tf.translation.y, tf.translation.z])
-            R_raw = Rotation.from_quat([qx, qy, qz, qw]).as_matrix()
-            t_raw = np.array([x, y, z])
-            R_new = R_corr @ R_raw
-            t_new = R_corr @ t_raw + t_corr
-            q_new = Rotation.from_matrix(R_new).as_quat()  # xyzw
-            new_pose = (
-                float(t_new[0]),
-                float(t_new[1]),
-                float(t_new[2]),
-                float(q_new[0]),
-                float(q_new[1]),
-                float(q_new[2]),
-                float(q_new[3]),
-            )
-            yield obs.derive(data=obs.data, pose=new_pose)
+            raw_tf = Transform.from_pose(FRAME_BODY, obs.pose_stamped)
+            # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw.
+            # Observation normalizes Transform back to 7-tuple via __post_init__.
+            corrected = interp(obs.ts) + raw_tf
+            yield obs.derive(data=obs.data, pose=corrected)
 
     return stream.transform(xf)
+
+
+# ---------------------------------------------------------------------------
+# Transform <-> (R, t) and obs.pose helpers (private)
+# ---------------------------------------------------------------------------
+
+
+def _r_t_from_transform(tf: Transform) -> tuple[np.ndarray, np.ndarray]:
+    q = tf.rotation
+    R = Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix()
+    t = np.array([tf.translation.x, tf.translation.y, tf.translation.z])
+    return R, t
+
+
+def _transform_from_r_t(R: np.ndarray, t: np.ndarray, *, ts: float) -> Transform:
+    return Transform(
+        translation=Vector3(float(t[0]), float(t[1]), float(t[2])),
+        rotation=Quaternion.from_rotation_matrix(R),
+        frame_id=FRAME_WORLD_CORRECTED,
+        child_frame_id=FRAME_WORLD_RAW,
+        ts=ts,
+    )
+
+
+def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3:
+    """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`."""
+    import gtsam  # type: ignore[import-not-found,import-untyped]
+
+    if obs.pose is None:
+        raise LookupError("No pose set on this observation")
+    x, y, z, qx, qy, qz, qw = obs.pose
+    return gtsam.Pose3(
+        gtsam.Rot3.Quaternion(float(qw), float(qx), float(qy), float(qz)),
+        gtsam.Point3(float(x), float(y), float(z)),
+    )
+
+
+@dataclass
+class _KeyPose:
+    local: gtsam.Pose3  # odom-frame pose at capture
+    optimized: gtsam.Pose3  # drift-corrected pose
+    timestamp: float
+    body_cloud: PointCloud2  # voxel-downsampled, body frame
+
+
+@dataclass
+class _LoopPair:
+    source: int
+    target: int
+    offset: gtsam.Pose3  # source pose in target's frame
+    score: float
+
+
+class _PGO:
+    """Incremental PGO: gtsam ISAM2 over keyframes with ICP loop closures.
+
+    Call `process` per frame (odom pose + body-frame points). Call
+    `finalize` once at the end for the sorted, deduped keyframe list.
+    """
+
+    def __init__(self, config: PGOConfig) -> None:
+        import gtsam  # type: ignore[import-not-found,import-untyped]
+
+        self._gtsam = gtsam
+        self._cfg = config
+        self._key_poses: list[_KeyPose] = []
+        self._pending_loops: list[_LoopPair] = []
+        self._last_loop_ts: float | None = None
+        self._world_correction: gtsam.Pose3 = gtsam.Pose3()  # identity
+
+        params = gtsam.ISAM2Params()
+        params.setRelinearizeThreshold(0.01)
+        params.relinearizeSkip = 1
+        self._isam2 = gtsam.ISAM2(params)
+        self._graph = gtsam.NonlinearFactorGraph()
+        self._values = gtsam.Values()
+
+    def process(
+        self,
+        local_pose: gtsam.Pose3,
+        ts: float,
+        world_cloud: PointCloud2,
+    ) -> None:
+        if len(world_cloud) == 0:
+            return
+        if not self._is_keyframe(local_pose):
+            return
+        # Unregister: lift world-frame scan back into body frame using its
+        # odom pose, so PGO can re-project it via the optimized pose later.
+        body_cloud = world_cloud.transform(
+            _pose3_to_transform(local_pose.inverse(), ts=ts)
+        ).voxel_downsample(self._cfg.submap_resolution)
+        self._add_keyframe(local_pose, ts, body_cloud)
+        self._search_for_loops()
+        self._smooth_and_update()
+
+    def finalize(self) -> list[Keyframe]:
+        """Return keyframes sorted by ts, with duplicate-ts entries dropped."""
+        kps = sorted(self._key_poses, key=lambda kp: kp.timestamp)
+        out: list[Keyframe] = []
+        for i, kp in enumerate(kps):
+            if i > 0 and kp.timestamp <= kps[i - 1].timestamp:
+                continue
+            out.append(
+                Keyframe(
+                    ts=kp.timestamp,
+                    local=_pose3_to_transform(
+                        kp.local,
+                        ts=kp.timestamp,
+                        frame_id=FRAME_WORLD_RAW,
+                        child_frame_id=FRAME_BODY,
+                    ),
+                    optimized=_pose3_to_transform(
+                        kp.optimized,
+                        ts=kp.timestamp,
+                        frame_id=FRAME_WORLD_CORRECTED,
+                        child_frame_id=FRAME_BODY,
+                    ),
+                )
+            )
+        return out
+
+    # ---------- internal ----------
+
+    def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool:
+        if not self._key_poses:
+            return True
+        delta = self._key_poses[-1].local.inverse().compose(local_pose)
+        delta_trans = float(np.linalg.norm(np.asarray(delta.translation())))
+        # Rotation magnitude from trace: cos(theta) = (tr(R) - 1) / 2.
+        R = delta.rotation().matrix()
+        cos_theta = float(np.clip((np.trace(R) - 1.0) / 2.0, -1.0, 1.0))
+        delta_deg = float(np.degrees(np.arccos(cos_theta)))
+        return (
+            delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg
+        )
+
+    def _add_keyframe(
+        self,
+        local_pose: gtsam.Pose3,
+        ts: float,
+        body_cloud: PointCloud2,
+    ) -> None:
+        gtsam = self._gtsam
+        idx = len(self._key_poses)
+        optimized = self._world_correction.compose(local_pose)
+
+        self._values.insert(idx, optimized)
+
+        if idx == 0:
+            noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12))
+            self._graph.add(gtsam.PriorFactorPose3(idx, optimized, noise))
+        else:
+            last_local = self._key_poses[-1].local
+            between = last_local.inverse().compose(local_pose)
+            noise = gtsam.noiseModel.Diagonal.Variances(
+                np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6])
+            )
+            self._graph.add(gtsam.BetweenFactorPose3(idx - 1, idx, between, noise))
+
+        self._key_poses.append(
+            _KeyPose(
+                local=local_pose,
+                optimized=optimized,
+                timestamp=ts,
+                body_cloud=body_cloud,
+            )
+        )
+
+    def _get_submap(self, idx: int, half_range: int) -> PointCloud2:
+        lo = max(0, idx - half_range)
+        hi = min(len(self._key_poses) - 1, idx + half_range)
+        if lo > hi:
+            return PointCloud2()
+        cloud = self._key_poses[lo].body_cloud.transform(
+            _pose3_to_transform(self._key_poses[lo].optimized, ts=self._key_poses[lo].timestamp)
+        )
+        for i in range(lo + 1, hi + 1):
+            kp = self._key_poses[i]
+            cloud = cloud + kp.body_cloud.transform(
+                _pose3_to_transform(kp.optimized, ts=kp.timestamp)
+            )
+        return cloud.voxel_downsample(self._cfg.submap_resolution)
+
+    def _search_for_loops(self) -> None:
+        if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search:
+            return
+
+        cur_ts = self._key_poses[-1].timestamp
+        if (
+            self._last_loop_ts is not None
+            and cur_ts - self._last_loop_ts < self._cfg.min_loop_detect_duration
+        ):
+            return
+
+        cur_idx = len(self._key_poses) - 1
+        cur_kp = self._key_poses[-1]
+        cur_t = np.asarray(cur_kp.optimized.translation())
+
+        from scipy.spatial import KDTree
+
+        positions = np.array(
+            [np.asarray(kp.optimized.translation()) for kp in self._key_poses[:-1]]
+        )
+        tree = KDTree(positions)
+        idxs = tree.query_ball_point(cur_t, self._cfg.loop_search_radius)
+        if not idxs:
+            return
+
+        # query_ball_point doesn't sort by distance — do it ourselves and
+        # filter by min time gap to avoid closing loops to recent keyframes.
+        candidates = [
+            (
+                float(
+                    np.linalg.norm(np.asarray(self._key_poses[i].optimized.translation()) - cur_t)
+                ),
+                i,
+            )
+            for i in idxs
+            if abs(cur_ts - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh
+        ]
+        if not candidates:
+            return
+        candidates.sort()
+        loop_idx = candidates[0][1]
+
+        target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range)
+        source = self._get_submap(cur_idx, 0)
+
+        icp_tf, fitness = _icp(
+            source,
+            target,
+            max_iter=self._cfg.max_icp_iterations,
+            max_dist=self._cfg.max_icp_correspondence_dist,
+            min_inliers=self._cfg.min_icp_inliers,
+        )
+        if fitness > self._cfg.loop_score_thresh:
+            return
+
+        # icp_tf takes cur_kp.optimized -> refined pose (correcting the drift).
+        # offset = loop_kp.optimized^-1 * refined = relative pose from loop to cur.
+        icp_pose = _transform_to_pose3(icp_tf)
+        refined = icp_pose.compose(cur_kp.optimized)
+        offset = self._key_poses[loop_idx].optimized.between(refined)
+
+        self._pending_loops.append(
+            _LoopPair(
+                source=cur_idx,
+                target=loop_idx,
+                offset=offset,
+                score=fitness,
+            )
+        )
+        self._last_loop_ts = cur_ts
+        logger.info(
+            "Loop closure detected",
+            source=cur_idx,
+            target=loop_idx,
+            score=round(fitness, 4),
+        )
+
+    def _smooth_and_update(self) -> None:
+        gtsam = self._gtsam
+        has_loop = bool(self._pending_loops)
+
+        for pair in self._pending_loops:
+            # Pose3 noise is [rx, ry, rz, x, y, z]. The two halves have
+            # different units (rad² vs m²), so a uniform variance silently
+            # makes one half pathological. Use ICP fitness as the
+            # *translation* variance and a generous fixed rotation variance
+            # — loops shouldn't be trusted to fix rotation tightly without
+            # normals + p2plane.
+            trans_var = max(0.01, float(pair.score))  # >= sigma_trans = 10 cm
+            rot_var = 0.05  # sigma_rot ~ 13 deg
+            noise = gtsam.noiseModel.Diagonal.Variances(
+                np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var])
+            )
+            self._graph.add(gtsam.BetweenFactorPose3(pair.target, pair.source, pair.offset, noise))
+        self._pending_loops.clear()
+
+        self._isam2.update(self._graph, self._values)
+        self._isam2.update()
+        if has_loop:
+            for _ in range(self._cfg.loop_closure_extra_iterations):
+                self._isam2.update()
+        self._graph = gtsam.NonlinearFactorGraph()
+        self._values = gtsam.Values()
+
+        estimates = self._isam2.calculateBestEstimate()
+        for i in range(len(self._key_poses)):
+            self._key_poses[i].optimized = estimates.atPose3(i)
+
+        last = self._key_poses[-1]
+        self._world_correction = last.optimized.compose(last.local.inverse())
+
+
+def _pose3_to_transform(
+    pose: gtsam.Pose3,
+    *,
+    ts: float,
+    frame_id: str = "",
+    child_frame_id: str = "",
+) -> Transform:
+    """PGO-internal: build a Transform from a Pose3."""
+    t = np.asarray(pose.translation())
+    return Transform(
+        translation=Vector3(float(t[0]), float(t[1]), float(t[2])),
+        rotation=Quaternion.from_rotation_matrix(pose.rotation().matrix()),
+        frame_id=frame_id,
+        child_frame_id=child_frame_id,
+        ts=ts,
+    )
+
+
+def _transform_to_pose3(tf: Transform) -> gtsam.Pose3:
+    """PGO-internal: build a Pose3 from a Transform."""
+    import gtsam  # type: ignore[import-not-found,import-untyped]
+
+    return gtsam.Pose3(tf.to_matrix())
+
+
+def _icp(
+    source: PointCloud2,
+    target: PointCloud2,
+    max_iter: int = 50,
+    max_dist: float = 1.0,
+    tol: float = 1e-6,
+    min_inliers: int = 10,
+    init: Transform | None = None,
+) -> tuple[Transform, float]:
+    """Point-to-plane ICP using Open3D's tensor pipeline.
+
+    Returns ``(tf, fitness)`` where ``fitness`` is mean squared inlier
+    distance (m^2) — the GTSAM noise model in `_smooth_and_update` uses
+    this directly as sigma_trans squared. On rejection (too few points or
+    zero correspondences) returns the identity transform and inf fitness.
+    """
+
+    if len(source) < min_inliers or len(target) < min_inliers:
+        return Transform.identity(), float("inf")
+
+    src_pcd = source.pointcloud_tensor
+    tgt_pcd = target.pointcloud_tensor
+
+    # Normals on the target enable point-to-plane ICP — converges tighter
+    # than point-to-point on indoor scenes (walls give unambiguous normals
+    # that resolve the slide-along-wall ambiguity).
+    tgt_pcd.estimate_normals(max_nn=30, radius=0.3)
+
+    device = src_pcd.device
+    init_T = o3c.Tensor(
+        init.to_matrix() if init is not None else np.eye(4),
+        dtype=o3c.float64,
+        device=device,
+    )
+
+    # Silence Open3D's "0 correspondence" warning — we deliberately use a
+    # tight max_correspondence_distance and reject loops with poor fitness.
+    with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error):
+        result = o3d.t.pipelines.registration.icp(
+            source=src_pcd,
+            target=tgt_pcd,
+            max_correspondence_distance=max_dist,
+            init_source_to_target=init_T,
+            estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(),
+            criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria(
+                relative_fitness=tol,
+                relative_rmse=tol,
+                max_iteration=max_iter,
+            ),
+        )
+
+    if float(result.fitness) == 0.0:
+        return Transform.identity(), float("inf")
+
+    T_mat = result.transformation.numpy()
+    rmse = float(result.inlier_rmse)
+    return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse
+    T_mat = result.transformation.numpy()
+    rmse = float(result.inlier_rmse)
+    return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse
diff --git a/dimos/mapping/relocalization/pgo_internals.py b/dimos/mapping/relocalization/pgo_internals.py
deleted file mode 100644
index c3465d8ee5..0000000000
--- a/dimos/mapping/relocalization/pgo_internals.py
+++ /dev/null
@@ -1,369 +0,0 @@
-# Copyright 2026 Dimensional Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Internal PGO/ICP machinery (gtsam-backed). Public consumers should use
-# the Stream-shaped API in pgo.py — pgo_keyframes, keyframes_to_corrections,
-# make_interpolator, apply_corrections. The classes here (_SimplePGO,
-# PGOConfig, _KeyPose) are imported from pgo to do the heavy lifting.
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Any
-
-import gtsam  # type: ignore[import-not-found,import-untyped]
-import numpy as np
-import open3d as o3d  # type: ignore[import-untyped]
-import open3d.core as o3c  # type: ignore[import-untyped]
-from scipy.spatial import KDTree
-from scipy.spatial.transform import Rotation
-
-from dimos.core.module import ModuleConfig
-from dimos.utils.logging_config import setup_logger
-
-FRAME_MAP = "world"
-
-logger = setup_logger()
-
-
-class PGOConfig(ModuleConfig):
-    world_frame: str = FRAME_MAP
-
-    # Keyframe detection
-    key_pose_delta_trans: float = 0.5
-    key_pose_delta_deg: float = 10.0
-
-    # Loop closure
-    loop_search_radius: float = 2.0
-    loop_time_thresh: float = 20.0
-    loop_score_thresh: float = 0.3
-    loop_submap_half_range: int = 10
-    min_icp_inliers: int = 10
-    min_keyframes_for_loop_search: int = 10
-    loop_closure_extra_iterations: int = 4
-    submap_resolution: float = 0.2
-    min_loop_detect_duration: float = 5.0
-
-    # Input mode
-    unregister_input: bool = True  # Transform world-frame scans to body-frame using odom
-
-    # Global map
-    publish_global_map: bool = True
-    global_map_publish_rate: float = 0.5
-    global_map_voxel_size: float = 0.15
-
-    # ICP
-    max_icp_iterations: int = 50
-    max_icp_correspondence_dist: float = 1.0
-
-
-@dataclass
-class _KeyPose:
-    r_local: np.ndarray  # 3x3 rotation in local/odom frame
-    t_local: np.ndarray  # 3-vec translation in local/odom frame
-    r_global: np.ndarray  # 3x3 corrected rotation
-    t_global: np.ndarray  # 3-vec corrected translation
-    timestamp: float
-    body_cloud: np.ndarray  # Nx3 points in body frame
-
-
-def _icp(
-    source: np.ndarray,
-    target: np.ndarray,
-    max_iter: int = 50,
-    max_dist: float = 1.0,
-    tol: float = 1e-6,
-    min_inliers: int = 10,
-    init: np.ndarray | None = None,
-) -> tuple[np.ndarray, float]:
-    """Point-to-point ICP using Open3D's tensor pipeline.
-
-    Returns ``(T, fitness)`` where ``fitness`` is mean squared inlier
-    distance (m²) — same semantic as the previous SVD implementation, so
-    the GTSAM noise model in ``smooth_and_update`` keeps working.
-    """
-    if len(source) < min_inliers or len(target) < min_inliers:
-        return np.eye(4), float("inf")
-
-    cpu = o3c.Device("CPU:0")
-    src_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(source.astype(np.float32), device=cpu))
-    tgt_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(target.astype(np.float32), device=cpu))
-
-    # Normals on the target enable point-to-plane ICP, which converges
-    # tighter than point-to-point on indoor scenes (walls give unambiguous
-    # normals that resolve the slide-along-wall ambiguity).
-    tgt_pcd.estimate_normals(max_nn=30, radius=0.3)
-
-    init_T = (
-        o3c.Tensor(init.astype(np.float64), dtype=o3c.float64, device=cpu)
-        if init is not None
-        else o3c.Tensor.eye(4, dtype=o3c.float64, device=cpu)
-    )
-
-    # Silence Open3D's "0 correspondence" warning — we deliberately use a
-    # tight max_correspondence_distance and reject loops with poor fitness;
-    # the warning is informational, not an error.
-    with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error):
-        result = o3d.t.pipelines.registration.icp(
-            source=src_pcd,
-            target=tgt_pcd,
-            max_correspondence_distance=max_dist,
-            init_source_to_target=init_T,
-            estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(),
-            criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria(
-                relative_fitness=tol,
-                relative_rmse=tol,
-                max_iteration=max_iter,
-            ),
-        )
-
-    fitness_inlier_frac = float(result.fitness)
-    if fitness_inlier_frac == 0.0:
-        return np.eye(4), float("inf")
-
-    rmse = float(result.inlier_rmse)
-    T = result.transformation.numpy()
-    # Return mean squared inlier distance (m²) to match prior _icp contract.
-    return T, rmse * rmse
-
-
-def _voxel_downsample(pts: np.ndarray, voxel_size: float) -> np.ndarray:
-    if len(pts) == 0 or voxel_size <= 0:
-        return pts
-    keys = np.floor(pts / voxel_size).astype(np.int32)
-    _, idx = np.unique(keys, axis=0, return_index=True)
-    return pts[idx]
-
-
-class _SimplePGO:
-    def __init__(self, config: PGOConfig) -> None:
-        self._cfg = config
-        self._key_poses: list[_KeyPose] = []
-        self._history_pairs: list[tuple[int, int]] = []
-        self._cache_pairs: list[dict[str, Any]] = []
-        self._r_offset = np.eye(3)
-        self._t_offset = np.zeros(3)
-
-        params = gtsam.ISAM2Params()
-        params.setRelinearizeThreshold(0.01)
-        params.relinearizeSkip = 1
-        self._isam2 = gtsam.ISAM2(params)
-        self._graph = gtsam.NonlinearFactorGraph()
-        self._values = gtsam.Values()
-
-    def is_key_pose(self, r: np.ndarray, t: np.ndarray) -> bool:
-        if not self._key_poses:
-            return True
-        last = self._key_poses[-1]
-        delta_trans = np.linalg.norm(t - last.t_local)
-        # Angular distance via quaternion dot product
-        q_cur = Rotation.from_matrix(r).as_quat()  # [x,y,z,w]
-        q_last = Rotation.from_matrix(last.r_local).as_quat()
-        dot = abs(np.dot(q_cur, q_last))
-        delta_deg = np.degrees(2.0 * np.arccos(min(dot, 1.0)))
-        return bool(
-            delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg
-        )
-
-    def add_key_pose(
-        self, r_local: np.ndarray, t_local: np.ndarray, timestamp: float, body_cloud: np.ndarray
-    ) -> bool:
-        if not self.is_key_pose(r_local, t_local):
-            return False
-
-        idx = len(self._key_poses)
-        init_r = self._r_offset @ r_local
-        init_t = self._r_offset @ t_local + self._t_offset
-
-        pose = gtsam.Pose3(gtsam.Rot3(init_r), gtsam.Point3(init_t))
-        self._values.insert(idx, pose)
-
-        if idx == 0:
-            noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12))
-            self._graph.add(gtsam.PriorFactorPose3(idx, pose, noise))
-        else:
-            last = self._key_poses[-1]
-            r_between = last.r_local.T @ r_local
-            t_between = last.r_local.T @ (t_local - last.t_local)
-            noise = gtsam.noiseModel.Diagonal.Variances(
-                np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6])
-            )
-            self._graph.add(
-                gtsam.BetweenFactorPose3(
-                    idx - 1, idx, gtsam.Pose3(gtsam.Rot3(r_between), gtsam.Point3(t_between)), noise
-                )
-            )
-
-        kp = _KeyPose(
-            r_local=r_local.copy(),
-            t_local=t_local.copy(),
-            r_global=init_r.copy(),
-            t_global=init_t.copy(),
-            timestamp=timestamp,
-            body_cloud=_voxel_downsample(body_cloud, self._cfg.submap_resolution),
-        )
-        self._key_poses.append(kp)
-        return True
-
-    def _get_submap(self, idx: int, half_range: int) -> np.ndarray:
-        lo = max(0, idx - half_range)
-        hi = min(len(self._key_poses) - 1, idx + half_range)
-        parts = []
-        for i in range(lo, hi + 1):
-            kp = self._key_poses[i]
-            world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global
-            parts.append(world)
-        if not parts:
-            return np.empty((0, 3))
-        cloud = np.vstack(parts)
-        return _voxel_downsample(cloud, self._cfg.submap_resolution)
-
-    def search_for_loops(self) -> None:
-        if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search:
-            return
-
-        # Rate limit
-        if self._history_pairs:
-            cur_time = self._key_poses[-1].timestamp
-            last_time = self._key_poses[self._history_pairs[-1][1]].timestamp
-            if cur_time - last_time < self._cfg.min_loop_detect_duration:
-                return
-
-        cur_idx = len(self._key_poses) - 1
-        cur_kp = self._key_poses[-1]
-
-        # Build KD-tree of previous keyframe positions
-        positions = np.array([kp.t_global for kp in self._key_poses[:-1]])
-        tree = KDTree(positions)
-
-        idxs = tree.query_ball_point(cur_kp.t_global, self._cfg.loop_search_radius)
-        if not idxs:
-            return
-
-        # Pick the spatially closest keyframe that's also old enough in time.
-        # query_ball_point doesn't sort, so we sort by distance ourselves.
-        candidates = [
-            (float(np.linalg.norm(self._key_poses[i].t_global - cur_kp.t_global)), i)
-            for i in idxs
-            if abs(cur_kp.timestamp - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh
-        ]
-        if not candidates:
-            return
-        candidates.sort()
-        loop_idx = candidates[0][1]
-
-        # ICP verification
-        target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range)
-        source = self._get_submap(cur_idx, 0)
-
-        transform, fitness = _icp(
-            source,
-            target,
-            max_iter=self._cfg.max_icp_iterations,
-            max_dist=self._cfg.max_icp_correspondence_dist,
-            min_inliers=self._cfg.min_icp_inliers,
-        )
-        if fitness > self._cfg.loop_score_thresh:
-            return
-
-        # Compute relative pose
-        R_icp = transform[:3, :3]
-        t_icp = transform[:3, 3]
-        r_refined = R_icp @ cur_kp.r_global
-        t_refined = R_icp @ cur_kp.t_global + t_icp
-        r_offset = self._key_poses[loop_idx].r_global.T @ r_refined
-        t_offset = self._key_poses[loop_idx].r_global.T @ (
-            t_refined - self._key_poses[loop_idx].t_global
-        )
-
-        self._cache_pairs.append(
-            {
-                "source": cur_idx,
-                "target": loop_idx,
-                "r_offset": r_offset,
-                "t_offset": t_offset,
-                "score": fitness,
-            }
-        )
-        self._history_pairs.append((loop_idx, cur_idx))
-        logger.info(
-            "Loop closure detected",
-            source=cur_idx,
-            target=loop_idx,
-            score=round(fitness, 4),
-        )
-
-    def smooth_and_update(self) -> None:
-        has_loop = bool(self._cache_pairs)
-
-        for pair in self._cache_pairs:
-            # Pose3 noise model is [rx, ry, rz, x, y, z]. The two halves
-            # have different units (rad² vs m²), so a uniform variance —
-            # the original behaviour — silently makes one half pathological
-            # (e.g. score=0.07 → σ_rot ≈ 15° AND σ_trans ≈ 26 cm; one of
-            # those is too tight, one is too loose, depending on the loop).
-            # Use ICP fitness as the *translation* variance and a
-            # generous fixed rotation variance — loops shouldn't be
-            # trusted to fix rotation tightly without normals + p2plane.
-            trans_var = max(0.01, float(pair["score"]))  # ≥ σ_trans = 10 cm
-            rot_var = 0.05  # σ_rot ≈ 13°
-            noise = gtsam.noiseModel.Diagonal.Variances(
-                np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var])
-            )
-            self._graph.add(
-                gtsam.BetweenFactorPose3(
-                    pair["target"],
-                    pair["source"],
-                    gtsam.Pose3(gtsam.Rot3(pair["r_offset"]), gtsam.Point3(pair["t_offset"])),
-                    noise,
-                )
-            )
-        self._cache_pairs.clear()
-
-        self._isam2.update(self._graph, self._values)
-        self._isam2.update()
-        if has_loop:
-            for _ in range(self._cfg.loop_closure_extra_iterations):
-                self._isam2.update()
-        self._graph = gtsam.NonlinearFactorGraph()
-        self._values = gtsam.Values()
-
-        estimates = self._isam2.calculateBestEstimate()
-        for i in range(len(self._key_poses)):
-            pose = estimates.atPose3(i)
-            self._key_poses[i].r_global = pose.rotation().matrix()
-            self._key_poses[i].t_global = pose.translation()
-
-        last = self._key_poses[-1]
-        self._r_offset = last.r_global @ last.r_local.T
-        self._t_offset = last.t_global - self._r_offset @ last.t_local
-
-    def get_corrected_pose(
-        self, r_local: np.ndarray, t_local: np.ndarray
-    ) -> tuple[np.ndarray, np.ndarray]:
-        return self._r_offset @ r_local, self._r_offset @ t_local + self._t_offset
-
-    def build_global_map(self, voxel_size: float) -> np.ndarray:
-        if not self._key_poses:
-            return np.empty((0, 3), dtype=np.float32)
-        parts = []
-        for kp in self._key_poses:
-            world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global
-            parts.append(world)
-        cloud = np.vstack(parts).astype(np.float32)
-        return _voxel_downsample(cloud, voxel_size)
-
-    @property
-    def num_key_poses(self) -> int:
-        return len(self._key_poses)
diff --git a/dimos/memory2/type/observation.py b/dimos/memory2/type/observation.py
index 63b10a6dbf..179b48744e 100644
--- a/dimos/memory2/type/observation.py
+++ b/dimos/memory2/type/observation.py
@@ -46,9 +46,42 @@ def __repr__(self) -> str:
 _UNLOADED = _Unloaded()
 
 
+def _normalize_pose(p: Any) -> tuple[float, float, float, float, float, float, float] | None:
+    """Coerce common pose shapes to the storage 7-tuple `(x, y, z, qx, qy, qz, qw)`.
+
+    Accepts: `None`, an already-normalized tuple, or any object with
+    `translation`+`rotation` (Transform) or `position`+`orientation`
+    (Pose / PoseStamped) attributes. Duck-typed to avoid importing
+    `dimos.msgs.geometry_msgs` at module load.
+    """
+    if p is None or isinstance(p, tuple):
+        return p
+    trans = getattr(p, "translation", None) or getattr(p, "position", None)
+    rot = getattr(p, "rotation", None) or getattr(p, "orientation", None)
+    if trans is None or rot is None:
+        raise TypeError(
+            f"Cannot coerce {type(p).__name__} to a pose tuple — expected "
+            "tuple, None, Transform, Pose, or PoseStamped."
+        )
+    return (
+        float(trans.x),
+        float(trans.y),
+        float(trans.z),
+        float(rot.x),
+        float(rot.y),
+        float(rot.z),
+        float(rot.w),
+    )
+
+
 @dataclass
 class Observation(Generic[T]):
-    """A single timestamped observation with optional spatial pose and metadata."""
+    """A single timestamped observation with optional spatial pose and metadata.
+
+    `pose` is stored as a 7-tuple `(x, y, z, qx, qy, qz, qw)`. Passing a
+    `Transform`, `Pose`, or `PoseStamped` to the constructor or to `derive`
+    is normalized to the 7-tuple form automatically.
+    """
 
     id: int
     ts: float
@@ -59,6 +92,9 @@ class Observation(Generic[T]):
     _loader: Callable[[], T] | None = field(default=None, repr=False)
     _data_lock: threading.Lock = field(default_factory=threading.Lock, repr=False)
 
+    def __post_init__(self) -> None:
+        self.pose = _normalize_pose(self.pose)
+
     @property
     def pose_stamped(self) -> PoseStamped:
         from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 645844b735..543a898f6a 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -112,9 +112,9 @@ def collect_path(obs):
         corrections = keyframes_to_corrections(keyframes)
         interp = make_interpolator(corrections)
 
-        for obs in keyframes:
-            kp = obs.data
-            pgo_path.append((float(kp.t_global[0]), float(kp.t_global[1]), float(kp.t_global[2])))
+        for kf_obs in keyframes:
+            kf_t = kf_obs.data.optimized.translation
+            pgo_path.append((kf_t.x, kf_t.y, kf_t.z))
 
         pass2_pb = progress(total, "pgo pass 2 (rebuilding)")
         grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device)

From d11ef1654ffee88acee9329e95cb4818647d4f94 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 20:09:56 +0800
Subject: [PATCH 08/36] pgo: pytest + map.py type annotations + drop section
 markers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds test_pgo.py covering pose normalization on Observation, the Transform↔Pose3
conversion helpers, the interpolator edge cases, apply_corrections behavior, and
a real-recording smoke test (skipped when data/go2_short.db is absent).
Annotates the two map.py helpers so they pass mypy. Removes leftover section
divider comments to satisfy the no-section-markers project rule.
---
 dimos/mapping/relocalization/pgo.py      |   7 -
 dimos/mapping/relocalization/test_pgo.py | 365 +++++++++++++++++++++++
 dimos/utils/cli/map.py                   |  10 +-
 3 files changed, 372 insertions(+), 10 deletions(-)
 create mode 100644 dimos/mapping/relocalization/test_pgo.py

diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py
index e006cc1b7b..9b48aa6437 100644
--- a/dimos/mapping/relocalization/pgo.py
+++ b/dimos/mapping/relocalization/pgo.py
@@ -220,11 +220,6 @@ def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]:
     return stream.transform(xf)
 
 
-# ---------------------------------------------------------------------------
-# Transform <-> (R, t) and obs.pose helpers (private)
-# ---------------------------------------------------------------------------
-
-
 def _r_t_from_transform(tf: Transform) -> tuple[np.ndarray, np.ndarray]:
     q = tf.rotation
     R = Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix()
@@ -340,8 +335,6 @@ def finalize(self) -> list[Keyframe]:
             )
         return out
 
-    # ---------- internal ----------
-
     def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool:
         if not self._key_poses:
             return True
diff --git a/dimos/mapping/relocalization/test_pgo.py b/dimos/mapping/relocalization/test_pgo.py
new file mode 100644
index 0000000000..ce35cbd25a
--- /dev/null
+++ b/dimos/mapping/relocalization/test_pgo.py
@@ -0,0 +1,365 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import numpy as np
+import pytest
+from scipy.spatial.transform import Rotation
+
+from dimos.mapping.relocalization.pgo import (
+    Keyframe,
+    PGOConfig,
+    _obs_to_pose3,
+    _pose3_to_transform,
+    _r_t_from_transform,
+    _transform_from_r_t,
+    apply_corrections,
+    correction_at,
+    keyframes_to_corrections,
+    make_interpolator,
+    pgo_keyframes,
+)
+from dimos.memory2.store.memory import MemoryStore
+from dimos.memory2.stream import Stream
+from dimos.msgs.geometry_msgs.Quaternion import Quaternion
+from dimos.msgs.geometry_msgs.Transform import Transform
+from dimos.msgs.geometry_msgs.Vector3 import Vector3
+from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
+
+
+def _random_R(rng: np.random.Generator) -> np.ndarray:
+    """Random uniform rotation matrix via random quaternion."""
+    q = rng.standard_normal(4)
+    q /= np.linalg.norm(q)
+    return np.asarray(Rotation.from_quat(q).as_matrix())
+
+
+class TestPGOConfig:
+    def test_accepts_known_fields(self) -> None:
+        cfg = PGOConfig(key_pose_delta_trans=0.7, max_icp_iterations=42)
+        assert cfg.key_pose_delta_trans == 0.7
+        assert cfg.max_icp_iterations == 42
+
+    def test_rejects_unknown_fields(self) -> None:
+        # The plan deleted these; BaseConfig has extra="forbid" so they raise.
+        for dead in (
+            "world_frame",
+            "publish_global_map",
+            "global_map_publish_rate",
+            "global_map_voxel_size",
+            "unregister_input",
+        ):
+            with pytest.raises(Exception):
+                PGOConfig(**{dead: True})
+
+
+class TestTransformHelpers:
+    def test_r_t_transform_roundtrip(self) -> None:
+        rng = np.random.default_rng(0)
+        R_in = _random_R(rng)
+        t_in = rng.uniform(-5, 5, size=3)
+        tf = _transform_from_r_t(R_in, t_in, ts=1.23)
+        R_out, t_out = _r_t_from_transform(tf)
+        np.testing.assert_allclose(R_out, R_in, atol=1e-10)
+        np.testing.assert_allclose(t_out, t_in, atol=1e-10)
+        assert tf.ts == 1.23
+
+    def test_observation_normalizes_transform_pose(self) -> None:
+        """Constructing/deriving with pose=Transform should coerce to 7-tuple."""
+        from dimos.memory2.type.observation import Observation
+
+        tf = Transform(
+            translation=Vector3(1.5, -2.0, 0.7),
+            rotation=Quaternion(0.1, 0.2, 0.3, 0.927),
+            ts=1.0,
+        )
+        obs: Observation[int] = Observation(id=0, ts=1.0, pose=tf, _data=0)
+        assert isinstance(obs.pose, tuple)
+        assert obs.pose[0] == pytest.approx(1.5)
+        assert obs.pose[6] == pytest.approx(0.927)
+
+        # derive() also runs the normalization via __post_init__.
+        derived = obs.derive(data=0, pose=tf)
+        assert isinstance(derived.pose, tuple)
+        assert derived.pose == obs.pose
+
+    def test_observation_normalizes_posestamped(self) -> None:
+        from dimos.memory2.type.observation import Observation
+        from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
+
+        ps = PoseStamped(ts=1.0, position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0))
+        obs: Observation[int] = Observation(id=0, ts=1.0, pose=ps, _data=0)
+        assert isinstance(obs.pose, tuple)
+        assert obs.pose == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0)
+
+    def test_obs_to_pose3_roundtrip(self) -> None:
+        from dimos.memory2.type.observation import Observation
+
+        rng = np.random.default_rng(4)
+        R = _random_R(rng)
+        t = rng.uniform(-3, 3, size=3)
+        tf = _transform_from_r_t(R, t, ts=1.0)
+        obs: Observation[int] = Observation(id=0, ts=1.0, pose=tf, _data=0)
+        p = _obs_to_pose3(obs)
+        np.testing.assert_allclose(p.rotation().matrix(), R, atol=1e-9)
+        np.testing.assert_allclose(np.asarray(p.translation()), t, atol=1e-9)
+
+    def test_pose3_to_transform(self) -> None:
+        import gtsam  # type: ignore[import-not-found,import-untyped]
+
+        rng = np.random.default_rng(2)
+        R = _random_R(rng)
+        t = rng.uniform(-3, 3, size=3)
+        p = gtsam.Pose3(gtsam.Rot3(R), gtsam.Point3(t))
+        tf = _pose3_to_transform(p, ts=7.89)
+        R_out, t_out = _r_t_from_transform(tf)
+        np.testing.assert_allclose(R_out, R, atol=1e-10)
+        np.testing.assert_allclose(t_out, t, atol=1e-10)
+
+    def test_pose3_to_transform_with_frames(self) -> None:
+        import gtsam
+
+        rng = np.random.default_rng(3)
+        R = _random_R(rng)
+        t = rng.uniform(-3, 3, size=3)
+        p = gtsam.Pose3(gtsam.Rot3(R), gtsam.Point3(t))
+        tf = _pose3_to_transform(p, ts=1.0, frame_id="world_corrected", child_frame_id="body")
+        assert tf.frame_id == "world_corrected"
+        assert tf.child_frame_id == "body"
+        R_out, t_out = _r_t_from_transform(tf)
+        np.testing.assert_allclose(R_out, R, atol=1e-10)
+        np.testing.assert_allclose(t_out, t, atol=1e-10)
+
+
+def _make_lidar_stream(n_frames: int = 12, points_per_frame: int = 500) -> Stream[PointCloud2]:
+    """Straight-line trajectory along +x with small yaw, random body points.
+
+    Note: `pgo_keyframes` skips poses with zero translation OR identity
+    rotation as placeholders, so we use a constant non-identity yaw.
+    """
+    rng = np.random.default_rng(0)
+    mem = MemoryStore()
+    lidar: Stream[PointCloud2] = mem.stream("lidar", PointCloud2)
+    # Small yaw (~6 deg) -> non-identity quaternion that survives the
+    # placeholder filter.
+    q = Rotation.from_euler("z", 0.1).as_quat()  # xyzw
+    qx, qy, qz, qw = float(q[0]), float(q[1]), float(q[2]), float(q[3])
+    R_world = Rotation.from_euler("z", 0.1).as_matrix()
+    for i in range(1, n_frames + 1):
+        body = rng.uniform(-1, 1, size=(points_per_frame, 3)).astype(np.float32)
+        world = (R_world @ body.T).T + np.array([i, 0, 0], dtype=np.float32)
+        lidar.append(
+            PointCloud2.from_numpy(world.astype(np.float32)),
+            ts=float(i),
+            pose=(float(i), 0.0, 0.0, qx, qy, qz, qw),
+        )
+    return lidar
+
+
+class TestPipelineEndToEnd:
+    def test_straight_line_produces_keyframes(self) -> None:
+        lidar = _make_lidar_stream(n_frames=12)
+        kfs = pgo_keyframes(lidar)
+        # 12 frames spaced 1m apart with key_pose_delta_trans=0.5 -> every frame
+        # after the first triggers a keyframe; some may dedupe but ~11 emitted.
+        n = kfs.count()
+        assert 10 <= n <= 12
+
+    def test_keyframes_to_corrections_size(self) -> None:
+        lidar = _make_lidar_stream(n_frames=12)
+        kfs = pgo_keyframes(lidar)
+        corr = keyframes_to_corrections(kfs)
+        assert corr.count() == kfs.count()
+
+    def test_apply_identity_corrections_preserves_poses(self) -> None:
+        # With no loop closures the optimization is a no-op -> drift = identity ->
+        # apply_corrections is a no-op on input poses.
+        lidar = _make_lidar_stream(n_frames=12)
+        kfs = pgo_keyframes(lidar)
+        corr = keyframes_to_corrections(kfs)
+        corrected = apply_corrections(lidar, corr)
+        in_poses = [o.pose for o in lidar if o.pose is not None]
+        out_poses = [o.pose for o in corrected if o.pose is not None]
+        assert len(in_poses) == len(out_poses)
+        for p_in, p_out in zip(in_poses, out_poses, strict=True):
+            for a, b in zip(p_in, p_out, strict=True):
+                assert a == pytest.approx(b, abs=1e-6)
+
+
+def _make_corrections(transforms: list[Transform]) -> Stream[Transform]:
+    mem = MemoryStore()
+    stream: Stream[Transform] = mem.stream("corrections", Transform)
+    for tf in transforms:
+        stream.append(tf, ts=tf.ts)
+    return stream
+
+
+class TestInterpolator:
+    def test_empty_stream_raises(self) -> None:
+        empty = _make_corrections([])
+        with pytest.raises(ValueError):
+            make_interpolator(empty)
+
+    def test_single_keyframe_returns_constant(self) -> None:
+        R = Rotation.from_euler("z", np.pi / 4).as_matrix()
+        t = np.array([1.0, 2.0, 3.0])
+        only = _transform_from_r_t(R, t, ts=10.0)
+        interp = make_interpolator(_make_corrections([only]))
+        for query_ts in (0.0, 10.0, 100.0):
+            out = interp(query_ts)
+            assert out.translation.x == pytest.approx(1.0, abs=1e-10)
+            assert out.translation.y == pytest.approx(2.0, abs=1e-10)
+            assert out.translation.z == pytest.approx(3.0, abs=1e-10)
+
+    def test_out_of_range_clips_to_endpoints(self) -> None:
+        # Note: Transform's constructor maps ts=0.0 -> time.time(); use ts>0
+        # so test timestamps are deterministic.
+        R = np.eye(3)
+        a = _transform_from_r_t(R, np.array([0.0, 0.0, 0.0]), ts=1.0)
+        b = _transform_from_r_t(R, np.array([10.0, 0.0, 0.0]), ts=11.0)
+        # _make_corrections uses tf.ts; obs.ts ends up the same.
+        mem = MemoryStore()
+        stream: Stream[Transform] = mem.stream("corrections", Transform)
+        stream.append(a, ts=1.0)
+        stream.append(b, ts=11.0)
+        interp = make_interpolator(stream)
+        # Below range -> clipped to a
+        assert interp(-5.0).translation.x == pytest.approx(0.0, abs=1e-10)
+        # Above range -> clipped to b
+        assert interp(100.0).translation.x == pytest.approx(10.0, abs=1e-10)
+        # In-range midpoint
+        assert interp(6.0).translation.x == pytest.approx(5.0, abs=1e-10)
+
+    def test_correction_at_oneshot(self) -> None:
+        R = np.eye(3)
+        a = _transform_from_r_t(R, np.array([0.0, 0.0, 0.0]), ts=1.0)
+        b = _transform_from_r_t(R, np.array([10.0, 0.0, 0.0]), ts=11.0)
+        mem = MemoryStore()
+        stream: Stream[Transform] = mem.stream("corrections", Transform)
+        stream.append(a, ts=1.0)
+        stream.append(b, ts=11.0)
+        result = correction_at(stream, 6.0)
+        assert result.translation.x == pytest.approx(5.0, abs=1e-10)
+
+
+class TestApplyCorrections:
+    def test_pure_translation_shifts_poses(self) -> None:
+        # Build a stream of 3 frames at the origin (identity pose) with a known
+        # correction that shifts everything by +5 in x. Expected: corrected
+        # poses sit at x=5.
+        mem = MemoryStore()
+        lidar: Stream[PointCloud2] = mem.stream("lidar", PointCloud2)
+        for i in range(3):
+            lidar.append(
+                PointCloud2.from_numpy(np.zeros((1, 3), dtype=np.float32)),
+                ts=float(i + 1),
+                pose=(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0),
+            )
+
+        # Constant correction: translate by (5, 0, 0), identity rotation.
+        c_a = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=1.0)
+        c_b = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=3.0)
+        mem2 = MemoryStore()
+        corr: Stream[Transform] = mem2.stream("corrections", Transform)
+        corr.append(c_a, ts=1.0)
+        corr.append(c_b, ts=3.0)
+
+        corrected = apply_corrections(lidar, corr)
+        for obs in corrected:
+            assert obs.pose is not None
+            assert obs.pose[0] == pytest.approx(5.0, abs=1e-9)
+            assert obs.pose[1] == pytest.approx(0.0, abs=1e-9)
+            assert obs.pose[2] == pytest.approx(0.0, abs=1e-9)
+
+    def test_passes_through_pose_none(self) -> None:
+        mem = MemoryStore()
+        lidar: Stream[PointCloud2] = mem.stream("lidar", PointCloud2)
+        lidar.append(
+            PointCloud2.from_numpy(np.zeros((1, 3), dtype=np.float32)),
+            ts=1.0,
+            pose=None,
+        )
+        c_a = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=1.0)
+        c_b = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=2.0)
+        mem2 = MemoryStore()
+        corr: Stream[Transform] = mem2.stream("corrections", Transform)
+        corr.append(c_a, ts=1.0)
+        corr.append(c_b, ts=2.0)
+        corrected = apply_corrections(lidar, corr)
+        for obs in corrected:
+            assert obs.pose is None
+
+
+class TestKeyframeType:
+    def test_keyframe_is_frozen(self) -> None:
+        identity = Transform(
+            translation=Vector3(0.0, 0.0, 0.0),
+            rotation=Quaternion(0.0, 0.0, 0.0, 1.0),
+            ts=1.0,
+        )
+        kf = Keyframe(ts=1.0, local=identity, optimized=identity)
+        with pytest.raises(Exception):
+            kf.ts = 2.0  # type: ignore[misc]
+        assert isinstance(kf.local, Transform)
+        assert isinstance(kf.optimized, Transform)
+
+
+# Real-recording smoke test. ~45-60s on go2_short.db. Skipped when the LFS
+# file isn't present (CI without LFS pulled, fresh clone, etc.).
+_DATA_DB = Path(__file__).resolve().parents[3] / "data" / "go2_short.db"
+
+
+@pytest.mark.skipif(not _DATA_DB.exists(), reason=f"requires LFS file {_DATA_DB}")
+class TestRealRecording:
+    def test_pgo_pipeline_against_go2_short(self) -> None:
+        """Run the full PGO pipeline on a real 60-second go2 recording.
+
+        Asserts: keyframes produced, drift correction actually corrects (some
+        optimized poses differ from local), correction stream length matches
+        keyframes, apply_corrections preserves input frame count.
+        """
+        from dimos.memory2.store.sqlite import SqliteStore
+
+        store = SqliteStore(path=_DATA_DB)
+        lidar = store.streams.lidar
+        in_count = lidar.count()
+        assert in_count > 0, "recording is empty"
+
+        kfs = pgo_keyframes(lidar)
+        n_kf = kfs.count()
+        assert n_kf > 0, "PGO emitted no keyframes"
+        # 60s recording at ~0.5m keyframe spacing -> at least a handful.
+        assert n_kf >= 5
+
+        # Loop closure detection: at least one optimized pose should differ
+        # from its odom-frame counterpart. Without loops, the optimization is
+        # a no-op and local == optimized for every keyframe.
+        drifted = sum(
+            1
+            for obs in kfs
+            if obs.data.local.translation != obs.data.optimized.translation
+            or obs.data.local.rotation != obs.data.optimized.rotation
+        )
+        assert drifted > 0, "expected loop closures to drift at least one keyframe"
+
+        corr = keyframes_to_corrections(kfs)
+        assert corr.count() == n_kf
+
+        # apply_corrections preserves frame count, including pose=None rows.
+        corrected = apply_corrections(lidar, corr)
+        out_count = sum(1 for _ in corrected)
+        assert out_count == in_count
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 543a898f6a..3ae12b5ad9 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from collections.abc import Callable
 import time
+from typing import Any
 
 import rerun as rr
 import rerun.blueprint as rrb
@@ -20,22 +22,24 @@
 
 from dimos.mapping.voxels import VoxelMapTransformer
 from dimos.memory2.store.sqlite import SqliteStore
+from dimos.memory2.type.observation import Observation
 from dimos.utils.data import resolve_named_path
 from dimos.visualization.rerun.init import rerun_init
 
 
-def progress(total: int, label: str = ""):
+def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]:
     seen = 0
     wall_start: float | None = None
     last_wall: float | None = None
     first_ts: float | None = None
 
-    def _progress(obs):
+    def _progress(obs: Observation[Any]) -> None:
         nonlocal seen, wall_start, last_wall, first_ts
         now = time.monotonic()
         if wall_start is None:
             wall_start = now
             first_ts = obs.ts
+        assert first_ts is not None  # narrowed by the same `if` above
         frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0
         last_wall = now
         seen += 1
@@ -84,7 +88,7 @@ def main(
 
     path: list[tuple[float, float, float]] = []
 
-    def collect_path(obs):
+    def collect_path(obs: Observation[Any]) -> None:
         if obs.pose is None:
             return
         # Reject placeholder poses at the world origin (translation = 0,0,0).

From 45e72da4ff6a391a2534202c6e6cc4f717ff2887 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 21:37:13 +0800
Subject: [PATCH 09/36] pgo cleanup

---
 dimos/mapping/relocalization/pgo.py      | 68 +++++++++++++++++-------
 dimos/mapping/relocalization/test_pgo.py | 12 -----
 2 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py
index 9b48aa6437..3f5dbbd4ec 100644
--- a/dimos/mapping/relocalization/pgo.py
+++ b/dimos/mapping/relocalization/pgo.py
@@ -14,28 +14,47 @@
 
 """PGO drift corrections as composable Stream stages.
 
+A lidar/odom stream comes in with poses that drift over time — the robot's
+estimate of where it is in the world slowly diverges from ground truth as
+small per-frame errors accumulate. PGO ("pose graph optimization") detects
+revisited places via loop closure and pulls the trajectory back into
+self-consistency. The result is a stream of *corrections*: a Transform per
+keyframe that maps every drifted ("raw") pose to its corrected one.
+
 Pipeline:
 
-    lidar: Stream[PointCloud2]
-        -> pgo_keyframes(...)            -> Stream[Keyframe]
-        -> keyframes_to_corrections(...) -> Stream[Transform]   (world_corrected <- world_raw)
-        -> apply_corrections(any_stream, corrections) -> Stream[T]   (obs.pose shuffled)
+    lidar: Stream[PointCloud2]                         # pose-stamped scans
+        -> pgo_keyframes(...)             -> Stream[Keyframe]    # one per kf
+        -> keyframes_to_corrections(...)  -> Stream[Transform]   # world_corrected <- world_raw
+        -> apply_corrections(any_stream, corrections) -> Stream[T]  # obs.pose shuffled
+
+`pgo_keyframes` runs ISAM2 + ICP loop closure across the input, emitting
+each kept keyframe with both its raw odom pose (`local`) and its optimized
+pose (`optimized`) as Transforms.
+
+`keyframes_to_corrections` is pure arithmetic: per keyframe, the drift
+correction is `optimized @ local^-1` (Transform composition). The resulting
+stream has one entry per keyframe — sparse in time.
 
-The math: per keyframe, the drift correction is
-    R_corr = R_global @ R_local.T
-    t_corr = t_global - R_corr @ t_local
-and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t,
-clipping out-of-range to endpoints.
+`make_interpolator` materializes the sparse correction stream into a
+ts -> Transform function: SLERP for rotation, linear lerp for translation,
+clipping out-of-range queries to the endpoints.
 
-`gtsam` and `open3d` are imported lazily inside hot helpers so importing this module
-stays cheap and gtsam-free for consumers that only need `Keyframe` / `apply_corrections`.
+`apply_corrections` shuffles `obs.pose` on any stream using the interpolated
+correction at each obs.ts. It's read-only on `obs.data`, so the same lidar
+stream can be re-applied — or a different recording from the same physical
+space, if the corrections generalize.
+
+`gtsam` is imported lazily inside hot helpers so importing this module
+stays cheap and gtsam-free for consumers that only need `Keyframe` /
+`apply_corrections`.
 """
 
 from __future__ import annotations
 
 from collections.abc import Callable, Iterator
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, TypeVar
+from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Unpack
 
 import numpy as np
 import open3d as o3d  # type: ignore[import-untyped]
@@ -85,6 +104,24 @@ class PGOConfig(BaseConfig):
     max_icp_correspondence_dist: float = 1.0
 
 
+class PGOKwargs(TypedDict, total=False):
+    """Typed kwargs for `pgo_keyframes`; mirrors `PGOConfig` fields."""
+
+    key_pose_delta_trans: float
+    key_pose_delta_deg: float
+    loop_search_radius: float
+    loop_time_thresh: float
+    loop_score_thresh: float
+    loop_submap_half_range: int
+    min_icp_inliers: int
+    min_keyframes_for_loop_search: int
+    loop_closure_extra_iterations: int
+    submap_resolution: float
+    min_loop_detect_duration: float
+    max_icp_iterations: int
+    max_icp_correspondence_dist: float
+
+
 @dataclass(frozen=True)
 class Keyframe:
     """Keyframe emitted by `pgo_keyframes`.
@@ -102,7 +139,7 @@ def pgo_keyframes(
     stream: Stream[PointCloud2],
     *,
     on_frame: Callable[[Any], None] | None = None,
-    **pgo_cfg: Any,
+    **pgo_cfg: Unpack[PGOKwargs],
 ) -> Stream[Keyframe]:
     """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe."""
     cfg = PGOConfig(**pgo_cfg)
@@ -190,11 +227,6 @@ def interp(ts: float) -> Transform:
     return interp
 
 
-def correction_at(corrections: Stream[Transform], ts: float) -> Transform:
-    """One-off lookup. For hot paths build `make_interpolator` once and reuse."""
-    return make_interpolator(corrections)(ts)
-
-
 def apply_corrections(
     stream: Stream[T],
     corrections: Stream[Transform],
diff --git a/dimos/mapping/relocalization/test_pgo.py b/dimos/mapping/relocalization/test_pgo.py
index ce35cbd25a..76f8b378eb 100644
--- a/dimos/mapping/relocalization/test_pgo.py
+++ b/dimos/mapping/relocalization/test_pgo.py
@@ -28,7 +28,6 @@
     _r_t_from_transform,
     _transform_from_r_t,
     apply_corrections,
-    correction_at,
     keyframes_to_corrections,
     make_interpolator,
     pgo_keyframes,
@@ -244,17 +243,6 @@ def test_out_of_range_clips_to_endpoints(self) -> None:
         # In-range midpoint
         assert interp(6.0).translation.x == pytest.approx(5.0, abs=1e-10)
 
-    def test_correction_at_oneshot(self) -> None:
-        R = np.eye(3)
-        a = _transform_from_r_t(R, np.array([0.0, 0.0, 0.0]), ts=1.0)
-        b = _transform_from_r_t(R, np.array([10.0, 0.0, 0.0]), ts=11.0)
-        mem = MemoryStore()
-        stream: Stream[Transform] = mem.stream("corrections", Transform)
-        stream.append(a, ts=1.0)
-        stream.append(b, ts=11.0)
-        result = correction_at(stream, 6.0)
-        assert result.translation.x == pytest.approx(5.0, abs=1e-10)
-
 
 class TestApplyCorrections:
     def test_pure_translation_shifts_poses(self) -> None:

From df4435f21caf9a66445a5e8062cea0b544f3a27e Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 21:49:53 +0800
Subject: [PATCH 10/36] visualizing loop closures

---
 dimos/mapping/relocalization/pgo.py | 55 +++++++++++++++++++++++++++--
 dimos/utils/cli/map.py              | 26 +++++++++++++-
 2 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py
index 3f5dbbd4ec..af94921c9d 100644
--- a/dimos/mapping/relocalization/pgo.py
+++ b/dimos/mapping/relocalization/pgo.py
@@ -30,7 +30,9 @@
 
 `pgo_keyframes` runs ISAM2 + ICP loop closure across the input, emitting
 each kept keyframe with both its raw odom pose (`local`) and its optimized
-pose (`optimized`) as Transforms.
+pose (`optimized`) as Transforms. Pass `loop_closures_out=[]` to also
+collect each accepted ICP loop edge as a `LoopClosure` (source+target
+optimized poses + ICP fitness) — useful for visualizing the pose graph.
 
 `keyframes_to_corrections` is pure arithmetic: per keyframe, the drift
 correction is `optimized @ local^-1` (Transform composition). The resulting
@@ -135,13 +137,33 @@ class Keyframe:
     optimized: Transform
 
 
+@dataclass(frozen=True)
+class LoopClosure:
+    """An accepted ICP loop-closure edge in the pose graph.
+
+    `source` and `target` are the final optimized body poses (in
+    `world_corrected`) of the two keyframes the loop connected. `score` is
+    the ICP fitness used as the loop's translation variance — lower is a
+    tighter match.
+    """
+
+    source: Transform
+    target: Transform
+    score: float
+
+
 def pgo_keyframes(
     stream: Stream[PointCloud2],
     *,
     on_frame: Callable[[Any], None] | None = None,
+    loop_closures_out: list[LoopClosure] | None = None,
     **pgo_cfg: Unpack[PGOKwargs],
 ) -> Stream[Keyframe]:
-    """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe."""
+    """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe.
+
+    If `loop_closures_out` is provided, accepted loop edges are appended to
+    it (in detection order) using each keyframe's final optimized pose.
+    """
     cfg = PGOConfig(**pgo_cfg)
     pgo = _PGO(cfg)
 
@@ -167,6 +189,8 @@ def pgo_keyframes(
     out: Stream[Keyframe] = mem.stream("keyframes", Keyframe)
     for kf in pgo.finalize():
         out.append(kf, ts=kf.ts)
+    if loop_closures_out is not None:
+        loop_closures_out.extend(pgo.loop_closures())
     return out
 
 
@@ -312,6 +336,7 @@ def __init__(self, config: PGOConfig) -> None:
         self._cfg = config
         self._key_poses: list[_KeyPose] = []
         self._pending_loops: list[_LoopPair] = []
+        self._accepted_loops: list[_LoopPair] = []
         self._last_loop_ts: float | None = None
         self._world_correction: gtsam.Pose3 = gtsam.Pose3()  # identity
 
@@ -367,6 +392,31 @@ def finalize(self) -> list[Keyframe]:
             )
         return out
 
+    def loop_closures(self) -> list[LoopClosure]:
+        """Accepted loop edges, resolved to final optimized poses."""
+        out: list[LoopClosure] = []
+        for pair in self._accepted_loops:
+            src = self._key_poses[pair.source]
+            tgt = self._key_poses[pair.target]
+            out.append(
+                LoopClosure(
+                    source=_pose3_to_transform(
+                        src.optimized,
+                        ts=src.timestamp,
+                        frame_id=FRAME_WORLD_CORRECTED,
+                        child_frame_id=FRAME_BODY,
+                    ),
+                    target=_pose3_to_transform(
+                        tgt.optimized,
+                        ts=tgt.timestamp,
+                        frame_id=FRAME_WORLD_CORRECTED,
+                        child_frame_id=FRAME_BODY,
+                    ),
+                    score=pair.score,
+                )
+            )
+        return out
+
     def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool:
         if not self._key_poses:
             return True
@@ -521,6 +571,7 @@ def _smooth_and_update(self) -> None:
                 np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var])
             )
             self._graph.add(gtsam.BetweenFactorPose3(pair.target, pair.source, pair.offset, noise))
+        self._accepted_loops.extend(self._pending_loops)
         self._pending_loops.clear()
 
         self._isam2.update(self._graph, self._values)
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 3ae12b5ad9..5bc4d29c52 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -103,6 +103,7 @@ def collect_path(obs: Observation[Any]) -> None:
         from scipy.spatial.transform import Rotation
 
         from dimos.mapping.relocalization.pgo import (
+            LoopClosure,
             keyframes_to_corrections,
             make_interpolator,
             pgo_keyframes,
@@ -112,7 +113,12 @@ def collect_path(obs: Observation[Any]) -> None:
 
         total = lidar.count()
         print("running PGO twopass map...")
-        keyframes = pgo_keyframes(lidar, on_frame=progress(total, "pgo pass 1 (optimizing)"))
+        loops: list[LoopClosure] = []
+        keyframes = pgo_keyframes(
+            lidar,
+            on_frame=progress(total, "pgo pass 1 (optimizing)"),
+            loop_closures_out=loops,
+        )
         corrections = keyframes_to_corrections(keyframes)
         interp = make_interpolator(corrections)
 
@@ -167,6 +173,24 @@ def collect_path(obs: Observation[Any]) -> None:
                 rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.05]),
                 static=True,
             )
+            rr.log(
+                "world/pgo_map/keyframes",
+                rr.Points3D(positions=pgo_path, colors=[[255, 200, 0]], radii=[0.05]),
+                static=True,
+            )
+        if pgo and loops:
+            loop_strips = [
+                [
+                    (lc.source.translation.x, lc.source.translation.y, lc.source.translation.z),
+                    (lc.target.translation.x, lc.target.translation.y, lc.target.translation.z),
+                ]
+                for lc in loops
+            ]
+            rr.log(
+                "world/pgo_map/loop_closures",
+                rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.02]),
+                static=True,
+            )
 
     if export and pgo_map is not None:
         from pathlib import Path

From ebdf1ec6315d6f17a0543041b9fc7a72b218cc58 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 21:58:14 +0800
Subject: [PATCH 11/36] pgo vis fixed

---
 dimos/utils/cli/map.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 5bc4d29c52..440d824111 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -167,28 +167,38 @@ def collect_path(obs: Observation[Any]) -> None:
             )
         if pgo_map is not None:
             rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True)
+        STEM_HEIGHT = 2.0  # lift pose-graph viz above the map for legibility
         if pgo_path:
             rr.log(
                 "world/pgo_map/path",
                 rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.05]),
                 static=True,
             )
+            hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path]
             rr.log(
                 "world/pgo_map/keyframes",
-                rr.Points3D(positions=pgo_path, colors=[[255, 200, 0]], radii=[0.05]),
+                rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.05]),
                 static=True,
             )
         if pgo and loops:
             loop_strips = [
                 [
-                    (lc.source.translation.x, lc.source.translation.y, lc.source.translation.z),
-                    (lc.target.translation.x, lc.target.translation.y, lc.target.translation.z),
+                    (
+                        lc.source.translation.x,
+                        lc.source.translation.y,
+                        lc.source.translation.z + STEM_HEIGHT,
+                    ),
+                    (
+                        lc.target.translation.x,
+                        lc.target.translation.y,
+                        lc.target.translation.z + STEM_HEIGHT,
+                    ),
                 ]
                 for lc in loops
             ]
             rr.log(
                 "world/pgo_map/loop_closures",
-                rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.02]),
+                rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.05]),
                 static=True,
             )
 

From 71762c1c97788ca35514b5ea915d93300d9b9f1f Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 22:25:07 +0800
Subject: [PATCH 12/36] much faster pgo

---
 dimos/robot/cli/dimos.py | 16 +++++++--
 dimos/utils/cli/map.py   | 78 +++++++++++++++++++++++++++++-----------
 2 files changed, 72 insertions(+), 22 deletions(-)

diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py
index 5607c8b1ca..730509180b 100644
--- a/dimos/robot/cli/dimos.py
+++ b/dimos/robot/cli/dimos.py
@@ -693,7 +693,12 @@ def map_cmd(
         "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)"
     ),
     pgo: bool = typer.Option(
-        False, "--pgo", help="Run pose graph optimization before rebuilding (twopass)"
+        False,
+        "--pgo",
+        help="Run pose graph optimization and rebuild from spatially-deduped frames",
+    ),
+    pgo_tol: float = typer.Option(
+        0.3, "--pgo-tol", help="Spatial dedup tolerance for --pgo (meters)"
     ),
     block_count: int = typer.Option(
         2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)"
@@ -701,7 +706,12 @@ def map_cmd(
     export: bool = typer.Option(
         False,
         "--export",
-        help="Export PGO twopass map to ./<dataset>.pc2.lcm in cwd (implies --pgo)",
+        help="Export PGO map to ./<dataset>.pc2.lcm in cwd (implies --pgo)",
+    ),
+    full_pgo: bool = typer.Option(
+        False,
+        "--full-pgo",
+        help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)",
     ),
     no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"),
 ) -> None:
@@ -713,8 +723,10 @@ def map_cmd(
         voxel=voxel,
         device=device,
         pgo=pgo,
+        pgo_tol=pgo_tol,
         block_count=block_count,
         export=export,
+        full_pgo=full_pgo,
         no_gui=no_gui,
     )
 
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 440d824111..8b23df1e25 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -65,7 +65,12 @@ def main(
         "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)"
     ),
     pgo: bool = typer.Option(
-        False, "--pgo", help="Run pose graph optimization before rebuilding (twopass)"
+        False,
+        "--pgo",
+        help="Run pose graph optimization and rebuild from spatially-deduped frames",
+    ),
+    pgo_tol: float = typer.Option(
+        0.3, "--pgo-tol", help="Spatial dedup tolerance for --pgo (meters)"
     ),
     block_count: int = typer.Option(
         2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)"
@@ -73,12 +78,17 @@ def main(
     export: bool = typer.Option(
         False,
         "--export",
-        help="Export PGO twopass map to ./<dataset>.pc2.lcm in cwd (implies --pgo)",
+        help="Export PGO map to ./<dataset>.pc2.lcm in cwd (implies --pgo)",
+    ),
+    full_pgo: bool = typer.Option(
+        False,
+        "--full-pgo",
+        help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)",
     ),
     no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"),
 ) -> None:
     db_path = resolve_named_path(dataset, ".db")
-    if export:
+    if export or full_pgo:
         pgo = True
 
     store = SqliteStore(path=db_path)
@@ -99,9 +109,6 @@ def collect_path(obs: Observation[Any]) -> None:
     pgo_map = None
     pgo_path: list[tuple[float, float, float]] = []
     if pgo:
-        import numpy as np
-        from scipy.spatial.transform import Rotation
-
         from dimos.mapping.relocalization.pgo import (
             LoopClosure,
             keyframes_to_corrections,
@@ -109,7 +116,6 @@ def collect_path(obs: Observation[Any]) -> None:
             pgo_keyframes,
         )
         from dimos.mapping.voxels import VoxelGrid
-        from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
 
         total = lidar.count()
         print("running PGO twopass map...")
@@ -126,27 +132,53 @@ def collect_path(obs: Observation[Any]) -> None:
             kf_t = kf_obs.data.optimized.translation
             pgo_path.append((kf_t.x, kf_t.y, kf_t.z))
 
-        pass2_pb = progress(total, "pgo pass 2 (rebuilding)")
+        # Canonical PGO rebuild: bucket frames by spatial cell using the raw
+        # odom pose, keep the latest per cell, transform with the interpolated
+        # correction. obs.data is not touched in the dedup loop so it stays
+        # cheap (no pointcloud loading).
+        seen: dict[tuple[int, int, int], Any] = {}
+        for obs in lidar:
+            if obs.pose is None:
+                continue
+            if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0:
+                continue
+            cell = (
+                int(obs.pose[0] / pgo_tol),
+                int(obs.pose[1] / pgo_tol),
+                int(obs.pose[2] / pgo_tol),
+            )
+            seen[cell] = obs
+
+        n_kept = len(seen)
+        pct = 100 * n_kept / total if total else 0
+        print(f"pgo rebuild: kept [{n_kept}/{total}] frames ({pct:.1f}%) at tol={pgo_tol}m")
+
+        pass2_pb = progress(n_kept, "pgo pass 2 (rebuilding)")
         grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device)
         try:
-            for obs in lidar:
+            for obs in seen.values():
                 pass2_pb(obs)
-                if obs.pose is None:
+                if len(obs.data) == 0:
                     continue
-                pts, _ = obs.data.as_numpy()
-                if len(pts) == 0:
-                    continue
-                tf = interp(obs.ts)
-                R = Rotation.from_quat(
-                    [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w]
-                ).as_matrix()
-                t = np.array([tf.translation.x, tf.translation.y, tf.translation.z])
-                corrected = (R @ pts[:, :3].T).T + t
-                grid.add_frame(PointCloud2.from_numpy(corrected.astype(np.float32)))
+                grid.add_frame(obs.data.transform(interp(obs.ts)))
             pgo_map = grid.get_global_pointcloud2()
         finally:
             grid.dispose()
 
+    full_pgo_map = None
+    if full_pgo:
+        full_pb = progress(total, "full pgo (rebuilding)")
+        full_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device)
+        try:
+            for obs in lidar:
+                full_pb(obs)
+                if obs.pose is None or len(obs.data) == 0:
+                    continue
+                full_grid.add_frame(obs.data.transform(interp(obs.ts)))
+            full_pgo_map = full_grid.get_global_pointcloud2()
+        finally:
+            full_grid.dispose()
+
     global_map = (
         lidar.tap(collect_path)
         .transform(VoxelMapTransformer(voxel_size=voxel, device=device))
@@ -167,6 +199,12 @@ def collect_path(obs: Observation[Any]) -> None:
             )
         if pgo_map is not None:
             rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True)
+        if full_pgo_map is not None:
+            rr.log(
+                "world/full_pgo_map/pointcloud",
+                full_pgo_map.to_rerun(size=voxel),
+                static=True,
+            )
         STEM_HEIGHT = 2.0  # lift pose-graph viz above the map for legibility
         if pgo_path:
             rr.log(

From d1218361267c65733f7cfbae869af9ea693cb60f Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Sun, 24 May 2026 22:46:17 +0800
Subject: [PATCH 13/36] PointCloud2: drop has_colors() side effect from
 transform

The colors-copy block in `PointCloud2.transform` calls `self.pointcloud`
to check `has_colors()`, which forces a tensor->legacy conversion on
every invocation. With `pgo --full-pgo` rebuilding from hundreds of
lidar frames, that hidden allocation dominates the per-frame cost. The
colors path was lidar-irrelevant (lidar clouds have no colors anyway)
and the feature it was meant to support never landed; remove it so
transform() stays a clean numpy round-trip.
---
 dimos/msgs/sensor_msgs/PointCloud2.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py
index 598cb0b094..570d33f772 100644
--- a/dimos/msgs/sensor_msgs/PointCloud2.py
+++ b/dimos/msgs/sensor_msgs/PointCloud2.py
@@ -373,10 +373,6 @@ def transform(self, tf: Transform) -> PointCloud2:
         new_pcd = o3d.geometry.PointCloud()
         new_pcd.points = o3d.utility.Vector3dVector(transformed_xyz)
 
-        # Copy colors if available
-        if self.pointcloud.has_colors():
-            new_pcd.colors = self.pointcloud.colors
-
         return PointCloud2(
             pointcloud=new_pcd,
             frame_id=tf.frame_id,

From 626a4fc854bb8bbcb97c053747676b1c9fd41459 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 25 May 2026 11:56:57 +0800
Subject: [PATCH 14/36] ci: install gtsam-extended in tests group so import
 gtsam works

---
 pyproject.toml | 2 +-
 uv.lock        | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1125d433f3..a6e32f409e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -402,7 +402,7 @@ tests = [
 
     # Deps the lint job swaps for stubs (`stubs/<pkg>/` or `types-*`):
     # the real packages are needed at test time, stubs cover mypy.
-    "dimos[apriltag,psql,drone,cpu]",
+    "dimos[apriltag,mapping,psql,drone,cpu]",
     "mujoco>=3.3.4",
     "pygame>=2.6.1",
     "python-can>=4",
diff --git a/uv.lock b/uv.lock
index 7d50e3d818..8039736ee7 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2271,7 +2271,7 @@ project-deps = [
 ]
 tests = [
     { name = "coverage" },
-    { name = "dimos", extra = ["apriltag", "cpu", "drone", "psql", "visualization", "web"] },
+    { name = "dimos", extra = ["apriltag", "cpu", "drone", "mapping", "psql", "visualization", "web"] },
     { name = "gdown" },
     { name = "googlemaps" },
     { name = "hydra-core" },
@@ -2312,7 +2312,7 @@ tests = [
 ]
 tests-self-hosted = [
     { name = "coverage" },
-    { name = "dimos", extra = ["agents", "apriltag", "cpu", "drone", "manipulation", "misc", "perception", "psql", "sim", "unitree", "visualization", "web"] },
+    { name = "dimos", extra = ["agents", "apriltag", "cpu", "drone", "manipulation", "mapping", "misc", "perception", "psql", "sim", "unitree", "visualization", "web"] },
     { name = "gdown" },
     { name = "googlemaps" },
     { name = "hydra-core" },
@@ -2558,7 +2558,7 @@ project-deps = [
 ]
 tests = [
     { name = "coverage", specifier = ">=7.0" },
-    { name = "dimos", extras = ["apriltag", "psql", "drone", "cpu"] },
+    { name = "dimos", extras = ["apriltag", "mapping", "psql", "drone", "cpu"] },
     { name = "dimos", extras = ["web", "visualization"] },
     { name = "gdown", specifier = "==6.0.0" },
     { name = "googlemaps", specifier = ">=4.10.0" },
@@ -2601,7 +2601,7 @@ tests = [
 tests-self-hosted = [
     { name = "coverage", specifier = ">=7.0" },
     { name = "dimos", extras = ["agents", "perception", "manipulation", "sim", "unitree", "misc"] },
-    { name = "dimos", extras = ["apriltag", "psql", "drone", "cpu"] },
+    { name = "dimos", extras = ["apriltag", "mapping", "psql", "drone", "cpu"] },
     { name = "dimos", extras = ["web", "visualization"] },
     { name = "gdown", specifier = "==6.0.0" },
     { name = "googlemaps", specifier = ">=4.10.0" },

From 2f46b38443c180d711f89a24135cd041844be072 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 25 May 2026 15:05:07 +0800
Subject: [PATCH 15/36] markers on dimos map

---
 dimos/msgs/sensor_msgs/PointCloud2.py         |   9 +-
 .../detection/type/detection3d/marker.py      |  42 ++++
 .../perception/fiducial/marker_transformer.py | 176 +++++++++++++++
 dimos/robot/cli/dimos.py                      |  10 +
 dimos/utils/cli/map.py                        | 207 +++++++++++++-----
 5 files changed, 388 insertions(+), 56 deletions(-)
 create mode 100644 dimos/perception/detection/type/detection3d/marker.py
 create mode 100644 dimos/perception/fiducial/marker_transformer.py

diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py
index 570d33f772..add056b1a3 100644
--- a/dimos/msgs/sensor_msgs/PointCloud2.py
+++ b/dimos/msgs/sensor_msgs/PointCloud2.py
@@ -686,7 +686,6 @@ def to_rerun(
         voxel_size: float = 0.05,
         colors: list[int] | None = None,
         mode: str = "spheres",
-        size: float | None = None,
         fill_mode: str = "solid",
         bottom_cutoff: float | None = None,
         **kwargs: object,
@@ -699,7 +698,6 @@ def to_rerun(
                 If None, uses height-based turbo colormap via class_ids
                 (requires register_colormap_annotation() called once).
             mode: "points" for raw points, "boxes" for cubes (default), or "spheres" for sized spheres
-            size: Box size for mode="boxes" (e.g., voxel_size). Defaults to radii*2.
             fill_mode: Fill mode for boxes - "solid", "majorwireframe", or "densewireframe"
             **kwargs: Additional args (ignored for compatibility)
 
@@ -732,13 +730,10 @@ def to_rerun(
 
         if mode == "points":
             return rr.Points3D(
-                positions=points,
-                colors=point_colors,
-                class_ids=class_ids,
+                positions=points, colors=point_colors, class_ids=class_ids, sizes=voxel_size
             )
         elif mode == "boxes":
-            box_size = size if size is not None else voxel_size
-            half = box_size / 2
+            half = voxel_size / 2
             return rr.Boxes3D(
                 centers=points,
                 half_sizes=[half, half, half],
diff --git a/dimos/perception/detection/type/detection3d/marker.py b/dimos/perception/detection/type/detection3d/marker.py
new file mode 100644
index 0000000000..a162814d6f
--- /dev/null
+++ b/dimos/perception/detection/type/detection3d/marker.py
@@ -0,0 +1,42 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+import numpy as np
+
+from dimos.perception.detection.type.detection3d.bbox import Detection3DBBox
+
+
+@dataclass
+class Detection3DMarker(Detection3DBBox):
+    """Fiducial marker (ArUco / AprilTag) detection with a world-frame pose.
+
+    ``bbox`` is the axis-aligned 2D bbox around the four detected corners so
+    crop/draw helpers from ``Detection2DBBox`` work as-is. ``center`` and
+    ``orientation`` carry the marker pose in ``frame_id`` (typically
+    ``"world"``); ``transform`` is the camera-in-world transform used to
+    compose it.
+    """
+
+    marker_id: int = -1
+    corners_px: np.ndarray = field(default_factory=lambda: np.zeros((4, 2), dtype=np.float32))
+    dictionary: str = ""
+
+    def to_repr_dict(self) -> dict[str, Any]:
+        parent = super().to_repr_dict()
+        return {**parent, "marker_id": str(self.marker_id), "dict": self.dictionary}
diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py
new file mode 100644
index 0000000000..a3839ed9bb
--- /dev/null
+++ b/dimos/perception/fiducial/marker_transformer.py
@@ -0,0 +1,176 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""ArUco / AprilTag detection as a memory2 transformer.
+
+Wraps the pure helpers in :mod:`dimos.perception.fiducial.marker_tf_module`
+and emits one :class:`Detection3DMarker` observation per detected marker, with
+``.pose`` composed into world frame from the upstream observation's
+camera-in-world pose. The companion module :class:`MarkerTfModule` remains
+the right choice for live TF publication; this transformer is for offline /
+mem2-stream composition.
+
+Skips frames where the upstream observation has no ``.pose`` (debug log):
+without a camera-in-world pose, we can't honor the "always world-frame"
+output contract.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from dimos.memory2.transform import Transformer
+from dimos.msgs.geometry_msgs.Quaternion import Quaternion
+from dimos.msgs.geometry_msgs.Transform import Transform
+from dimos.msgs.geometry_msgs.Vector3 import Vector3
+from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo
+from dimos.msgs.sensor_msgs.Image import Image
+from dimos.perception.detection.type.detection3d.marker import Detection3DMarker
+from dimos.perception.fiducial.marker_tf_module import (
+    camera_info_to_cv_matrices,
+    create_aruco_detector,
+    estimate_marker_pose,
+    rvec_tvec_to_transform,
+)
+from dimos.utils.logging_config import setup_logger
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+
+    from dimos.memory2.type.observation import Observation
+
+logger = setup_logger()
+
+
+def _pose_tuple_to_transform(
+    pose: tuple[float, float, float, float, float, float, float],
+    *,
+    frame_id: str,
+    child_frame_id: str,
+    ts: float,
+) -> Transform:
+    x, y, z, qx, qy, qz, qw = pose
+    return Transform(
+        translation=Vector3(x, y, z),
+        rotation=Quaternion(qx, qy, qz, qw),
+        frame_id=frame_id,
+        child_frame_id=child_frame_id,
+        ts=ts,
+    )
+
+
+class DetectMarkers(Transformer[Image, Detection3DMarker]):
+    """Detect fiducial markers and emit one world-pose observation per marker."""
+
+    def __init__(
+        self,
+        camera_info: CameraInfo,
+        marker_length_m: float,
+        aruco_dictionary: str = "DICT_APRILTAG_36h11",
+        world_frame: str = "world",
+    ) -> None:
+        if marker_length_m <= 0:
+            raise ValueError(f"marker_length_m must be > 0, got {marker_length_m}")
+        self.camera_info = camera_info
+        self.marker_length_m = marker_length_m
+        self.aruco_dictionary = aruco_dictionary
+        self.world_frame = world_frame
+        self._detector = create_aruco_detector(aruco_dictionary)
+        self._cam_mtx, self._dist = camera_info_to_cv_matrices(camera_info)
+
+    def __call__(
+        self, upstream: Iterator[Observation[Image]]
+    ) -> Iterator[Observation[Detection3DMarker]]:
+        info = self.camera_info
+        marker_size = Vector3(self.marker_length_m, self.marker_length_m, 0.0)
+
+        for obs in upstream:
+            if obs.pose is None:
+                logger.debug("DetectMarkers: obs %s has no .pose; skipping", obs.id)
+                continue
+
+            image = obs.data
+            if (
+                info.width
+                and info.height
+                and (image.width != info.width or image.height != info.height)
+            ):
+                logger.debug(
+                    "DetectMarkers: image %sx%s != CameraInfo %sx%s; skip",
+                    image.width,
+                    image.height,
+                    info.width,
+                    info.height,
+                )
+                continue
+
+            gray = image.to_grayscale().as_numpy()
+            corners, ids, _ = self._detector.detectMarkers(gray)
+            if ids is None or len(ids) == 0:
+                continue
+
+            t_world_optical = _pose_tuple_to_transform(
+                obs.pose,
+                frame_id=self.world_frame,
+                child_frame_id="optical",
+                ts=obs.ts,
+            )
+
+            for corner_set, mid_arr in zip(corners, ids, strict=True):
+                mid = int(mid_arr[0])
+                pose = estimate_marker_pose(
+                    corner_set,
+                    self.marker_length_m,
+                    self._cam_mtx,
+                    self._dist,
+                    distortion_model=info.distortion_model,
+                )
+                if pose is None:
+                    continue
+                rvec, tvec = pose
+                t_optical_marker = rvec_tvec_to_transform(
+                    rvec,
+                    tvec,
+                    frame_id="optical",
+                    child_frame_id=f"marker_{mid}",
+                    ts=obs.ts,
+                )
+                t_world_marker = t_world_optical + t_optical_marker
+
+                corners_2d = corner_set.reshape(4, 2).astype(np.float32)
+                xy_min = corners_2d.min(axis=0)
+                xy_max = corners_2d.max(axis=0)
+                bbox = (float(xy_min[0]), float(xy_min[1]), float(xy_max[0]), float(xy_max[1]))
+
+                det = Detection3DMarker(
+                    bbox=bbox,
+                    track_id=mid,
+                    class_id=mid,
+                    confidence=1.0,
+                    name=f"marker_{mid}",
+                    ts=obs.ts,
+                    image=image,
+                    center=t_world_marker.translation,
+                    size=marker_size,
+                    transform=t_world_optical,
+                    frame_id=self.world_frame,
+                    orientation=t_world_marker.rotation,
+                    marker_id=mid,
+                    corners_px=corners_2d,
+                    dictionary=self.aruco_dictionary,
+                )
+
+                yield obs.derive(data=det, pose=t_world_marker).tag(marker_id=mid)
diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py
index 730509180b..23147e9380 100644
--- a/dimos/robot/cli/dimos.py
+++ b/dimos/robot/cli/dimos.py
@@ -714,6 +714,14 @@ def map_cmd(
         help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)",
     ),
     no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"),
+    markers: bool = typer.Option(
+        False,
+        "--markers",
+        help="Detect AprilTag markers in color_image and overlay them in rerun",
+    ),
+    marker_size: float = typer.Option(
+        0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)"
+    ),
 ) -> None:
     """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun."""
     from dimos.utils.cli.map import main as map_main
@@ -728,6 +736,8 @@ def map_cmd(
         export=export,
         full_pgo=full_pgo,
         no_gui=no_gui,
+        markers=markers,
+        marker_size=marker_size,
     )
 
 
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 8b23df1e25..bea304a538 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -20,12 +20,22 @@
 import rerun.blueprint as rrb
 import typer
 
-from dimos.mapping.voxels import VoxelMapTransformer
+from dimos.mapping.voxels import VoxelGrid
 from dimos.memory2.store.sqlite import SqliteStore
+from dimos.memory2.transform import QualityWindow
 from dimos.memory2.type.observation import Observation
+from dimos.memory2.vis.color import Color
+from dimos.msgs.geometry_msgs.Quaternion import Quaternion
+from dimos.msgs.geometry_msgs.Transform import Transform
+from dimos.msgs.geometry_msgs.Vector3 import Vector3
+from dimos.msgs.sensor_msgs.Image import Image
+from dimos.perception.fiducial.marker_transformer import DetectMarkers
+from dimos.robot.unitree.go2.connection import BASE_TO_OPTICAL, _camera_info_static
 from dimos.utils.data import resolve_named_path
 from dimos.visualization.rerun.init import rerun_init
 
+PATH_THICKNESS = 0.01
+
 
 def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]:
     seen = 0
@@ -70,11 +80,11 @@ def main(
         help="Run pose graph optimization and rebuild from spatially-deduped frames",
     ),
     pgo_tol: float = typer.Option(
-        0.3, "--pgo-tol", help="Spatial dedup tolerance for --pgo (meters)"
-    ),
-    block_count: int = typer.Option(
-        2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)"
+        0.3,
+        "--pgo-tol",
+        help="Spatial dedup tolerance (meters); applies to both raw and --pgo maps",
     ),
+    block_count: int = typer.Option(2_000_000, "--block-count", help="VoxelBlockGrid capacity"),
     export: bool = typer.Option(
         False,
         "--export",
@@ -86,6 +96,14 @@ def main(
         help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)",
     ),
     no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"),
+    markers: bool = typer.Option(
+        False,
+        "--markers",
+        help="Detect AprilTag markers in color_image and overlay them in rerun",
+    ),
+    marker_size: float = typer.Option(
+        0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)"
+    ),
 ) -> None:
     db_path = resolve_named_path(dataset, ".db")
     if export or full_pgo:
@@ -96,18 +114,39 @@ def main(
 
     print(lidar.summary())
 
-    path: list[tuple[float, float, float]] = []
+    total = lidar.count()
 
-    def collect_path(obs: Observation[Any]) -> None:
+    # Spatial dedup: bucket frames by 3D cell using the raw pose, keep the
+    # latest per cell. Shared by raw and PGO rebuilds. Doesn't touch obs.data
+    # so it stays cheap (no pointcloud loading).
+    seen: dict[tuple[int, int, int], Observation[Any]] = {}
+    for obs in lidar:
         if obs.pose is None:
-            return
-        # Reject placeholder poses at the world origin (translation = 0,0,0).
+            continue
+        # Reject placeholder poses at the world origin.
         if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0:
-            return
-        path.append((obs.pose[0], obs.pose[1], obs.pose[2]))
+            continue
+        cell = (
+            int(obs.pose[0] / pgo_tol),
+            int(obs.pose[1] / pgo_tol),
+            int(obs.pose[2] / pgo_tol),
+        )
+        seen[cell] = obs
+
+    n_kept = len(seen)
+    pct = 100 * n_kept / total if total else 0
+    print(f"dedup: kept [{n_kept}/{total}] frames ({pct:.1f}%) at tol={pgo_tol}m")
+
+    # Dict insertion order = lidar iteration order = chronological.
+    # `seen` only contains entries with non-None poses (filtered above).
+    path: list[tuple[float, float, float]] = [
+        (obs.pose[0], obs.pose[1], obs.pose[2]) for obs in seen.values() if obs.pose is not None
+    ]
 
     pgo_map = None
     pgo_path: list[tuple[float, float, float]] = []
+    loops: list[Any] = []
+    interp: Any | None = None
     if pgo:
         from dimos.mapping.relocalization.pgo import (
             LoopClosure,
@@ -115,16 +154,15 @@ def collect_path(obs: Observation[Any]) -> None:
             make_interpolator,
             pgo_keyframes,
         )
-        from dimos.mapping.voxels import VoxelGrid
 
-        total = lidar.count()
         print("running PGO twopass map...")
-        loops: list[LoopClosure] = []
+        pgo_loops: list[LoopClosure] = []
         keyframes = pgo_keyframes(
             lidar,
             on_frame=progress(total, "pgo pass 1 (optimizing)"),
-            loop_closures_out=loops,
+            loop_closures_out=pgo_loops,
         )
+        loops = list(pgo_loops)
         corrections = keyframes_to_corrections(keyframes)
         interp = make_interpolator(corrections)
 
@@ -132,27 +170,6 @@ def collect_path(obs: Observation[Any]) -> None:
             kf_t = kf_obs.data.optimized.translation
             pgo_path.append((kf_t.x, kf_t.y, kf_t.z))
 
-        # Canonical PGO rebuild: bucket frames by spatial cell using the raw
-        # odom pose, keep the latest per cell, transform with the interpolated
-        # correction. obs.data is not touched in the dedup loop so it stays
-        # cheap (no pointcloud loading).
-        seen: dict[tuple[int, int, int], Any] = {}
-        for obs in lidar:
-            if obs.pose is None:
-                continue
-            if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0:
-                continue
-            cell = (
-                int(obs.pose[0] / pgo_tol),
-                int(obs.pose[1] / pgo_tol),
-                int(obs.pose[2] / pgo_tol),
-            )
-            seen[cell] = obs
-
-        n_kept = len(seen)
-        pct = 100 * n_kept / total if total else 0
-        print(f"pgo rebuild: kept [{n_kept}/{total}] frames ({pct:.1f}%) at tol={pgo_tol}m")
-
         pass2_pb = progress(n_kept, "pgo pass 2 (rebuilding)")
         grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device)
         try:
@@ -167,6 +184,7 @@ def collect_path(obs: Observation[Any]) -> None:
 
     full_pgo_map = None
     if full_pgo:
+        assert interp is not None
         full_pb = progress(total, "full pgo (rebuilding)")
         full_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device)
         try:
@@ -179,43 +197,83 @@ def collect_path(obs: Observation[Any]) -> None:
         finally:
             full_grid.dispose()
 
-    global_map = (
-        lidar.tap(collect_path)
-        .transform(VoxelMapTransformer(voxel_size=voxel, device=device))
-        .tap(progress(lidar.count(), "reconstructing global map"))
-        .last()
-        .data
-    )
+    # Raw map: same dedup'd frames, no PGO correction.
+    raw_pb = progress(n_kept, "reconstructing global map")
+    raw_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device)
+    try:
+        for obs in seen.values():
+            raw_pb(obs)
+            if len(obs.data) == 0:
+                continue
+            raw_grid.add_frame(obs.data)
+        global_map = raw_grid.get_global_pointcloud2()
+    finally:
+        raw_grid.dispose()
+
+    marker_dets: list[Observation[Any]] = []
+    if markers:
+        color_image = store.stream("color_image", Image)
+
+        def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]:
+            # obs.pose is base_link-in-world; DetectMarkers wants optical-in-world.
+            if obs.pose is None:
+                return obs.derive(data=obs.data)
+            x, y, z, qx, qy, qz, qw = obs.pose
+            t_world_base = Transform(
+                translation=Vector3(x, y, z),
+                rotation=Quaternion(qx, qy, qz, qw),
+                frame_id="world",
+                child_frame_id="base_link",
+                ts=obs.ts,
+            )
+            return obs.derive(data=obs.data, pose=t_world_base + BASE_TO_OPTICAL)
+
+        xf = DetectMarkers(
+            camera_info=_camera_info_static(),
+            marker_length_m=marker_size,
+        )
+        # 2Hz quality-gated: keep only the sharpest frame per 0.5s window.
+        marker_dets = (
+            color_image.tap(progress(color_image.count(), "filtering frames"))
+            .transform(QualityWindow(lambda img: img.sharpness, window=0.5))
+            .map(_lift_pose_to_optical)
+            .transform(xf)
+            .to_list()
+        )
+        unique = sorted({obs.data.marker_id for obs in marker_dets})
+        print(f"markers: {len(marker_dets)} detections across {len(unique)} unique ids {unique}")
 
     if not no_gui:
         rerun_init("dimos map tool", spawn=True)
         rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world")))
-        rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True)
+        rr.log("world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True)
         if path:
             rr.log(
                 "world/raw_map/path",
-                rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[0.05]),
+                rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[PATH_THICKNESS]),
                 static=True,
             )
         if pgo_map is not None:
-            rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True)
+            rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(voxel_size=voxel / 2), static=True)
         if full_pgo_map is not None:
             rr.log(
                 "world/full_pgo_map/pointcloud",
-                full_pgo_map.to_rerun(size=voxel),
+                full_pgo_map.to_rerun(voxel_size=voxel / 2),
                 static=True,
             )
         STEM_HEIGHT = 2.0  # lift pose-graph viz above the map for legibility
         if pgo_path:
             rr.log(
                 "world/pgo_map/path",
-                rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.05]),
+                rr.LineStrips3D(
+                    strips=[pgo_path], colors=[[255, 255, 255]], radii=[PATH_THICKNESS]
+                ),
                 static=True,
             )
             hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path]
             rr.log(
                 "world/pgo_map/keyframes",
-                rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.05]),
+                rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.025]),
                 static=True,
             )
         if pgo and loops:
@@ -239,6 +297,54 @@ def collect_path(obs: Observation[Any]) -> None:
                 rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.05]),
                 static=True,
             )
+        if marker_dets:
+            half = marker_size / 2.0
+            n = len(marker_dets)
+            fill_half = [(half, half, 0.005)] * n
+            # Outline sits just outside the fill so both stay visible.
+            outline_bump = marker_size * 0.05
+            outline_half = [(half + outline_bump, half + outline_bump, 0.006)] * n
+            centers = [(d.data.center.x, d.data.center.y, d.data.center.z) for d in marker_dets]
+            quaternions = [
+                (
+                    d.data.orientation.x,
+                    d.data.orientation.y,
+                    d.data.orientation.z,
+                    d.data.orientation.w,
+                )
+                for d in marker_dets
+            ]
+            unique_ids = sorted({d.data.marker_id for d in marker_dets})
+            id_to_color = {
+                mid: Color.from_cmap("tab10", (i % 10) / 10.0).rgb_u8()
+                for i, mid in enumerate(unique_ids)
+            }
+            colors = [id_to_color[d.data.marker_id] for d in marker_dets]
+            labels = [f"id={d.data.marker_id}" for d in marker_dets]
+            rr.log(
+                "world/markers/fill",
+                rr.Boxes3D(
+                    centers=centers,
+                    half_sizes=fill_half,
+                    quaternions=quaternions,
+                    colors=colors,
+                    fill_mode=rr.components.FillMode.Solid,
+                    labels=labels,
+                ),
+                static=True,
+            )
+            rr.log(
+                "world/markers/outline",
+                rr.Boxes3D(
+                    centers=centers,
+                    half_sizes=outline_half,
+                    quaternions=quaternions,
+                    colors=[(255, 255, 255)] * n,
+                    fill_mode=rr.components.FillMode.MajorWireframe,
+                    radii=0.002,
+                ),
+                static=True,
+            )
 
     if export and pgo_map is not None:
         from pathlib import Path
@@ -255,3 +361,6 @@ def collect_path(obs: Observation[Any]) -> None:
 
 if __name__ == "__main__":
     typer.run(main)
+    typer.run(main)
+    typer.run(main)
+    typer.run(main)

From 82e2dab13fad12eee902c86575fd61734b19b018 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 25 May 2026 15:12:13 +0800
Subject: [PATCH 16/36] map detects markers correctly

---
 dimos/utils/cli/map.py | 93 ++++++++++++++++++++++++++++++------------
 1 file changed, 67 insertions(+), 26 deletions(-)

diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index bea304a538..6fb129a4bd 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -25,16 +25,14 @@
 from dimos.memory2.transform import QualityWindow
 from dimos.memory2.type.observation import Observation
 from dimos.memory2.vis.color import Color
-from dimos.msgs.geometry_msgs.Quaternion import Quaternion
 from dimos.msgs.geometry_msgs.Transform import Transform
-from dimos.msgs.geometry_msgs.Vector3 import Vector3
 from dimos.msgs.sensor_msgs.Image import Image
 from dimos.perception.fiducial.marker_transformer import DetectMarkers
-from dimos.robot.unitree.go2.connection import BASE_TO_OPTICAL, _camera_info_static
+from dimos.robot.unitree.go2.connection import _camera_info_static
 from dimos.utils.data import resolve_named_path
 from dimos.visualization.rerun.init import rerun_init
 
-PATH_THICKNESS = 0.01
+PATH_THICKNESS = 0.005
 
 
 def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]:
@@ -212,31 +210,19 @@ def main(
 
     marker_dets: list[Observation[Any]] = []
     if markers:
+        # Image observations in dimos recordings are stamped with
+        # frame_id="camera_optical", so obs.pose is already optical-in-world
+        # (verified: matches lidar_base_pose + BASE_TO_OPTICAL to ~1mm).
+        # No mount composition needed.
         color_image = store.stream("color_image", Image)
-
-        def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]:
-            # obs.pose is base_link-in-world; DetectMarkers wants optical-in-world.
-            if obs.pose is None:
-                return obs.derive(data=obs.data)
-            x, y, z, qx, qy, qz, qw = obs.pose
-            t_world_base = Transform(
-                translation=Vector3(x, y, z),
-                rotation=Quaternion(qx, qy, qz, qw),
-                frame_id="world",
-                child_frame_id="base_link",
-                ts=obs.ts,
-            )
-            return obs.derive(data=obs.data, pose=t_world_base + BASE_TO_OPTICAL)
-
         xf = DetectMarkers(
             camera_info=_camera_info_static(),
             marker_length_m=marker_size,
         )
         # 2Hz quality-gated: keep only the sharpest frame per 0.5s window.
         marker_dets = (
-            color_image.tap(progress(color_image.count(), "filtering frames"))
+            color_image.tap(progress(color_image.count(), "detecting markers"))
             .transform(QualityWindow(lambda img: img.sharpness, window=0.5))
-            .map(_lift_pose_to_optical)
             .transform(xf)
             .to_list()
         )
@@ -272,7 +258,7 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]:
             )
             hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path]
             rr.log(
-                "world/pgo_map/keyframes",
+                "world/pgo_map/pgo/keyframes",
                 rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.025]),
                 static=True,
             )
@@ -293,8 +279,8 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]:
                 for lc in loops
             ]
             rr.log(
-                "world/pgo_map/loop_closures",
-                rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.05]),
+                "world/pgo_map/pgo/loop_closures",
+                rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.025]),
                 static=True,
             )
         if marker_dets:
@@ -322,7 +308,7 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]:
             colors = [id_to_color[d.data.marker_id] for d in marker_dets]
             labels = [f"id={d.data.marker_id}" for d in marker_dets]
             rr.log(
-                "world/markers/fill",
+                "world/raw_map/markers/fill",
                 rr.Boxes3D(
                     centers=centers,
                     half_sizes=fill_half,
@@ -334,7 +320,7 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]:
                 static=True,
             )
             rr.log(
-                "world/markers/outline",
+                "world/raw_map/markers/outline",
                 rr.Boxes3D(
                     centers=centers,
                     half_sizes=outline_half,
@@ -346,6 +332,61 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]:
                 static=True,
             )
 
+            if interp is not None:
+                # PGO-corrected marker poses. interp(ts) maps raw_world →
+                # pgo_world; composing with each raw marker transform lifts
+                # it into the corrected frame so it lines up with pgo_map.
+                pgo_centers: list[tuple[float, float, float]] = []
+                pgo_quaternions: list[tuple[float, float, float, float]] = []
+                for d in marker_dets:
+                    raw_tf = Transform(
+                        translation=d.data.center,
+                        rotation=d.data.orientation,
+                        frame_id="world",
+                        child_frame_id=f"marker_{d.data.marker_id}",
+                        ts=d.ts,
+                    )
+                    corrected = interp(d.ts) + raw_tf
+                    pgo_centers.append(
+                        (
+                            corrected.translation.x,
+                            corrected.translation.y,
+                            corrected.translation.z,
+                        )
+                    )
+                    pgo_quaternions.append(
+                        (
+                            corrected.rotation.x,
+                            corrected.rotation.y,
+                            corrected.rotation.z,
+                            corrected.rotation.w,
+                        )
+                    )
+                rr.log(
+                    "world/pgo_map/markers/fill",
+                    rr.Boxes3D(
+                        centers=pgo_centers,
+                        half_sizes=fill_half,
+                        quaternions=pgo_quaternions,
+                        colors=colors,
+                        fill_mode=rr.components.FillMode.Solid,
+                        labels=labels,
+                    ),
+                    static=True,
+                )
+                rr.log(
+                    "world/pgo_map/markers/outline",
+                    rr.Boxes3D(
+                        centers=pgo_centers,
+                        half_sizes=outline_half,
+                        quaternions=pgo_quaternions,
+                        colors=[(255, 255, 255)] * n,
+                        fill_mode=rr.components.FillMode.MajorWireframe,
+                        radii=0.002,
+                    ),
+                    static=True,
+                )
+
     if export and pgo_map is not None:
         from pathlib import Path
 

From b98b506c31fe35eee1c50759c8205f26beb538ab Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 25 May 2026 15:29:36 +0800
Subject: [PATCH 17/36] maptool speed limits

---
 dimos/memory2/transform.py | 61 ++++++++++++++++++++++++++++++++++++++
 dimos/robot/cli/dimos.py   | 12 ++++++++
 dimos/utils/cli/map.py     | 36 ++++++++++++++++------
 3 files changed, 100 insertions(+), 9 deletions(-)

diff --git a/dimos/memory2/transform.py b/dimos/memory2/transform.py
index 6894807627..a79ccf36b8 100644
--- a/dimos/memory2/transform.py
+++ b/dimos/memory2/transform.py
@@ -191,6 +191,67 @@ def _speed(upstream: Iterator[Observation[Any]]) -> Iterator[Observation[float]]
     return FnIterTransformer(_speed)
 
 
+class SpeedLimit(Transformer[T, T]):
+    """Pass through observations whose linear (and optionally angular) speed is low.
+
+    Linear: ``||Δtranslation|| / Δt`` in m/s, must be ≤ ``max_mps``.
+    Angular: rotation-angle-between-quaternions / Δt in deg/s, must be
+    ≤ ``max_dps`` when set.
+
+    Both metrics are computed between consecutive observations in stream
+    order. Use to drop motion-blurry frames from fiducial / OCR /
+    SLAM-frontend pipelines.
+
+    First observation is dropped (no reference). Observations with no
+    ``.pose`` are skipped. ``Δt ≤ 0`` (duplicate / non-monotonic
+    timestamps) is treated as unknown and dropped.
+
+    Note: the effective sampling interval is whatever upstream provides.
+    Chain after ``QualityWindow(window=Δt)`` to measure motion over a
+    fixed time window instead of per-frame.
+    """
+
+    def __init__(self, max_mps: float, max_dps: float | None = None) -> None:
+        if max_mps <= 0:
+            raise ValueError(f"max_mps must be > 0, got {max_mps}")
+        if max_dps is not None and max_dps <= 0:
+            raise ValueError(f"max_dps must be > 0 when set, got {max_dps}")
+        self.max_mps = max_mps
+        self.max_dps = max_dps
+
+    def __call__(self, upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]:
+        prev: Observation[T] | None = None
+        max_mps = self.max_mps
+        max_dps = self.max_dps
+        max_rps = math.radians(max_dps) if max_dps is not None else None
+        for obs in upstream:
+            if obs.pose is None:
+                continue
+            if prev is not None and prev.pose is not None:
+                dt = obs.ts - prev.ts
+                if dt > 0:
+                    dx = obs.pose[0] - prev.pose[0]
+                    dy = obs.pose[1] - prev.pose[1]
+                    dz = obs.pose[2] - prev.pose[2]
+                    v = math.sqrt(dx * dx + dy * dy + dz * dz) / dt
+                    ok = v <= max_mps
+                    if ok and max_rps is not None:
+                        # rotation angle between two unit quaternions.
+                        # |q · q'| collapses the double-cover sign ambiguity;
+                        # clamp guards against numerical drift past 1.
+                        dot = (
+                            obs.pose[3] * prev.pose[3]
+                            + obs.pose[4] * prev.pose[4]
+                            + obs.pose[5] * prev.pose[5]
+                            + obs.pose[6] * prev.pose[6]
+                        )
+                        angle = 2.0 * math.acos(min(1.0, abs(dot)))
+                        ok = (angle / dt) <= max_rps
+                    if ok:
+                        yield obs
+            prev = obs
+
+
 def smooth(window: int) -> FnIterTransformer[float, float]:
     """Sliding window average over obs.data (must be numeric)."""
 
diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py
index 23147e9380..d26d28d771 100644
--- a/dimos/robot/cli/dimos.py
+++ b/dimos/robot/cli/dimos.py
@@ -722,6 +722,16 @@ def map_cmd(
     marker_size: float = typer.Option(
         0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)"
     ),
+    marker_max_speed: float = typer.Option(
+        0.05,
+        "--marker-max-speed",
+        help="Skip frames where robot is moving faster than this (m/s); 0 disables",
+    ),
+    marker_max_rot_rate: float = typer.Option(
+        15.0,
+        "--marker-max-rot-rate",
+        help="Skip frames where robot is rotating faster than this (deg/s); 0 disables",
+    ),
 ) -> None:
     """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun."""
     from dimos.utils.cli.map import main as map_main
@@ -738,6 +748,8 @@ def map_cmd(
         no_gui=no_gui,
         markers=markers,
         marker_size=marker_size,
+        marker_max_speed=marker_max_speed,
+        marker_max_rot_rate=marker_max_rot_rate,
     )
 
 
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 6fb129a4bd..a6cafccd09 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -22,7 +22,8 @@
 
 from dimos.mapping.voxels import VoxelGrid
 from dimos.memory2.store.sqlite import SqliteStore
-from dimos.memory2.transform import QualityWindow
+from dimos.memory2.stream import Stream
+from dimos.memory2.transform import QualityWindow, SpeedLimit
 from dimos.memory2.type.observation import Observation
 from dimos.memory2.vis.color import Color
 from dimos.msgs.geometry_msgs.Transform import Transform
@@ -32,7 +33,7 @@
 from dimos.utils.data import resolve_named_path
 from dimos.visualization.rerun.init import rerun_init
 
-PATH_THICKNESS = 0.005
+PATH_THICKNESS = 0.01
 
 
 def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]:
@@ -102,6 +103,16 @@ def main(
     marker_size: float = typer.Option(
         0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)"
     ),
+    marker_max_speed: float = typer.Option(
+        0.3,
+        "--marker-max-speed",
+        help="Skip frames where robot is moving faster than this (m/s); 0 disables",
+    ),
+    marker_max_rot_rate: float = typer.Option(
+        30.0,
+        "--marker-max-rot-rate",
+        help="Skip frames where robot is rotating faster than this (deg/s); 0 disables",
+    ),
 ) -> None:
     db_path = resolve_named_path(dataset, ".db")
     if export or full_pgo:
@@ -219,13 +230,20 @@ def main(
             camera_info=_camera_info_static(),
             marker_length_m=marker_size,
         )
-        # 2Hz quality-gated: keep only the sharpest frame per 0.5s window.
-        marker_dets = (
-            color_image.tap(progress(color_image.count(), "detecting markers"))
-            .transform(QualityWindow(lambda img: img.sharpness, window=0.5))
-            .transform(xf)
-            .to_list()
-        )
+        # 2Hz quality-gated: keep only the sharpest frame per 0.5s window,
+        # then drop frames where the robot was moving (linear + rotational)
+        # faster than the limits — speed/rate are averaged across the window.
+        pipeline: Stream[Image] = color_image.tap(
+            progress(color_image.count(), "detecting markers")
+        ).transform(QualityWindow(lambda img: img.sharpness, window=0.5))
+        if marker_max_speed > 0:
+            pipeline = pipeline.transform(
+                SpeedLimit(
+                    max_mps=marker_max_speed,
+                    max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None,
+                )
+            )
+        marker_dets = pipeline.transform(xf).to_list()
         unique = sorted({obs.data.marker_id for obs in marker_dets})
         print(f"markers: {len(marker_dets)} detections across {len(unique)} unique ids {unique}")
 

From 4d5e9b66cbdea13b4df7690e52d48e0791bdd50b Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 25 May 2026 16:11:19 +0800
Subject: [PATCH 18/36] markers_rrd renderer

---
 dimos/utils/cli/map.py         |  27 +++--
 dimos/utils/cli/markers_rrd.py | 187 +++++++++++++++++++++++++++++++++
 2 files changed, 206 insertions(+), 8 deletions(-)
 create mode 100644 dimos/utils/cli/markers_rrd.py

diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index a6cafccd09..554e1878d2 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -104,12 +104,12 @@ def main(
         0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)"
     ),
     marker_max_speed: float = typer.Option(
-        0.3,
+        0.1,
         "--marker-max-speed",
         help="Skip frames where robot is moving faster than this (m/s); 0 disables",
     ),
     marker_max_rot_rate: float = typer.Option(
-        30.0,
+        15,
         "--marker-max-rot-rate",
         help="Skip frames where robot is rotating faster than this (deg/s); 0 disables",
     ),
@@ -318,12 +318,23 @@ def main(
                 )
                 for d in marker_dets
             ]
-            unique_ids = sorted({d.data.marker_id for d in marker_dets})
-            id_to_color = {
-                mid: Color.from_cmap("tab10", (i % 10) / 10.0).rgb_u8()
-                for i, mid in enumerate(unique_ids)
-            }
-            colors = [id_to_color[d.data.marker_id] for d in marker_dets]
+            # Color mode: turbo over detection time vs. tab10 over marker id.
+            COLOR_BY_TIME = True
+            if COLOR_BY_TIME:
+                ts_min = min(d.ts for d in marker_dets)
+                ts_max = max(d.ts for d in marker_dets)
+                ts_span = ts_max - ts_min if ts_max > ts_min else 1.0
+                colors = [
+                    Color.from_cmap("turbo", (d.ts - ts_min) / ts_span).rgb_u8()
+                    for d in marker_dets
+                ]
+            else:
+                unique_ids = sorted({d.data.marker_id for d in marker_dets})
+                id_to_color = {
+                    mid: Color.from_cmap("tab10", (i % 10) / 10.0).rgb_u8()
+                    for i, mid in enumerate(unique_ids)
+                }
+                colors = [id_to_color[d.data.marker_id] for d in marker_dets]
             labels = [f"id={d.data.marker_id}" for d in marker_dets]
             rr.log(
                 "world/raw_map/markers/fill",
diff --git a/dimos/utils/cli/markers_rrd.py b/dimos/utils/cli/markers_rrd.py
new file mode 100644
index 0000000000..3b67f9c2ea
--- /dev/null
+++ b/dimos/utils/cli/markers_rrd.py
@@ -0,0 +1,187 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Temporary: dump apriltag detector replay (.rrd) for reliability investigation.
+
+Walks a recorded SQLite dataset and writes an rrd containing:
+- per-image: camera pose + pinhole + image data
+- per-lidar:  base_link pose (a moving axis)
+- per detection: marker box in world frame, at the detection timestamp
+
+Usage:
+    uv run python -m dimos.utils.cli.markers_rrd hk_village1 --out hk.rrd
+    rerun hk.rrd
+
+Throwaway script next to ``map.py``; remove once the apriltag reliability work
+lands.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import rerun as rr
+import typer
+
+from dimos.memory2.store.sqlite import SqliteStore
+from dimos.memory2.stream import Stream
+from dimos.memory2.transform import QualityWindow, SpeedLimit
+from dimos.memory2.vis.color import Color
+from dimos.msgs.sensor_msgs.Image import Image
+from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
+from dimos.perception.fiducial.marker_transformer import DetectMarkers
+from dimos.robot.unitree.go2.connection import _camera_info_static
+from dimos.utils.data import resolve_named_path
+
+TIMELINE = "ts"
+
+
+def main(
+    dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"),
+    out: Path = typer.Option(..., "--out", help="Output .rrd path"),
+    marker_size: float = typer.Option(0.1, "--marker-size", help="Marker edge length (m)"),
+    marker_max_speed: float = typer.Option(
+        0.15, "--marker-max-speed", help="Detection speed gate (m/s); 0 disables"
+    ),
+    marker_max_rot_rate: float = typer.Option(
+        30.0, "--marker-max-rot-rate", help="Detection rot gate (deg/s); 0 disables"
+    ),
+    quality_window: float = typer.Option(
+        0.25, "--quality-window", help="Sharpest-frame window for detection (s)"
+    ),
+) -> None:
+    db_path = resolve_named_path(dataset, ".db")
+    cam_info = _camera_info_static()
+
+    rr.init("dimos markers", recording_id=db_path.stem)
+    rr.save(str(out))
+
+    # Static pinhole on the camera entity; per-frame Transform3D goes on the
+    # same entity. Image is the child so it projects through the pinhole.
+    pinhole = cam_info.to_rerun()
+    assert not isinstance(pinhole, list)
+    rr.log("world/camera", pinhole, static=True)
+
+    store = SqliteStore(path=str(db_path))
+    with store:
+        color_image = store.stream("color_image", Image)
+        lidar = store.stream("lidar", PointCloud2)
+
+        # ---- pass 1: robot base pose over time (from lidar.pose) ----
+        for lidar_obs in lidar:
+            if lidar_obs.pose is None:
+                continue
+            rr.set_time(TIMELINE, timestamp=lidar_obs.ts)
+            x, y, z, qx, qy, qz, qw = lidar_obs.pose
+            rr.log(
+                "world/robot",
+                rr.Transform3D(
+                    translation=[x, y, z], quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw])
+                ),
+            )
+
+        # ---- pass 2: camera pose + image per color_image frame ----
+        n_img = color_image.count()
+        for i, img_obs in enumerate(color_image):
+            rr.set_time(TIMELINE, timestamp=img_obs.ts)
+            if img_obs.pose is not None:
+                x, y, z, qx, qy, qz, qw = img_obs.pose
+                rr.log(
+                    "world/camera",
+                    rr.Transform3D(
+                        translation=[x, y, z], quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw])
+                    ),
+                )
+            rr.log("world/camera/image", img_obs.data.to_rerun())
+            # Clear any prior detection box at this ts; pass 3 will overwrite
+            # this with the actual detection iff one fires for this frame.
+            rr.log(
+                "world/camera/image/detections",
+                rr.Boxes2D(array=[], array_format=rr.Box2DFormat.XYWH),
+            )
+            if (i + 1) % 50 == 0 or i + 1 == n_img:
+                print(f"images: {i + 1}/{n_img}")
+
+        # ---- pass 3: marker detections (filtered same way as `dimos map`) ----
+        xf = DetectMarkers(camera_info=cam_info, marker_length_m=marker_size)
+        pipeline: Stream[Any] = color_image.transform(
+            QualityWindow(lambda img: img.sharpness, window=quality_window)
+        )
+        if marker_max_speed > 0:
+            pipeline = pipeline.transform(
+                SpeedLimit(
+                    max_mps=marker_max_speed,
+                    max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None,
+                )
+            )
+        # Each 3D detection gets its own entity path so it persists from its
+        # ts onward — earlier detections stay visible as new ones appear.
+        # Color by ts (turbo) across the recording's full image-time range.
+        ts_min = color_image.first().ts
+        ts_max = color_image.last().ts
+        ts_span = ts_max - ts_min if ts_max > ts_min else 1.0
+        n_det = 0
+        for det_obs in pipeline.transform(xf):
+            d = det_obs.data
+            rr.set_time(TIMELINE, timestamp=det_obs.ts)
+            color = Color.from_cmap("turbo", (det_obs.ts - ts_min) / ts_span).rgb_u8()
+            rr.log(
+                f"world/markers/det_{n_det:05d}",
+                rr.Boxes3D(
+                    centers=[(d.center.x, d.center.y, d.center.z)],
+                    half_sizes=[(marker_size / 2, marker_size / 2, 0.005)],
+                    quaternions=[
+                        rr.Quaternion(
+                            xyzw=[
+                                d.orientation.x,
+                                d.orientation.y,
+                                d.orientation.z,
+                                d.orientation.w,
+                            ]
+                        )
+                    ],
+                    colors=[color],
+                    fill_mode=rr.components.FillMode.Solid,
+                    labels=[f"id={d.marker_id}"],
+                ),
+            )
+            n_det += 1
+            xs = d.corners_px[:, 0]
+            ys = d.corners_px[:, 1]
+            x1, y1 = float(xs.min()), float(ys.min())
+            x2, y2 = float(xs.max()), float(ys.max())
+            rr.log(
+                "world/camera/image/detections",
+                rr.Boxes2D(
+                    array=[[x1, y1, x2 - x1, y2 - y1]],
+                    array_format=rr.Box2DFormat.XYWH,
+                    labels=[f"id={d.marker_id}"],
+                ),
+            )
+        print(f"detections: {n_det}")
+
+    print(f"wrote {out}")
+    print(f"open with: rerun {out}")
+
+
+if __name__ == "__main__":
+    typer.run(main)
+
+    print(f"wrote {out}")
+    print(f"open with: rerun {out}")
+
+
+if __name__ == "__main__":
+    typer.run(main)

From b3163bc117dbf932d6d2085c7c51c31f93748215 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 25 May 2026 16:53:58 +0800
Subject: [PATCH 19/36] average window for markers

---
 .../perception/fiducial/marker_transformer.py | 89 ++++++++++++++++++-
 dimos/utils/cli/markers_rrd.py                | 64 +++++++++++--
 2 files changed, 142 insertions(+), 11 deletions(-)

diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py
index a3839ed9bb..e6c3d3c02f 100644
--- a/dimos/perception/fiducial/marker_transformer.py
+++ b/dimos/perception/fiducial/marker_transformer.py
@@ -28,6 +28,8 @@
 
 from __future__ import annotations
 
+import dataclasses
+import math
 from typing import TYPE_CHECKING
 
 import numpy as np
@@ -45,6 +47,7 @@
     estimate_marker_pose,
     rvec_tvec_to_transform,
 )
+from dimos.types.timestamped import TimestampedBufferCollection
 from dimos.utils.logging_config import setup_logger
 
 if TYPE_CHECKING:
@@ -72,6 +75,39 @@ def _pose_tuple_to_transform(
     )
 
 
+def _average_marker_pose(
+    buffer: TimestampedBufferCollection[Detection3DMarker],
+) -> tuple[Vector3, Quaternion]:
+    """Mean translation; quaternion mean with hemisphere alignment.
+
+    Quaternions q and -q encode the same rotation, so naive averaging can
+    cancel. We pick the first sample as a hemisphere reference and flip the
+    sign of any sample whose dot product against it is negative before
+    summing. For closely-spaced rotations within a short window this is
+    indistinguishable from a proper SLERP-style average.
+    """
+    items = list(buffer)
+    n = len(items)
+    cx = sum(d.center.x for d in items) / n
+    cy = sum(d.center.y for d in items) / n
+    cz = sum(d.center.z for d in items) / n
+
+    ref = items[0].orientation
+    qsx = qsy = qsz = qsw = 0.0
+    for d in items:
+        q = d.orientation
+        s = -1.0 if (q.x * ref.x + q.y * ref.y + q.z * ref.z + q.w * ref.w) < 0 else 1.0
+        qsx += s * q.x
+        qsy += s * q.y
+        qsz += s * q.z
+        qsw += s * q.w
+    norm = math.sqrt(qsx * qsx + qsy * qsy + qsz * qsz + qsw * qsw)
+    return (
+        Vector3(cx, cy, cz),
+        Quaternion(qsx / norm, qsy / norm, qsz / norm, qsw / norm),
+    )
+
+
 class DetectMarkers(Transformer[Image, Detection3DMarker]):
     """Detect fiducial markers and emit one world-pose observation per marker."""
 
@@ -81,15 +117,27 @@ def __init__(
         marker_length_m: float,
         aruco_dictionary: str = "DICT_APRILTAG_36h11",
         world_frame: str = "world",
+        smoothing_window: float = 0.0,
     ) -> None:
         if marker_length_m <= 0:
             raise ValueError(f"marker_length_m must be > 0, got {marker_length_m}")
+        if smoothing_window < 0:
+            raise ValueError(f"smoothing_window must be >= 0, got {smoothing_window}")
         self.camera_info = camera_info
         self.marker_length_m = marker_length_m
         self.aruco_dictionary = aruco_dictionary
         self.world_frame = world_frame
+        self.smoothing_window = smoothing_window
         self._detector = create_aruco_detector(aruco_dictionary)
         self._cam_mtx, self._dist = camera_info_to_cv_matrices(camera_info)
+        # Per marker_id sliding-window buffer of raw detections, used to emit
+        # smoothed pose updates when ``smoothing_window > 0``.
+        self._buffers: dict[int, TimestampedBufferCollection[Detection3DMarker]] = {}
+        # Tracking (smoothing only): if a marker_id reappears after a gap
+        # larger than the buffer window, treat it as a new track. track_id
+        # increments monotonically across the whole stream so it's unique.
+        self._marker_to_track: dict[int, int] = {}
+        self._next_track_id = 0
 
     def __call__(
         self, upstream: Iterator[Observation[Image]]
@@ -155,9 +203,21 @@ def __call__(
                 xy_max = corners_2d.max(axis=0)
                 bbox = (float(xy_min[0]), float(xy_min[1]), float(xy_max[0]), float(xy_max[1]))
 
+                # Decide track_id (only meaningful when smoothing is on).
+                # Without smoothing, track_id == marker_id (legacy behavior).
+                if self.smoothing_window > 0:
+                    prior_buf = self._buffers.get(mid)
+                    prior_last = prior_buf.last() if prior_buf is not None else None
+                    if prior_last is None or (obs.ts - prior_last.ts) > self.smoothing_window:
+                        self._next_track_id += 1
+                        self._marker_to_track[mid] = self._next_track_id
+                    track_id = self._marker_to_track[mid]
+                else:
+                    track_id = mid
+
                 det = Detection3DMarker(
                     bbox=bbox,
-                    track_id=mid,
+                    track_id=track_id,
                     class_id=mid,
                     confidence=1.0,
                     name=f"marker_{mid}",
@@ -173,4 +233,29 @@ def __call__(
                     dictionary=self.aruco_dictionary,
                 )
 
-                yield obs.derive(data=det, pose=t_world_marker).tag(marker_id=mid)
+                yielded_det = det
+                yielded_pose = t_world_marker
+                if self.smoothing_window > 0:
+                    # Buffer raw detections per marker_id over a sliding
+                    # window; emit the windowed-mean pose so each successive
+                    # detection refines the same marker's estimate instead
+                    # of producing a fresh independent observation.
+                    buf = self._buffers.setdefault(
+                        mid, TimestampedBufferCollection(self.smoothing_window)
+                    )
+                    buf.add(det)
+                    avg_center, avg_orient = _average_marker_pose(buf)
+                    yielded_det = dataclasses.replace(
+                        det, center=avg_center, orientation=avg_orient
+                    )
+                    yielded_pose = Transform(
+                        translation=avg_center,
+                        rotation=avg_orient,
+                        frame_id=self.world_frame,
+                        child_frame_id=f"marker_{mid}",
+                        ts=obs.ts,
+                    )
+
+                yield obs.derive(data=yielded_det, pose=yielded_pose).tag(
+                    marker_id=mid, track_id=track_id
+                )
diff --git a/dimos/utils/cli/markers_rrd.py b/dimos/utils/cli/markers_rrd.py
index 3b67f9c2ea..b1e072d1e1 100644
--- a/dimos/utils/cli/markers_rrd.py
+++ b/dimos/utils/cli/markers_rrd.py
@@ -53,13 +53,16 @@ def main(
     out: Path = typer.Option(..., "--out", help="Output .rrd path"),
     marker_size: float = typer.Option(0.1, "--marker-size", help="Marker edge length (m)"),
     marker_max_speed: float = typer.Option(
-        0.15, "--marker-max-speed", help="Detection speed gate (m/s); 0 disables"
+        0.5, "--marker-max-speed", help="Detection speed gate (m/s); 0 disables"
     ),
     marker_max_rot_rate: float = typer.Option(
-        30.0, "--marker-max-rot-rate", help="Detection rot gate (deg/s); 0 disables"
+        50.0, "--marker-max-rot-rate", help="Detection rot gate (deg/s); 0 disables"
     ),
     quality_window: float = typer.Option(
-        0.25, "--quality-window", help="Sharpest-frame window for detection (s)"
+        0.1, "--quality-window", help="Sharpest-frame window for detection (s)"
+    ),
+    smoothing_window: float = typer.Option(
+        5.0, "--smoothing-window", help="Buffer window for averaged-track pass (s); 0 disables"
     ),
 ) -> None:
     db_path = resolve_named_path(dataset, ".db")
@@ -172,12 +175,55 @@ def main(
             )
         print(f"detections: {n_det}")
 
-    print(f"wrote {out}")
-    print(f"open with: rerun {out}")
-
-
-if __name__ == "__main__":
-    typer.run(main)
+        # ---- pass 4: averaged tracks (smoothing_window > 0 → per-track ids) ----
+        # Re-runs the same filtered pipeline through a smoothing detector;
+        # each track yields one entity that updates as the windowed average
+        # refines. Color stable per track_id for visual identity.
+        if smoothing_window > 0:
+            xf_tracked = DetectMarkers(
+                camera_info=cam_info,
+                marker_length_m=marker_size,
+                smoothing_window=smoothing_window,
+            )
+            pipeline_tracked: Stream[Any] = color_image.transform(
+                QualityWindow(lambda img: img.sharpness, window=quality_window)
+            )
+            if marker_max_speed > 0:
+                pipeline_tracked = pipeline_tracked.transform(
+                    SpeedLimit(
+                        max_mps=marker_max_speed,
+                        max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None,
+                    )
+                )
+            seen_tracks: set[int] = set()
+            n_updates = 0
+            for det_obs in pipeline_tracked.transform(xf_tracked):
+                d = det_obs.data
+                rr.set_time(TIMELINE, timestamp=det_obs.ts)
+                color = Color.from_cmap("tab10", (d.track_id % 10) / 10.0).rgb_u8()
+                rr.log(
+                    f"world/tracks/track_{d.track_id:04d}",
+                    rr.Boxes3D(
+                        centers=[(d.center.x, d.center.y, d.center.z)],
+                        half_sizes=[(marker_size / 2, marker_size / 2, 0.005)],
+                        quaternions=[
+                            rr.Quaternion(
+                                xyzw=[
+                                    d.orientation.x,
+                                    d.orientation.y,
+                                    d.orientation.z,
+                                    d.orientation.w,
+                                ]
+                            )
+                        ],
+                        colors=[color],
+                        fill_mode=rr.components.FillMode.Solid,
+                        labels=[f"track={d.track_id} id={d.marker_id}"],
+                    ),
+                )
+                seen_tracks.add(d.track_id)
+                n_updates += 1
+            print(f"tracks: {len(seen_tracks)} unique, {n_updates} updates")
 
     print(f"wrote {out}")
     print(f"open with: rerun {out}")

From e529913c9de461f0ced54d38e33edad8bcd37d59 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 25 May 2026 17:00:06 +0800
Subject: [PATCH 20/36] sliding window 7.5s

---
 dimos/utils/cli/markers_rrd.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dimos/utils/cli/markers_rrd.py b/dimos/utils/cli/markers_rrd.py
index b1e072d1e1..eb5306edf7 100644
--- a/dimos/utils/cli/markers_rrd.py
+++ b/dimos/utils/cli/markers_rrd.py
@@ -62,7 +62,7 @@ def main(
         0.1, "--quality-window", help="Sharpest-frame window for detection (s)"
     ),
     smoothing_window: float = typer.Option(
-        5.0, "--smoothing-window", help="Buffer window for averaged-track pass (s); 0 disables"
+        7.5, "--smoothing-window", help="Buffer window for averaged-track pass (s); 0 disables"
     ),
 ) -> None:
     db_path = resolve_named_path(dataset, ".db")

From 5751d48deb2cbd123d9965536b5d3273da354dae Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 25 May 2026 17:28:36 +0800
Subject: [PATCH 21/36] detector tuned

---
 dimos/robot/cli/dimos.py | 16 ++++++++--
 dimos/utils/cli/map.py   | 65 +++++++++++++++++++++++-----------------
 2 files changed, 52 insertions(+), 29 deletions(-)

diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py
index 041891a135..5c67c3e0e1 100644
--- a/dimos/robot/cli/dimos.py
+++ b/dimos/robot/cli/dimos.py
@@ -710,15 +710,25 @@ def map_cmd(
         0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)"
     ),
     marker_max_speed: float = typer.Option(
-        0.05,
+        0.5,
         "--marker-max-speed",
         help="Skip frames where robot is moving faster than this (m/s); 0 disables",
     ),
     marker_max_rot_rate: float = typer.Option(
-        15.0,
+        50.0,
         "--marker-max-rot-rate",
         help="Skip frames where robot is rotating faster than this (deg/s); 0 disables",
     ),
+    marker_quality_window: float = typer.Option(
+        0.1,
+        "--marker-quality-window",
+        help="Sharpest-frame window for marker detection (s)",
+    ),
+    marker_smoothing: float = typer.Option(
+        7.5,
+        "--marker-smoothing",
+        help="Sliding-window track buffer for marker pose averaging (s); 0 disables (one box per raw detection)",
+    ),
 ) -> None:
     """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun."""
     from dimos.utils.cli.map import main as map_main
@@ -737,6 +747,8 @@ def map_cmd(
         marker_size=marker_size,
         marker_max_speed=marker_max_speed,
         marker_max_rot_rate=marker_max_rot_rate,
+        marker_quality_window=marker_quality_window,
+        marker_smoothing=marker_smoothing,
     )
 
 
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 554e1878d2..3c59c9088b 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -104,15 +104,25 @@ def main(
         0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)"
     ),
     marker_max_speed: float = typer.Option(
-        0.1,
+        0.5,
         "--marker-max-speed",
         help="Skip frames where robot is moving faster than this (m/s); 0 disables",
     ),
     marker_max_rot_rate: float = typer.Option(
-        15,
+        50.0,
         "--marker-max-rot-rate",
         help="Skip frames where robot is rotating faster than this (deg/s); 0 disables",
     ),
+    marker_quality_window: float = typer.Option(
+        0.1,
+        "--marker-quality-window",
+        help="Sharpest-frame window for marker detection (s)",
+    ),
+    marker_smoothing: float = typer.Option(
+        7.5,
+        "--marker-smoothing",
+        help="Sliding-window track buffer for marker pose averaging (s); 0 disables (one box per raw detection)",
+    ),
 ) -> None:
     db_path = resolve_named_path(dataset, ".db")
     if export or full_pgo:
@@ -229,13 +239,14 @@ def main(
         xf = DetectMarkers(
             camera_info=_camera_info_static(),
             marker_length_m=marker_size,
+            smoothing_window=marker_smoothing,
         )
-        # 2Hz quality-gated: keep only the sharpest frame per 0.5s window,
-        # then drop frames where the robot was moving (linear + rotational)
-        # faster than the limits — speed/rate are averaged across the window.
+        # Keep the sharpest frame per --marker-quality-window window, then
+        # drop frames where the robot was moving (linear + rotational) faster
+        # than the limits. Defaults match markers_rrd.py so positions agree.
         pipeline: Stream[Image] = color_image.tap(
             progress(color_image.count(), "detecting markers")
-        ).transform(QualityWindow(lambda img: img.sharpness, window=0.5))
+        ).transform(QualityWindow(lambda img: img.sharpness, window=marker_quality_window))
         if marker_max_speed > 0:
             pipeline = pipeline.transform(
                 SpeedLimit(
@@ -243,9 +254,21 @@ def main(
                     max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None,
                 )
             )
-        marker_dets = pipeline.transform(xf).to_list()
-        unique = sorted({obs.data.marker_id for obs in marker_dets})
-        print(f"markers: {len(marker_dets)} detections across {len(unique)} unique ids {unique}")
+        all_dets = pipeline.transform(xf).to_list()
+        if marker_smoothing > 0:
+            # Keep only the latest emission per track_id — that's the most
+            # averaged pose, drawn once per tracked marker session.
+            by_track: dict[int, Observation[Any]] = {}
+            for d in all_dets:
+                by_track[d.data.track_id] = d
+            marker_dets = list(by_track.values())
+        else:
+            marker_dets = all_dets
+        unique_ids = sorted({obs.data.marker_id for obs in marker_dets})
+        print(
+            f"markers: {len(marker_dets)} entries from {len(all_dets)} raw detections "
+            f"across {len(unique_ids)} unique ids {unique_ids}"
+        )
 
     if not no_gui:
         rerun_init("dimos map tool", spawn=True)
@@ -318,24 +341,12 @@ def main(
                 )
                 for d in marker_dets
             ]
-            # Color mode: turbo over detection time vs. tab10 over marker id.
-            COLOR_BY_TIME = True
-            if COLOR_BY_TIME:
-                ts_min = min(d.ts for d in marker_dets)
-                ts_max = max(d.ts for d in marker_dets)
-                ts_span = ts_max - ts_min if ts_max > ts_min else 1.0
-                colors = [
-                    Color.from_cmap("turbo", (d.ts - ts_min) / ts_span).rgb_u8()
-                    for d in marker_dets
-                ]
-            else:
-                unique_ids = sorted({d.data.marker_id for d in marker_dets})
-                id_to_color = {
-                    mid: Color.from_cmap("tab10", (i % 10) / 10.0).rgb_u8()
-                    for i, mid in enumerate(unique_ids)
-                }
-                colors = [id_to_color[d.data.marker_id] for d in marker_dets]
-            labels = [f"id={d.data.marker_id}" for d in marker_dets]
+            # One entry per tracked marker session — color stable per track_id.
+            colors = [
+                Color.from_cmap("tab10", (d.data.track_id % 10) / 10.0).rgb_u8()
+                for d in marker_dets
+            ]
+            labels = [f"track={d.data.track_id} id={d.data.marker_id}" for d in marker_dets]
             rr.log(
                 "world/raw_map/markers/fill",
                 rr.Boxes3D(

From dcd7f7b9b746b2b40a1f3264b8dbe0618d2870c1 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 25 May 2026 21:22:34 +0800
Subject: [PATCH 22/36] nicer map

---
 dimos/utils/cli/map.py | 45 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 4 deletions(-)

diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 3c59c9088b..e6fd428428 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -288,7 +288,7 @@ def main(
                 full_pgo_map.to_rerun(voxel_size=voxel / 2),
                 static=True,
             )
-        STEM_HEIGHT = 2.0  # lift pose-graph viz above the map for legibility
+        STEM_HEIGHT = 0  # lift pose-graph viz above the map for legibility
         if pgo_path:
             rr.log(
                 "world/pgo_map/path",
@@ -300,7 +300,7 @@ def main(
             hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path]
             rr.log(
                 "world/pgo_map/pgo/keyframes",
-                rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.025]),
+                rr.Points3D(positions=hovered, colors=[[255, 0, 0]], radii=[0.025]),
                 static=True,
             )
         if pgo and loops:
@@ -347,6 +347,12 @@ def main(
                 for d in marker_dets
             ]
             labels = [f"track={d.data.track_id} id={d.data.marker_id}" for d in marker_dets]
+            # Pin pattern (from dimos/memory2/vis/space/rerun.py): thin
+            # vertical line from each marker with the label floating at the
+            # top so multi-marker labels never overlap the boxes.
+            MARKER_STEM = 1.0
+            pin_strips = [[(cx, cy, cz), (cx, cy, cz + MARKER_STEM)] for (cx, cy, cz) in centers]
+            label_positions = [(cx, cy, cz + MARKER_STEM + 0.01) for (cx, cy, cz) in centers]
             rr.log(
                 "world/raw_map/markers/fill",
                 rr.Boxes3D(
@@ -355,7 +361,6 @@ def main(
                     quaternions=quaternions,
                     colors=colors,
                     fill_mode=rr.components.FillMode.Solid,
-                    labels=labels,
                 ),
                 static=True,
             )
@@ -371,6 +376,18 @@ def main(
                 ),
                 static=True,
             )
+            rr.log(
+                "world/raw_map/markers/pin",
+                rr.LineStrips3D(strips=pin_strips, colors=colors, radii=[0.005]),
+                static=True,
+            )
+            rr.log(
+                "world/raw_map/markers/label",
+                rr.Points3D(
+                    positions=label_positions, labels=labels, colors=colors, radii=[0.001] * n
+                ),
+                static=True,
+            )
 
             if interp is not None:
                 # PGO-corrected marker poses. interp(ts) maps raw_world →
@@ -402,6 +419,12 @@ def main(
                             corrected.rotation.w,
                         )
                     )
+                pgo_pin_strips = [
+                    [(cx, cy, cz), (cx, cy, cz + MARKER_STEM)] for (cx, cy, cz) in pgo_centers
+                ]
+                pgo_label_positions = [
+                    (cx, cy, cz + MARKER_STEM + 0.01) for (cx, cy, cz) in pgo_centers
+                ]
                 rr.log(
                     "world/pgo_map/markers/fill",
                     rr.Boxes3D(
@@ -410,7 +433,6 @@ def main(
                         quaternions=pgo_quaternions,
                         colors=colors,
                         fill_mode=rr.components.FillMode.Solid,
-                        labels=labels,
                     ),
                     static=True,
                 )
@@ -426,6 +448,21 @@ def main(
                     ),
                     static=True,
                 )
+                rr.log(
+                    "world/pgo_map/markers/pin",
+                    rr.LineStrips3D(strips=pgo_pin_strips, colors=colors, radii=[0.005]),
+                    static=True,
+                )
+                rr.log(
+                    "world/pgo_map/markers/label",
+                    rr.Points3D(
+                        positions=pgo_label_positions,
+                        labels=labels,
+                        colors=colors,
+                        radii=[0.001] * n,
+                    ),
+                    static=True,
+                )
 
     if export and pgo_map is not None:
         from pathlib import Path

From a6d4dcc98f5671bd3e685546f8c994c93e71e411 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 25 May 2026 21:24:32 +0800
Subject: [PATCH 23/36] marker eval first pass

---
 dimos/utils/cli/marker_eval.py | 225 +++++++++++++++++++++++++++++++++
 1 file changed, 225 insertions(+)
 create mode 100644 dimos/utils/cli/marker_eval.py

diff --git a/dimos/utils/cli/marker_eval.py b/dimos/utils/cli/marker_eval.py
new file mode 100644
index 0000000000..47aa834c64
--- /dev/null
+++ b/dimos/utils/cli/marker_eval.py
@@ -0,0 +1,225 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Loop-closure tuning eval — runs the dimos map pipeline over all
+hk_village recordings and reports two totals to minimize:
+
+- TOTAL_PGO_TIME (s): wall-clock for the PGO loop across all recordings
+- TOTAL_SPREAD  (m): per-recording sum of pairwise distances between
+  final-smoothed marker positions of the same marker_id (PGO-corrected),
+  summed across all marker_ids across all recordings. Smaller = tighter
+  loop closures.
+
+Usage:
+    uv run python -m dimos.utils.cli.marker_eval
+    uv run python -m dimos.utils.cli.marker_eval hk_village1
+"""
+
+from __future__ import annotations
+
+from concurrent.futures import ProcessPoolExecutor, as_completed
+import multiprocessing as mp
+import os
+import time
+from typing import Any
+
+from rich.console import Console
+from rich.table import Table
+import typer
+
+from dimos.mapping.relocalization.pgo import (
+    LoopClosure,
+    keyframes_to_corrections,
+    make_interpolator,
+    pgo_keyframes,
+)
+from dimos.memory2.store.sqlite import SqliteStore
+from dimos.memory2.stream import Stream
+from dimos.memory2.transform import QualityWindow, SpeedLimit
+from dimos.memory2.type.observation import Observation
+from dimos.msgs.geometry_msgs.Transform import Transform
+from dimos.msgs.sensor_msgs.Image import Image
+from dimos.perception.fiducial.marker_transformer import DetectMarkers
+from dimos.robot.unitree.go2.connection import _camera_info_static
+from dimos.utils.data import get_data
+
+DEFAULT_DATASETS = [f"hk_village{i}" for i in range(1, 7)]
+
+
+def _pairwise_sum(pts: list[tuple[float, float, float]]) -> float:
+    """Sum of euclidean distances over all unordered pairs."""
+    total = 0.0
+    for i, a in enumerate(pts):
+        for b in pts[i + 1 :]:
+            dx, dy, dz = a[0] - b[0], a[1] - b[1], a[2] - b[2]
+            total += (dx * dx + dy * dy + dz * dz) ** 0.5
+    return total
+
+
+def _eval_recording(
+    name: str,
+    *,
+    marker_size: float,
+    marker_max_speed: float,
+    marker_max_rot_rate: float,
+    marker_quality_window: float,
+    marker_smoothing: float,
+) -> tuple[float, float]:
+    """Returns (pgo_time_s, spread_m) for one recording."""
+    db_path = get_data(f"{name}.db")
+    cam_info = _camera_info_static()
+
+    store = SqliteStore(path=str(db_path))
+    with store:
+        lidar = store.streams.lidar
+
+        # --- PGO (timed) ---
+        # .tap() captures each keyframe as make_interpolator iterates the
+        # corrections stream, so no extra pass over lidar.
+        keyframes: list[Any] = []
+        loops_out: list[LoopClosure] = []
+        t0 = time.perf_counter()
+        kf_stream = pgo_keyframes(lidar, loop_closures_out=loops_out).tap(
+            lambda obs: keyframes.append(obs.data)
+        )
+        interp = make_interpolator(keyframes_to_corrections(kf_stream))
+        pgo_time = time.perf_counter() - t0
+
+        # --- Marker detection (same pipeline as dimos map / markers_rrd) ---
+        color_image = store.stream("color_image", Image)
+        xf = DetectMarkers(
+            camera_info=cam_info,
+            marker_length_m=marker_size,
+            smoothing_window=marker_smoothing,
+        )
+        pipeline: Stream[Image] = color_image.transform(
+            QualityWindow(lambda img: img.sharpness, window=marker_quality_window)
+        )
+        if marker_max_speed > 0:
+            pipeline = pipeline.transform(
+                SpeedLimit(
+                    max_mps=marker_max_speed,
+                    max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None,
+                )
+            )
+        all_dets = pipeline.transform(xf).to_list()
+
+        # Dedup by track_id → final smoothed pose per track.
+        by_track: dict[int, Observation[Any]] = {}
+        for d in all_dets:
+            by_track[d.data.track_id] = d
+        tracks = list(by_track.values())
+
+        # PGO-correct each track's pose; group by marker_id.
+        by_marker: dict[int, list[tuple[float, float, float]]] = {}
+        for d in tracks:
+            raw_tf = Transform(
+                translation=d.data.center,
+                rotation=d.data.orientation,
+                frame_id="world",
+                child_frame_id=f"marker_{d.data.marker_id}",
+                ts=d.ts,
+            )
+            corrected = interp(d.ts) + raw_tf
+            t = corrected.translation
+            by_marker.setdefault(d.data.marker_id, []).append((t.x, t.y, t.z))
+
+        spread = sum(_pairwise_sum(v) for v in by_marker.values())
+        return pgo_time, spread
+
+
+def main(
+    datasets: list[str] = typer.Argument(
+        None, help="Recordings to eval; defaults to hk_village1..6"
+    ),
+    marker_size: float = typer.Option(0.1, "--marker-size"),
+    marker_max_speed: float = typer.Option(0.5, "--marker-max-speed"),
+    marker_max_rot_rate: float = typer.Option(50.0, "--marker-max-rot-rate"),
+    marker_quality_window: float = typer.Option(0.1, "--marker-quality-window"),
+    marker_smoothing: float = typer.Option(7.5, "--marker-smoothing"),
+    workers: int = typer.Option(
+        0, "--workers", "-j", help="Parallel workers (0 = min(len(datasets), cpu_count))"
+    ),
+) -> None:
+    names = datasets or DEFAULT_DATASETS
+    console = Console()
+
+    n_workers = workers or min(len(names), os.cpu_count() or 1)
+    wall_start = time.perf_counter()
+    results: dict[str, tuple[float, float]] = {}
+
+    if n_workers <= 1 or len(names) <= 1:
+        for name in names:
+            console.print(f"[dim]eval {name}...[/dim]")
+            results[name] = _eval_recording(
+                name,
+                marker_size=marker_size,
+                marker_max_speed=marker_max_speed,
+                marker_max_rot_rate=marker_max_rot_rate,
+                marker_quality_window=marker_quality_window,
+                marker_smoothing=marker_smoothing,
+            )
+    else:
+        console.print(f"[dim]running {len(names)} recordings on {n_workers} workers[/dim]")
+        # "spawn" — workers are fresh interpreters. Forking after cv2/openmp
+        # have spun threads in the parent deadlocks because the threads
+        # don't survive fork; spawn sidesteps it entirely.
+        with ProcessPoolExecutor(max_workers=n_workers, mp_context=mp.get_context("spawn")) as ex:
+            futures = {
+                ex.submit(
+                    _eval_recording,
+                    name,
+                    marker_size=marker_size,
+                    marker_max_speed=marker_max_speed,
+                    marker_max_rot_rate=marker_max_rot_rate,
+                    marker_quality_window=marker_quality_window,
+                    marker_smoothing=marker_smoothing,
+                ): name
+                for name in names
+            }
+            for f in as_completed(futures):
+                name = futures[f]
+                results[name] = f.result()
+                pgo_time, spread = results[name]
+                console.print(f"  done {name:>14} ({pgo_time:5.2f}s, spread {spread:7.3f}m)")
+
+    wall = time.perf_counter() - wall_start
+
+    table = Table(title="loop-closure eval")
+    table.add_column("recording")
+    table.add_column("pgo_time_s", justify="right")
+    table.add_column("spread_m", justify="right")
+
+    total_pgo = 0.0
+    total_spread = 0.0
+    for name in names:  # original order
+        pgo_time, spread = results[name]
+        table.add_row(name, f"{pgo_time:.2f}", f"{spread:.3f}")
+        total_pgo += pgo_time
+        total_spread += spread
+
+    table.add_section()
+    table.add_row(
+        "[bold]TOTAL[/bold]",
+        f"[bold]{total_pgo:.2f}[/bold]",
+        f"[bold]{total_spread:.3f}[/bold]",
+    )
+    console.print(table)
+    print(f"TOTAL_PGO_TIME={total_pgo:.2f}")
+    print(f"TOTAL_SPREAD={total_spread:.3f}")
+    print(f"WALL_TIME={wall:.2f}")
+
+
+if __name__ == "__main__":
+    typer.run(main)

From 89eb996e05724c35a91edbcb87c7457bf9f859ce Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 25 May 2026 21:43:50 +0800
Subject: [PATCH 24/36] extracted loop closure code from relocalization code

---
 .../loop_closure/eval.py}                     | 36 +++++++------------
 .../{relocalization => loop_closure}/pgo.py   |  0
 .../test_pgo.py                               |  2 +-
 .../loop_closure/utils}/markers_rrd.py        |  0
 dimos/utils/cli/map.py                        |  2 +-
 5 files changed, 15 insertions(+), 25 deletions(-)
 rename dimos/{utils/cli/marker_eval.py => mapping/loop_closure/eval.py} (88%)
 rename dimos/mapping/{relocalization => loop_closure}/pgo.py (100%)
 rename dimos/mapping/{relocalization => loop_closure}/test_pgo.py (99%)
 rename dimos/{utils/cli => mapping/loop_closure/utils}/markers_rrd.py (100%)

diff --git a/dimos/utils/cli/marker_eval.py b/dimos/mapping/loop_closure/eval.py
similarity index 88%
rename from dimos/utils/cli/marker_eval.py
rename to dimos/mapping/loop_closure/eval.py
index 47aa834c64..5fe3f6ae4d 100644
--- a/dimos/utils/cli/marker_eval.py
+++ b/dimos/mapping/loop_closure/eval.py
@@ -22,8 +22,8 @@
   loop closures.
 
 Usage:
-    uv run python -m dimos.utils.cli.marker_eval
-    uv run python -m dimos.utils.cli.marker_eval hk_village1
+    uv run python -m dimos.mapping.loop_closure.marker_eval
+    uv run python -m dimos.mapping.loop_closure.marker_eval hk_village1
 """
 
 from __future__ import annotations
@@ -34,11 +34,9 @@
 import time
 from typing import Any
 
-from rich.console import Console
-from rich.table import Table
 import typer
 
-from dimos.mapping.relocalization.pgo import (
+from dimos.mapping.loop_closure.pgo import (
     LoopClosure,
     keyframes_to_corrections,
     make_interpolator,
@@ -153,7 +151,6 @@ def main(
     ),
 ) -> None:
     names = datasets or DEFAULT_DATASETS
-    console = Console()
 
     n_workers = workers or min(len(names), os.cpu_count() or 1)
     wall_start = time.perf_counter()
@@ -161,7 +158,7 @@ def main(
 
     if n_workers <= 1 or len(names) <= 1:
         for name in names:
-            console.print(f"[dim]eval {name}...[/dim]")
+            print(f"eval {name}...")
             results[name] = _eval_recording(
                 name,
                 marker_size=marker_size,
@@ -171,7 +168,7 @@ def main(
                 marker_smoothing=marker_smoothing,
             )
     else:
-        console.print(f"[dim]running {len(names)} recordings on {n_workers} workers[/dim]")
+        print(f"running {len(names)} recordings on {n_workers} workers")
         # "spawn" — workers are fresh interpreters. Forking after cv2/openmp
         # have spun threads in the parent deadlocks because the threads
         # don't survive fork; spawn sidesteps it entirely.
@@ -192,30 +189,23 @@ def main(
                 name = futures[f]
                 results[name] = f.result()
                 pgo_time, spread = results[name]
-                console.print(f"  done {name:>14} ({pgo_time:5.2f}s, spread {spread:7.3f}m)")
+                print(f"  done {name:>14}  pgo={pgo_time:5.2f}s  spread={spread:7.3f}m")
 
     wall = time.perf_counter() - wall_start
 
-    table = Table(title="loop-closure eval")
-    table.add_column("recording")
-    table.add_column("pgo_time_s", justify="right")
-    table.add_column("spread_m", justify="right")
-
     total_pgo = 0.0
     total_spread = 0.0
+    print()
+    print(f"{'recording':<14}  {'pgo_time_s':>10}  {'spread_m':>10}")
+    print("-" * 40)
     for name in names:  # original order
         pgo_time, spread = results[name]
-        table.add_row(name, f"{pgo_time:.2f}", f"{spread:.3f}")
+        print(f"{name:<14}  {pgo_time:>10.2f}  {spread:>10.3f}")
         total_pgo += pgo_time
         total_spread += spread
-
-    table.add_section()
-    table.add_row(
-        "[bold]TOTAL[/bold]",
-        f"[bold]{total_pgo:.2f}[/bold]",
-        f"[bold]{total_spread:.3f}[/bold]",
-    )
-    console.print(table)
+    print("-" * 40)
+    print(f"{'TOTAL':<14}  {total_pgo:>10.2f}  {total_spread:>10.3f}")
+    print()
     print(f"TOTAL_PGO_TIME={total_pgo:.2f}")
     print(f"TOTAL_SPREAD={total_spread:.3f}")
     print(f"WALL_TIME={wall:.2f}")
diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/loop_closure/pgo.py
similarity index 100%
rename from dimos/mapping/relocalization/pgo.py
rename to dimos/mapping/loop_closure/pgo.py
diff --git a/dimos/mapping/relocalization/test_pgo.py b/dimos/mapping/loop_closure/test_pgo.py
similarity index 99%
rename from dimos/mapping/relocalization/test_pgo.py
rename to dimos/mapping/loop_closure/test_pgo.py
index 76f8b378eb..b6f5ea3d8e 100644
--- a/dimos/mapping/relocalization/test_pgo.py
+++ b/dimos/mapping/loop_closure/test_pgo.py
@@ -20,7 +20,7 @@
 import pytest
 from scipy.spatial.transform import Rotation
 
-from dimos.mapping.relocalization.pgo import (
+from dimos.mapping.loop_closure.pgo import (
     Keyframe,
     PGOConfig,
     _obs_to_pose3,
diff --git a/dimos/utils/cli/markers_rrd.py b/dimos/mapping/loop_closure/utils/markers_rrd.py
similarity index 100%
rename from dimos/utils/cli/markers_rrd.py
rename to dimos/mapping/loop_closure/utils/markers_rrd.py
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 3c59c9088b..07555b375d 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -167,7 +167,7 @@ def main(
     loops: list[Any] = []
     interp: Any | None = None
     if pgo:
-        from dimos.mapping.relocalization.pgo import (
+        from dimos.mapping.loop_closure.pgo import (
             LoopClosure,
             keyframes_to_corrections,
             make_interpolator,

From 186a4bef1380777a9f53292ab37f33a258375a82 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 25 May 2026 21:46:46 +0800
Subject: [PATCH 25/36] better eval

---
 dimos/mapping/loop_closure/eval.py | 60 ++++++++----------------------
 1 file changed, 15 insertions(+), 45 deletions(-)

diff --git a/dimos/mapping/loop_closure/eval.py b/dimos/mapping/loop_closure/eval.py
index 5fe3f6ae4d..c41817b032 100644
--- a/dimos/mapping/loop_closure/eval.py
+++ b/dimos/mapping/loop_closure/eval.py
@@ -22,15 +22,12 @@
   loop closures.
 
 Usage:
-    uv run python -m dimos.mapping.loop_closure.marker_eval
-    uv run python -m dimos.mapping.loop_closure.marker_eval hk_village1
+    uv run python -m dimos.mapping.loop_closure.eval
+    uv run python -m dimos.mapping.loop_closure.eval hk_village1
 """
 
 from __future__ import annotations
 
-from concurrent.futures import ProcessPoolExecutor, as_completed
-import multiprocessing as mp
-import os
 import time
 from typing import Any
 
@@ -146,51 +143,24 @@ def main(
     marker_max_rot_rate: float = typer.Option(50.0, "--marker-max-rot-rate"),
     marker_quality_window: float = typer.Option(0.1, "--marker-quality-window"),
     marker_smoothing: float = typer.Option(7.5, "--marker-smoothing"),
-    workers: int = typer.Option(
-        0, "--workers", "-j", help="Parallel workers (0 = min(len(datasets), cpu_count))"
-    ),
 ) -> None:
     names = datasets or DEFAULT_DATASETS
 
-    n_workers = workers or min(len(names), os.cpu_count() or 1)
+    # Sequential: PGO (Open3D) and marker detection (cv2/numpy) already
+    # saturate available cores internally via OpenMP — process-level
+    # parallelism only adds contention.
     wall_start = time.perf_counter()
     results: dict[str, tuple[float, float]] = {}
-
-    if n_workers <= 1 or len(names) <= 1:
-        for name in names:
-            print(f"eval {name}...")
-            results[name] = _eval_recording(
-                name,
-                marker_size=marker_size,
-                marker_max_speed=marker_max_speed,
-                marker_max_rot_rate=marker_max_rot_rate,
-                marker_quality_window=marker_quality_window,
-                marker_smoothing=marker_smoothing,
-            )
-    else:
-        print(f"running {len(names)} recordings on {n_workers} workers")
-        # "spawn" — workers are fresh interpreters. Forking after cv2/openmp
-        # have spun threads in the parent deadlocks because the threads
-        # don't survive fork; spawn sidesteps it entirely.
-        with ProcessPoolExecutor(max_workers=n_workers, mp_context=mp.get_context("spawn")) as ex:
-            futures = {
-                ex.submit(
-                    _eval_recording,
-                    name,
-                    marker_size=marker_size,
-                    marker_max_speed=marker_max_speed,
-                    marker_max_rot_rate=marker_max_rot_rate,
-                    marker_quality_window=marker_quality_window,
-                    marker_smoothing=marker_smoothing,
-                ): name
-                for name in names
-            }
-            for f in as_completed(futures):
-                name = futures[f]
-                results[name] = f.result()
-                pgo_time, spread = results[name]
-                print(f"  done {name:>14}  pgo={pgo_time:5.2f}s  spread={spread:7.3f}m")
-
+    for name in names:
+        print(f"eval {name}...")
+        results[name] = _eval_recording(
+            name,
+            marker_size=marker_size,
+            marker_max_speed=marker_max_speed,
+            marker_max_rot_rate=marker_max_rot_rate,
+            marker_quality_window=marker_quality_window,
+            marker_smoothing=marker_smoothing,
+        )
     wall = time.perf_counter() - wall_start
 
     total_pgo = 0.0

From b2b7afbcdbc2519e8956b625b98599cb48c2b9a0 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Thu, 28 May 2026 15:40:32 +0800
Subject: [PATCH 26/36] observation pose refactor

---
 dimos/mapping/loop_closure/pgo.py        |  26 ++---
 dimos/mapping/loop_closure/test_pgo.py   |  27 +++--
 dimos/memory2/module.py                  |   5 +-
 dimos/memory2/observationstore/sqlite.py |  31 ++---
 dimos/memory2/test_save.py               |   2 +-
 dimos/memory2/test_stream.py             |   2 +-
 dimos/memory2/transform.py               |  32 ++---
 dimos/memory2/type/filter.py             |  10 +-
 dimos/memory2/type/observation.py        | 141 ++++++++++++++++++-----
 dimos/memory2/vis/space/rerun.py         |  13 ++-
 dimos/memory2/vis/space/space.py         |   5 +-
 dimos/memory2/vis/space/svg.py           |   7 +-
 dimos/msgs/geometry_msgs/Quaternion.py   |  13 +++
 dimos/msgs/sensor_msgs/PointCloud2.py    |   2 +-
 dimos/utils/cli/map.py                   |  15 +--
 15 files changed, 200 insertions(+), 131 deletions(-)

diff --git a/dimos/mapping/loop_closure/pgo.py b/dimos/mapping/loop_closure/pgo.py
index af94921c9d..84a8a0fa24 100644
--- a/dimos/mapping/loop_closure/pgo.py
+++ b/dimos/mapping/loop_closure/pgo.py
@@ -170,17 +170,15 @@ def pgo_keyframes(
     for obs in stream:
         if on_frame is not None:
             on_frame(obs)
-        if obs.pose is None:
+        p = obs.pose_tuple
+        if p is None:
             continue
-        # Skip placeholder poses (origin position OR zero quaternion).
-        if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0:
+        # Skip placeholder poses written before odom converges:
+        # all-zero translation OR all-zero (uninitialized) quaternion.
+        # An identity quaternion (qw=1) is valid and must not be filtered.
+        if p[0] == 0 and p[1] == 0 and p[2] == 0:
             continue
-        if (
-            obs.pose[3] == 0
-            and obs.pose[4] == 0
-            and obs.pose[5] == 0
-            and (obs.pose[6] == 0 or obs.pose[6] == 1)
-        ):
+        if p[3] == 0 and p[4] == 0 and p[5] == 0 and p[6] == 0:
             continue
         local_pose = _obs_to_pose3(obs)
         pgo.process(local_pose, obs.ts, obs.data)
@@ -264,10 +262,11 @@ def apply_corrections(
 
     def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]:
         for obs in upstream:
-            if obs.pose is None:
+            ps = obs.pose_stamped
+            if ps is None:
                 yield obs
                 continue
-            raw_tf = Transform.from_pose(FRAME_BODY, obs.pose_stamped)
+            raw_tf = Transform.from_pose(FRAME_BODY, ps)
             # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw.
             # Observation normalizes Transform back to 7-tuple via __post_init__.
             corrected = interp(obs.ts) + raw_tf
@@ -297,9 +296,10 @@ def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3:
     """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`."""
     import gtsam  # type: ignore[import-not-found,import-untyped]
 
-    if obs.pose is None:
+    p = obs.pose_tuple
+    if p is None:
         raise LookupError("No pose set on this observation")
-    x, y, z, qx, qy, qz, qw = obs.pose
+    x, y, z, qx, qy, qz, qw = p
     return gtsam.Pose3(
         gtsam.Rot3.Quaternion(float(qw), float(qx), float(qy), float(qz)),
         gtsam.Point3(float(x), float(y), float(z)),
diff --git a/dimos/mapping/loop_closure/test_pgo.py b/dimos/mapping/loop_closure/test_pgo.py
index b6f5ea3d8e..0042c19b2c 100644
--- a/dimos/mapping/loop_closure/test_pgo.py
+++ b/dimos/mapping/loop_closure/test_pgo.py
@@ -87,14 +87,13 @@ def test_observation_normalizes_transform_pose(self) -> None:
             ts=1.0,
         )
         obs: Observation[int] = Observation(id=0, ts=1.0, pose=tf, _data=0)
-        assert isinstance(obs.pose, tuple)
-        assert obs.pose[0] == pytest.approx(1.5)
-        assert obs.pose[6] == pytest.approx(0.927)
+        assert obs.pose_tuple is not None
+        assert obs.pose_tuple[0] == pytest.approx(1.5)
+        assert obs.pose_tuple[6] == pytest.approx(0.927)
 
-        # derive() also runs the normalization via __post_init__.
+        # derive() also re-runs the normalization.
         derived = obs.derive(data=0, pose=tf)
-        assert isinstance(derived.pose, tuple)
-        assert derived.pose == obs.pose
+        assert derived.pose_tuple == obs.pose_tuple
 
     def test_observation_normalizes_posestamped(self) -> None:
         from dimos.memory2.type.observation import Observation
@@ -102,8 +101,7 @@ def test_observation_normalizes_posestamped(self) -> None:
 
         ps = PoseStamped(ts=1.0, position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0))
         obs: Observation[int] = Observation(id=0, ts=1.0, pose=ps, _data=0)
-        assert isinstance(obs.pose, tuple)
-        assert obs.pose == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0)
+        assert obs.pose_tuple == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0)
 
     def test_obs_to_pose3_roundtrip(self) -> None:
         from dimos.memory2.type.observation import Observation
@@ -191,8 +189,8 @@ def test_apply_identity_corrections_preserves_poses(self) -> None:
         kfs = pgo_keyframes(lidar)
         corr = keyframes_to_corrections(kfs)
         corrected = apply_corrections(lidar, corr)
-        in_poses = [o.pose for o in lidar if o.pose is not None]
-        out_poses = [o.pose for o in corrected if o.pose is not None]
+        in_poses = [o.pose_tuple for o in lidar if o.pose_tuple is not None]
+        out_poses = [o.pose_tuple for o in corrected if o.pose_tuple is not None]
         assert len(in_poses) == len(out_poses)
         for p_in, p_out in zip(in_poses, out_poses, strict=True):
             for a, b in zip(p_in, p_out, strict=True):
@@ -268,10 +266,11 @@ def test_pure_translation_shifts_poses(self) -> None:
 
         corrected = apply_corrections(lidar, corr)
         for obs in corrected:
-            assert obs.pose is not None
-            assert obs.pose[0] == pytest.approx(5.0, abs=1e-9)
-            assert obs.pose[1] == pytest.approx(0.0, abs=1e-9)
-            assert obs.pose[2] == pytest.approx(0.0, abs=1e-9)
+            p = obs.pose_tuple
+            assert p is not None
+            assert p[0] == pytest.approx(5.0, abs=1e-9)
+            assert p[1] == pytest.approx(0.0, abs=1e-9)
+            assert p[2] == pytest.approx(0.0, abs=1e-9)
 
     def test_passes_through_pose_none(self) -> None:
         mem = MemoryStore()
diff --git a/dimos/memory2/module.py b/dimos/memory2/module.py
index b584553bae..929d241882 100644
--- a/dimos/memory2/module.py
+++ b/dimos/memory2/module.py
@@ -234,7 +234,10 @@ def search(self, query: str) -> PoseStamped:
         def _similarity(obs: Observation[Any]) -> float:
             return cast("EmbeddedObservation[Any]", obs).similarity or 0.0
 
-        return results.transform(peaks(key=_similarity, distance=1.0)).last().pose_stamped
+        best = results.transform(peaks(key=_similarity, distance=1.0)).last()
+        if best.pose_stamped is None:
+            raise LookupError("No pose on best search result")
+        return best.pose_stamped
 
 
 class RecorderConfig(MemoryModuleConfig):
diff --git a/dimos/memory2/observationstore/sqlite.py b/dimos/memory2/observationstore/sqlite.py
index 42f4193581..e82e867dc0 100644
--- a/dimos/memory2/observationstore/sqlite.py
+++ b/dimos/memory2/observationstore/sqlite.py
@@ -33,7 +33,7 @@
     TimeRangeFilter,
     _xyz,
 )
-from dimos.memory2.type.observation import _UNLOADED, Observation
+from dimos.memory2.type.observation import _UNLOADED, Observation, PoseTuple
 from dimos.memory2.utils.sqlite import open_disposable_sqlite_connection
 
 if TYPE_CHECKING:
@@ -46,24 +46,6 @@
 _IDENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
 
 
-def _decompose_pose(pose: Any) -> tuple[float, ...] | None:
-    if pose is None:
-        return None
-    if hasattr(pose, "position"):
-        pos = pose.position
-        orient = getattr(pose, "orientation", None)
-        x, y, z = float(pos.x), float(pos.y), float(getattr(pos, "z", 0.0))
-        if orient is not None:
-            return (x, y, z, float(orient.x), float(orient.y), float(orient.z), float(orient.w))
-        return (x, y, z, 0.0, 0.0, 0.0, 1.0)
-    if isinstance(pose, (list, tuple)):
-        vals = [float(v) for v in pose]
-        while len(vals) < 7:
-            vals.append(0.0 if len(vals) < 6 else 1.0)
-        return tuple(vals[:7])
-    return None
-
-
 def _reconstruct_pose(
     x: float | None,
     y: float | None,
@@ -72,7 +54,7 @@ def _reconstruct_pose(
     qy: float | None,
     qz: float | None,
     qw: float | None,
-) -> tuple[float, ...] | None:
+) -> PoseTuple | None:
     if x is None:
         return None
     return (x, y or 0.0, z or 0.0, qx or 0.0, qy or 0.0, qz or 0.0, qw or 1.0)
@@ -328,17 +310,17 @@ def _row_to_obs(self, row: tuple[Any, ...], *, has_blob: bool = False) -> Observ
 
         # Scalar data stored inline in value column
         if value is not None:
-            return Observation(id=row_id, ts=ts, pose=pose, tags=tags, _data=value)
+            return Observation(id=row_id, ts=ts, pose_tuple=pose, tags=tags, _data=value)
 
         if has_blob and blob_data is not None:
             assert self._codec is not None, "codec is required for data loading"
             data = self._codec.decode(blob_data)
-            return Observation(id=row_id, ts=ts, pose=pose, tags=tags, _data=data)
+            return Observation(id=row_id, ts=ts, pose_tuple=pose, tags=tags, _data=data)
 
         return Observation(
             id=row_id,
             ts=ts,
-            pose=pose,
+            pose_tuple=pose,
             tags=tags,
             _data=_UNLOADED,
         )
@@ -353,7 +335,8 @@ def _ensure_tag_indexes(self, tags: dict[str, Any]) -> None:
                 self._tag_indexes.add(key)
 
     def insert(self, obs: Observation[T]) -> int:
-        pose = _decompose_pose(obs.pose)
+        # Observation already normalizes pose to the storage 7-tuple — skip _decompose_pose.
+        pose = obs.pose_tuple
         tags_json = json.dumps(obs.tags) if obs.tags else "{}"
         value = obs._data if isinstance(obs._data, (int, float)) else None
 
diff --git a/dimos/memory2/test_save.py b/dimos/memory2/test_save.py
index eba8905974..ec4d627fc3 100644
--- a/dimos/memory2/test_save.py
+++ b/dimos/memory2/test_save.py
@@ -87,7 +87,7 @@ def test_save_preserves_data(self) -> None:
         obs = target.first()
         assert obs.data == 42
         assert obs.ts == 1.0
-        assert obs.pose == (1, 2, 3)
+        assert obs.pose_tuple == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0)
         assert obs.tags == {"label": "cat"}
 
     def test_save_with_transform(self) -> None:
diff --git a/dimos/memory2/test_stream.py b/dimos/memory2/test_stream.py
index f0aaf70137..32cb542b83 100644
--- a/dimos/memory2/test_stream.py
+++ b/dimos/memory2/test_stream.py
@@ -485,7 +485,7 @@ def test_derive_preserves_metadata(self):
         derived = obs.derive(data="transformed")
         assert derived.id == 42
         assert derived.ts == 1.5
-        assert derived.pose == (1, 2, 3)
+        assert derived.pose_tuple == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0)
         assert derived.tags == {"k": "v"}
         assert derived.data == "transformed"
 
diff --git a/dimos/memory2/transform.py b/dimos/memory2/transform.py
index a79ccf36b8..328c1de647 100644
--- a/dimos/memory2/transform.py
+++ b/dimos/memory2/transform.py
@@ -179,12 +179,11 @@ def speed() -> FnIterTransformer[Any, float]:
     def _speed(upstream: Iterator[Observation[Any]]) -> Iterator[Observation[float]]:
         prev: Observation[Any] | None = None
         for obs in upstream:
-            if prev is not None and obs.pose is not None and prev.pose is not None:
-                dx = obs.pose[0] - prev.pose[0]
-                dy = obs.pose[1] - prev.pose[1]
-                dz = obs.pose[2] - prev.pose[2]
+            p = obs.pose
+            pp = prev.pose if prev is not None else None
+            if prev is not None and p is not None and pp is not None:
                 dt = obs.ts - prev.ts
-                v = math.sqrt(dx * dx + dy * dy + dz * dz) / dt if dt > 0 else 0.0
+                v = (p.position - pp.position).length() / dt if dt > 0 else 0.0
                 yield obs.derive(data=v)
             prev = obs
 
@@ -225,28 +224,17 @@ def __call__(self, upstream: Iterator[Observation[T]]) -> Iterator[Observation[T
         max_dps = self.max_dps
         max_rps = math.radians(max_dps) if max_dps is not None else None
         for obs in upstream:
-            if obs.pose is None:
+            p = obs.pose
+            if p is None:
                 continue
-            if prev is not None and prev.pose is not None:
+            pp = prev.pose if prev is not None else None
+            if prev is not None and pp is not None:
                 dt = obs.ts - prev.ts
                 if dt > 0:
-                    dx = obs.pose[0] - prev.pose[0]
-                    dy = obs.pose[1] - prev.pose[1]
-                    dz = obs.pose[2] - prev.pose[2]
-                    v = math.sqrt(dx * dx + dy * dy + dz * dz) / dt
+                    v = (p.position - pp.position).length() / dt
                     ok = v <= max_mps
                     if ok and max_rps is not None:
-                        # rotation angle between two unit quaternions.
-                        # |q · q'| collapses the double-cover sign ambiguity;
-                        # clamp guards against numerical drift past 1.
-                        dot = (
-                            obs.pose[3] * prev.pose[3]
-                            + obs.pose[4] * prev.pose[4]
-                            + obs.pose[5] * prev.pose[5]
-                            + obs.pose[6] * prev.pose[6]
-                        )
-                        angle = 2.0 * math.acos(min(1.0, abs(dot)))
-                        ok = (angle / dt) <= max_rps
+                        ok = (p.orientation.angle_to(pp.orientation) / dt) <= max_rps
                     if ok:
                         yield obs
             prev = obs
diff --git a/dimos/memory2/type/filter.py b/dimos/memory2/type/filter.py
index af453498fd..ddc0f86eb5 100644
--- a/dimos/memory2/type/filter.py
+++ b/dimos/memory2/type/filter.py
@@ -79,17 +79,15 @@ class NearFilter(Filter):
     radius: float = 0.0
 
     def matches(self, obs: Observation[Any]) -> bool:
-        if obs.pose is None or self.pose is None:
+        p2 = obs.pose_tuple
+        if p2 is None or self.pose is None:
             return False
         p1 = self.pose
-        p2 = obs.pose
-        # Support both raw (x,y,z) tuples and PoseStamped objects
+        # NearFilter.pose accepts raw (x,y,z) tuples or PoseStamped-like objects.
         if hasattr(p1, "position"):
             p1 = p1.position
-        if hasattr(p2, "position"):
-            p2 = p2.position
         x1, y1, z1 = _xyz(p1)
-        x2, y2, z2 = _xyz(p2)
+        x2, y2, z2 = p2[0], p2[1], p2[2]
         dist_sq = (x1 - x2) ** 2 + (y1 - y2) ** 2 + (z1 - z2) ** 2
         return dist_sq <= self.radius**2
 
diff --git a/dimos/memory2/type/observation.py b/dimos/memory2/type/observation.py
index 179b48744e..038d05f161 100644
--- a/dimos/memory2/type/observation.py
+++ b/dimos/memory2/type/observation.py
@@ -14,7 +14,7 @@
 
 from __future__ import annotations
 
-from dataclasses import dataclass, field, fields
+from dataclasses import dataclass, fields
 import sys
 import threading
 from typing import TYPE_CHECKING, Any, Generic, TypeVar, cast
@@ -28,11 +28,14 @@
     from collections.abc import Callable
 
     from dimos.models.embedding.base import Embedding
+    from dimos.msgs.geometry_msgs.Pose import Pose
     from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
 
 T = TypeVar("T")
 R = TypeVar("R")
 
+PoseTuple = tuple[float, float, float, float, float, float, float]
+
 
 class _Unloaded:
     """Sentinel indicating data has not been loaded yet."""
@@ -46,18 +49,33 @@ def __repr__(self) -> str:
 _UNLOADED = _Unloaded()
 
 
-def _normalize_pose(p: Any) -> tuple[float, float, float, float, float, float, float] | None:
+def _to_tuple(p: Any) -> PoseTuple | None:
     """Coerce common pose shapes to the storage 7-tuple `(x, y, z, qx, qy, qz, qw)`.
 
-    Accepts: `None`, an already-normalized tuple, or any object with
+    Accepts: `None`, a 3- or 7-tuple, or any object with
     `translation`+`rotation` (Transform) or `position`+`orientation`
-    (Pose / PoseStamped) attributes. Duck-typed to avoid importing
-    `dimos.msgs.geometry_msgs` at module load.
+    (Pose / PoseStamped) attributes. 3-tuples are padded with the identity
+    quaternion. Duck-typed to avoid importing `dimos.msgs.geometry_msgs`
+    at module load.
     """
-    if p is None or isinstance(p, tuple):
-        return p
-    trans = getattr(p, "translation", None) or getattr(p, "position", None)
-    rot = getattr(p, "rotation", None) or getattr(p, "orientation", None)
+    if p is None:
+        return None
+    if isinstance(p, tuple):
+        if len(p) == 7:
+            return cast("PoseTuple", tuple(float(v) for v in p))
+        if len(p) == 3:
+            x, y, z = p
+            return (float(x), float(y), float(z), 0.0, 0.0, 0.0, 1.0)
+        raise TypeError(f"Pose tuple must have length 3 or 7, got {len(p)}")
+    # Use ``is not None`` rather than ``or`` — a Vector3 at the origin is
+    # falsy but valid, and falling through to ``position`` on a Transform
+    # would crash.
+    trans = getattr(p, "translation", None)
+    if trans is None:
+        trans = getattr(p, "position", None)
+    rot = getattr(p, "rotation", None)
+    if rot is None:
+        rot = getattr(p, "orientation", None)
     if trans is None or rot is None:
         raise TypeError(
             f"Cannot coerce {type(p).__name__} to a pose tuple — expected "
@@ -74,36 +92,70 @@ def _normalize_pose(p: Any) -> tuple[float, float, float, float, float, float, f
     )
 
 
-@dataclass
+@dataclass(init=False)
 class Observation(Generic[T]):
     """A single timestamped observation with optional spatial pose and metadata.
 
-    `pose` is stored as a 7-tuple `(x, y, z, qx, qy, qz, qw)`. Passing a
-    `Transform`, `Pose`, or `PoseStamped` to the constructor or to `derive`
-    is normalized to the 7-tuple form automatically.
+    Pose is stored internally as a 7-tuple ``(x, y, z, qx, qy, qz, qw)``.
+    Use :attr:`pose` for a typed :class:`Pose` object (lazy), or
+    :attr:`pose_tuple` for direct field access in hot loops.
     """
 
     id: int
     ts: float
-    data_type: type = object
-    pose: Any | None = None
-    tags: dict[str, Any] = field(default_factory=dict)
-    _data: T | _Unloaded = field(default=_UNLOADED, repr=False)
-    _loader: Callable[[], T] | None = field(default=None, repr=False)
-    _data_lock: threading.Lock = field(default_factory=threading.Lock, repr=False)
+    data_type: type
+    pose_tuple: PoseTuple | None
+    tags: dict[str, Any]
+    _data: T | _Unloaded
+    _loader: Callable[[], T] | None
+    _data_lock: threading.Lock
+
+    def __init__(
+        self,
+        id: int = 0,
+        ts: float = 0.0,
+        *,
+        data_type: type = object,
+        pose: Any | None = None,
+        pose_tuple: PoseTuple | None = None,
+        tags: dict[str, Any] | None = None,
+        _data: T | _Unloaded = _UNLOADED,
+        _loader: Callable[[], T] | None = None,
+        _data_lock: threading.Lock | None = None,
+    ) -> None:
+        self.id = id
+        self.ts = ts
+        self.data_type = data_type
+        # `pose` wins if explicitly supplied: derive()/tag() always re-pass
+        # the current `pose_tuple` from fields(), so an override needs
+        # to take precedence over it.
+        self.pose_tuple = _to_tuple(pose) if pose is not None else pose_tuple
+        self.tags = tags if tags is not None else {}
+        self._data = _data
+        self._loader = _loader
+        self._data_lock = _data_lock if _data_lock is not None else threading.Lock()
+
+    @property
+    def pose(self) -> Pose | None:
+        """Typed :class:`Pose` (or None). Allocates per access — read :attr:`pose_tuple` in hot loops."""
+        if self.pose_tuple is None:
+            return None
+        from dimos.msgs.geometry_msgs.Pose import Pose
 
-    def __post_init__(self) -> None:
-        self.pose = _normalize_pose(self.pose)
+        return Pose(*self.pose_tuple)
 
     @property
-    def pose_stamped(self) -> PoseStamped:
+    def pose_stamped(self) -> PoseStamped | None:
+        """Typed :class:`PoseStamped` (or None) carrying this observation's ts."""
+        if self.pose_tuple is None:
+            return None
         from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
 
-        if self.pose is None:
-            raise LookupError("No pose set on this observation")
-        x, y, z, qx, qy, qz, qw = self.pose
-        ps: PoseStamped = PoseStamped(ts=self.ts, position=(x, y, z), orientation=(qx, qy, qz, qw))
-        return ps
+        x, y, z, qx, qy, qz, qw = self.pose_tuple
+        return cast(
+            "PoseStamped",
+            PoseStamped(ts=self.ts, position=(x, y, z), orientation=(qx, qy, qz, qw)),
+        )
 
     @property
     def data(self) -> T:
@@ -150,9 +202,38 @@ def tag(self, **tags: Any) -> Self:
         return type(self)(**kwargs)
 
 
-@dataclass
+@dataclass(init=False)
 class EmbeddedObservation(Observation[T]):
     """Observation enriched with a vector embedding and optional similarity score."""
 
-    embedding: Embedding | None = None
-    similarity: float | None = None
+    embedding: Embedding | None
+    similarity: float | None
+
+    def __init__(
+        self,
+        id: int = 0,
+        ts: float = 0.0,
+        *,
+        data_type: type = object,
+        pose: Any | None = None,
+        pose_tuple: PoseTuple | None = None,
+        tags: dict[str, Any] | None = None,
+        embedding: Embedding | None = None,
+        similarity: float | None = None,
+        _data: T | _Unloaded = _UNLOADED,
+        _loader: Callable[[], T] | None = None,
+        _data_lock: threading.Lock | None = None,
+    ) -> None:
+        super().__init__(
+            id=id,
+            ts=ts,
+            data_type=data_type,
+            pose=pose,
+            pose_tuple=pose_tuple,
+            tags=tags,
+            _data=_data,
+            _loader=_loader,
+            _data_lock=_data_lock,
+        )
+        self.embedding = embedding
+        self.similarity = similarity
diff --git a/dimos/memory2/vis/space/rerun.py b/dimos/memory2/vis/space/rerun.py
index 435097e77c..1929ecef71 100644
--- a/dimos/memory2/vis/space/rerun.py
+++ b/dimos/memory2/vis/space/rerun.py
@@ -238,10 +238,13 @@ def render(space: Space, app_id: str = "space", spawn: bool = True) -> None:
     for i, obs in enumerate(observations):
         path = f"scene/observations/{i}"
         data = obs.data
+        ps = obs.pose_stamped
+        if ps is None:
+            continue
         img = _as_image(data)
         if img is not None:
             # Apply base→optical extrinsics for camera frustum rendering
-            world_T_optical = Transform.from_pose("world", obs.pose_stamped) + _BASE_TO_OPTICAL
+            world_T_optical = Transform.from_pose("world", ps) + _BASE_TO_OPTICAL
             rr.log(path, world_T_optical.to_pose().to_rerun(), static=True)
             h, w = img.shape[:2]
             focal = max(w, h)
@@ -257,13 +260,13 @@ def render(space: Space, app_id: str = "space", spawn: bool = True) -> None:
             )
             rr.log(f"{path}/image", img.to_rerun(), static=True)
         elif isinstance(data, PointCloud2):
-            rr.log(path, obs.pose_stamped.to_rerun(), static=True)
+            rr.log(path, ps.to_rerun(), static=True)
             rr.log(f"{path}/pointcloud", data.to_rerun(), static=True)
         elif isinstance(data, (int, float)):
             rr.log(
                 path,
                 rr.Points3D(
-                    positions=[[obs.pose_stamped.x, obs.pose_stamped.y, 0]],
+                    positions=[[ps.x, ps.y, 0]],
                     labels=[str(data)],
                     radii=[0.025],
                 ),
@@ -283,7 +286,7 @@ def render(space: Space, app_id: str = "space", spawn: bool = True) -> None:
             if line:
                 lines.append(line)
             label = "\n".join(lines)
-            x, y = obs.pose_stamped.x, obs.pose_stamped.y
+            x, y = ps.x, ps.y
             # Pin: line from ground up, label at the tip
             rr.log(
                 f"{path}/pin",
@@ -306,7 +309,7 @@ def render(space: Space, app_id: str = "space", spawn: bool = True) -> None:
         else:
             rr.log(
                 path,
-                rr.Points3D(positions=[[obs.pose_stamped.x, obs.pose_stamped.y, 0]], radii=[0.05]),
+                rr.Points3D(positions=[[ps.x, ps.y, 0]], radii=[0.05]),
                 static=True,
             )
 
diff --git a/dimos/memory2/vis/space/space.py b/dimos/memory2/vis/space/space.py
index 4ce973b133..cf7d8be0b3 100644
--- a/dimos/memory2/vis/space/space.py
+++ b/dimos/memory2/vis/space/space.py
@@ -96,7 +96,10 @@ def add(self, element: Any, **kwargs: Any) -> Space:
             for item in element:
                 v = _autocolor_value(item)
                 if v is not None and isinstance(item, Observation):
-                    self._elements.append(Arrow(msg=item.pose_stamped, color=color_range(v)))
+                    ps = item.pose_stamped
+                    if ps is None:
+                        continue
+                    self._elements.append(Arrow(msg=ps, color=color_range(v)))
                 else:
                     self.add(item, **kwargs)
         else:
diff --git a/dimos/memory2/vis/space/svg.py b/dimos/memory2/vis/space/svg.py
index e54da044da..f95119973b 100644
--- a/dimos/memory2/vis/space/svg.py
+++ b/dimos/memory2/vis/space/svg.py
@@ -291,12 +291,13 @@ def _render_element(el: SpaceElement, b: Bounds) -> str:
 
         return _render_occupancy_grid(simple_inflate(height_cost_occupancy(el), 0.05), b)
     elif isinstance(el, Observation):
-        if el.pose is None:
+        ps = el.pose_stamped
+        if ps is None:
             return ""
         if el.data_type == float:
-            return _render_arrow(Arrow(msg=el.pose_stamped, color="#ff0000"), b)
+            return _render_arrow(Arrow(msg=ps, color="#ff0000"), b)
         else:
-            return _render_arrow(Arrow(msg=el.pose_stamped), b)
+            return _render_arrow(Arrow(msg=ps), b)
 
     else:
         return f"<!-- unsupported: {type(el).__name__} -->"
diff --git a/dimos/msgs/geometry_msgs/Quaternion.py b/dimos/msgs/geometry_msgs/Quaternion.py
index e3d64d4bab..42e4dc730d 100644
--- a/dimos/msgs/geometry_msgs/Quaternion.py
+++ b/dimos/msgs/geometry_msgs/Quaternion.py
@@ -16,6 +16,7 @@
 
 from collections.abc import Sequence
 from io import BytesIO
+import math
 import struct
 from typing import TYPE_CHECKING, BinaryIO, TypeAlias
 
@@ -221,6 +222,18 @@ def __mul__(self, other: Quaternion) -> Quaternion:
 
         return Quaternion(x, y, z, w)
 
+    def dot(self, other: Quaternion) -> float:
+        """4D inner product. ``|q.dot(q')| == 1`` iff same orientation (up to sign)."""
+        return float(self.x * other.x + self.y * other.y + self.z * other.z + self.w * other.w)
+
+    def angle_to(self, other: Quaternion) -> float:
+        """Smallest rotation angle (radians) between two unit quaternions.
+
+        ``abs(self.dot(other))`` collapses the double-cover sign ambiguity;
+        the ``min(1.0, ...)`` clamps against numerical drift past 1.
+        """
+        return 2.0 * math.acos(min(1.0, abs(self.dot(other))))
+
     def conjugate(self) -> Quaternion:
         """Return the conjugate of the quaternion.
 
diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py
index add056b1a3..7617f09e58 100644
--- a/dimos/msgs/sensor_msgs/PointCloud2.py
+++ b/dimos/msgs/sensor_msgs/PointCloud2.py
@@ -730,7 +730,7 @@ def to_rerun(
 
         if mode == "points":
             return rr.Points3D(
-                positions=points, colors=point_colors, class_ids=class_ids, sizes=voxel_size
+                positions=points, colors=point_colors, class_ids=class_ids, radii=voxel_size / 2
             )
         elif mode == "boxes":
             half = voxel_size / 2
diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 5030c4fb05..b84d6e8dad 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -140,16 +140,13 @@ def main(
     # so it stays cheap (no pointcloud loading).
     seen: dict[tuple[int, int, int], Observation[Any]] = {}
     for obs in lidar:
-        if obs.pose is None:
+        p = obs.pose_tuple
+        if p is None:
             continue
         # Reject placeholder poses at the world origin.
-        if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0:
+        if p[0] == 0 and p[1] == 0 and p[2] == 0:
             continue
-        cell = (
-            int(obs.pose[0] / pgo_tol),
-            int(obs.pose[1] / pgo_tol),
-            int(obs.pose[2] / pgo_tol),
-        )
+        cell = (int(p[0] / pgo_tol), int(p[1] / pgo_tol), int(p[2] / pgo_tol))
         seen[cell] = obs
 
     n_kept = len(seen)
@@ -159,7 +156,7 @@ def main(
     # Dict insertion order = lidar iteration order = chronological.
     # `seen` only contains entries with non-None poses (filtered above).
     path: list[tuple[float, float, float]] = [
-        (obs.pose[0], obs.pose[1], obs.pose[2]) for obs in seen.values() if obs.pose is not None
+        (p[0], p[1], p[2]) for obs in seen.values() if (p := obs.pose_tuple) is not None
     ]
 
     pgo_map = None
@@ -209,7 +206,7 @@ def main(
         try:
             for obs in lidar:
                 full_pb(obs)
-                if obs.pose is None or len(obs.data) == 0:
+                if obs.pose_tuple is None or len(obs.data) == 0:
                     continue
                 full_grid.add_frame(obs.data.transform(interp(obs.ts)))
             full_pgo_map = full_grid.get_global_pointcloud2()

From ea2c9f4b05f0edbd58a698e6ecd23abe0c0266d4 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Thu, 28 May 2026 16:11:25 +0800
Subject: [PATCH 27/36] pgo matrices refactor

---
 dimos/mapping/loop_closure/pgo.py      | 51 ++++++++++----------------
 dimos/mapping/loop_closure/test_pgo.py | 44 ++++++++--------------
 dimos/msgs/geometry_msgs/Quaternion.py |  8 ++++
 dimos/msgs/geometry_msgs/Transform.py  | 19 ++++++++++
 4 files changed, 63 insertions(+), 59 deletions(-)

diff --git a/dimos/mapping/loop_closure/pgo.py b/dimos/mapping/loop_closure/pgo.py
index 84a8a0fa24..de6c56547a 100644
--- a/dimos/mapping/loop_closure/pgo.py
+++ b/dimos/mapping/loop_closure/pgo.py
@@ -207,15 +207,15 @@ def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]:
 def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]:
     """Materialize corrections once; return a fast ts -> Transform lookup."""
     ts_list: list[float] = []
-    R_list: list[np.ndarray] = []
+    quat_list: list[np.ndarray] = []
     t_list: list[np.ndarray] = []
     for obs in corrections:
-        R, t = _r_t_from_transform(obs.data)
+        tf = obs.data
         # obs.ts is authoritative; obs.data.ts can be mutated by Transform's
         # ts=0.0 -> time.time() fallback in its constructor.
         ts_list.append(obs.ts)
-        R_list.append(R)
-        t_list.append(t)
+        quat_list.append(tf.rotation.to_numpy())
+        t_list.append(tf.translation.to_numpy())
 
     if not ts_list:
         raise ValueError("empty corrections stream")
@@ -224,13 +224,12 @@ def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Trans
     # general path handles it; clip-to-endpoints behavior is unchanged.
     if len(ts_list) == 1:
         ts_list.append(ts_list[0] + 1e-6)
-        R_list.append(R_list[0])
+        quat_list.append(quat_list[0])
         t_list.append(t_list[0])
 
     ts_arr = np.array(ts_list)
-    R_stack = np.stack(R_list)
     t_stack = np.stack(t_list)
-    slerp = Slerp(ts_arr, Rotation.from_matrix(R_stack))
+    slerp = Slerp(ts_arr, Rotation.from_quat(np.stack(quat_list)))
 
     def interp(ts: float) -> Transform:
         ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1]))
@@ -244,7 +243,13 @@ def interp(ts: float) -> Transform:
             t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx]
             alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0
             t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx]
-        return _transform_from_r_t(R, t, ts=float(ts))
+        return Transform(
+            translation=Vector3(t),
+            rotation=Quaternion.from_rotation_matrix(R),
+            frame_id=FRAME_WORLD_CORRECTED,
+            child_frame_id=FRAME_WORLD_RAW,
+            ts=float(ts),
+        )
 
     return interp
 
@@ -268,30 +273,12 @@ def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]:
                 continue
             raw_tf = Transform.from_pose(FRAME_BODY, ps)
             # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw.
-            # Observation normalizes Transform back to 7-tuple via __post_init__.
             corrected = interp(obs.ts) + raw_tf
             yield obs.derive(data=obs.data, pose=corrected)
 
     return stream.transform(xf)
 
 
-def _r_t_from_transform(tf: Transform) -> tuple[np.ndarray, np.ndarray]:
-    q = tf.rotation
-    R = Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix()
-    t = np.array([tf.translation.x, tf.translation.y, tf.translation.z])
-    return R, t
-
-
-def _transform_from_r_t(R: np.ndarray, t: np.ndarray, *, ts: float) -> Transform:
-    return Transform(
-        translation=Vector3(float(t[0]), float(t[1]), float(t[2])),
-        rotation=Quaternion.from_rotation_matrix(R),
-        frame_id=FRAME_WORLD_CORRECTED,
-        child_frame_id=FRAME_WORLD_RAW,
-        ts=ts,
-    )
-
-
 def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3:
     """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`."""
     import gtsam  # type: ignore[import-not-found,import-untyped]
@@ -669,9 +656,11 @@ def _icp(
     if float(result.fitness) == 0.0:
         return Transform.identity(), float("inf")
 
-    T_mat = result.transformation.numpy()
-    rmse = float(result.inlier_rmse)
-    return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse
-    T_mat = result.transformation.numpy()
+    tf = Transform.from_matrix(
+        result.transformation.numpy(),
+        ts=source.ts,
+        frame_id=FRAME_WORLD_CORRECTED,
+        child_frame_id=FRAME_WORLD_RAW,
+    )
     rmse = float(result.inlier_rmse)
-    return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse
+    return tf, rmse * rmse
diff --git a/dimos/mapping/loop_closure/test_pgo.py b/dimos/mapping/loop_closure/test_pgo.py
index 0042c19b2c..61a744c893 100644
--- a/dimos/mapping/loop_closure/test_pgo.py
+++ b/dimos/mapping/loop_closure/test_pgo.py
@@ -25,8 +25,6 @@
     PGOConfig,
     _obs_to_pose3,
     _pose3_to_transform,
-    _r_t_from_transform,
-    _transform_from_r_t,
     apply_corrections,
     keyframes_to_corrections,
     make_interpolator,
@@ -67,16 +65,6 @@ def test_rejects_unknown_fields(self) -> None:
 
 
 class TestTransformHelpers:
-    def test_r_t_transform_roundtrip(self) -> None:
-        rng = np.random.default_rng(0)
-        R_in = _random_R(rng)
-        t_in = rng.uniform(-5, 5, size=3)
-        tf = _transform_from_r_t(R_in, t_in, ts=1.23)
-        R_out, t_out = _r_t_from_transform(tf)
-        np.testing.assert_allclose(R_out, R_in, atol=1e-10)
-        np.testing.assert_allclose(t_out, t_in, atol=1e-10)
-        assert tf.ts == 1.23
-
     def test_observation_normalizes_transform_pose(self) -> None:
         """Constructing/deriving with pose=Transform should coerce to 7-tuple."""
         from dimos.memory2.type.observation import Observation
@@ -109,7 +97,7 @@ def test_obs_to_pose3_roundtrip(self) -> None:
         rng = np.random.default_rng(4)
         R = _random_R(rng)
         t = rng.uniform(-3, 3, size=3)
-        tf = _transform_from_r_t(R, t, ts=1.0)
+        tf = Transform(translation=Vector3(t), rotation=Quaternion.from_rotation_matrix(R), ts=1.0)
         obs: Observation[int] = Observation(id=0, ts=1.0, pose=tf, _data=0)
         p = _obs_to_pose3(obs)
         np.testing.assert_allclose(p.rotation().matrix(), R, atol=1e-9)
@@ -123,9 +111,8 @@ def test_pose3_to_transform(self) -> None:
         t = rng.uniform(-3, 3, size=3)
         p = gtsam.Pose3(gtsam.Rot3(R), gtsam.Point3(t))
         tf = _pose3_to_transform(p, ts=7.89)
-        R_out, t_out = _r_t_from_transform(tf)
-        np.testing.assert_allclose(R_out, R, atol=1e-10)
-        np.testing.assert_allclose(t_out, t, atol=1e-10)
+        np.testing.assert_allclose(tf.rotation.to_rotation_matrix(), R, atol=1e-10)
+        np.testing.assert_allclose(tf.translation.to_numpy(), t, atol=1e-10)
 
     def test_pose3_to_transform_with_frames(self) -> None:
         import gtsam
@@ -137,9 +124,8 @@ def test_pose3_to_transform_with_frames(self) -> None:
         tf = _pose3_to_transform(p, ts=1.0, frame_id="world_corrected", child_frame_id="body")
         assert tf.frame_id == "world_corrected"
         assert tf.child_frame_id == "body"
-        R_out, t_out = _r_t_from_transform(tf)
-        np.testing.assert_allclose(R_out, R, atol=1e-10)
-        np.testing.assert_allclose(t_out, t, atol=1e-10)
+        np.testing.assert_allclose(tf.rotation.to_rotation_matrix(), R, atol=1e-10)
+        np.testing.assert_allclose(tf.translation.to_numpy(), t, atol=1e-10)
 
 
 def _make_lidar_stream(n_frames: int = 12, points_per_frame: int = 500) -> Stream[PointCloud2]:
@@ -213,8 +199,11 @@ def test_empty_stream_raises(self) -> None:
 
     def test_single_keyframe_returns_constant(self) -> None:
         R = Rotation.from_euler("z", np.pi / 4).as_matrix()
-        t = np.array([1.0, 2.0, 3.0])
-        only = _transform_from_r_t(R, t, ts=10.0)
+        only = Transform(
+            translation=Vector3(1.0, 2.0, 3.0),
+            rotation=Quaternion.from_rotation_matrix(R),
+            ts=10.0,
+        )
         interp = make_interpolator(_make_corrections([only]))
         for query_ts in (0.0, 10.0, 100.0):
             out = interp(query_ts)
@@ -225,9 +214,8 @@ def test_single_keyframe_returns_constant(self) -> None:
     def test_out_of_range_clips_to_endpoints(self) -> None:
         # Note: Transform's constructor maps ts=0.0 -> time.time(); use ts>0
         # so test timestamps are deterministic.
-        R = np.eye(3)
-        a = _transform_from_r_t(R, np.array([0.0, 0.0, 0.0]), ts=1.0)
-        b = _transform_from_r_t(R, np.array([10.0, 0.0, 0.0]), ts=11.0)
+        a = Transform(translation=Vector3(0.0, 0.0, 0.0), ts=1.0)
+        b = Transform(translation=Vector3(10.0, 0.0, 0.0), ts=11.0)
         # _make_corrections uses tf.ts; obs.ts ends up the same.
         mem = MemoryStore()
         stream: Stream[Transform] = mem.stream("corrections", Transform)
@@ -257,8 +245,8 @@ def test_pure_translation_shifts_poses(self) -> None:
             )
 
         # Constant correction: translate by (5, 0, 0), identity rotation.
-        c_a = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=1.0)
-        c_b = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=3.0)
+        c_a = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=1.0)
+        c_b = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=3.0)
         mem2 = MemoryStore()
         corr: Stream[Transform] = mem2.stream("corrections", Transform)
         corr.append(c_a, ts=1.0)
@@ -280,8 +268,8 @@ def test_passes_through_pose_none(self) -> None:
             ts=1.0,
             pose=None,
         )
-        c_a = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=1.0)
-        c_b = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=2.0)
+        c_a = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=1.0)
+        c_b = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=2.0)
         mem2 = MemoryStore()
         corr: Stream[Transform] = mem2.stream("corrections", Transform)
         corr.append(c_a, ts=1.0)
diff --git a/dimos/msgs/geometry_msgs/Quaternion.py b/dimos/msgs/geometry_msgs/Quaternion.py
index 42e4dc730d..e98e7bcc19 100644
--- a/dimos/msgs/geometry_msgs/Quaternion.py
+++ b/dimos/msgs/geometry_msgs/Quaternion.py
@@ -164,6 +164,14 @@ def from_rotation_matrix(cls, matrix: np.ndarray) -> Quaternion:
         quat = rotation.as_quat()  # Returns [x, y, z, w]
         return cls(quat[0], quat[1], quat[2], quat[3])
 
+    def to_rotation_matrix(self) -> np.ndarray:
+        """Convert quaternion to a 3x3 rotation matrix."""
+        from scipy.spatial.transform import (
+            Rotation,  # ~330ms: deferred to avoid startup cost
+        )
+
+        return np.asarray(Rotation.from_quat([self.x, self.y, self.z, self.w]).as_matrix())
+
     def to_euler(self) -> Vector3:
         """Convert quaternion to Euler angles (roll, pitch, yaw) in radians.
 
diff --git a/dimos/msgs/geometry_msgs/Transform.py b/dimos/msgs/geometry_msgs/Transform.py
index 9b08c8dadd..44bd6b2440 100644
--- a/dimos/msgs/geometry_msgs/Transform.py
+++ b/dimos/msgs/geometry_msgs/Transform.py
@@ -18,6 +18,7 @@
 from typing import TYPE_CHECKING, BinaryIO
 
 if TYPE_CHECKING:
+    import numpy as np
     import rerun as rr
 
     from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
@@ -216,6 +217,24 @@ def to_pose(self, **kwargs: object) -> PoseStamped:
         )
         return result
 
+    @classmethod
+    def from_matrix(
+        cls,
+        matrix: np.ndarray,
+        *,
+        ts: float = 0.0,
+        frame_id: str = "world",
+        child_frame_id: str = "unset",
+    ) -> Transform:
+        """Build a Transform from a 4x4 homogeneous transformation matrix."""
+        return cls(
+            translation=Vector3(matrix[:3, 3]),
+            rotation=Quaternion.from_rotation_matrix(matrix[:3, :3]),
+            frame_id=frame_id,
+            child_frame_id=child_frame_id,
+            ts=ts,
+        )
+
     def to_matrix(self) -> np.ndarray:  # type: ignore[name-defined]
         """Convert Transform to a 4x4 transformation matrix.
 

From 6e7825f7e182575817905cb9d9080a69cf4b16bd Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Thu, 28 May 2026 16:25:48 +0800
Subject: [PATCH 28/36] pc2 fix

---
 dimos/msgs/sensor_msgs/PointCloud2.py      | 4 ++++
 dimos/msgs/sensor_msgs/test_PointCloud2.py | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py
index 7617f09e58..d31c20d43c 100644
--- a/dimos/msgs/sensor_msgs/PointCloud2.py
+++ b/dimos/msgs/sensor_msgs/PointCloud2.py
@@ -373,6 +373,10 @@ def transform(self, tf: Transform) -> PointCloud2:
         new_pcd = o3d.geometry.PointCloud()
         new_pcd.points = o3d.utility.Vector3dVector(transformed_xyz)
 
+        # Colors are frame-independent — carry them through.
+        if self.pointcloud.has_colors():
+            new_pcd.colors = self.pointcloud.colors
+
         return PointCloud2(
             pointcloud=new_pcd,
             frame_id=tf.frame_id,
diff --git a/dimos/msgs/sensor_msgs/test_PointCloud2.py b/dimos/msgs/sensor_msgs/test_PointCloud2.py
index 56ae622a8d..797e266ed3 100644
--- a/dimos/msgs/sensor_msgs/test_PointCloud2.py
+++ b/dimos/msgs/sensor_msgs/test_PointCloud2.py
@@ -32,8 +32,8 @@ def test_lcm_encode_decode() -> None:
     decoded = PointCloud2.lcm_decode(binary_msg)
 
     # 1. Check number of points
-    original_points, _ = lidar_msg.as_numpy()
-    decoded_points, _ = decoded.as_numpy()
+    original_points = lidar_msg.as_numpy()
+    decoded_points = decoded.as_numpy()
 
     assert len(original_points) == len(decoded_points), (
         f"Point count mismatch: {len(original_points)} vs {len(decoded_points)}"

From 9ac43a249bc75056043f06fa4aece047b482c532 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Thu, 28 May 2026 16:35:20 +0800
Subject: [PATCH 29/36] map.py: use VoxelMapTransformer for all three
 accumulations

Replaces the three hand-rolled VoxelGrid loops (pgo pass 2, full_pgo, raw)
with a small _accumulate() helper that drives VoxelMapTransformer in batch
mode. Same behavior, less repetition, and disposal is managed by the
transformer.
---
 dimos/utils/cli/map.py | 104 +++++++++++++++++++++++++++--------------
 1 file changed, 68 insertions(+), 36 deletions(-)

diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index b84d6e8dad..8f0e65e9f0 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from collections.abc import Callable
+from collections.abc import Callable, Iterable
 import time
 from typing import Any
 
@@ -20,7 +20,7 @@
 import rerun.blueprint as rrb
 import typer
 
-from dimos.mapping.voxels import VoxelGrid
+from dimos.mapping.voxels import VoxelMapTransformer
 from dimos.memory2.store.sqlite import SqliteStore
 from dimos.memory2.stream import Stream
 from dimos.memory2.transform import QualityWindow, SpeedLimit
@@ -28,6 +28,7 @@
 from dimos.memory2.vis.color import Color
 from dimos.msgs.geometry_msgs.Transform import Transform
 from dimos.msgs.sensor_msgs.Image import Image
+from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
 from dimos.perception.fiducial.marker_transformer import DetectMarkers
 from dimos.robot.unitree.go2.connection import _camera_info_static
 from dimos.utils.data import resolve_named_path
@@ -36,6 +37,44 @@
 PATH_THICKNESS = 0.01
 
 
+def _accumulate(
+    obs_iter: Iterable[Observation[PointCloud2]],
+    *,
+    voxel: float,
+    block_count: int,
+    device: str,
+    interp: Callable[[float], Transform] | None = None,
+    progress_cb: Callable[[Observation[Any]], None] | None = None,
+) -> PointCloud2 | None:
+    """Accumulate a voxel map from `obs_iter`, optionally PGO-correcting each frame.
+
+    Returns the final ``PointCloud2`` (or ``None`` if the input was empty).
+    Disposal of the underlying ``VoxelGrid`` is handled by ``VoxelMapTransformer``.
+    """
+
+    def prepared() -> Iterable[Observation[PointCloud2]]:
+        for obs in obs_iter:
+            if progress_cb is not None:
+                progress_cb(obs)
+            if len(obs.data) == 0:
+                continue
+            if interp is not None:
+                if obs.pose_tuple is None:
+                    continue
+                yield obs.derive(data=obs.data.transform(interp(obs.ts)))
+            else:
+                yield obs
+
+    vmt = VoxelMapTransformer(
+        emit_every=0,  # batch mode: emit once on exhaustion
+        voxel_size=voxel,
+        block_count=block_count,
+        device=device,
+    )
+    result = next(iter(vmt(iter(prepared()))), None)
+    return result.data if result is not None else None
+
+
 def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]:
     seen = 0
     wall_start: float | None = None
@@ -186,45 +225,35 @@ def main(
             kf_t = kf_obs.data.optimized.translation
             pgo_path.append((kf_t.x, kf_t.y, kf_t.z))
 
-        pass2_pb = progress(n_kept, "pgo pass 2 (rebuilding)")
-        grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device)
-        try:
-            for obs in seen.values():
-                pass2_pb(obs)
-                if len(obs.data) == 0:
-                    continue
-                grid.add_frame(obs.data.transform(interp(obs.ts)))
-            pgo_map = grid.get_global_pointcloud2()
-        finally:
-            grid.dispose()
+        pgo_map = _accumulate(
+            seen.values(),
+            voxel=voxel,
+            block_count=block_count,
+            device=device,
+            interp=interp,
+            progress_cb=progress(n_kept, "pgo pass 2 (rebuilding)"),
+        )
 
     full_pgo_map = None
     if full_pgo:
         assert interp is not None
-        full_pb = progress(total, "full pgo (rebuilding)")
-        full_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device)
-        try:
-            for obs in lidar:
-                full_pb(obs)
-                if obs.pose_tuple is None or len(obs.data) == 0:
-                    continue
-                full_grid.add_frame(obs.data.transform(interp(obs.ts)))
-            full_pgo_map = full_grid.get_global_pointcloud2()
-        finally:
-            full_grid.dispose()
+        full_pgo_map = _accumulate(
+            lidar,
+            voxel=voxel,
+            block_count=block_count,
+            device=device,
+            interp=interp,
+            progress_cb=progress(total, "full pgo (rebuilding)"),
+        )
 
     # Raw map: same dedup'd frames, no PGO correction.
-    raw_pb = progress(n_kept, "reconstructing global map")
-    raw_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device)
-    try:
-        for obs in seen.values():
-            raw_pb(obs)
-            if len(obs.data) == 0:
-                continue
-            raw_grid.add_frame(obs.data)
-        global_map = raw_grid.get_global_pointcloud2()
-    finally:
-        raw_grid.dispose()
+    global_map = _accumulate(
+        seen.values(),
+        voxel=voxel,
+        block_count=block_count,
+        device=device,
+        progress_cb=progress(n_kept, "reconstructing global map"),
+    )
 
     marker_dets: list[Observation[Any]] = []
     if markers:
@@ -270,7 +299,10 @@ def main(
     if not no_gui:
         rerun_init("dimos map tool", spawn=True)
         rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world")))
-        rr.log("world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True)
+        if global_map is not None:
+            rr.log(
+                "world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True
+            )
         if path:
             rr.log(
                 "world/raw_map/path",

From ab687a239ac0c7081398ffcfdf529e167db120eb Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Thu, 28 May 2026 16:47:43 +0800
Subject: [PATCH 30/36] mapper bugfix

---
 .../perception/fiducial/marker_transformer.py | 25 ++++---------------
 1 file changed, 5 insertions(+), 20 deletions(-)

diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py
index e6c3d3c02f..fa114b8b8b 100644
--- a/dimos/perception/fiducial/marker_transformer.py
+++ b/dimos/perception/fiducial/marker_transformer.py
@@ -58,23 +58,6 @@
 logger = setup_logger()
 
 
-def _pose_tuple_to_transform(
-    pose: tuple[float, float, float, float, float, float, float],
-    *,
-    frame_id: str,
-    child_frame_id: str,
-    ts: float,
-) -> Transform:
-    x, y, z, qx, qy, qz, qw = pose
-    return Transform(
-        translation=Vector3(x, y, z),
-        rotation=Quaternion(qx, qy, qz, qw),
-        frame_id=frame_id,
-        child_frame_id=child_frame_id,
-        ts=ts,
-    )
-
-
 def _average_marker_pose(
     buffer: TimestampedBufferCollection[Detection3DMarker],
 ) -> tuple[Vector3, Quaternion]:
@@ -146,7 +129,8 @@ def __call__(
         marker_size = Vector3(self.marker_length_m, self.marker_length_m, 0.0)
 
         for obs in upstream:
-            if obs.pose is None:
+            pose = obs.pose
+            if pose is None:
                 logger.debug("DetectMarkers: obs %s has no .pose; skipping", obs.id)
                 continue
 
@@ -170,8 +154,9 @@ def __call__(
             if ids is None or len(ids) == 0:
                 continue
 
-            t_world_optical = _pose_tuple_to_transform(
-                obs.pose,
+            t_world_optical = Transform(
+                translation=pose.position,
+                rotation=pose.orientation,
                 frame_id=self.world_frame,
                 child_frame_id="optical",
                 ts=obs.ts,

From fcd53f1cde6a857a54793a7ccc62eb1dcad65f43 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Thu, 28 May 2026 16:59:57 +0800
Subject: [PATCH 31/36] fix mypy: rename shadowed `pose` in DetectMarkers

The outer `pose = obs.pose` (Pose | None) was being shadowed by
`pose = estimate_marker_pose(...)` (tuple[rvec, tvec] | None), which
mypy flags as an incompatible reassignment. Renamed the inner one
to `marker_pose`.
---
 dimos/perception/fiducial/marker_transformer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py
index fa114b8b8b..36fc2bd6bc 100644
--- a/dimos/perception/fiducial/marker_transformer.py
+++ b/dimos/perception/fiducial/marker_transformer.py
@@ -164,16 +164,16 @@ def __call__(
 
             for corner_set, mid_arr in zip(corners, ids, strict=True):
                 mid = int(mid_arr[0])
-                pose = estimate_marker_pose(
+                marker_pose = estimate_marker_pose(
                     corner_set,
                     self.marker_length_m,
                     self._cam_mtx,
                     self._dist,
                     distortion_model=info.distortion_model,
                 )
-                if pose is None:
+                if marker_pose is None:
                     continue
-                rvec, tvec = pose
+                rvec, tvec = marker_pose
                 t_optical_marker = rvec_tvec_to_transform(
                     rvec,
                     tvec,

From 3aaaeb98d7633fad101acb455548c6507d902675 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Thu, 28 May 2026 17:06:12 +0800
Subject: [PATCH 32/36] NearFilter: store position as Vector3, drop _xyz
 extraction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

NearFilter now holds a typed `Vector3` instead of an `Any` named `pose`.
`Stream.near()` does the one-time normalization at construction (Pose/
PoseStamped → .position, then Vector3(...) which accepts tuple/ndarray/
Vector3). `matches()` uses Vector3 subtraction + length_squared(); the
sqlite filter compiler reads f.position.x/.y/.z directly. The _xyz()
helper is gone.
---
 dimos/memory2/observationstore/sqlite.py |  8 +-------
 dimos/memory2/stream.py                  |  7 ++++++-
 dimos/memory2/type/filter.py             | 25 +++++++-----------------
 3 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/dimos/memory2/observationstore/sqlite.py b/dimos/memory2/observationstore/sqlite.py
index e82e867dc0..3dd9a34948 100644
--- a/dimos/memory2/observationstore/sqlite.py
+++ b/dimos/memory2/observationstore/sqlite.py
@@ -31,7 +31,6 @@
     NearFilter,
     TagsFilter,
     TimeRangeFilter,
-    _xyz,
 )
 from dimos.memory2.type.observation import _UNLOADED, Observation, PoseTuple
 from dimos.memory2.utils.sqlite import open_disposable_sqlite_connection
@@ -84,12 +83,7 @@ def _compile_filter(f: Filter, stream: str, prefix: str = "") -> tuple[str, list
             params.append(v)
         return (" AND ".join(clauses), params)
     if isinstance(f, NearFilter):
-        pose = f.pose
-        if pose is None:
-            return None
-        if hasattr(pose, "position"):
-            pose = pose.position
-        cx, cy, cz = _xyz(pose)
+        cx, cy, cz = f.position.x, f.position.y, f.position.z
         r = f.radius
         # R*Tree bounding-box pre-filter + exact squared-distance check
         rtree_sql = (
diff --git a/dimos/memory2/stream.py b/dimos/memory2/stream.py
index 3a92f7708f..8f0a767aa7 100644
--- a/dimos/memory2/stream.py
+++ b/dimos/memory2/stream.py
@@ -38,6 +38,7 @@
     TimeRangeFilter,
 )
 from dimos.memory2.type.observation import EmbeddedObservation, Observation
+from dimos.msgs.geometry_msgs.Vector3 import Vector3
 from dimos.utils.logging_config import setup_logger
 
 if TYPE_CHECKING:
@@ -177,7 +178,11 @@ def at_relative(self, t: float, tolerance: float = 1.0) -> Stream[T, O]:
         return self.at(t0 + t, tolerance=tolerance)
 
     def near(self, pose: Any, radius: float) -> Stream[T, O]:
-        return self._with_filter(NearFilter(pose, radius))
+        # Accept Pose/PoseStamped (any object with `.position`), Vector3,
+        # numpy arrays, or (x, y, z) tuples — Vector3() handles the rest.
+        if hasattr(pose, "position"):
+            pose = pose.position
+        return self._with_filter(NearFilter(Vector3(pose), radius))
 
     def tags(self, **tags: Any) -> Stream[T, O]:
         return self._with_filter(TagsFilter(tags))
diff --git a/dimos/memory2/type/filter.py b/dimos/memory2/type/filter.py
index ddc0f86eb5..1250c4c31f 100644
--- a/dimos/memory2/type/filter.py
+++ b/dimos/memory2/type/filter.py
@@ -19,6 +19,8 @@
 from itertools import islice
 from typing import TYPE_CHECKING, Any
 
+from dimos.msgs.geometry_msgs.Vector3 import Vector3
+
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterator
 
@@ -75,28 +77,15 @@ def matches(self, obs: Observation[Any]) -> bool:
 
 @dataclass(frozen=True)
 class NearFilter(Filter):
-    pose: Any = field(hash=False)
+    position: Vector3 = field(hash=False)
     radius: float = 0.0
 
     def matches(self, obs: Observation[Any]) -> bool:
-        p2 = obs.pose_tuple
-        if p2 is None or self.pose is None:
+        p = obs.pose_tuple
+        if p is None:
             return False
-        p1 = self.pose
-        # NearFilter.pose accepts raw (x,y,z) tuples or PoseStamped-like objects.
-        if hasattr(p1, "position"):
-            p1 = p1.position
-        x1, y1, z1 = _xyz(p1)
-        x2, y2, z2 = p2[0], p2[1], p2[2]
-        dist_sq = (x1 - x2) ** 2 + (y1 - y2) ** 2 + (z1 - z2) ** 2
-        return dist_sq <= self.radius**2
-
-
-def _xyz(p: Any) -> tuple[float, float, float]:
-    """Extract (x, y, z) from various pose representations."""
-    if isinstance(p, (list, tuple)):
-        return (float(p[0]), float(p[1]), float(p[2]) if len(p) > 2 else 0.0)
-    return (float(p.x), float(p.y), float(getattr(p, "z", 0.0)))
+        delta = self.position - Vector3(p[0], p[1], p[2])
+        return delta.length_squared() <= self.radius * self.radius
 
 
 @dataclass(frozen=True)

From 5d7f573c4ed766b61cc74fb4568016024605aa81 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Thu, 28 May 2026 18:17:31 +0800
Subject: [PATCH 33/36] go2 stairs

---
 data/.lfs/go2_mid360_stairs.db.tar.gz       |   3 +
 dimos/mapping/loop_closure/utils/map_rrd.py | 366 ++++++++++++++++++++
 dimos/memory2/store/base.py                 |  11 +
 3 files changed, 380 insertions(+)
 create mode 100644 data/.lfs/go2_mid360_stairs.db.tar.gz
 create mode 100644 dimos/mapping/loop_closure/utils/map_rrd.py

diff --git a/data/.lfs/go2_mid360_stairs.db.tar.gz b/data/.lfs/go2_mid360_stairs.db.tar.gz
new file mode 100644
index 0000000000..e094ccb1f2
--- /dev/null
+++ b/data/.lfs/go2_mid360_stairs.db.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32adaeab92baf7780915600ee04b01aac9fa5a4b5965deabe72f539b6633e606
+size 514688073
diff --git a/dimos/mapping/loop_closure/utils/map_rrd.py b/dimos/mapping/loop_closure/utils/map_rrd.py
new file mode 100644
index 0000000000..6856dadef7
--- /dev/null
+++ b/dimos/mapping/loop_closure/utils/map_rrd.py
@@ -0,0 +1,366 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Dump a recorded dataset to .rrd: lidar point clouds + camera frames.
+
+Lidar clouds are assumed to be in world frame and logged directly under
+their entity path (no parent transform). Entities written:
+
+- ``world/lidar``         — Go2 L1 per-frame point cloud
+- ``world/lidar_voxels``  — accumulated voxel map of the primary lidar (``--map``)
+- ``world/fastlio_lidar`` — fastlio_lidar raw cloud (if present)
+- ``world/fastlio_voxels``— accumulated voxel map of fastlio_lidar (``--map``)
+- ``world/fastlio``       — fastlio_odometry pose axis (if present)
+- ``world/fastlio_path``  — fastlio_odometry trajectory (growing LineStrips3D)
+- ``world/odom``          — Go2 onboard odom pose axis (if present)
+- ``world/odom_path``     — Go2 onboard odom trajectory (growing LineStrips3D)
+- ``world/camera``        — color_image camera pose (static pinhole + Transform3D)
+- ``world/camera/image``  — color_image frames
+
+Usage:
+    uv run python -m dimos.mapping.loop_closure.utils.map_rrd mid360 --out map.rrd
+    uv run python -m dimos.mapping.loop_closure.utils.map_rrd mid360 --out map.rrd --map
+    rerun map.rrd
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from pathlib import Path
+import time
+from typing import Any
+
+import rerun as rr
+import typer
+
+from dimos.mapping.voxels import VoxelMapTransformer
+from dimos.memory2.store.sqlite import SqliteStore
+from dimos.memory2.stream import Stream
+from dimos.memory2.transform import throttle
+from dimos.memory2.type.observation import Observation
+from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped
+from dimos.msgs.geometry_msgs.Transform import Transform
+from dimos.msgs.nav_msgs.Odometry import Odometry
+from dimos.msgs.sensor_msgs.Image import Image
+from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2, register_colormap_annotation
+from dimos.robot.unitree.go2.connection import BASE_TO_OPTICAL, _camera_info_static
+from dimos.utils.data import resolve_named_path
+
+TIMELINE = "ts"
+
+
+def _progress(total: int, label: str) -> Callable[[Observation[Any]], None]:
+    """Matches dimos/utils/cli/map.py:progress."""
+    seen = 0
+    wall_start: float | None = None
+    last_wall: float | None = None
+    first_ts: float | None = None
+
+    def tick(obs: Observation[Any]) -> None:
+        nonlocal seen, wall_start, last_wall, first_ts
+        now = time.monotonic()
+        if wall_start is None:
+            wall_start = now
+            first_ts = obs.ts
+        assert first_ts is not None
+        frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0
+        last_wall = now
+        seen += 1
+        pct = 100 * seen // total if total else 100
+        wall = now - wall_start
+        data = obs.ts - first_ts
+        speed = data / wall if wall > 0 else 0.0
+        end = "\n" if seen >= total else ""
+        print(
+            f"\r{label} {pct:>3}% [{seen}/{total}] {data:.1f}s ({speed:.1f} x rt) {frame_ms:.0f}ms/frame",
+            end=end,
+            flush=True,
+        )
+
+    return tick
+
+
+def _log_clouds(
+    label: str,
+    stream: Stream[PointCloud2],
+    entity: str,
+    voxel: float,
+    point_mode: str,
+    *,
+    total: int | None = None,
+) -> None:
+    """Iterate a PointCloud2 stream and log each obs to ``entity``.
+
+    ``total`` overrides the progress denominator — useful for transform
+    pipelines where calling :py:meth:`Stream.count` would materialize the
+    whole pipeline.
+    """
+    n = total if total is not None else stream.count()
+    cb = _progress(n, label)
+    for obs in stream:
+        cb(obs)
+        rr.set_time(TIMELINE, timestamp=obs.ts)
+        rr.log(entity, obs.data.to_rerun(voxel_size=voxel, mode=point_mode))
+
+
+def _log_path(
+    label: str,
+    stream: Stream[Any],
+    entity: str,
+    color: tuple[int, int, int],
+    *,
+    emit_every: int = 10,
+) -> None:
+    """Iterate a pose-bearing stream and log a growing :class:`LineStrips3D` to
+    ``entity`` every ``emit_every`` poses (and once more at the end). Frames
+    without a pose are skipped.
+    """
+    n = stream.count()
+    cb = _progress(n, label)
+    points: list[tuple[float, float, float]] = []
+    last_ts: float | None = None
+    emit_count = 0
+    for obs in stream:
+        cb(obs)
+        p = obs.pose_tuple
+        if p is None:
+            continue
+        points.append((float(p[0]), float(p[1]), float(p[2])))
+        last_ts = obs.ts
+        emit_count += 1
+        if emit_every > 0 and emit_count % emit_every == 0 and len(points) >= 2:
+            rr.set_time(TIMELINE, timestamp=obs.ts)
+            rr.log(entity, rr.LineStrips3D([points], colors=[color]))
+    if (
+        last_ts is not None
+        and len(points) >= 2
+        and (emit_every <= 0 or emit_count % emit_every != 0)
+    ):
+        rr.set_time(TIMELINE, timestamp=last_ts)
+        rr.log(entity, rr.LineStrips3D([points], colors=[color]))
+
+
+def main(
+    dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"),
+    out: Path = typer.Option(..., "--out", help="Output .rrd path"),
+    voxel: float = typer.Option(
+        0.05, "--voxel", help="Voxel size hint for the point cloud renderer"
+    ),
+    point_mode: str = typer.Option(
+        "spheres", "--point-mode", help="Render mode: 'spheres', 'boxes', or 'points'"
+    ),
+    camera_hz: float = typer.Option(
+        2.0, "--camera-hz", help="Throttle color_image to at most this rate; 0 disables"
+    ),
+    map: bool = typer.Option(
+        False,
+        "--map",
+        help="Accumulate each lidar stream into a VoxelGrid and log only the final map",
+    ),
+    map_voxel: float = typer.Option(
+        0.05, "--map-voxel", help="Voxel size for the accumulated map (m); --map only"
+    ),
+    map_device: str = typer.Option(
+        "CUDA:0", "--map-device", help="Open3D device for the VoxelGrid; --map only"
+    ),
+    map_emit_every: int = typer.Option(
+        10,
+        "--map-emit-every",
+        help="Emit accumulated map every N frames (0 = only at end); --map only",
+    ),
+    image_pose_from: str = typer.Option(
+        "own",
+        "--image-pose-from",
+        help="Pose authority for color_image frames: 'own' (image pose) or 'fastlio_odom' "
+        "(nearest fastlio_odometry frame in time)",
+    ),
+    image_pose_tol: float = typer.Option(
+        0.1,
+        "--image-pose-tol",
+        help="Max time gap (s) when matching --image-pose-from fastlio_odom",
+    ),
+) -> None:
+    db_path = resolve_named_path(dataset, ".db")
+    cam_info = _camera_info_static()
+
+    rr.init("dimos map_rrd", recording_id=db_path.stem)
+    rr.save(str(out))
+    register_colormap_annotation("turbo")
+
+    # Static pinhole on the camera entity; per-frame Transform3D goes on the
+    # same entity. Image is the child so it projects through the pinhole.
+    pinhole = cam_info.to_rerun()
+    assert not isinstance(pinhole, list)
+    rr.log("world/camera", pinhole, static=True)
+
+    # Static axis triads as children of each moving Transform3D, so the
+    # transforms are actually visible in the 3D view.
+    axes = rr.Arrows3D(
+        vectors=[[0.3, 0, 0], [0, 0.3, 0], [0, 0, 0.3]],
+        colors=[[255, 0, 0], [0, 255, 0], [0, 0, 255]],
+    )
+    rr.log("world/fastlio/axes", axes, static=True)
+    rr.log("world/odom/axes", axes, static=True)
+
+    store = SqliteStore(path=str(db_path))
+    with store:
+        print(store.summary())
+
+        lidar = store.stream("lidar", PointCloud2)
+        color_image = store.stream("color_image", Image)
+        has_livox = "fastlio_lidar" in store.streams
+        livox = store.stream("fastlio_lidar", PointCloud2) if has_livox else None
+
+        # ---- per-frame raw clouds ----
+        _log_clouds("       lidar", lidar, "world/lidar", voxel, point_mode)
+        if livox is not None:
+            _log_clouds("fastlio_lidar", livox, "world/fastlio_lidar", voxel, point_mode)
+
+        # ---- accumulated voxel maps (--map only) ----
+        # Go2 L1 forward-facing → column carving on.
+        # Mid360 spherical → column carving off, just aggregate.
+        if map:
+            grid_kwargs = {"voxel_size": map_voxel, "device": map_device, "show_startup_log": False}
+            _log_clouds(
+                " lidar_voxels",
+                lidar.transform(
+                    VoxelMapTransformer(
+                        emit_every=map_emit_every, carve_columns=True, **grid_kwargs
+                    )
+                ),
+                "world/lidar_voxels",
+                voxel,
+                point_mode,
+                total=max(1, lidar.count() // max(map_emit_every, 1)),
+            )
+            if livox is not None:
+                _log_clouds(
+                    "fastlio_voxels",
+                    livox.transform(
+                        VoxelMapTransformer(
+                            emit_every=map_emit_every, carve_columns=False, **grid_kwargs
+                        )
+                    ),
+                    "world/fastlio_voxels",
+                    voxel,
+                    point_mode,
+                    total=max(1, livox.count() // max(map_emit_every, 1)),
+                )
+
+        # ---- fastlio pose axis + path from fastlio_odometry stream ----
+        if "fastlio_odometry" in store.streams:
+            odometry = store.stream("fastlio_odometry", Odometry)
+            cb = _progress(odometry.count(), "fastlio_odometry")
+            for obs in odometry:
+                cb(obs)
+                p = obs.pose_tuple
+                if p is None:
+                    continue
+                rr.set_time(TIMELINE, timestamp=obs.ts)
+                x, y, z, qx, qy, qz, qw = p
+                rr.log(
+                    "world/fastlio",
+                    rr.Transform3D(
+                        translation=[x, y, z],
+                        quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw]),
+                    ),
+                )
+            _log_path(
+                "  fastlio_path",
+                store.stream("fastlio_odometry", Odometry),
+                "world/fastlio_path",
+                color=(255, 165, 0),  # orange
+            )
+
+        # ---- Go2 native odom pose axis + path ----
+        if "odom" in store.streams:
+            odom = store.stream("odom", PoseStamped)
+            cb = _progress(odom.count(), "        odom")
+            for obs in odom:
+                cb(obs)
+                p = obs.pose_tuple
+                if p is None:
+                    continue
+                rr.set_time(TIMELINE, timestamp=obs.ts)
+                x, y, z, qx, qy, qz, qw = p
+                rr.log(
+                    "world/odom",
+                    rr.Transform3D(
+                        translation=[x, y, z],
+                        quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw]),
+                    ),
+                )
+            _log_path(
+                "     odom_path",
+                store.stream("odom", PoseStamped),
+                "world/odom_path",
+                color=(0, 200, 100),  # green
+            )
+
+        # ---- pass 2: camera pose + image per color_image ----
+        cam_pipeline = (
+            color_image.transform(throttle(1.0 / camera_hz)) if camera_hz > 0 else color_image
+        )
+        n_img = cam_pipeline.count()
+        cb = _progress(n_img, "  color_image")
+        pose_authority: Stream[Any] | None = None
+        if image_pose_from == "fastlio_odom":
+            if "fastlio_odometry" not in store.streams:
+                raise typer.BadParameter(
+                    "--image-pose-from=fastlio_odom but no fastlio_odometry stream in dataset"
+                )
+            pose_authority = store.stream("fastlio_odometry", Odometry)
+        elif image_pose_from != "own":
+            raise typer.BadParameter(
+                f"--image-pose-from must be 'own' or 'fastlio_odom', got {image_pose_from!r}"
+            )
+
+        for img_obs in cam_pipeline:
+            cb(img_obs)
+            rr.set_time(TIMELINE, timestamp=img_obs.ts)
+            if pose_authority is not None:
+                # fastlio_odom is body-in-world; compose with base→optical so the
+                # camera entity lands where the camera actually is, not the body.
+                matches = pose_authority.at(img_obs.ts, tolerance=image_pose_tol).to_list()
+                if matches:
+                    nearest = min(matches, key=lambda o: abs(o.ts - img_obs.ts))
+                    ps = nearest.pose_stamped
+                    cam_tf = (
+                        Transform.from_pose("world", ps) + BASE_TO_OPTICAL
+                        if ps is not None
+                        else None
+                    )
+                else:
+                    cam_tf = None
+            else:
+                # The image's own recorded pose is already optical-in-world.
+                ps = img_obs.pose_stamped
+                cam_tf = Transform.from_pose("world", ps) if ps is not None else None
+            if cam_tf is not None:
+                t, q = cam_tf.translation, cam_tf.rotation
+                rr.log(
+                    "world/camera",
+                    rr.Transform3D(
+                        translation=[t.x, t.y, t.z],
+                        quaternion=rr.Quaternion(xyzw=[q.x, q.y, q.z, q.w]),
+                    ),
+                )
+            rr.log("world/camera/image", img_obs.data.to_rerun())
+
+    print(f"wrote {out}")
+    print(f"open with: rerun {out}")
+
+
+if __name__ == "__main__":
+    typer.run(main)
diff --git a/dimos/memory2/store/base.py b/dimos/memory2/store/base.py
index fb6d76379d..3c07d6ae6d 100644
--- a/dimos/memory2/store/base.py
+++ b/dimos/memory2/store/base.py
@@ -14,6 +14,7 @@
 
 from __future__ import annotations
 
+from collections.abc import Iterator
 from typing import TYPE_CHECKING, Any, Generic, Protocol, TypeVar, cast
 
 from dimos.core.resource import CompositeResource
@@ -65,6 +66,12 @@ def __getitem__(self, name: str) -> S:
             raise KeyError(name)
         return self._container.stream(name)
 
+    def __contains__(self, name: object) -> bool:
+        return isinstance(name, str) and name in self._container.list_streams()
+
+    def __iter__(self) -> Iterator[str]:
+        return iter(self._container.list_streams())
+
     def __dir__(self) -> list[str]:
         return self._container.list_streams()
 
@@ -200,6 +207,10 @@ def list_streams(self) -> list[str]:
         """Return names of all streams in this store."""
         return list(self._streams.keys())
 
+    def summary(self) -> str:
+        """One line per opened stream — name, count, ts range. See :meth:`Stream.summary`."""
+        return "\n".join(s.summary() for _, s in self.streams.items())
+
     def delete_stream(self, name: str) -> None:
         """Delete a stream by name (from cache and underlying storage)."""
         stream = self._streams.pop(name, None)

From f7f09e24164e61b7c32b516e66315376d6de0f31 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Thu, 28 May 2026 18:38:10 +0800
Subject: [PATCH 34/36] recording update

---
 data/.lfs/go2_mid360_stairs.db.tar.gz |  4 ++--
 dimos/utils/data.py                   | 10 +++++++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/data/.lfs/go2_mid360_stairs.db.tar.gz b/data/.lfs/go2_mid360_stairs.db.tar.gz
index e094ccb1f2..615aad3a17 100644
--- a/data/.lfs/go2_mid360_stairs.db.tar.gz
+++ b/data/.lfs/go2_mid360_stairs.db.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32adaeab92baf7780915600ee04b01aac9fa5a4b5965deabe72f539b6633e606
-size 514688073
+oid sha256:02d8d2194332291cf71988458965aa9f8019b8d61661ff153e93886dd72e85e9
+size 54082131
diff --git a/dimos/utils/data.py b/dimos/utils/data.py
index bcdc08fd6a..82afc4c1fd 100644
--- a/dimos/utils/data.py
+++ b/dimos/utils/data.py
@@ -111,7 +111,13 @@ def get_data_dir(extra_path: str | None = None) -> Path:
     return get_project_root() / "data"
 
 
-def resolve_named_path(name: str | Path, suffix: str = "") -> Path:
+def resolve_named_path(name: str | Path, suffix: str = "", *, pull: bool = True) -> Path:
+    """Resolve a bare name / partial path to an absolute Path.
+
+    If ``pull`` is True (default), fall back to :func:`get_data` (which may
+    download from LFS) when the file isn't found locally. Set ``pull=False``
+    to fail fast with ``FileNotFoundError`` instead.
+    """
     s = str(name)
     p = Path(s)
     if p.is_absolute() or p.exists():
@@ -120,6 +126,8 @@ def resolve_named_path(name: str | Path, suffix: str = "") -> Path:
         p = Path(s + suffix)
         if p.is_absolute() or p.exists():
             return p
+    if not pull:
+        raise FileNotFoundError(f"{name!r} not found locally (pull=False)")
     return get_data(p.name)
 
 

From b140cea1bb97806b3eb7160769d200911052ae12 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Thu, 28 May 2026 18:39:43 +0800
Subject: [PATCH 35/36] oops

---
 dimos/utils/cli/map.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py
index 8f0e65e9f0..af19300549 100644
--- a/dimos/utils/cli/map.py
+++ b/dimos/utils/cli/map.py
@@ -508,6 +508,3 @@ def main(
 
 if __name__ == "__main__":
     typer.run(main)
-    typer.run(main)
-    typer.run(main)
-    typer.run(main)

From 6afa4bc01afa1430962f34c80e7015063641e536 Mon Sep 17 00:00:00 2001
From: Ivan Nikolic <lesh@sysphere.org>
Date: Mon, 1 Jun 2026 20:24:22 +0800
Subject: [PATCH 36/36] map_rrd: rename pose loop var to fix mypy Observation
 type clash

---
 dimos/mapping/loop_closure/utils/map_rrd.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dimos/mapping/loop_closure/utils/map_rrd.py b/dimos/mapping/loop_closure/utils/map_rrd.py
index 6856dadef7..13c2081d5a 100644
--- a/dimos/mapping/loop_closure/utils/map_rrd.py
+++ b/dimos/mapping/loop_closure/utils/map_rrd.py
@@ -287,12 +287,12 @@ def main(
         if "odom" in store.streams:
             odom = store.stream("odom", PoseStamped)
             cb = _progress(odom.count(), "        odom")
-            for obs in odom:
-                cb(obs)
-                p = obs.pose_tuple
+            for pose_obs in odom:
+                cb(pose_obs)
+                p = pose_obs.pose_tuple
                 if p is None:
                     continue
-                rr.set_time(TIMELINE, timestamp=obs.ts)
+                rr.set_time(TIMELINE, timestamp=pose_obs.ts)
                 x, y, z, qx, qy, qz, qw = p
                 rr.log(
                     "world/odom",