From ceb08bacf278753497f7726b0e1957c3635dea78 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 17:26:36 +0800 Subject: [PATCH 01/64] map tool first pass --- dimos/mapping/relocalization/pgo.py | 32 ++++++- dimos/msgs/sensor_msgs/PointCloud2.py | 5 +- dimos/robot/cli/dimos.py | 20 ++++ dimos/utils/cli/map.py | 132 ++++++++++++++++++++++++++ 4 files changed, 183 insertions(+), 6 deletions(-) create mode 100644 dimos/utils/cli/map.py diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index 1c2ee8aa6c..b04cb61ebc 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -18,6 +18,7 @@ from __future__ import annotations +from collections.abc import Callable from dataclasses import dataclass from typing import Any @@ -375,6 +376,9 @@ def pgo_then_voxels( voxel_size: float = 0.05, block_count: int = 2_000_000, device: str = "CUDA:0", + corrected_path_out: list[tuple[float, float, float]] | None = None, + pass1_on_frame: Callable[[Any], None] | None = None, + pass2_on_frame: Callable[[Any], None] | None = None, **pgo_cfg: Any, ) -> PointCloud2: """Two-pass PGO mapping (eliminates duplicate-wall artifacts). @@ -401,9 +405,16 @@ def pgo_then_voxels( n_frames = 0 for obs in stream: + if pass1_on_frame is not None: + pass1_on_frame(obs) if obs.pose is None: continue x, y, z, qx, qy, qz, qw = obs.pose + # Skip placeholder poses: zero translation, or invalid (zero-norm) quaternion. + if x == 0 and y == 0 and z == 0: + continue + if qx == 0 and qy == 0 and qz == 0 and qw == 0: + continue r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() t = np.array([x, y, z]) points, _ = obs.data.as_numpy() @@ -420,24 +431,35 @@ def pgo_then_voxels( n_kf = pgo.num_key_poses print(f" Pass 1: {n_frames} frames, {n_kf} keyframes") + if corrected_path_out is not None: + corrected_path_out.extend( + (float(kp.t_global[0]), float(kp.t_global[1]), float(kp.t_global[2])) + for kp in pgo._key_poses + ) + grid = VoxelGrid(voxel_size=voxel_size, block_count=block_count, device=device) try: if n_kf < 2: for obs in stream: + if pass2_on_frame is not None: + pass2_on_frame(obs) grid.add_frame(obs.data) return grid.get_global_pointcloud2() - kf_ts = np.array([kp.timestamp for kp in pgo._key_poses]) + # Sort + dedupe by timestamp: Slerp requires strictly increasing ts. + kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp) + kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp] + kf_ts = np.array([kp.timestamp for kp in kps]) # Per-keyframe rigid drift correction: T_corr = T_global @ T_local.inv() - R_corr_list = [kp.r_global @ kp.r_local.T for kp in pgo._key_poses] - t_corr_list = [ - kp.t_global - (kp.r_global @ kp.r_local.T) @ kp.t_local for kp in pgo._key_poses - ] + R_corr_list = [kp.r_global @ kp.r_local.T for kp in kps] + t_corr_list = [kp.t_global - (kp.r_global @ kp.r_local.T) @ kp.t_local for kp in kps] t_corrs = np.stack(t_corr_list) rot_slerp = Slerp(kf_ts, Rotation.from_matrix(np.stack(R_corr_list))) n_inserted = 0 for obs in stream: + if pass2_on_frame is not None: + pass2_on_frame(obs) if obs.pose is None: continue ts = float(np.clip(obs.ts, kf_ts[0], kf_ts[-1])) diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index 20d6b09354..598cb0b094 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -729,7 +729,10 @@ def to_rerun( point_colors = colors else: z = points[:, 2] - class_ids = ((z - z.min()) / (z.max() - z.min() + 1e-8) * 255).astype(np.uint8) + + # Clip to robust percentiles so a few outliers don't squash the colormap range. + z_lo, z_hi = np.percentile(z, [1, 99]) + class_ids = (np.clip((z - z_lo) / (z_hi - z_lo + 1e-8), 0, 1) * 255).astype(np.uint8) if mode == "points": return rr.Points3D( diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 34b74b7441..b344d9be10 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -685,6 +685,26 @@ def send( topic_send(topic, message_expr) +@main.command(name="map") +def map_cmd( + dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), + voxel: float = typer.Option(0.05, "--voxel", help="Voxel size for the rebuild"), + device: str = typer.Option( + "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)" + ), + pgo: bool = typer.Option( + False, "--pgo", help="Run pose graph optimization before rebuilding (twopass)" + ), + block_count: int = typer.Option( + 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" + ), +) -> None: + """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" + from dimos.utils.cli.map import main as map_main + + map_main(dataset=dataset, voxel=voxel, device=device, pgo=pgo, block_count=block_count) + + @main.command(name="export-premap") def export_premap_cmd( dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py new file mode 100644 index 0000000000..1f82617871 --- /dev/null +++ b/dimos/utils/cli/map.py @@ -0,0 +1,132 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time + +import rerun as rr +import rerun.blueprint as rrb +import typer + +from dimos.mapping.voxels import VoxelMapTransformer +from dimos.memory2.store.sqlite import SqliteStore +from dimos.utils.data import resolve_named_path +from dimos.visualization.rerun.init import rerun_init + + +def progress(total: int, label: str = ""): + seen = 0 + wall_start: float | None = None + last_wall: float | None = None + first_ts: float | None = None + + def _progress(obs): + nonlocal seen, wall_start, last_wall, first_ts + now = time.monotonic() + if wall_start is None: + wall_start = now + first_ts = obs.ts + frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0 + last_wall = now + seen += 1 + pct = 100 * seen // total if total else 100 + wall = now - wall_start + data = obs.ts - first_ts + speed = data / wall if wall > 0 else 0.0 + end = "\n" if seen >= total else "" + prefix = f"{label} " if label else "" + print( + f"\r{prefix}{pct:>3}% [{seen}/{total}] {data:.1f}s ({speed:.1f} x rt) {frame_ms:.0f}ms/frame", + end=end, + flush=True, + ) + + return _progress + + +def main( + dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), + voxel: float = typer.Option(0.05, "--voxel", help="Voxel size for the rebuild"), + device: str = typer.Option( + "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)" + ), + pgo: bool = typer.Option( + False, "--pgo", help="Run pose graph optimization before rebuilding (twopass)" + ), + block_count: int = typer.Option( + 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" + ), +) -> None: + db_path = resolve_named_path(dataset, ".db") + + store = SqliteStore(path=db_path) + lidar = store.streams.lidar + + print(lidar.summary()) + + path: list[tuple[float, float, float]] = [] + + def collect_path(obs): + if obs.pose is None: + return + # Reject placeholder poses at the world origin (translation = 0,0,0). + if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: + return + path.append((obs.pose[0], obs.pose[1], obs.pose[2])) + + pgo_map = None + pgo_path: list[tuple[float, float, float]] = [] + if pgo: + from dimos.mapping.relocalization.pgo import pgo_then_voxels + + total = lidar.count() + print("running PGO twopass map...") + pgo_map = pgo_then_voxels( + lidar, + voxel_size=voxel, + block_count=block_count, + device=device, + corrected_path_out=pgo_path, + pass1_on_frame=progress(total, "pgo pass 1 (optimizing)"), + pass2_on_frame=progress(total, "pgo pass 2 (rebuilding)"), + ) + + global_map = ( + lidar.tap(collect_path) + .transform(VoxelMapTransformer(voxel_size=voxel, device=device)) + .tap(progress(lidar.count(), "reconstructing global map")) + .last() + .data + ) + + rerun_init("coloring", spawn=True) + rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) + rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True) + if path: + rr.log( + "world/raw_map/path", + rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[0.08]), + static=True, + ) + if pgo_map is not None: + rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True) + if pgo_path: + rr.log( + "world/pgo_map/path", + rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.08]), + static=True, + ) + + +if __name__ == "__main__": + typer.run(main) From 4e0cec34c3e31989c1fb6e3e2995130bc01564e0 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 17:45:21 +0800 Subject: [PATCH 02/64] map tool wrap --- dimos/mapping/relocalization/pgo.py | 2 +- dimos/robot/cli/dimos.py | 57 +++++---------------- dimos/utils/cli/map.py | 55 ++++++++++++++------ docs/capabilities/mapping/relocalization.md | 30 +++++------ 4 files changed, 66 insertions(+), 78 deletions(-) diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index b04cb61ebc..3b95ba426f 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -13,7 +13,7 @@ # limitations under the License. # NOTE: This lives under mapping/relocalization/ for now because the only -# consumer is the premap export pipeline (`dimos export-premap`). It is +# consumer is the premap export pipeline (`dimos map --export`). It is # temporary and can be moved/split out later when PGO grows other consumers. from __future__ import annotations diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index b344d9be10..5607c8b1ca 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -698,55 +698,26 @@ def map_cmd( block_count: int = typer.Option( 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" ), + export: bool = typer.Option( + False, + "--export", + help="Export PGO twopass map to ./.pc2.lcm in cwd (implies --pgo)", + ), + no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), ) -> None: """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" from dimos.utils.cli.map import main as map_main - map_main(dataset=dataset, voxel=voxel, device=device, pgo=pgo, block_count=block_count) - - -@main.command(name="export-premap") -def export_premap_cmd( - dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), - output: Path | None = typer.Option(None, "-o", "--output", help="Output .pc2.lcm path"), - voxel_size: float = typer.Option(0.05, "--voxel-size", help="Voxel size for the rebuild"), - duration: float | None = typer.Option( - None, "--duration", help="Limit to first N seconds (default: full log)" - ), - device: str = typer.Option( - "CUDA:0", - "--device", - help="Open3D compute device (e.g. CUDA:0, CPU:0); fallback to CPU if unavailable", - ), - block_count: int = typer.Option( - 2_000_000, - "--block-count", - help="VoxelBlockGrid capacity", - ), -) -> None: - """Export a twopass relocalization premap (.pc2.lcm) from a recorded SQLite dataset.""" - from dimos.mapping.relocalization.pgo import pgo_then_voxels - from dimos.memory2.store.sqlite import SqliteStore - from dimos.utils.data import get_data_dir, resolve_named_path - - db_path = resolve_named_path(dataset, ".db") - - store = SqliteStore(path=db_path) - lidar = store.streams.lidar - if duration is not None: - lidar = lidar.before(lidar.first().ts + duration) - - typer.echo(f"computing twopass map from {db_path} (voxel_size={voxel_size})...") - twopass_map = pgo_then_voxels( - lidar, voxel_size=voxel_size, block_count=block_count, device=device + map_main( + dataset=dataset, + voxel=voxel, + device=device, + pgo=pgo, + block_count=block_count, + export=export, + no_gui=no_gui, ) - if output is None: - output = get_data_dir() / f"{db_path.stem}_twopass_map.pc2.lcm" - output.parent.mkdir(parents=True, exist_ok=True) - output.write_bytes(twopass_map.lcm_encode()) - typer.echo(f"wrote {output}") - @main.command() def cameracalibrate( diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 1f82617871..9c7ac61794 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -66,8 +66,16 @@ def main( block_count: int = typer.Option( 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" ), + export: bool = typer.Option( + False, + "--export", + help="Export PGO twopass map to ./.pc2.lcm in cwd (implies --pgo)", + ), + no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), ) -> None: db_path = resolve_named_path(dataset, ".db") + if export: + pgo = True store = SqliteStore(path=db_path) lidar = store.streams.lidar @@ -109,23 +117,36 @@ def collect_path(obs): .data ) - rerun_init("coloring", spawn=True) - rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) - rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True) - if path: - rr.log( - "world/raw_map/path", - rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[0.08]), - static=True, - ) - if pgo_map is not None: - rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True) - if pgo_path: - rr.log( - "world/pgo_map/path", - rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.08]), - static=True, - ) + if not no_gui: + rerun_init("coloring", spawn=True) + rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) + rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True) + if path: + rr.log( + "world/raw_map/path", + rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[0.05]), + static=True, + ) + if pgo_map is not None: + rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True) + if pgo_path: + rr.log( + "world/pgo_map/path", + rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.05]), + static=True, + ) + + if export and pgo_map is not None: + from pathlib import Path + + out_path = Path.cwd() / f"{db_path.stem}.pc2.lcm" + print(f"exporting PGO twopass map to {out_path}...") + out_path.write_bytes(pgo_map.lcm_encode()) + print(f"wrote {out_path}") + print() + print("load back with:") + print(" from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2") + print(f' pcd = PointCloud2.lcm_decode(open("{out_path.name}", "rb").read())') if __name__ == "__main__": diff --git a/docs/capabilities/mapping/relocalization.md b/docs/capabilities/mapping/relocalization.md index 51a966a1c6..a794f27f04 100644 --- a/docs/capabilities/mapping/relocalization.md +++ b/docs/capabilities/mapping/relocalization.md @@ -33,12 +33,16 @@ Convert the recording to a relocalization premap (`.pc2.lcm`): ```bash # default name from step 1: -dimos export-premap recording_go2 +dimos map recording_go2 --export # renamed: -dimos export-premap {DB_NAME} +dimos map {DB_NAME} --export ``` +`--export` implies `--pgo` (runs pose graph optimization) and writes +`./{DB_NAME}.pc2.lcm` to the current working directory. Add `--no-gui` +to skip the rerun viewer for headless runs. + `{DB_NAME}` can be a file name (with or without extension), or a relative / absolute path. When a bare file name is given, the tool searches in: @@ -49,28 +53,20 @@ When a bare file name is given, the tool searches in: Examples: ```bash -dimos export-premap go2_hongkong_office -dimos export-premap ./go2_hongkong_office.db -dimos export-premap data/go2_hongkong_office.db -dimos export-premap /abs/path/to/scan.db -``` - -Output defaults to `data/{basename}_twopass_map.pc2.lcm`. Pass `-o -` to save elsewhere. Relative paths resolve against the current -working directory; parent directories are created on demand. - -```bash -dimos export-premap recording_go2 -o /tmp/scan.pc2.lcm -dimos export-premap recording_go2 -o ./recording_go2_twopass_map.pc2.lcm +dimos map go2_hongkong_office --export +dimos map ./go2_hongkong_office.db --export +dimos map data/go2_hongkong_office.db --export +dimos map /abs/path/to/scan.db --export ``` Sample log: ``` -computing twopass map from /Users/dimos/Desktop/dimos-reloc/recording_go2.db (voxel_size=0.05)... +running PGO twopass map... Pass 1: 908 frames, 1 keyframes 12:54:32.025[inf][dimos/mapping/voxels.py ] VoxelGrid using device: CPU:0 -wrote /Users/dimos/Desktop/dimos-reloc/data/recording_go2_twopass_map.pc2.lcm +exporting PGO twopass map to /Users/dimos/Desktop/dimos-reloc/recording_go2.pc2.lcm... +wrote /Users/dimos/Desktop/dimos-reloc/recording_go2.pc2.lcm ``` ## 3. Relocalize against replay From 2099e006bd8b6ab2c6dd0848b6b2bbf4bc581fb7 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 18:16:05 +0800 Subject: [PATCH 03/64] renamed rerun blueprint to dimos map tool --- dimos/utils/cli/map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 9c7ac61794..9ef30b24b2 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -118,7 +118,7 @@ def collect_path(obs): ) if not no_gui: - rerun_init("coloring", spawn=True) + rerun_init("dimos map tool", spawn=True) rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True) if path: From 47848d44e881ce8779e9488743b343807275a8ba Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 18:49:39 +0800 Subject: [PATCH 04/64] readme cleanup --- docs/capabilities/mapping/relocalization.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/capabilities/mapping/relocalization.md b/docs/capabilities/mapping/relocalization.md index a794f27f04..a7fef58ccd 100644 --- a/docs/capabilities/mapping/relocalization.md +++ b/docs/capabilities/mapping/relocalization.md @@ -77,16 +77,16 @@ standard `unitree-go2` stack plus `RelocalizationModule`: ```bash dimos --replay --replay-db {DB_NAME} run unitree-go2-relocalization \ - -o relocalizationmodule.map_file={DB_NAME}_twopass_map + -o relocalizationmodule.map_file={DB_NAME} ``` -`{DB_NAME}_twopass_map` is resolved the same way as in section 2: cwd +`{DB_NAME}` is resolved the same way as in section 2: cwd first, then `data/`. Sample log: ``` -12:58:51.469[inf][imos/mapping/relocalization.py] Relocalization module started: map_file='recording_go2_twopass_map' loaded_map.frame_id='map' placeholder TF 'world' -> 'map' z_offset=20.0 +12:58:51.469[inf][imos/mapping/relocalization.py] Relocalization module started: map_file='recording_go2' loaded_map.frame_id='map' placeholder TF 'world' -> 'map' z_offset=20.0 12:58:56.528[war][imos/mapping/relocalization.py] relocalize skipped: n_pts=14198 < MIN_LOCAL_POINTS=20000 12:59:04.777[war][imos/mapping/relocalization.py] relocalize rejected: fitness=0.466 < threshold=0.6 time_cost=5.3s n_pts=20231 12:59:14.880[war][imos/mapping/relocalization.py] relocalize rejected: fitness=0.433 < threshold=0.6 time_cost=8.1s n_pts=37770 @@ -125,5 +125,5 @@ recorded `.db`: ```bash dimos --robot-ip {YOUR_ROBOT_IP} run unitree-go2-relocalization \ - -o relocalizationmodule.map_file={DB_NAME}_twopass_map + -o relocalizationmodule.map_file={DB_NAME} ``` From d829c0067de680ddf9a613f7bd4d84b7cde43486 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 18:35:40 +0800 Subject: [PATCH 05/64] mapping/relocalization: split PGO into composable Stream stages (pgo2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the monolithic pgo_then_voxels with four primitives over mem2 Streams and Transforms: pgo_keyframes(lidar) -> Stream[Keyframe] keyframes_to_corrections(kfs) -> Stream[Transform] (world_corrected <- world_raw) make_interpolator(corrections) -> (ts) -> Transform (SLERP + linear, endpoint-clipped) apply_corrections(stream, corr) -> Stream[T] (shuffles obs.pose) Drift correction is now a first-class, reusable Stream[Transform] that any pose-stamped consumer can apply — same math as before (rigid T_corr = T_global @ T_local^-1 per keyframe, SLERP + linear interpolation between bracketing keyframes), just composable. dimos map --pgo migrates to the new primitives; pgo_then_voxels is deleted. The internal _SimplePGO / PGOConfig / _KeyPose machinery stays in pgo.py and is imported by pgo2. --- dimos/mapping/relocalization/pgo.py | 126 +------------- dimos/mapping/relocalization/pgo2.py | 235 +++++++++++++++++++++++++++ dimos/utils/cli/map.py | 48 ++++-- 3 files changed, 277 insertions(+), 132 deletions(-) create mode 100644 dimos/mapping/relocalization/pgo2.py diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index 3b95ba426f..74a48c7236 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -# NOTE: This lives under mapping/relocalization/ for now because the only -# consumer is the premap export pipeline (`dimos map --export`). It is -# temporary and can be moved/split out later when PGO grows other consumers. +# Internal PGO/ICP machinery (gtsam-backed). Public consumers should use +# the Stream-shaped API in pgo2.py — pgo_keyframes, keyframes_to_corrections, +# make_interpolator, apply_corrections. The classes here (_SimplePGO, PGOConfig, +# _KeyPose) are imported from pgo2 to do the heavy lifting. from __future__ import annotations -from collections.abc import Callable from dataclasses import dataclass from typing import Any @@ -30,7 +30,6 @@ from scipy.spatial.transform import Rotation from dimos.core.module import ModuleConfig -from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 from dimos.utils.logging_config import setup_logger FRAME_MAP = "world" @@ -368,120 +367,3 @@ def build_global_map(self, voxel_size: float) -> np.ndarray: @property def num_key_poses(self) -> int: return len(self._key_poses) - - -def pgo_then_voxels( - stream: Any, - *, - voxel_size: float = 0.05, - block_count: int = 2_000_000, - device: str = "CUDA:0", - corrected_path_out: list[tuple[float, float, float]] | None = None, - pass1_on_frame: Callable[[Any], None] | None = None, - pass2_on_frame: Callable[[Any], None] | None = None, - **pgo_cfg: Any, -) -> PointCloud2: - """Two-pass PGO mapping (eliminates duplicate-wall artifacts). - - Pass 1 runs PGO over the lidar stream to build its corrected - keyframe trajectory. - - Pass 2 re-streams every lidar frame through ``VoxelGrid``, but each - frame's world-frame cloud is first transformed by the rigid drift - correction interpolated (SLERP for rotation, linear for translation) - from the keyframe corrections at the frame's timestamp. - - Each frame is therefore inserted exactly once at its converged - corrected pose, so walls collapse to a single layer instead of the - "smear of slightly-offset re-projections" that the single-pass - ``_SimplePGO.build_global_map`` produces. - """ - from scipy.spatial.transform import Slerp - - from dimos.mapping.voxels import VoxelGrid - - cfg = PGOConfig(**pgo_cfg) - pgo = _SimplePGO(cfg) - - n_frames = 0 - for obs in stream: - if pass1_on_frame is not None: - pass1_on_frame(obs) - if obs.pose is None: - continue - x, y, z, qx, qy, qz, qw = obs.pose - # Skip placeholder poses: zero translation, or invalid (zero-norm) quaternion. - if x == 0 and y == 0 and z == 0: - continue - if qx == 0 and qy == 0 and qz == 0 and qw == 0: - continue - r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() - t = np.array([x, y, z]) - points, _ = obs.data.as_numpy() - if len(points) == 0: - continue - body_pts = ( - (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3] - ) - if pgo.add_key_pose(r, t, obs.ts, body_pts): - pgo.search_for_loops() - pgo.smooth_and_update() - n_frames += 1 - - n_kf = pgo.num_key_poses - print(f" Pass 1: {n_frames} frames, {n_kf} keyframes") - - if corrected_path_out is not None: - corrected_path_out.extend( - (float(kp.t_global[0]), float(kp.t_global[1]), float(kp.t_global[2])) - for kp in pgo._key_poses - ) - - grid = VoxelGrid(voxel_size=voxel_size, block_count=block_count, device=device) - try: - if n_kf < 2: - for obs in stream: - if pass2_on_frame is not None: - pass2_on_frame(obs) - grid.add_frame(obs.data) - return grid.get_global_pointcloud2() - - # Sort + dedupe by timestamp: Slerp requires strictly increasing ts. - kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp) - kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp] - kf_ts = np.array([kp.timestamp for kp in kps]) - # Per-keyframe rigid drift correction: T_corr = T_global @ T_local.inv() - R_corr_list = [kp.r_global @ kp.r_local.T for kp in kps] - t_corr_list = [kp.t_global - (kp.r_global @ kp.r_local.T) @ kp.t_local for kp in kps] - t_corrs = np.stack(t_corr_list) - rot_slerp = Slerp(kf_ts, Rotation.from_matrix(np.stack(R_corr_list))) - - n_inserted = 0 - for obs in stream: - if pass2_on_frame is not None: - pass2_on_frame(obs) - if obs.pose is None: - continue - ts = float(np.clip(obs.ts, kf_ts[0], kf_ts[-1])) - r_correction = rot_slerp([ts])[0].as_matrix() - idx = int(np.searchsorted(kf_ts, ts)) - if idx == 0: - t_correction = t_corrs[0] - elif idx >= len(kf_ts): - t_correction = t_corrs[-1] - else: - t_lo, t_hi = kf_ts[idx - 1], kf_ts[idx] - alpha = (ts - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 - t_correction = (1 - alpha) * t_corrs[idx - 1] + alpha * t_corrs[idx] - - points, _ = obs.data.as_numpy() - if len(points) == 0: - continue - corrected_pts = (r_correction @ points[:, :3].T).T + t_correction - grid.add_frame(PointCloud2.from_numpy(corrected_pts.astype(np.float32))) - n_inserted += 1 - - print(f" Pass 2: {n_inserted} frames inserted with PGO-corrected poses") - return grid.get_global_pointcloud2() - finally: - grid.dispose() diff --git a/dimos/mapping/relocalization/pgo2.py b/dimos/mapping/relocalization/pgo2.py new file mode 100644 index 0000000000..3aec9cb0eb --- /dev/null +++ b/dimos/mapping/relocalization/pgo2.py @@ -0,0 +1,235 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""PGO drift corrections as composable Stream stages. + +Pipeline: + + lidar: Stream[PointCloud2] + -> pgo_keyframes(...) -> Stream[Keyframe] + -> keyframes_to_corrections(...) -> Stream[Transform] (world_corrected <- world_raw) + -> apply_corrections(any_stream, corrections) -> Stream[T] (obs.pose shuffled) + +The math: per keyframe, the drift correction is + R_corr = R_global @ R_local.T + t_corr = t_global - R_corr @ t_local +and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t, +clipping out-of-range to endpoints. +""" + +from __future__ import annotations + +from collections.abc import Callable, Iterator +from dataclasses import dataclass +from typing import Any, TypeVar + +import numpy as np +from scipy.spatial.transform import Rotation, Slerp + +from dimos.memory2.store.memory import MemoryStore +from dimos.memory2.stream import Stream +from dimos.memory2.type.observation import Observation +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 + +T = TypeVar("T") + + +@dataclass(frozen=True) +class Keyframe: + ts: float + r_local: np.ndarray # 3x3 + t_local: np.ndarray # (3,) + r_global: np.ndarray # 3x3 + t_global: np.ndarray # (3,) + + +def pgo_keyframes( + stream: Stream[PointCloud2], + *, + on_frame: Callable[[Any], None] | None = None, + **pgo_cfg: Any, +) -> Stream[Keyframe]: + """Run PGO across a pose-stamped point-cloud stream; return one obs per keyframe.""" + # Imported here to keep pgo.py the only place that imports gtsam at module scope. + from dimos.mapping.relocalization.pgo import PGOConfig, _SimplePGO + + cfg = PGOConfig(**pgo_cfg) + pgo = _SimplePGO(cfg) + + for obs in stream: + if on_frame is not None: + on_frame(obs) + if obs.pose is None: + continue + x, y, z, qx, qy, qz, qw = obs.pose + if x == 0 and y == 0 and z == 0: + continue + if qx == 0 and qy == 0 and qz == 0 and qw == 0: + continue + r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() + t = np.array([x, y, z]) + points, _ = obs.data.as_numpy() + if len(points) == 0: + continue + body_pts = ( + (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3] + ) + if pgo.add_key_pose(r, t, obs.ts, body_pts): + pgo.search_for_loops() + pgo.smooth_and_update() + + kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp) + kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp] + + mem = MemoryStore() + out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) + for kp in kps: + out.append( + Keyframe( + ts=kp.timestamp, + r_local=np.ascontiguousarray(kp.r_local), + t_local=np.ascontiguousarray(kp.t_local), + r_global=np.ascontiguousarray(kp.r_global), + t_global=np.ascontiguousarray(kp.t_global), + ), + ts=kp.timestamp, + ) + return out + + +def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: + """Per-keyframe drift correction as Transform(world_corrected <- world_raw).""" + mem = MemoryStore() + out: Stream[Transform] = mem.stream("corrections", Transform) + for obs in keyframes: + kp = obs.data + R_corr = kp.r_global @ kp.r_local.T + t_corr = kp.t_global - R_corr @ kp.t_local + tf = Transform( + translation=Vector3(float(t_corr[0]), float(t_corr[1]), float(t_corr[2])), + rotation=Quaternion.from_rotation_matrix(R_corr), + frame_id="world_corrected", + child_frame_id="world_raw", + ts=kp.ts, + ) + out.append(tf, ts=kp.ts) + return out + + +def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]: + """Materialize corrections once; return a fast ts -> Transform lookup.""" + ts_list: list[float] = [] + R_list: list[np.ndarray] = [] + t_list: list[np.ndarray] = [] + for obs in corrections: + tf = obs.data + ts_list.append(tf.ts) + q = tf.rotation + R_list.append(Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix()) + t_list.append(np.array([tf.translation.x, tf.translation.y, tf.translation.z])) + + if not ts_list: + raise ValueError("empty corrections stream") + + if len(ts_list) == 1: + only_ts = ts_list[0] + only_R = R_list[0] + only_t = t_list[0] + + def _const(ts: float) -> Transform: + return Transform( + translation=Vector3(float(only_t[0]), float(only_t[1]), float(only_t[2])), + rotation=Quaternion.from_rotation_matrix(only_R), + frame_id="world_corrected", + child_frame_id="world_raw", + ts=ts if ts is not None else only_ts, + ) + + return _const + + ts_arr = np.array(ts_list) + R_stack = np.stack(R_list) + t_stack = np.stack(t_list) + slerp = Slerp(ts_arr, Rotation.from_matrix(R_stack)) + + def interp(ts: float) -> Transform: + ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) + R = slerp([ts_clip])[0].as_matrix() + idx = int(np.searchsorted(ts_arr, ts_clip)) + if idx == 0: + t = t_stack[0] + elif idx >= len(ts_arr): + t = t_stack[-1] + else: + t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] + alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 + t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx] + return Transform( + translation=Vector3(float(t[0]), float(t[1]), float(t[2])), + rotation=Quaternion.from_rotation_matrix(R), + frame_id="world_corrected", + child_frame_id="world_raw", + ts=float(ts), + ) + + return interp + + +def correction_at(corrections: Stream[Transform], ts: float) -> Transform: + """One-off lookup. For hot paths build `make_interpolator` once and reuse.""" + return make_interpolator(corrections)(ts) + + +def apply_corrections( + stream: Stream[T], + corrections: Stream[Transform], +) -> Stream[T]: + """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts. + + `obs.data` is untouched. Frames with `obs.pose is None` pass through + unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped). + """ + interp = make_interpolator(corrections) + + def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: + for obs in upstream: + if obs.pose is None: + yield obs + continue + x, y, z, qx, qy, qz, qw = obs.pose + tf = interp(obs.ts) + R_corr = Rotation.from_quat( + [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w] + ).as_matrix() + t_corr = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) + R_raw = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() + t_raw = np.array([x, y, z]) + R_new = R_corr @ R_raw + t_new = R_corr @ t_raw + t_corr + q_new = Rotation.from_matrix(R_new).as_quat() # xyzw + new_pose = ( + float(t_new[0]), + float(t_new[1]), + float(t_new[2]), + float(q_new[0]), + float(q_new[1]), + float(q_new[2]), + float(q_new[3]), + ) + yield obs.derive(data=obs.data, pose=new_pose) + + return stream.transform(xf) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 9ef30b24b2..dc8f934347 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -95,19 +95,47 @@ def collect_path(obs): pgo_map = None pgo_path: list[tuple[float, float, float]] = [] if pgo: - from dimos.mapping.relocalization.pgo import pgo_then_voxels + import numpy as np + from scipy.spatial.transform import Rotation + + from dimos.mapping.relocalization.pgo2 import ( + keyframes_to_corrections, + make_interpolator, + pgo_keyframes, + ) + from dimos.mapping.voxels import VoxelGrid + from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 total = lidar.count() print("running PGO twopass map...") - pgo_map = pgo_then_voxels( - lidar, - voxel_size=voxel, - block_count=block_count, - device=device, - corrected_path_out=pgo_path, - pass1_on_frame=progress(total, "pgo pass 1 (optimizing)"), - pass2_on_frame=progress(total, "pgo pass 2 (rebuilding)"), - ) + keyframes = pgo_keyframes(lidar, on_frame=progress(total, "pgo pass 1 (optimizing)")) + corrections = keyframes_to_corrections(keyframes) + interp = make_interpolator(corrections) + + for obs in keyframes: + kp = obs.data + pgo_path.append((float(kp.t_global[0]), float(kp.t_global[1]), float(kp.t_global[2]))) + + pass2_pb = progress(total, "pgo pass 2 (rebuilding)") + grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) + try: + for obs in lidar: + pass2_pb(obs) + if obs.pose is None: + continue + pts, _ = obs.data.as_numpy() + if len(pts) == 0: + continue + tf = interp(obs.ts) + R = Rotation.from_quat( + [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w] + ).as_matrix() + t = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) + corrected = (R @ pts[:, :3].T).T + t + grid.add_frame(PointCloud2.from_numpy(corrected.astype(np.float32))) + pgo_map = grid.get_global_pointcloud2() + finally: + grid.dispose() global_map = ( lidar.tap(collect_path) From ebef675f98249bc429c8c636e855042520f063aa Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 18:39:44 +0800 Subject: [PATCH 06/64] mapping/relocalization: reclaim pgo.py for the public API Renames: pgo.py -> pgo_internals.py (gtsam/ICP machinery) pgo2.py -> pgo.py (public Stream-shaped API) `dimos.mapping.relocalization.pgo` is now the canonical import path (`pgo_keyframes`, `keyframes_to_corrections`, `make_interpolator`, `apply_corrections`, `correction_at`, `Keyframe`). The internal _SimplePGO / PGOConfig / _KeyPose / _icp / _voxel_downsample helpers live in pgo_internals.py and are imported lazily inside pgo.py to keep gtsam off the public-API import path. --- dimos/mapping/relocalization/pgo.py | 540 +++++++----------- dimos/mapping/relocalization/pgo2.py | 235 -------- dimos/mapping/relocalization/pgo_internals.py | 369 ++++++++++++ dimos/utils/cli/map.py | 2 +- 4 files changed, 573 insertions(+), 573 deletions(-) delete mode 100644 dimos/mapping/relocalization/pgo2.py create mode 100644 dimos/mapping/relocalization/pgo_internals.py diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index 74a48c7236..653c7158d2 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -12,358 +12,224 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Internal PGO/ICP machinery (gtsam-backed). Public consumers should use -# the Stream-shaped API in pgo2.py — pgo_keyframes, keyframes_to_corrections, -# make_interpolator, apply_corrections. The classes here (_SimplePGO, PGOConfig, -# _KeyPose) are imported from pgo2 to do the heavy lifting. +"""PGO drift corrections as composable Stream stages. + +Pipeline: + + lidar: Stream[PointCloud2] + -> pgo_keyframes(...) -> Stream[Keyframe] + -> keyframes_to_corrections(...) -> Stream[Transform] (world_corrected <- world_raw) + -> apply_corrections(any_stream, corrections) -> Stream[T] (obs.pose shuffled) + +The math: per keyframe, the drift correction is + R_corr = R_global @ R_local.T + t_corr = t_global - R_corr @ t_local +and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t, +clipping out-of-range to endpoints. +""" from __future__ import annotations +from collections.abc import Callable, Iterator from dataclasses import dataclass -from typing import Any +from typing import Any, TypeVar -import gtsam # type: ignore[import-not-found,import-untyped] import numpy as np -import open3d as o3d # type: ignore[import-untyped] -import open3d.core as o3c # type: ignore[import-untyped] -from scipy.spatial import KDTree -from scipy.spatial.transform import Rotation - -from dimos.core.module import ModuleConfig -from dimos.utils.logging_config import setup_logger - -FRAME_MAP = "world" - -logger = setup_logger() - - -class PGOConfig(ModuleConfig): - world_frame: str = FRAME_MAP - - # Keyframe detection - key_pose_delta_trans: float = 0.5 - key_pose_delta_deg: float = 10.0 - - # Loop closure - loop_search_radius: float = 2.0 - loop_time_thresh: float = 20.0 - loop_score_thresh: float = 0.3 - loop_submap_half_range: int = 10 - min_icp_inliers: int = 10 - min_keyframes_for_loop_search: int = 10 - loop_closure_extra_iterations: int = 4 - submap_resolution: float = 0.2 - min_loop_detect_duration: float = 5.0 - - # Input mode - unregister_input: bool = True # Transform world-frame scans to body-frame using odom - - # Global map - publish_global_map: bool = True - global_map_publish_rate: float = 0.5 - global_map_voxel_size: float = 0.15 - - # ICP - max_icp_iterations: int = 50 - max_icp_correspondence_dist: float = 1.0 - - -@dataclass -class _KeyPose: - r_local: np.ndarray # 3x3 rotation in local/odom frame - t_local: np.ndarray # 3-vec translation in local/odom frame - r_global: np.ndarray # 3x3 corrected rotation - t_global: np.ndarray # 3-vec corrected translation - timestamp: float - body_cloud: np.ndarray # Nx3 points in body frame - - -def _icp( - source: np.ndarray, - target: np.ndarray, - max_iter: int = 50, - max_dist: float = 1.0, - tol: float = 1e-6, - min_inliers: int = 10, - init: np.ndarray | None = None, -) -> tuple[np.ndarray, float]: - """Point-to-point ICP using Open3D's tensor pipeline. - - Returns ``(T, fitness)`` where ``fitness`` is mean squared inlier - distance (m²) — same semantic as the previous SVD implementation, so - the GTSAM noise model in ``smooth_and_update`` keeps working. - """ - if len(source) < min_inliers or len(target) < min_inliers: - return np.eye(4), float("inf") - - cpu = o3c.Device("CPU:0") - src_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(source.astype(np.float32), device=cpu)) - tgt_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(target.astype(np.float32), device=cpu)) - - # Normals on the target enable point-to-plane ICP, which converges - # tighter than point-to-point on indoor scenes (walls give unambiguous - # normals that resolve the slide-along-wall ambiguity). - tgt_pcd.estimate_normals(max_nn=30, radius=0.3) - - init_T = ( - o3c.Tensor(init.astype(np.float64), dtype=o3c.float64, device=cpu) - if init is not None - else o3c.Tensor.eye(4, dtype=o3c.float64, device=cpu) - ) - - # Silence Open3D's "0 correspondence" warning — we deliberately use a - # tight max_correspondence_distance and reject loops with poor fitness; - # the warning is informational, not an error. - with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): - result = o3d.t.pipelines.registration.icp( - source=src_pcd, - target=tgt_pcd, - max_correspondence_distance=max_dist, - init_source_to_target=init_T, - estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), - criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( - relative_fitness=tol, - relative_rmse=tol, - max_iteration=max_iter, +from scipy.spatial.transform import Rotation, Slerp + +from dimos.memory2.store.memory import MemoryStore +from dimos.memory2.stream import Stream +from dimos.memory2.type.observation import Observation +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 + +T = TypeVar("T") + + +@dataclass(frozen=True) +class Keyframe: + ts: float + r_local: np.ndarray # 3x3 + t_local: np.ndarray # (3,) + r_global: np.ndarray # 3x3 + t_global: np.ndarray # (3,) + + +def pgo_keyframes( + stream: Stream[PointCloud2], + *, + on_frame: Callable[[Any], None] | None = None, + **pgo_cfg: Any, +) -> Stream[Keyframe]: + """Run PGO across a pose-stamped point-cloud stream; return one obs per keyframe.""" + # Imported here to keep pgo_internals.py the only place that imports gtsam at module scope. + from dimos.mapping.relocalization.pgo_internals import PGOConfig, _SimplePGO + + cfg = PGOConfig(**pgo_cfg) + pgo = _SimplePGO(cfg) + + for obs in stream: + if on_frame is not None: + on_frame(obs) + if obs.pose is None: + continue + x, y, z, qx, qy, qz, qw = obs.pose + if x == 0 and y == 0 and z == 0: + continue + if qx == 0 and qy == 0 and qz == 0 and qw == 0: + continue + r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() + t = np.array([x, y, z]) + points, _ = obs.data.as_numpy() + if len(points) == 0: + continue + body_pts = ( + (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3] + ) + if pgo.add_key_pose(r, t, obs.ts, body_pts): + pgo.search_for_loops() + pgo.smooth_and_update() + + kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp) + kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp] + + mem = MemoryStore() + out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) + for kp in kps: + out.append( + Keyframe( + ts=kp.timestamp, + r_local=np.ascontiguousarray(kp.r_local), + t_local=np.ascontiguousarray(kp.t_local), + r_global=np.ascontiguousarray(kp.r_global), + t_global=np.ascontiguousarray(kp.t_global), ), + ts=kp.timestamp, ) - - fitness_inlier_frac = float(result.fitness) - if fitness_inlier_frac == 0.0: - return np.eye(4), float("inf") - - rmse = float(result.inlier_rmse) - T = result.transformation.numpy() - # Return mean squared inlier distance (m²) to match prior _icp contract. - return T, rmse * rmse - - -def _voxel_downsample(pts: np.ndarray, voxel_size: float) -> np.ndarray: - if len(pts) == 0 or voxel_size <= 0: - return pts - keys = np.floor(pts / voxel_size).astype(np.int32) - _, idx = np.unique(keys, axis=0, return_index=True) - return pts[idx] - - -class _SimplePGO: - def __init__(self, config: PGOConfig) -> None: - self._cfg = config - self._key_poses: list[_KeyPose] = [] - self._history_pairs: list[tuple[int, int]] = [] - self._cache_pairs: list[dict[str, Any]] = [] - self._r_offset = np.eye(3) - self._t_offset = np.zeros(3) - - params = gtsam.ISAM2Params() - params.setRelinearizeThreshold(0.01) - params.relinearizeSkip = 1 - self._isam2 = gtsam.ISAM2(params) - self._graph = gtsam.NonlinearFactorGraph() - self._values = gtsam.Values() - - def is_key_pose(self, r: np.ndarray, t: np.ndarray) -> bool: - if not self._key_poses: - return True - last = self._key_poses[-1] - delta_trans = np.linalg.norm(t - last.t_local) - # Angular distance via quaternion dot product - q_cur = Rotation.from_matrix(r).as_quat() # [x,y,z,w] - q_last = Rotation.from_matrix(last.r_local).as_quat() - dot = abs(np.dot(q_cur, q_last)) - delta_deg = np.degrees(2.0 * np.arccos(min(dot, 1.0))) - return bool( - delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg + return out + + +def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: + """Per-keyframe drift correction as Transform(world_corrected <- world_raw).""" + mem = MemoryStore() + out: Stream[Transform] = mem.stream("corrections", Transform) + for obs in keyframes: + kp = obs.data + R_corr = kp.r_global @ kp.r_local.T + t_corr = kp.t_global - R_corr @ kp.t_local + tf = Transform( + translation=Vector3(float(t_corr[0]), float(t_corr[1]), float(t_corr[2])), + rotation=Quaternion.from_rotation_matrix(R_corr), + frame_id="world_corrected", + child_frame_id="world_raw", + ts=kp.ts, ) + out.append(tf, ts=kp.ts) + return out + + +def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]: + """Materialize corrections once; return a fast ts -> Transform lookup.""" + ts_list: list[float] = [] + R_list: list[np.ndarray] = [] + t_list: list[np.ndarray] = [] + for obs in corrections: + tf = obs.data + ts_list.append(tf.ts) + q = tf.rotation + R_list.append(Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix()) + t_list.append(np.array([tf.translation.x, tf.translation.y, tf.translation.z])) + + if not ts_list: + raise ValueError("empty corrections stream") + + if len(ts_list) == 1: + only_ts = ts_list[0] + only_R = R_list[0] + only_t = t_list[0] + + def _const(ts: float) -> Transform: + return Transform( + translation=Vector3(float(only_t[0]), float(only_t[1]), float(only_t[2])), + rotation=Quaternion.from_rotation_matrix(only_R), + frame_id="world_corrected", + child_frame_id="world_raw", + ts=ts if ts is not None else only_ts, + ) - def add_key_pose( - self, r_local: np.ndarray, t_local: np.ndarray, timestamp: float, body_cloud: np.ndarray - ) -> bool: - if not self.is_key_pose(r_local, t_local): - return False - - idx = len(self._key_poses) - init_r = self._r_offset @ r_local - init_t = self._r_offset @ t_local + self._t_offset + return _const - pose = gtsam.Pose3(gtsam.Rot3(init_r), gtsam.Point3(init_t)) - self._values.insert(idx, pose) + ts_arr = np.array(ts_list) + R_stack = np.stack(R_list) + t_stack = np.stack(t_list) + slerp = Slerp(ts_arr, Rotation.from_matrix(R_stack)) + def interp(ts: float) -> Transform: + ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) + R = slerp([ts_clip])[0].as_matrix() + idx = int(np.searchsorted(ts_arr, ts_clip)) if idx == 0: - noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) - self._graph.add(gtsam.PriorFactorPose3(idx, pose, noise)) + t = t_stack[0] + elif idx >= len(ts_arr): + t = t_stack[-1] else: - last = self._key_poses[-1] - r_between = last.r_local.T @ r_local - t_between = last.r_local.T @ (t_local - last.t_local) - noise = gtsam.noiseModel.Diagonal.Variances( - np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6]) - ) - self._graph.add( - gtsam.BetweenFactorPose3( - idx - 1, idx, gtsam.Pose3(gtsam.Rot3(r_between), gtsam.Point3(t_between)), noise - ) - ) - - kp = _KeyPose( - r_local=r_local.copy(), - t_local=t_local.copy(), - r_global=init_r.copy(), - t_global=init_t.copy(), - timestamp=timestamp, - body_cloud=_voxel_downsample(body_cloud, self._cfg.submap_resolution), - ) - self._key_poses.append(kp) - return True - - def _get_submap(self, idx: int, half_range: int) -> np.ndarray: - lo = max(0, idx - half_range) - hi = min(len(self._key_poses) - 1, idx + half_range) - parts = [] - for i in range(lo, hi + 1): - kp = self._key_poses[i] - world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global - parts.append(world) - if not parts: - return np.empty((0, 3)) - cloud = np.vstack(parts) - return _voxel_downsample(cloud, self._cfg.submap_resolution) - - def search_for_loops(self) -> None: - if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: - return - - # Rate limit - if self._history_pairs: - cur_time = self._key_poses[-1].timestamp - last_time = self._key_poses[self._history_pairs[-1][1]].timestamp - if cur_time - last_time < self._cfg.min_loop_detect_duration: - return - - cur_idx = len(self._key_poses) - 1 - cur_kp = self._key_poses[-1] - - # Build KD-tree of previous keyframe positions - positions = np.array([kp.t_global for kp in self._key_poses[:-1]]) - tree = KDTree(positions) - - idxs = tree.query_ball_point(cur_kp.t_global, self._cfg.loop_search_radius) - if not idxs: - return - - # Pick the spatially closest keyframe that's also old enough in time. - # query_ball_point doesn't sort, so we sort by distance ourselves. - candidates = [ - (float(np.linalg.norm(self._key_poses[i].t_global - cur_kp.t_global)), i) - for i in idxs - if abs(cur_kp.timestamp - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh - ] - if not candidates: - return - candidates.sort() - loop_idx = candidates[0][1] - - # ICP verification - target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range) - source = self._get_submap(cur_idx, 0) - - transform, fitness = _icp( - source, - target, - max_iter=self._cfg.max_icp_iterations, - max_dist=self._cfg.max_icp_correspondence_dist, - min_inliers=self._cfg.min_icp_inliers, - ) - if fitness > self._cfg.loop_score_thresh: - return - - # Compute relative pose - R_icp = transform[:3, :3] - t_icp = transform[:3, 3] - r_refined = R_icp @ cur_kp.r_global - t_refined = R_icp @ cur_kp.t_global + t_icp - r_offset = self._key_poses[loop_idx].r_global.T @ r_refined - t_offset = self._key_poses[loop_idx].r_global.T @ ( - t_refined - self._key_poses[loop_idx].t_global + t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] + alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 + t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx] + return Transform( + translation=Vector3(float(t[0]), float(t[1]), float(t[2])), + rotation=Quaternion.from_rotation_matrix(R), + frame_id="world_corrected", + child_frame_id="world_raw", + ts=float(ts), ) - self._cache_pairs.append( - { - "source": cur_idx, - "target": loop_idx, - "r_offset": r_offset, - "t_offset": t_offset, - "score": fitness, - } - ) - self._history_pairs.append((loop_idx, cur_idx)) - logger.info( - "Loop closure detected", - source=cur_idx, - target=loop_idx, - score=round(fitness, 4), - ) + return interp - def smooth_and_update(self) -> None: - has_loop = bool(self._cache_pairs) - - for pair in self._cache_pairs: - # Pose3 noise model is [rx, ry, rz, x, y, z]. The two halves - # have different units (rad² vs m²), so a uniform variance — - # the original behaviour — silently makes one half pathological - # (e.g. score=0.07 → σ_rot ≈ 15° AND σ_trans ≈ 26 cm; one of - # those is too tight, one is too loose, depending on the loop). - # Use ICP fitness as the *translation* variance and a - # generous fixed rotation variance — loops shouldn't be - # trusted to fix rotation tightly without normals + p2plane. - trans_var = max(0.01, float(pair["score"])) # ≥ σ_trans = 10 cm - rot_var = 0.05 # σ_rot ≈ 13° - noise = gtsam.noiseModel.Diagonal.Variances( - np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) - ) - self._graph.add( - gtsam.BetweenFactorPose3( - pair["target"], - pair["source"], - gtsam.Pose3(gtsam.Rot3(pair["r_offset"]), gtsam.Point3(pair["t_offset"])), - noise, - ) + +def correction_at(corrections: Stream[Transform], ts: float) -> Transform: + """One-off lookup. For hot paths build `make_interpolator` once and reuse.""" + return make_interpolator(corrections)(ts) + + +def apply_corrections( + stream: Stream[T], + corrections: Stream[Transform], +) -> Stream[T]: + """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts. + + `obs.data` is untouched. Frames with `obs.pose is None` pass through + unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped). + """ + interp = make_interpolator(corrections) + + def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: + for obs in upstream: + if obs.pose is None: + yield obs + continue + x, y, z, qx, qy, qz, qw = obs.pose + tf = interp(obs.ts) + R_corr = Rotation.from_quat( + [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w] + ).as_matrix() + t_corr = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) + R_raw = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() + t_raw = np.array([x, y, z]) + R_new = R_corr @ R_raw + t_new = R_corr @ t_raw + t_corr + q_new = Rotation.from_matrix(R_new).as_quat() # xyzw + new_pose = ( + float(t_new[0]), + float(t_new[1]), + float(t_new[2]), + float(q_new[0]), + float(q_new[1]), + float(q_new[2]), + float(q_new[3]), ) - self._cache_pairs.clear() - - self._isam2.update(self._graph, self._values) - self._isam2.update() - if has_loop: - for _ in range(self._cfg.loop_closure_extra_iterations): - self._isam2.update() - self._graph = gtsam.NonlinearFactorGraph() - self._values = gtsam.Values() - - estimates = self._isam2.calculateBestEstimate() - for i in range(len(self._key_poses)): - pose = estimates.atPose3(i) - self._key_poses[i].r_global = pose.rotation().matrix() - self._key_poses[i].t_global = pose.translation() - - last = self._key_poses[-1] - self._r_offset = last.r_global @ last.r_local.T - self._t_offset = last.t_global - self._r_offset @ last.t_local - - def get_corrected_pose( - self, r_local: np.ndarray, t_local: np.ndarray - ) -> tuple[np.ndarray, np.ndarray]: - return self._r_offset @ r_local, self._r_offset @ t_local + self._t_offset - - def build_global_map(self, voxel_size: float) -> np.ndarray: - if not self._key_poses: - return np.empty((0, 3), dtype=np.float32) - parts = [] - for kp in self._key_poses: - world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global - parts.append(world) - cloud = np.vstack(parts).astype(np.float32) - return _voxel_downsample(cloud, voxel_size) - - @property - def num_key_poses(self) -> int: - return len(self._key_poses) + yield obs.derive(data=obs.data, pose=new_pose) + + return stream.transform(xf) diff --git a/dimos/mapping/relocalization/pgo2.py b/dimos/mapping/relocalization/pgo2.py deleted file mode 100644 index 3aec9cb0eb..0000000000 --- a/dimos/mapping/relocalization/pgo2.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright 2026 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""PGO drift corrections as composable Stream stages. - -Pipeline: - - lidar: Stream[PointCloud2] - -> pgo_keyframes(...) -> Stream[Keyframe] - -> keyframes_to_corrections(...) -> Stream[Transform] (world_corrected <- world_raw) - -> apply_corrections(any_stream, corrections) -> Stream[T] (obs.pose shuffled) - -The math: per keyframe, the drift correction is - R_corr = R_global @ R_local.T - t_corr = t_global - R_corr @ t_local -and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t, -clipping out-of-range to endpoints. -""" - -from __future__ import annotations - -from collections.abc import Callable, Iterator -from dataclasses import dataclass -from typing import Any, TypeVar - -import numpy as np -from scipy.spatial.transform import Rotation, Slerp - -from dimos.memory2.store.memory import MemoryStore -from dimos.memory2.stream import Stream -from dimos.memory2.type.observation import Observation -from dimos.msgs.geometry_msgs.Quaternion import Quaternion -from dimos.msgs.geometry_msgs.Transform import Transform -from dimos.msgs.geometry_msgs.Vector3 import Vector3 -from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 - -T = TypeVar("T") - - -@dataclass(frozen=True) -class Keyframe: - ts: float - r_local: np.ndarray # 3x3 - t_local: np.ndarray # (3,) - r_global: np.ndarray # 3x3 - t_global: np.ndarray # (3,) - - -def pgo_keyframes( - stream: Stream[PointCloud2], - *, - on_frame: Callable[[Any], None] | None = None, - **pgo_cfg: Any, -) -> Stream[Keyframe]: - """Run PGO across a pose-stamped point-cloud stream; return one obs per keyframe.""" - # Imported here to keep pgo.py the only place that imports gtsam at module scope. - from dimos.mapping.relocalization.pgo import PGOConfig, _SimplePGO - - cfg = PGOConfig(**pgo_cfg) - pgo = _SimplePGO(cfg) - - for obs in stream: - if on_frame is not None: - on_frame(obs) - if obs.pose is None: - continue - x, y, z, qx, qy, qz, qw = obs.pose - if x == 0 and y == 0 and z == 0: - continue - if qx == 0 and qy == 0 and qz == 0 and qw == 0: - continue - r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() - t = np.array([x, y, z]) - points, _ = obs.data.as_numpy() - if len(points) == 0: - continue - body_pts = ( - (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3] - ) - if pgo.add_key_pose(r, t, obs.ts, body_pts): - pgo.search_for_loops() - pgo.smooth_and_update() - - kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp) - kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp] - - mem = MemoryStore() - out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) - for kp in kps: - out.append( - Keyframe( - ts=kp.timestamp, - r_local=np.ascontiguousarray(kp.r_local), - t_local=np.ascontiguousarray(kp.t_local), - r_global=np.ascontiguousarray(kp.r_global), - t_global=np.ascontiguousarray(kp.t_global), - ), - ts=kp.timestamp, - ) - return out - - -def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: - """Per-keyframe drift correction as Transform(world_corrected <- world_raw).""" - mem = MemoryStore() - out: Stream[Transform] = mem.stream("corrections", Transform) - for obs in keyframes: - kp = obs.data - R_corr = kp.r_global @ kp.r_local.T - t_corr = kp.t_global - R_corr @ kp.t_local - tf = Transform( - translation=Vector3(float(t_corr[0]), float(t_corr[1]), float(t_corr[2])), - rotation=Quaternion.from_rotation_matrix(R_corr), - frame_id="world_corrected", - child_frame_id="world_raw", - ts=kp.ts, - ) - out.append(tf, ts=kp.ts) - return out - - -def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]: - """Materialize corrections once; return a fast ts -> Transform lookup.""" - ts_list: list[float] = [] - R_list: list[np.ndarray] = [] - t_list: list[np.ndarray] = [] - for obs in corrections: - tf = obs.data - ts_list.append(tf.ts) - q = tf.rotation - R_list.append(Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix()) - t_list.append(np.array([tf.translation.x, tf.translation.y, tf.translation.z])) - - if not ts_list: - raise ValueError("empty corrections stream") - - if len(ts_list) == 1: - only_ts = ts_list[0] - only_R = R_list[0] - only_t = t_list[0] - - def _const(ts: float) -> Transform: - return Transform( - translation=Vector3(float(only_t[0]), float(only_t[1]), float(only_t[2])), - rotation=Quaternion.from_rotation_matrix(only_R), - frame_id="world_corrected", - child_frame_id="world_raw", - ts=ts if ts is not None else only_ts, - ) - - return _const - - ts_arr = np.array(ts_list) - R_stack = np.stack(R_list) - t_stack = np.stack(t_list) - slerp = Slerp(ts_arr, Rotation.from_matrix(R_stack)) - - def interp(ts: float) -> Transform: - ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) - R = slerp([ts_clip])[0].as_matrix() - idx = int(np.searchsorted(ts_arr, ts_clip)) - if idx == 0: - t = t_stack[0] - elif idx >= len(ts_arr): - t = t_stack[-1] - else: - t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] - alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 - t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx] - return Transform( - translation=Vector3(float(t[0]), float(t[1]), float(t[2])), - rotation=Quaternion.from_rotation_matrix(R), - frame_id="world_corrected", - child_frame_id="world_raw", - ts=float(ts), - ) - - return interp - - -def correction_at(corrections: Stream[Transform], ts: float) -> Transform: - """One-off lookup. For hot paths build `make_interpolator` once and reuse.""" - return make_interpolator(corrections)(ts) - - -def apply_corrections( - stream: Stream[T], - corrections: Stream[Transform], -) -> Stream[T]: - """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts. - - `obs.data` is untouched. Frames with `obs.pose is None` pass through - unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped). - """ - interp = make_interpolator(corrections) - - def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: - for obs in upstream: - if obs.pose is None: - yield obs - continue - x, y, z, qx, qy, qz, qw = obs.pose - tf = interp(obs.ts) - R_corr = Rotation.from_quat( - [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w] - ).as_matrix() - t_corr = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) - R_raw = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() - t_raw = np.array([x, y, z]) - R_new = R_corr @ R_raw - t_new = R_corr @ t_raw + t_corr - q_new = Rotation.from_matrix(R_new).as_quat() # xyzw - new_pose = ( - float(t_new[0]), - float(t_new[1]), - float(t_new[2]), - float(q_new[0]), - float(q_new[1]), - float(q_new[2]), - float(q_new[3]), - ) - yield obs.derive(data=obs.data, pose=new_pose) - - return stream.transform(xf) diff --git a/dimos/mapping/relocalization/pgo_internals.py b/dimos/mapping/relocalization/pgo_internals.py new file mode 100644 index 0000000000..c3465d8ee5 --- /dev/null +++ b/dimos/mapping/relocalization/pgo_internals.py @@ -0,0 +1,369 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Internal PGO/ICP machinery (gtsam-backed). Public consumers should use +# the Stream-shaped API in pgo.py — pgo_keyframes, keyframes_to_corrections, +# make_interpolator, apply_corrections. The classes here (_SimplePGO, +# PGOConfig, _KeyPose) are imported from pgo to do the heavy lifting. + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +import gtsam # type: ignore[import-not-found,import-untyped] +import numpy as np +import open3d as o3d # type: ignore[import-untyped] +import open3d.core as o3c # type: ignore[import-untyped] +from scipy.spatial import KDTree +from scipy.spatial.transform import Rotation + +from dimos.core.module import ModuleConfig +from dimos.utils.logging_config import setup_logger + +FRAME_MAP = "world" + +logger = setup_logger() + + +class PGOConfig(ModuleConfig): + world_frame: str = FRAME_MAP + + # Keyframe detection + key_pose_delta_trans: float = 0.5 + key_pose_delta_deg: float = 10.0 + + # Loop closure + loop_search_radius: float = 2.0 + loop_time_thresh: float = 20.0 + loop_score_thresh: float = 0.3 + loop_submap_half_range: int = 10 + min_icp_inliers: int = 10 + min_keyframes_for_loop_search: int = 10 + loop_closure_extra_iterations: int = 4 + submap_resolution: float = 0.2 + min_loop_detect_duration: float = 5.0 + + # Input mode + unregister_input: bool = True # Transform world-frame scans to body-frame using odom + + # Global map + publish_global_map: bool = True + global_map_publish_rate: float = 0.5 + global_map_voxel_size: float = 0.15 + + # ICP + max_icp_iterations: int = 50 + max_icp_correspondence_dist: float = 1.0 + + +@dataclass +class _KeyPose: + r_local: np.ndarray # 3x3 rotation in local/odom frame + t_local: np.ndarray # 3-vec translation in local/odom frame + r_global: np.ndarray # 3x3 corrected rotation + t_global: np.ndarray # 3-vec corrected translation + timestamp: float + body_cloud: np.ndarray # Nx3 points in body frame + + +def _icp( + source: np.ndarray, + target: np.ndarray, + max_iter: int = 50, + max_dist: float = 1.0, + tol: float = 1e-6, + min_inliers: int = 10, + init: np.ndarray | None = None, +) -> tuple[np.ndarray, float]: + """Point-to-point ICP using Open3D's tensor pipeline. + + Returns ``(T, fitness)`` where ``fitness`` is mean squared inlier + distance (m²) — same semantic as the previous SVD implementation, so + the GTSAM noise model in ``smooth_and_update`` keeps working. + """ + if len(source) < min_inliers or len(target) < min_inliers: + return np.eye(4), float("inf") + + cpu = o3c.Device("CPU:0") + src_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(source.astype(np.float32), device=cpu)) + tgt_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(target.astype(np.float32), device=cpu)) + + # Normals on the target enable point-to-plane ICP, which converges + # tighter than point-to-point on indoor scenes (walls give unambiguous + # normals that resolve the slide-along-wall ambiguity). + tgt_pcd.estimate_normals(max_nn=30, radius=0.3) + + init_T = ( + o3c.Tensor(init.astype(np.float64), dtype=o3c.float64, device=cpu) + if init is not None + else o3c.Tensor.eye(4, dtype=o3c.float64, device=cpu) + ) + + # Silence Open3D's "0 correspondence" warning — we deliberately use a + # tight max_correspondence_distance and reject loops with poor fitness; + # the warning is informational, not an error. + with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): + result = o3d.t.pipelines.registration.icp( + source=src_pcd, + target=tgt_pcd, + max_correspondence_distance=max_dist, + init_source_to_target=init_T, + estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), + criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( + relative_fitness=tol, + relative_rmse=tol, + max_iteration=max_iter, + ), + ) + + fitness_inlier_frac = float(result.fitness) + if fitness_inlier_frac == 0.0: + return np.eye(4), float("inf") + + rmse = float(result.inlier_rmse) + T = result.transformation.numpy() + # Return mean squared inlier distance (m²) to match prior _icp contract. + return T, rmse * rmse + + +def _voxel_downsample(pts: np.ndarray, voxel_size: float) -> np.ndarray: + if len(pts) == 0 or voxel_size <= 0: + return pts + keys = np.floor(pts / voxel_size).astype(np.int32) + _, idx = np.unique(keys, axis=0, return_index=True) + return pts[idx] + + +class _SimplePGO: + def __init__(self, config: PGOConfig) -> None: + self._cfg = config + self._key_poses: list[_KeyPose] = [] + self._history_pairs: list[tuple[int, int]] = [] + self._cache_pairs: list[dict[str, Any]] = [] + self._r_offset = np.eye(3) + self._t_offset = np.zeros(3) + + params = gtsam.ISAM2Params() + params.setRelinearizeThreshold(0.01) + params.relinearizeSkip = 1 + self._isam2 = gtsam.ISAM2(params) + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + def is_key_pose(self, r: np.ndarray, t: np.ndarray) -> bool: + if not self._key_poses: + return True + last = self._key_poses[-1] + delta_trans = np.linalg.norm(t - last.t_local) + # Angular distance via quaternion dot product + q_cur = Rotation.from_matrix(r).as_quat() # [x,y,z,w] + q_last = Rotation.from_matrix(last.r_local).as_quat() + dot = abs(np.dot(q_cur, q_last)) + delta_deg = np.degrees(2.0 * np.arccos(min(dot, 1.0))) + return bool( + delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg + ) + + def add_key_pose( + self, r_local: np.ndarray, t_local: np.ndarray, timestamp: float, body_cloud: np.ndarray + ) -> bool: + if not self.is_key_pose(r_local, t_local): + return False + + idx = len(self._key_poses) + init_r = self._r_offset @ r_local + init_t = self._r_offset @ t_local + self._t_offset + + pose = gtsam.Pose3(gtsam.Rot3(init_r), gtsam.Point3(init_t)) + self._values.insert(idx, pose) + + if idx == 0: + noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) + self._graph.add(gtsam.PriorFactorPose3(idx, pose, noise)) + else: + last = self._key_poses[-1] + r_between = last.r_local.T @ r_local + t_between = last.r_local.T @ (t_local - last.t_local) + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6]) + ) + self._graph.add( + gtsam.BetweenFactorPose3( + idx - 1, idx, gtsam.Pose3(gtsam.Rot3(r_between), gtsam.Point3(t_between)), noise + ) + ) + + kp = _KeyPose( + r_local=r_local.copy(), + t_local=t_local.copy(), + r_global=init_r.copy(), + t_global=init_t.copy(), + timestamp=timestamp, + body_cloud=_voxel_downsample(body_cloud, self._cfg.submap_resolution), + ) + self._key_poses.append(kp) + return True + + def _get_submap(self, idx: int, half_range: int) -> np.ndarray: + lo = max(0, idx - half_range) + hi = min(len(self._key_poses) - 1, idx + half_range) + parts = [] + for i in range(lo, hi + 1): + kp = self._key_poses[i] + world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global + parts.append(world) + if not parts: + return np.empty((0, 3)) + cloud = np.vstack(parts) + return _voxel_downsample(cloud, self._cfg.submap_resolution) + + def search_for_loops(self) -> None: + if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: + return + + # Rate limit + if self._history_pairs: + cur_time = self._key_poses[-1].timestamp + last_time = self._key_poses[self._history_pairs[-1][1]].timestamp + if cur_time - last_time < self._cfg.min_loop_detect_duration: + return + + cur_idx = len(self._key_poses) - 1 + cur_kp = self._key_poses[-1] + + # Build KD-tree of previous keyframe positions + positions = np.array([kp.t_global for kp in self._key_poses[:-1]]) + tree = KDTree(positions) + + idxs = tree.query_ball_point(cur_kp.t_global, self._cfg.loop_search_radius) + if not idxs: + return + + # Pick the spatially closest keyframe that's also old enough in time. + # query_ball_point doesn't sort, so we sort by distance ourselves. + candidates = [ + (float(np.linalg.norm(self._key_poses[i].t_global - cur_kp.t_global)), i) + for i in idxs + if abs(cur_kp.timestamp - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh + ] + if not candidates: + return + candidates.sort() + loop_idx = candidates[0][1] + + # ICP verification + target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range) + source = self._get_submap(cur_idx, 0) + + transform, fitness = _icp( + source, + target, + max_iter=self._cfg.max_icp_iterations, + max_dist=self._cfg.max_icp_correspondence_dist, + min_inliers=self._cfg.min_icp_inliers, + ) + if fitness > self._cfg.loop_score_thresh: + return + + # Compute relative pose + R_icp = transform[:3, :3] + t_icp = transform[:3, 3] + r_refined = R_icp @ cur_kp.r_global + t_refined = R_icp @ cur_kp.t_global + t_icp + r_offset = self._key_poses[loop_idx].r_global.T @ r_refined + t_offset = self._key_poses[loop_idx].r_global.T @ ( + t_refined - self._key_poses[loop_idx].t_global + ) + + self._cache_pairs.append( + { + "source": cur_idx, + "target": loop_idx, + "r_offset": r_offset, + "t_offset": t_offset, + "score": fitness, + } + ) + self._history_pairs.append((loop_idx, cur_idx)) + logger.info( + "Loop closure detected", + source=cur_idx, + target=loop_idx, + score=round(fitness, 4), + ) + + def smooth_and_update(self) -> None: + has_loop = bool(self._cache_pairs) + + for pair in self._cache_pairs: + # Pose3 noise model is [rx, ry, rz, x, y, z]. The two halves + # have different units (rad² vs m²), so a uniform variance — + # the original behaviour — silently makes one half pathological + # (e.g. score=0.07 → σ_rot ≈ 15° AND σ_trans ≈ 26 cm; one of + # those is too tight, one is too loose, depending on the loop). + # Use ICP fitness as the *translation* variance and a + # generous fixed rotation variance — loops shouldn't be + # trusted to fix rotation tightly without normals + p2plane. + trans_var = max(0.01, float(pair["score"])) # ≥ σ_trans = 10 cm + rot_var = 0.05 # σ_rot ≈ 13° + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) + ) + self._graph.add( + gtsam.BetweenFactorPose3( + pair["target"], + pair["source"], + gtsam.Pose3(gtsam.Rot3(pair["r_offset"]), gtsam.Point3(pair["t_offset"])), + noise, + ) + ) + self._cache_pairs.clear() + + self._isam2.update(self._graph, self._values) + self._isam2.update() + if has_loop: + for _ in range(self._cfg.loop_closure_extra_iterations): + self._isam2.update() + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + estimates = self._isam2.calculateBestEstimate() + for i in range(len(self._key_poses)): + pose = estimates.atPose3(i) + self._key_poses[i].r_global = pose.rotation().matrix() + self._key_poses[i].t_global = pose.translation() + + last = self._key_poses[-1] + self._r_offset = last.r_global @ last.r_local.T + self._t_offset = last.t_global - self._r_offset @ last.t_local + + def get_corrected_pose( + self, r_local: np.ndarray, t_local: np.ndarray + ) -> tuple[np.ndarray, np.ndarray]: + return self._r_offset @ r_local, self._r_offset @ t_local + self._t_offset + + def build_global_map(self, voxel_size: float) -> np.ndarray: + if not self._key_poses: + return np.empty((0, 3), dtype=np.float32) + parts = [] + for kp in self._key_poses: + world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global + parts.append(world) + cloud = np.vstack(parts).astype(np.float32) + return _voxel_downsample(cloud, voxel_size) + + @property + def num_key_poses(self) -> int: + return len(self._key_poses) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index dc8f934347..645844b735 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -98,7 +98,7 @@ def collect_path(obs): import numpy as np from scipy.spatial.transform import Rotation - from dimos.mapping.relocalization.pgo2 import ( + from dimos.mapping.relocalization.pgo import ( keyframes_to_corrections, make_interpolator, pgo_keyframes, From 25cf609bdc4dd56e8e207c37744507b49ccef86b Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 19:58:19 +0800 Subject: [PATCH 07/64] pgo rewrite --- dimos/mapping/relocalization/pgo.py | 558 +++++++++++++++--- dimos/mapping/relocalization/pgo_internals.py | 369 ------------ dimos/memory2/type/observation.py | 38 +- dimos/utils/cli/map.py | 6 +- 4 files changed, 502 insertions(+), 469 deletions(-) delete mode 100644 dimos/mapping/relocalization/pgo_internals.py diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index 653c7158d2..e006cc1b7b 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -26,15 +26,20 @@ t_corr = t_global - R_corr @ t_local and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t, clipping out-of-range to endpoints. + +`gtsam` and `open3d` are imported lazily inside hot helpers so importing this module +stays cheap and gtsam-free for consumers that only need `Keyframe` / `apply_corrections`. """ from __future__ import annotations from collections.abc import Callable, Iterator from dataclasses import dataclass -from typing import Any, TypeVar +from typing import TYPE_CHECKING, Any, TypeVar import numpy as np +import open3d as o3d # type: ignore[import-untyped] +import open3d.core as o3c # type: ignore[import-untyped] from scipy.spatial.transform import Rotation, Slerp from dimos.memory2.store.memory import MemoryStore @@ -44,17 +49,53 @@ from dimos.msgs.geometry_msgs.Transform import Transform from dimos.msgs.geometry_msgs.Vector3 import Vector3 from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.protocol.service.spec import BaseConfig +from dimos.utils.logging_config import setup_logger + +if TYPE_CHECKING: + import gtsam # type: ignore[import-not-found,import-untyped] T = TypeVar("T") +FRAME_WORLD_CORRECTED = "world_corrected" +FRAME_WORLD_RAW = "world_raw" +FRAME_BODY = "body" + +logger = setup_logger() + + +class PGOConfig(BaseConfig): + # Keyframe detection + key_pose_delta_trans: float = 0.5 + key_pose_delta_deg: float = 10.0 + + # Loop closure + loop_search_radius: float = 2.0 + loop_time_thresh: float = 20.0 + loop_score_thresh: float = 0.3 + loop_submap_half_range: int = 10 + min_icp_inliers: int = 10 + min_keyframes_for_loop_search: int = 10 + loop_closure_extra_iterations: int = 4 + submap_resolution: float = 0.2 + min_loop_detect_duration: float = 5.0 + + # ICP + max_icp_iterations: int = 50 + max_icp_correspondence_dist: float = 1.0 + @dataclass(frozen=True) class Keyframe: + """Keyframe emitted by `pgo_keyframes`. + + `local` is the body pose in the odom (`world_raw`) frame, `optimized` is + the body pose in the drift-corrected (`world_corrected`) frame. + """ + ts: float - r_local: np.ndarray # 3x3 - t_local: np.ndarray # (3,) - r_global: np.ndarray # 3x3 - t_global: np.ndarray # (3,) + local: Transform + optimized: Transform def pgo_keyframes( @@ -63,51 +104,32 @@ def pgo_keyframes( on_frame: Callable[[Any], None] | None = None, **pgo_cfg: Any, ) -> Stream[Keyframe]: - """Run PGO across a pose-stamped point-cloud stream; return one obs per keyframe.""" - # Imported here to keep pgo_internals.py the only place that imports gtsam at module scope. - from dimos.mapping.relocalization.pgo_internals import PGOConfig, _SimplePGO - + """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe.""" cfg = PGOConfig(**pgo_cfg) - pgo = _SimplePGO(cfg) + pgo = _PGO(cfg) for obs in stream: if on_frame is not None: on_frame(obs) if obs.pose is None: continue - x, y, z, qx, qy, qz, qw = obs.pose - if x == 0 and y == 0 and z == 0: - continue - if qx == 0 and qy == 0 and qz == 0 and qw == 0: + # Skip placeholder poses (origin position OR zero quaternion). + if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: continue - r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() - t = np.array([x, y, z]) - points, _ = obs.data.as_numpy() - if len(points) == 0: + if ( + obs.pose[3] == 0 + and obs.pose[4] == 0 + and obs.pose[5] == 0 + and (obs.pose[6] == 0 or obs.pose[6] == 1) + ): continue - body_pts = ( - (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3] - ) - if pgo.add_key_pose(r, t, obs.ts, body_pts): - pgo.search_for_loops() - pgo.smooth_and_update() - - kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp) - kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp] + local_pose = _obs_to_pose3(obs) + pgo.process(local_pose, obs.ts, obs.data) mem = MemoryStore() out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) - for kp in kps: - out.append( - Keyframe( - ts=kp.timestamp, - r_local=np.ascontiguousarray(kp.r_local), - t_local=np.ascontiguousarray(kp.t_local), - r_global=np.ascontiguousarray(kp.r_global), - t_global=np.ascontiguousarray(kp.t_global), - ), - ts=kp.timestamp, - ) + for kf in pgo.finalize(): + out.append(kf, ts=kf.ts) return out @@ -116,17 +138,10 @@ def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: mem = MemoryStore() out: Stream[Transform] = mem.stream("corrections", Transform) for obs in keyframes: - kp = obs.data - R_corr = kp.r_global @ kp.r_local.T - t_corr = kp.t_global - R_corr @ kp.t_local - tf = Transform( - translation=Vector3(float(t_corr[0]), float(t_corr[1]), float(t_corr[2])), - rotation=Quaternion.from_rotation_matrix(R_corr), - frame_id="world_corrected", - child_frame_id="world_raw", - ts=kp.ts, - ) - out.append(tf, ts=kp.ts) + kf = obs.data + # world_corrected <- body <- world_raw composes to world_corrected <- world_raw. + drift = kf.optimized + kf.local.inverse() + out.append(drift, ts=kf.ts) return out @@ -136,30 +151,22 @@ def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Trans R_list: list[np.ndarray] = [] t_list: list[np.ndarray] = [] for obs in corrections: - tf = obs.data - ts_list.append(tf.ts) - q = tf.rotation - R_list.append(Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix()) - t_list.append(np.array([tf.translation.x, tf.translation.y, tf.translation.z])) + R, t = _r_t_from_transform(obs.data) + # obs.ts is authoritative; obs.data.ts can be mutated by Transform's + # ts=0.0 -> time.time() fallback in its constructor. + ts_list.append(obs.ts) + R_list.append(R) + t_list.append(t) if not ts_list: raise ValueError("empty corrections stream") + # Slerp needs ≥2 keyframes. Pad len==1 with a duplicate so the + # general path handles it; clip-to-endpoints behavior is unchanged. if len(ts_list) == 1: - only_ts = ts_list[0] - only_R = R_list[0] - only_t = t_list[0] - - def _const(ts: float) -> Transform: - return Transform( - translation=Vector3(float(only_t[0]), float(only_t[1]), float(only_t[2])), - rotation=Quaternion.from_rotation_matrix(only_R), - frame_id="world_corrected", - child_frame_id="world_raw", - ts=ts if ts is not None else only_ts, - ) - - return _const + ts_list.append(ts_list[0] + 1e-6) + R_list.append(R_list[0]) + t_list.append(t_list[0]) ts_arr = np.array(ts_list) R_stack = np.stack(R_list) @@ -178,13 +185,7 @@ def interp(ts: float) -> Transform: t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx] - return Transform( - translation=Vector3(float(t[0]), float(t[1]), float(t[2])), - rotation=Quaternion.from_rotation_matrix(R), - frame_id="world_corrected", - child_frame_id="world_raw", - ts=float(ts), - ) + return _transform_from_r_t(R, t, ts=float(ts)) return interp @@ -210,26 +211,391 @@ def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: if obs.pose is None: yield obs continue - x, y, z, qx, qy, qz, qw = obs.pose - tf = interp(obs.ts) - R_corr = Rotation.from_quat( - [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w] - ).as_matrix() - t_corr = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) - R_raw = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() - t_raw = np.array([x, y, z]) - R_new = R_corr @ R_raw - t_new = R_corr @ t_raw + t_corr - q_new = Rotation.from_matrix(R_new).as_quat() # xyzw - new_pose = ( - float(t_new[0]), - float(t_new[1]), - float(t_new[2]), - float(q_new[0]), - float(q_new[1]), - float(q_new[2]), - float(q_new[3]), - ) - yield obs.derive(data=obs.data, pose=new_pose) + raw_tf = Transform.from_pose(FRAME_BODY, obs.pose_stamped) + # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw. + # Observation normalizes Transform back to 7-tuple via __post_init__. + corrected = interp(obs.ts) + raw_tf + yield obs.derive(data=obs.data, pose=corrected) return stream.transform(xf) + + +# --------------------------------------------------------------------------- +# Transform <-> (R, t) and obs.pose helpers (private) +# --------------------------------------------------------------------------- + + +def _r_t_from_transform(tf: Transform) -> tuple[np.ndarray, np.ndarray]: + q = tf.rotation + R = Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix() + t = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) + return R, t + + +def _transform_from_r_t(R: np.ndarray, t: np.ndarray, *, ts: float) -> Transform: + return Transform( + translation=Vector3(float(t[0]), float(t[1]), float(t[2])), + rotation=Quaternion.from_rotation_matrix(R), + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_WORLD_RAW, + ts=ts, + ) + + +def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3: + """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`.""" + import gtsam # type: ignore[import-not-found,import-untyped] + + if obs.pose is None: + raise LookupError("No pose set on this observation") + x, y, z, qx, qy, qz, qw = obs.pose + return gtsam.Pose3( + gtsam.Rot3.Quaternion(float(qw), float(qx), float(qy), float(qz)), + gtsam.Point3(float(x), float(y), float(z)), + ) + + +@dataclass +class _KeyPose: + local: gtsam.Pose3 # odom-frame pose at capture + optimized: gtsam.Pose3 # drift-corrected pose + timestamp: float + body_cloud: PointCloud2 # voxel-downsampled, body frame + + +@dataclass +class _LoopPair: + source: int + target: int + offset: gtsam.Pose3 # source pose in target's frame + score: float + + +class _PGO: + """Incremental PGO: gtsam ISAM2 over keyframes with ICP loop closures. + + Call `process` per frame (odom pose + body-frame points). Call + `finalize` once at the end for the sorted, deduped keyframe list. + """ + + def __init__(self, config: PGOConfig) -> None: + import gtsam # type: ignore[import-not-found,import-untyped] + + self._gtsam = gtsam + self._cfg = config + self._key_poses: list[_KeyPose] = [] + self._pending_loops: list[_LoopPair] = [] + self._last_loop_ts: float | None = None + self._world_correction: gtsam.Pose3 = gtsam.Pose3() # identity + + params = gtsam.ISAM2Params() + params.setRelinearizeThreshold(0.01) + params.relinearizeSkip = 1 + self._isam2 = gtsam.ISAM2(params) + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + def process( + self, + local_pose: gtsam.Pose3, + ts: float, + world_cloud: PointCloud2, + ) -> None: + if len(world_cloud) == 0: + return + if not self._is_keyframe(local_pose): + return + # Unregister: lift world-frame scan back into body frame using its + # odom pose, so PGO can re-project it via the optimized pose later. + body_cloud = world_cloud.transform( + _pose3_to_transform(local_pose.inverse(), ts=ts) + ).voxel_downsample(self._cfg.submap_resolution) + self._add_keyframe(local_pose, ts, body_cloud) + self._search_for_loops() + self._smooth_and_update() + + def finalize(self) -> list[Keyframe]: + """Return keyframes sorted by ts, with duplicate-ts entries dropped.""" + kps = sorted(self._key_poses, key=lambda kp: kp.timestamp) + out: list[Keyframe] = [] + for i, kp in enumerate(kps): + if i > 0 and kp.timestamp <= kps[i - 1].timestamp: + continue + out.append( + Keyframe( + ts=kp.timestamp, + local=_pose3_to_transform( + kp.local, + ts=kp.timestamp, + frame_id=FRAME_WORLD_RAW, + child_frame_id=FRAME_BODY, + ), + optimized=_pose3_to_transform( + kp.optimized, + ts=kp.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ), + ) + ) + return out + + # ---------- internal ---------- + + def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool: + if not self._key_poses: + return True + delta = self._key_poses[-1].local.inverse().compose(local_pose) + delta_trans = float(np.linalg.norm(np.asarray(delta.translation()))) + # Rotation magnitude from trace: cos(theta) = (tr(R) - 1) / 2. + R = delta.rotation().matrix() + cos_theta = float(np.clip((np.trace(R) - 1.0) / 2.0, -1.0, 1.0)) + delta_deg = float(np.degrees(np.arccos(cos_theta))) + return ( + delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg + ) + + def _add_keyframe( + self, + local_pose: gtsam.Pose3, + ts: float, + body_cloud: PointCloud2, + ) -> None: + gtsam = self._gtsam + idx = len(self._key_poses) + optimized = self._world_correction.compose(local_pose) + + self._values.insert(idx, optimized) + + if idx == 0: + noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) + self._graph.add(gtsam.PriorFactorPose3(idx, optimized, noise)) + else: + last_local = self._key_poses[-1].local + between = last_local.inverse().compose(local_pose) + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6]) + ) + self._graph.add(gtsam.BetweenFactorPose3(idx - 1, idx, between, noise)) + + self._key_poses.append( + _KeyPose( + local=local_pose, + optimized=optimized, + timestamp=ts, + body_cloud=body_cloud, + ) + ) + + def _get_submap(self, idx: int, half_range: int) -> PointCloud2: + lo = max(0, idx - half_range) + hi = min(len(self._key_poses) - 1, idx + half_range) + if lo > hi: + return PointCloud2() + cloud = self._key_poses[lo].body_cloud.transform( + _pose3_to_transform(self._key_poses[lo].optimized, ts=self._key_poses[lo].timestamp) + ) + for i in range(lo + 1, hi + 1): + kp = self._key_poses[i] + cloud = cloud + kp.body_cloud.transform( + _pose3_to_transform(kp.optimized, ts=kp.timestamp) + ) + return cloud.voxel_downsample(self._cfg.submap_resolution) + + def _search_for_loops(self) -> None: + if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: + return + + cur_ts = self._key_poses[-1].timestamp + if ( + self._last_loop_ts is not None + and cur_ts - self._last_loop_ts < self._cfg.min_loop_detect_duration + ): + return + + cur_idx = len(self._key_poses) - 1 + cur_kp = self._key_poses[-1] + cur_t = np.asarray(cur_kp.optimized.translation()) + + from scipy.spatial import KDTree + + positions = np.array( + [np.asarray(kp.optimized.translation()) for kp in self._key_poses[:-1]] + ) + tree = KDTree(positions) + idxs = tree.query_ball_point(cur_t, self._cfg.loop_search_radius) + if not idxs: + return + + # query_ball_point doesn't sort by distance — do it ourselves and + # filter by min time gap to avoid closing loops to recent keyframes. + candidates = [ + ( + float( + np.linalg.norm(np.asarray(self._key_poses[i].optimized.translation()) - cur_t) + ), + i, + ) + for i in idxs + if abs(cur_ts - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh + ] + if not candidates: + return + candidates.sort() + loop_idx = candidates[0][1] + + target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range) + source = self._get_submap(cur_idx, 0) + + icp_tf, fitness = _icp( + source, + target, + max_iter=self._cfg.max_icp_iterations, + max_dist=self._cfg.max_icp_correspondence_dist, + min_inliers=self._cfg.min_icp_inliers, + ) + if fitness > self._cfg.loop_score_thresh: + return + + # icp_tf takes cur_kp.optimized -> refined pose (correcting the drift). + # offset = loop_kp.optimized^-1 * refined = relative pose from loop to cur. + icp_pose = _transform_to_pose3(icp_tf) + refined = icp_pose.compose(cur_kp.optimized) + offset = self._key_poses[loop_idx].optimized.between(refined) + + self._pending_loops.append( + _LoopPair( + source=cur_idx, + target=loop_idx, + offset=offset, + score=fitness, + ) + ) + self._last_loop_ts = cur_ts + logger.info( + "Loop closure detected", + source=cur_idx, + target=loop_idx, + score=round(fitness, 4), + ) + + def _smooth_and_update(self) -> None: + gtsam = self._gtsam + has_loop = bool(self._pending_loops) + + for pair in self._pending_loops: + # Pose3 noise is [rx, ry, rz, x, y, z]. The two halves have + # different units (rad² vs m²), so a uniform variance silently + # makes one half pathological. Use ICP fitness as the + # *translation* variance and a generous fixed rotation variance + # — loops shouldn't be trusted to fix rotation tightly without + # normals + p2plane. + trans_var = max(0.01, float(pair.score)) # >= sigma_trans = 10 cm + rot_var = 0.05 # sigma_rot ~ 13 deg + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) + ) + self._graph.add(gtsam.BetweenFactorPose3(pair.target, pair.source, pair.offset, noise)) + self._pending_loops.clear() + + self._isam2.update(self._graph, self._values) + self._isam2.update() + if has_loop: + for _ in range(self._cfg.loop_closure_extra_iterations): + self._isam2.update() + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + estimates = self._isam2.calculateBestEstimate() + for i in range(len(self._key_poses)): + self._key_poses[i].optimized = estimates.atPose3(i) + + last = self._key_poses[-1] + self._world_correction = last.optimized.compose(last.local.inverse()) + + +def _pose3_to_transform( + pose: gtsam.Pose3, + *, + ts: float, + frame_id: str = "", + child_frame_id: str = "", +) -> Transform: + """PGO-internal: build a Transform from a Pose3.""" + t = np.asarray(pose.translation()) + return Transform( + translation=Vector3(float(t[0]), float(t[1]), float(t[2])), + rotation=Quaternion.from_rotation_matrix(pose.rotation().matrix()), + frame_id=frame_id, + child_frame_id=child_frame_id, + ts=ts, + ) + + +def _transform_to_pose3(tf: Transform) -> gtsam.Pose3: + """PGO-internal: build a Pose3 from a Transform.""" + import gtsam # type: ignore[import-not-found,import-untyped] + + return gtsam.Pose3(tf.to_matrix()) + + +def _icp( + source: PointCloud2, + target: PointCloud2, + max_iter: int = 50, + max_dist: float = 1.0, + tol: float = 1e-6, + min_inliers: int = 10, + init: Transform | None = None, +) -> tuple[Transform, float]: + """Point-to-plane ICP using Open3D's tensor pipeline. + + Returns ``(tf, fitness)`` where ``fitness`` is mean squared inlier + distance (m^2) — the GTSAM noise model in `_smooth_and_update` uses + this directly as sigma_trans squared. On rejection (too few points or + zero correspondences) returns the identity transform and inf fitness. + """ + + if len(source) < min_inliers or len(target) < min_inliers: + return Transform.identity(), float("inf") + + src_pcd = source.pointcloud_tensor + tgt_pcd = target.pointcloud_tensor + + # Normals on the target enable point-to-plane ICP — converges tighter + # than point-to-point on indoor scenes (walls give unambiguous normals + # that resolve the slide-along-wall ambiguity). + tgt_pcd.estimate_normals(max_nn=30, radius=0.3) + + device = src_pcd.device + init_T = o3c.Tensor( + init.to_matrix() if init is not None else np.eye(4), + dtype=o3c.float64, + device=device, + ) + + # Silence Open3D's "0 correspondence" warning — we deliberately use a + # tight max_correspondence_distance and reject loops with poor fitness. + with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): + result = o3d.t.pipelines.registration.icp( + source=src_pcd, + target=tgt_pcd, + max_correspondence_distance=max_dist, + init_source_to_target=init_T, + estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), + criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( + relative_fitness=tol, + relative_rmse=tol, + max_iteration=max_iter, + ), + ) + + if float(result.fitness) == 0.0: + return Transform.identity(), float("inf") + + T_mat = result.transformation.numpy() + rmse = float(result.inlier_rmse) + return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse + T_mat = result.transformation.numpy() + rmse = float(result.inlier_rmse) + return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse diff --git a/dimos/mapping/relocalization/pgo_internals.py b/dimos/mapping/relocalization/pgo_internals.py deleted file mode 100644 index c3465d8ee5..0000000000 --- a/dimos/mapping/relocalization/pgo_internals.py +++ /dev/null @@ -1,369 +0,0 @@ -# Copyright 2026 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Internal PGO/ICP machinery (gtsam-backed). Public consumers should use -# the Stream-shaped API in pgo.py — pgo_keyframes, keyframes_to_corrections, -# make_interpolator, apply_corrections. The classes here (_SimplePGO, -# PGOConfig, _KeyPose) are imported from pgo to do the heavy lifting. - -from __future__ import annotations - -from dataclasses import dataclass -from typing import Any - -import gtsam # type: ignore[import-not-found,import-untyped] -import numpy as np -import open3d as o3d # type: ignore[import-untyped] -import open3d.core as o3c # type: ignore[import-untyped] -from scipy.spatial import KDTree -from scipy.spatial.transform import Rotation - -from dimos.core.module import ModuleConfig -from dimos.utils.logging_config import setup_logger - -FRAME_MAP = "world" - -logger = setup_logger() - - -class PGOConfig(ModuleConfig): - world_frame: str = FRAME_MAP - - # Keyframe detection - key_pose_delta_trans: float = 0.5 - key_pose_delta_deg: float = 10.0 - - # Loop closure - loop_search_radius: float = 2.0 - loop_time_thresh: float = 20.0 - loop_score_thresh: float = 0.3 - loop_submap_half_range: int = 10 - min_icp_inliers: int = 10 - min_keyframes_for_loop_search: int = 10 - loop_closure_extra_iterations: int = 4 - submap_resolution: float = 0.2 - min_loop_detect_duration: float = 5.0 - - # Input mode - unregister_input: bool = True # Transform world-frame scans to body-frame using odom - - # Global map - publish_global_map: bool = True - global_map_publish_rate: float = 0.5 - global_map_voxel_size: float = 0.15 - - # ICP - max_icp_iterations: int = 50 - max_icp_correspondence_dist: float = 1.0 - - -@dataclass -class _KeyPose: - r_local: np.ndarray # 3x3 rotation in local/odom frame - t_local: np.ndarray # 3-vec translation in local/odom frame - r_global: np.ndarray # 3x3 corrected rotation - t_global: np.ndarray # 3-vec corrected translation - timestamp: float - body_cloud: np.ndarray # Nx3 points in body frame - - -def _icp( - source: np.ndarray, - target: np.ndarray, - max_iter: int = 50, - max_dist: float = 1.0, - tol: float = 1e-6, - min_inliers: int = 10, - init: np.ndarray | None = None, -) -> tuple[np.ndarray, float]: - """Point-to-point ICP using Open3D's tensor pipeline. - - Returns ``(T, fitness)`` where ``fitness`` is mean squared inlier - distance (m²) — same semantic as the previous SVD implementation, so - the GTSAM noise model in ``smooth_and_update`` keeps working. - """ - if len(source) < min_inliers or len(target) < min_inliers: - return np.eye(4), float("inf") - - cpu = o3c.Device("CPU:0") - src_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(source.astype(np.float32), device=cpu)) - tgt_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(target.astype(np.float32), device=cpu)) - - # Normals on the target enable point-to-plane ICP, which converges - # tighter than point-to-point on indoor scenes (walls give unambiguous - # normals that resolve the slide-along-wall ambiguity). - tgt_pcd.estimate_normals(max_nn=30, radius=0.3) - - init_T = ( - o3c.Tensor(init.astype(np.float64), dtype=o3c.float64, device=cpu) - if init is not None - else o3c.Tensor.eye(4, dtype=o3c.float64, device=cpu) - ) - - # Silence Open3D's "0 correspondence" warning — we deliberately use a - # tight max_correspondence_distance and reject loops with poor fitness; - # the warning is informational, not an error. - with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): - result = o3d.t.pipelines.registration.icp( - source=src_pcd, - target=tgt_pcd, - max_correspondence_distance=max_dist, - init_source_to_target=init_T, - estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), - criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( - relative_fitness=tol, - relative_rmse=tol, - max_iteration=max_iter, - ), - ) - - fitness_inlier_frac = float(result.fitness) - if fitness_inlier_frac == 0.0: - return np.eye(4), float("inf") - - rmse = float(result.inlier_rmse) - T = result.transformation.numpy() - # Return mean squared inlier distance (m²) to match prior _icp contract. - return T, rmse * rmse - - -def _voxel_downsample(pts: np.ndarray, voxel_size: float) -> np.ndarray: - if len(pts) == 0 or voxel_size <= 0: - return pts - keys = np.floor(pts / voxel_size).astype(np.int32) - _, idx = np.unique(keys, axis=0, return_index=True) - return pts[idx] - - -class _SimplePGO: - def __init__(self, config: PGOConfig) -> None: - self._cfg = config - self._key_poses: list[_KeyPose] = [] - self._history_pairs: list[tuple[int, int]] = [] - self._cache_pairs: list[dict[str, Any]] = [] - self._r_offset = np.eye(3) - self._t_offset = np.zeros(3) - - params = gtsam.ISAM2Params() - params.setRelinearizeThreshold(0.01) - params.relinearizeSkip = 1 - self._isam2 = gtsam.ISAM2(params) - self._graph = gtsam.NonlinearFactorGraph() - self._values = gtsam.Values() - - def is_key_pose(self, r: np.ndarray, t: np.ndarray) -> bool: - if not self._key_poses: - return True - last = self._key_poses[-1] - delta_trans = np.linalg.norm(t - last.t_local) - # Angular distance via quaternion dot product - q_cur = Rotation.from_matrix(r).as_quat() # [x,y,z,w] - q_last = Rotation.from_matrix(last.r_local).as_quat() - dot = abs(np.dot(q_cur, q_last)) - delta_deg = np.degrees(2.0 * np.arccos(min(dot, 1.0))) - return bool( - delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg - ) - - def add_key_pose( - self, r_local: np.ndarray, t_local: np.ndarray, timestamp: float, body_cloud: np.ndarray - ) -> bool: - if not self.is_key_pose(r_local, t_local): - return False - - idx = len(self._key_poses) - init_r = self._r_offset @ r_local - init_t = self._r_offset @ t_local + self._t_offset - - pose = gtsam.Pose3(gtsam.Rot3(init_r), gtsam.Point3(init_t)) - self._values.insert(idx, pose) - - if idx == 0: - noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) - self._graph.add(gtsam.PriorFactorPose3(idx, pose, noise)) - else: - last = self._key_poses[-1] - r_between = last.r_local.T @ r_local - t_between = last.r_local.T @ (t_local - last.t_local) - noise = gtsam.noiseModel.Diagonal.Variances( - np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6]) - ) - self._graph.add( - gtsam.BetweenFactorPose3( - idx - 1, idx, gtsam.Pose3(gtsam.Rot3(r_between), gtsam.Point3(t_between)), noise - ) - ) - - kp = _KeyPose( - r_local=r_local.copy(), - t_local=t_local.copy(), - r_global=init_r.copy(), - t_global=init_t.copy(), - timestamp=timestamp, - body_cloud=_voxel_downsample(body_cloud, self._cfg.submap_resolution), - ) - self._key_poses.append(kp) - return True - - def _get_submap(self, idx: int, half_range: int) -> np.ndarray: - lo = max(0, idx - half_range) - hi = min(len(self._key_poses) - 1, idx + half_range) - parts = [] - for i in range(lo, hi + 1): - kp = self._key_poses[i] - world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global - parts.append(world) - if not parts: - return np.empty((0, 3)) - cloud = np.vstack(parts) - return _voxel_downsample(cloud, self._cfg.submap_resolution) - - def search_for_loops(self) -> None: - if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: - return - - # Rate limit - if self._history_pairs: - cur_time = self._key_poses[-1].timestamp - last_time = self._key_poses[self._history_pairs[-1][1]].timestamp - if cur_time - last_time < self._cfg.min_loop_detect_duration: - return - - cur_idx = len(self._key_poses) - 1 - cur_kp = self._key_poses[-1] - - # Build KD-tree of previous keyframe positions - positions = np.array([kp.t_global for kp in self._key_poses[:-1]]) - tree = KDTree(positions) - - idxs = tree.query_ball_point(cur_kp.t_global, self._cfg.loop_search_radius) - if not idxs: - return - - # Pick the spatially closest keyframe that's also old enough in time. - # query_ball_point doesn't sort, so we sort by distance ourselves. - candidates = [ - (float(np.linalg.norm(self._key_poses[i].t_global - cur_kp.t_global)), i) - for i in idxs - if abs(cur_kp.timestamp - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh - ] - if not candidates: - return - candidates.sort() - loop_idx = candidates[0][1] - - # ICP verification - target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range) - source = self._get_submap(cur_idx, 0) - - transform, fitness = _icp( - source, - target, - max_iter=self._cfg.max_icp_iterations, - max_dist=self._cfg.max_icp_correspondence_dist, - min_inliers=self._cfg.min_icp_inliers, - ) - if fitness > self._cfg.loop_score_thresh: - return - - # Compute relative pose - R_icp = transform[:3, :3] - t_icp = transform[:3, 3] - r_refined = R_icp @ cur_kp.r_global - t_refined = R_icp @ cur_kp.t_global + t_icp - r_offset = self._key_poses[loop_idx].r_global.T @ r_refined - t_offset = self._key_poses[loop_idx].r_global.T @ ( - t_refined - self._key_poses[loop_idx].t_global - ) - - self._cache_pairs.append( - { - "source": cur_idx, - "target": loop_idx, - "r_offset": r_offset, - "t_offset": t_offset, - "score": fitness, - } - ) - self._history_pairs.append((loop_idx, cur_idx)) - logger.info( - "Loop closure detected", - source=cur_idx, - target=loop_idx, - score=round(fitness, 4), - ) - - def smooth_and_update(self) -> None: - has_loop = bool(self._cache_pairs) - - for pair in self._cache_pairs: - # Pose3 noise model is [rx, ry, rz, x, y, z]. The two halves - # have different units (rad² vs m²), so a uniform variance — - # the original behaviour — silently makes one half pathological - # (e.g. score=0.07 → σ_rot ≈ 15° AND σ_trans ≈ 26 cm; one of - # those is too tight, one is too loose, depending on the loop). - # Use ICP fitness as the *translation* variance and a - # generous fixed rotation variance — loops shouldn't be - # trusted to fix rotation tightly without normals + p2plane. - trans_var = max(0.01, float(pair["score"])) # ≥ σ_trans = 10 cm - rot_var = 0.05 # σ_rot ≈ 13° - noise = gtsam.noiseModel.Diagonal.Variances( - np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) - ) - self._graph.add( - gtsam.BetweenFactorPose3( - pair["target"], - pair["source"], - gtsam.Pose3(gtsam.Rot3(pair["r_offset"]), gtsam.Point3(pair["t_offset"])), - noise, - ) - ) - self._cache_pairs.clear() - - self._isam2.update(self._graph, self._values) - self._isam2.update() - if has_loop: - for _ in range(self._cfg.loop_closure_extra_iterations): - self._isam2.update() - self._graph = gtsam.NonlinearFactorGraph() - self._values = gtsam.Values() - - estimates = self._isam2.calculateBestEstimate() - for i in range(len(self._key_poses)): - pose = estimates.atPose3(i) - self._key_poses[i].r_global = pose.rotation().matrix() - self._key_poses[i].t_global = pose.translation() - - last = self._key_poses[-1] - self._r_offset = last.r_global @ last.r_local.T - self._t_offset = last.t_global - self._r_offset @ last.t_local - - def get_corrected_pose( - self, r_local: np.ndarray, t_local: np.ndarray - ) -> tuple[np.ndarray, np.ndarray]: - return self._r_offset @ r_local, self._r_offset @ t_local + self._t_offset - - def build_global_map(self, voxel_size: float) -> np.ndarray: - if not self._key_poses: - return np.empty((0, 3), dtype=np.float32) - parts = [] - for kp in self._key_poses: - world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global - parts.append(world) - cloud = np.vstack(parts).astype(np.float32) - return _voxel_downsample(cloud, voxel_size) - - @property - def num_key_poses(self) -> int: - return len(self._key_poses) diff --git a/dimos/memory2/type/observation.py b/dimos/memory2/type/observation.py index 63b10a6dbf..179b48744e 100644 --- a/dimos/memory2/type/observation.py +++ b/dimos/memory2/type/observation.py @@ -46,9 +46,42 @@ def __repr__(self) -> str: _UNLOADED = _Unloaded() +def _normalize_pose(p: Any) -> tuple[float, float, float, float, float, float, float] | None: + """Coerce common pose shapes to the storage 7-tuple `(x, y, z, qx, qy, qz, qw)`. + + Accepts: `None`, an already-normalized tuple, or any object with + `translation`+`rotation` (Transform) or `position`+`orientation` + (Pose / PoseStamped) attributes. Duck-typed to avoid importing + `dimos.msgs.geometry_msgs` at module load. + """ + if p is None or isinstance(p, tuple): + return p + trans = getattr(p, "translation", None) or getattr(p, "position", None) + rot = getattr(p, "rotation", None) or getattr(p, "orientation", None) + if trans is None or rot is None: + raise TypeError( + f"Cannot coerce {type(p).__name__} to a pose tuple — expected " + "tuple, None, Transform, Pose, or PoseStamped." + ) + return ( + float(trans.x), + float(trans.y), + float(trans.z), + float(rot.x), + float(rot.y), + float(rot.z), + float(rot.w), + ) + + @dataclass class Observation(Generic[T]): - """A single timestamped observation with optional spatial pose and metadata.""" + """A single timestamped observation with optional spatial pose and metadata. + + `pose` is stored as a 7-tuple `(x, y, z, qx, qy, qz, qw)`. Passing a + `Transform`, `Pose`, or `PoseStamped` to the constructor or to `derive` + is normalized to the 7-tuple form automatically. + """ id: int ts: float @@ -59,6 +92,9 @@ class Observation(Generic[T]): _loader: Callable[[], T] | None = field(default=None, repr=False) _data_lock: threading.Lock = field(default_factory=threading.Lock, repr=False) + def __post_init__(self) -> None: + self.pose = _normalize_pose(self.pose) + @property def pose_stamped(self) -> PoseStamped: from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 645844b735..543a898f6a 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -112,9 +112,9 @@ def collect_path(obs): corrections = keyframes_to_corrections(keyframes) interp = make_interpolator(corrections) - for obs in keyframes: - kp = obs.data - pgo_path.append((float(kp.t_global[0]), float(kp.t_global[1]), float(kp.t_global[2]))) + for kf_obs in keyframes: + kf_t = kf_obs.data.optimized.translation + pgo_path.append((kf_t.x, kf_t.y, kf_t.z)) pass2_pb = progress(total, "pgo pass 2 (rebuilding)") grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) From d11ef1654ffee88acee9329e95cb4818647d4f94 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 20:09:56 +0800 Subject: [PATCH 08/64] pgo: pytest + map.py type annotations + drop section markers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds test_pgo.py covering pose normalization on Observation, the Transform↔Pose3 conversion helpers, the interpolator edge cases, apply_corrections behavior, and a real-recording smoke test (skipped when data/go2_short.db is absent). Annotates the two map.py helpers so they pass mypy. Removes leftover section divider comments to satisfy the no-section-markers project rule. --- dimos/mapping/relocalization/pgo.py | 7 - dimos/mapping/relocalization/test_pgo.py | 365 +++++++++++++++++++++++ dimos/utils/cli/map.py | 10 +- 3 files changed, 372 insertions(+), 10 deletions(-) create mode 100644 dimos/mapping/relocalization/test_pgo.py diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index e006cc1b7b..9b48aa6437 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -220,11 +220,6 @@ def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: return stream.transform(xf) -# --------------------------------------------------------------------------- -# Transform <-> (R, t) and obs.pose helpers (private) -# --------------------------------------------------------------------------- - - def _r_t_from_transform(tf: Transform) -> tuple[np.ndarray, np.ndarray]: q = tf.rotation R = Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix() @@ -340,8 +335,6 @@ def finalize(self) -> list[Keyframe]: ) return out - # ---------- internal ---------- - def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool: if not self._key_poses: return True diff --git a/dimos/mapping/relocalization/test_pgo.py b/dimos/mapping/relocalization/test_pgo.py new file mode 100644 index 0000000000..ce35cbd25a --- /dev/null +++ b/dimos/mapping/relocalization/test_pgo.py @@ -0,0 +1,365 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from pathlib import Path + +import numpy as np +import pytest +from scipy.spatial.transform import Rotation + +from dimos.mapping.relocalization.pgo import ( + Keyframe, + PGOConfig, + _obs_to_pose3, + _pose3_to_transform, + _r_t_from_transform, + _transform_from_r_t, + apply_corrections, + correction_at, + keyframes_to_corrections, + make_interpolator, + pgo_keyframes, +) +from dimos.memory2.store.memory import MemoryStore +from dimos.memory2.stream import Stream +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 + + +def _random_R(rng: np.random.Generator) -> np.ndarray: + """Random uniform rotation matrix via random quaternion.""" + q = rng.standard_normal(4) + q /= np.linalg.norm(q) + return np.asarray(Rotation.from_quat(q).as_matrix()) + + +class TestPGOConfig: + def test_accepts_known_fields(self) -> None: + cfg = PGOConfig(key_pose_delta_trans=0.7, max_icp_iterations=42) + assert cfg.key_pose_delta_trans == 0.7 + assert cfg.max_icp_iterations == 42 + + def test_rejects_unknown_fields(self) -> None: + # The plan deleted these; BaseConfig has extra="forbid" so they raise. + for dead in ( + "world_frame", + "publish_global_map", + "global_map_publish_rate", + "global_map_voxel_size", + "unregister_input", + ): + with pytest.raises(Exception): + PGOConfig(**{dead: True}) + + +class TestTransformHelpers: + def test_r_t_transform_roundtrip(self) -> None: + rng = np.random.default_rng(0) + R_in = _random_R(rng) + t_in = rng.uniform(-5, 5, size=3) + tf = _transform_from_r_t(R_in, t_in, ts=1.23) + R_out, t_out = _r_t_from_transform(tf) + np.testing.assert_allclose(R_out, R_in, atol=1e-10) + np.testing.assert_allclose(t_out, t_in, atol=1e-10) + assert tf.ts == 1.23 + + def test_observation_normalizes_transform_pose(self) -> None: + """Constructing/deriving with pose=Transform should coerce to 7-tuple.""" + from dimos.memory2.type.observation import Observation + + tf = Transform( + translation=Vector3(1.5, -2.0, 0.7), + rotation=Quaternion(0.1, 0.2, 0.3, 0.927), + ts=1.0, + ) + obs: Observation[int] = Observation(id=0, ts=1.0, pose=tf, _data=0) + assert isinstance(obs.pose, tuple) + assert obs.pose[0] == pytest.approx(1.5) + assert obs.pose[6] == pytest.approx(0.927) + + # derive() also runs the normalization via __post_init__. + derived = obs.derive(data=0, pose=tf) + assert isinstance(derived.pose, tuple) + assert derived.pose == obs.pose + + def test_observation_normalizes_posestamped(self) -> None: + from dimos.memory2.type.observation import Observation + from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped + + ps = PoseStamped(ts=1.0, position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0)) + obs: Observation[int] = Observation(id=0, ts=1.0, pose=ps, _data=0) + assert isinstance(obs.pose, tuple) + assert obs.pose == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0) + + def test_obs_to_pose3_roundtrip(self) -> None: + from dimos.memory2.type.observation import Observation + + rng = np.random.default_rng(4) + R = _random_R(rng) + t = rng.uniform(-3, 3, size=3) + tf = _transform_from_r_t(R, t, ts=1.0) + obs: Observation[int] = Observation(id=0, ts=1.0, pose=tf, _data=0) + p = _obs_to_pose3(obs) + np.testing.assert_allclose(p.rotation().matrix(), R, atol=1e-9) + np.testing.assert_allclose(np.asarray(p.translation()), t, atol=1e-9) + + def test_pose3_to_transform(self) -> None: + import gtsam # type: ignore[import-not-found,import-untyped] + + rng = np.random.default_rng(2) + R = _random_R(rng) + t = rng.uniform(-3, 3, size=3) + p = gtsam.Pose3(gtsam.Rot3(R), gtsam.Point3(t)) + tf = _pose3_to_transform(p, ts=7.89) + R_out, t_out = _r_t_from_transform(tf) + np.testing.assert_allclose(R_out, R, atol=1e-10) + np.testing.assert_allclose(t_out, t, atol=1e-10) + + def test_pose3_to_transform_with_frames(self) -> None: + import gtsam + + rng = np.random.default_rng(3) + R = _random_R(rng) + t = rng.uniform(-3, 3, size=3) + p = gtsam.Pose3(gtsam.Rot3(R), gtsam.Point3(t)) + tf = _pose3_to_transform(p, ts=1.0, frame_id="world_corrected", child_frame_id="body") + assert tf.frame_id == "world_corrected" + assert tf.child_frame_id == "body" + R_out, t_out = _r_t_from_transform(tf) + np.testing.assert_allclose(R_out, R, atol=1e-10) + np.testing.assert_allclose(t_out, t, atol=1e-10) + + +def _make_lidar_stream(n_frames: int = 12, points_per_frame: int = 500) -> Stream[PointCloud2]: + """Straight-line trajectory along +x with small yaw, random body points. + + Note: `pgo_keyframes` skips poses with zero translation OR identity + rotation as placeholders, so we use a constant non-identity yaw. + """ + rng = np.random.default_rng(0) + mem = MemoryStore() + lidar: Stream[PointCloud2] = mem.stream("lidar", PointCloud2) + # Small yaw (~6 deg) -> non-identity quaternion that survives the + # placeholder filter. + q = Rotation.from_euler("z", 0.1).as_quat() # xyzw + qx, qy, qz, qw = float(q[0]), float(q[1]), float(q[2]), float(q[3]) + R_world = Rotation.from_euler("z", 0.1).as_matrix() + for i in range(1, n_frames + 1): + body = rng.uniform(-1, 1, size=(points_per_frame, 3)).astype(np.float32) + world = (R_world @ body.T).T + np.array([i, 0, 0], dtype=np.float32) + lidar.append( + PointCloud2.from_numpy(world.astype(np.float32)), + ts=float(i), + pose=(float(i), 0.0, 0.0, qx, qy, qz, qw), + ) + return lidar + + +class TestPipelineEndToEnd: + def test_straight_line_produces_keyframes(self) -> None: + lidar = _make_lidar_stream(n_frames=12) + kfs = pgo_keyframes(lidar) + # 12 frames spaced 1m apart with key_pose_delta_trans=0.5 -> every frame + # after the first triggers a keyframe; some may dedupe but ~11 emitted. + n = kfs.count() + assert 10 <= n <= 12 + + def test_keyframes_to_corrections_size(self) -> None: + lidar = _make_lidar_stream(n_frames=12) + kfs = pgo_keyframes(lidar) + corr = keyframes_to_corrections(kfs) + assert corr.count() == kfs.count() + + def test_apply_identity_corrections_preserves_poses(self) -> None: + # With no loop closures the optimization is a no-op -> drift = identity -> + # apply_corrections is a no-op on input poses. + lidar = _make_lidar_stream(n_frames=12) + kfs = pgo_keyframes(lidar) + corr = keyframes_to_corrections(kfs) + corrected = apply_corrections(lidar, corr) + in_poses = [o.pose for o in lidar if o.pose is not None] + out_poses = [o.pose for o in corrected if o.pose is not None] + assert len(in_poses) == len(out_poses) + for p_in, p_out in zip(in_poses, out_poses, strict=True): + for a, b in zip(p_in, p_out, strict=True): + assert a == pytest.approx(b, abs=1e-6) + + +def _make_corrections(transforms: list[Transform]) -> Stream[Transform]: + mem = MemoryStore() + stream: Stream[Transform] = mem.stream("corrections", Transform) + for tf in transforms: + stream.append(tf, ts=tf.ts) + return stream + + +class TestInterpolator: + def test_empty_stream_raises(self) -> None: + empty = _make_corrections([]) + with pytest.raises(ValueError): + make_interpolator(empty) + + def test_single_keyframe_returns_constant(self) -> None: + R = Rotation.from_euler("z", np.pi / 4).as_matrix() + t = np.array([1.0, 2.0, 3.0]) + only = _transform_from_r_t(R, t, ts=10.0) + interp = make_interpolator(_make_corrections([only])) + for query_ts in (0.0, 10.0, 100.0): + out = interp(query_ts) + assert out.translation.x == pytest.approx(1.0, abs=1e-10) + assert out.translation.y == pytest.approx(2.0, abs=1e-10) + assert out.translation.z == pytest.approx(3.0, abs=1e-10) + + def test_out_of_range_clips_to_endpoints(self) -> None: + # Note: Transform's constructor maps ts=0.0 -> time.time(); use ts>0 + # so test timestamps are deterministic. + R = np.eye(3) + a = _transform_from_r_t(R, np.array([0.0, 0.0, 0.0]), ts=1.0) + b = _transform_from_r_t(R, np.array([10.0, 0.0, 0.0]), ts=11.0) + # _make_corrections uses tf.ts; obs.ts ends up the same. + mem = MemoryStore() + stream: Stream[Transform] = mem.stream("corrections", Transform) + stream.append(a, ts=1.0) + stream.append(b, ts=11.0) + interp = make_interpolator(stream) + # Below range -> clipped to a + assert interp(-5.0).translation.x == pytest.approx(0.0, abs=1e-10) + # Above range -> clipped to b + assert interp(100.0).translation.x == pytest.approx(10.0, abs=1e-10) + # In-range midpoint + assert interp(6.0).translation.x == pytest.approx(5.0, abs=1e-10) + + def test_correction_at_oneshot(self) -> None: + R = np.eye(3) + a = _transform_from_r_t(R, np.array([0.0, 0.0, 0.0]), ts=1.0) + b = _transform_from_r_t(R, np.array([10.0, 0.0, 0.0]), ts=11.0) + mem = MemoryStore() + stream: Stream[Transform] = mem.stream("corrections", Transform) + stream.append(a, ts=1.0) + stream.append(b, ts=11.0) + result = correction_at(stream, 6.0) + assert result.translation.x == pytest.approx(5.0, abs=1e-10) + + +class TestApplyCorrections: + def test_pure_translation_shifts_poses(self) -> None: + # Build a stream of 3 frames at the origin (identity pose) with a known + # correction that shifts everything by +5 in x. Expected: corrected + # poses sit at x=5. + mem = MemoryStore() + lidar: Stream[PointCloud2] = mem.stream("lidar", PointCloud2) + for i in range(3): + lidar.append( + PointCloud2.from_numpy(np.zeros((1, 3), dtype=np.float32)), + ts=float(i + 1), + pose=(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0), + ) + + # Constant correction: translate by (5, 0, 0), identity rotation. + c_a = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=1.0) + c_b = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=3.0) + mem2 = MemoryStore() + corr: Stream[Transform] = mem2.stream("corrections", Transform) + corr.append(c_a, ts=1.0) + corr.append(c_b, ts=3.0) + + corrected = apply_corrections(lidar, corr) + for obs in corrected: + assert obs.pose is not None + assert obs.pose[0] == pytest.approx(5.0, abs=1e-9) + assert obs.pose[1] == pytest.approx(0.0, abs=1e-9) + assert obs.pose[2] == pytest.approx(0.0, abs=1e-9) + + def test_passes_through_pose_none(self) -> None: + mem = MemoryStore() + lidar: Stream[PointCloud2] = mem.stream("lidar", PointCloud2) + lidar.append( + PointCloud2.from_numpy(np.zeros((1, 3), dtype=np.float32)), + ts=1.0, + pose=None, + ) + c_a = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=1.0) + c_b = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=2.0) + mem2 = MemoryStore() + corr: Stream[Transform] = mem2.stream("corrections", Transform) + corr.append(c_a, ts=1.0) + corr.append(c_b, ts=2.0) + corrected = apply_corrections(lidar, corr) + for obs in corrected: + assert obs.pose is None + + +class TestKeyframeType: + def test_keyframe_is_frozen(self) -> None: + identity = Transform( + translation=Vector3(0.0, 0.0, 0.0), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + ts=1.0, + ) + kf = Keyframe(ts=1.0, local=identity, optimized=identity) + with pytest.raises(Exception): + kf.ts = 2.0 # type: ignore[misc] + assert isinstance(kf.local, Transform) + assert isinstance(kf.optimized, Transform) + + +# Real-recording smoke test. ~45-60s on go2_short.db. Skipped when the LFS +# file isn't present (CI without LFS pulled, fresh clone, etc.). +_DATA_DB = Path(__file__).resolve().parents[3] / "data" / "go2_short.db" + + +@pytest.mark.skipif(not _DATA_DB.exists(), reason=f"requires LFS file {_DATA_DB}") +class TestRealRecording: + def test_pgo_pipeline_against_go2_short(self) -> None: + """Run the full PGO pipeline on a real 60-second go2 recording. + + Asserts: keyframes produced, drift correction actually corrects (some + optimized poses differ from local), correction stream length matches + keyframes, apply_corrections preserves input frame count. + """ + from dimos.memory2.store.sqlite import SqliteStore + + store = SqliteStore(path=_DATA_DB) + lidar = store.streams.lidar + in_count = lidar.count() + assert in_count > 0, "recording is empty" + + kfs = pgo_keyframes(lidar) + n_kf = kfs.count() + assert n_kf > 0, "PGO emitted no keyframes" + # 60s recording at ~0.5m keyframe spacing -> at least a handful. + assert n_kf >= 5 + + # Loop closure detection: at least one optimized pose should differ + # from its odom-frame counterpart. Without loops, the optimization is + # a no-op and local == optimized for every keyframe. + drifted = sum( + 1 + for obs in kfs + if obs.data.local.translation != obs.data.optimized.translation + or obs.data.local.rotation != obs.data.optimized.rotation + ) + assert drifted > 0, "expected loop closures to drift at least one keyframe" + + corr = keyframes_to_corrections(kfs) + assert corr.count() == n_kf + + # apply_corrections preserves frame count, including pose=None rows. + corrected = apply_corrections(lidar, corr) + out_count = sum(1 for _ in corrected) + assert out_count == in_count diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 543a898f6a..3ae12b5ad9 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections.abc import Callable import time +from typing import Any import rerun as rr import rerun.blueprint as rrb @@ -20,22 +22,24 @@ from dimos.mapping.voxels import VoxelMapTransformer from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.type.observation import Observation from dimos.utils.data import resolve_named_path from dimos.visualization.rerun.init import rerun_init -def progress(total: int, label: str = ""): +def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: seen = 0 wall_start: float | None = None last_wall: float | None = None first_ts: float | None = None - def _progress(obs): + def _progress(obs: Observation[Any]) -> None: nonlocal seen, wall_start, last_wall, first_ts now = time.monotonic() if wall_start is None: wall_start = now first_ts = obs.ts + assert first_ts is not None # narrowed by the same `if` above frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0 last_wall = now seen += 1 @@ -84,7 +88,7 @@ def main( path: list[tuple[float, float, float]] = [] - def collect_path(obs): + def collect_path(obs: Observation[Any]) -> None: if obs.pose is None: return # Reject placeholder poses at the world origin (translation = 0,0,0). From 45e72da4ff6a391a2534202c6e6cc4f717ff2887 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 21:37:13 +0800 Subject: [PATCH 09/64] pgo cleanup --- dimos/mapping/relocalization/pgo.py | 68 +++++++++++++++++------- dimos/mapping/relocalization/test_pgo.py | 12 ----- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index 9b48aa6437..3f5dbbd4ec 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -14,28 +14,47 @@ """PGO drift corrections as composable Stream stages. +A lidar/odom stream comes in with poses that drift over time — the robot's +estimate of where it is in the world slowly diverges from ground truth as +small per-frame errors accumulate. PGO ("pose graph optimization") detects +revisited places via loop closure and pulls the trajectory back into +self-consistency. The result is a stream of *corrections*: a Transform per +keyframe that maps every drifted ("raw") pose to its corrected one. + Pipeline: - lidar: Stream[PointCloud2] - -> pgo_keyframes(...) -> Stream[Keyframe] - -> keyframes_to_corrections(...) -> Stream[Transform] (world_corrected <- world_raw) - -> apply_corrections(any_stream, corrections) -> Stream[T] (obs.pose shuffled) + lidar: Stream[PointCloud2] # pose-stamped scans + -> pgo_keyframes(...) -> Stream[Keyframe] # one per kf + -> keyframes_to_corrections(...) -> Stream[Transform] # world_corrected <- world_raw + -> apply_corrections(any_stream, corrections) -> Stream[T] # obs.pose shuffled + +`pgo_keyframes` runs ISAM2 + ICP loop closure across the input, emitting +each kept keyframe with both its raw odom pose (`local`) and its optimized +pose (`optimized`) as Transforms. + +`keyframes_to_corrections` is pure arithmetic: per keyframe, the drift +correction is `optimized @ local^-1` (Transform composition). The resulting +stream has one entry per keyframe — sparse in time. -The math: per keyframe, the drift correction is - R_corr = R_global @ R_local.T - t_corr = t_global - R_corr @ t_local -and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t, -clipping out-of-range to endpoints. +`make_interpolator` materializes the sparse correction stream into a +ts -> Transform function: SLERP for rotation, linear lerp for translation, +clipping out-of-range queries to the endpoints. -`gtsam` and `open3d` are imported lazily inside hot helpers so importing this module -stays cheap and gtsam-free for consumers that only need `Keyframe` / `apply_corrections`. +`apply_corrections` shuffles `obs.pose` on any stream using the interpolated +correction at each obs.ts. It's read-only on `obs.data`, so the same lidar +stream can be re-applied — or a different recording from the same physical +space, if the corrections generalize. + +`gtsam` is imported lazily inside hot helpers so importing this module +stays cheap and gtsam-free for consumers that only need `Keyframe` / +`apply_corrections`. """ from __future__ import annotations from collections.abc import Callable, Iterator from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, TypeVar +from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Unpack import numpy as np import open3d as o3d # type: ignore[import-untyped] @@ -85,6 +104,24 @@ class PGOConfig(BaseConfig): max_icp_correspondence_dist: float = 1.0 +class PGOKwargs(TypedDict, total=False): + """Typed kwargs for `pgo_keyframes`; mirrors `PGOConfig` fields.""" + + key_pose_delta_trans: float + key_pose_delta_deg: float + loop_search_radius: float + loop_time_thresh: float + loop_score_thresh: float + loop_submap_half_range: int + min_icp_inliers: int + min_keyframes_for_loop_search: int + loop_closure_extra_iterations: int + submap_resolution: float + min_loop_detect_duration: float + max_icp_iterations: int + max_icp_correspondence_dist: float + + @dataclass(frozen=True) class Keyframe: """Keyframe emitted by `pgo_keyframes`. @@ -102,7 +139,7 @@ def pgo_keyframes( stream: Stream[PointCloud2], *, on_frame: Callable[[Any], None] | None = None, - **pgo_cfg: Any, + **pgo_cfg: Unpack[PGOKwargs], ) -> Stream[Keyframe]: """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe.""" cfg = PGOConfig(**pgo_cfg) @@ -190,11 +227,6 @@ def interp(ts: float) -> Transform: return interp -def correction_at(corrections: Stream[Transform], ts: float) -> Transform: - """One-off lookup. For hot paths build `make_interpolator` once and reuse.""" - return make_interpolator(corrections)(ts) - - def apply_corrections( stream: Stream[T], corrections: Stream[Transform], diff --git a/dimos/mapping/relocalization/test_pgo.py b/dimos/mapping/relocalization/test_pgo.py index ce35cbd25a..76f8b378eb 100644 --- a/dimos/mapping/relocalization/test_pgo.py +++ b/dimos/mapping/relocalization/test_pgo.py @@ -28,7 +28,6 @@ _r_t_from_transform, _transform_from_r_t, apply_corrections, - correction_at, keyframes_to_corrections, make_interpolator, pgo_keyframes, @@ -244,17 +243,6 @@ def test_out_of_range_clips_to_endpoints(self) -> None: # In-range midpoint assert interp(6.0).translation.x == pytest.approx(5.0, abs=1e-10) - def test_correction_at_oneshot(self) -> None: - R = np.eye(3) - a = _transform_from_r_t(R, np.array([0.0, 0.0, 0.0]), ts=1.0) - b = _transform_from_r_t(R, np.array([10.0, 0.0, 0.0]), ts=11.0) - mem = MemoryStore() - stream: Stream[Transform] = mem.stream("corrections", Transform) - stream.append(a, ts=1.0) - stream.append(b, ts=11.0) - result = correction_at(stream, 6.0) - assert result.translation.x == pytest.approx(5.0, abs=1e-10) - class TestApplyCorrections: def test_pure_translation_shifts_poses(self) -> None: From df4435f21caf9a66445a5e8062cea0b544f3a27e Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 21:49:53 +0800 Subject: [PATCH 10/64] visualizing loop closures --- dimos/mapping/relocalization/pgo.py | 55 +++++++++++++++++++++++++++-- dimos/utils/cli/map.py | 26 +++++++++++++- 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index 3f5dbbd4ec..af94921c9d 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -30,7 +30,9 @@ `pgo_keyframes` runs ISAM2 + ICP loop closure across the input, emitting each kept keyframe with both its raw odom pose (`local`) and its optimized -pose (`optimized`) as Transforms. +pose (`optimized`) as Transforms. Pass `loop_closures_out=[]` to also +collect each accepted ICP loop edge as a `LoopClosure` (source+target +optimized poses + ICP fitness) — useful for visualizing the pose graph. `keyframes_to_corrections` is pure arithmetic: per keyframe, the drift correction is `optimized @ local^-1` (Transform composition). The resulting @@ -135,13 +137,33 @@ class Keyframe: optimized: Transform +@dataclass(frozen=True) +class LoopClosure: + """An accepted ICP loop-closure edge in the pose graph. + + `source` and `target` are the final optimized body poses (in + `world_corrected`) of the two keyframes the loop connected. `score` is + the ICP fitness used as the loop's translation variance — lower is a + tighter match. + """ + + source: Transform + target: Transform + score: float + + def pgo_keyframes( stream: Stream[PointCloud2], *, on_frame: Callable[[Any], None] | None = None, + loop_closures_out: list[LoopClosure] | None = None, **pgo_cfg: Unpack[PGOKwargs], ) -> Stream[Keyframe]: - """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe.""" + """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe. + + If `loop_closures_out` is provided, accepted loop edges are appended to + it (in detection order) using each keyframe's final optimized pose. + """ cfg = PGOConfig(**pgo_cfg) pgo = _PGO(cfg) @@ -167,6 +189,8 @@ def pgo_keyframes( out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) for kf in pgo.finalize(): out.append(kf, ts=kf.ts) + if loop_closures_out is not None: + loop_closures_out.extend(pgo.loop_closures()) return out @@ -312,6 +336,7 @@ def __init__(self, config: PGOConfig) -> None: self._cfg = config self._key_poses: list[_KeyPose] = [] self._pending_loops: list[_LoopPair] = [] + self._accepted_loops: list[_LoopPair] = [] self._last_loop_ts: float | None = None self._world_correction: gtsam.Pose3 = gtsam.Pose3() # identity @@ -367,6 +392,31 @@ def finalize(self) -> list[Keyframe]: ) return out + def loop_closures(self) -> list[LoopClosure]: + """Accepted loop edges, resolved to final optimized poses.""" + out: list[LoopClosure] = [] + for pair in self._accepted_loops: + src = self._key_poses[pair.source] + tgt = self._key_poses[pair.target] + out.append( + LoopClosure( + source=_pose3_to_transform( + src.optimized, + ts=src.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ), + target=_pose3_to_transform( + tgt.optimized, + ts=tgt.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ), + score=pair.score, + ) + ) + return out + def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool: if not self._key_poses: return True @@ -521,6 +571,7 @@ def _smooth_and_update(self) -> None: np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) ) self._graph.add(gtsam.BetweenFactorPose3(pair.target, pair.source, pair.offset, noise)) + self._accepted_loops.extend(self._pending_loops) self._pending_loops.clear() self._isam2.update(self._graph, self._values) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 3ae12b5ad9..5bc4d29c52 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -103,6 +103,7 @@ def collect_path(obs: Observation[Any]) -> None: from scipy.spatial.transform import Rotation from dimos.mapping.relocalization.pgo import ( + LoopClosure, keyframes_to_corrections, make_interpolator, pgo_keyframes, @@ -112,7 +113,12 @@ def collect_path(obs: Observation[Any]) -> None: total = lidar.count() print("running PGO twopass map...") - keyframes = pgo_keyframes(lidar, on_frame=progress(total, "pgo pass 1 (optimizing)")) + loops: list[LoopClosure] = [] + keyframes = pgo_keyframes( + lidar, + on_frame=progress(total, "pgo pass 1 (optimizing)"), + loop_closures_out=loops, + ) corrections = keyframes_to_corrections(keyframes) interp = make_interpolator(corrections) @@ -167,6 +173,24 @@ def collect_path(obs: Observation[Any]) -> None: rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.05]), static=True, ) + rr.log( + "world/pgo_map/keyframes", + rr.Points3D(positions=pgo_path, colors=[[255, 200, 0]], radii=[0.05]), + static=True, + ) + if pgo and loops: + loop_strips = [ + [ + (lc.source.translation.x, lc.source.translation.y, lc.source.translation.z), + (lc.target.translation.x, lc.target.translation.y, lc.target.translation.z), + ] + for lc in loops + ] + rr.log( + "world/pgo_map/loop_closures", + rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.02]), + static=True, + ) if export and pgo_map is not None: from pathlib import Path From ebdf1ec6315d6f17a0543041b9fc7a72b218cc58 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 21:58:14 +0800 Subject: [PATCH 11/64] pgo vis fixed --- dimos/utils/cli/map.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 5bc4d29c52..440d824111 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -167,28 +167,38 @@ def collect_path(obs: Observation[Any]) -> None: ) if pgo_map is not None: rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True) + STEM_HEIGHT = 2.0 # lift pose-graph viz above the map for legibility if pgo_path: rr.log( "world/pgo_map/path", rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.05]), static=True, ) + hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path] rr.log( "world/pgo_map/keyframes", - rr.Points3D(positions=pgo_path, colors=[[255, 200, 0]], radii=[0.05]), + rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.05]), static=True, ) if pgo and loops: loop_strips = [ [ - (lc.source.translation.x, lc.source.translation.y, lc.source.translation.z), - (lc.target.translation.x, lc.target.translation.y, lc.target.translation.z), + ( + lc.source.translation.x, + lc.source.translation.y, + lc.source.translation.z + STEM_HEIGHT, + ), + ( + lc.target.translation.x, + lc.target.translation.y, + lc.target.translation.z + STEM_HEIGHT, + ), ] for lc in loops ] rr.log( "world/pgo_map/loop_closures", - rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.02]), + rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.05]), static=True, ) From 71762c1c97788ca35514b5ea915d93300d9b9f1f Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 22:25:07 +0800 Subject: [PATCH 12/64] much faster pgo --- dimos/robot/cli/dimos.py | 16 +++++++-- dimos/utils/cli/map.py | 78 +++++++++++++++++++++++++++++----------- 2 files changed, 72 insertions(+), 22 deletions(-) diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 5607c8b1ca..730509180b 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -693,7 +693,12 @@ def map_cmd( "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)" ), pgo: bool = typer.Option( - False, "--pgo", help="Run pose graph optimization before rebuilding (twopass)" + False, + "--pgo", + help="Run pose graph optimization and rebuild from spatially-deduped frames", + ), + pgo_tol: float = typer.Option( + 0.3, "--pgo-tol", help="Spatial dedup tolerance for --pgo (meters)" ), block_count: int = typer.Option( 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" @@ -701,7 +706,12 @@ def map_cmd( export: bool = typer.Option( False, "--export", - help="Export PGO twopass map to ./.pc2.lcm in cwd (implies --pgo)", + help="Export PGO map to ./.pc2.lcm in cwd (implies --pgo)", + ), + full_pgo: bool = typer.Option( + False, + "--full-pgo", + help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)", ), no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), ) -> None: @@ -713,8 +723,10 @@ def map_cmd( voxel=voxel, device=device, pgo=pgo, + pgo_tol=pgo_tol, block_count=block_count, export=export, + full_pgo=full_pgo, no_gui=no_gui, ) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 440d824111..8b23df1e25 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -65,7 +65,12 @@ def main( "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)" ), pgo: bool = typer.Option( - False, "--pgo", help="Run pose graph optimization before rebuilding (twopass)" + False, + "--pgo", + help="Run pose graph optimization and rebuild from spatially-deduped frames", + ), + pgo_tol: float = typer.Option( + 0.3, "--pgo-tol", help="Spatial dedup tolerance for --pgo (meters)" ), block_count: int = typer.Option( 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" @@ -73,12 +78,17 @@ def main( export: bool = typer.Option( False, "--export", - help="Export PGO twopass map to ./.pc2.lcm in cwd (implies --pgo)", + help="Export PGO map to ./.pc2.lcm in cwd (implies --pgo)", + ), + full_pgo: bool = typer.Option( + False, + "--full-pgo", + help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)", ), no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), ) -> None: db_path = resolve_named_path(dataset, ".db") - if export: + if export or full_pgo: pgo = True store = SqliteStore(path=db_path) @@ -99,9 +109,6 @@ def collect_path(obs: Observation[Any]) -> None: pgo_map = None pgo_path: list[tuple[float, float, float]] = [] if pgo: - import numpy as np - from scipy.spatial.transform import Rotation - from dimos.mapping.relocalization.pgo import ( LoopClosure, keyframes_to_corrections, @@ -109,7 +116,6 @@ def collect_path(obs: Observation[Any]) -> None: pgo_keyframes, ) from dimos.mapping.voxels import VoxelGrid - from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 total = lidar.count() print("running PGO twopass map...") @@ -126,27 +132,53 @@ def collect_path(obs: Observation[Any]) -> None: kf_t = kf_obs.data.optimized.translation pgo_path.append((kf_t.x, kf_t.y, kf_t.z)) - pass2_pb = progress(total, "pgo pass 2 (rebuilding)") + # Canonical PGO rebuild: bucket frames by spatial cell using the raw + # odom pose, keep the latest per cell, transform with the interpolated + # correction. obs.data is not touched in the dedup loop so it stays + # cheap (no pointcloud loading). + seen: dict[tuple[int, int, int], Any] = {} + for obs in lidar: + if obs.pose is None: + continue + if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: + continue + cell = ( + int(obs.pose[0] / pgo_tol), + int(obs.pose[1] / pgo_tol), + int(obs.pose[2] / pgo_tol), + ) + seen[cell] = obs + + n_kept = len(seen) + pct = 100 * n_kept / total if total else 0 + print(f"pgo rebuild: kept [{n_kept}/{total}] frames ({pct:.1f}%) at tol={pgo_tol}m") + + pass2_pb = progress(n_kept, "pgo pass 2 (rebuilding)") grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) try: - for obs in lidar: + for obs in seen.values(): pass2_pb(obs) - if obs.pose is None: + if len(obs.data) == 0: continue - pts, _ = obs.data.as_numpy() - if len(pts) == 0: - continue - tf = interp(obs.ts) - R = Rotation.from_quat( - [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w] - ).as_matrix() - t = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) - corrected = (R @ pts[:, :3].T).T + t - grid.add_frame(PointCloud2.from_numpy(corrected.astype(np.float32))) + grid.add_frame(obs.data.transform(interp(obs.ts))) pgo_map = grid.get_global_pointcloud2() finally: grid.dispose() + full_pgo_map = None + if full_pgo: + full_pb = progress(total, "full pgo (rebuilding)") + full_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) + try: + for obs in lidar: + full_pb(obs) + if obs.pose is None or len(obs.data) == 0: + continue + full_grid.add_frame(obs.data.transform(interp(obs.ts))) + full_pgo_map = full_grid.get_global_pointcloud2() + finally: + full_grid.dispose() + global_map = ( lidar.tap(collect_path) .transform(VoxelMapTransformer(voxel_size=voxel, device=device)) @@ -167,6 +199,12 @@ def collect_path(obs: Observation[Any]) -> None: ) if pgo_map is not None: rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True) + if full_pgo_map is not None: + rr.log( + "world/full_pgo_map/pointcloud", + full_pgo_map.to_rerun(size=voxel), + static=True, + ) STEM_HEIGHT = 2.0 # lift pose-graph viz above the map for legibility if pgo_path: rr.log( From d1218361267c65733f7cfbae869af9ea693cb60f Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 22:46:17 +0800 Subject: [PATCH 13/64] PointCloud2: drop has_colors() side effect from transform The colors-copy block in `PointCloud2.transform` calls `self.pointcloud` to check `has_colors()`, which forces a tensor->legacy conversion on every invocation. With `pgo --full-pgo` rebuilding from hundreds of lidar frames, that hidden allocation dominates the per-frame cost. The colors path was lidar-irrelevant (lidar clouds have no colors anyway) and the feature it was meant to support never landed; remove it so transform() stays a clean numpy round-trip. --- dimos/msgs/sensor_msgs/PointCloud2.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index 598cb0b094..570d33f772 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -373,10 +373,6 @@ def transform(self, tf: Transform) -> PointCloud2: new_pcd = o3d.geometry.PointCloud() new_pcd.points = o3d.utility.Vector3dVector(transformed_xyz) - # Copy colors if available - if self.pointcloud.has_colors(): - new_pcd.colors = self.pointcloud.colors - return PointCloud2( pointcloud=new_pcd, frame_id=tf.frame_id, From 626a4fc854bb8bbcb97c053747676b1c9fd41459 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 11:56:57 +0800 Subject: [PATCH 14/64] ci: install gtsam-extended in tests group so import gtsam works --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1125d433f3..a6e32f409e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -402,7 +402,7 @@ tests = [ # Deps the lint job swaps for stubs (`stubs//` or `types-*`): # the real packages are needed at test time, stubs cover mypy. - "dimos[apriltag,psql,drone,cpu]", + "dimos[apriltag,mapping,psql,drone,cpu]", "mujoco>=3.3.4", "pygame>=2.6.1", "python-can>=4", diff --git a/uv.lock b/uv.lock index 7d50e3d818..8039736ee7 100644 --- a/uv.lock +++ b/uv.lock @@ -2271,7 +2271,7 @@ project-deps = [ ] tests = [ { name = "coverage" }, - { name = "dimos", extra = ["apriltag", "cpu", "drone", "psql", "visualization", "web"] }, + { name = "dimos", extra = ["apriltag", "cpu", "drone", "mapping", "psql", "visualization", "web"] }, { name = "gdown" }, { name = "googlemaps" }, { name = "hydra-core" }, @@ -2312,7 +2312,7 @@ tests = [ ] tests-self-hosted = [ { name = "coverage" }, - { name = "dimos", extra = ["agents", "apriltag", "cpu", "drone", "manipulation", "misc", "perception", "psql", "sim", "unitree", "visualization", "web"] }, + { name = "dimos", extra = ["agents", "apriltag", "cpu", "drone", "manipulation", "mapping", "misc", "perception", "psql", "sim", "unitree", "visualization", "web"] }, { name = "gdown" }, { name = "googlemaps" }, { name = "hydra-core" }, @@ -2558,7 +2558,7 @@ project-deps = [ ] tests = [ { name = "coverage", specifier = ">=7.0" }, - { name = "dimos", extras = ["apriltag", "psql", "drone", "cpu"] }, + { name = "dimos", extras = ["apriltag", "mapping", "psql", "drone", "cpu"] }, { name = "dimos", extras = ["web", "visualization"] }, { name = "gdown", specifier = "==6.0.0" }, { name = "googlemaps", specifier = ">=4.10.0" }, @@ -2601,7 +2601,7 @@ tests = [ tests-self-hosted = [ { name = "coverage", specifier = ">=7.0" }, { name = "dimos", extras = ["agents", "perception", "manipulation", "sim", "unitree", "misc"] }, - { name = "dimos", extras = ["apriltag", "psql", "drone", "cpu"] }, + { name = "dimos", extras = ["apriltag", "mapping", "psql", "drone", "cpu"] }, { name = "dimos", extras = ["web", "visualization"] }, { name = "gdown", specifier = "==6.0.0" }, { name = "googlemaps", specifier = ">=4.10.0" }, From 2f46b38443c180d711f89a24135cd041844be072 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 15:05:07 +0800 Subject: [PATCH 15/64] markers on dimos map --- dimos/msgs/sensor_msgs/PointCloud2.py | 9 +- .../detection/type/detection3d/marker.py | 42 ++++ .../perception/fiducial/marker_transformer.py | 176 +++++++++++++++ dimos/robot/cli/dimos.py | 10 + dimos/utils/cli/map.py | 207 +++++++++++++----- 5 files changed, 388 insertions(+), 56 deletions(-) create mode 100644 dimos/perception/detection/type/detection3d/marker.py create mode 100644 dimos/perception/fiducial/marker_transformer.py diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index 570d33f772..add056b1a3 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -686,7 +686,6 @@ def to_rerun( voxel_size: float = 0.05, colors: list[int] | None = None, mode: str = "spheres", - size: float | None = None, fill_mode: str = "solid", bottom_cutoff: float | None = None, **kwargs: object, @@ -699,7 +698,6 @@ def to_rerun( If None, uses height-based turbo colormap via class_ids (requires register_colormap_annotation() called once). mode: "points" for raw points, "boxes" for cubes (default), or "spheres" for sized spheres - size: Box size for mode="boxes" (e.g., voxel_size). Defaults to radii*2. fill_mode: Fill mode for boxes - "solid", "majorwireframe", or "densewireframe" **kwargs: Additional args (ignored for compatibility) @@ -732,13 +730,10 @@ def to_rerun( if mode == "points": return rr.Points3D( - positions=points, - colors=point_colors, - class_ids=class_ids, + positions=points, colors=point_colors, class_ids=class_ids, sizes=voxel_size ) elif mode == "boxes": - box_size = size if size is not None else voxel_size - half = box_size / 2 + half = voxel_size / 2 return rr.Boxes3D( centers=points, half_sizes=[half, half, half], diff --git a/dimos/perception/detection/type/detection3d/marker.py b/dimos/perception/detection/type/detection3d/marker.py new file mode 100644 index 0000000000..a162814d6f --- /dev/null +++ b/dimos/perception/detection/type/detection3d/marker.py @@ -0,0 +1,42 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + +import numpy as np + +from dimos.perception.detection.type.detection3d.bbox import Detection3DBBox + + +@dataclass +class Detection3DMarker(Detection3DBBox): + """Fiducial marker (ArUco / AprilTag) detection with a world-frame pose. + + ``bbox`` is the axis-aligned 2D bbox around the four detected corners so + crop/draw helpers from ``Detection2DBBox`` work as-is. ``center`` and + ``orientation`` carry the marker pose in ``frame_id`` (typically + ``"world"``); ``transform`` is the camera-in-world transform used to + compose it. + """ + + marker_id: int = -1 + corners_px: np.ndarray = field(default_factory=lambda: np.zeros((4, 2), dtype=np.float32)) + dictionary: str = "" + + def to_repr_dict(self) -> dict[str, Any]: + parent = super().to_repr_dict() + return {**parent, "marker_id": str(self.marker_id), "dict": self.dictionary} diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py new file mode 100644 index 0000000000..a3839ed9bb --- /dev/null +++ b/dimos/perception/fiducial/marker_transformer.py @@ -0,0 +1,176 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""ArUco / AprilTag detection as a memory2 transformer. + +Wraps the pure helpers in :mod:`dimos.perception.fiducial.marker_tf_module` +and emits one :class:`Detection3DMarker` observation per detected marker, with +``.pose`` composed into world frame from the upstream observation's +camera-in-world pose. The companion module :class:`MarkerTfModule` remains +the right choice for live TF publication; this transformer is for offline / +mem2-stream composition. + +Skips frames where the upstream observation has no ``.pose`` (debug log): +without a camera-in-world pose, we can't honor the "always world-frame" +output contract. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +from dimos.memory2.transform import Transformer +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo +from dimos.msgs.sensor_msgs.Image import Image +from dimos.perception.detection.type.detection3d.marker import Detection3DMarker +from dimos.perception.fiducial.marker_tf_module import ( + camera_info_to_cv_matrices, + create_aruco_detector, + estimate_marker_pose, + rvec_tvec_to_transform, +) +from dimos.utils.logging_config import setup_logger + +if TYPE_CHECKING: + from collections.abc import Iterator + + from dimos.memory2.type.observation import Observation + +logger = setup_logger() + + +def _pose_tuple_to_transform( + pose: tuple[float, float, float, float, float, float, float], + *, + frame_id: str, + child_frame_id: str, + ts: float, +) -> Transform: + x, y, z, qx, qy, qz, qw = pose + return Transform( + translation=Vector3(x, y, z), + rotation=Quaternion(qx, qy, qz, qw), + frame_id=frame_id, + child_frame_id=child_frame_id, + ts=ts, + ) + + +class DetectMarkers(Transformer[Image, Detection3DMarker]): + """Detect fiducial markers and emit one world-pose observation per marker.""" + + def __init__( + self, + camera_info: CameraInfo, + marker_length_m: float, + aruco_dictionary: str = "DICT_APRILTAG_36h11", + world_frame: str = "world", + ) -> None: + if marker_length_m <= 0: + raise ValueError(f"marker_length_m must be > 0, got {marker_length_m}") + self.camera_info = camera_info + self.marker_length_m = marker_length_m + self.aruco_dictionary = aruco_dictionary + self.world_frame = world_frame + self._detector = create_aruco_detector(aruco_dictionary) + self._cam_mtx, self._dist = camera_info_to_cv_matrices(camera_info) + + def __call__( + self, upstream: Iterator[Observation[Image]] + ) -> Iterator[Observation[Detection3DMarker]]: + info = self.camera_info + marker_size = Vector3(self.marker_length_m, self.marker_length_m, 0.0) + + for obs in upstream: + if obs.pose is None: + logger.debug("DetectMarkers: obs %s has no .pose; skipping", obs.id) + continue + + image = obs.data + if ( + info.width + and info.height + and (image.width != info.width or image.height != info.height) + ): + logger.debug( + "DetectMarkers: image %sx%s != CameraInfo %sx%s; skip", + image.width, + image.height, + info.width, + info.height, + ) + continue + + gray = image.to_grayscale().as_numpy() + corners, ids, _ = self._detector.detectMarkers(gray) + if ids is None or len(ids) == 0: + continue + + t_world_optical = _pose_tuple_to_transform( + obs.pose, + frame_id=self.world_frame, + child_frame_id="optical", + ts=obs.ts, + ) + + for corner_set, mid_arr in zip(corners, ids, strict=True): + mid = int(mid_arr[0]) + pose = estimate_marker_pose( + corner_set, + self.marker_length_m, + self._cam_mtx, + self._dist, + distortion_model=info.distortion_model, + ) + if pose is None: + continue + rvec, tvec = pose + t_optical_marker = rvec_tvec_to_transform( + rvec, + tvec, + frame_id="optical", + child_frame_id=f"marker_{mid}", + ts=obs.ts, + ) + t_world_marker = t_world_optical + t_optical_marker + + corners_2d = corner_set.reshape(4, 2).astype(np.float32) + xy_min = corners_2d.min(axis=0) + xy_max = corners_2d.max(axis=0) + bbox = (float(xy_min[0]), float(xy_min[1]), float(xy_max[0]), float(xy_max[1])) + + det = Detection3DMarker( + bbox=bbox, + track_id=mid, + class_id=mid, + confidence=1.0, + name=f"marker_{mid}", + ts=obs.ts, + image=image, + center=t_world_marker.translation, + size=marker_size, + transform=t_world_optical, + frame_id=self.world_frame, + orientation=t_world_marker.rotation, + marker_id=mid, + corners_px=corners_2d, + dictionary=self.aruco_dictionary, + ) + + yield obs.derive(data=det, pose=t_world_marker).tag(marker_id=mid) diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 730509180b..23147e9380 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -714,6 +714,14 @@ def map_cmd( help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)", ), no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), + markers: bool = typer.Option( + False, + "--markers", + help="Detect AprilTag markers in color_image and overlay them in rerun", + ), + marker_size: float = typer.Option( + 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" + ), ) -> None: """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" from dimos.utils.cli.map import main as map_main @@ -728,6 +736,8 @@ def map_cmd( export=export, full_pgo=full_pgo, no_gui=no_gui, + markers=markers, + marker_size=marker_size, ) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 8b23df1e25..bea304a538 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -20,12 +20,22 @@ import rerun.blueprint as rrb import typer -from dimos.mapping.voxels import VoxelMapTransformer +from dimos.mapping.voxels import VoxelGrid from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.transform import QualityWindow from dimos.memory2.type.observation import Observation +from dimos.memory2.vis.color import Color +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.sensor_msgs.Image import Image +from dimos.perception.fiducial.marker_transformer import DetectMarkers +from dimos.robot.unitree.go2.connection import BASE_TO_OPTICAL, _camera_info_static from dimos.utils.data import resolve_named_path from dimos.visualization.rerun.init import rerun_init +PATH_THICKNESS = 0.01 + def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: seen = 0 @@ -70,11 +80,11 @@ def main( help="Run pose graph optimization and rebuild from spatially-deduped frames", ), pgo_tol: float = typer.Option( - 0.3, "--pgo-tol", help="Spatial dedup tolerance for --pgo (meters)" - ), - block_count: int = typer.Option( - 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" + 0.3, + "--pgo-tol", + help="Spatial dedup tolerance (meters); applies to both raw and --pgo maps", ), + block_count: int = typer.Option(2_000_000, "--block-count", help="VoxelBlockGrid capacity"), export: bool = typer.Option( False, "--export", @@ -86,6 +96,14 @@ def main( help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)", ), no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), + markers: bool = typer.Option( + False, + "--markers", + help="Detect AprilTag markers in color_image and overlay them in rerun", + ), + marker_size: float = typer.Option( + 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" + ), ) -> None: db_path = resolve_named_path(dataset, ".db") if export or full_pgo: @@ -96,18 +114,39 @@ def main( print(lidar.summary()) - path: list[tuple[float, float, float]] = [] + total = lidar.count() - def collect_path(obs: Observation[Any]) -> None: + # Spatial dedup: bucket frames by 3D cell using the raw pose, keep the + # latest per cell. Shared by raw and PGO rebuilds. Doesn't touch obs.data + # so it stays cheap (no pointcloud loading). + seen: dict[tuple[int, int, int], Observation[Any]] = {} + for obs in lidar: if obs.pose is None: - return - # Reject placeholder poses at the world origin (translation = 0,0,0). + continue + # Reject placeholder poses at the world origin. if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: - return - path.append((obs.pose[0], obs.pose[1], obs.pose[2])) + continue + cell = ( + int(obs.pose[0] / pgo_tol), + int(obs.pose[1] / pgo_tol), + int(obs.pose[2] / pgo_tol), + ) + seen[cell] = obs + + n_kept = len(seen) + pct = 100 * n_kept / total if total else 0 + print(f"dedup: kept [{n_kept}/{total}] frames ({pct:.1f}%) at tol={pgo_tol}m") + + # Dict insertion order = lidar iteration order = chronological. + # `seen` only contains entries with non-None poses (filtered above). + path: list[tuple[float, float, float]] = [ + (obs.pose[0], obs.pose[1], obs.pose[2]) for obs in seen.values() if obs.pose is not None + ] pgo_map = None pgo_path: list[tuple[float, float, float]] = [] + loops: list[Any] = [] + interp: Any | None = None if pgo: from dimos.mapping.relocalization.pgo import ( LoopClosure, @@ -115,16 +154,15 @@ def collect_path(obs: Observation[Any]) -> None: make_interpolator, pgo_keyframes, ) - from dimos.mapping.voxels import VoxelGrid - total = lidar.count() print("running PGO twopass map...") - loops: list[LoopClosure] = [] + pgo_loops: list[LoopClosure] = [] keyframes = pgo_keyframes( lidar, on_frame=progress(total, "pgo pass 1 (optimizing)"), - loop_closures_out=loops, + loop_closures_out=pgo_loops, ) + loops = list(pgo_loops) corrections = keyframes_to_corrections(keyframes) interp = make_interpolator(corrections) @@ -132,27 +170,6 @@ def collect_path(obs: Observation[Any]) -> None: kf_t = kf_obs.data.optimized.translation pgo_path.append((kf_t.x, kf_t.y, kf_t.z)) - # Canonical PGO rebuild: bucket frames by spatial cell using the raw - # odom pose, keep the latest per cell, transform with the interpolated - # correction. obs.data is not touched in the dedup loop so it stays - # cheap (no pointcloud loading). - seen: dict[tuple[int, int, int], Any] = {} - for obs in lidar: - if obs.pose is None: - continue - if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: - continue - cell = ( - int(obs.pose[0] / pgo_tol), - int(obs.pose[1] / pgo_tol), - int(obs.pose[2] / pgo_tol), - ) - seen[cell] = obs - - n_kept = len(seen) - pct = 100 * n_kept / total if total else 0 - print(f"pgo rebuild: kept [{n_kept}/{total}] frames ({pct:.1f}%) at tol={pgo_tol}m") - pass2_pb = progress(n_kept, "pgo pass 2 (rebuilding)") grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) try: @@ -167,6 +184,7 @@ def collect_path(obs: Observation[Any]) -> None: full_pgo_map = None if full_pgo: + assert interp is not None full_pb = progress(total, "full pgo (rebuilding)") full_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) try: @@ -179,43 +197,83 @@ def collect_path(obs: Observation[Any]) -> None: finally: full_grid.dispose() - global_map = ( - lidar.tap(collect_path) - .transform(VoxelMapTransformer(voxel_size=voxel, device=device)) - .tap(progress(lidar.count(), "reconstructing global map")) - .last() - .data - ) + # Raw map: same dedup'd frames, no PGO correction. + raw_pb = progress(n_kept, "reconstructing global map") + raw_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) + try: + for obs in seen.values(): + raw_pb(obs) + if len(obs.data) == 0: + continue + raw_grid.add_frame(obs.data) + global_map = raw_grid.get_global_pointcloud2() + finally: + raw_grid.dispose() + + marker_dets: list[Observation[Any]] = [] + if markers: + color_image = store.stream("color_image", Image) + + def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: + # obs.pose is base_link-in-world; DetectMarkers wants optical-in-world. + if obs.pose is None: + return obs.derive(data=obs.data) + x, y, z, qx, qy, qz, qw = obs.pose + t_world_base = Transform( + translation=Vector3(x, y, z), + rotation=Quaternion(qx, qy, qz, qw), + frame_id="world", + child_frame_id="base_link", + ts=obs.ts, + ) + return obs.derive(data=obs.data, pose=t_world_base + BASE_TO_OPTICAL) + + xf = DetectMarkers( + camera_info=_camera_info_static(), + marker_length_m=marker_size, + ) + # 2Hz quality-gated: keep only the sharpest frame per 0.5s window. + marker_dets = ( + color_image.tap(progress(color_image.count(), "filtering frames")) + .transform(QualityWindow(lambda img: img.sharpness, window=0.5)) + .map(_lift_pose_to_optical) + .transform(xf) + .to_list() + ) + unique = sorted({obs.data.marker_id for obs in marker_dets}) + print(f"markers: {len(marker_dets)} detections across {len(unique)} unique ids {unique}") if not no_gui: rerun_init("dimos map tool", spawn=True) rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) - rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True) + rr.log("world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True) if path: rr.log( "world/raw_map/path", - rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[0.05]), + rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[PATH_THICKNESS]), static=True, ) if pgo_map is not None: - rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True) + rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(voxel_size=voxel / 2), static=True) if full_pgo_map is not None: rr.log( "world/full_pgo_map/pointcloud", - full_pgo_map.to_rerun(size=voxel), + full_pgo_map.to_rerun(voxel_size=voxel / 2), static=True, ) STEM_HEIGHT = 2.0 # lift pose-graph viz above the map for legibility if pgo_path: rr.log( "world/pgo_map/path", - rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.05]), + rr.LineStrips3D( + strips=[pgo_path], colors=[[255, 255, 255]], radii=[PATH_THICKNESS] + ), static=True, ) hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path] rr.log( "world/pgo_map/keyframes", - rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.05]), + rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.025]), static=True, ) if pgo and loops: @@ -239,6 +297,54 @@ def collect_path(obs: Observation[Any]) -> None: rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.05]), static=True, ) + if marker_dets: + half = marker_size / 2.0 + n = len(marker_dets) + fill_half = [(half, half, 0.005)] * n + # Outline sits just outside the fill so both stay visible. + outline_bump = marker_size * 0.05 + outline_half = [(half + outline_bump, half + outline_bump, 0.006)] * n + centers = [(d.data.center.x, d.data.center.y, d.data.center.z) for d in marker_dets] + quaternions = [ + ( + d.data.orientation.x, + d.data.orientation.y, + d.data.orientation.z, + d.data.orientation.w, + ) + for d in marker_dets + ] + unique_ids = sorted({d.data.marker_id for d in marker_dets}) + id_to_color = { + mid: Color.from_cmap("tab10", (i % 10) / 10.0).rgb_u8() + for i, mid in enumerate(unique_ids) + } + colors = [id_to_color[d.data.marker_id] for d in marker_dets] + labels = [f"id={d.data.marker_id}" for d in marker_dets] + rr.log( + "world/markers/fill", + rr.Boxes3D( + centers=centers, + half_sizes=fill_half, + quaternions=quaternions, + colors=colors, + fill_mode=rr.components.FillMode.Solid, + labels=labels, + ), + static=True, + ) + rr.log( + "world/markers/outline", + rr.Boxes3D( + centers=centers, + half_sizes=outline_half, + quaternions=quaternions, + colors=[(255, 255, 255)] * n, + fill_mode=rr.components.FillMode.MajorWireframe, + radii=0.002, + ), + static=True, + ) if export and pgo_map is not None: from pathlib import Path @@ -255,3 +361,6 @@ def collect_path(obs: Observation[Any]) -> None: if __name__ == "__main__": typer.run(main) + typer.run(main) + typer.run(main) + typer.run(main) From 82e2dab13fad12eee902c86575fd61734b19b018 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 15:12:13 +0800 Subject: [PATCH 16/64] map detects markers correctly --- dimos/utils/cli/map.py | 93 ++++++++++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 26 deletions(-) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index bea304a538..6fb129a4bd 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -25,16 +25,14 @@ from dimos.memory2.transform import QualityWindow from dimos.memory2.type.observation import Observation from dimos.memory2.vis.color import Color -from dimos.msgs.geometry_msgs.Quaternion import Quaternion from dimos.msgs.geometry_msgs.Transform import Transform -from dimos.msgs.geometry_msgs.Vector3 import Vector3 from dimos.msgs.sensor_msgs.Image import Image from dimos.perception.fiducial.marker_transformer import DetectMarkers -from dimos.robot.unitree.go2.connection import BASE_TO_OPTICAL, _camera_info_static +from dimos.robot.unitree.go2.connection import _camera_info_static from dimos.utils.data import resolve_named_path from dimos.visualization.rerun.init import rerun_init -PATH_THICKNESS = 0.01 +PATH_THICKNESS = 0.005 def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: @@ -212,31 +210,19 @@ def main( marker_dets: list[Observation[Any]] = [] if markers: + # Image observations in dimos recordings are stamped with + # frame_id="camera_optical", so obs.pose is already optical-in-world + # (verified: matches lidar_base_pose + BASE_TO_OPTICAL to ~1mm). + # No mount composition needed. color_image = store.stream("color_image", Image) - - def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: - # obs.pose is base_link-in-world; DetectMarkers wants optical-in-world. - if obs.pose is None: - return obs.derive(data=obs.data) - x, y, z, qx, qy, qz, qw = obs.pose - t_world_base = Transform( - translation=Vector3(x, y, z), - rotation=Quaternion(qx, qy, qz, qw), - frame_id="world", - child_frame_id="base_link", - ts=obs.ts, - ) - return obs.derive(data=obs.data, pose=t_world_base + BASE_TO_OPTICAL) - xf = DetectMarkers( camera_info=_camera_info_static(), marker_length_m=marker_size, ) # 2Hz quality-gated: keep only the sharpest frame per 0.5s window. marker_dets = ( - color_image.tap(progress(color_image.count(), "filtering frames")) + color_image.tap(progress(color_image.count(), "detecting markers")) .transform(QualityWindow(lambda img: img.sharpness, window=0.5)) - .map(_lift_pose_to_optical) .transform(xf) .to_list() ) @@ -272,7 +258,7 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: ) hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path] rr.log( - "world/pgo_map/keyframes", + "world/pgo_map/pgo/keyframes", rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.025]), static=True, ) @@ -293,8 +279,8 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: for lc in loops ] rr.log( - "world/pgo_map/loop_closures", - rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.05]), + "world/pgo_map/pgo/loop_closures", + rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.025]), static=True, ) if marker_dets: @@ -322,7 +308,7 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: colors = [id_to_color[d.data.marker_id] for d in marker_dets] labels = [f"id={d.data.marker_id}" for d in marker_dets] rr.log( - "world/markers/fill", + "world/raw_map/markers/fill", rr.Boxes3D( centers=centers, half_sizes=fill_half, @@ -334,7 +320,7 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: static=True, ) rr.log( - "world/markers/outline", + "world/raw_map/markers/outline", rr.Boxes3D( centers=centers, half_sizes=outline_half, @@ -346,6 +332,61 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: static=True, ) + if interp is not None: + # PGO-corrected marker poses. interp(ts) maps raw_world → + # pgo_world; composing with each raw marker transform lifts + # it into the corrected frame so it lines up with pgo_map. + pgo_centers: list[tuple[float, float, float]] = [] + pgo_quaternions: list[tuple[float, float, float, float]] = [] + for d in marker_dets: + raw_tf = Transform( + translation=d.data.center, + rotation=d.data.orientation, + frame_id="world", + child_frame_id=f"marker_{d.data.marker_id}", + ts=d.ts, + ) + corrected = interp(d.ts) + raw_tf + pgo_centers.append( + ( + corrected.translation.x, + corrected.translation.y, + corrected.translation.z, + ) + ) + pgo_quaternions.append( + ( + corrected.rotation.x, + corrected.rotation.y, + corrected.rotation.z, + corrected.rotation.w, + ) + ) + rr.log( + "world/pgo_map/markers/fill", + rr.Boxes3D( + centers=pgo_centers, + half_sizes=fill_half, + quaternions=pgo_quaternions, + colors=colors, + fill_mode=rr.components.FillMode.Solid, + labels=labels, + ), + static=True, + ) + rr.log( + "world/pgo_map/markers/outline", + rr.Boxes3D( + centers=pgo_centers, + half_sizes=outline_half, + quaternions=pgo_quaternions, + colors=[(255, 255, 255)] * n, + fill_mode=rr.components.FillMode.MajorWireframe, + radii=0.002, + ), + static=True, + ) + if export and pgo_map is not None: from pathlib import Path From b98b506c31fe35eee1c50759c8205f26beb538ab Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 15:29:36 +0800 Subject: [PATCH 17/64] maptool speed limits --- dimos/memory2/transform.py | 61 ++++++++++++++++++++++++++++++++++++++ dimos/robot/cli/dimos.py | 12 ++++++++ dimos/utils/cli/map.py | 36 ++++++++++++++++------ 3 files changed, 100 insertions(+), 9 deletions(-) diff --git a/dimos/memory2/transform.py b/dimos/memory2/transform.py index 6894807627..a79ccf36b8 100644 --- a/dimos/memory2/transform.py +++ b/dimos/memory2/transform.py @@ -191,6 +191,67 @@ def _speed(upstream: Iterator[Observation[Any]]) -> Iterator[Observation[float]] return FnIterTransformer(_speed) +class SpeedLimit(Transformer[T, T]): + """Pass through observations whose linear (and optionally angular) speed is low. + + Linear: ``||Δtranslation|| / Δt`` in m/s, must be ≤ ``max_mps``. + Angular: rotation-angle-between-quaternions / Δt in deg/s, must be + ≤ ``max_dps`` when set. + + Both metrics are computed between consecutive observations in stream + order. Use to drop motion-blurry frames from fiducial / OCR / + SLAM-frontend pipelines. + + First observation is dropped (no reference). Observations with no + ``.pose`` are skipped. ``Δt ≤ 0`` (duplicate / non-monotonic + timestamps) is treated as unknown and dropped. + + Note: the effective sampling interval is whatever upstream provides. + Chain after ``QualityWindow(window=Δt)`` to measure motion over a + fixed time window instead of per-frame. + """ + + def __init__(self, max_mps: float, max_dps: float | None = None) -> None: + if max_mps <= 0: + raise ValueError(f"max_mps must be > 0, got {max_mps}") + if max_dps is not None and max_dps <= 0: + raise ValueError(f"max_dps must be > 0 when set, got {max_dps}") + self.max_mps = max_mps + self.max_dps = max_dps + + def __call__(self, upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: + prev: Observation[T] | None = None + max_mps = self.max_mps + max_dps = self.max_dps + max_rps = math.radians(max_dps) if max_dps is not None else None + for obs in upstream: + if obs.pose is None: + continue + if prev is not None and prev.pose is not None: + dt = obs.ts - prev.ts + if dt > 0: + dx = obs.pose[0] - prev.pose[0] + dy = obs.pose[1] - prev.pose[1] + dz = obs.pose[2] - prev.pose[2] + v = math.sqrt(dx * dx + dy * dy + dz * dz) / dt + ok = v <= max_mps + if ok and max_rps is not None: + # rotation angle between two unit quaternions. + # |q · q'| collapses the double-cover sign ambiguity; + # clamp guards against numerical drift past 1. + dot = ( + obs.pose[3] * prev.pose[3] + + obs.pose[4] * prev.pose[4] + + obs.pose[5] * prev.pose[5] + + obs.pose[6] * prev.pose[6] + ) + angle = 2.0 * math.acos(min(1.0, abs(dot))) + ok = (angle / dt) <= max_rps + if ok: + yield obs + prev = obs + + def smooth(window: int) -> FnIterTransformer[float, float]: """Sliding window average over obs.data (must be numeric).""" diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 23147e9380..d26d28d771 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -722,6 +722,16 @@ def map_cmd( marker_size: float = typer.Option( 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" ), + marker_max_speed: float = typer.Option( + 0.05, + "--marker-max-speed", + help="Skip frames where robot is moving faster than this (m/s); 0 disables", + ), + marker_max_rot_rate: float = typer.Option( + 15.0, + "--marker-max-rot-rate", + help="Skip frames where robot is rotating faster than this (deg/s); 0 disables", + ), ) -> None: """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" from dimos.utils.cli.map import main as map_main @@ -738,6 +748,8 @@ def map_cmd( no_gui=no_gui, markers=markers, marker_size=marker_size, + marker_max_speed=marker_max_speed, + marker_max_rot_rate=marker_max_rot_rate, ) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 6fb129a4bd..a6cafccd09 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -22,7 +22,8 @@ from dimos.mapping.voxels import VoxelGrid from dimos.memory2.store.sqlite import SqliteStore -from dimos.memory2.transform import QualityWindow +from dimos.memory2.stream import Stream +from dimos.memory2.transform import QualityWindow, SpeedLimit from dimos.memory2.type.observation import Observation from dimos.memory2.vis.color import Color from dimos.msgs.geometry_msgs.Transform import Transform @@ -32,7 +33,7 @@ from dimos.utils.data import resolve_named_path from dimos.visualization.rerun.init import rerun_init -PATH_THICKNESS = 0.005 +PATH_THICKNESS = 0.01 def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: @@ -102,6 +103,16 @@ def main( marker_size: float = typer.Option( 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" ), + marker_max_speed: float = typer.Option( + 0.3, + "--marker-max-speed", + help="Skip frames where robot is moving faster than this (m/s); 0 disables", + ), + marker_max_rot_rate: float = typer.Option( + 30.0, + "--marker-max-rot-rate", + help="Skip frames where robot is rotating faster than this (deg/s); 0 disables", + ), ) -> None: db_path = resolve_named_path(dataset, ".db") if export or full_pgo: @@ -219,13 +230,20 @@ def main( camera_info=_camera_info_static(), marker_length_m=marker_size, ) - # 2Hz quality-gated: keep only the sharpest frame per 0.5s window. - marker_dets = ( - color_image.tap(progress(color_image.count(), "detecting markers")) - .transform(QualityWindow(lambda img: img.sharpness, window=0.5)) - .transform(xf) - .to_list() - ) + # 2Hz quality-gated: keep only the sharpest frame per 0.5s window, + # then drop frames where the robot was moving (linear + rotational) + # faster than the limits — speed/rate are averaged across the window. + pipeline: Stream[Image] = color_image.tap( + progress(color_image.count(), "detecting markers") + ).transform(QualityWindow(lambda img: img.sharpness, window=0.5)) + if marker_max_speed > 0: + pipeline = pipeline.transform( + SpeedLimit( + max_mps=marker_max_speed, + max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None, + ) + ) + marker_dets = pipeline.transform(xf).to_list() unique = sorted({obs.data.marker_id for obs in marker_dets}) print(f"markers: {len(marker_dets)} detections across {len(unique)} unique ids {unique}") From 4d5e9b66cbdea13b4df7690e52d48e0791bdd50b Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 16:11:19 +0800 Subject: [PATCH 18/64] markers_rrd renderer --- dimos/utils/cli/map.py | 27 +++-- dimos/utils/cli/markers_rrd.py | 187 +++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 8 deletions(-) create mode 100644 dimos/utils/cli/markers_rrd.py diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index a6cafccd09..554e1878d2 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -104,12 +104,12 @@ def main( 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" ), marker_max_speed: float = typer.Option( - 0.3, + 0.1, "--marker-max-speed", help="Skip frames where robot is moving faster than this (m/s); 0 disables", ), marker_max_rot_rate: float = typer.Option( - 30.0, + 15, "--marker-max-rot-rate", help="Skip frames where robot is rotating faster than this (deg/s); 0 disables", ), @@ -318,12 +318,23 @@ def main( ) for d in marker_dets ] - unique_ids = sorted({d.data.marker_id for d in marker_dets}) - id_to_color = { - mid: Color.from_cmap("tab10", (i % 10) / 10.0).rgb_u8() - for i, mid in enumerate(unique_ids) - } - colors = [id_to_color[d.data.marker_id] for d in marker_dets] + # Color mode: turbo over detection time vs. tab10 over marker id. + COLOR_BY_TIME = True + if COLOR_BY_TIME: + ts_min = min(d.ts for d in marker_dets) + ts_max = max(d.ts for d in marker_dets) + ts_span = ts_max - ts_min if ts_max > ts_min else 1.0 + colors = [ + Color.from_cmap("turbo", (d.ts - ts_min) / ts_span).rgb_u8() + for d in marker_dets + ] + else: + unique_ids = sorted({d.data.marker_id for d in marker_dets}) + id_to_color = { + mid: Color.from_cmap("tab10", (i % 10) / 10.0).rgb_u8() + for i, mid in enumerate(unique_ids) + } + colors = [id_to_color[d.data.marker_id] for d in marker_dets] labels = [f"id={d.data.marker_id}" for d in marker_dets] rr.log( "world/raw_map/markers/fill", diff --git a/dimos/utils/cli/markers_rrd.py b/dimos/utils/cli/markers_rrd.py new file mode 100644 index 0000000000..3b67f9c2ea --- /dev/null +++ b/dimos/utils/cli/markers_rrd.py @@ -0,0 +1,187 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Temporary: dump apriltag detector replay (.rrd) for reliability investigation. + +Walks a recorded SQLite dataset and writes an rrd containing: +- per-image: camera pose + pinhole + image data +- per-lidar: base_link pose (a moving axis) +- per detection: marker box in world frame, at the detection timestamp + +Usage: + uv run python -m dimos.utils.cli.markers_rrd hk_village1 --out hk.rrd + rerun hk.rrd + +Throwaway script next to ``map.py``; remove once the apriltag reliability work +lands. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import rerun as rr +import typer + +from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.stream import Stream +from dimos.memory2.transform import QualityWindow, SpeedLimit +from dimos.memory2.vis.color import Color +from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.perception.fiducial.marker_transformer import DetectMarkers +from dimos.robot.unitree.go2.connection import _camera_info_static +from dimos.utils.data import resolve_named_path + +TIMELINE = "ts" + + +def main( + dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), + out: Path = typer.Option(..., "--out", help="Output .rrd path"), + marker_size: float = typer.Option(0.1, "--marker-size", help="Marker edge length (m)"), + marker_max_speed: float = typer.Option( + 0.15, "--marker-max-speed", help="Detection speed gate (m/s); 0 disables" + ), + marker_max_rot_rate: float = typer.Option( + 30.0, "--marker-max-rot-rate", help="Detection rot gate (deg/s); 0 disables" + ), + quality_window: float = typer.Option( + 0.25, "--quality-window", help="Sharpest-frame window for detection (s)" + ), +) -> None: + db_path = resolve_named_path(dataset, ".db") + cam_info = _camera_info_static() + + rr.init("dimos markers", recording_id=db_path.stem) + rr.save(str(out)) + + # Static pinhole on the camera entity; per-frame Transform3D goes on the + # same entity. Image is the child so it projects through the pinhole. + pinhole = cam_info.to_rerun() + assert not isinstance(pinhole, list) + rr.log("world/camera", pinhole, static=True) + + store = SqliteStore(path=str(db_path)) + with store: + color_image = store.stream("color_image", Image) + lidar = store.stream("lidar", PointCloud2) + + # ---- pass 1: robot base pose over time (from lidar.pose) ---- + for lidar_obs in lidar: + if lidar_obs.pose is None: + continue + rr.set_time(TIMELINE, timestamp=lidar_obs.ts) + x, y, z, qx, qy, qz, qw = lidar_obs.pose + rr.log( + "world/robot", + rr.Transform3D( + translation=[x, y, z], quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw]) + ), + ) + + # ---- pass 2: camera pose + image per color_image frame ---- + n_img = color_image.count() + for i, img_obs in enumerate(color_image): + rr.set_time(TIMELINE, timestamp=img_obs.ts) + if img_obs.pose is not None: + x, y, z, qx, qy, qz, qw = img_obs.pose + rr.log( + "world/camera", + rr.Transform3D( + translation=[x, y, z], quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw]) + ), + ) + rr.log("world/camera/image", img_obs.data.to_rerun()) + # Clear any prior detection box at this ts; pass 3 will overwrite + # this with the actual detection iff one fires for this frame. + rr.log( + "world/camera/image/detections", + rr.Boxes2D(array=[], array_format=rr.Box2DFormat.XYWH), + ) + if (i + 1) % 50 == 0 or i + 1 == n_img: + print(f"images: {i + 1}/{n_img}") + + # ---- pass 3: marker detections (filtered same way as `dimos map`) ---- + xf = DetectMarkers(camera_info=cam_info, marker_length_m=marker_size) + pipeline: Stream[Any] = color_image.transform( + QualityWindow(lambda img: img.sharpness, window=quality_window) + ) + if marker_max_speed > 0: + pipeline = pipeline.transform( + SpeedLimit( + max_mps=marker_max_speed, + max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None, + ) + ) + # Each 3D detection gets its own entity path so it persists from its + # ts onward — earlier detections stay visible as new ones appear. + # Color by ts (turbo) across the recording's full image-time range. + ts_min = color_image.first().ts + ts_max = color_image.last().ts + ts_span = ts_max - ts_min if ts_max > ts_min else 1.0 + n_det = 0 + for det_obs in pipeline.transform(xf): + d = det_obs.data + rr.set_time(TIMELINE, timestamp=det_obs.ts) + color = Color.from_cmap("turbo", (det_obs.ts - ts_min) / ts_span).rgb_u8() + rr.log( + f"world/markers/det_{n_det:05d}", + rr.Boxes3D( + centers=[(d.center.x, d.center.y, d.center.z)], + half_sizes=[(marker_size / 2, marker_size / 2, 0.005)], + quaternions=[ + rr.Quaternion( + xyzw=[ + d.orientation.x, + d.orientation.y, + d.orientation.z, + d.orientation.w, + ] + ) + ], + colors=[color], + fill_mode=rr.components.FillMode.Solid, + labels=[f"id={d.marker_id}"], + ), + ) + n_det += 1 + xs = d.corners_px[:, 0] + ys = d.corners_px[:, 1] + x1, y1 = float(xs.min()), float(ys.min()) + x2, y2 = float(xs.max()), float(ys.max()) + rr.log( + "world/camera/image/detections", + rr.Boxes2D( + array=[[x1, y1, x2 - x1, y2 - y1]], + array_format=rr.Box2DFormat.XYWH, + labels=[f"id={d.marker_id}"], + ), + ) + print(f"detections: {n_det}") + + print(f"wrote {out}") + print(f"open with: rerun {out}") + + +if __name__ == "__main__": + typer.run(main) + + print(f"wrote {out}") + print(f"open with: rerun {out}") + + +if __name__ == "__main__": + typer.run(main) From b3163bc117dbf932d6d2085c7c51c31f93748215 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 16:53:58 +0800 Subject: [PATCH 19/64] average window for markers --- .../perception/fiducial/marker_transformer.py | 89 ++++++++++++++++++- dimos/utils/cli/markers_rrd.py | 64 +++++++++++-- 2 files changed, 142 insertions(+), 11 deletions(-) diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py index a3839ed9bb..e6c3d3c02f 100644 --- a/dimos/perception/fiducial/marker_transformer.py +++ b/dimos/perception/fiducial/marker_transformer.py @@ -28,6 +28,8 @@ from __future__ import annotations +import dataclasses +import math from typing import TYPE_CHECKING import numpy as np @@ -45,6 +47,7 @@ estimate_marker_pose, rvec_tvec_to_transform, ) +from dimos.types.timestamped import TimestampedBufferCollection from dimos.utils.logging_config import setup_logger if TYPE_CHECKING: @@ -72,6 +75,39 @@ def _pose_tuple_to_transform( ) +def _average_marker_pose( + buffer: TimestampedBufferCollection[Detection3DMarker], +) -> tuple[Vector3, Quaternion]: + """Mean translation; quaternion mean with hemisphere alignment. + + Quaternions q and -q encode the same rotation, so naive averaging can + cancel. We pick the first sample as a hemisphere reference and flip the + sign of any sample whose dot product against it is negative before + summing. For closely-spaced rotations within a short window this is + indistinguishable from a proper SLERP-style average. + """ + items = list(buffer) + n = len(items) + cx = sum(d.center.x for d in items) / n + cy = sum(d.center.y for d in items) / n + cz = sum(d.center.z for d in items) / n + + ref = items[0].orientation + qsx = qsy = qsz = qsw = 0.0 + for d in items: + q = d.orientation + s = -1.0 if (q.x * ref.x + q.y * ref.y + q.z * ref.z + q.w * ref.w) < 0 else 1.0 + qsx += s * q.x + qsy += s * q.y + qsz += s * q.z + qsw += s * q.w + norm = math.sqrt(qsx * qsx + qsy * qsy + qsz * qsz + qsw * qsw) + return ( + Vector3(cx, cy, cz), + Quaternion(qsx / norm, qsy / norm, qsz / norm, qsw / norm), + ) + + class DetectMarkers(Transformer[Image, Detection3DMarker]): """Detect fiducial markers and emit one world-pose observation per marker.""" @@ -81,15 +117,27 @@ def __init__( marker_length_m: float, aruco_dictionary: str = "DICT_APRILTAG_36h11", world_frame: str = "world", + smoothing_window: float = 0.0, ) -> None: if marker_length_m <= 0: raise ValueError(f"marker_length_m must be > 0, got {marker_length_m}") + if smoothing_window < 0: + raise ValueError(f"smoothing_window must be >= 0, got {smoothing_window}") self.camera_info = camera_info self.marker_length_m = marker_length_m self.aruco_dictionary = aruco_dictionary self.world_frame = world_frame + self.smoothing_window = smoothing_window self._detector = create_aruco_detector(aruco_dictionary) self._cam_mtx, self._dist = camera_info_to_cv_matrices(camera_info) + # Per marker_id sliding-window buffer of raw detections, used to emit + # smoothed pose updates when ``smoothing_window > 0``. + self._buffers: dict[int, TimestampedBufferCollection[Detection3DMarker]] = {} + # Tracking (smoothing only): if a marker_id reappears after a gap + # larger than the buffer window, treat it as a new track. track_id + # increments monotonically across the whole stream so it's unique. + self._marker_to_track: dict[int, int] = {} + self._next_track_id = 0 def __call__( self, upstream: Iterator[Observation[Image]] @@ -155,9 +203,21 @@ def __call__( xy_max = corners_2d.max(axis=0) bbox = (float(xy_min[0]), float(xy_min[1]), float(xy_max[0]), float(xy_max[1])) + # Decide track_id (only meaningful when smoothing is on). + # Without smoothing, track_id == marker_id (legacy behavior). + if self.smoothing_window > 0: + prior_buf = self._buffers.get(mid) + prior_last = prior_buf.last() if prior_buf is not None else None + if prior_last is None or (obs.ts - prior_last.ts) > self.smoothing_window: + self._next_track_id += 1 + self._marker_to_track[mid] = self._next_track_id + track_id = self._marker_to_track[mid] + else: + track_id = mid + det = Detection3DMarker( bbox=bbox, - track_id=mid, + track_id=track_id, class_id=mid, confidence=1.0, name=f"marker_{mid}", @@ -173,4 +233,29 @@ def __call__( dictionary=self.aruco_dictionary, ) - yield obs.derive(data=det, pose=t_world_marker).tag(marker_id=mid) + yielded_det = det + yielded_pose = t_world_marker + if self.smoothing_window > 0: + # Buffer raw detections per marker_id over a sliding + # window; emit the windowed-mean pose so each successive + # detection refines the same marker's estimate instead + # of producing a fresh independent observation. + buf = self._buffers.setdefault( + mid, TimestampedBufferCollection(self.smoothing_window) + ) + buf.add(det) + avg_center, avg_orient = _average_marker_pose(buf) + yielded_det = dataclasses.replace( + det, center=avg_center, orientation=avg_orient + ) + yielded_pose = Transform( + translation=avg_center, + rotation=avg_orient, + frame_id=self.world_frame, + child_frame_id=f"marker_{mid}", + ts=obs.ts, + ) + + yield obs.derive(data=yielded_det, pose=yielded_pose).tag( + marker_id=mid, track_id=track_id + ) diff --git a/dimos/utils/cli/markers_rrd.py b/dimos/utils/cli/markers_rrd.py index 3b67f9c2ea..b1e072d1e1 100644 --- a/dimos/utils/cli/markers_rrd.py +++ b/dimos/utils/cli/markers_rrd.py @@ -53,13 +53,16 @@ def main( out: Path = typer.Option(..., "--out", help="Output .rrd path"), marker_size: float = typer.Option(0.1, "--marker-size", help="Marker edge length (m)"), marker_max_speed: float = typer.Option( - 0.15, "--marker-max-speed", help="Detection speed gate (m/s); 0 disables" + 0.5, "--marker-max-speed", help="Detection speed gate (m/s); 0 disables" ), marker_max_rot_rate: float = typer.Option( - 30.0, "--marker-max-rot-rate", help="Detection rot gate (deg/s); 0 disables" + 50.0, "--marker-max-rot-rate", help="Detection rot gate (deg/s); 0 disables" ), quality_window: float = typer.Option( - 0.25, "--quality-window", help="Sharpest-frame window for detection (s)" + 0.1, "--quality-window", help="Sharpest-frame window for detection (s)" + ), + smoothing_window: float = typer.Option( + 5.0, "--smoothing-window", help="Buffer window for averaged-track pass (s); 0 disables" ), ) -> None: db_path = resolve_named_path(dataset, ".db") @@ -172,12 +175,55 @@ def main( ) print(f"detections: {n_det}") - print(f"wrote {out}") - print(f"open with: rerun {out}") - - -if __name__ == "__main__": - typer.run(main) + # ---- pass 4: averaged tracks (smoothing_window > 0 → per-track ids) ---- + # Re-runs the same filtered pipeline through a smoothing detector; + # each track yields one entity that updates as the windowed average + # refines. Color stable per track_id for visual identity. + if smoothing_window > 0: + xf_tracked = DetectMarkers( + camera_info=cam_info, + marker_length_m=marker_size, + smoothing_window=smoothing_window, + ) + pipeline_tracked: Stream[Any] = color_image.transform( + QualityWindow(lambda img: img.sharpness, window=quality_window) + ) + if marker_max_speed > 0: + pipeline_tracked = pipeline_tracked.transform( + SpeedLimit( + max_mps=marker_max_speed, + max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None, + ) + ) + seen_tracks: set[int] = set() + n_updates = 0 + for det_obs in pipeline_tracked.transform(xf_tracked): + d = det_obs.data + rr.set_time(TIMELINE, timestamp=det_obs.ts) + color = Color.from_cmap("tab10", (d.track_id % 10) / 10.0).rgb_u8() + rr.log( + f"world/tracks/track_{d.track_id:04d}", + rr.Boxes3D( + centers=[(d.center.x, d.center.y, d.center.z)], + half_sizes=[(marker_size / 2, marker_size / 2, 0.005)], + quaternions=[ + rr.Quaternion( + xyzw=[ + d.orientation.x, + d.orientation.y, + d.orientation.z, + d.orientation.w, + ] + ) + ], + colors=[color], + fill_mode=rr.components.FillMode.Solid, + labels=[f"track={d.track_id} id={d.marker_id}"], + ), + ) + seen_tracks.add(d.track_id) + n_updates += 1 + print(f"tracks: {len(seen_tracks)} unique, {n_updates} updates") print(f"wrote {out}") print(f"open with: rerun {out}") From e529913c9de461f0ced54d38e33edad8bcd37d59 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 17:00:06 +0800 Subject: [PATCH 20/64] sliding window 7.5s --- dimos/utils/cli/markers_rrd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/utils/cli/markers_rrd.py b/dimos/utils/cli/markers_rrd.py index b1e072d1e1..eb5306edf7 100644 --- a/dimos/utils/cli/markers_rrd.py +++ b/dimos/utils/cli/markers_rrd.py @@ -62,7 +62,7 @@ def main( 0.1, "--quality-window", help="Sharpest-frame window for detection (s)" ), smoothing_window: float = typer.Option( - 5.0, "--smoothing-window", help="Buffer window for averaged-track pass (s); 0 disables" + 7.5, "--smoothing-window", help="Buffer window for averaged-track pass (s); 0 disables" ), ) -> None: db_path = resolve_named_path(dataset, ".db") From 5751d48deb2cbd123d9965536b5d3273da354dae Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 17:28:36 +0800 Subject: [PATCH 21/64] detector tuned --- dimos/robot/cli/dimos.py | 16 ++++++++-- dimos/utils/cli/map.py | 65 +++++++++++++++++++++++----------------- 2 files changed, 52 insertions(+), 29 deletions(-) diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 041891a135..5c67c3e0e1 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -710,15 +710,25 @@ def map_cmd( 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" ), marker_max_speed: float = typer.Option( - 0.05, + 0.5, "--marker-max-speed", help="Skip frames where robot is moving faster than this (m/s); 0 disables", ), marker_max_rot_rate: float = typer.Option( - 15.0, + 50.0, "--marker-max-rot-rate", help="Skip frames where robot is rotating faster than this (deg/s); 0 disables", ), + marker_quality_window: float = typer.Option( + 0.1, + "--marker-quality-window", + help="Sharpest-frame window for marker detection (s)", + ), + marker_smoothing: float = typer.Option( + 7.5, + "--marker-smoothing", + help="Sliding-window track buffer for marker pose averaging (s); 0 disables (one box per raw detection)", + ), ) -> None: """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" from dimos.utils.cli.map import main as map_main @@ -737,6 +747,8 @@ def map_cmd( marker_size=marker_size, marker_max_speed=marker_max_speed, marker_max_rot_rate=marker_max_rot_rate, + marker_quality_window=marker_quality_window, + marker_smoothing=marker_smoothing, ) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 554e1878d2..3c59c9088b 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -104,15 +104,25 @@ def main( 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" ), marker_max_speed: float = typer.Option( - 0.1, + 0.5, "--marker-max-speed", help="Skip frames where robot is moving faster than this (m/s); 0 disables", ), marker_max_rot_rate: float = typer.Option( - 15, + 50.0, "--marker-max-rot-rate", help="Skip frames where robot is rotating faster than this (deg/s); 0 disables", ), + marker_quality_window: float = typer.Option( + 0.1, + "--marker-quality-window", + help="Sharpest-frame window for marker detection (s)", + ), + marker_smoothing: float = typer.Option( + 7.5, + "--marker-smoothing", + help="Sliding-window track buffer for marker pose averaging (s); 0 disables (one box per raw detection)", + ), ) -> None: db_path = resolve_named_path(dataset, ".db") if export or full_pgo: @@ -229,13 +239,14 @@ def main( xf = DetectMarkers( camera_info=_camera_info_static(), marker_length_m=marker_size, + smoothing_window=marker_smoothing, ) - # 2Hz quality-gated: keep only the sharpest frame per 0.5s window, - # then drop frames where the robot was moving (linear + rotational) - # faster than the limits — speed/rate are averaged across the window. + # Keep the sharpest frame per --marker-quality-window window, then + # drop frames where the robot was moving (linear + rotational) faster + # than the limits. Defaults match markers_rrd.py so positions agree. pipeline: Stream[Image] = color_image.tap( progress(color_image.count(), "detecting markers") - ).transform(QualityWindow(lambda img: img.sharpness, window=0.5)) + ).transform(QualityWindow(lambda img: img.sharpness, window=marker_quality_window)) if marker_max_speed > 0: pipeline = pipeline.transform( SpeedLimit( @@ -243,9 +254,21 @@ def main( max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None, ) ) - marker_dets = pipeline.transform(xf).to_list() - unique = sorted({obs.data.marker_id for obs in marker_dets}) - print(f"markers: {len(marker_dets)} detections across {len(unique)} unique ids {unique}") + all_dets = pipeline.transform(xf).to_list() + if marker_smoothing > 0: + # Keep only the latest emission per track_id — that's the most + # averaged pose, drawn once per tracked marker session. + by_track: dict[int, Observation[Any]] = {} + for d in all_dets: + by_track[d.data.track_id] = d + marker_dets = list(by_track.values()) + else: + marker_dets = all_dets + unique_ids = sorted({obs.data.marker_id for obs in marker_dets}) + print( + f"markers: {len(marker_dets)} entries from {len(all_dets)} raw detections " + f"across {len(unique_ids)} unique ids {unique_ids}" + ) if not no_gui: rerun_init("dimos map tool", spawn=True) @@ -318,24 +341,12 @@ def main( ) for d in marker_dets ] - # Color mode: turbo over detection time vs. tab10 over marker id. - COLOR_BY_TIME = True - if COLOR_BY_TIME: - ts_min = min(d.ts for d in marker_dets) - ts_max = max(d.ts for d in marker_dets) - ts_span = ts_max - ts_min if ts_max > ts_min else 1.0 - colors = [ - Color.from_cmap("turbo", (d.ts - ts_min) / ts_span).rgb_u8() - for d in marker_dets - ] - else: - unique_ids = sorted({d.data.marker_id for d in marker_dets}) - id_to_color = { - mid: Color.from_cmap("tab10", (i % 10) / 10.0).rgb_u8() - for i, mid in enumerate(unique_ids) - } - colors = [id_to_color[d.data.marker_id] for d in marker_dets] - labels = [f"id={d.data.marker_id}" for d in marker_dets] + # One entry per tracked marker session — color stable per track_id. + colors = [ + Color.from_cmap("tab10", (d.data.track_id % 10) / 10.0).rgb_u8() + for d in marker_dets + ] + labels = [f"track={d.data.track_id} id={d.data.marker_id}" for d in marker_dets] rr.log( "world/raw_map/markers/fill", rr.Boxes3D( From dcd7f7b9b746b2b40a1f3264b8dbe0618d2870c1 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 21:22:34 +0800 Subject: [PATCH 22/64] nicer map --- dimos/utils/cli/map.py | 45 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 3c59c9088b..e6fd428428 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -288,7 +288,7 @@ def main( full_pgo_map.to_rerun(voxel_size=voxel / 2), static=True, ) - STEM_HEIGHT = 2.0 # lift pose-graph viz above the map for legibility + STEM_HEIGHT = 0 # lift pose-graph viz above the map for legibility if pgo_path: rr.log( "world/pgo_map/path", @@ -300,7 +300,7 @@ def main( hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path] rr.log( "world/pgo_map/pgo/keyframes", - rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.025]), + rr.Points3D(positions=hovered, colors=[[255, 0, 0]], radii=[0.025]), static=True, ) if pgo and loops: @@ -347,6 +347,12 @@ def main( for d in marker_dets ] labels = [f"track={d.data.track_id} id={d.data.marker_id}" for d in marker_dets] + # Pin pattern (from dimos/memory2/vis/space/rerun.py): thin + # vertical line from each marker with the label floating at the + # top so multi-marker labels never overlap the boxes. + MARKER_STEM = 1.0 + pin_strips = [[(cx, cy, cz), (cx, cy, cz + MARKER_STEM)] for (cx, cy, cz) in centers] + label_positions = [(cx, cy, cz + MARKER_STEM + 0.01) for (cx, cy, cz) in centers] rr.log( "world/raw_map/markers/fill", rr.Boxes3D( @@ -355,7 +361,6 @@ def main( quaternions=quaternions, colors=colors, fill_mode=rr.components.FillMode.Solid, - labels=labels, ), static=True, ) @@ -371,6 +376,18 @@ def main( ), static=True, ) + rr.log( + "world/raw_map/markers/pin", + rr.LineStrips3D(strips=pin_strips, colors=colors, radii=[0.005]), + static=True, + ) + rr.log( + "world/raw_map/markers/label", + rr.Points3D( + positions=label_positions, labels=labels, colors=colors, radii=[0.001] * n + ), + static=True, + ) if interp is not None: # PGO-corrected marker poses. interp(ts) maps raw_world → @@ -402,6 +419,12 @@ def main( corrected.rotation.w, ) ) + pgo_pin_strips = [ + [(cx, cy, cz), (cx, cy, cz + MARKER_STEM)] for (cx, cy, cz) in pgo_centers + ] + pgo_label_positions = [ + (cx, cy, cz + MARKER_STEM + 0.01) for (cx, cy, cz) in pgo_centers + ] rr.log( "world/pgo_map/markers/fill", rr.Boxes3D( @@ -410,7 +433,6 @@ def main( quaternions=pgo_quaternions, colors=colors, fill_mode=rr.components.FillMode.Solid, - labels=labels, ), static=True, ) @@ -426,6 +448,21 @@ def main( ), static=True, ) + rr.log( + "world/pgo_map/markers/pin", + rr.LineStrips3D(strips=pgo_pin_strips, colors=colors, radii=[0.005]), + static=True, + ) + rr.log( + "world/pgo_map/markers/label", + rr.Points3D( + positions=pgo_label_positions, + labels=labels, + colors=colors, + radii=[0.001] * n, + ), + static=True, + ) if export and pgo_map is not None: from pathlib import Path From a6d4dcc98f5671bd3e685546f8c994c93e71e411 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 21:24:32 +0800 Subject: [PATCH 23/64] marker eval first pass --- dimos/utils/cli/marker_eval.py | 225 +++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 dimos/utils/cli/marker_eval.py diff --git a/dimos/utils/cli/marker_eval.py b/dimos/utils/cli/marker_eval.py new file mode 100644 index 0000000000..47aa834c64 --- /dev/null +++ b/dimos/utils/cli/marker_eval.py @@ -0,0 +1,225 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Loop-closure tuning eval — runs the dimos map pipeline over all +hk_village recordings and reports two totals to minimize: + +- TOTAL_PGO_TIME (s): wall-clock for the PGO loop across all recordings +- TOTAL_SPREAD (m): per-recording sum of pairwise distances between + final-smoothed marker positions of the same marker_id (PGO-corrected), + summed across all marker_ids across all recordings. Smaller = tighter + loop closures. + +Usage: + uv run python -m dimos.utils.cli.marker_eval + uv run python -m dimos.utils.cli.marker_eval hk_village1 +""" + +from __future__ import annotations + +from concurrent.futures import ProcessPoolExecutor, as_completed +import multiprocessing as mp +import os +import time +from typing import Any + +from rich.console import Console +from rich.table import Table +import typer + +from dimos.mapping.relocalization.pgo import ( + LoopClosure, + keyframes_to_corrections, + make_interpolator, + pgo_keyframes, +) +from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.stream import Stream +from dimos.memory2.transform import QualityWindow, SpeedLimit +from dimos.memory2.type.observation import Observation +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.sensor_msgs.Image import Image +from dimos.perception.fiducial.marker_transformer import DetectMarkers +from dimos.robot.unitree.go2.connection import _camera_info_static +from dimos.utils.data import get_data + +DEFAULT_DATASETS = [f"hk_village{i}" for i in range(1, 7)] + + +def _pairwise_sum(pts: list[tuple[float, float, float]]) -> float: + """Sum of euclidean distances over all unordered pairs.""" + total = 0.0 + for i, a in enumerate(pts): + for b in pts[i + 1 :]: + dx, dy, dz = a[0] - b[0], a[1] - b[1], a[2] - b[2] + total += (dx * dx + dy * dy + dz * dz) ** 0.5 + return total + + +def _eval_recording( + name: str, + *, + marker_size: float, + marker_max_speed: float, + marker_max_rot_rate: float, + marker_quality_window: float, + marker_smoothing: float, +) -> tuple[float, float]: + """Returns (pgo_time_s, spread_m) for one recording.""" + db_path = get_data(f"{name}.db") + cam_info = _camera_info_static() + + store = SqliteStore(path=str(db_path)) + with store: + lidar = store.streams.lidar + + # --- PGO (timed) --- + # .tap() captures each keyframe as make_interpolator iterates the + # corrections stream, so no extra pass over lidar. + keyframes: list[Any] = [] + loops_out: list[LoopClosure] = [] + t0 = time.perf_counter() + kf_stream = pgo_keyframes(lidar, loop_closures_out=loops_out).tap( + lambda obs: keyframes.append(obs.data) + ) + interp = make_interpolator(keyframes_to_corrections(kf_stream)) + pgo_time = time.perf_counter() - t0 + + # --- Marker detection (same pipeline as dimos map / markers_rrd) --- + color_image = store.stream("color_image", Image) + xf = DetectMarkers( + camera_info=cam_info, + marker_length_m=marker_size, + smoothing_window=marker_smoothing, + ) + pipeline: Stream[Image] = color_image.transform( + QualityWindow(lambda img: img.sharpness, window=marker_quality_window) + ) + if marker_max_speed > 0: + pipeline = pipeline.transform( + SpeedLimit( + max_mps=marker_max_speed, + max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None, + ) + ) + all_dets = pipeline.transform(xf).to_list() + + # Dedup by track_id → final smoothed pose per track. + by_track: dict[int, Observation[Any]] = {} + for d in all_dets: + by_track[d.data.track_id] = d + tracks = list(by_track.values()) + + # PGO-correct each track's pose; group by marker_id. + by_marker: dict[int, list[tuple[float, float, float]]] = {} + for d in tracks: + raw_tf = Transform( + translation=d.data.center, + rotation=d.data.orientation, + frame_id="world", + child_frame_id=f"marker_{d.data.marker_id}", + ts=d.ts, + ) + corrected = interp(d.ts) + raw_tf + t = corrected.translation + by_marker.setdefault(d.data.marker_id, []).append((t.x, t.y, t.z)) + + spread = sum(_pairwise_sum(v) for v in by_marker.values()) + return pgo_time, spread + + +def main( + datasets: list[str] = typer.Argument( + None, help="Recordings to eval; defaults to hk_village1..6" + ), + marker_size: float = typer.Option(0.1, "--marker-size"), + marker_max_speed: float = typer.Option(0.5, "--marker-max-speed"), + marker_max_rot_rate: float = typer.Option(50.0, "--marker-max-rot-rate"), + marker_quality_window: float = typer.Option(0.1, "--marker-quality-window"), + marker_smoothing: float = typer.Option(7.5, "--marker-smoothing"), + workers: int = typer.Option( + 0, "--workers", "-j", help="Parallel workers (0 = min(len(datasets), cpu_count))" + ), +) -> None: + names = datasets or DEFAULT_DATASETS + console = Console() + + n_workers = workers or min(len(names), os.cpu_count() or 1) + wall_start = time.perf_counter() + results: dict[str, tuple[float, float]] = {} + + if n_workers <= 1 or len(names) <= 1: + for name in names: + console.print(f"[dim]eval {name}...[/dim]") + results[name] = _eval_recording( + name, + marker_size=marker_size, + marker_max_speed=marker_max_speed, + marker_max_rot_rate=marker_max_rot_rate, + marker_quality_window=marker_quality_window, + marker_smoothing=marker_smoothing, + ) + else: + console.print(f"[dim]running {len(names)} recordings on {n_workers} workers[/dim]") + # "spawn" — workers are fresh interpreters. Forking after cv2/openmp + # have spun threads in the parent deadlocks because the threads + # don't survive fork; spawn sidesteps it entirely. + with ProcessPoolExecutor(max_workers=n_workers, mp_context=mp.get_context("spawn")) as ex: + futures = { + ex.submit( + _eval_recording, + name, + marker_size=marker_size, + marker_max_speed=marker_max_speed, + marker_max_rot_rate=marker_max_rot_rate, + marker_quality_window=marker_quality_window, + marker_smoothing=marker_smoothing, + ): name + for name in names + } + for f in as_completed(futures): + name = futures[f] + results[name] = f.result() + pgo_time, spread = results[name] + console.print(f" done {name:>14} ({pgo_time:5.2f}s, spread {spread:7.3f}m)") + + wall = time.perf_counter() - wall_start + + table = Table(title="loop-closure eval") + table.add_column("recording") + table.add_column("pgo_time_s", justify="right") + table.add_column("spread_m", justify="right") + + total_pgo = 0.0 + total_spread = 0.0 + for name in names: # original order + pgo_time, spread = results[name] + table.add_row(name, f"{pgo_time:.2f}", f"{spread:.3f}") + total_pgo += pgo_time + total_spread += spread + + table.add_section() + table.add_row( + "[bold]TOTAL[/bold]", + f"[bold]{total_pgo:.2f}[/bold]", + f"[bold]{total_spread:.3f}[/bold]", + ) + console.print(table) + print(f"TOTAL_PGO_TIME={total_pgo:.2f}") + print(f"TOTAL_SPREAD={total_spread:.3f}") + print(f"WALL_TIME={wall:.2f}") + + +if __name__ == "__main__": + typer.run(main) From 89eb996e05724c35a91edbcb87c7457bf9f859ce Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 21:43:50 +0800 Subject: [PATCH 24/64] extracted loop closure code from relocalization code --- .../loop_closure/eval.py} | 36 +++++++------------ .../{relocalization => loop_closure}/pgo.py | 0 .../test_pgo.py | 2 +- .../loop_closure/utils}/markers_rrd.py | 0 dimos/utils/cli/map.py | 2 +- 5 files changed, 15 insertions(+), 25 deletions(-) rename dimos/{utils/cli/marker_eval.py => mapping/loop_closure/eval.py} (88%) rename dimos/mapping/{relocalization => loop_closure}/pgo.py (100%) rename dimos/mapping/{relocalization => loop_closure}/test_pgo.py (99%) rename dimos/{utils/cli => mapping/loop_closure/utils}/markers_rrd.py (100%) diff --git a/dimos/utils/cli/marker_eval.py b/dimos/mapping/loop_closure/eval.py similarity index 88% rename from dimos/utils/cli/marker_eval.py rename to dimos/mapping/loop_closure/eval.py index 47aa834c64..5fe3f6ae4d 100644 --- a/dimos/utils/cli/marker_eval.py +++ b/dimos/mapping/loop_closure/eval.py @@ -22,8 +22,8 @@ loop closures. Usage: - uv run python -m dimos.utils.cli.marker_eval - uv run python -m dimos.utils.cli.marker_eval hk_village1 + uv run python -m dimos.mapping.loop_closure.marker_eval + uv run python -m dimos.mapping.loop_closure.marker_eval hk_village1 """ from __future__ import annotations @@ -34,11 +34,9 @@ import time from typing import Any -from rich.console import Console -from rich.table import Table import typer -from dimos.mapping.relocalization.pgo import ( +from dimos.mapping.loop_closure.pgo import ( LoopClosure, keyframes_to_corrections, make_interpolator, @@ -153,7 +151,6 @@ def main( ), ) -> None: names = datasets or DEFAULT_DATASETS - console = Console() n_workers = workers or min(len(names), os.cpu_count() or 1) wall_start = time.perf_counter() @@ -161,7 +158,7 @@ def main( if n_workers <= 1 or len(names) <= 1: for name in names: - console.print(f"[dim]eval {name}...[/dim]") + print(f"eval {name}...") results[name] = _eval_recording( name, marker_size=marker_size, @@ -171,7 +168,7 @@ def main( marker_smoothing=marker_smoothing, ) else: - console.print(f"[dim]running {len(names)} recordings on {n_workers} workers[/dim]") + print(f"running {len(names)} recordings on {n_workers} workers") # "spawn" — workers are fresh interpreters. Forking after cv2/openmp # have spun threads in the parent deadlocks because the threads # don't survive fork; spawn sidesteps it entirely. @@ -192,30 +189,23 @@ def main( name = futures[f] results[name] = f.result() pgo_time, spread = results[name] - console.print(f" done {name:>14} ({pgo_time:5.2f}s, spread {spread:7.3f}m)") + print(f" done {name:>14} pgo={pgo_time:5.2f}s spread={spread:7.3f}m") wall = time.perf_counter() - wall_start - table = Table(title="loop-closure eval") - table.add_column("recording") - table.add_column("pgo_time_s", justify="right") - table.add_column("spread_m", justify="right") - total_pgo = 0.0 total_spread = 0.0 + print() + print(f"{'recording':<14} {'pgo_time_s':>10} {'spread_m':>10}") + print("-" * 40) for name in names: # original order pgo_time, spread = results[name] - table.add_row(name, f"{pgo_time:.2f}", f"{spread:.3f}") + print(f"{name:<14} {pgo_time:>10.2f} {spread:>10.3f}") total_pgo += pgo_time total_spread += spread - - table.add_section() - table.add_row( - "[bold]TOTAL[/bold]", - f"[bold]{total_pgo:.2f}[/bold]", - f"[bold]{total_spread:.3f}[/bold]", - ) - console.print(table) + print("-" * 40) + print(f"{'TOTAL':<14} {total_pgo:>10.2f} {total_spread:>10.3f}") + print() print(f"TOTAL_PGO_TIME={total_pgo:.2f}") print(f"TOTAL_SPREAD={total_spread:.3f}") print(f"WALL_TIME={wall:.2f}") diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/loop_closure/pgo.py similarity index 100% rename from dimos/mapping/relocalization/pgo.py rename to dimos/mapping/loop_closure/pgo.py diff --git a/dimos/mapping/relocalization/test_pgo.py b/dimos/mapping/loop_closure/test_pgo.py similarity index 99% rename from dimos/mapping/relocalization/test_pgo.py rename to dimos/mapping/loop_closure/test_pgo.py index 76f8b378eb..b6f5ea3d8e 100644 --- a/dimos/mapping/relocalization/test_pgo.py +++ b/dimos/mapping/loop_closure/test_pgo.py @@ -20,7 +20,7 @@ import pytest from scipy.spatial.transform import Rotation -from dimos.mapping.relocalization.pgo import ( +from dimos.mapping.loop_closure.pgo import ( Keyframe, PGOConfig, _obs_to_pose3, diff --git a/dimos/utils/cli/markers_rrd.py b/dimos/mapping/loop_closure/utils/markers_rrd.py similarity index 100% rename from dimos/utils/cli/markers_rrd.py rename to dimos/mapping/loop_closure/utils/markers_rrd.py diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 3c59c9088b..07555b375d 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -167,7 +167,7 @@ def main( loops: list[Any] = [] interp: Any | None = None if pgo: - from dimos.mapping.relocalization.pgo import ( + from dimos.mapping.loop_closure.pgo import ( LoopClosure, keyframes_to_corrections, make_interpolator, From 186a4bef1380777a9f53292ab37f33a258375a82 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 21:46:46 +0800 Subject: [PATCH 25/64] better eval --- dimos/mapping/loop_closure/eval.py | 60 ++++++++---------------------- 1 file changed, 15 insertions(+), 45 deletions(-) diff --git a/dimos/mapping/loop_closure/eval.py b/dimos/mapping/loop_closure/eval.py index 5fe3f6ae4d..c41817b032 100644 --- a/dimos/mapping/loop_closure/eval.py +++ b/dimos/mapping/loop_closure/eval.py @@ -22,15 +22,12 @@ loop closures. Usage: - uv run python -m dimos.mapping.loop_closure.marker_eval - uv run python -m dimos.mapping.loop_closure.marker_eval hk_village1 + uv run python -m dimos.mapping.loop_closure.eval + uv run python -m dimos.mapping.loop_closure.eval hk_village1 """ from __future__ import annotations -from concurrent.futures import ProcessPoolExecutor, as_completed -import multiprocessing as mp -import os import time from typing import Any @@ -146,51 +143,24 @@ def main( marker_max_rot_rate: float = typer.Option(50.0, "--marker-max-rot-rate"), marker_quality_window: float = typer.Option(0.1, "--marker-quality-window"), marker_smoothing: float = typer.Option(7.5, "--marker-smoothing"), - workers: int = typer.Option( - 0, "--workers", "-j", help="Parallel workers (0 = min(len(datasets), cpu_count))" - ), ) -> None: names = datasets or DEFAULT_DATASETS - n_workers = workers or min(len(names), os.cpu_count() or 1) + # Sequential: PGO (Open3D) and marker detection (cv2/numpy) already + # saturate available cores internally via OpenMP — process-level + # parallelism only adds contention. wall_start = time.perf_counter() results: dict[str, tuple[float, float]] = {} - - if n_workers <= 1 or len(names) <= 1: - for name in names: - print(f"eval {name}...") - results[name] = _eval_recording( - name, - marker_size=marker_size, - marker_max_speed=marker_max_speed, - marker_max_rot_rate=marker_max_rot_rate, - marker_quality_window=marker_quality_window, - marker_smoothing=marker_smoothing, - ) - else: - print(f"running {len(names)} recordings on {n_workers} workers") - # "spawn" — workers are fresh interpreters. Forking after cv2/openmp - # have spun threads in the parent deadlocks because the threads - # don't survive fork; spawn sidesteps it entirely. - with ProcessPoolExecutor(max_workers=n_workers, mp_context=mp.get_context("spawn")) as ex: - futures = { - ex.submit( - _eval_recording, - name, - marker_size=marker_size, - marker_max_speed=marker_max_speed, - marker_max_rot_rate=marker_max_rot_rate, - marker_quality_window=marker_quality_window, - marker_smoothing=marker_smoothing, - ): name - for name in names - } - for f in as_completed(futures): - name = futures[f] - results[name] = f.result() - pgo_time, spread = results[name] - print(f" done {name:>14} pgo={pgo_time:5.2f}s spread={spread:7.3f}m") - + for name in names: + print(f"eval {name}...") + results[name] = _eval_recording( + name, + marker_size=marker_size, + marker_max_speed=marker_max_speed, + marker_max_rot_rate=marker_max_rot_rate, + marker_quality_window=marker_quality_window, + marker_smoothing=marker_smoothing, + ) wall = time.perf_counter() - wall_start total_pgo = 0.0 From b2b7afbcdbc2519e8956b625b98599cb48c2b9a0 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 15:40:32 +0800 Subject: [PATCH 26/64] observation pose refactor --- dimos/mapping/loop_closure/pgo.py | 26 ++--- dimos/mapping/loop_closure/test_pgo.py | 27 +++-- dimos/memory2/module.py | 5 +- dimos/memory2/observationstore/sqlite.py | 31 ++--- dimos/memory2/test_save.py | 2 +- dimos/memory2/test_stream.py | 2 +- dimos/memory2/transform.py | 32 ++--- dimos/memory2/type/filter.py | 10 +- dimos/memory2/type/observation.py | 141 ++++++++++++++++++----- dimos/memory2/vis/space/rerun.py | 13 ++- dimos/memory2/vis/space/space.py | 5 +- dimos/memory2/vis/space/svg.py | 7 +- dimos/msgs/geometry_msgs/Quaternion.py | 13 +++ dimos/msgs/sensor_msgs/PointCloud2.py | 2 +- dimos/utils/cli/map.py | 15 +-- 15 files changed, 200 insertions(+), 131 deletions(-) diff --git a/dimos/mapping/loop_closure/pgo.py b/dimos/mapping/loop_closure/pgo.py index af94921c9d..84a8a0fa24 100644 --- a/dimos/mapping/loop_closure/pgo.py +++ b/dimos/mapping/loop_closure/pgo.py @@ -170,17 +170,15 @@ def pgo_keyframes( for obs in stream: if on_frame is not None: on_frame(obs) - if obs.pose is None: + p = obs.pose_tuple + if p is None: continue - # Skip placeholder poses (origin position OR zero quaternion). - if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: + # Skip placeholder poses written before odom converges: + # all-zero translation OR all-zero (uninitialized) quaternion. + # An identity quaternion (qw=1) is valid and must not be filtered. + if p[0] == 0 and p[1] == 0 and p[2] == 0: continue - if ( - obs.pose[3] == 0 - and obs.pose[4] == 0 - and obs.pose[5] == 0 - and (obs.pose[6] == 0 or obs.pose[6] == 1) - ): + if p[3] == 0 and p[4] == 0 and p[5] == 0 and p[6] == 0: continue local_pose = _obs_to_pose3(obs) pgo.process(local_pose, obs.ts, obs.data) @@ -264,10 +262,11 @@ def apply_corrections( def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: for obs in upstream: - if obs.pose is None: + ps = obs.pose_stamped + if ps is None: yield obs continue - raw_tf = Transform.from_pose(FRAME_BODY, obs.pose_stamped) + raw_tf = Transform.from_pose(FRAME_BODY, ps) # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw. # Observation normalizes Transform back to 7-tuple via __post_init__. corrected = interp(obs.ts) + raw_tf @@ -297,9 +296,10 @@ def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3: """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`.""" import gtsam # type: ignore[import-not-found,import-untyped] - if obs.pose is None: + p = obs.pose_tuple + if p is None: raise LookupError("No pose set on this observation") - x, y, z, qx, qy, qz, qw = obs.pose + x, y, z, qx, qy, qz, qw = p return gtsam.Pose3( gtsam.Rot3.Quaternion(float(qw), float(qx), float(qy), float(qz)), gtsam.Point3(float(x), float(y), float(z)), diff --git a/dimos/mapping/loop_closure/test_pgo.py b/dimos/mapping/loop_closure/test_pgo.py index b6f5ea3d8e..0042c19b2c 100644 --- a/dimos/mapping/loop_closure/test_pgo.py +++ b/dimos/mapping/loop_closure/test_pgo.py @@ -87,14 +87,13 @@ def test_observation_normalizes_transform_pose(self) -> None: ts=1.0, ) obs: Observation[int] = Observation(id=0, ts=1.0, pose=tf, _data=0) - assert isinstance(obs.pose, tuple) - assert obs.pose[0] == pytest.approx(1.5) - assert obs.pose[6] == pytest.approx(0.927) + assert obs.pose_tuple is not None + assert obs.pose_tuple[0] == pytest.approx(1.5) + assert obs.pose_tuple[6] == pytest.approx(0.927) - # derive() also runs the normalization via __post_init__. + # derive() also re-runs the normalization. derived = obs.derive(data=0, pose=tf) - assert isinstance(derived.pose, tuple) - assert derived.pose == obs.pose + assert derived.pose_tuple == obs.pose_tuple def test_observation_normalizes_posestamped(self) -> None: from dimos.memory2.type.observation import Observation @@ -102,8 +101,7 @@ def test_observation_normalizes_posestamped(self) -> None: ps = PoseStamped(ts=1.0, position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0)) obs: Observation[int] = Observation(id=0, ts=1.0, pose=ps, _data=0) - assert isinstance(obs.pose, tuple) - assert obs.pose == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0) + assert obs.pose_tuple == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0) def test_obs_to_pose3_roundtrip(self) -> None: from dimos.memory2.type.observation import Observation @@ -191,8 +189,8 @@ def test_apply_identity_corrections_preserves_poses(self) -> None: kfs = pgo_keyframes(lidar) corr = keyframes_to_corrections(kfs) corrected = apply_corrections(lidar, corr) - in_poses = [o.pose for o in lidar if o.pose is not None] - out_poses = [o.pose for o in corrected if o.pose is not None] + in_poses = [o.pose_tuple for o in lidar if o.pose_tuple is not None] + out_poses = [o.pose_tuple for o in corrected if o.pose_tuple is not None] assert len(in_poses) == len(out_poses) for p_in, p_out in zip(in_poses, out_poses, strict=True): for a, b in zip(p_in, p_out, strict=True): @@ -268,10 +266,11 @@ def test_pure_translation_shifts_poses(self) -> None: corrected = apply_corrections(lidar, corr) for obs in corrected: - assert obs.pose is not None - assert obs.pose[0] == pytest.approx(5.0, abs=1e-9) - assert obs.pose[1] == pytest.approx(0.0, abs=1e-9) - assert obs.pose[2] == pytest.approx(0.0, abs=1e-9) + p = obs.pose_tuple + assert p is not None + assert p[0] == pytest.approx(5.0, abs=1e-9) + assert p[1] == pytest.approx(0.0, abs=1e-9) + assert p[2] == pytest.approx(0.0, abs=1e-9) def test_passes_through_pose_none(self) -> None: mem = MemoryStore() diff --git a/dimos/memory2/module.py b/dimos/memory2/module.py index b584553bae..929d241882 100644 --- a/dimos/memory2/module.py +++ b/dimos/memory2/module.py @@ -234,7 +234,10 @@ def search(self, query: str) -> PoseStamped: def _similarity(obs: Observation[Any]) -> float: return cast("EmbeddedObservation[Any]", obs).similarity or 0.0 - return results.transform(peaks(key=_similarity, distance=1.0)).last().pose_stamped + best = results.transform(peaks(key=_similarity, distance=1.0)).last() + if best.pose_stamped is None: + raise LookupError("No pose on best search result") + return best.pose_stamped class RecorderConfig(MemoryModuleConfig): diff --git a/dimos/memory2/observationstore/sqlite.py b/dimos/memory2/observationstore/sqlite.py index 42f4193581..e82e867dc0 100644 --- a/dimos/memory2/observationstore/sqlite.py +++ b/dimos/memory2/observationstore/sqlite.py @@ -33,7 +33,7 @@ TimeRangeFilter, _xyz, ) -from dimos.memory2.type.observation import _UNLOADED, Observation +from dimos.memory2.type.observation import _UNLOADED, Observation, PoseTuple from dimos.memory2.utils.sqlite import open_disposable_sqlite_connection if TYPE_CHECKING: @@ -46,24 +46,6 @@ _IDENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") -def _decompose_pose(pose: Any) -> tuple[float, ...] | None: - if pose is None: - return None - if hasattr(pose, "position"): - pos = pose.position - orient = getattr(pose, "orientation", None) - x, y, z = float(pos.x), float(pos.y), float(getattr(pos, "z", 0.0)) - if orient is not None: - return (x, y, z, float(orient.x), float(orient.y), float(orient.z), float(orient.w)) - return (x, y, z, 0.0, 0.0, 0.0, 1.0) - if isinstance(pose, (list, tuple)): - vals = [float(v) for v in pose] - while len(vals) < 7: - vals.append(0.0 if len(vals) < 6 else 1.0) - return tuple(vals[:7]) - return None - - def _reconstruct_pose( x: float | None, y: float | None, @@ -72,7 +54,7 @@ def _reconstruct_pose( qy: float | None, qz: float | None, qw: float | None, -) -> tuple[float, ...] | None: +) -> PoseTuple | None: if x is None: return None return (x, y or 0.0, z or 0.0, qx or 0.0, qy or 0.0, qz or 0.0, qw or 1.0) @@ -328,17 +310,17 @@ def _row_to_obs(self, row: tuple[Any, ...], *, has_blob: bool = False) -> Observ # Scalar data stored inline in value column if value is not None: - return Observation(id=row_id, ts=ts, pose=pose, tags=tags, _data=value) + return Observation(id=row_id, ts=ts, pose_tuple=pose, tags=tags, _data=value) if has_blob and blob_data is not None: assert self._codec is not None, "codec is required for data loading" data = self._codec.decode(blob_data) - return Observation(id=row_id, ts=ts, pose=pose, tags=tags, _data=data) + return Observation(id=row_id, ts=ts, pose_tuple=pose, tags=tags, _data=data) return Observation( id=row_id, ts=ts, - pose=pose, + pose_tuple=pose, tags=tags, _data=_UNLOADED, ) @@ -353,7 +335,8 @@ def _ensure_tag_indexes(self, tags: dict[str, Any]) -> None: self._tag_indexes.add(key) def insert(self, obs: Observation[T]) -> int: - pose = _decompose_pose(obs.pose) + # Observation already normalizes pose to the storage 7-tuple — skip _decompose_pose. + pose = obs.pose_tuple tags_json = json.dumps(obs.tags) if obs.tags else "{}" value = obs._data if isinstance(obs._data, (int, float)) else None diff --git a/dimos/memory2/test_save.py b/dimos/memory2/test_save.py index eba8905974..ec4d627fc3 100644 --- a/dimos/memory2/test_save.py +++ b/dimos/memory2/test_save.py @@ -87,7 +87,7 @@ def test_save_preserves_data(self) -> None: obs = target.first() assert obs.data == 42 assert obs.ts == 1.0 - assert obs.pose == (1, 2, 3) + assert obs.pose_tuple == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0) assert obs.tags == {"label": "cat"} def test_save_with_transform(self) -> None: diff --git a/dimos/memory2/test_stream.py b/dimos/memory2/test_stream.py index f0aaf70137..32cb542b83 100644 --- a/dimos/memory2/test_stream.py +++ b/dimos/memory2/test_stream.py @@ -485,7 +485,7 @@ def test_derive_preserves_metadata(self): derived = obs.derive(data="transformed") assert derived.id == 42 assert derived.ts == 1.5 - assert derived.pose == (1, 2, 3) + assert derived.pose_tuple == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0) assert derived.tags == {"k": "v"} assert derived.data == "transformed" diff --git a/dimos/memory2/transform.py b/dimos/memory2/transform.py index a79ccf36b8..328c1de647 100644 --- a/dimos/memory2/transform.py +++ b/dimos/memory2/transform.py @@ -179,12 +179,11 @@ def speed() -> FnIterTransformer[Any, float]: def _speed(upstream: Iterator[Observation[Any]]) -> Iterator[Observation[float]]: prev: Observation[Any] | None = None for obs in upstream: - if prev is not None and obs.pose is not None and prev.pose is not None: - dx = obs.pose[0] - prev.pose[0] - dy = obs.pose[1] - prev.pose[1] - dz = obs.pose[2] - prev.pose[2] + p = obs.pose + pp = prev.pose if prev is not None else None + if prev is not None and p is not None and pp is not None: dt = obs.ts - prev.ts - v = math.sqrt(dx * dx + dy * dy + dz * dz) / dt if dt > 0 else 0.0 + v = (p.position - pp.position).length() / dt if dt > 0 else 0.0 yield obs.derive(data=v) prev = obs @@ -225,28 +224,17 @@ def __call__(self, upstream: Iterator[Observation[T]]) -> Iterator[Observation[T max_dps = self.max_dps max_rps = math.radians(max_dps) if max_dps is not None else None for obs in upstream: - if obs.pose is None: + p = obs.pose + if p is None: continue - if prev is not None and prev.pose is not None: + pp = prev.pose if prev is not None else None + if prev is not None and pp is not None: dt = obs.ts - prev.ts if dt > 0: - dx = obs.pose[0] - prev.pose[0] - dy = obs.pose[1] - prev.pose[1] - dz = obs.pose[2] - prev.pose[2] - v = math.sqrt(dx * dx + dy * dy + dz * dz) / dt + v = (p.position - pp.position).length() / dt ok = v <= max_mps if ok and max_rps is not None: - # rotation angle between two unit quaternions. - # |q · q'| collapses the double-cover sign ambiguity; - # clamp guards against numerical drift past 1. - dot = ( - obs.pose[3] * prev.pose[3] - + obs.pose[4] * prev.pose[4] - + obs.pose[5] * prev.pose[5] - + obs.pose[6] * prev.pose[6] - ) - angle = 2.0 * math.acos(min(1.0, abs(dot))) - ok = (angle / dt) <= max_rps + ok = (p.orientation.angle_to(pp.orientation) / dt) <= max_rps if ok: yield obs prev = obs diff --git a/dimos/memory2/type/filter.py b/dimos/memory2/type/filter.py index af453498fd..ddc0f86eb5 100644 --- a/dimos/memory2/type/filter.py +++ b/dimos/memory2/type/filter.py @@ -79,17 +79,15 @@ class NearFilter(Filter): radius: float = 0.0 def matches(self, obs: Observation[Any]) -> bool: - if obs.pose is None or self.pose is None: + p2 = obs.pose_tuple + if p2 is None or self.pose is None: return False p1 = self.pose - p2 = obs.pose - # Support both raw (x,y,z) tuples and PoseStamped objects + # NearFilter.pose accepts raw (x,y,z) tuples or PoseStamped-like objects. if hasattr(p1, "position"): p1 = p1.position - if hasattr(p2, "position"): - p2 = p2.position x1, y1, z1 = _xyz(p1) - x2, y2, z2 = _xyz(p2) + x2, y2, z2 = p2[0], p2[1], p2[2] dist_sq = (x1 - x2) ** 2 + (y1 - y2) ** 2 + (z1 - z2) ** 2 return dist_sq <= self.radius**2 diff --git a/dimos/memory2/type/observation.py b/dimos/memory2/type/observation.py index 179b48744e..038d05f161 100644 --- a/dimos/memory2/type/observation.py +++ b/dimos/memory2/type/observation.py @@ -14,7 +14,7 @@ from __future__ import annotations -from dataclasses import dataclass, field, fields +from dataclasses import dataclass, fields import sys import threading from typing import TYPE_CHECKING, Any, Generic, TypeVar, cast @@ -28,11 +28,14 @@ from collections.abc import Callable from dimos.models.embedding.base import Embedding + from dimos.msgs.geometry_msgs.Pose import Pose from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped T = TypeVar("T") R = TypeVar("R") +PoseTuple = tuple[float, float, float, float, float, float, float] + class _Unloaded: """Sentinel indicating data has not been loaded yet.""" @@ -46,18 +49,33 @@ def __repr__(self) -> str: _UNLOADED = _Unloaded() -def _normalize_pose(p: Any) -> tuple[float, float, float, float, float, float, float] | None: +def _to_tuple(p: Any) -> PoseTuple | None: """Coerce common pose shapes to the storage 7-tuple `(x, y, z, qx, qy, qz, qw)`. - Accepts: `None`, an already-normalized tuple, or any object with + Accepts: `None`, a 3- or 7-tuple, or any object with `translation`+`rotation` (Transform) or `position`+`orientation` - (Pose / PoseStamped) attributes. Duck-typed to avoid importing - `dimos.msgs.geometry_msgs` at module load. + (Pose / PoseStamped) attributes. 3-tuples are padded with the identity + quaternion. Duck-typed to avoid importing `dimos.msgs.geometry_msgs` + at module load. """ - if p is None or isinstance(p, tuple): - return p - trans = getattr(p, "translation", None) or getattr(p, "position", None) - rot = getattr(p, "rotation", None) or getattr(p, "orientation", None) + if p is None: + return None + if isinstance(p, tuple): + if len(p) == 7: + return cast("PoseTuple", tuple(float(v) for v in p)) + if len(p) == 3: + x, y, z = p + return (float(x), float(y), float(z), 0.0, 0.0, 0.0, 1.0) + raise TypeError(f"Pose tuple must have length 3 or 7, got {len(p)}") + # Use ``is not None`` rather than ``or`` — a Vector3 at the origin is + # falsy but valid, and falling through to ``position`` on a Transform + # would crash. + trans = getattr(p, "translation", None) + if trans is None: + trans = getattr(p, "position", None) + rot = getattr(p, "rotation", None) + if rot is None: + rot = getattr(p, "orientation", None) if trans is None or rot is None: raise TypeError( f"Cannot coerce {type(p).__name__} to a pose tuple — expected " @@ -74,36 +92,70 @@ def _normalize_pose(p: Any) -> tuple[float, float, float, float, float, float, f ) -@dataclass +@dataclass(init=False) class Observation(Generic[T]): """A single timestamped observation with optional spatial pose and metadata. - `pose` is stored as a 7-tuple `(x, y, z, qx, qy, qz, qw)`. Passing a - `Transform`, `Pose`, or `PoseStamped` to the constructor or to `derive` - is normalized to the 7-tuple form automatically. + Pose is stored internally as a 7-tuple ``(x, y, z, qx, qy, qz, qw)``. + Use :attr:`pose` for a typed :class:`Pose` object (lazy), or + :attr:`pose_tuple` for direct field access in hot loops. """ id: int ts: float - data_type: type = object - pose: Any | None = None - tags: dict[str, Any] = field(default_factory=dict) - _data: T | _Unloaded = field(default=_UNLOADED, repr=False) - _loader: Callable[[], T] | None = field(default=None, repr=False) - _data_lock: threading.Lock = field(default_factory=threading.Lock, repr=False) + data_type: type + pose_tuple: PoseTuple | None + tags: dict[str, Any] + _data: T | _Unloaded + _loader: Callable[[], T] | None + _data_lock: threading.Lock + + def __init__( + self, + id: int = 0, + ts: float = 0.0, + *, + data_type: type = object, + pose: Any | None = None, + pose_tuple: PoseTuple | None = None, + tags: dict[str, Any] | None = None, + _data: T | _Unloaded = _UNLOADED, + _loader: Callable[[], T] | None = None, + _data_lock: threading.Lock | None = None, + ) -> None: + self.id = id + self.ts = ts + self.data_type = data_type + # `pose` wins if explicitly supplied: derive()/tag() always re-pass + # the current `pose_tuple` from fields(), so an override needs + # to take precedence over it. + self.pose_tuple = _to_tuple(pose) if pose is not None else pose_tuple + self.tags = tags if tags is not None else {} + self._data = _data + self._loader = _loader + self._data_lock = _data_lock if _data_lock is not None else threading.Lock() + + @property + def pose(self) -> Pose | None: + """Typed :class:`Pose` (or None). Allocates per access — read :attr:`pose_tuple` in hot loops.""" + if self.pose_tuple is None: + return None + from dimos.msgs.geometry_msgs.Pose import Pose - def __post_init__(self) -> None: - self.pose = _normalize_pose(self.pose) + return Pose(*self.pose_tuple) @property - def pose_stamped(self) -> PoseStamped: + def pose_stamped(self) -> PoseStamped | None: + """Typed :class:`PoseStamped` (or None) carrying this observation's ts.""" + if self.pose_tuple is None: + return None from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped - if self.pose is None: - raise LookupError("No pose set on this observation") - x, y, z, qx, qy, qz, qw = self.pose - ps: PoseStamped = PoseStamped(ts=self.ts, position=(x, y, z), orientation=(qx, qy, qz, qw)) - return ps + x, y, z, qx, qy, qz, qw = self.pose_tuple + return cast( + "PoseStamped", + PoseStamped(ts=self.ts, position=(x, y, z), orientation=(qx, qy, qz, qw)), + ) @property def data(self) -> T: @@ -150,9 +202,38 @@ def tag(self, **tags: Any) -> Self: return type(self)(**kwargs) -@dataclass +@dataclass(init=False) class EmbeddedObservation(Observation[T]): """Observation enriched with a vector embedding and optional similarity score.""" - embedding: Embedding | None = None - similarity: float | None = None + embedding: Embedding | None + similarity: float | None + + def __init__( + self, + id: int = 0, + ts: float = 0.0, + *, + data_type: type = object, + pose: Any | None = None, + pose_tuple: PoseTuple | None = None, + tags: dict[str, Any] | None = None, + embedding: Embedding | None = None, + similarity: float | None = None, + _data: T | _Unloaded = _UNLOADED, + _loader: Callable[[], T] | None = None, + _data_lock: threading.Lock | None = None, + ) -> None: + super().__init__( + id=id, + ts=ts, + data_type=data_type, + pose=pose, + pose_tuple=pose_tuple, + tags=tags, + _data=_data, + _loader=_loader, + _data_lock=_data_lock, + ) + self.embedding = embedding + self.similarity = similarity diff --git a/dimos/memory2/vis/space/rerun.py b/dimos/memory2/vis/space/rerun.py index 435097e77c..1929ecef71 100644 --- a/dimos/memory2/vis/space/rerun.py +++ b/dimos/memory2/vis/space/rerun.py @@ -238,10 +238,13 @@ def render(space: Space, app_id: str = "space", spawn: bool = True) -> None: for i, obs in enumerate(observations): path = f"scene/observations/{i}" data = obs.data + ps = obs.pose_stamped + if ps is None: + continue img = _as_image(data) if img is not None: # Apply base→optical extrinsics for camera frustum rendering - world_T_optical = Transform.from_pose("world", obs.pose_stamped) + _BASE_TO_OPTICAL + world_T_optical = Transform.from_pose("world", ps) + _BASE_TO_OPTICAL rr.log(path, world_T_optical.to_pose().to_rerun(), static=True) h, w = img.shape[:2] focal = max(w, h) @@ -257,13 +260,13 @@ def render(space: Space, app_id: str = "space", spawn: bool = True) -> None: ) rr.log(f"{path}/image", img.to_rerun(), static=True) elif isinstance(data, PointCloud2): - rr.log(path, obs.pose_stamped.to_rerun(), static=True) + rr.log(path, ps.to_rerun(), static=True) rr.log(f"{path}/pointcloud", data.to_rerun(), static=True) elif isinstance(data, (int, float)): rr.log( path, rr.Points3D( - positions=[[obs.pose_stamped.x, obs.pose_stamped.y, 0]], + positions=[[ps.x, ps.y, 0]], labels=[str(data)], radii=[0.025], ), @@ -283,7 +286,7 @@ def render(space: Space, app_id: str = "space", spawn: bool = True) -> None: if line: lines.append(line) label = "\n".join(lines) - x, y = obs.pose_stamped.x, obs.pose_stamped.y + x, y = ps.x, ps.y # Pin: line from ground up, label at the tip rr.log( f"{path}/pin", @@ -306,7 +309,7 @@ def render(space: Space, app_id: str = "space", spawn: bool = True) -> None: else: rr.log( path, - rr.Points3D(positions=[[obs.pose_stamped.x, obs.pose_stamped.y, 0]], radii=[0.05]), + rr.Points3D(positions=[[ps.x, ps.y, 0]], radii=[0.05]), static=True, ) diff --git a/dimos/memory2/vis/space/space.py b/dimos/memory2/vis/space/space.py index 4ce973b133..cf7d8be0b3 100644 --- a/dimos/memory2/vis/space/space.py +++ b/dimos/memory2/vis/space/space.py @@ -96,7 +96,10 @@ def add(self, element: Any, **kwargs: Any) -> Space: for item in element: v = _autocolor_value(item) if v is not None and isinstance(item, Observation): - self._elements.append(Arrow(msg=item.pose_stamped, color=color_range(v))) + ps = item.pose_stamped + if ps is None: + continue + self._elements.append(Arrow(msg=ps, color=color_range(v))) else: self.add(item, **kwargs) else: diff --git a/dimos/memory2/vis/space/svg.py b/dimos/memory2/vis/space/svg.py index e54da044da..f95119973b 100644 --- a/dimos/memory2/vis/space/svg.py +++ b/dimos/memory2/vis/space/svg.py @@ -291,12 +291,13 @@ def _render_element(el: SpaceElement, b: Bounds) -> str: return _render_occupancy_grid(simple_inflate(height_cost_occupancy(el), 0.05), b) elif isinstance(el, Observation): - if el.pose is None: + ps = el.pose_stamped + if ps is None: return "" if el.data_type == float: - return _render_arrow(Arrow(msg=el.pose_stamped, color="#ff0000"), b) + return _render_arrow(Arrow(msg=ps, color="#ff0000"), b) else: - return _render_arrow(Arrow(msg=el.pose_stamped), b) + return _render_arrow(Arrow(msg=ps), b) else: return f"" diff --git a/dimos/msgs/geometry_msgs/Quaternion.py b/dimos/msgs/geometry_msgs/Quaternion.py index e3d64d4bab..42e4dc730d 100644 --- a/dimos/msgs/geometry_msgs/Quaternion.py +++ b/dimos/msgs/geometry_msgs/Quaternion.py @@ -16,6 +16,7 @@ from collections.abc import Sequence from io import BytesIO +import math import struct from typing import TYPE_CHECKING, BinaryIO, TypeAlias @@ -221,6 +222,18 @@ def __mul__(self, other: Quaternion) -> Quaternion: return Quaternion(x, y, z, w) + def dot(self, other: Quaternion) -> float: + """4D inner product. ``|q.dot(q')| == 1`` iff same orientation (up to sign).""" + return float(self.x * other.x + self.y * other.y + self.z * other.z + self.w * other.w) + + def angle_to(self, other: Quaternion) -> float: + """Smallest rotation angle (radians) between two unit quaternions. + + ``abs(self.dot(other))`` collapses the double-cover sign ambiguity; + the ``min(1.0, ...)`` clamps against numerical drift past 1. + """ + return 2.0 * math.acos(min(1.0, abs(self.dot(other)))) + def conjugate(self) -> Quaternion: """Return the conjugate of the quaternion. diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index add056b1a3..7617f09e58 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -730,7 +730,7 @@ def to_rerun( if mode == "points": return rr.Points3D( - positions=points, colors=point_colors, class_ids=class_ids, sizes=voxel_size + positions=points, colors=point_colors, class_ids=class_ids, radii=voxel_size / 2 ) elif mode == "boxes": half = voxel_size / 2 diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 5030c4fb05..b84d6e8dad 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -140,16 +140,13 @@ def main( # so it stays cheap (no pointcloud loading). seen: dict[tuple[int, int, int], Observation[Any]] = {} for obs in lidar: - if obs.pose is None: + p = obs.pose_tuple + if p is None: continue # Reject placeholder poses at the world origin. - if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: + if p[0] == 0 and p[1] == 0 and p[2] == 0: continue - cell = ( - int(obs.pose[0] / pgo_tol), - int(obs.pose[1] / pgo_tol), - int(obs.pose[2] / pgo_tol), - ) + cell = (int(p[0] / pgo_tol), int(p[1] / pgo_tol), int(p[2] / pgo_tol)) seen[cell] = obs n_kept = len(seen) @@ -159,7 +156,7 @@ def main( # Dict insertion order = lidar iteration order = chronological. # `seen` only contains entries with non-None poses (filtered above). path: list[tuple[float, float, float]] = [ - (obs.pose[0], obs.pose[1], obs.pose[2]) for obs in seen.values() if obs.pose is not None + (p[0], p[1], p[2]) for obs in seen.values() if (p := obs.pose_tuple) is not None ] pgo_map = None @@ -209,7 +206,7 @@ def main( try: for obs in lidar: full_pb(obs) - if obs.pose is None or len(obs.data) == 0: + if obs.pose_tuple is None or len(obs.data) == 0: continue full_grid.add_frame(obs.data.transform(interp(obs.ts))) full_pgo_map = full_grid.get_global_pointcloud2() From ea2c9f4b05f0edbd58a698e6ecd23abe0c0266d4 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 16:11:25 +0800 Subject: [PATCH 27/64] pgo matrices refactor --- dimos/mapping/loop_closure/pgo.py | 51 ++++++++++---------------- dimos/mapping/loop_closure/test_pgo.py | 44 ++++++++-------------- dimos/msgs/geometry_msgs/Quaternion.py | 8 ++++ dimos/msgs/geometry_msgs/Transform.py | 19 ++++++++++ 4 files changed, 63 insertions(+), 59 deletions(-) diff --git a/dimos/mapping/loop_closure/pgo.py b/dimos/mapping/loop_closure/pgo.py index 84a8a0fa24..de6c56547a 100644 --- a/dimos/mapping/loop_closure/pgo.py +++ b/dimos/mapping/loop_closure/pgo.py @@ -207,15 +207,15 @@ def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]: """Materialize corrections once; return a fast ts -> Transform lookup.""" ts_list: list[float] = [] - R_list: list[np.ndarray] = [] + quat_list: list[np.ndarray] = [] t_list: list[np.ndarray] = [] for obs in corrections: - R, t = _r_t_from_transform(obs.data) + tf = obs.data # obs.ts is authoritative; obs.data.ts can be mutated by Transform's # ts=0.0 -> time.time() fallback in its constructor. ts_list.append(obs.ts) - R_list.append(R) - t_list.append(t) + quat_list.append(tf.rotation.to_numpy()) + t_list.append(tf.translation.to_numpy()) if not ts_list: raise ValueError("empty corrections stream") @@ -224,13 +224,12 @@ def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Trans # general path handles it; clip-to-endpoints behavior is unchanged. if len(ts_list) == 1: ts_list.append(ts_list[0] + 1e-6) - R_list.append(R_list[0]) + quat_list.append(quat_list[0]) t_list.append(t_list[0]) ts_arr = np.array(ts_list) - R_stack = np.stack(R_list) t_stack = np.stack(t_list) - slerp = Slerp(ts_arr, Rotation.from_matrix(R_stack)) + slerp = Slerp(ts_arr, Rotation.from_quat(np.stack(quat_list))) def interp(ts: float) -> Transform: ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) @@ -244,7 +243,13 @@ def interp(ts: float) -> Transform: t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx] - return _transform_from_r_t(R, t, ts=float(ts)) + return Transform( + translation=Vector3(t), + rotation=Quaternion.from_rotation_matrix(R), + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_WORLD_RAW, + ts=float(ts), + ) return interp @@ -268,30 +273,12 @@ def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: continue raw_tf = Transform.from_pose(FRAME_BODY, ps) # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw. - # Observation normalizes Transform back to 7-tuple via __post_init__. corrected = interp(obs.ts) + raw_tf yield obs.derive(data=obs.data, pose=corrected) return stream.transform(xf) -def _r_t_from_transform(tf: Transform) -> tuple[np.ndarray, np.ndarray]: - q = tf.rotation - R = Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix() - t = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) - return R, t - - -def _transform_from_r_t(R: np.ndarray, t: np.ndarray, *, ts: float) -> Transform: - return Transform( - translation=Vector3(float(t[0]), float(t[1]), float(t[2])), - rotation=Quaternion.from_rotation_matrix(R), - frame_id=FRAME_WORLD_CORRECTED, - child_frame_id=FRAME_WORLD_RAW, - ts=ts, - ) - - def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3: """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`.""" import gtsam # type: ignore[import-not-found,import-untyped] @@ -669,9 +656,11 @@ def _icp( if float(result.fitness) == 0.0: return Transform.identity(), float("inf") - T_mat = result.transformation.numpy() - rmse = float(result.inlier_rmse) - return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse - T_mat = result.transformation.numpy() + tf = Transform.from_matrix( + result.transformation.numpy(), + ts=source.ts, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_WORLD_RAW, + ) rmse = float(result.inlier_rmse) - return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse + return tf, rmse * rmse diff --git a/dimos/mapping/loop_closure/test_pgo.py b/dimos/mapping/loop_closure/test_pgo.py index 0042c19b2c..61a744c893 100644 --- a/dimos/mapping/loop_closure/test_pgo.py +++ b/dimos/mapping/loop_closure/test_pgo.py @@ -25,8 +25,6 @@ PGOConfig, _obs_to_pose3, _pose3_to_transform, - _r_t_from_transform, - _transform_from_r_t, apply_corrections, keyframes_to_corrections, make_interpolator, @@ -67,16 +65,6 @@ def test_rejects_unknown_fields(self) -> None: class TestTransformHelpers: - def test_r_t_transform_roundtrip(self) -> None: - rng = np.random.default_rng(0) - R_in = _random_R(rng) - t_in = rng.uniform(-5, 5, size=3) - tf = _transform_from_r_t(R_in, t_in, ts=1.23) - R_out, t_out = _r_t_from_transform(tf) - np.testing.assert_allclose(R_out, R_in, atol=1e-10) - np.testing.assert_allclose(t_out, t_in, atol=1e-10) - assert tf.ts == 1.23 - def test_observation_normalizes_transform_pose(self) -> None: """Constructing/deriving with pose=Transform should coerce to 7-tuple.""" from dimos.memory2.type.observation import Observation @@ -109,7 +97,7 @@ def test_obs_to_pose3_roundtrip(self) -> None: rng = np.random.default_rng(4) R = _random_R(rng) t = rng.uniform(-3, 3, size=3) - tf = _transform_from_r_t(R, t, ts=1.0) + tf = Transform(translation=Vector3(t), rotation=Quaternion.from_rotation_matrix(R), ts=1.0) obs: Observation[int] = Observation(id=0, ts=1.0, pose=tf, _data=0) p = _obs_to_pose3(obs) np.testing.assert_allclose(p.rotation().matrix(), R, atol=1e-9) @@ -123,9 +111,8 @@ def test_pose3_to_transform(self) -> None: t = rng.uniform(-3, 3, size=3) p = gtsam.Pose3(gtsam.Rot3(R), gtsam.Point3(t)) tf = _pose3_to_transform(p, ts=7.89) - R_out, t_out = _r_t_from_transform(tf) - np.testing.assert_allclose(R_out, R, atol=1e-10) - np.testing.assert_allclose(t_out, t, atol=1e-10) + np.testing.assert_allclose(tf.rotation.to_rotation_matrix(), R, atol=1e-10) + np.testing.assert_allclose(tf.translation.to_numpy(), t, atol=1e-10) def test_pose3_to_transform_with_frames(self) -> None: import gtsam @@ -137,9 +124,8 @@ def test_pose3_to_transform_with_frames(self) -> None: tf = _pose3_to_transform(p, ts=1.0, frame_id="world_corrected", child_frame_id="body") assert tf.frame_id == "world_corrected" assert tf.child_frame_id == "body" - R_out, t_out = _r_t_from_transform(tf) - np.testing.assert_allclose(R_out, R, atol=1e-10) - np.testing.assert_allclose(t_out, t, atol=1e-10) + np.testing.assert_allclose(tf.rotation.to_rotation_matrix(), R, atol=1e-10) + np.testing.assert_allclose(tf.translation.to_numpy(), t, atol=1e-10) def _make_lidar_stream(n_frames: int = 12, points_per_frame: int = 500) -> Stream[PointCloud2]: @@ -213,8 +199,11 @@ def test_empty_stream_raises(self) -> None: def test_single_keyframe_returns_constant(self) -> None: R = Rotation.from_euler("z", np.pi / 4).as_matrix() - t = np.array([1.0, 2.0, 3.0]) - only = _transform_from_r_t(R, t, ts=10.0) + only = Transform( + translation=Vector3(1.0, 2.0, 3.0), + rotation=Quaternion.from_rotation_matrix(R), + ts=10.0, + ) interp = make_interpolator(_make_corrections([only])) for query_ts in (0.0, 10.0, 100.0): out = interp(query_ts) @@ -225,9 +214,8 @@ def test_single_keyframe_returns_constant(self) -> None: def test_out_of_range_clips_to_endpoints(self) -> None: # Note: Transform's constructor maps ts=0.0 -> time.time(); use ts>0 # so test timestamps are deterministic. - R = np.eye(3) - a = _transform_from_r_t(R, np.array([0.0, 0.0, 0.0]), ts=1.0) - b = _transform_from_r_t(R, np.array([10.0, 0.0, 0.0]), ts=11.0) + a = Transform(translation=Vector3(0.0, 0.0, 0.0), ts=1.0) + b = Transform(translation=Vector3(10.0, 0.0, 0.0), ts=11.0) # _make_corrections uses tf.ts; obs.ts ends up the same. mem = MemoryStore() stream: Stream[Transform] = mem.stream("corrections", Transform) @@ -257,8 +245,8 @@ def test_pure_translation_shifts_poses(self) -> None: ) # Constant correction: translate by (5, 0, 0), identity rotation. - c_a = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=1.0) - c_b = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=3.0) + c_a = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=1.0) + c_b = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=3.0) mem2 = MemoryStore() corr: Stream[Transform] = mem2.stream("corrections", Transform) corr.append(c_a, ts=1.0) @@ -280,8 +268,8 @@ def test_passes_through_pose_none(self) -> None: ts=1.0, pose=None, ) - c_a = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=1.0) - c_b = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=2.0) + c_a = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=1.0) + c_b = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=2.0) mem2 = MemoryStore() corr: Stream[Transform] = mem2.stream("corrections", Transform) corr.append(c_a, ts=1.0) diff --git a/dimos/msgs/geometry_msgs/Quaternion.py b/dimos/msgs/geometry_msgs/Quaternion.py index 42e4dc730d..e98e7bcc19 100644 --- a/dimos/msgs/geometry_msgs/Quaternion.py +++ b/dimos/msgs/geometry_msgs/Quaternion.py @@ -164,6 +164,14 @@ def from_rotation_matrix(cls, matrix: np.ndarray) -> Quaternion: quat = rotation.as_quat() # Returns [x, y, z, w] return cls(quat[0], quat[1], quat[2], quat[3]) + def to_rotation_matrix(self) -> np.ndarray: + """Convert quaternion to a 3x3 rotation matrix.""" + from scipy.spatial.transform import ( + Rotation, # ~330ms: deferred to avoid startup cost + ) + + return np.asarray(Rotation.from_quat([self.x, self.y, self.z, self.w]).as_matrix()) + def to_euler(self) -> Vector3: """Convert quaternion to Euler angles (roll, pitch, yaw) in radians. diff --git a/dimos/msgs/geometry_msgs/Transform.py b/dimos/msgs/geometry_msgs/Transform.py index 9b08c8dadd..44bd6b2440 100644 --- a/dimos/msgs/geometry_msgs/Transform.py +++ b/dimos/msgs/geometry_msgs/Transform.py @@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, BinaryIO if TYPE_CHECKING: + import numpy as np import rerun as rr from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped @@ -216,6 +217,24 @@ def to_pose(self, **kwargs: object) -> PoseStamped: ) return result + @classmethod + def from_matrix( + cls, + matrix: np.ndarray, + *, + ts: float = 0.0, + frame_id: str = "world", + child_frame_id: str = "unset", + ) -> Transform: + """Build a Transform from a 4x4 homogeneous transformation matrix.""" + return cls( + translation=Vector3(matrix[:3, 3]), + rotation=Quaternion.from_rotation_matrix(matrix[:3, :3]), + frame_id=frame_id, + child_frame_id=child_frame_id, + ts=ts, + ) + def to_matrix(self) -> np.ndarray: # type: ignore[name-defined] """Convert Transform to a 4x4 transformation matrix. From 6e7825f7e182575817905cb9d9080a69cf4b16bd Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 16:25:48 +0800 Subject: [PATCH 28/64] pc2 fix --- dimos/msgs/sensor_msgs/PointCloud2.py | 4 ++++ dimos/msgs/sensor_msgs/test_PointCloud2.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index 7617f09e58..d31c20d43c 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -373,6 +373,10 @@ def transform(self, tf: Transform) -> PointCloud2: new_pcd = o3d.geometry.PointCloud() new_pcd.points = o3d.utility.Vector3dVector(transformed_xyz) + # Colors are frame-independent — carry them through. + if self.pointcloud.has_colors(): + new_pcd.colors = self.pointcloud.colors + return PointCloud2( pointcloud=new_pcd, frame_id=tf.frame_id, diff --git a/dimos/msgs/sensor_msgs/test_PointCloud2.py b/dimos/msgs/sensor_msgs/test_PointCloud2.py index 56ae622a8d..797e266ed3 100644 --- a/dimos/msgs/sensor_msgs/test_PointCloud2.py +++ b/dimos/msgs/sensor_msgs/test_PointCloud2.py @@ -32,8 +32,8 @@ def test_lcm_encode_decode() -> None: decoded = PointCloud2.lcm_decode(binary_msg) # 1. Check number of points - original_points, _ = lidar_msg.as_numpy() - decoded_points, _ = decoded.as_numpy() + original_points = lidar_msg.as_numpy() + decoded_points = decoded.as_numpy() assert len(original_points) == len(decoded_points), ( f"Point count mismatch: {len(original_points)} vs {len(decoded_points)}" From 9ac43a249bc75056043f06fa4aece047b482c532 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 16:35:20 +0800 Subject: [PATCH 29/64] map.py: use VoxelMapTransformer for all three accumulations Replaces the three hand-rolled VoxelGrid loops (pgo pass 2, full_pgo, raw) with a small _accumulate() helper that drives VoxelMapTransformer in batch mode. Same behavior, less repetition, and disposal is managed by the transformer. --- dimos/utils/cli/map.py | 104 +++++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 36 deletions(-) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index b84d6e8dad..8f0e65e9f0 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from collections.abc import Callable +from collections.abc import Callable, Iterable import time from typing import Any @@ -20,7 +20,7 @@ import rerun.blueprint as rrb import typer -from dimos.mapping.voxels import VoxelGrid +from dimos.mapping.voxels import VoxelMapTransformer from dimos.memory2.store.sqlite import SqliteStore from dimos.memory2.stream import Stream from dimos.memory2.transform import QualityWindow, SpeedLimit @@ -28,6 +28,7 @@ from dimos.memory2.vis.color import Color from dimos.msgs.geometry_msgs.Transform import Transform from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 from dimos.perception.fiducial.marker_transformer import DetectMarkers from dimos.robot.unitree.go2.connection import _camera_info_static from dimos.utils.data import resolve_named_path @@ -36,6 +37,44 @@ PATH_THICKNESS = 0.01 +def _accumulate( + obs_iter: Iterable[Observation[PointCloud2]], + *, + voxel: float, + block_count: int, + device: str, + interp: Callable[[float], Transform] | None = None, + progress_cb: Callable[[Observation[Any]], None] | None = None, +) -> PointCloud2 | None: + """Accumulate a voxel map from `obs_iter`, optionally PGO-correcting each frame. + + Returns the final ``PointCloud2`` (or ``None`` if the input was empty). + Disposal of the underlying ``VoxelGrid`` is handled by ``VoxelMapTransformer``. + """ + + def prepared() -> Iterable[Observation[PointCloud2]]: + for obs in obs_iter: + if progress_cb is not None: + progress_cb(obs) + if len(obs.data) == 0: + continue + if interp is not None: + if obs.pose_tuple is None: + continue + yield obs.derive(data=obs.data.transform(interp(obs.ts))) + else: + yield obs + + vmt = VoxelMapTransformer( + emit_every=0, # batch mode: emit once on exhaustion + voxel_size=voxel, + block_count=block_count, + device=device, + ) + result = next(iter(vmt(iter(prepared()))), None) + return result.data if result is not None else None + + def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: seen = 0 wall_start: float | None = None @@ -186,45 +225,35 @@ def main( kf_t = kf_obs.data.optimized.translation pgo_path.append((kf_t.x, kf_t.y, kf_t.z)) - pass2_pb = progress(n_kept, "pgo pass 2 (rebuilding)") - grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) - try: - for obs in seen.values(): - pass2_pb(obs) - if len(obs.data) == 0: - continue - grid.add_frame(obs.data.transform(interp(obs.ts))) - pgo_map = grid.get_global_pointcloud2() - finally: - grid.dispose() + pgo_map = _accumulate( + seen.values(), + voxel=voxel, + block_count=block_count, + device=device, + interp=interp, + progress_cb=progress(n_kept, "pgo pass 2 (rebuilding)"), + ) full_pgo_map = None if full_pgo: assert interp is not None - full_pb = progress(total, "full pgo (rebuilding)") - full_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) - try: - for obs in lidar: - full_pb(obs) - if obs.pose_tuple is None or len(obs.data) == 0: - continue - full_grid.add_frame(obs.data.transform(interp(obs.ts))) - full_pgo_map = full_grid.get_global_pointcloud2() - finally: - full_grid.dispose() + full_pgo_map = _accumulate( + lidar, + voxel=voxel, + block_count=block_count, + device=device, + interp=interp, + progress_cb=progress(total, "full pgo (rebuilding)"), + ) # Raw map: same dedup'd frames, no PGO correction. - raw_pb = progress(n_kept, "reconstructing global map") - raw_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) - try: - for obs in seen.values(): - raw_pb(obs) - if len(obs.data) == 0: - continue - raw_grid.add_frame(obs.data) - global_map = raw_grid.get_global_pointcloud2() - finally: - raw_grid.dispose() + global_map = _accumulate( + seen.values(), + voxel=voxel, + block_count=block_count, + device=device, + progress_cb=progress(n_kept, "reconstructing global map"), + ) marker_dets: list[Observation[Any]] = [] if markers: @@ -270,7 +299,10 @@ def main( if not no_gui: rerun_init("dimos map tool", spawn=True) rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) - rr.log("world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True) + if global_map is not None: + rr.log( + "world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True + ) if path: rr.log( "world/raw_map/path", From ab687a239ac0c7081398ffcfdf529e167db120eb Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 16:47:43 +0800 Subject: [PATCH 30/64] mapper bugfix --- .../perception/fiducial/marker_transformer.py | 25 ++++--------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py index e6c3d3c02f..fa114b8b8b 100644 --- a/dimos/perception/fiducial/marker_transformer.py +++ b/dimos/perception/fiducial/marker_transformer.py @@ -58,23 +58,6 @@ logger = setup_logger() -def _pose_tuple_to_transform( - pose: tuple[float, float, float, float, float, float, float], - *, - frame_id: str, - child_frame_id: str, - ts: float, -) -> Transform: - x, y, z, qx, qy, qz, qw = pose - return Transform( - translation=Vector3(x, y, z), - rotation=Quaternion(qx, qy, qz, qw), - frame_id=frame_id, - child_frame_id=child_frame_id, - ts=ts, - ) - - def _average_marker_pose( buffer: TimestampedBufferCollection[Detection3DMarker], ) -> tuple[Vector3, Quaternion]: @@ -146,7 +129,8 @@ def __call__( marker_size = Vector3(self.marker_length_m, self.marker_length_m, 0.0) for obs in upstream: - if obs.pose is None: + pose = obs.pose + if pose is None: logger.debug("DetectMarkers: obs %s has no .pose; skipping", obs.id) continue @@ -170,8 +154,9 @@ def __call__( if ids is None or len(ids) == 0: continue - t_world_optical = _pose_tuple_to_transform( - obs.pose, + t_world_optical = Transform( + translation=pose.position, + rotation=pose.orientation, frame_id=self.world_frame, child_frame_id="optical", ts=obs.ts, From fcd53f1cde6a857a54793a7ccc62eb1dcad65f43 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 16:59:57 +0800 Subject: [PATCH 31/64] fix mypy: rename shadowed `pose` in DetectMarkers The outer `pose = obs.pose` (Pose | None) was being shadowed by `pose = estimate_marker_pose(...)` (tuple[rvec, tvec] | None), which mypy flags as an incompatible reassignment. Renamed the inner one to `marker_pose`. --- dimos/perception/fiducial/marker_transformer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py index fa114b8b8b..36fc2bd6bc 100644 --- a/dimos/perception/fiducial/marker_transformer.py +++ b/dimos/perception/fiducial/marker_transformer.py @@ -164,16 +164,16 @@ def __call__( for corner_set, mid_arr in zip(corners, ids, strict=True): mid = int(mid_arr[0]) - pose = estimate_marker_pose( + marker_pose = estimate_marker_pose( corner_set, self.marker_length_m, self._cam_mtx, self._dist, distortion_model=info.distortion_model, ) - if pose is None: + if marker_pose is None: continue - rvec, tvec = pose + rvec, tvec = marker_pose t_optical_marker = rvec_tvec_to_transform( rvec, tvec, From 3aaaeb98d7633fad101acb455548c6507d902675 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 17:06:12 +0800 Subject: [PATCH 32/64] NearFilter: store position as Vector3, drop _xyz extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NearFilter now holds a typed `Vector3` instead of an `Any` named `pose`. `Stream.near()` does the one-time normalization at construction (Pose/ PoseStamped → .position, then Vector3(...) which accepts tuple/ndarray/ Vector3). `matches()` uses Vector3 subtraction + length_squared(); the sqlite filter compiler reads f.position.x/.y/.z directly. The _xyz() helper is gone. --- dimos/memory2/observationstore/sqlite.py | 8 +------- dimos/memory2/stream.py | 7 ++++++- dimos/memory2/type/filter.py | 25 +++++++----------------- 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/dimos/memory2/observationstore/sqlite.py b/dimos/memory2/observationstore/sqlite.py index e82e867dc0..3dd9a34948 100644 --- a/dimos/memory2/observationstore/sqlite.py +++ b/dimos/memory2/observationstore/sqlite.py @@ -31,7 +31,6 @@ NearFilter, TagsFilter, TimeRangeFilter, - _xyz, ) from dimos.memory2.type.observation import _UNLOADED, Observation, PoseTuple from dimos.memory2.utils.sqlite import open_disposable_sqlite_connection @@ -84,12 +83,7 @@ def _compile_filter(f: Filter, stream: str, prefix: str = "") -> tuple[str, list params.append(v) return (" AND ".join(clauses), params) if isinstance(f, NearFilter): - pose = f.pose - if pose is None: - return None - if hasattr(pose, "position"): - pose = pose.position - cx, cy, cz = _xyz(pose) + cx, cy, cz = f.position.x, f.position.y, f.position.z r = f.radius # R*Tree bounding-box pre-filter + exact squared-distance check rtree_sql = ( diff --git a/dimos/memory2/stream.py b/dimos/memory2/stream.py index 3a92f7708f..8f0a767aa7 100644 --- a/dimos/memory2/stream.py +++ b/dimos/memory2/stream.py @@ -38,6 +38,7 @@ TimeRangeFilter, ) from dimos.memory2.type.observation import EmbeddedObservation, Observation +from dimos.msgs.geometry_msgs.Vector3 import Vector3 from dimos.utils.logging_config import setup_logger if TYPE_CHECKING: @@ -177,7 +178,11 @@ def at_relative(self, t: float, tolerance: float = 1.0) -> Stream[T, O]: return self.at(t0 + t, tolerance=tolerance) def near(self, pose: Any, radius: float) -> Stream[T, O]: - return self._with_filter(NearFilter(pose, radius)) + # Accept Pose/PoseStamped (any object with `.position`), Vector3, + # numpy arrays, or (x, y, z) tuples — Vector3() handles the rest. + if hasattr(pose, "position"): + pose = pose.position + return self._with_filter(NearFilter(Vector3(pose), radius)) def tags(self, **tags: Any) -> Stream[T, O]: return self._with_filter(TagsFilter(tags)) diff --git a/dimos/memory2/type/filter.py b/dimos/memory2/type/filter.py index ddc0f86eb5..1250c4c31f 100644 --- a/dimos/memory2/type/filter.py +++ b/dimos/memory2/type/filter.py @@ -19,6 +19,8 @@ from itertools import islice from typing import TYPE_CHECKING, Any +from dimos.msgs.geometry_msgs.Vector3 import Vector3 + if TYPE_CHECKING: from collections.abc import Callable, Iterator @@ -75,28 +77,15 @@ def matches(self, obs: Observation[Any]) -> bool: @dataclass(frozen=True) class NearFilter(Filter): - pose: Any = field(hash=False) + position: Vector3 = field(hash=False) radius: float = 0.0 def matches(self, obs: Observation[Any]) -> bool: - p2 = obs.pose_tuple - if p2 is None or self.pose is None: + p = obs.pose_tuple + if p is None: return False - p1 = self.pose - # NearFilter.pose accepts raw (x,y,z) tuples or PoseStamped-like objects. - if hasattr(p1, "position"): - p1 = p1.position - x1, y1, z1 = _xyz(p1) - x2, y2, z2 = p2[0], p2[1], p2[2] - dist_sq = (x1 - x2) ** 2 + (y1 - y2) ** 2 + (z1 - z2) ** 2 - return dist_sq <= self.radius**2 - - -def _xyz(p: Any) -> tuple[float, float, float]: - """Extract (x, y, z) from various pose representations.""" - if isinstance(p, (list, tuple)): - return (float(p[0]), float(p[1]), float(p[2]) if len(p) > 2 else 0.0) - return (float(p.x), float(p.y), float(getattr(p, "z", 0.0))) + delta = self.position - Vector3(p[0], p[1], p[2]) + return delta.length_squared() <= self.radius * self.radius @dataclass(frozen=True) From 18797efda140755f9307abbf0023b56772fc3af7 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 19:02:12 +0800 Subject: [PATCH 33/64] bugfix --- dimos/memory2/observationstore/sqlite.py | 4 +++- dimos/utils/cli/map.py | 3 --- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/dimos/memory2/observationstore/sqlite.py b/dimos/memory2/observationstore/sqlite.py index 3dd9a34948..88128e3c1a 100644 --- a/dimos/memory2/observationstore/sqlite.py +++ b/dimos/memory2/observationstore/sqlite.py @@ -56,7 +56,9 @@ def _reconstruct_pose( ) -> PoseTuple | None: if x is None: return None - return (x, y or 0.0, z or 0.0, qx or 0.0, qy or 0.0, qz or 0.0, qw or 1.0) + assert y is not None and z is not None + assert qx is not None and qy is not None and qz is not None and qw is not None + return (x, y, z, qx, qy, qz, qw) def _compile_filter(f: Filter, stream: str, prefix: str = "") -> tuple[str, list[Any]] | None: diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 8f0e65e9f0..af19300549 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -508,6 +508,3 @@ def main( if __name__ == "__main__": typer.run(main) - typer.run(main) - typer.run(main) - typer.run(main) From cc652122a0ee79c85243169e92f886c318da3a40 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 21:08:11 +0800 Subject: [PATCH 34/64] cleanup --- dimos/mapping/loop_closure/eval.py | 3 +- dimos/mapping/loop_closure/pgo.py | 29 +++++++++---------- dimos/mapping/loop_closure/test_pgo.py | 14 ++++----- .../mapping/loop_closure/utils/markers_rrd.py | 16 +++++----- dimos/memory2/type/observation.py | 2 +- dimos/msgs/geometry_msgs/Quaternion.py | 4 +++ 6 files changed, 32 insertions(+), 36 deletions(-) diff --git a/dimos/mapping/loop_closure/eval.py b/dimos/mapping/loop_closure/eval.py index c41817b032..f0294a0c67 100644 --- a/dimos/mapping/loop_closure/eval.py +++ b/dimos/mapping/loop_closure/eval.py @@ -79,7 +79,6 @@ def _eval_recording( with store: lidar = store.streams.lidar - # --- PGO (timed) --- # .tap() captures each keyframe as make_interpolator iterates the # corrections stream, so no extra pass over lidar. keyframes: list[Any] = [] @@ -91,7 +90,7 @@ def _eval_recording( interp = make_interpolator(keyframes_to_corrections(kf_stream)) pgo_time = time.perf_counter() - t0 - # --- Marker detection (same pipeline as dimos map / markers_rrd) --- + # Marker detection pipeline: same shape as dimos map / markers_rrd. color_image = store.stream("color_image", Image) xf = DetectMarkers( camera_info=cam_info, diff --git a/dimos/mapping/loop_closure/pgo.py b/dimos/mapping/loop_closure/pgo.py index de6c56547a..b67adbae92 100644 --- a/dimos/mapping/loop_closure/pgo.py +++ b/dimos/mapping/loop_closure/pgo.py @@ -170,18 +170,15 @@ def pgo_keyframes( for obs in stream: if on_frame is not None: on_frame(obs) - p = obs.pose_tuple - if p is None: + pose = obs.pose + if pose is None: continue - # Skip placeholder poses written before odom converges: - # all-zero translation OR all-zero (uninitialized) quaternion. - # An identity quaternion (qw=1) is valid and must not be filtered. - if p[0] == 0 and p[1] == 0 and p[2] == 0: + # Skip placeholder poses written before odom converges: zero + # translation or uninitialized (all-zero) quaternion. Identity + # rotation (qw=1) is valid and must not be filtered. + if pose.position.is_zero() or pose.orientation.is_zero(): continue - if p[3] == 0 and p[4] == 0 and p[5] == 0 and p[6] == 0: - continue - local_pose = _obs_to_pose3(obs) - pgo.process(local_pose, obs.ts, obs.data) + pgo.process(_obs_to_pose3(obs), obs.ts, obs.data) mem = MemoryStore() out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) @@ -280,16 +277,16 @@ def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3: - """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`.""" + """Convert an observation's pose to a `gtsam.Pose3`.""" import gtsam # type: ignore[import-not-found,import-untyped] - p = obs.pose_tuple - if p is None: + pose = obs.pose + if pose is None: raise LookupError("No pose set on this observation") - x, y, z, qx, qy, qz, qw = p + t, r = pose.position, pose.orientation return gtsam.Pose3( - gtsam.Rot3.Quaternion(float(qw), float(qx), float(qy), float(qz)), - gtsam.Point3(float(x), float(y), float(z)), + gtsam.Rot3.Quaternion(r.w, r.x, r.y, r.z), + gtsam.Point3(t.x, t.y, t.z), ) diff --git a/dimos/mapping/loop_closure/test_pgo.py b/dimos/mapping/loop_closure/test_pgo.py index 61a744c893..e45d9fefe2 100644 --- a/dimos/mapping/loop_closure/test_pgo.py +++ b/dimos/mapping/loop_closure/test_pgo.py @@ -14,8 +14,6 @@ from __future__ import annotations -from pathlib import Path - import numpy as np import pytest from scipy.spatial.transform import Rotation @@ -293,13 +291,10 @@ def test_keyframe_is_frozen(self) -> None: assert isinstance(kf.optimized, Transform) -# Real-recording smoke test. ~45-60s on go2_short.db. Skipped when the LFS -# file isn't present (CI without LFS pulled, fresh clone, etc.). -_DATA_DB = Path(__file__).resolve().parents[3] / "data" / "go2_short.db" - - -@pytest.mark.skipif(not _DATA_DB.exists(), reason=f"requires LFS file {_DATA_DB}") +# Real-recording smoke test. ~45-60s on go2_short.db. get_data() auto-pulls +# the LFS archive on first use. class TestRealRecording: + @pytest.mark.self_hosted def test_pgo_pipeline_against_go2_short(self) -> None: """Run the full PGO pipeline on a real 60-second go2 recording. @@ -308,8 +303,9 @@ def test_pgo_pipeline_against_go2_short(self) -> None: keyframes, apply_corrections preserves input frame count. """ from dimos.memory2.store.sqlite import SqliteStore + from dimos.utils.data import get_data - store = SqliteStore(path=_DATA_DB) + store = SqliteStore(path=get_data("go2_short.db")) lidar = store.streams.lidar in_count = lidar.count() assert in_count > 0, "recording is empty" diff --git a/dimos/mapping/loop_closure/utils/markers_rrd.py b/dimos/mapping/loop_closure/utils/markers_rrd.py index eb5306edf7..f4223be553 100644 --- a/dimos/mapping/loop_closure/utils/markers_rrd.py +++ b/dimos/mapping/loop_closure/utils/markers_rrd.py @@ -82,12 +82,12 @@ def main( color_image = store.stream("color_image", Image) lidar = store.stream("lidar", PointCloud2) - # ---- pass 1: robot base pose over time (from lidar.pose) ---- + # Pass 1: robot base pose over time (from lidar.pose). for lidar_obs in lidar: - if lidar_obs.pose is None: + if lidar_obs.pose_tuple is None: continue rr.set_time(TIMELINE, timestamp=lidar_obs.ts) - x, y, z, qx, qy, qz, qw = lidar_obs.pose + x, y, z, qx, qy, qz, qw = lidar_obs.pose_tuple rr.log( "world/robot", rr.Transform3D( @@ -95,12 +95,12 @@ def main( ), ) - # ---- pass 2: camera pose + image per color_image frame ---- + # Pass 2: camera pose + image per color_image frame. n_img = color_image.count() for i, img_obs in enumerate(color_image): rr.set_time(TIMELINE, timestamp=img_obs.ts) - if img_obs.pose is not None: - x, y, z, qx, qy, qz, qw = img_obs.pose + if img_obs.pose_tuple is not None: + x, y, z, qx, qy, qz, qw = img_obs.pose_tuple rr.log( "world/camera", rr.Transform3D( @@ -117,7 +117,7 @@ def main( if (i + 1) % 50 == 0 or i + 1 == n_img: print(f"images: {i + 1}/{n_img}") - # ---- pass 3: marker detections (filtered same way as `dimos map`) ---- + # Pass 3: marker detections, filtered the same way as `dimos map`. xf = DetectMarkers(camera_info=cam_info, marker_length_m=marker_size) pipeline: Stream[Any] = color_image.transform( QualityWindow(lambda img: img.sharpness, window=quality_window) @@ -175,7 +175,7 @@ def main( ) print(f"detections: {n_det}") - # ---- pass 4: averaged tracks (smoothing_window > 0 → per-track ids) ---- + # Pass 4: averaged tracks (smoothing_window > 0 → per-track ids). # Re-runs the same filtered pipeline through a smoothing detector; # each track yields one entity that updates as the windowed average # refines. Color stable per track_id for visual identity. diff --git a/dimos/memory2/type/observation.py b/dimos/memory2/type/observation.py index 038d05f161..b2a23012d4 100644 --- a/dimos/memory2/type/observation.py +++ b/dimos/memory2/type/observation.py @@ -60,7 +60,7 @@ def _to_tuple(p: Any) -> PoseTuple | None: """ if p is None: return None - if isinstance(p, tuple): + if isinstance(p, (tuple, list)): if len(p) == 7: return cast("PoseTuple", tuple(float(v) for v in p)) if len(p) == 3: diff --git a/dimos/msgs/geometry_msgs/Quaternion.py b/dimos/msgs/geometry_msgs/Quaternion.py index e98e7bcc19..56b2045c35 100644 --- a/dimos/msgs/geometry_msgs/Quaternion.py +++ b/dimos/msgs/geometry_msgs/Quaternion.py @@ -213,6 +213,10 @@ def __eq__(self, other) -> bool: # type: ignore[no-untyped-def] return False return self.x == other.x and self.y == other.y and self.z == other.z and self.w == other.w + def is_zero(self) -> bool: + """All components are zero — i.e. an uninitialized placeholder, not a valid rotation.""" + return bool(np.allclose([self.x, self.y, self.z, self.w], 0.0)) + def __mul__(self, other: Quaternion) -> Quaternion: """Multiply two quaternions (Hamilton product). From ce78e86ea296ceec2c73687148c03b19d60b2566 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 21:38:26 +0800 Subject: [PATCH 35/64] further cleanup --- dimos/mapping/loop_closure/pgo.py | 62 ++++++-- dimos/mapping/loop_closure/test_pgo.py | 8 +- dimos/memory2/type/observation.py | 26 +++- dimos/robot/cli/dimos.py | 80 +--------- dimos/utils/cli/map.py | 208 ++++++++++++------------- 5 files changed, 177 insertions(+), 207 deletions(-) diff --git a/dimos/mapping/loop_closure/pgo.py b/dimos/mapping/loop_closure/pgo.py index b67adbae92..db6efa9096 100644 --- a/dimos/mapping/loop_closure/pgo.py +++ b/dimos/mapping/loop_closure/pgo.py @@ -105,6 +105,17 @@ class PGOConfig(BaseConfig): max_icp_iterations: int = 50 max_icp_correspondence_dist: float = 1.0 + # Noise variances on the GTSAM between-factor (Pose3 is [rx, ry, rz, x, y, z]). + # Defaults are tuned for a Go2-class ground robot: tight z because the + # platform is planar; isotropic rotation. Loosen on aerial / 6-DoF arms. + odom_rot_var: float = 1e-6 + odom_trans_var_xy: float = 1e-4 + odom_trans_var_z: float = 1e-6 + + # Loop-closure rotation variance. Translation variance is supplied + # per-edge from ICP fitness, floored at 0.01 (sigma_trans ~ 10 cm). + loop_rot_var: float = 0.05 + class PGOKwargs(TypedDict, total=False): """Typed kwargs for `pgo_keyframes`; mirrors `PGOConfig` fields.""" @@ -122,6 +133,10 @@ class PGOKwargs(TypedDict, total=False): min_loop_detect_duration: float max_icp_iterations: int max_icp_correspondence_dist: float + odom_rot_var: float + odom_trans_var_xy: float + odom_trans_var_z: float + loop_rot_var: float @dataclass(frozen=True) @@ -344,7 +359,12 @@ def process( # Unregister: lift world-frame scan back into body frame using its # odom pose, so PGO can re-project it via the optimized pose later. body_cloud = world_cloud.transform( - _pose3_to_transform(local_pose.inverse(), ts=ts) + _pose3_to_transform( + local_pose.inverse(), + ts=ts, + frame_id=FRAME_BODY, + child_frame_id=FRAME_WORLD_RAW, + ) ).voxel_downsample(self._cfg.submap_resolution) self._add_keyframe(local_pose, ts, body_cloud) self._search_for_loops() @@ -432,8 +452,18 @@ def _add_keyframe( else: last_local = self._key_poses[-1].local between = last_local.inverse().compose(local_pose) + cfg = self._cfg noise = gtsam.noiseModel.Diagonal.Variances( - np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6]) + np.array( + [ + cfg.odom_rot_var, + cfg.odom_rot_var, + cfg.odom_rot_var, + cfg.odom_trans_var_xy, + cfg.odom_trans_var_xy, + cfg.odom_trans_var_z, + ] + ) ) self._graph.add(gtsam.BetweenFactorPose3(idx - 1, idx, between, noise)) @@ -452,12 +482,22 @@ def _get_submap(self, idx: int, half_range: int) -> PointCloud2: if lo > hi: return PointCloud2() cloud = self._key_poses[lo].body_cloud.transform( - _pose3_to_transform(self._key_poses[lo].optimized, ts=self._key_poses[lo].timestamp) + _pose3_to_transform( + self._key_poses[lo].optimized, + ts=self._key_poses[lo].timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ) ) for i in range(lo + 1, hi + 1): kp = self._key_poses[i] cloud = cloud + kp.body_cloud.transform( - _pose3_to_transform(kp.optimized, ts=kp.timestamp) + _pose3_to_transform( + kp.optimized, + ts=kp.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ) ) return cloud.voxel_downsample(self._cfg.submap_resolution) @@ -550,7 +590,7 @@ def _smooth_and_update(self) -> None: # — loops shouldn't be trusted to fix rotation tightly without # normals + p2plane. trans_var = max(0.01, float(pair.score)) # >= sigma_trans = 10 cm - rot_var = 0.05 # sigma_rot ~ 13 deg + rot_var = self._cfg.loop_rot_var noise = gtsam.noiseModel.Diagonal.Variances( np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) ) @@ -578,8 +618,8 @@ def _pose3_to_transform( pose: gtsam.Pose3, *, ts: float, - frame_id: str = "", - child_frame_id: str = "", + frame_id: str, + child_frame_id: str, ) -> Transform: """PGO-internal: build a Transform from a Pose3.""" t = np.asarray(pose.translation()) @@ -653,11 +693,7 @@ def _icp( if float(result.fitness) == 0.0: return Transform.identity(), float("inf") - tf = Transform.from_matrix( - result.transformation.numpy(), - ts=source.ts, - frame_id=FRAME_WORLD_CORRECTED, - child_frame_id=FRAME_WORLD_RAW, - ) + # Frames intentionally unlabeled: caller only reads .to_matrix(). + tf = Transform.from_matrix(result.transformation.numpy(), ts=source.ts) rmse = float(result.inlier_rmse) return tf, rmse * rmse diff --git a/dimos/mapping/loop_closure/test_pgo.py b/dimos/mapping/loop_closure/test_pgo.py index e45d9fefe2..3feeb3d348 100644 --- a/dimos/mapping/loop_closure/test_pgo.py +++ b/dimos/mapping/loop_closure/test_pgo.py @@ -61,6 +61,12 @@ def test_rejects_unknown_fields(self) -> None: with pytest.raises(Exception): PGOConfig(**{dead: True}) + def test_kwargs_typed_dict_matches_config(self) -> None: + """`PGOKwargs` must mirror every `PGOConfig` field 1:1.""" + from dimos.mapping.loop_closure.pgo import PGOKwargs + + assert set(PGOConfig.model_fields.keys()) == set(PGOKwargs.__annotations__.keys()) + class TestTransformHelpers: def test_observation_normalizes_transform_pose(self) -> None: @@ -108,7 +114,7 @@ def test_pose3_to_transform(self) -> None: R = _random_R(rng) t = rng.uniform(-3, 3, size=3) p = gtsam.Pose3(gtsam.Rot3(R), gtsam.Point3(t)) - tf = _pose3_to_transform(p, ts=7.89) + tf = _pose3_to_transform(p, ts=7.89, frame_id="world", child_frame_id="body") np.testing.assert_allclose(tf.rotation.to_rotation_matrix(), R, atol=1e-10) np.testing.assert_allclose(tf.translation.to_numpy(), t, atol=1e-10) diff --git a/dimos/memory2/type/observation.py b/dimos/memory2/type/observation.py index b2a23012d4..2d86812678 100644 --- a/dimos/memory2/type/observation.py +++ b/dimos/memory2/type/observation.py @@ -49,6 +49,16 @@ def __repr__(self) -> str: _UNLOADED = _Unloaded() +class _MissingType: + """Sentinel for "argument not supplied", distinct from None.""" + + def __repr__(self) -> str: + return "" + + +_MISSING = _MissingType() + + def _to_tuple(p: Any) -> PoseTuple | None: """Coerce common pose shapes to the storage 7-tuple `(x, y, z, qx, qy, qz, qw)`. @@ -116,7 +126,7 @@ def __init__( ts: float = 0.0, *, data_type: type = object, - pose: Any | None = None, + pose: Any | _MissingType = _MISSING, pose_tuple: PoseTuple | None = None, tags: dict[str, Any] | None = None, _data: T | _Unloaded = _UNLOADED, @@ -126,10 +136,14 @@ def __init__( self.id = id self.ts = ts self.data_type = data_type - # `pose` wins if explicitly supplied: derive()/tag() always re-pass - # the current `pose_tuple` from fields(), so an override needs - # to take precedence over it. - self.pose_tuple = _to_tuple(pose) if pose is not None else pose_tuple + + # `pose` wins if explicitly supplied (including `pose=None`, which + # clears). `derive()`/`tag()` re-pass the current `pose_tuple` from + # fields(); only an explicit override on `pose` should beat it. + if pose is _MISSING: + self.pose_tuple = pose_tuple + else: + self.pose_tuple = _to_tuple(pose) self.tags = tags if tags is not None else {} self._data = _data self._loader = _loader @@ -215,7 +229,7 @@ def __init__( ts: float = 0.0, *, data_type: type = object, - pose: Any | None = None, + pose: Any | _MissingType = _MISSING, pose_tuple: PoseTuple | None = None, tags: dict[str, Any] | None = None, embedding: Embedding | None = None, diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 5c67c3e0e1..209c30c3e8 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -39,6 +39,7 @@ from dimos.core.global_config import GlobalConfig, global_config from dimos.core.run_registry import get_most_recent, is_pid_alive, stop_entry from dimos.robot.unitree.go2.cli.go2tool import app as go2tool_app +from dimos.utils.cli.map import main as _map_main from dimos.utils.logging_config import setup_logger from dimos.visualization.rerun.constants import RerunOpenOption @@ -672,84 +673,7 @@ def send( topic_send(topic, message_expr) -@main.command(name="map") -def map_cmd( - dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), - voxel: float = typer.Option(0.05, "--voxel", help="Voxel size for the rebuild"), - device: str = typer.Option( - "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)" - ), - pgo: bool = typer.Option( - False, - "--pgo", - help="Run pose graph optimization and rebuild from spatially-deduped frames", - ), - pgo_tol: float = typer.Option( - 0.3, "--pgo-tol", help="Spatial dedup tolerance for --pgo (meters)" - ), - block_count: int = typer.Option( - 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" - ), - export: bool = typer.Option( - False, - "--export", - help="Export PGO map to ./.pc2.lcm in cwd (implies --pgo)", - ), - full_pgo: bool = typer.Option( - False, - "--full-pgo", - help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)", - ), - no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), - markers: bool = typer.Option( - False, - "--markers", - help="Detect AprilTag markers in color_image and overlay them in rerun", - ), - marker_size: float = typer.Option( - 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" - ), - marker_max_speed: float = typer.Option( - 0.5, - "--marker-max-speed", - help="Skip frames where robot is moving faster than this (m/s); 0 disables", - ), - marker_max_rot_rate: float = typer.Option( - 50.0, - "--marker-max-rot-rate", - help="Skip frames where robot is rotating faster than this (deg/s); 0 disables", - ), - marker_quality_window: float = typer.Option( - 0.1, - "--marker-quality-window", - help="Sharpest-frame window for marker detection (s)", - ), - marker_smoothing: float = typer.Option( - 7.5, - "--marker-smoothing", - help="Sliding-window track buffer for marker pose averaging (s); 0 disables (one box per raw detection)", - ), -) -> None: - """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" - from dimos.utils.cli.map import main as map_main - - map_main( - dataset=dataset, - voxel=voxel, - device=device, - pgo=pgo, - pgo_tol=pgo_tol, - block_count=block_count, - export=export, - full_pgo=full_pgo, - no_gui=no_gui, - markers=markers, - marker_size=marker_size, - marker_max_speed=marker_max_speed, - marker_max_rot_rate=marker_max_rot_rate, - marker_quality_window=marker_quality_window, - marker_smoothing=marker_smoothing, - ) +main.command(name="map")(_map_main) @main.command() diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index af19300549..21d7b04ed1 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -13,6 +13,7 @@ # limitations under the License. from collections.abc import Callable, Iterable +from pathlib import Path import time from typing import Any @@ -27,6 +28,7 @@ from dimos.memory2.type.observation import Observation from dimos.memory2.vis.color import Color from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo from dimos.msgs.sensor_msgs.Image import Image from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 from dimos.perception.fiducial.marker_transformer import DetectMarkers @@ -35,6 +37,61 @@ from dimos.visualization.rerun.init import rerun_init PATH_THICKNESS = 0.01 +# Pin pattern (from dimos/memory2/vis/space/rerun.py): thin vertical line +# from each marker with the label floating at the top so multi-marker +# labels never overlap the boxes. +MARKER_STEM = 1.0 + + +def _log_markers( + prefix: str, + centers: list[tuple[float, float, float]], + quats: list[tuple[float, float, float, float]], + *, + fill_half: list[tuple[float, float, float]], + outline_half: list[tuple[float, float, float]], + colors: list[tuple[int, int, int]], + labels: list[str], +) -> None: + """Render per-marker fill + outline + pin-stem + label as four static entities.""" + import rerun as rr + + n = len(centers) + pin_strips = [[(cx, cy, cz), (cx, cy, cz + MARKER_STEM)] for (cx, cy, cz) in centers] + label_positions = [(cx, cy, cz + MARKER_STEM + 0.01) for (cx, cy, cz) in centers] + rr.log( + f"{prefix}/fill", + rr.Boxes3D( + centers=centers, + half_sizes=fill_half, + quaternions=quats, + colors=colors, + fill_mode=rr.components.FillMode.Solid, + ), + static=True, + ) + rr.log( + f"{prefix}/outline", + rr.Boxes3D( + centers=centers, + half_sizes=outline_half, + quaternions=quats, + colors=[(255, 255, 255)] * n, + fill_mode=rr.components.FillMode.MajorWireframe, + radii=0.002, + ), + static=True, + ) + rr.log( + f"{prefix}/pin", + rr.LineStrips3D(strips=pin_strips, colors=colors, radii=[0.005]), + static=True, + ) + rr.log( + f"{prefix}/label", + rr.Points3D(positions=label_positions, labels=labels, colors=colors, radii=[0.001] * n), + static=True, + ) def _accumulate( @@ -122,7 +179,9 @@ def main( "--pgo-tol", help="Spatial dedup tolerance (meters); applies to both raw and --pgo maps", ), - block_count: int = typer.Option(2_000_000, "--block-count", help="VoxelBlockGrid capacity"), + block_count: int = typer.Option( + 2_000_000, "--block-count", help="VoxelBlockGrid capacity (raw and PGO rebuilds)" + ), export: bool = typer.Option( False, "--export", @@ -139,6 +198,11 @@ def main( "--markers", help="Detect AprilTag markers in color_image and overlay them in rerun", ), + camera_info: Path | None = typer.Option( + None, + "--camera-info", + help="YAML calibration file for --markers; defaults to Go2 builtin", + ), marker_size: float = typer.Option( 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" ), @@ -163,6 +227,7 @@ def main( help="Sliding-window track buffer for marker pose averaging (s); 0 disables (one box per raw detection)", ), ) -> None: + """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" db_path = resolve_named_path(dataset, ".db") if export or full_pgo: pgo = True @@ -179,13 +244,15 @@ def main( # so it stays cheap (no pointcloud loading). seen: dict[tuple[int, int, int], Observation[Any]] = {} for obs in lidar: - p = obs.pose_tuple - if p is None: + pose = obs.pose + if pose is None: continue - # Reject placeholder poses at the world origin. - if p[0] == 0 and p[1] == 0 and p[2] == 0: + # Reject placeholder poses: zero translation OR uninitialized rotation. + # Same condition as pgo_keyframes so dedup and PGO see the same frames. + if pose.position.is_zero() or pose.orientation.is_zero(): continue - cell = (int(p[0] / pgo_tol), int(p[1] / pgo_tol), int(p[2] / pgo_tol)) + t = pose.position + cell = (int(t.x / pgo_tol), int(t.y / pgo_tol), int(t.z / pgo_tol)) seen[cell] = obs n_kept = len(seen) @@ -262,8 +329,9 @@ def main( # (verified: matches lidar_base_pose + BASE_TO_OPTICAL to ~1mm). # No mount composition needed. color_image = store.stream("color_image", Image) + cam_info = CameraInfo.from_yaml(str(camera_info)) if camera_info else _camera_info_static() xf = DetectMarkers( - camera_info=_camera_info_static(), + camera_info=cam_info, marker_length_m=marker_size, smoothing_window=marker_smoothing, ) @@ -317,7 +385,6 @@ def main( full_pgo_map.to_rerun(voxel_size=voxel / 2), static=True, ) - STEM_HEIGHT = 0 # lift pose-graph viz above the map for legibility if pgo_path: rr.log( "world/pgo_map/path", @@ -326,25 +393,16 @@ def main( ), static=True, ) - hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path] rr.log( "world/pgo_map/pgo/keyframes", - rr.Points3D(positions=hovered, colors=[[255, 0, 0]], radii=[0.025]), + rr.Points3D(positions=pgo_path, colors=[[255, 0, 0]], radii=[0.025]), static=True, ) if pgo and loops: loop_strips = [ [ - ( - lc.source.translation.x, - lc.source.translation.y, - lc.source.translation.z + STEM_HEIGHT, - ), - ( - lc.target.translation.x, - lc.target.translation.y, - lc.target.translation.z + STEM_HEIGHT, - ), + (lc.source.translation.x, lc.source.translation.y, lc.source.translation.z), + (lc.target.translation.x, lc.target.translation.y, lc.target.translation.z), ] for lc in loops ] @@ -360,8 +418,8 @@ def main( # Outline sits just outside the fill so both stay visible. outline_bump = marker_size * 0.05 outline_half = [(half + outline_bump, half + outline_bump, 0.006)] * n - centers = [(d.data.center.x, d.data.center.y, d.data.center.z) for d in marker_dets] - quaternions = [ + raw_centers = [(d.data.center.x, d.data.center.y, d.data.center.z) for d in marker_dets] + raw_quats = [ ( d.data.orientation.x, d.data.orientation.y, @@ -376,46 +434,15 @@ def main( for d in marker_dets ] labels = [f"track={d.data.track_id} id={d.data.marker_id}" for d in marker_dets] - # Pin pattern (from dimos/memory2/vis/space/rerun.py): thin - # vertical line from each marker with the label floating at the - # top so multi-marker labels never overlap the boxes. - MARKER_STEM = 1.0 - pin_strips = [[(cx, cy, cz), (cx, cy, cz + MARKER_STEM)] for (cx, cy, cz) in centers] - label_positions = [(cx, cy, cz + MARKER_STEM + 0.01) for (cx, cy, cz) in centers] - rr.log( - "world/raw_map/markers/fill", - rr.Boxes3D( - centers=centers, - half_sizes=fill_half, - quaternions=quaternions, - colors=colors, - fill_mode=rr.components.FillMode.Solid, - ), - static=True, - ) - rr.log( - "world/raw_map/markers/outline", - rr.Boxes3D( - centers=centers, - half_sizes=outline_half, - quaternions=quaternions, - colors=[(255, 255, 255)] * n, - fill_mode=rr.components.FillMode.MajorWireframe, - radii=0.002, - ), - static=True, - ) - rr.log( - "world/raw_map/markers/pin", - rr.LineStrips3D(strips=pin_strips, colors=colors, radii=[0.005]), - static=True, - ) - rr.log( - "world/raw_map/markers/label", - rr.Points3D( - positions=label_positions, labels=labels, colors=colors, radii=[0.001] * n - ), - static=True, + + _log_markers( + "world/raw_map/markers", + raw_centers, + raw_quats, + fill_half=fill_half, + outline_half=outline_half, + colors=colors, + labels=labels, ) if interp is not None: @@ -423,7 +450,7 @@ def main( # pgo_world; composing with each raw marker transform lifts # it into the corrected frame so it lines up with pgo_map. pgo_centers: list[tuple[float, float, float]] = [] - pgo_quaternions: list[tuple[float, float, float, float]] = [] + pgo_quats: list[tuple[float, float, float, float]] = [] for d in marker_dets: raw_tf = Transform( translation=d.data.center, @@ -440,7 +467,7 @@ def main( corrected.translation.z, ) ) - pgo_quaternions.append( + pgo_quats.append( ( corrected.rotation.x, corrected.rotation.y, @@ -448,54 +475,17 @@ def main( corrected.rotation.w, ) ) - pgo_pin_strips = [ - [(cx, cy, cz), (cx, cy, cz + MARKER_STEM)] for (cx, cy, cz) in pgo_centers - ] - pgo_label_positions = [ - (cx, cy, cz + MARKER_STEM + 0.01) for (cx, cy, cz) in pgo_centers - ] - rr.log( - "world/pgo_map/markers/fill", - rr.Boxes3D( - centers=pgo_centers, - half_sizes=fill_half, - quaternions=pgo_quaternions, - colors=colors, - fill_mode=rr.components.FillMode.Solid, - ), - static=True, - ) - rr.log( - "world/pgo_map/markers/outline", - rr.Boxes3D( - centers=pgo_centers, - half_sizes=outline_half, - quaternions=pgo_quaternions, - colors=[(255, 255, 255)] * n, - fill_mode=rr.components.FillMode.MajorWireframe, - radii=0.002, - ), - static=True, - ) - rr.log( - "world/pgo_map/markers/pin", - rr.LineStrips3D(strips=pgo_pin_strips, colors=colors, radii=[0.005]), - static=True, - ) - rr.log( - "world/pgo_map/markers/label", - rr.Points3D( - positions=pgo_label_positions, - labels=labels, - colors=colors, - radii=[0.001] * n, - ), - static=True, + _log_markers( + "world/pgo_map/markers", + pgo_centers, + pgo_quats, + fill_half=fill_half, + outline_half=outline_half, + colors=colors, + labels=labels, ) if export and pgo_map is not None: - from pathlib import Path - out_path = Path.cwd() / f"{db_path.stem}.pc2.lcm" print(f"exporting PGO twopass map to {out_path}...") out_path.write_bytes(pgo_map.lcm_encode()) From 3c0878b03aa624bd767e5dfc08d7935973c0e941 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 22:42:11 +0800 Subject: [PATCH 36/64] another PGO rewrite --- .gitignore | 3 + dimos/mapping/loop_closure/eval.py | 18 +- dimos/mapping/loop_closure/pgo.py | 315 +++++++++--------- dimos/mapping/loop_closure/test_pgo.py | 133 ++++---- .../mapping/loop_closure/utils/markers_rrd.py | 10 +- dimos/memory2/type/observation.py | 11 +- dimos/msgs/sensor_msgs/test_PointCloud2.py | 4 +- .../perception/fiducial/marker_transformer.py | 8 +- dimos/utils/cli/map.py | 61 ++-- 9 files changed, 276 insertions(+), 287 deletions(-) diff --git a/.gitignore b/.gitignore index aedee04af7..ffc1c4f31f 100644 --- a/.gitignore +++ b/.gitignore @@ -86,3 +86,6 @@ htmlcov/ # Memory2 autorecord recording*.db + +# Rerun recordings +*.rrd diff --git a/dimos/mapping/loop_closure/eval.py b/dimos/mapping/loop_closure/eval.py index f0294a0c67..ddd9d4cb2a 100644 --- a/dimos/mapping/loop_closure/eval.py +++ b/dimos/mapping/loop_closure/eval.py @@ -33,12 +33,7 @@ import typer -from dimos.mapping.loop_closure.pgo import ( - LoopClosure, - keyframes_to_corrections, - make_interpolator, - pgo_keyframes, -) +from dimos.mapping.loop_closure.pgo import PGO from dimos.memory2.store.sqlite import SqliteStore from dimos.memory2.stream import Stream from dimos.memory2.transform import QualityWindow, SpeedLimit @@ -79,15 +74,8 @@ def _eval_recording( with store: lidar = store.streams.lidar - # .tap() captures each keyframe as make_interpolator iterates the - # corrections stream, so no extra pass over lidar. - keyframes: list[Any] = [] - loops_out: list[LoopClosure] = [] t0 = time.perf_counter() - kf_stream = pgo_keyframes(lidar, loop_closures_out=loops_out).tap( - lambda obs: keyframes.append(obs.data) - ) - interp = make_interpolator(keyframes_to_corrections(kf_stream)) + graph = lidar.transform(PGO()).last().data pgo_time = time.perf_counter() - t0 # Marker detection pipeline: same shape as dimos map / markers_rrd. @@ -125,7 +113,7 @@ def _eval_recording( child_frame_id=f"marker_{d.data.marker_id}", ts=d.ts, ) - corrected = interp(d.ts) + raw_tf + corrected = graph.correct(raw_tf) t = corrected.translation by_marker.setdefault(d.data.marker_id, []).append((t.x, t.y, t.z)) diff --git a/dimos/mapping/loop_closure/pgo.py b/dimos/mapping/loop_closure/pgo.py index db6efa9096..b6c2b595c7 100644 --- a/dimos/mapping/loop_closure/pgo.py +++ b/dimos/mapping/loop_closure/pgo.py @@ -12,59 +12,48 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""PGO drift corrections as composable Stream stages. +"""PGO drift corrections as a memory2 Transformer. A lidar/odom stream comes in with poses that drift over time — the robot's estimate of where it is in the world slowly diverges from ground truth as small per-frame errors accumulate. PGO ("pose graph optimization") detects revisited places via loop closure and pulls the trajectory back into -self-consistency. The result is a stream of *corrections*: a Transform per -keyframe that maps every drifted ("raw") pose to its corrected one. - -Pipeline: - - lidar: Stream[PointCloud2] # pose-stamped scans - -> pgo_keyframes(...) -> Stream[Keyframe] # one per kf - -> keyframes_to_corrections(...) -> Stream[Transform] # world_corrected <- world_raw - -> apply_corrections(any_stream, corrections) -> Stream[T] # obs.pose shuffled - -`pgo_keyframes` runs ISAM2 + ICP loop closure across the input, emitting -each kept keyframe with both its raw odom pose (`local`) and its optimized -pose (`optimized`) as Transforms. Pass `loop_closures_out=[]` to also -collect each accepted ICP loop edge as a `LoopClosure` (source+target -optimized poses + ICP fitness) — useful for visualizing the pose graph. - -`keyframes_to_corrections` is pure arithmetic: per keyframe, the drift -correction is `optimized @ local^-1` (Transform composition). The resulting -stream has one entry per keyframe — sparse in time. - -`make_interpolator` materializes the sparse correction stream into a -ts -> Transform function: SLERP for rotation, linear lerp for translation, -clipping out-of-range queries to the endpoints. - -`apply_corrections` shuffles `obs.pose` on any stream using the interpolated -correction at each obs.ts. It's read-only on `obs.data`, so the same lidar -stream can be re-applied — or a different recording from the same physical -space, if the corrections generalize. - -`gtsam` is imported lazily inside hot helpers so importing this module -stays cheap and gtsam-free for consumers that only need `Keyframe` / -`apply_corrections`. +self-consistency. + +Two public types: + +- ``PGO`` is ``Transformer[PointCloud2, PoseGraph]``. Apply it to a lidar + stream and it emits one cumulative ``PoseGraph`` snapshot per loop-closure + event (the only state changes that meaningfully restructure the optimized + poses), plus a final emit at end-of-stream so ``.last()`` always returns + something even on loop-less recordings. +- ``PoseGraph`` is a frozen dataclass carrying ``keyframes`` (nodes) and + ``loops`` (edges), *and* is itself ``Transformer[Any, Any]``. Apply via + ``stream.transform(graph)`` to rewrite ``obs.pose`` from the raw to the + drift-corrected frame; or call ``graph.correct(pose)`` for a one-off. + +Typical usage:: + + graph = lidar.transform(PGO()).last().data # batch + corrected = some_stream.transform(graph) # apply + for snapshot in lidar.transform(PGO()): ... # live viz + +``gtsam`` is imported lazily inside hot helpers so importing this module +stays cheap and gtsam-free for consumers that only need ``PoseGraph``. """ from __future__ import annotations from collections.abc import Callable, Iterator from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Unpack +from typing import TYPE_CHECKING, Any, Literal, TypedDict, TypeVar, Unpack, cast import numpy as np import open3d as o3d # type: ignore[import-untyped] import open3d.core as o3c # type: ignore[import-untyped] from scipy.spatial.transform import Rotation, Slerp -from dimos.memory2.store.memory import MemoryStore -from dimos.memory2.stream import Stream +from dimos.memory2.transform import Transformer from dimos.memory2.type.observation import Observation from dimos.msgs.geometry_msgs.Quaternion import Quaternion from dimos.msgs.geometry_msgs.Transform import Transform @@ -113,7 +102,7 @@ class PGOConfig(BaseConfig): odom_trans_var_z: float = 1e-6 # Loop-closure rotation variance. Translation variance is supplied - # per-edge from ICP fitness, floored at 0.01 (sigma_trans ~ 10 cm). + # per-edge from ICP fitness, floored at 1e-4 (sigma_trans ~ 1 cm). loop_rot_var: float = 0.05 @@ -167,128 +156,146 @@ class LoopClosure: score: float -def pgo_keyframes( - stream: Stream[PointCloud2], - *, - on_frame: Callable[[Any], None] | None = None, - loop_closures_out: list[LoopClosure] | None = None, - **pgo_cfg: Unpack[PGOKwargs], -) -> Stream[Keyframe]: - """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe. - - If `loop_closures_out` is provided, accepted loop edges are appended to - it (in detection order) using each keyframe's final optimized pose. +@dataclass(frozen=True) +class PoseGraph(Transformer[Any, Any]): + """Output of PGO: the keyframe nodes and loop edges of the optimized graph. + + Apply via ``stream.transform(graph)`` to rewrite each obs's pose into + the drift-corrected frame; the data payload passes through untouched. + For one-off pose corrections call :meth:`correct` directly. """ - cfg = PGOConfig(**pgo_cfg) - pgo = _PGO(cfg) - - for obs in stream: - if on_frame is not None: - on_frame(obs) - pose = obs.pose - if pose is None: - continue - # Skip placeholder poses written before odom converges: zero - # translation or uninitialized (all-zero) quaternion. Identity - # rotation (qw=1) is valid and must not be filtered. - if pose.position.is_zero() or pose.orientation.is_zero(): - continue - pgo.process(_obs_to_pose3(obs), obs.ts, obs.data) - - mem = MemoryStore() - out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) - for kf in pgo.finalize(): - out.append(kf, ts=kf.ts) - if loop_closures_out is not None: - loop_closures_out.extend(pgo.loop_closures()) - return out - - -def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: - """Per-keyframe drift correction as Transform(world_corrected <- world_raw).""" - mem = MemoryStore() - out: Stream[Transform] = mem.stream("corrections", Transform) - for obs in keyframes: - kf = obs.data - # world_corrected <- body <- world_raw composes to world_corrected <- world_raw. - drift = kf.optimized + kf.local.inverse() - out.append(drift, ts=kf.ts) - return out - - -def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]: - """Materialize corrections once; return a fast ts -> Transform lookup.""" - ts_list: list[float] = [] - quat_list: list[np.ndarray] = [] - t_list: list[np.ndarray] = [] - for obs in corrections: - tf = obs.data - # obs.ts is authoritative; obs.data.ts can be mutated by Transform's - # ts=0.0 -> time.time() fallback in its constructor. - ts_list.append(obs.ts) - quat_list.append(tf.rotation.to_numpy()) - t_list.append(tf.translation.to_numpy()) - - if not ts_list: - raise ValueError("empty corrections stream") - - # Slerp needs ≥2 keyframes. Pad len==1 with a duplicate so the - # general path handles it; clip-to-endpoints behavior is unchanged. - if len(ts_list) == 1: - ts_list.append(ts_list[0] + 1e-6) - quat_list.append(quat_list[0]) - t_list.append(t_list[0]) - - ts_arr = np.array(ts_list) - t_stack = np.stack(t_list) - slerp = Slerp(ts_arr, Rotation.from_quat(np.stack(quat_list))) - - def interp(ts: float) -> Transform: - ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) - R = slerp([ts_clip])[0].as_matrix() - idx = int(np.searchsorted(ts_arr, ts_clip)) - if idx == 0: - t = t_stack[0] - elif idx >= len(ts_arr): - t = t_stack[-1] - else: - t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] - alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 - t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx] - return Transform( - translation=Vector3(t), - rotation=Quaternion.from_rotation_matrix(R), - frame_id=FRAME_WORLD_CORRECTED, - child_frame_id=FRAME_WORLD_RAW, - ts=float(ts), - ) - return interp + keyframes: tuple[Keyframe, ...] = () + loops: tuple[LoopClosure, ...] = () + def correct(self, pose: Transform) -> Transform: + """Map a raw-world pose into the drift-corrected frame at ``pose.ts``.""" + # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw. + return self.correction_at(pose.ts) + pose -def apply_corrections( - stream: Stream[T], - corrections: Stream[Transform], -) -> Stream[T]: - """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts. + def correction_at(self, ts: float) -> Transform: + """The raw ``world_corrected <- world_raw`` transform at ``ts``. - `obs.data` is untouched. Frames with `obs.pose is None` pass through - unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped). - """ - interp = make_interpolator(corrections) + Useful when applying the correction to non-pose data (e.g. point + clouds): ``pointcloud.transform(graph.correction_at(obs.ts))``. + """ + return self._interp()(ts) - def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: + def __call__(self, upstream: Iterator[Observation[Any]]) -> Iterator[Observation[Any]]: + """Rewrite obs.pose via :meth:`correct`; pass through pose-less obs unchanged.""" for obs in upstream: ps = obs.pose_stamped if ps is None: yield obs continue raw_tf = Transform.from_pose(FRAME_BODY, ps) - # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw. - corrected = interp(obs.ts) + raw_tf - yield obs.derive(data=obs.data, pose=corrected) + yield obs.derive(data=obs.data, pose=self.correct(raw_tf)) + + def _interp(self) -> Callable[[float], Transform]: + """Lazy slerp/lerp drift-correction lookup keyed by ts.""" + cached = self.__dict__.get("_interp_cache") + if cached is not None: + return cast("Callable[[float], Transform]", cached) + + if not self.keyframes: + raise ValueError("PoseGraph has no keyframes") + + # Per-keyframe drift: world_corrected <- body <- world_raw. + ts_list = [kf.ts for kf in self.keyframes] + drifts = [(kf.optimized + kf.local.inverse()) for kf in self.keyframes] + quat_list = [tf.rotation.to_numpy() for tf in drifts] + t_list = [tf.translation.to_numpy() for tf in drifts] + + # Slerp needs ≥2 keyframes; pad a len==1 list with a near-duplicate. + if len(ts_list) == 1: + ts_list.append(ts_list[0] + 1e-6) + quat_list.append(quat_list[0]) + t_list.append(t_list[0]) + + ts_arr = np.array(ts_list) + t_stack = np.stack(t_list) + slerp = Slerp(ts_arr, Rotation.from_quat(np.stack(quat_list))) + + def interp(ts: float) -> Transform: + ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) + R = slerp([ts_clip])[0].as_matrix() + idx = int(np.searchsorted(ts_arr, ts_clip)) + if idx == 0: + t = t_stack[0] + elif idx >= len(ts_arr): + t = t_stack[-1] + else: + t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] + alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 + t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx] + return Transform( + translation=Vector3(t), + rotation=Quaternion.from_rotation_matrix(R), + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_WORLD_RAW, + ts=float(ts), + ) + + # frozen=True blocks plain attribute writes; use object.__setattr__. + object.__setattr__(self, "_interp_cache", interp) + return interp + - return stream.transform(xf) +class PGO(Transformer[PointCloud2, "PoseGraph"]): + """Pose-graph optimization as a memory2 Transformer. + + Emits one cumulative :class:`PoseGraph` snapshot per state change. By + default (``emit_on="loop"``) that's one emit per accepted loop closure + plus a final emit at end-of-stream so ``.last()`` always returns + something even on loop-less recordings. ``emit_on="keyframe"`` emits on + every new keyframe — noisier, useful for live progress viz. + """ + + def __init__( + self, + *, + emit_on: Literal["loop", "keyframe"] = "loop", + **cfg: Unpack[PGOKwargs], + ) -> None: + self._cfg = PGOConfig(**cfg) + self._emit_on = emit_on + + def __call__( + self, upstream: Iterator[Observation[PointCloud2]] + ) -> Iterator[Observation[PoseGraph]]: + pgo = _PGOState(self._cfg) + last_kf_count = 0 + last_loop_count = 0 + last_kf_ts: float | None = None + + for obs in upstream: + pose = obs.pose + if pose is None: + continue + # Placeholder filter: zero translation OR uninitialized (all-zero) + # quaternion. Identity rotation (qw=1) is valid and stays. + if pose.position.is_zero() or pose.orientation.is_zero(): + continue + pgo.process(_obs_to_pose3(obs), obs.ts, obs.data) + + n_kf = len(pgo._key_poses) + n_loops = len(pgo._accepted_loops) + if n_kf == last_kf_count and n_loops == last_loop_count: + continue + last_kf_ts = pgo._key_poses[-1].timestamp if pgo._key_poses else last_kf_ts + + should_emit = (self._emit_on == "loop" and n_loops > last_loop_count) or ( + self._emit_on == "keyframe" + ) + last_kf_count = n_kf + last_loop_count = n_loops + if should_emit and last_kf_ts is not None: + yield Observation(ts=last_kf_ts, data_type=PoseGraph, _data=pgo.snapshot()) + + # Final emit: guarantees `.last()` returns something even on + # loop-less recordings (and catches any tail state since the last emit). + if last_kf_ts is not None: + yield Observation(ts=last_kf_ts, data_type=PoseGraph, _data=pgo.snapshot()) def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3: @@ -321,7 +328,7 @@ class _LoopPair: score: float -class _PGO: +class _PGOState: """Incremental PGO: gtsam ISAM2 over keyframes with ICP loop closures. Call `process` per frame (odom pose + body-frame points). Call @@ -421,6 +428,13 @@ def loop_closures(self) -> list[LoopClosure]: ) return out + def snapshot(self) -> PoseGraph: + """Immutable view of the current pose graph.""" + return PoseGraph( + keyframes=tuple(self.finalize()), + loops=tuple(self.loop_closures()), + ) + def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool: if not self._key_poses: return True @@ -589,7 +603,9 @@ def _smooth_and_update(self) -> None: # *translation* variance and a generous fixed rotation variance # — loops shouldn't be trusted to fix rotation tightly without # normals + p2plane. - trans_var = max(0.01, float(pair.score)) # >= sigma_trans = 10 cm + # Floor at 1e-4 m² (sigma_trans ~ 1 cm) so a near-perfect ICP isn't + # forced to a slack 10 cm prior; a fitness score of 0.01 still wins. + trans_var = max(1e-4, float(pair.score)) rot_var = self._cfg.loop_rot_var noise = gtsam.noiseModel.Diagonal.Variances( np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) @@ -599,7 +615,8 @@ def _smooth_and_update(self) -> None: self._pending_loops.clear() self._isam2.update(self._graph, self._values) - self._isam2.update() + # Extra linearization iterations only when a loop edge lands; odom-only + # adds don't move the state much and a single update suffices. if has_loop: for _ in range(self._cfg.loop_closure_extra_iterations): self._isam2.update() diff --git a/dimos/mapping/loop_closure/test_pgo.py b/dimos/mapping/loop_closure/test_pgo.py index 3feeb3d348..3a8722e14d 100644 --- a/dimos/mapping/loop_closure/test_pgo.py +++ b/dimos/mapping/loop_closure/test_pgo.py @@ -19,14 +19,12 @@ from scipy.spatial.transform import Rotation from dimos.mapping.loop_closure.pgo import ( + PGO, Keyframe, PGOConfig, + PoseGraph, _obs_to_pose3, _pose3_to_transform, - apply_corrections, - keyframes_to_corrections, - make_interpolator, - pgo_keyframes, ) from dimos.memory2.store.memory import MemoryStore from dimos.memory2.stream import Stream @@ -160,25 +158,18 @@ def _make_lidar_stream(n_frames: int = 12, points_per_frame: int = 500) -> Strea class TestPipelineEndToEnd: def test_straight_line_produces_keyframes(self) -> None: lidar = _make_lidar_stream(n_frames=12) - kfs = pgo_keyframes(lidar) + graph = lidar.transform(PGO()).last().data # 12 frames spaced 1m apart with key_pose_delta_trans=0.5 -> every frame # after the first triggers a keyframe; some may dedupe but ~11 emitted. - n = kfs.count() + n = len(graph.keyframes) assert 10 <= n <= 12 - def test_keyframes_to_corrections_size(self) -> None: - lidar = _make_lidar_stream(n_frames=12) - kfs = pgo_keyframes(lidar) - corr = keyframes_to_corrections(kfs) - assert corr.count() == kfs.count() - def test_apply_identity_corrections_preserves_poses(self) -> None: # With no loop closures the optimization is a no-op -> drift = identity -> - # apply_corrections is a no-op on input poses. + # stream.transform(graph) is a no-op on input poses. lidar = _make_lidar_stream(n_frames=12) - kfs = pgo_keyframes(lidar) - corr = keyframes_to_corrections(kfs) - corrected = apply_corrections(lidar, corr) + graph = lidar.transform(PGO()).last().data + corrected = lidar.transform(graph) in_poses = [o.pose_tuple for o in lidar if o.pose_tuple is not None] out_poses = [o.pose_tuple for o in corrected if o.pose_tuple is not None] assert len(in_poses) == len(out_poses) @@ -187,19 +178,29 @@ def test_apply_identity_corrections_preserves_poses(self) -> None: assert a == pytest.approx(b, abs=1e-6) -def _make_corrections(transforms: list[Transform]) -> Stream[Transform]: - mem = MemoryStore() - stream: Stream[Transform] = mem.stream("corrections", Transform) - for tf in transforms: - stream.append(tf, ts=tf.ts) - return stream +def _graph_with_drift_at(drifts: list[Transform]) -> PoseGraph: + """PoseGraph whose drift correction equals each ``drifts[i]`` at ``drifts[i].ts``. + + Trick: drift = optimized + local^-1. With local=identity, drift==optimized. + """ + identity = Vector3(0.0, 0.0, 0.0) + identity_rot = Quaternion(0.0, 0.0, 0.0, 1.0) + return PoseGraph( + keyframes=tuple( + Keyframe( + ts=d.ts, + local=Transform(translation=identity, rotation=identity_rot, ts=d.ts), + optimized=Transform(translation=d.translation, rotation=d.rotation, ts=d.ts), + ) + for d in drifts + ) + ) -class TestInterpolator: - def test_empty_stream_raises(self) -> None: - empty = _make_corrections([]) +class TestPoseGraphCorrection: + def test_empty_raises(self) -> None: with pytest.raises(ValueError): - make_interpolator(empty) + PoseGraph().correction_at(0.0) def test_single_keyframe_returns_constant(self) -> None: R = Rotation.from_euler("z", np.pi / 4).as_matrix() @@ -208,33 +209,32 @@ def test_single_keyframe_returns_constant(self) -> None: rotation=Quaternion.from_rotation_matrix(R), ts=10.0, ) - interp = make_interpolator(_make_corrections([only])) + graph = _graph_with_drift_at([only]) for query_ts in (0.0, 10.0, 100.0): - out = interp(query_ts) + out = graph.correction_at(query_ts) assert out.translation.x == pytest.approx(1.0, abs=1e-10) assert out.translation.y == pytest.approx(2.0, abs=1e-10) assert out.translation.z == pytest.approx(3.0, abs=1e-10) def test_out_of_range_clips_to_endpoints(self) -> None: - # Note: Transform's constructor maps ts=0.0 -> time.time(); use ts>0 - # so test timestamps are deterministic. + # Transform's ctor maps ts=0.0 -> time.time(); use ts>0 for determinism. a = Transform(translation=Vector3(0.0, 0.0, 0.0), ts=1.0) b = Transform(translation=Vector3(10.0, 0.0, 0.0), ts=11.0) - # _make_corrections uses tf.ts; obs.ts ends up the same. - mem = MemoryStore() - stream: Stream[Transform] = mem.stream("corrections", Transform) - stream.append(a, ts=1.0) - stream.append(b, ts=11.0) - interp = make_interpolator(stream) + graph = _graph_with_drift_at([a, b]) # Below range -> clipped to a - assert interp(-5.0).translation.x == pytest.approx(0.0, abs=1e-10) + assert graph.correction_at(-5.0).translation.x == pytest.approx(0.0, abs=1e-10) # Above range -> clipped to b - assert interp(100.0).translation.x == pytest.approx(10.0, abs=1e-10) + assert graph.correction_at(100.0).translation.x == pytest.approx(10.0, abs=1e-10) # In-range midpoint - assert interp(6.0).translation.x == pytest.approx(5.0, abs=1e-10) + assert graph.correction_at(6.0).translation.x == pytest.approx(5.0, abs=1e-10) + + def test_frozen(self) -> None: + graph = PoseGraph() + with pytest.raises(Exception): + graph.keyframes = (Keyframe(ts=0, local=Transform(), optimized=Transform()),) # type: ignore[misc] -class TestApplyCorrections: +class TestApplyAsTransformer: def test_pure_translation_shifts_poses(self) -> None: # Build a stream of 3 frames at the origin (identity pose) with a known # correction that shifts everything by +5 in x. Expected: corrected @@ -247,17 +247,13 @@ def test_pure_translation_shifts_poses(self) -> None: ts=float(i + 1), pose=(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0), ) - - # Constant correction: translate by (5, 0, 0), identity rotation. - c_a = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=1.0) - c_b = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=3.0) - mem2 = MemoryStore() - corr: Stream[Transform] = mem2.stream("corrections", Transform) - corr.append(c_a, ts=1.0) - corr.append(c_b, ts=3.0) - - corrected = apply_corrections(lidar, corr) - for obs in corrected: + graph = _graph_with_drift_at( + [ + Transform(translation=Vector3(5.0, 0.0, 0.0), ts=1.0), + Transform(translation=Vector3(5.0, 0.0, 0.0), ts=3.0), + ] + ) + for obs in lidar.transform(graph): p = obs.pose_tuple assert p is not None assert p[0] == pytest.approx(5.0, abs=1e-9) @@ -272,14 +268,13 @@ def test_passes_through_pose_none(self) -> None: ts=1.0, pose=None, ) - c_a = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=1.0) - c_b = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=2.0) - mem2 = MemoryStore() - corr: Stream[Transform] = mem2.stream("corrections", Transform) - corr.append(c_a, ts=1.0) - corr.append(c_b, ts=2.0) - corrected = apply_corrections(lidar, corr) - for obs in corrected: + graph = _graph_with_drift_at( + [ + Transform(translation=Vector3(5.0, 0.0, 0.0), ts=1.0), + Transform(translation=Vector3(5.0, 0.0, 0.0), ts=2.0), + ] + ) + for obs in lidar.transform(graph): assert obs.pose is None @@ -316,8 +311,8 @@ def test_pgo_pipeline_against_go2_short(self) -> None: in_count = lidar.count() assert in_count > 0, "recording is empty" - kfs = pgo_keyframes(lidar) - n_kf = kfs.count() + graph = lidar.transform(PGO()).last().data + n_kf = len(graph.keyframes) assert n_kf > 0, "PGO emitted no keyframes" # 60s recording at ~0.5m keyframe spacing -> at least a handful. assert n_kf >= 5 @@ -327,16 +322,14 @@ def test_pgo_pipeline_against_go2_short(self) -> None: # a no-op and local == optimized for every keyframe. drifted = sum( 1 - for obs in kfs - if obs.data.local.translation != obs.data.optimized.translation - or obs.data.local.rotation != obs.data.optimized.rotation + for kf in graph.keyframes + if kf.local.translation != kf.optimized.translation + or kf.local.rotation != kf.optimized.rotation ) assert drifted > 0, "expected loop closures to drift at least one keyframe" + # loop_closures_out side-channel is gone — graph.loops carries them. + assert len(graph.loops) > 0 - corr = keyframes_to_corrections(kfs) - assert corr.count() == n_kf - - # apply_corrections preserves frame count, including pose=None rows. - corrected = apply_corrections(lidar, corr) - out_count = sum(1 for _ in corrected) + # PoseGraph-as-Transformer preserves frame count, including pose=None rows. + out_count = sum(1 for _ in lidar.transform(graph)) assert out_count == in_count diff --git a/dimos/mapping/loop_closure/utils/markers_rrd.py b/dimos/mapping/loop_closure/utils/markers_rrd.py index f4223be553..a98e79ddc3 100644 --- a/dimos/mapping/loop_closure/utils/markers_rrd.py +++ b/dimos/mapping/loop_closure/utils/markers_rrd.py @@ -20,17 +20,15 @@ - per detection: marker box in world frame, at the detection timestamp Usage: - uv run python -m dimos.utils.cli.markers_rrd hk_village1 --out hk.rrd + uv run python -m dimos.mapping.loop_closure.utils.markers_rrd hk_village1 --out hk.rrd rerun hk.rrd -Throwaway script next to ``map.py``; remove once the apriltag reliability work -lands. +Throwaway script; remove once the apriltag reliability work lands. """ from __future__ import annotations from pathlib import Path -from typing import Any import rerun as rr import typer @@ -119,7 +117,7 @@ def main( # Pass 3: marker detections, filtered the same way as `dimos map`. xf = DetectMarkers(camera_info=cam_info, marker_length_m=marker_size) - pipeline: Stream[Any] = color_image.transform( + pipeline: Stream[Image] = color_image.transform( QualityWindow(lambda img: img.sharpness, window=quality_window) ) if marker_max_speed > 0: @@ -185,7 +183,7 @@ def main( marker_length_m=marker_size, smoothing_window=smoothing_window, ) - pipeline_tracked: Stream[Any] = color_image.transform( + pipeline_tracked: Stream[Image] = color_image.transform( QualityWindow(lambda img: img.sharpness, window=quality_window) ) if marker_max_speed > 0: diff --git a/dimos/memory2/type/observation.py b/dimos/memory2/type/observation.py index 2d86812678..139566287d 100644 --- a/dimos/memory2/type/observation.py +++ b/dimos/memory2/type/observation.py @@ -19,6 +19,9 @@ import threading from typing import TYPE_CHECKING, Any, Generic, TypeVar, cast +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped + if sys.version_info >= (3, 11): from typing import Self else: @@ -28,8 +31,6 @@ from collections.abc import Callable from dimos.models.embedding.base import Embedding - from dimos.msgs.geometry_msgs.Pose import Pose - from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped T = TypeVar("T") R = TypeVar("R") @@ -154,7 +155,6 @@ def pose(self) -> Pose | None: """Typed :class:`Pose` (or None). Allocates per access — read :attr:`pose_tuple` in hot loops.""" if self.pose_tuple is None: return None - from dimos.msgs.geometry_msgs.Pose import Pose return Pose(*self.pose_tuple) @@ -166,10 +166,7 @@ def pose_stamped(self) -> PoseStamped | None: from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped x, y, z, qx, qy, qz, qw = self.pose_tuple - return cast( - "PoseStamped", - PoseStamped(ts=self.ts, position=(x, y, z), orientation=(qx, qy, qz, qw)), - ) + return PoseStamped(ts=self.ts, position=(x, y, z), orientation=(qx, qy, qz, qw)) @property def data(self) -> T: diff --git a/dimos/msgs/sensor_msgs/test_PointCloud2.py b/dimos/msgs/sensor_msgs/test_PointCloud2.py index 797e266ed3..56ae622a8d 100644 --- a/dimos/msgs/sensor_msgs/test_PointCloud2.py +++ b/dimos/msgs/sensor_msgs/test_PointCloud2.py @@ -32,8 +32,8 @@ def test_lcm_encode_decode() -> None: decoded = PointCloud2.lcm_decode(binary_msg) # 1. Check number of points - original_points = lidar_msg.as_numpy() - decoded_points = decoded.as_numpy() + original_points, _ = lidar_msg.as_numpy() + decoded_points, _ = decoded.as_numpy() assert len(original_points) == len(decoded_points), ( f"Point count mismatch: {len(original_points)} vs {len(decoded_points)}" diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py index 36fc2bd6bc..95b1b6b972 100644 --- a/dimos/perception/fiducial/marker_transformer.py +++ b/dimos/perception/fiducial/marker_transformer.py @@ -85,6 +85,9 @@ def _average_marker_pose( qsz += s * q.z qsw += s * q.w norm = math.sqrt(qsx * qsx + qsy * qsy + qsz * qsz + qsw * qsw) + if norm < 1e-12: + # Signs cancelled exactly — pathological, fall back to the hemisphere ref. + return (Vector3(cx, cy, cz), ref) return ( Vector3(cx, cy, cz), Quaternion(qsx / norm, qsy / norm, qsz / norm, qsw / norm), @@ -230,8 +233,11 @@ def __call__( ) buf.add(det) avg_center, avg_orient = _average_marker_pose(buf) + # Drop `transform` (camera-in-world): the averaged pose is + # built from many frames, so any single camera transform is + # inconsistent with center/orientation. yielded_det = dataclasses.replace( - det, center=avg_center, orientation=avg_orient + det, center=avg_center, orientation=avg_orient, transform=None ) yielded_pose = Transform( translation=avg_center, diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 21d7b04ed1..ce4c9ba236 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -13,6 +13,7 @@ # limitations under the License. from collections.abc import Callable, Iterable +import math from pathlib import Path import time from typing import Any @@ -21,6 +22,7 @@ import rerun.blueprint as rrb import typer +from dimos.mapping.loop_closure.pgo import PGO, PoseGraph from dimos.mapping.voxels import VoxelMapTransformer from dimos.memory2.store.sqlite import SqliteStore from dimos.memory2.stream import Stream @@ -54,8 +56,6 @@ def _log_markers( labels: list[str], ) -> None: """Render per-marker fill + outline + pin-stem + label as four static entities.""" - import rerun as rr - n = len(centers) pin_strips = [[(cx, cy, cz), (cx, cy, cz + MARKER_STEM)] for (cx, cy, cz) in centers] label_positions = [(cx, cy, cz + MARKER_STEM + 0.01) for (cx, cy, cz) in centers] @@ -100,7 +100,7 @@ def _accumulate( voxel: float, block_count: int, device: str, - interp: Callable[[float], Transform] | None = None, + graph: PoseGraph | None = None, progress_cb: Callable[[Observation[Any]], None] | None = None, ) -> PointCloud2 | None: """Accumulate a voxel map from `obs_iter`, optionally PGO-correcting each frame. @@ -115,10 +115,10 @@ def prepared() -> Iterable[Observation[PointCloud2]]: progress_cb(obs) if len(obs.data) == 0: continue - if interp is not None: + if graph is not None: if obs.pose_tuple is None: continue - yield obs.derive(data=obs.data.transform(interp(obs.ts))) + yield obs.derive(data=obs.data.transform(graph.correction_at(obs.ts))) else: yield obs @@ -252,7 +252,9 @@ def main( if pose.position.is_zero() or pose.orientation.is_zero(): continue t = pose.position - cell = (int(t.x / pgo_tol), int(t.y / pgo_tol), int(t.z / pgo_tol)) + # math.floor so negative coords bucket consistently; int() truncates + # toward zero and silently folds -0.5 and 0.5 into the same cell. + cell = (math.floor(t.x / pgo_tol), math.floor(t.y / pgo_tol), math.floor(t.z / pgo_tol)) seen[cell] = obs n_kept = len(seen) @@ -267,49 +269,35 @@ def main( pgo_map = None pgo_path: list[tuple[float, float, float]] = [] - loops: list[Any] = [] - interp: Any | None = None + graph: PoseGraph | None = None if pgo: - from dimos.mapping.loop_closure.pgo import ( - LoopClosure, - keyframes_to_corrections, - make_interpolator, - pgo_keyframes, - ) - print("running PGO twopass map...") - pgo_loops: list[LoopClosure] = [] - keyframes = pgo_keyframes( - lidar, - on_frame=progress(total, "pgo pass 1 (optimizing)"), - loop_closures_out=pgo_loops, - ) - loops = list(pgo_loops) - corrections = keyframes_to_corrections(keyframes) - interp = make_interpolator(corrections) + prog = progress(total, "pgo pass 1 (optimizing)") + graph = lidar.tap(prog).transform(PGO()).last().data - for kf_obs in keyframes: - kf_t = kf_obs.data.optimized.translation - pgo_path.append((kf_t.x, kf_t.y, kf_t.z)) + pgo_path = [ + (kf.optimized.translation.x, kf.optimized.translation.y, kf.optimized.translation.z) + for kf in graph.keyframes + ] pgo_map = _accumulate( seen.values(), voxel=voxel, block_count=block_count, device=device, - interp=interp, + graph=graph, progress_cb=progress(n_kept, "pgo pass 2 (rebuilding)"), ) full_pgo_map = None if full_pgo: - assert interp is not None + assert graph is not None full_pgo_map = _accumulate( lidar, voxel=voxel, block_count=block_count, device=device, - interp=interp, + graph=graph, progress_cb=progress(total, "full pgo (rebuilding)"), ) @@ -398,13 +386,13 @@ def main( rr.Points3D(positions=pgo_path, colors=[[255, 0, 0]], radii=[0.025]), static=True, ) - if pgo and loops: + if graph is not None and graph.loops: loop_strips = [ [ (lc.source.translation.x, lc.source.translation.y, lc.source.translation.z), (lc.target.translation.x, lc.target.translation.y, lc.target.translation.z), ] - for lc in loops + for lc in graph.loops ] rr.log( "world/pgo_map/pgo/loop_closures", @@ -445,10 +433,9 @@ def main( labels=labels, ) - if interp is not None: - # PGO-corrected marker poses. interp(ts) maps raw_world → - # pgo_world; composing with each raw marker transform lifts - # it into the corrected frame so it lines up with pgo_map. + if graph is not None: + # PGO-correct each raw marker pose: lift it from world_raw + # into world_corrected so it lines up with pgo_map. pgo_centers: list[tuple[float, float, float]] = [] pgo_quats: list[tuple[float, float, float, float]] = [] for d in marker_dets: @@ -459,7 +446,7 @@ def main( child_frame_id=f"marker_{d.data.marker_id}", ts=d.ts, ) - corrected = interp(d.ts) + raw_tf + corrected = graph.correct(raw_tf) pgo_centers.append( ( corrected.translation.x, From 731de3dbdacfe165bab571102fc22d8cd0635cd2 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Fri, 29 May 2026 11:07:04 +0800 Subject: [PATCH 37/64] Use min/max instead of percentiles for pointcloud colormap np.percentile sorts the full z array on every frame; min/max is O(n) and avoids the per-frame cost for visualization color scaling. --- dimos/msgs/sensor_msgs/PointCloud2.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index d31c20d43c..2d6a228523 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -727,10 +727,7 @@ def to_rerun( point_colors = colors else: z = points[:, 2] - - # Clip to robust percentiles so a few outliers don't squash the colormap range. - z_lo, z_hi = np.percentile(z, [1, 99]) - class_ids = (np.clip((z - z_lo) / (z_hi - z_lo + 1e-8), 0, 1) * 255).astype(np.uint8) + class_ids = ((z - z.min()) / (z.max() - z.min() + 1e-8) * 255).astype(np.uint8) if mode == "points": return rr.Points3D( From e93f2632c7b712fafbf1b3061b29195db9e4233a Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Fri, 29 May 2026 11:30:17 +0800 Subject: [PATCH 38/64] Defer heavy imports in map CLI so dimos --help stays fast map.py imported dimos.mapping/memory2 at module top, which transitively pulls torch, transformers, open3d, sklearn. Since dimos.py eagerly imports map to register the subcommand, dimos --help ballooned to ~7s and tripped test_cli_startup. Move those imports into the function bodies; module top keeps only the typer signature so --help still renders all options. --- dimos/utils/cli/map.py | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index ce4c9ba236..c162a015ef 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -12,31 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + from collections.abc import Callable, Iterable import math from pathlib import Path import time -from typing import Any +from typing import TYPE_CHECKING, Any import rerun as rr import rerun.blueprint as rrb import typer -from dimos.mapping.loop_closure.pgo import PGO, PoseGraph -from dimos.mapping.voxels import VoxelMapTransformer -from dimos.memory2.store.sqlite import SqliteStore -from dimos.memory2.stream import Stream -from dimos.memory2.transform import QualityWindow, SpeedLimit -from dimos.memory2.type.observation import Observation -from dimos.memory2.vis.color import Color -from dimos.msgs.geometry_msgs.Transform import Transform -from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo -from dimos.msgs.sensor_msgs.Image import Image -from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 -from dimos.perception.fiducial.marker_transformer import DetectMarkers -from dimos.robot.unitree.go2.connection import _camera_info_static -from dimos.utils.data import resolve_named_path -from dimos.visualization.rerun.init import rerun_init +# Heavy dimos imports (mapping/memory2 → torch, transformers, open3d, sklearn) are +# deferred into the function bodies below so that `dimos --help` — which imports this +# module just to register the `map` subcommand — stays fast. See test_cli_startup.py. +if TYPE_CHECKING: + from dimos.mapping.loop_closure.pgo import PoseGraph + from dimos.memory2.stream import Stream + from dimos.memory2.type.observation import Observation + from dimos.msgs.sensor_msgs.Image import Image + from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 PATH_THICKNESS = 0.01 # Pin pattern (from dimos/memory2/vis/space/rerun.py): thin vertical line @@ -108,6 +104,7 @@ def _accumulate( Returns the final ``PointCloud2`` (or ``None`` if the input was empty). Disposal of the underlying ``VoxelGrid`` is handled by ``VoxelMapTransformer``. """ + from dimos.mapping.voxels import VoxelMapTransformer def prepared() -> Iterable[Observation[PointCloud2]]: for obs in obs_iter: @@ -228,6 +225,18 @@ def main( ), ) -> None: """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" + from dimos.mapping.loop_closure.pgo import PGO + from dimos.memory2.store.sqlite import SqliteStore + from dimos.memory2.transform import QualityWindow, SpeedLimit + from dimos.memory2.vis.color import Color + from dimos.msgs.geometry_msgs.Transform import Transform + from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo + from dimos.msgs.sensor_msgs.Image import Image + from dimos.perception.fiducial.marker_transformer import DetectMarkers + from dimos.robot.unitree.go2.connection import _camera_info_static + from dimos.utils.data import resolve_named_path + from dimos.visualization.rerun.init import rerun_init + db_path = resolve_named_path(dataset, ".db") if export or full_pgo: pgo = True From 7f8dc3c6f37ba97d56a0829792804a4febaa180c Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Fri, 29 May 2026 12:03:18 +0800 Subject: [PATCH 39/64] small changes --- dimos/msgs/geometry_msgs/Quaternion.py | 3 +-- dimos/msgs/sensor_msgs/PointCloud2.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dimos/msgs/geometry_msgs/Quaternion.py b/dimos/msgs/geometry_msgs/Quaternion.py index 56b2045c35..39a9c83c42 100644 --- a/dimos/msgs/geometry_msgs/Quaternion.py +++ b/dimos/msgs/geometry_msgs/Quaternion.py @@ -215,7 +215,7 @@ def __eq__(self, other) -> bool: # type: ignore[no-untyped-def] def is_zero(self) -> bool: """All components are zero — i.e. an uninitialized placeholder, not a valid rotation.""" - return bool(np.allclose([self.x, self.y, self.z, self.w], 0.0)) + return self.x == 0.0 and self.y == 0.0 and self.z == 0.0 and self.w == 0.0 def __mul__(self, other: Quaternion) -> Quaternion: """Multiply two quaternions (Hamilton product). @@ -235,7 +235,6 @@ def __mul__(self, other: Quaternion) -> Quaternion: return Quaternion(x, y, z, w) def dot(self, other: Quaternion) -> float: - """4D inner product. ``|q.dot(q')| == 1`` iff same orientation (up to sign).""" return float(self.x * other.x + self.y * other.y + self.z * other.z + self.w * other.w) def angle_to(self, other: Quaternion) -> float: diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index 2d6a228523..ae30c41711 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -373,7 +373,7 @@ def transform(self, tf: Transform) -> PointCloud2: new_pcd = o3d.geometry.PointCloud() new_pcd.points = o3d.utility.Vector3dVector(transformed_xyz) - # Colors are frame-independent — carry them through. + # Colors are frame-independent, carry them through. if self.pointcloud.has_colors(): new_pcd.colors = self.pointcloud.colors From 7c1c3cdf618389577704551a0279fa795b8c3965 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Fri, 29 May 2026 12:09:26 +0800 Subject: [PATCH 40/64] removed comment --- dimos/memory2/observationstore/sqlite.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dimos/memory2/observationstore/sqlite.py b/dimos/memory2/observationstore/sqlite.py index 88128e3c1a..c74a481cfe 100644 --- a/dimos/memory2/observationstore/sqlite.py +++ b/dimos/memory2/observationstore/sqlite.py @@ -331,7 +331,6 @@ def _ensure_tag_indexes(self, tags: dict[str, Any]) -> None: self._tag_indexes.add(key) def insert(self, obs: Observation[T]) -> int: - # Observation already normalizes pose to the storage 7-tuple — skip _decompose_pose. pose = obs.pose_tuple tags_json = json.dumps(obs.tags) if obs.tags else "{}" value = obs._data if isinstance(obs._data, (int, float)) else None From 02d4b227e74d7368146710cf8c0fac2efc88b4ae Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Fri, 29 May 2026 12:37:14 +0800 Subject: [PATCH 41/64] Add stream alignment + pose_fill for pose-less lidar recordings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some recorders (e.g. go2 mid360) store raw lidar without baking the trajectory into each frame — the pose lives in a separate odom stream. PGO and the voxel rebuild skip pose-less frames, so `dimos map --pgo` on such a dataset dedups 0/N frames and crashes with `LookupError: No matching observation`. - Stream.align(other, tolerance): single-pass two-pointer nearest-ts merge of two streams. Source-agnostic (stored, transformed, live) — the secondary cursor catches up to each primary, keeping only the two bracketing it (O(1) state). Pairs are an AlignedPair namedtuple with fields named after the source streams (Stream.name, inherited through transforms), index access always available. - Observation.with_pose(pose): copy with a pose attached, payload kept lazy. - mapping/loop_closure/utils/pose_fill: pose_fill() re-poses a stream from the nearest entry of a pose stream; pose_fill_db() bakes the result into a fresh .db copy. Verified on recording_go2_mid360_outdoor_small_loop: 5728/5729 lidar frames re-posed from odom, PGO builds 1119 keyframes / 32 loop closures. --- dimos/mapping/loop_closure/utils/pose_fill.py | 101 ++++++++++ dimos/memory2/stream.py | 84 ++++++++ dimos/memory2/test_stream.py | 179 ++++++++++++++++++ dimos/memory2/type/observation.py | 16 ++ 4 files changed, 380 insertions(+) create mode 100644 dimos/mapping/loop_closure/utils/pose_fill.py diff --git a/dimos/mapping/loop_closure/utils/pose_fill.py b/dimos/mapping/loop_closure/utils/pose_fill.py new file mode 100644 index 0000000000..703413db91 --- /dev/null +++ b/dimos/mapping/loop_closure/utils/pose_fill.py @@ -0,0 +1,101 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Stamp observations with poses pulled from another stream by nearest timestamp. + +Some recorders store raw sensor streams (``lidar``, images) without baking the +trajectory into each frame — the pose lives in a separate odometry stream +(``odom``, ``fastlio_odometry``). PGO and the voxel rebuild skip pose-less +frames, so such a recording yields an empty map. :func:`pose_fill` re-attaches +poses by nearest-in-time match (via :meth:`Stream.align`) so those tools work +again. Frames stay in whatever coordinate frame they were stored in — only the +pose *metadata* is added. + +:func:`pose_fill_db` runs the same fill while copying a whole SQLite dataset to +a new file, baking the pulled poses into the target stream. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any, cast + +from dimos.memory2.backend import Backend +from dimos.memory2.stream import Stream +from dimos.memory2.type.observation import Observation + + +def pose_fill( + stream: Stream[Any], pose_stream: Stream[Any], *, tolerance: float = 0.1 +) -> Stream[Any]: + """Re-pose each observation in *stream* from the nearest entry in *pose_stream*. + + Pairs are formed by :meth:`Stream.align` (nearest ``|Δts| <= tolerance``); + primaries with no match in tolerance are dropped. The pose is read from the + matched pose observation's *payload* (``PoseStamped`` / ``Odometry`` / any + object exposing ``.position`` + ``.orientation``) because pose-message + streams carry the pose in the value, not the indexed pose columns. The + target stream's payload stays lazy. + """ + + def _fill(pair_obs: Observation[Any]) -> Observation[Any]: + primary, secondary = cast( + "tuple[Observation[Any], Observation[Any]]", pair_obs.data + ) # AlignedPair(primary, secondary) + return primary.with_pose(secondary.data) + + return stream.align(pose_stream, tolerance=tolerance).map(_fill) + + +def pose_fill_db( + src_path: str | Path, + dest_path: str | Path, + *, + target: str = "lidar", + pose_source: str = "odom", + tolerance: float = 0.1, + streams: list[str] | None = None, +) -> dict[str, int]: + """Copy a SQLite dataset to *dest_path*, baking *pose_source* poses into *target*. + + Every stream in *streams* (default: all streams in the source) is copied + with its original payload type and codec. The *target* stream is re-posed + via :func:`pose_fill` from the *pose_source* stream; all others are copied + verbatim. Returns a per-stream count of observations written. + """ + from dimos.memory2.store.sqlite import SqliteStore + + src = SqliteStore(path=str(src_path), must_exist=True) + dest = SqliteStore(path=str(dest_path)) + names = streams if streams is not None else src.list_streams() + if target not in names: + raise ValueError(f"target stream {target!r} not in {names}") + if pose_source not in src.list_streams(): + raise ValueError(f"pose_source stream {pose_source!r} not found in source dataset") + + written: dict[str, int] = {} + for name in names: + src_stream: Stream[Any] = src.stream(name) + backend = src_stream._source # bare store stream → source is the Backend + assert isinstance(backend, Backend) + dest_stream: Stream[Any] = dest.stream(name, backend.data_type, codec=backend.codec) + if name == target: + filled = pose_fill(src_stream, src.stream(pose_source), tolerance=tolerance) + written[name] = filled.save(dest_stream).drain() + else: + written[name] = src_stream.save(dest_stream).drain() + + src.stop() + dest.stop() + return written diff --git a/dimos/memory2/stream.py b/dimos/memory2/stream.py index 8f0a767aa7..829f10966f 100644 --- a/dimos/memory2/stream.py +++ b/dimos/memory2/stream.py @@ -14,6 +14,8 @@ from __future__ import annotations +from collections import namedtuple +from functools import cache import sys import time from typing import TYPE_CHECKING, Any, Generic, cast, overload @@ -56,6 +58,51 @@ logger = setup_logger() +@cache +def _pair_class(field_a: str, field_b: str) -> type: + """Cached ``AlignedPair`` namedtuple with fields named after the two streams. + + ``rename=True`` so any non-identifier or duplicate name degrades to ``_0`` / + ``_1`` rather than raising — index access (``pair[0]``) always works. + """ + return namedtuple("AlignedPair", [field_a, field_b], rename=True) # type: ignore[misc] + + +def _nearest_align_iter( + primary_iter: Iterator[Observation[Any]], + secondary_iter: Iterator[Observation[Any]], + tolerance: float, + pair_class: type, +) -> Iterator[Observation[Any]]: + """Streaming two-pointer nearest-ts merge over ts-ascending iterators. + + The secondary cursor catches up to each primary; the nearest secondary is + whichever of the two bracketing it (the last one at/before ``p`` or the + first one after) is closer. O(1) state — only those two are held, never a + window — so it streams over arbitrarily long (including live) inputs. + """ + secondary = iter(secondary_iter) + prev: Observation[Any] | None = None # last secondary with ts <= p.ts + nxt: Observation[Any] | None = next(secondary, None) # first secondary after prev + + for p in primary_iter: + # Advance the cursor until `nxt` sits past `p`; `prev` trails just behind. + while nxt is not None and nxt.ts <= p.ts: + prev = nxt + nxt = next(secondary, None) + + # Nearest is the closer of the two bracketing secondaries. + if prev is None: + best = nxt + elif nxt is None: + best = prev + else: + best = prev if (p.ts - prev.ts) <= (nxt.ts - p.ts) else nxt + + if best is not None and abs(best.ts - p.ts) <= tolerance: + yield p.derive(data=pair_class(p, best)) + + class Stream(CompositeResource, Generic[T, O]): """Lazy, pull-based stream over observations. @@ -118,6 +165,14 @@ def __str__(self) -> str: return result + @property + def name(self) -> str | None: + """Backing stream's name (``None`` if unbound), inherited through transforms.""" + current: Any = self + while isinstance(current, Stream): + current = current._source + return None if current is None else cast("str", current.name) + def is_live(self) -> bool: """True if this stream (or any ancestor in the chain) is in live mode.""" if self._query.live_buffer is not None: @@ -281,6 +336,35 @@ def detect(upstream): xf = FnIterTransformer(xf) return Stream(source=self, transform=xf, query=StreamQuery()) + def align(self, other: Stream[Any, Any], *, tolerance: float) -> Stream[Any]: + """Pair each observation with the nearest-in-time one from *other*. + + The primary (``self``) drives; for each primary the nearest secondary + within ``|Δts| <= tolerance`` is paired, others are skipped. The merge + is a single forward pass that assumes **both streams iterate in + ascending ts** — true for live streams (arrival order) and for stored + streams in insertion order; prepend ``.order_by("ts")`` on a side whose + iteration order isn't chronological. Source-agnostic: stored, + transformed, and live streams all work (a live side simply never ends). + + The output ``Observation`` carries the primary's ``ts``, ``pose``, + ``tags``. ``obs.data`` is an ``AlignedPair`` namedtuple whose fields are + named after each side's stream (e.g. ``pair.lidar`` / ``pair.odom``), + with ``pair[0]`` / ``pair[1]`` index access always available. Each + field is the full :class:`Observation` from that side. Field names fall + back to ``primary`` / ``secondary`` when a stream is unnamed or both + share a name. + """ + field_a, field_b = self.name, other.name + if not field_a or not field_b or field_a == field_b: + field_a, field_b = "primary", "secondary" + pair_class = _pair_class(field_a, field_b) + + def _align(upstream: Iterator[Observation[Any]]) -> Iterator[Observation[Any]]: + return _nearest_align_iter(upstream, iter(other), tolerance, pair_class) + + return self.transform(FnIterTransformer(_align)) + def live(self, buffer: BackpressureBuffer[Observation[Any]] | None = None) -> Stream[T, O]: """Return a stream whose iteration never ends — backfill then live tail. diff --git a/dimos/memory2/test_stream.py b/dimos/memory2/test_stream.py index 32cb542b83..9e9d989e4f 100644 --- a/dimos/memory2/test_stream.py +++ b/dimos/memory2/test_stream.py @@ -781,3 +781,182 @@ def consumer(): assert results == ["b", "d"] assert results == ["b", "d"] assert results == ["b", "d"] + + +class TestAlign: + """Pairwise nearest-ts alignment between two streams.""" + + def test_basic_pairing(self, session): + """Each primary pairs with the closest in-tolerance secondary.""" + lidar = session.stream("lidar", str) + image = session.stream("image", str) + # primary at 0.0, 0.1, 0.2; secondary slightly offset + for i, v in enumerate(["L0", "L1", "L2"]): + lidar.append(v, ts=i * 0.1) + for ts, v in [(0.02, "I0"), (0.11, "I1"), (0.19, "I2")]: + image.append(v, ts=ts) + + pairs = lidar.align(image, tolerance=0.05).to_list() + assert [p.data.lidar.data for p in pairs] == ["L0", "L1", "L2"] + assert [p.data.image.data for p in pairs] == ["I0", "I1", "I2"] + + def test_named_and_index_access(self, session): + """Pair fields are accessible by source-stream name and by index.""" + a = session.stream("alpha", str) + b = session.stream("beta", str) + a.append("a0", ts=0.0) + b.append("b0", ts=0.001) + + pair = a.align(b, tolerance=0.01).to_list()[0].data + assert pair.alpha.data == "a0" + assert pair.beta.data == "b0" + assert pair[0].data == "a0" + assert pair[1].data == "b0" + + def test_same_name_falls_back_to_primary_secondary(self, session): + """Aligning streams that share a name names fields primary/secondary.""" + a = session.stream("dup", str) + a.append("x", ts=0.0) + + pair = a.align(a, tolerance=0.01).to_list()[0].data + assert pair.primary.data == "x" + assert pair.secondary.data == "x" + assert pair[0].data == "x" + + def test_outer_obs_carries_primary_metadata(self, session): + """The yielded outer Observation keeps the primary's ts/pose/tags.""" + a = session.stream("primary", str) + b = session.stream("secondary", str) + a.append("pa", ts=10.0, pose=(1, 2, 3), tags={"src": "p"}) + b.append("sb", ts=10.005, pose=(9, 9, 9), tags={"src": "s"}) + + out = a.align(b, tolerance=0.05).to_list()[0] + assert out.ts == 10.0 + assert out.pose_tuple[:3] == (1, 2, 3) + assert out.tags == {"src": "p"} + # Secondary's pose still reachable via the pair. + assert out.data.secondary.pose_tuple[:3] == (9, 9, 9) + + def test_skip_when_no_match_in_tolerance(self, session): + """Primary frames with no secondary within tolerance are dropped.""" + a = session.stream("a", str) + b = session.stream("b", str) + a.append("a0", ts=0.0) + a.append("a1", ts=1.0) + a.append("a2", ts=2.0) + # Only a match near a1. + b.append("b1", ts=1.02) + + pairs = a.align(b, tolerance=0.05).to_list() + assert [p.data.a.data for p in pairs] == ["a1"] + assert [p.data.b.data for p in pairs] == ["b1"] + + def test_one_secondary_matches_many_primaries(self, session): + """Same secondary can pair with multiple primaries — no early eviction.""" + a = session.stream("a", str) + b = session.stream("b", str) + for ts in (1.000, 1.005, 1.010): + a.append(f"a{ts}", ts=ts) + b.append("b", ts=1.004) + + pairs = a.align(b, tolerance=0.05).to_list() + assert len(pairs) == 3 + assert all(p.data.b.data == "b" for p in pairs) + + def test_primary_before_first_secondary(self, session): + """Early primaries without any in-window secondary are skipped, later ones pair.""" + a = session.stream("a", str) + b = session.stream("b", str) + for ts in (0.0, 0.1, 0.2, 0.3): + a.append(f"a{ts}", ts=ts) + b.append("b", ts=0.3) + + pairs = a.align(b, tolerance=0.05).to_list() + assert [p.ts for p in pairs] == [0.3] + + def test_empty_secondary(self, session): + """Empty secondary stream → empty alignment, no error.""" + a = session.stream("a", str) + b = session.stream("b", str) # leave empty + a.append("a0", ts=0.0) + pairs = a.align(b, tolerance=1.0).to_list() + assert pairs == [] + + def test_filter_on_either_side_respected(self, session): + """Filters applied before .align() restrict the iterated set on that side.""" + a = session.stream("a", str) + b = session.stream("b", str) + for ts in (0.0, 1.0, 2.0): + a.append(f"a{ts}", ts=ts) + b.append(f"b{ts}", ts=ts + 0.01) + + pairs = a.after(0.5).align(b.before(1.5), tolerance=0.05).to_list() + assert [p.data.a.data for p in pairs] == ["a1.0"] + assert [p.data.b.data for p in pairs] == ["b1.0"] + + def test_transform_source_allowed(self, session): + """A transform-sourced stream on either side aligns fine (source-agnostic).""" + a = session.stream("a", int) + b = session.stream("b", int) + for ts in (0.0, 1.0): + a.append(int(ts), ts=ts) + b.append(int(ts) + 100, ts=ts + 0.01) + + # Primary transformed (+1), secondary transformed (+1) — both still ts-ordered. + primary = a.map(lambda obs: obs.derive(data=obs.data + 1)) + secondary = b.map(lambda obs: obs.derive(data=obs.data + 1)) + pairs = primary.align(secondary, tolerance=0.05).to_list() + # Field names inherit through the transform from the backing streams. + assert [p.data.a.data for p in pairs] == [1, 2] + assert [p.data.b.data for p in pairs] == [101, 102] + + def test_live_alignment(self, memory_session): + """align() streams over live inputs — a pair emits once its bracket closes.""" + p = memory_session.stream("lp") + s = memory_session.stream("ls") + # Backfilled matched pair so the first result is immediate. + p.append("P0", ts=1.0) + s.append("S0", ts=1.002) + + aligned = p.live(buffer=Unbounded()).align(s.live(buffer=Unbounded()), tolerance=0.1) + + results: list[tuple[str, str]] = [] + consumed = threading.Event() + + def consumer(): + for obs in aligned: + results.append((obs.data.lp.data, obs.data.ls.data)) + if len(results) >= 2: + consumed.set() + return + + t = threading.Thread(target=consumer, daemon=True) + t.start() + + time.sleep(0.05) + # Live primary, then a secondary just past it so P1's bracket closes. + p.append("P1", ts=2.0) + s.append("S1", ts=2.003) + + consumed.wait(timeout=2.0) + t.join(timeout=2.0) + assert results == [("P0", "S0"), ("P1", "S1")] + + +class TestAlignLargeInput: + """The two-pointer merge scales to dense streams in a single forward pass.""" + + def test_dense_secondary_pairs_correctly(self, memory_session): + """5k primaries against 50k secondaries — every primary gets its exact match.""" + primary = memory_session.stream("p") + secondary = memory_session.stream("s") + # secondary at 500 Hz, primary at 50 Hz — each primary lands on a secondary. + for i in range(5_000): + primary.append(i, ts=i * 0.02) + for i in range(50_000): + secondary.append(i, ts=i * 0.002) + + pairs = primary.align(secondary, tolerance=0.005).to_list() + assert len(pairs) == 5_000 + # Each primary's nearest secondary is the coincident one (Δts ≈ 0). + assert max(abs(p.data[0].ts - p.data[1].ts) for p in pairs) < 1e-9 diff --git a/dimos/memory2/type/observation.py b/dimos/memory2/type/observation.py index 139566287d..7ee8224582 100644 --- a/dimos/memory2/type/observation.py +++ b/dimos/memory2/type/observation.py @@ -212,6 +212,22 @@ def tag(self, **tags: Any) -> Self: ) return type(self)(**kwargs) + def with_pose(self, pose: Any) -> Self: + """Return a new observation with ``pose`` attached, payload kept lazy. + + ``pose`` accepts anything :func:`_to_tuple` handles (a 3-/7-tuple, + ``Pose``/``PoseStamped``/``Transform``, or ``None`` to clear). + """ + kwargs: dict[str, Any] = {f.name: getattr(self, f.name) for f in fields(self)} + kwargs.pop("pose_tuple", None) + kwargs.update( + pose=pose, + _data=_UNLOADED, + _loader=lambda: self.data, + _data_lock=threading.Lock(), + ) + return type(self)(**kwargs) + @dataclass(init=False) class EmbeddedObservation(Observation[T]): From 74485fc01cbb384b49f90769511a4053196b6184 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Fri, 29 May 2026 14:04:14 +0800 Subject: [PATCH 42/64] pose_fill verbatim copy + `dimos map` reconstruct/summary verbs - pose_fill.py: `python -m ...pose_fill ` CLI wrapping pose_fill_db (--streams/--target/--pose-source/--tolerance/--out); refuses to overwrite an existing db. pose_fill_db copies blobs VERBATIM (raw bytes via blob_store get/put, no decode+re-encode), so jpeg images keep their exact bytes; only the target stream's pose columns are rewritten (rtree rebuilt on insert). - Backend.append materializes lazy payloads before validate/encode, so with_pose()/tag()/derived observations can be saved. - `dimos map` is now a verb group: `reconstruct` (the former `dimos map `) and `summary` (per-stream counts/time ranges). Verified on recording_go2_mid360_outdoor_small_loop: full faithful copy in 14s, color_image blobs byte-identical, lidar 5728/5728 re-posed from odom, `dimos map reconstruct ..._posed --pgo` dedups 1527/5728 (was 0) and completes. --- dimos/mapping/loop_closure/utils/pose_fill.py | 88 ++++++++++++++++--- dimos/mapping/loop_closure/utils/summary.py | 59 +++++++++++++ dimos/memory2/backend.py | 13 ++- dimos/robot/cli/dimos.py | 6 +- 4 files changed, 151 insertions(+), 15 deletions(-) create mode 100644 dimos/mapping/loop_closure/utils/summary.py diff --git a/dimos/mapping/loop_closure/utils/pose_fill.py b/dimos/mapping/loop_closure/utils/pose_fill.py index 703413db91..eee8ae08fa 100644 --- a/dimos/mapping/loop_closure/utils/pose_fill.py +++ b/dimos/mapping/loop_closure/utils/pose_fill.py @@ -28,6 +28,7 @@ from __future__ import annotations +from collections.abc import Iterable from pathlib import Path from typing import Any, cast @@ -70,12 +71,22 @@ def pose_fill_db( """Copy a SQLite dataset to *dest_path*, baking *pose_source* poses into *target*. Every stream in *streams* (default: all streams in the source) is copied - with its original payload type and codec. The *target* stream is re-posed - via :func:`pose_fill` from the *pose_source* stream; all others are copied - verbatim. Returns a per-stream count of observations written. + with its original payload type and codec. Blobs are copied **verbatim** — + the encoded bytes are moved as-is, never decoded and re-encoded — so lossy + codecs (e.g. jpeg images) keep their original quality. Only the *target* + stream's pose metadata is rewritten, to the nearest *pose_source* pose (the + rtree spatial index is rebuilt from it on insert). Returns a per-stream + count of observations written. """ from dimos.memory2.store.sqlite import SqliteStore + dest_p = Path(dest_path) + if dest_p.exists(): + raise FileExistsError( + f"{dest_p} already exists — refusing to append into it (stale rows would " + "collide on ts). Delete it (and any -wal/-shm sidecars) first." + ) + src = SqliteStore(path=str(src_path), must_exist=True) dest = SqliteStore(path=str(dest_path)) names = streams if streams is not None else src.list_streams() @@ -86,16 +97,73 @@ def pose_fill_db( written: dict[str, int] = {} for name in names: - src_stream: Stream[Any] = src.stream(name) - backend = src_stream._source # bare store stream → source is the Backend - assert isinstance(backend, Backend) - dest_stream: Stream[Any] = dest.stream(name, backend.data_type, codec=backend.codec) + src_b = cast("Backend[Any]", src.stream(name)._source) + dest_b = cast("Backend[Any]", dest.stream(name, src_b.data_type, codec=src_b.codec)._source) + + # Re-posed observations for the target (ids preserved from source, so + # the source blob still resolves); plain metadata for everything else. if name == target: - filled = pose_fill(src_stream, src.stream(pose_source), tolerance=tolerance) - written[name] = filled.save(dest_stream).drain() + rows: Iterable[Observation[Any]] = pose_fill( + src.stream(name), src.stream(pose_source), tolerance=tolerance + ) else: - written[name] = src_stream.save(dest_stream).drain() + rows = src.stream(name).order_by("ts") + + scalar = src_b.data_type in (int, float) + n = 0 + for obs in rows: + blob = ( + None if scalar or src_b.blob_store is None else src_b.blob_store.get(name, obs.id) + ) + row_id = dest_b.metadata_store.insert(obs) + if blob is not None: + assert dest_b.blob_store is not None + dest_b.blob_store.put(name, row_id, blob) + n += 1 + commit = getattr(dest_b.metadata_store, "commit", None) + if commit is not None: + commit() + written[name] = n src.stop() dest.stop() return written + + +def main( + dataset: str, + out: str | None = None, + target: str = "lidar", + pose_source: str = "odom", + tolerance: float = 0.1, + streams: str | None = None, +) -> None: + """Write a copy of *dataset* with *pose_source* poses baked into *target*. + + Run as ``python -m dimos.mapping.loop_closure.utils.pose_fill ``. + The output (``_posed.db`` by default) is then usable directly with + ``dimos map reconstruct --pgo``. + + *streams* is a comma-separated subset to copy (default: all) — restrict to + e.g. ``lidar,odom`` for a smaller map-only db instead of copying every + stream (images, fastlio, …). Blobs are copied verbatim either way. + """ + from dimos.utils.data import resolve_named_path + + src = resolve_named_path(dataset, ".db") + dest = Path(out) if out else src.with_name(f"{src.stem}_posed.db") + names = [s.strip() for s in streams.split(",")] if streams else None + print(f"pose-filling {src.name}: {target!r} <- nearest {pose_source!r} (±{tolerance}s)") + written = pose_fill_db( + src, dest, target=target, pose_source=pose_source, tolerance=tolerance, streams=names + ) + for name, n in written.items(): + print(f" {name}: {n}") + print(f"wrote {dest}") + print(f"now run: dimos map reconstruct {dest.stem} --pgo") + + +if __name__ == "__main__": + import typer + + typer.run(main) diff --git a/dimos/mapping/loop_closure/utils/summary.py b/dimos/mapping/loop_closure/utils/summary.py new file mode 100644 index 0000000000..d34f8b902a --- /dev/null +++ b/dimos/mapping/loop_closure/utils/summary.py @@ -0,0 +1,59 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Print ``Store.summary()`` for a memory2 sqlite recording. + +Usage: + uv run python -m dimos.mapping.loop_closure.utils.summary mid360 +""" + +from __future__ import annotations + +import json +from pathlib import Path +import sqlite3 + +import typer + +from dimos.memory2.codecs.base import _resolve_payload_type +from dimos.memory2.store.sqlite import SqliteStore +from dimos.utils.data import resolve_named_path + + +def _stream_payload_types(db_path: Path) -> dict[str, type]: + """Read each stream's registered payload type from the _streams table.""" + conn = sqlite3.connect(str(db_path)) + try: + rows = conn.execute("SELECT name, config FROM _streams").fetchall() + finally: + conn.close() + return {name: _resolve_payload_type(json.loads(cfg)["payload_module"]) for name, cfg in rows} + + +def main( + dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), +) -> None: + """Print per-stream counts and time ranges for a recorded SQLite dataset.""" + db_path = resolve_named_path(dataset, ".db") + payload_types = _stream_payload_types(db_path) + + store = SqliteStore(path=str(db_path)) + with store: + for name, ptype in payload_types.items(): + store.stream(name, ptype) + print(store.summary()) + + +if __name__ == "__main__": + typer.run(main) diff --git a/dimos/memory2/backend.py b/dimos/memory2/backend.py index 3c3b89669e..7b95bd6335 100644 --- a/dimos/memory2/backend.py +++ b/dimos/memory2/backend.py @@ -89,20 +89,25 @@ def loader() -> Any: return loader def append(self, obs: Observation[T]) -> Observation[T]: + # Materialize lazy payloads (e.g. from with_pose()/tag()/derived + # streams) in place — validation and encoding below read obs._data, + # and we must encode it to store anyway. + payload = obs.data + # Validate payload type matches stream type - if self.data_type is not object and not isinstance(obs._data, self.data_type): + if self.data_type is not object and not isinstance(payload, self.data_type): raise TypeError( - f"Stream expects {self.data_type.__qualname__}, got {type(obs._data).__qualname__}" + f"Stream expects {self.data_type.__qualname__}, got {type(payload).__qualname__}" ) obs.data_type = self.data_type # Scalars are stored inline in the metadata value column — skip blob - is_scalar = isinstance(obs._data, (int, float)) + is_scalar = isinstance(payload, (int, float)) # Encode payload before any locking (avoids holding locks during IO) encoded: bytes | None = None if self.blob_store is not None and not is_scalar: - encoded = self.codec.encode(obs._data) + encoded = self.codec.encode(payload) try: # Insert metadata, get assigned id diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 209c30c3e8..e9b28ab9ab 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -38,6 +38,7 @@ from dimos.core.daemon import daemonize, install_signal_handlers from dimos.core.global_config import GlobalConfig, global_config from dimos.core.run_registry import get_most_recent, is_pid_alive, stop_entry +from dimos.mapping.loop_closure.utils.summary import main as _map_summary_main from dimos.robot.unitree.go2.cli.go2tool import app as go2tool_app from dimos.utils.cli.map import main as _map_main from dimos.utils.logging_config import setup_logger @@ -673,7 +674,10 @@ def send( topic_send(topic, message_expr) -main.command(name="map")(_map_main) +map_app = typer.Typer(help="Voxel-map tools over recorded sqlite datasets") +main.add_typer(map_app, name="map") +map_app.command("reconstruct")(_map_main) +map_app.command("summary")(_map_summary_main) @main.command() From 55d5f34f8259088aafdf74e94e58b2eed9ad907f Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Fri, 29 May 2026 14:05:47 +0800 Subject: [PATCH 43/64] mini map tooling --- dimos/mapping/loop_closure/utils/autotrim.py | 217 ++++++++++++ dimos/mapping/loop_closure/utils/map_rrd.py | 324 ++++++++++++++++++ .../mapping/loop_closure/utils/markers_rrd.py | 26 +- .../loop_closure/utils/rename_streams.py | 173 ++++++++++ dimos/memory2/store/base.py | 7 + 5 files changed, 735 insertions(+), 12 deletions(-) create mode 100644 dimos/mapping/loop_closure/utils/autotrim.py create mode 100644 dimos/mapping/loop_closure/utils/map_rrd.py create mode 100644 dimos/mapping/loop_closure/utils/rename_streams.py diff --git a/dimos/mapping/loop_closure/utils/autotrim.py b/dimos/mapping/loop_closure/utils/autotrim.py new file mode 100644 index 0000000000..fecdbea63f --- /dev/null +++ b/dimos/mapping/loop_closure/utils/autotrim.py @@ -0,0 +1,217 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Trim the static lead-in from a memory2 sqlite recording. + +Uses one stream's poses to find the first moment the robot displaces from +its starting pose by at least ``--tolerance``, then writes a trimmed copy of +every stream in the source, starting ``--lead-in`` seconds before that +motion. With ``--main-stream`` omitted, scans every stream that has poses +and prompts you to pick. + +Usage: + uv run python -m dimos.mapping.loop_closure.utils.autotrim mid360 \\ + --out mid360_trimmed.db --tolerance 0.20 +""" + +from __future__ import annotations + +from collections.abc import Callable +import json +import math +from pathlib import Path +import sqlite3 +import time +from typing import Any + +import typer + +from dimos.memory2.codecs.base import _resolve_payload_type +from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.stream import Stream +from dimos.memory2.type.observation import Observation +from dimos.utils.data import resolve_named_path + + +def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: + """Matches dimos/utils/cli/map.py:progress — kept inline to avoid pulling rerun.""" + seen = 0 + wall_start: float | None = None + last_wall: float | None = None + first_ts: float | None = None + + def _progress(obs: Observation[Any]) -> None: + nonlocal seen, wall_start, last_wall, first_ts + now = time.monotonic() + if wall_start is None: + wall_start = now + first_ts = obs.ts + assert first_ts is not None + frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0 + last_wall = now + seen += 1 + pct = 100 * seen // total if total else 100 + wall = now - wall_start + data = obs.ts - first_ts + speed = data / wall if wall > 0 else 0.0 + end = "\n" if seen >= total else "" + prefix = f"{label} " if label else "" + print( + f"\r{prefix}{pct:>3}% [{seen}/{total}] {data:.1f}s ({speed:.1f} x rt) {frame_ms:.0f}ms/frame", + end=end, + flush=True, + ) + + return _progress + + +def _find_motion_edge_ts( + stream: Stream[Any], tolerance: float, *, reverse: bool = False +) -> tuple[float, tuple[float, float, float]]: + """Walks a stream in ts order (or descending if reverse=True) and returns the + first ts whose pose is ≥ tolerance from the first pose seen in that walk. + + Forward: motion *start* — anchor = initial pose, first ts that moves away. + Reverse: motion *stop* — anchor = final pose, last ts that's still away. + """ + walk = stream.order_by("ts", desc=True) if reverse else stream + anchor: tuple[float, float, float] | None = None + for obs in walk: + if obs.pose is None: + continue + x, y, z = obs.pose[0], obs.pose[1], obs.pose[2] + if anchor is None: + anchor = (x, y, z) + continue + if math.dist((x, y, z), anchor) >= tolerance: + return obs.ts, anchor + raise RuntimeError( + f"No pose ever displaced ≥ {tolerance:.3f} m from {'final' if reverse else 'initial'} pose" + + (" (no poses in stream)" if anchor is None else "") + ) + + +def _stream_payload_types(db_path: Path) -> dict[str, type]: + """Read each stream's registered payload type from the _streams table.""" + conn = sqlite3.connect(str(db_path)) + try: + rows = conn.execute("SELECT name, config FROM _streams").fetchall() + finally: + conn.close() + return {name: _resolve_payload_type(json.loads(cfg)["payload_module"]) for name, cfg in rows} + + +def _pick_main_stream_interactive( + src: SqliteStore, payload_types: dict[str, type], tolerance: float +) -> str: + """Scan every stream's poses, print first/last motion times, prompt for a pick.""" + print(f"scanning streams for motion (≥{tolerance:.2f} m)...") + candidates: list[str] = [] + for name, ptype in payload_types.items(): + s = src.stream(name, ptype) + try: + start_ts, _ = _find_motion_edge_ts(s, tolerance) + stop_ts, _ = _find_motion_edge_ts(s, tolerance, reverse=True) + except RuntimeError: + print(f" {name:>12s}: no motion detected") + continue + t0, tN = s.first().ts, s.last().ts + candidates.append(name) + print( + f" {name:>12s}: motion +{start_ts - t0:6.2f} s → +{stop_ts - t0:6.2f} s" + f" (tail idle {tN - stop_ts:5.2f} s)" + ) + + if not candidates: + raise typer.BadParameter( + f"No stream has poses with ≥{tolerance:.2f} m motion. Lower --tolerance." + ) + + print("\npick main stream (number or name):") + for i, name in enumerate(candidates, 1): + print(f" {i}) {name}") + by_idx = {str(i): name for i, name in enumerate(candidates, 1)} + by_name = set(candidates) + while True: + choice = typer.prompt("choice", default="1").strip() + if choice in by_idx: + return by_idx[choice] + if choice in by_name: + return choice + print(f"invalid choice {choice!r}; try a number 1..{len(candidates)} or a stream name") + + +def main( + dataset: str = typer.Argument(..., help="Source .db: bare name (cwd or data/) or path"), + out: Path = typer.Option(..., "--out", help="Output trimmed .db path"), + main_stream: str | None = typer.Option( + None, + "--main-stream", + help="Stream whose poses define motion; omit to pick interactively", + ), + tolerance: float = typer.Option( + 0.20, "--tolerance", help="Distance (m) the main stream must travel before 'motion' starts" + ), + lead_in: float = typer.Option( + 1.0, "--lead-in", help="Seconds to keep before the first-motion timestamp" + ), +) -> None: + src_path = resolve_named_path(dataset, ".db") + if out.exists(): + raise typer.BadParameter(f"Output already exists: {out}") + + print(f"analizing dataset {src_path}") + payload_types = _stream_payload_types(src_path) + if main_stream is not None and main_stream not in payload_types: + raise typer.BadParameter( + f"Main stream {main_stream!r} not in source db. Available: {sorted(payload_types)}" + ) + + src = SqliteStore(path=str(src_path)) + with src: + if main_stream is None: + main_stream = _pick_main_stream_interactive(src, payload_types, tolerance) + + motion_ts, origin = _find_motion_edge_ts( + src.stream(main_stream, payload_types[main_stream]), tolerance + ) + cutoff = motion_ts - lead_in + ox, oy, oz = origin + print(f"\nmain stream {main_stream!r}: origin=({ox:.3f},{oy:.3f},{oz:.3f})") + print(f" motion start (≥{tolerance:.2f} m): ts={motion_ts:.3f}") + print(f" cutoff (lead-in {lead_in:.1f} s): ts={cutoff:.3f}\n") + + dst = SqliteStore(path=str(out)) + with dst: + for name, ptype in payload_types.items(): + src_s = src.stream(name, ptype) + dst_s = dst.stream(name, ptype) + total_src = src_s.count() + filtered = src_s.after(cutoff) + total_kept = filtered.count() + dropped = total_src - total_kept + cb = progress(total_kept, f"{name:>12s}") + for obs in src_s.after(cutoff): + dst_s.append(obs.data, ts=obs.ts, pose=obs.pose, tags=obs.tags or None) + cb(obs) + if total_kept == 0: + print(f"{name:>12s} kept 0/{total_src} (dropped {dropped})") + else: + print(f" ↳ dropped {dropped}/{total_src}") + + print(f"\nwrote {out}") + + +if __name__ == "__main__": + typer.run(main) diff --git a/dimos/mapping/loop_closure/utils/map_rrd.py b/dimos/mapping/loop_closure/utils/map_rrd.py new file mode 100644 index 0000000000..be54eb9f35 --- /dev/null +++ b/dimos/mapping/loop_closure/utils/map_rrd.py @@ -0,0 +1,324 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Dump a recorded dataset to .rrd: lidar point clouds + camera frames. + +Lidar clouds are assumed to be in world frame and logged directly under +their entity path (no parent transform). Entities written: + +- ``world/lidar`` — Go2 L1 per-frame point cloud +- ``world/lidar_voxels`` — accumulated voxel map of the primary lidar (``--map``) +- ``world/fastlio_lidar`` — fastlio_lidar raw cloud (if present) +- ``world/fastlio_voxels``— accumulated voxel map of fastlio_lidar (``--map``) +- ``world/fastlio`` — fastlio_odometry pose axis (if present) +- ``world/fastlio_path`` — fastlio_odometry trajectory (growing LineStrips3D) +- ``world/odom`` — Go2 onboard odom pose axis (if present) +- ``world/odom_path`` — Go2 onboard odom trajectory (growing LineStrips3D) +- ``world/camera`` — color_image camera pose (static pinhole + Transform3D) +- ``world/camera/image`` — color_image frames + +Usage: + uv run python -m dimos.mapping.loop_closure.utils.map_rrd mid360 --out map.rrd + uv run python -m dimos.mapping.loop_closure.utils.map_rrd mid360 --out map.rrd --map + rerun map.rrd +""" + +from __future__ import annotations + +from collections.abc import Callable +from pathlib import Path +import time +from typing import Any + +import rerun as rr +import typer + +from dimos.mapping.voxels import VoxelMapTransformer +from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.stream import Stream +from dimos.memory2.transform import throttle +from dimos.memory2.type.observation import Observation +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2, register_colormap_annotation +from dimos.robot.unitree.go2.connection import _camera_info_static +from dimos.utils.data import resolve_named_path + +TIMELINE = "ts" + + +def _progress(total: int, label: str) -> Callable[[Observation[Any]], None]: + """Matches dimos/utils/cli/map.py:progress.""" + seen = 0 + wall_start: float | None = None + last_wall: float | None = None + first_ts: float | None = None + + def tick(obs: Observation[Any]) -> None: + nonlocal seen, wall_start, last_wall, first_ts + now = time.monotonic() + if wall_start is None: + wall_start = now + first_ts = obs.ts + assert first_ts is not None + frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0 + last_wall = now + seen += 1 + pct = 100 * seen // total if total else 100 + wall = now - wall_start + data = obs.ts - first_ts + speed = data / wall if wall > 0 else 0.0 + end = "\n" if seen >= total else "" + print( + f"\r{label} {pct:>3}% [{seen}/{total}] {data:.1f}s ({speed:.1f} x rt) {frame_ms:.0f}ms/frame", + end=end, + flush=True, + ) + + return tick + + +def _log_clouds( + label: str, + stream: Stream[PointCloud2], + entity: str, + voxel: float, + point_mode: str, + *, + total: int | None = None, +) -> None: + """Iterate a PointCloud2 stream and log each obs to ``entity``. + + ``total`` overrides the progress denominator — useful for transform + pipelines where calling :py:meth:`Stream.count` would materialize the + whole pipeline. + """ + n = total if total is not None else stream.count() + cb = _progress(n, label) + for obs in stream: + cb(obs) + rr.set_time(TIMELINE, timestamp=obs.ts) + rr.log(entity, obs.data.to_rerun(voxel_size=voxel, mode=point_mode)) + + +def _log_path( + label: str, + stream: Stream[Any], + entity: str, + color: tuple[int, int, int], + *, + emit_every: int = 10, +) -> None: + """Iterate a pose-bearing stream and log a growing :class:`LineStrips3D` to + ``entity`` every ``emit_every`` poses (and once more at the end). Frames + without a pose are skipped. + """ + n = stream.count() + cb = _progress(n, label) + points: list[tuple[float, float, float]] = [] + last_ts: float | None = None + emit_count = 0 + for obs in stream: + cb(obs) + if obs.pose_tuple is None: + continue + points.append( + (float(obs.pose_tuple[0]), float(obs.pose_tuple[1]), float(obs.pose_tuple[2])) + ) + last_ts = obs.ts + emit_count += 1 + if emit_every > 0 and emit_count % emit_every == 0 and len(points) >= 2: + rr.set_time(TIMELINE, timestamp=obs.ts) + rr.log(entity, rr.LineStrips3D([points], colors=[color])) + if ( + last_ts is not None + and len(points) >= 2 + and (emit_every <= 0 or emit_count % emit_every != 0) + ): + rr.set_time(TIMELINE, timestamp=last_ts) + rr.log(entity, rr.LineStrips3D([points], colors=[color])) + + +def main( + dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), + out: Path = typer.Option(..., "--out", help="Output .rrd path"), + voxel: float = typer.Option( + 0.05, "--voxel", help="Voxel size hint for the point cloud renderer" + ), + point_mode: str = typer.Option( + "spheres", "--point-mode", help="Render mode: 'spheres', 'boxes', or 'points'" + ), + camera_hz: float = typer.Option( + 0.0, + "--camera-hz", + help="Throttle color_image to at most this rate; 0 (default) logs all frames", + ), + map: bool = typer.Option( + False, + "--map", + help="Accumulate each lidar stream into a VoxelGrid and log only the final map", + ), + map_voxel: float = typer.Option( + 0.05, "--map-voxel", help="Voxel size for the accumulated map (m); --map only" + ), + map_device: str = typer.Option( + "CUDA:0", "--map-device", help="Open3D device for the VoxelGrid; --map only" + ), + map_emit_every: int = typer.Option( + 10, + "--map-emit-every", + help="Emit accumulated map every N frames (0 = only at end); --map only", + ), +) -> None: + db_path = resolve_named_path(dataset, ".db") + cam_info = _camera_info_static() + + rr.init("dimos map_rrd", recording_id=db_path.stem) + rr.save(str(out)) + register_colormap_annotation("turbo") + + # Static pinhole on the camera entity; per-frame Transform3D goes on the + # same entity. Image is the child so it projects through the pinhole. + pinhole = cam_info.to_rerun() + assert not isinstance(pinhole, list) + rr.log("world/camera", pinhole, static=True) + + # Static axis triads as children of each moving Transform3D, so the + # transforms are actually visible in the 3D view. + axes = rr.Arrows3D( + vectors=[[0.3, 0, 0], [0, 0.3, 0], [0, 0, 0.3]], + colors=[[255, 0, 0], [0, 255, 0], [0, 0, 255]], + ) + rr.log("world/fastlio/axes", axes, static=True) + rr.log("world/odom/axes", axes, static=True) + + store = SqliteStore(path=str(db_path)) + with store: + print(store.summary()) + + lidar = store.stream("lidar", PointCloud2) + color_image = store.stream("color_image", Image) + has_livox = "fastlio_lidar" in store.streams + livox = store.stream("fastlio_lidar", PointCloud2) if has_livox else None + + # ---- per-frame raw clouds ---- + _log_clouds(" lidar", lidar, "world/lidar", voxel, point_mode) + if livox is not None: + _log_clouds("fastlio_lidar", livox, "world/fastlio_lidar", voxel, point_mode) + + # ---- accumulated voxel maps (--map only) ---- + # Go2 L1 forward-facing → column carving on. + # Mid360 spherical → column carving off, just aggregate. + if map: + grid_kwargs = {"voxel_size": map_voxel, "device": map_device, "show_startup_log": False} + _log_clouds( + " lidar_voxels", + lidar.transform( + VoxelMapTransformer( + emit_every=map_emit_every, carve_columns=True, **grid_kwargs + ) + ), + "world/lidar_voxels", + voxel, + point_mode, + total=max(1, lidar.count() // max(map_emit_every, 1)), + ) + if livox is not None: + _log_clouds( + "fastlio_voxels", + livox.transform( + VoxelMapTransformer( + emit_every=map_emit_every, carve_columns=False, **grid_kwargs + ) + ), + "world/fastlio_voxels", + voxel, + point_mode, + total=max(1, livox.count() // max(map_emit_every, 1)), + ) + + # ---- fastlio pose axis + path from fastlio_odometry stream ---- + if "fastlio_odometry" in store.streams: + odometry = store.stream("fastlio_odometry", Odometry) + cb = _progress(odometry.count(), "fastlio_odometry") + for obs in odometry: + cb(obs) + if obs.pose_tuple is None: + continue + rr.set_time(TIMELINE, timestamp=obs.ts) + x, y, z, qx, qy, qz, qw = obs.pose_tuple + rr.log( + "world/fastlio", + rr.Transform3D( + translation=[x, y, z], + quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw]), + ), + ) + _log_path( + " fastlio_path", + store.stream("fastlio_odometry", Odometry), + "world/fastlio_path", + color=(255, 165, 0), # orange + ) + + # ---- Go2 native odom pose axis + path ---- + if "odom" in store.streams: + odom = store.stream("odom", PoseStamped) + cb = _progress(odom.count(), " odom") + for odom_obs in odom: + cb(odom_obs) + if odom_obs.pose_tuple is None: + continue + rr.set_time(TIMELINE, timestamp=odom_obs.ts) + x, y, z, qx, qy, qz, qw = odom_obs.pose_tuple + rr.log( + "world/odom", + rr.Transform3D( + translation=[x, y, z], + quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw]), + ), + ) + _log_path( + " odom_path", + store.stream("odom", PoseStamped), + "world/odom_path", + color=(0, 200, 100), # green + ) + + # ---- pass 2: camera pose + image per color_image ---- + cam_pipeline = ( + color_image.transform(throttle(1.0 / camera_hz)) if camera_hz > 0 else color_image + ) + n_img = cam_pipeline.count() + cb = _progress(n_img, " color_image") + for img_obs in cam_pipeline: + cb(img_obs) + rr.set_time(TIMELINE, timestamp=img_obs.ts) + if img_obs.pose_tuple is not None: + x, y, z, qx, qy, qz, qw = img_obs.pose_tuple + rr.log( + "world/camera", + rr.Transform3D( + translation=[x, y, z], quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw]) + ), + ) + rr.log("world/camera/image", img_obs.data.to_rerun()) + + print(f"wrote {out}") + print(f"open with: rerun {out}") + + +if __name__ == "__main__": + typer.run(main) diff --git a/dimos/mapping/loop_closure/utils/markers_rrd.py b/dimos/mapping/loop_closure/utils/markers_rrd.py index a98e79ddc3..eb5306edf7 100644 --- a/dimos/mapping/loop_closure/utils/markers_rrd.py +++ b/dimos/mapping/loop_closure/utils/markers_rrd.py @@ -20,15 +20,17 @@ - per detection: marker box in world frame, at the detection timestamp Usage: - uv run python -m dimos.mapping.loop_closure.utils.markers_rrd hk_village1 --out hk.rrd + uv run python -m dimos.utils.cli.markers_rrd hk_village1 --out hk.rrd rerun hk.rrd -Throwaway script; remove once the apriltag reliability work lands. +Throwaway script next to ``map.py``; remove once the apriltag reliability work +lands. """ from __future__ import annotations from pathlib import Path +from typing import Any import rerun as rr import typer @@ -80,12 +82,12 @@ def main( color_image = store.stream("color_image", Image) lidar = store.stream("lidar", PointCloud2) - # Pass 1: robot base pose over time (from lidar.pose). + # ---- pass 1: robot base pose over time (from lidar.pose) ---- for lidar_obs in lidar: - if lidar_obs.pose_tuple is None: + if lidar_obs.pose is None: continue rr.set_time(TIMELINE, timestamp=lidar_obs.ts) - x, y, z, qx, qy, qz, qw = lidar_obs.pose_tuple + x, y, z, qx, qy, qz, qw = lidar_obs.pose rr.log( "world/robot", rr.Transform3D( @@ -93,12 +95,12 @@ def main( ), ) - # Pass 2: camera pose + image per color_image frame. + # ---- pass 2: camera pose + image per color_image frame ---- n_img = color_image.count() for i, img_obs in enumerate(color_image): rr.set_time(TIMELINE, timestamp=img_obs.ts) - if img_obs.pose_tuple is not None: - x, y, z, qx, qy, qz, qw = img_obs.pose_tuple + if img_obs.pose is not None: + x, y, z, qx, qy, qz, qw = img_obs.pose rr.log( "world/camera", rr.Transform3D( @@ -115,9 +117,9 @@ def main( if (i + 1) % 50 == 0 or i + 1 == n_img: print(f"images: {i + 1}/{n_img}") - # Pass 3: marker detections, filtered the same way as `dimos map`. + # ---- pass 3: marker detections (filtered same way as `dimos map`) ---- xf = DetectMarkers(camera_info=cam_info, marker_length_m=marker_size) - pipeline: Stream[Image] = color_image.transform( + pipeline: Stream[Any] = color_image.transform( QualityWindow(lambda img: img.sharpness, window=quality_window) ) if marker_max_speed > 0: @@ -173,7 +175,7 @@ def main( ) print(f"detections: {n_det}") - # Pass 4: averaged tracks (smoothing_window > 0 → per-track ids). + # ---- pass 4: averaged tracks (smoothing_window > 0 → per-track ids) ---- # Re-runs the same filtered pipeline through a smoothing detector; # each track yields one entity that updates as the windowed average # refines. Color stable per track_id for visual identity. @@ -183,7 +185,7 @@ def main( marker_length_m=marker_size, smoothing_window=smoothing_window, ) - pipeline_tracked: Stream[Image] = color_image.transform( + pipeline_tracked: Stream[Any] = color_image.transform( QualityWindow(lambda img: img.sharpness, window=quality_window) ) if marker_max_speed > 0: diff --git a/dimos/mapping/loop_closure/utils/rename_streams.py b/dimos/mapping/loop_closure/utils/rename_streams.py new file mode 100644 index 0000000000..73002aeb2a --- /dev/null +++ b/dimos/mapping/loop_closure/utils/rename_streams.py @@ -0,0 +1,173 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Copy a memory2 sqlite recording into a new file, renaming streams. + +Iterates each source stream and re-appends every observation under its new +name in a fresh destination db. Slower than in-place ``ALTER TABLE`` but +forces a full re-read of every row, so any pre-existing corruption surfaces +immediately. Streams not mentioned in ``--rename`` are copied verbatim. + +Usage: + uv run python -m dimos.mapping.loop_closure.utils.rename_streams mid360 \\ + --out mid360_renamed.db \\ + --rename go2_lidar=lidar \\ + --rename lidar=fastlio_lidar \\ + --rename odometry=fastlio_odometry +""" + +from __future__ import annotations + +from collections.abc import Callable +import json +from pathlib import Path +import sqlite3 +import time +from typing import Any + +import typer + +from dimos.memory2.codecs.base import _resolve_payload_type +from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.type.observation import Observation +from dimos.utils.data import resolve_named_path + + +def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: + """Matches dimos/utils/cli/map.py:progress — kept inline to avoid pulling rerun.""" + seen = 0 + wall_start: float | None = None + last_wall: float | None = None + first_ts: float | None = None + + def _progress(obs: Observation[Any]) -> None: + nonlocal seen, wall_start, last_wall, first_ts + now = time.monotonic() + if wall_start is None: + wall_start = now + first_ts = obs.ts + assert first_ts is not None + frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0 + last_wall = now + seen += 1 + pct = 100 * seen // total if total else 100 + wall = now - wall_start + data = obs.ts - first_ts + speed = data / wall if wall > 0 else 0.0 + end = "\n" if seen >= total else "" + prefix = f"{label} " if label else "" + print( + f"\r{prefix}{pct:>3}% [{seen}/{total}] {data:.1f}s ({speed:.1f} x rt) {frame_ms:.0f}ms/frame", + end=end, + flush=True, + ) + + return _progress + + +def _stream_payload_types(db_path: Path) -> dict[str, type]: + """Read each stream's registered payload type from the _streams table.""" + conn = sqlite3.connect(str(db_path)) + try: + rows = conn.execute("SELECT name, config FROM _streams").fetchall() + finally: + conn.close() + return {name: _resolve_payload_type(json.loads(cfg)["payload_module"]) for name, cfg in rows} + + +def _parse_renames(pairs: list[str]) -> dict[str, str]: + """Parse ``OLD=NEW`` pairs into a dict; reject malformed or duplicate OLDs.""" + out: dict[str, str] = {} + for raw in pairs: + if "=" not in raw: + raise typer.BadParameter(f"--rename must be OLD=NEW, got {raw!r}") + old, new = raw.split("=", 1) + old, new = old.strip(), new.strip() + if not old or not new: + raise typer.BadParameter(f"--rename has empty side in {raw!r}") + if old in out: + raise typer.BadParameter(f"--rename {old!r} specified more than once") + out[old] = new + return out + + +def main( + dataset: str = typer.Argument(..., help="Source .db: bare name (cwd or data/) or path"), + out: Path = typer.Option(..., "--out", help="Output renamed .db path (must not exist)"), + rename: list[str] = typer.Option( + [], + "--rename", + help="OLD=NEW rename pair (can be passed multiple times); unmapped streams pass through", + ), + drop: list[str] = typer.Option( + [], "--drop", help="Stream name to omit from output (can be passed multiple times)" + ), +) -> None: + src_path = resolve_named_path(dataset, ".db") + if out.exists(): + raise typer.BadParameter(f"Output already exists: {out}") + + print(f"analizing dataset {src_path}") + rename_map = _parse_renames(rename) + payload_types = _stream_payload_types(src_path) + + drop_set = set(drop) + missing = sorted((set(rename_map) | drop_set) - set(payload_types)) + if missing: + raise typer.BadParameter( + f"--rename / --drop refers to streams not in source: {missing}. " + f"Available: {sorted(payload_types)}" + ) + overlap = drop_set & set(rename_map) + if overlap: + raise typer.BadParameter(f"stream(s) in both --rename and --drop: {sorted(overlap)}") + + kept = {name: rename_map.get(name, name) for name in payload_types if name not in drop_set} + seen: dict[str, str] = {} + for src_name, dst_name in kept.items(): + if dst_name in seen: + raise typer.BadParameter( + f"name collision: {seen[dst_name]!r} and {src_name!r} both map to {dst_name!r}" + ) + seen[dst_name] = src_name + + print("rename plan:") + for src_name in payload_types: + if src_name in drop_set: + print(f" {src_name:>16s} ✗ (dropped)") + else: + dst_name = kept[src_name] + arrow = "→" if src_name != dst_name else " " + print(f" {src_name:>16s} {arrow} {dst_name}") + print() + + src = SqliteStore(path=str(src_path)) + with src: + dst = SqliteStore(path=str(out)) + with dst: + for src_name, dst_name in kept.items(): + ptype = payload_types[src_name] + src_s = src.stream(src_name, ptype) + dst_s = dst.stream(dst_name, ptype) + total = src_s.count() + cb = progress(total, f"{dst_name:>16s}") + for obs in src_s: + dst_s.append(obs.data, ts=obs.ts, pose=obs.pose, tags=obs.tags or None) + cb(obs) + + print(f"\nwrote {out}") + + +if __name__ == "__main__": + typer.run(main) diff --git a/dimos/memory2/store/base.py b/dimos/memory2/store/base.py index fb6d76379d..7a7162a6d1 100644 --- a/dimos/memory2/store/base.py +++ b/dimos/memory2/store/base.py @@ -65,6 +65,9 @@ def __getitem__(self, name: str) -> S: raise KeyError(name) return self._container.stream(name) + def __contains__(self, name: str) -> bool: + return name in self._container.list_streams() + def __dir__(self) -> list[str]: return self._container.list_streams() @@ -200,6 +203,10 @@ def list_streams(self) -> list[str]: """Return names of all streams in this store.""" return list(self._streams.keys()) + def summary(self) -> str: + """One line per stream — name, count, ts range. See :meth:`Stream.summary`.""" + return "\n".join(stream.summary() for _, stream in self.streams.items()) + def delete_stream(self, name: str) -> None: """Delete a stream by name (from cache and underlying storage).""" stream = self._streams.pop(name, None) From d4a615bcab2ee62dc68ae173bb41830e133d42a2 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Fri, 29 May 2026 14:08:00 +0800 Subject: [PATCH 44/64] migrated autoresearch here --- .../loop_closure/autoresearch/conclusions.md | 262 ++++++ .../autoresearch/pgo_autoresearched.py | 802 ++++++++++++++++++ .../loop_closure/autoresearch/program.md | 221 +++++ 3 files changed, 1285 insertions(+) create mode 100644 dimos/mapping/loop_closure/autoresearch/conclusions.md create mode 100644 dimos/mapping/loop_closure/autoresearch/pgo_autoresearched.py create mode 100644 dimos/mapping/loop_closure/autoresearch/program.md diff --git a/dimos/mapping/loop_closure/autoresearch/conclusions.md b/dimos/mapping/loop_closure/autoresearch/conclusions.md new file mode 100644 index 0000000000..8504ccd731 --- /dev/null +++ b/dimos/mapping/loop_closure/autoresearch/conclusions.md @@ -0,0 +1,262 @@ +# loop-closure autoresearch — conclusions + +## TL;DR + +Baseline `TOTAL_SPREAD = 48.655 m`. Tuned `pgo.py` to **22.289 m**, a +**-54.2%** reduction over 37 commits. + +| dataset | baseline | final | change | +|-------------|---------:|------:|---------:| +| hk_village1 | 5.83 | 6.20 | +6% | +| hk_village2 | 19.53 | 6.49 | **-67%** | +| hk_village3 | 5.00 | 3.78 | -24% | +| hk_village4 | 13.14 | 2.39 | **-82%** | +| hk_village5 | 2.80 | 2.14 | -24% | +| hk_village6 | 1.74 | 1.59 | -9% | + +hk_village2 and hk_village4 were dominating the baseline metric; both +contain a clear "robot was disturbed" event mid-trajectory that the +unmodified loop-closure code couldn't bridge. + +## The problem, after looking at it + +Two failure modes drove the baseline number, both visible from the +per-dataset breakdown but only diagnosable by looking at the actual +data: + +### 1. PGO can't relocalize through a meter-scale disturbance + +`hk_village2` had drift growing from 0 to 4.7 m across one trajectory. +Loop closure search ran against optimized-frame positions with a 2 m +radius — once optimized drift exceeded 2 m, every nearby revisit fell +outside the search window. The robot kept revisiting the same physical +area, but the optimizer never saw a match. Marker spread for the only +multi-tracked marker (id 10) was 19 m, all because detections from after +the disturbance landed wherever the unconstrained chain put them. + +Diagnostic script (`/tmp/hk2_marker10.py`) dumped per-detection raw + +corrected positions plus the nearest keyframe's drift. The drift +trace (`/tmp/drift_trace.py`) printed kf-by-kf when drift crossed +threshold. These were the two scripts that made every subsequent +tuning decision obvious. + +### 2. Chain factors were overconfident + +Inter-keyframe BetweenFactors had translation variance `1e-4` (sigma +~1cm). Accumulated over 262 keyframes that implies a total uncertainty +of ~16 cm — but actual drift in hk_village2 hit 4 m. PGO under-corrected +even when good loop closures fired, because the chain factors fought +back. Forensic comparison of marker raw positions vs. PGO-corrected +positions showed the correction matched the *measured* drift but +under-corrected the *true* drift by ~36%. + +## What worked (in order they were found) + +Each line in the table is roughly a real win — bigger pieces of the +total improvement at the top. + +| change | spread | +|----------------------------------------------------------------|--------------:| +| baseline | 48.66 | +| tighten loop rot_var 0.05 → 0.01 then → 0.003 | 47.75 → 37.98 | +| `loop_submap_half_range` 10 → 40 (more ICP context) | 39.92 | +| tighten ICP `max_correspondence_dist` 1.0 → 0.5 | 38.82 | +| drift-gated local-frame fallback (radius 15 m, drift > 0.5 m) | 34.06 | +| multi-loop accept inside fallback (K=7 candidates) | 31.40 | +| `min_loop_detect_duration` 5s → 3s | 33.66 → 31.26 | +| **drift-aware chain variance** (8e-4 if prev kf drift > 1.3 m) | 27.39 | +| nearest-neighbor interp (vs linear blend) | 27.33 | +| ISAM2 Dogleg optimizer | 27.33 | +| trans_var floor 0.01 → 0.005 | 30.24 | +| **second-pass loop rescan** over converged optimized poses | 24.62 | +| rescan opt-only (no fallback) + `submap_half_range=38` | 22.60 | +| rescan `time_thresh_override=13s` | **22.29** | + +The two structural changes (chain-variance-by-drift and second-pass +rescan) account for most of the improvement. The rest is parameter +tuning at their joint sweet spot, which is narrow — moving any one knob +±20% usually regressed by 1 m+. + +## What didn't work (a non-exhaustive list) + +So future tuners don't repeat these: + +- **Loosening trajectory variance globally** (1e-4 → 1e-3, 5e-4, etc.) — + helps hk2 dramatically (sometimes drops it to 8 m) but blows up + hk1/hk5/hk6 because chain becomes effectively unconstrained. +- **RANSAC + FPFH global registration** for fallback init — admits + wrong alignments (geometrically similar but different actual places) + that the optimizer can't reject. +- **Coarse-to-fine ICP** for fallback — same problem; coarse pass found + wrong-but-locally-tight alignments. +- **ICP init from local-frame relative pose** — marginal at best, + occasionally worse. +- **Huber robust kernel on loop factors** (both global and + fallback-only) — down-weights good loops along with bad ones. +- **Forcing multi-loop accept everywhere** (not just fallback) — adds + redundant constraints in easy datasets, increases noise. +- **Two-pass rescan** (run rescan twice) — over-constrains. +- **Tighter chain factors when many loops exist** — they keep being the + bottleneck even with abundant loop evidence. +- **Variations on the drift threshold for chain loosening** (1.0, 1.2, + 1.4, 1.5, 1.7, 1.8, 2.0, 3.0, multi-tier) — 1.3 m is sharply optimal. +- **Variations on rescan submap_half_range** (20, 30, 33, 35, 36, 37, + 39, 40, 50, 80) — 38 is sharply optimal; first-pass needs 40, not 38. +- **Rescan with wider opt search radius, force-fallback, smaller source + submap, force-multi-loop, tighter ICP correspondence, lower + candidate count, tighter early-exit** — all worse. +- **Windowed-mean / Catmull-Rom interpolation in the corrections** — + nearest-neighbor wins; the noise isn't in interp blending. +- **Final batch LM re-optimization after ISAM2** — ISAM2 was already + converged; LM finds the same answer. + +## How the process went + +The first half of the experiment was parameter twiddling — small wins +adding up to ~10 m of improvement, then a hard plateau around 27–28 m +where every knob was at its joint optimum and ±20% perturbations +regressed. + +The breakthroughs all came from **investigation runs that didn't change +`pgo.py`**: + +1. Per-dataset breakdown of spread (already in the eval table). Showed + hk_village2 + hk_village4 were 68% of the baseline metric. +2. `_diag.py` / `hk2_marker10.py` — per-marker pose dump. Revealed + marker 10 in hk_village2 had detections 5 m apart in the corrected + frame, all in the disturbance area. Concretely showed the + relocalization-after-disturbance failure mode. +3. `drift_trace.py` — kf-by-kf drift trace. Showed when each dataset + crossed which drift threshold and how long it stayed there; + directly motivated the "drift-aware chain variance" change. +4. Comparing raw vs. corrected marker positions plus the correction + translation. Showed the correction was matching the *measured* + drift but falling 36% short of the true drift — pointed straight + at the chain-factor overconfidence problem. + +Each of these scripts ran in 30–60s, didn't count toward the +experiment log, and led to a 1–3 m improvement that knob tuning hadn't +been finding. The second-pass rescan idea came from "we converged once, +the optimized poses are now actually trustworthy, what would loop +detection do if we re-ran it?" — and that single structural change +dropped the metric by ~3 m. + +The final ~2 m came from narrow-window tuning of the rescan +parameters: `submap_half_range=38` (not 37 or 39) and +`time_thresh_override=13s` (not 12 or 14). At this point most +perturbations regress by 0.1–2 m, suggesting we're at a real local +optimum and further gains likely need a different structural change +(e.g., refining the marker_transformer integration, which is out of +scope here). + +## What's still on the table + +- **Marker_transformer rotation accuracy.** Forensic of hk_village2 + marker 10 shows det 3 (the worst remaining) has 1.4 m of x-error + that doesn't move with chain or loop tuning. The robot saw the + marker from different angles at det 1 vs det 3, and small yaw drift + in the optimized pose projects to large marker-position error + because the marker is ~1 m from the camera. PGO can't fix the + camera-to-marker geometry; this is approaching the noise floor. +- **Per-recording adaptive parameters.** The current params are a + global optimum across all 6 recordings; recordings 1 and 2/4 have + pretty different dynamics. A per-recording tuner would probably + squeeze another 1–2 m total but isn't well-aligned with deployment. + +## Code shape + +The final `pgo.py` is bigger than the original (the `_search_for_loops` +function in particular took on several optional kwargs to support the +rescan code path). Worth a cleanup pass before merging — the original +inline code path stayed for the first pass; only the rescan path needs +the extra knobs. A targeted refactor could pull rescan into its own +method and drop most of the kwargs. + +## What data would have helped next time + +Some signals would have shortened the loop substantially: + +- **Ground-truth marker positions.** A `markers.csv` with the true + world position of each `marker_id` per recording. Right now the eval + scores "consistency across detections of the same marker", which + only tells you when corrections diverge — not whether they converge + to the *right* place. Half my forensic time was spent inferring true + positions from the most consistent cluster. With ground truth you + could also distinguish "PGO under-correcting" from "marker detection + is wrong" without having to reason about it. +- **Ground-truth trajectories.** Even just one or two recordings with + pose ground truth (e.g., logged from a more accurate localizer) + would let you compute keyframe-pose error directly, instead of using + marker-spread as a proxy. The proxy is informative but indirect: + there were several experiments where loop count and quality both + improved but spread stayed flat because the underlying poses moved + along the marker viewing rays. +- **Annotation of disturbance events.** Marking the timestamps in each + recording where the robot got pushed / lifted / kicked would have + saved a lot of inference. I built `drift_trace.py` to detect these + from PGO output, but the timestamps would have been cleaner ground + truth and would let an automated tuner segment "easy" from + "disturbed" trajectory regions. +- **Loop closure log structured beyond `[inf]` log lines.** Per-loop + records of (source kf, target kf, score, was-fallback, accepted-vs- + rejected-by-ICP) emitted as JSONL or a sqlite table would have made + many forensics one-liner queries instead of grep + Python parsing. +- **The eval's per-marker breakdown printed by default**, not derived + from a custom diag script. The dataset row tells you which recording + is hurting; one more level of detail (which marker, how many + detections, max pair distance) would have pointed at hk_village2 + marker 10 in the first 5 minutes instead of after several + knob-tuning rounds. + +## What approach would help next time + +Roughly in order of expected payoff: + +1. **Spend the first iteration on tooling, not tuning.** I tuned for + ~20 commits before writing the first diagnostic script and got the + biggest single wins (drift-aware chain, second-pass rescan) only + *after* the forensics existed. The program.md update mid-experiment + ("Investigation is part of the loop") was the right correction. The + diagnostics paid back their cost in the first iteration they were + used. +2. **Run dimos map with rerun for the worst recording before + touching anything.** Even a 5-minute look at the actual trajectory, + marker positions, and loop closure edges would have made the + disturbance failure mode obvious and saved several speculative + tuning rounds. I never actually opened rerun this session and the + most important visualization was reconstructed badly from log + parsing. +3. **State a hypothesis before each change, log the prediction, then + check.** I did this informally in the responses to the user but + the experiment log records *what changed and the resulting + spread*, not *what was predicted*. When a change regresses, the + prediction-vs-reality gap is the actual learning; without it you + only learn "that one didn't work." +4. **Search the loose-chain / drift-threshold parameter space jointly + with a grid, not greedy.** The threshold and the loose variance + value interact (1.5/5e-4 worked, 1.3/8e-4 worked better; some + combinations weren't tested because the greedy walk didn't go + there). A 3×3 or 5×5 grid would have taken 25 evals (≈40 min) and + probably found the joint optimum directly. I spent more time than + that on greedy walks that revisited the same region. +5. **Try the structural change ("rescan with converged poses") much + earlier.** It was obvious in retrospect — if loop search depended + on the optimized poses being accurate, and those poses are most + accurate after PGO converges, then re-searching after convergence + *had to* find more loops. I sat on this for many iterations because + it felt like a bigger change. The cost was ~30 minutes of + refactoring `_search_for_loops` to take a `cur_idx`, and the + payoff was the largest single improvement. +6. **Keep a "did not work" register that's easier to grep than the + results.tsv `discard` rows.** Several ideas resurfaced under + different names (e.g., "loosen rot_var in fallback" got tried + three times across iterations). Cross-referencing against past + attempts before each new experiment is cheap; not doing it is + compounding waste. +7. **Resist twiddling the same knob multiple times.** Once a + parameter is at a local optimum, ±1 unit perturbations are usually + in the noise; the energy is better spent on a structural change + somewhere else. I did several rounds of "try 1.3 / try 1.4 / try + 1.5" once a knob hit its sweet spot, and the wins from those + sub-percent moves were almost always reverted by the next + experiment downstream. diff --git a/dimos/mapping/loop_closure/autoresearch/pgo_autoresearched.py b/dimos/mapping/loop_closure/autoresearch/pgo_autoresearched.py new file mode 100644 index 0000000000..ecb4735044 --- /dev/null +++ b/dimos/mapping/loop_closure/autoresearch/pgo_autoresearched.py @@ -0,0 +1,802 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""PGO drift corrections as composable Stream stages. + +A lidar/odom stream comes in with poses that drift over time — the robot's +estimate of where it is in the world slowly diverges from ground truth as +small per-frame errors accumulate. PGO ("pose graph optimization") detects +revisited places via loop closure and pulls the trajectory back into +self-consistency. The result is a stream of *corrections*: a Transform per +keyframe that maps every drifted ("raw") pose to its corrected one. + +Pipeline: + + lidar: Stream[PointCloud2] # pose-stamped scans + -> pgo_keyframes(...) -> Stream[Keyframe] # one per kf + -> keyframes_to_corrections(...) -> Stream[Transform] # world_corrected <- world_raw + -> apply_corrections(any_stream, corrections) -> Stream[T] # obs.pose shuffled + +`pgo_keyframes` runs ISAM2 + ICP loop closure across the input, emitting +each kept keyframe with both its raw odom pose (`local`) and its optimized +pose (`optimized`) as Transforms. Pass `loop_closures_out=[]` to also +collect each accepted ICP loop edge as a `LoopClosure` (source+target +optimized poses + ICP fitness) — useful for visualizing the pose graph. + +`keyframes_to_corrections` is pure arithmetic: per keyframe, the drift +correction is `optimized @ local^-1` (Transform composition). The resulting +stream has one entry per keyframe — sparse in time. + +`make_interpolator` materializes the sparse correction stream into a +ts -> Transform function: SLERP for rotation, linear lerp for translation, +clipping out-of-range queries to the endpoints. + +`apply_corrections` shuffles `obs.pose` on any stream using the interpolated +correction at each obs.ts. It's read-only on `obs.data`, so the same lidar +stream can be re-applied — or a different recording from the same physical +space, if the corrections generalize. + +`gtsam` is imported lazily inside hot helpers so importing this module +stays cheap and gtsam-free for consumers that only need `Keyframe` / +`apply_corrections`. +""" + +from __future__ import annotations + +from collections.abc import Callable, Iterator +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Unpack + +import numpy as np +import open3d as o3d # type: ignore[import-untyped] +import open3d.core as o3c # type: ignore[import-untyped] +from scipy.spatial.transform import Rotation + +from dimos.memory2.store.memory import MemoryStore +from dimos.memory2.stream import Stream +from dimos.memory2.type.observation import Observation +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.protocol.service.spec import BaseConfig +from dimos.utils.logging_config import setup_logger + +if TYPE_CHECKING: + import gtsam # type: ignore[import-not-found,import-untyped] + +T = TypeVar("T") + +FRAME_WORLD_CORRECTED = "world_corrected" +FRAME_WORLD_RAW = "world_raw" +FRAME_BODY = "body" + +logger = setup_logger() + + +class PGOConfig(BaseConfig): + # Keyframe detection + key_pose_delta_trans: float = 0.5 + key_pose_delta_deg: float = 10.0 + + # Loop closure + loop_search_radius: float = 2.0 + loop_search_radius_local: float = 15.0 + loop_fallback_drift_thresh: float = 0.5 # min drift to trigger local fallback + loop_time_thresh: float = 20.0 + loop_time_thresh_local: float = 40.0 # stricter time gap for local fallback + loop_score_thresh: float = 0.3 + loop_score_thresh_tight: float = 0.05 # early-exit threshold: ICP rmse ~22cm + loop_submap_half_range: int = 40 + loop_candidates_per_iter: int = 7 + min_icp_inliers: int = 10 + min_keyframes_for_loop_search: int = 10 + loop_closure_extra_iterations: int = 2 + submap_resolution: float = 0.2 + min_loop_detect_duration: float = 3.0 + + # ICP + max_icp_iterations: int = 50 + max_icp_correspondence_dist: float = 0.5 + + +class PGOKwargs(TypedDict, total=False): + """Typed kwargs for `pgo_keyframes`; mirrors `PGOConfig` fields.""" + + key_pose_delta_trans: float + key_pose_delta_deg: float + loop_search_radius: float + loop_search_radius_local: float + loop_fallback_drift_thresh: float + loop_time_thresh: float + loop_time_thresh_local: float + loop_score_thresh: float + loop_score_thresh_tight: float + loop_submap_half_range: int + loop_candidates_per_iter: int + min_icp_inliers: int + min_keyframes_for_loop_search: int + loop_closure_extra_iterations: int + submap_resolution: float + min_loop_detect_duration: float + max_icp_iterations: int + max_icp_correspondence_dist: float + + +@dataclass(frozen=True) +class Keyframe: + """Keyframe emitted by `pgo_keyframes`. + + `local` is the body pose in the odom (`world_raw`) frame, `optimized` is + the body pose in the drift-corrected (`world_corrected`) frame. + """ + + ts: float + local: Transform + optimized: Transform + + +@dataclass(frozen=True) +class LoopClosure: + """An accepted ICP loop-closure edge in the pose graph. + + `source` and `target` are the final optimized body poses (in + `world_corrected`) of the two keyframes the loop connected. `score` is + the ICP fitness used as the loop's translation variance — lower is a + tighter match. + """ + + source: Transform + target: Transform + score: float + + +def pgo_keyframes( + stream: Stream[PointCloud2], + *, + on_frame: Callable[[Any], None] | None = None, + loop_closures_out: list[LoopClosure] | None = None, + **pgo_cfg: Unpack[PGOKwargs], +) -> Stream[Keyframe]: + """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe. + + If `loop_closures_out` is provided, accepted loop edges are appended to + it (in detection order) using each keyframe's final optimized pose. + """ + cfg = PGOConfig(**pgo_cfg) + pgo = _PGO(cfg) + + for obs in stream: + if on_frame is not None: + on_frame(obs) + if obs.pose is None: + continue + # Skip placeholder poses (origin position OR zero quaternion). + if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: + continue + if ( + obs.pose[3] == 0 + and obs.pose[4] == 0 + and obs.pose[5] == 0 + and (obs.pose[6] == 0 or obs.pose[6] == 1) + ): + continue + local_pose = _obs_to_pose3(obs) + pgo.process(local_pose, obs.ts, obs.data) + + mem = MemoryStore() + out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) + for kf in pgo.finalize(): + out.append(kf, ts=kf.ts) + if loop_closures_out is not None: + loop_closures_out.extend(pgo.loop_closures()) + return out + + +def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: + """Per-keyframe drift correction as Transform(world_corrected <- world_raw).""" + mem = MemoryStore() + out: Stream[Transform] = mem.stream("corrections", Transform) + for obs in keyframes: + kf = obs.data + # world_corrected <- body <- world_raw composes to world_corrected <- world_raw. + drift = kf.optimized + kf.local.inverse() + out.append(drift, ts=kf.ts) + return out + + +def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]: + """Materialize corrections once; return a fast ts -> Transform lookup.""" + ts_list: list[float] = [] + R_list: list[np.ndarray] = [] + t_list: list[np.ndarray] = [] + for obs in corrections: + R, t = _r_t_from_transform(obs.data) + # obs.ts is authoritative; obs.data.ts can be mutated by Transform's + # ts=0.0 -> time.time() fallback in its constructor. + ts_list.append(obs.ts) + R_list.append(R) + t_list.append(t) + + if not ts_list: + raise ValueError("empty corrections stream") + + # Pad len==1 with a duplicate so the general path handles it; + # clip-to-endpoints behavior is unchanged. + if len(ts_list) == 1: + ts_list.append(ts_list[0] + 1e-6) + R_list.append(R_list[0]) + t_list.append(t_list[0]) + + ts_arr = np.array(ts_list) + R_stack = np.stack(R_list) + t_stack = np.stack(t_list) + + def interp(ts: float) -> Transform: + ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) + idx = int(np.searchsorted(ts_arr, ts_clip)) + if idx == 0: + t = t_stack[0] + R = R_stack[0] + elif idx >= len(ts_arr): + t = t_stack[-1] + R = R_stack[-1] + else: + t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] + # Nearest-neighbor in time: avoids blending two adjacent + # corrections that may straddle a loop-closure update. + near_lo = (ts_clip - t_lo) < (t_hi - ts_clip) + t = t_stack[idx - 1] if near_lo else t_stack[idx] + R = R_stack[idx - 1] if near_lo else R_stack[idx] + return _transform_from_r_t(R, t, ts=float(ts)) + + return interp + + +def apply_corrections( + stream: Stream[T], + corrections: Stream[Transform], +) -> Stream[T]: + """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts. + + `obs.data` is untouched. Frames with `obs.pose is None` pass through + unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped). + """ + interp = make_interpolator(corrections) + + def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: + for obs in upstream: + if obs.pose is None: + yield obs + continue + raw_tf = Transform.from_pose(FRAME_BODY, obs.pose_stamped) + # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw. + # Observation normalizes Transform back to 7-tuple via __post_init__. + corrected = interp(obs.ts) + raw_tf + yield obs.derive(data=obs.data, pose=corrected) + + return stream.transform(xf) + + +def _r_t_from_transform(tf: Transform) -> tuple[np.ndarray, np.ndarray]: + q = tf.rotation + R = Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix() + t = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) + return R, t + + +def _transform_from_r_t(R: np.ndarray, t: np.ndarray, *, ts: float) -> Transform: + return Transform( + translation=Vector3(float(t[0]), float(t[1]), float(t[2])), + rotation=Quaternion.from_rotation_matrix(R), + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_WORLD_RAW, + ts=ts, + ) + + +def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3: + """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`.""" + import gtsam # type: ignore[import-not-found,import-untyped] + + if obs.pose is None: + raise LookupError("No pose set on this observation") + x, y, z, qx, qy, qz, qw = obs.pose + return gtsam.Pose3( + gtsam.Rot3.Quaternion(float(qw), float(qx), float(qy), float(qz)), + gtsam.Point3(float(x), float(y), float(z)), + ) + + +@dataclass +class _KeyPose: + local: gtsam.Pose3 # odom-frame pose at capture + optimized: gtsam.Pose3 # drift-corrected pose + timestamp: float + body_cloud: PointCloud2 # voxel-downsampled, body frame + + +@dataclass +class _LoopPair: + source: int + target: int + offset: gtsam.Pose3 # source pose in target's frame + score: float + relocalizing: bool = False # True if this loop was found via local-frame fallback + + +class _PGO: + """Incremental PGO: gtsam ISAM2 over keyframes with ICP loop closures. + + Call `process` per frame (odom pose + body-frame points). Call + `finalize` once at the end for the sorted, deduped keyframe list. + """ + + def __init__(self, config: PGOConfig) -> None: + import gtsam # type: ignore[import-not-found,import-untyped] + + self._gtsam = gtsam + self._cfg = config + self._key_poses: list[_KeyPose] = [] + self._pending_loops: list[_LoopPair] = [] + self._accepted_loops: list[_LoopPair] = [] + self._last_loop_ts: float | None = None + self._world_correction: gtsam.Pose3 = gtsam.Pose3() # identity + + params = gtsam.ISAM2Params() + params.setRelinearizeThreshold(0.01) + params.relinearizeSkip = 1 + params.setOptimizationParams(gtsam.ISAM2DoglegParams()) + self._isam2 = gtsam.ISAM2(params) + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + def process( + self, + local_pose: gtsam.Pose3, + ts: float, + world_cloud: PointCloud2, + ) -> None: + if len(world_cloud) == 0: + return + if not self._is_keyframe(local_pose): + return + # Unregister: lift world-frame scan back into body frame using its + # odom pose, so PGO can re-project it via the optimized pose later. + body_cloud = world_cloud.transform( + _pose3_to_transform(local_pose.inverse(), ts=ts) + ).voxel_downsample(self._cfg.submap_resolution) + self._add_keyframe(local_pose, ts, body_cloud) + self._search_for_loops() + self._smooth_and_update() + + def finalize(self) -> list[Keyframe]: + """Return keyframes sorted by ts, with duplicate-ts entries dropped.""" + # Second-pass loop rescan over the converged optimized poses: + # keyframes that ended up in heavily drifted regions may have + # missed loops during incremental processing (their optimized + # pose at the moment was misleading). Re-search with the final + # poses gives us another chance to anchor those segments. + self._last_loop_ts = None + for i in range(len(self._key_poses)): + self._search_for_loops( + cur_idx=i, + enforce_time_gate=False, + no_fallback=True, + submap_half_range=38, + time_thresh_override=13.0, + ) + if self._pending_loops: + self._smooth_and_update() + kps = sorted(self._key_poses, key=lambda kp: kp.timestamp) + out: list[Keyframe] = [] + for i, kp in enumerate(kps): + if i > 0 and kp.timestamp <= kps[i - 1].timestamp: + continue + out.append( + Keyframe( + ts=kp.timestamp, + local=_pose3_to_transform( + kp.local, + ts=kp.timestamp, + frame_id=FRAME_WORLD_RAW, + child_frame_id=FRAME_BODY, + ), + optimized=_pose3_to_transform( + kp.optimized, + ts=kp.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ), + ) + ) + return out + + def loop_closures(self) -> list[LoopClosure]: + """Accepted loop edges, resolved to final optimized poses.""" + out: list[LoopClosure] = [] + for pair in self._accepted_loops: + src = self._key_poses[pair.source] + tgt = self._key_poses[pair.target] + out.append( + LoopClosure( + source=_pose3_to_transform( + src.optimized, + ts=src.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ), + target=_pose3_to_transform( + tgt.optimized, + ts=tgt.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ), + score=pair.score, + ) + ) + return out + + def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool: + if not self._key_poses: + return True + delta = self._key_poses[-1].local.inverse().compose(local_pose) + delta_trans = float(np.linalg.norm(np.asarray(delta.translation()))) + # Rotation magnitude from trace: cos(theta) = (tr(R) - 1) / 2. + R = delta.rotation().matrix() + cos_theta = float(np.clip((np.trace(R) - 1.0) / 2.0, -1.0, 1.0)) + delta_deg = float(np.degrees(np.arccos(cos_theta))) + return ( + delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg + ) + + def _add_keyframe( + self, + local_pose: gtsam.Pose3, + ts: float, + body_cloud: PointCloud2, + ) -> None: + gtsam = self._gtsam + idx = len(self._key_poses) + optimized = self._world_correction.compose(local_pose) + + self._values.insert(idx, optimized) + + if idx == 0: + noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) + self._graph.add(gtsam.PriorFactorPose3(idx, optimized, noise)) + else: + last_local = self._key_poses[-1].local + between = last_local.inverse().compose(local_pose) + # When the prior keyframe is already in a drifted region, + # loosen the chain variance for this step so loop closures + # can pull harder across the disturbance. + prev = self._key_poses[-1] + prev_drift = float( + np.linalg.norm( + np.asarray(prev.optimized.translation()) - np.asarray(prev.local.translation()) + ) + ) + trans_var = 8e-4 if prev_drift > 1.3 else 1e-4 + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([1e-6, 1e-6, 1e-6, trans_var, trans_var, 1e-6]) + ) + self._graph.add(gtsam.BetweenFactorPose3(idx - 1, idx, between, noise)) + + self._key_poses.append( + _KeyPose( + local=local_pose, + optimized=optimized, + timestamp=ts, + body_cloud=body_cloud, + ) + ) + + def _get_submap(self, idx: int, half_range: int) -> PointCloud2: + lo = max(0, idx - half_range) + hi = min(len(self._key_poses) - 1, idx + half_range) + if lo > hi: + return PointCloud2() + cloud = self._key_poses[lo].body_cloud.transform( + _pose3_to_transform(self._key_poses[lo].optimized, ts=self._key_poses[lo].timestamp) + ) + for i in range(lo + 1, hi + 1): + kp = self._key_poses[i] + cloud = cloud + kp.body_cloud.transform( + _pose3_to_transform(kp.optimized, ts=kp.timestamp) + ) + return cloud.voxel_downsample(self._cfg.submap_resolution) + + def _search_for_loops( + self, + cur_idx: int | None = None, + *, + enforce_time_gate: bool = True, + opt_radius: float | None = None, + force_fallback: bool = False, + no_fallback: bool = False, + submap_half_range: int | None = None, + icp_max_dist: float | None = None, + source_half_range: int = 0, + candidates_per_iter: int | None = None, + force_multi: bool = False, + time_thresh_override: float | None = None, + ) -> None: + if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: + return + if cur_idx is None: + cur_idx = len(self._key_poses) - 1 + + cur_kp = self._key_poses[cur_idx] + cur_ts = cur_kp.timestamp + if enforce_time_gate and ( + self._last_loop_ts is not None + and cur_ts - self._last_loop_ts < self._cfg.min_loop_detect_duration + ): + return + + cur_opt_t = np.asarray(cur_kp.optimized.translation()) + cur_loc_t = np.asarray(cur_kp.local.translation()) + + from scipy.spatial import KDTree + + # Search both optimized and local (odom) frames; exclude cur_idx + # from the candidate pool (and time-adjacent neighbors via the + # time gate filter inside _collect). + other_kps = [kp for j, kp in enumerate(self._key_poses) if j != cur_idx] + other_idx_map = [j for j in range(len(self._key_poses)) if j != cur_idx] + opt_positions = np.array([np.asarray(kp.optimized.translation()) for kp in other_kps]) + loc_positions = np.array([np.asarray(kp.local.translation()) for kp in other_kps]) + + def _collect(local_idxs: set[int], time_thresh: float) -> list[tuple[float, int]]: + # local_idxs index into other_kps; map back to global keyframe index. + cands = [] + for li in local_idxs: + gi = other_idx_map[li] + if abs(cur_ts - self._key_poses[gi].timestamp) <= time_thresh: + continue + d_opt = float( + np.linalg.norm( + np.asarray(self._key_poses[gi].optimized.translation()) - cur_opt_t + ) + ) + d_loc = float( + np.linalg.norm(np.asarray(self._key_poses[gi].local.translation()) - cur_loc_t) + ) + cands.append((min(d_opt, d_loc), gi)) + cands.sort() + return cands + + # Tight optimized-frame search first; if none survive the + # time-gap filter AND accumulated drift is large enough that we + # likely need a relocalization, fall back to wider local-frame + # search with a stricter time gap. + opt_radius_val = opt_radius if opt_radius is not None else self._cfg.loop_search_radius + opt_idxs = set(KDTree(opt_positions).query_ball_point(cur_opt_t, opt_radius_val)) + time_thresh = ( + time_thresh_override if time_thresh_override is not None else self._cfg.loop_time_thresh + ) + candidates = _collect(opt_idxs, time_thresh) + drift = float(np.linalg.norm(cur_opt_t - cur_loc_t)) + relocalizing = False + if not no_fallback and ( + force_fallback or (not candidates and drift > self._cfg.loop_fallback_drift_thresh) + ): + loc_idxs = set( + KDTree(loc_positions).query_ball_point( + cur_loc_t, self._cfg.loop_search_radius_local + ) + ) + candidates = _collect(loc_idxs - opt_idxs, self._cfg.loop_time_thresh_local) + relocalizing = True + logger.info( + "loop fallback fired", + cur_idx=cur_idx, + drift=round(drift, 2), + n_candidates=len(candidates), + ) + if not candidates: + return + + # Try top-K candidates; keep all that pass the threshold (for + # relocalization fallback, multiple loops to different points in + # the past give the optimizer more constraint to anchor the + # disturbed segment). For the regular non-fallback path keep + # only the best one — the trajectory there is well-constrained + # already and extra loops add noise. Early-exit on a very tight + # match to save redundant ICP runs in the common case. + source = self._get_submap(cur_idx, source_half_range) + accepted: list[tuple[int, Transform, float]] = [] + k = ( + candidates_per_iter + if candidates_per_iter is not None + else self._cfg.loop_candidates_per_iter + ) + for _, loop_idx in candidates[:k]: + target_hr = ( + submap_half_range + if submap_half_range is not None + else self._cfg.loop_submap_half_range + ) + target = self._get_submap(loop_idx, target_hr) + max_dist = ( + icp_max_dist if icp_max_dist is not None else self._cfg.max_icp_correspondence_dist + ) + icp_tf, fitness = _icp( + source, + target, + max_iter=self._cfg.max_icp_iterations, + max_dist=max_dist, + min_inliers=self._cfg.min_icp_inliers, + ) + if fitness <= self._cfg.loop_score_thresh: + accepted.append((loop_idx, icp_tf, fitness)) + if ( + not relocalizing + and not force_multi + and fitness <= self._cfg.loop_score_thresh_tight + ): + break + if not accepted: + return + accepted.sort(key=lambda x: x[2]) + keep = accepted if (relocalizing or force_multi) else [accepted[0]] + + for loop_idx, icp_tf, fitness in keep: + # icp_tf takes cur_kp.optimized -> refined pose (correcting the drift). + # offset = loop_kp.optimized^-1 * refined = relative pose from loop to cur. + icp_pose = _transform_to_pose3(icp_tf) + refined = icp_pose.compose(cur_kp.optimized) + offset = self._key_poses[loop_idx].optimized.between(refined) + + self._pending_loops.append( + _LoopPair( + source=cur_idx, + target=loop_idx, + offset=offset, + score=fitness, + relocalizing=relocalizing, + ) + ) + logger.info( + "Loop closure detected", + source=cur_idx, + target=loop_idx, + score=round(fitness, 4), + reloc=relocalizing, + ) + self._last_loop_ts = cur_ts + + def _smooth_and_update(self) -> None: + gtsam = self._gtsam + has_loop = bool(self._pending_loops) + + for pair in self._pending_loops: + # Pose3 noise is [rx, ry, rz, x, y, z]. The two halves have + # different units (rad² vs m²), so a uniform variance silently + # makes one half pathological. Use ICP fitness as the + # *translation* variance and a generous fixed rotation variance + # — loops shouldn't be trusted to fix rotation tightly without + # normals + p2plane. + trans_var = max(0.005, float(pair.score)) # >= sigma_trans ~7 cm + rot_var = 0.003 # sigma_rot ~ 3.1 deg (large submap p2plane ICP, tight rotation) + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) + ) + self._graph.add(gtsam.BetweenFactorPose3(pair.target, pair.source, pair.offset, noise)) + self._accepted_loops.extend(self._pending_loops) + self._pending_loops.clear() + + self._isam2.update(self._graph, self._values) + self._isam2.update() + if has_loop: + for _ in range(self._cfg.loop_closure_extra_iterations): + self._isam2.update() + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + estimates = self._isam2.calculateBestEstimate() + for i in range(len(self._key_poses)): + self._key_poses[i].optimized = estimates.atPose3(i) + + last = self._key_poses[-1] + self._world_correction = last.optimized.compose(last.local.inverse()) + + +def _pose3_to_transform( + pose: gtsam.Pose3, + *, + ts: float, + frame_id: str = "", + child_frame_id: str = "", +) -> Transform: + """PGO-internal: build a Transform from a Pose3.""" + t = np.asarray(pose.translation()) + return Transform( + translation=Vector3(float(t[0]), float(t[1]), float(t[2])), + rotation=Quaternion.from_rotation_matrix(pose.rotation().matrix()), + frame_id=frame_id, + child_frame_id=child_frame_id, + ts=ts, + ) + + +def _transform_to_pose3(tf: Transform) -> gtsam.Pose3: + """PGO-internal: build a Pose3 from a Transform.""" + import gtsam # type: ignore[import-not-found,import-untyped] + + return gtsam.Pose3(tf.to_matrix()) + + +def _icp( + source: PointCloud2, + target: PointCloud2, + max_iter: int = 50, + max_dist: float = 1.0, + tol: float = 1e-6, + min_inliers: int = 10, + init: Transform | None = None, +) -> tuple[Transform, float]: + """Point-to-plane ICP using Open3D's tensor pipeline. + + Returns ``(tf, fitness)`` where ``fitness`` is mean squared inlier + distance (m^2) — the GTSAM noise model in `_smooth_and_update` uses + this directly as sigma_trans squared. On rejection (too few points or + zero correspondences) returns the identity transform and inf fitness. + """ + + if len(source) < min_inliers or len(target) < min_inliers: + return Transform.identity(), float("inf") + + src_pcd = source.pointcloud_tensor + tgt_pcd = target.pointcloud_tensor + + # Normals on the target enable point-to-plane ICP — converges tighter + # than point-to-point on indoor scenes (walls give unambiguous normals + # that resolve the slide-along-wall ambiguity). + tgt_pcd.estimate_normals(max_nn=30, radius=0.3) + + device = src_pcd.device + init_T = o3c.Tensor( + init.to_matrix() if init is not None else np.eye(4), + dtype=o3c.float64, + device=device, + ) + + # Silence Open3D's "0 correspondence" warning — we deliberately use a + # tight max_correspondence_distance and reject loops with poor fitness. + # Treat singular-system failures as rejection rather than aborting. + try: + with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): + result = o3d.t.pipelines.registration.icp( + source=src_pcd, + target=tgt_pcd, + max_correspondence_distance=max_dist, + init_source_to_target=init_T, + estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), + criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( + relative_fitness=tol, + relative_rmse=tol, + max_iteration=max_iter, + ), + ) + except RuntimeError: + return Transform.identity(), float("inf") + + if float(result.fitness) == 0.0: + return Transform.identity(), float("inf") + + T_mat = result.transformation.numpy() + rmse = float(result.inlier_rmse) + return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse diff --git a/dimos/mapping/loop_closure/autoresearch/program.md b/dimos/mapping/loop_closure/autoresearch/program.md new file mode 100644 index 0000000000..965b493e5b --- /dev/null +++ b/dimos/mapping/loop_closure/autoresearch/program.md @@ -0,0 +1,221 @@ +# loop-closure autoresearch + +Adapted from [karpathy/autoresearch](https://github.com/karpathy/autoresearch). +The agent iteratively tunes ICP / pose-graph code to minimize marker +position drift across recorded sessions. + +| nanochat | here | +|---|---| +| `train.py` (edit) | `pgo.py` | +| `prepare.py` (read-only) | `eval.py` | +| `val_bpb` (lower better) | `TOTAL_SPREAD` (lower better) | +| 5-min wall budget | data-bounded, ~2 min sequential | + +## Setup + +1. **Agree on a run tag**: propose one based on today's date (e.g. + `mar5`). The branch `autoresearch/loopclose-` must not already + exist — this is a fresh run. +2. **Create the branch**: `git checkout -b autoresearch/loopclose-` + from current `main`. +3. **Read the in-scope files**: + - `dimos/mapping/loop_closure/pgo.py` — PGO + ICP + loop detection. + This is the file you edit. + - `dimos/mapping/loop_closure/eval.py` — the eval harness. Read it to + understand the metric; do not modify. + - `dimos/perception/fiducial/marker_transformer.py` — fiducial + detector. Held fixed for fair comparison; do not modify. +4. **Verify LFS data**: the first eval run will pull + `hk_village1..6.db` from LFS automatically via `get_data`. If LFS + isn't configured, tell the human to run `git lfs install && git lfs pull`. +5. **Initialize `results.tsv`**: header row only. The baseline goes in + after the first eval. +6. **Confirm and go**: confirm setup looks good. + +Once you get confirmation, kick off experimentation. + +## Experimentation + +**What you CAN do:** +- Modify `dimos/mapping/loop_closure/pgo.py` — anything in this file is + fair game: `PGOConfig` defaults, loop-detection logic, ICP setup, + GTSAM noise models, keyframe selection rules, the inner classes, + whatever. + +**What you CANNOT do:** +- Modify `eval.py` or any of its dependencies (`marker_transformer.py`, + detection types, the recordings). The eval harness is the ground + truth metric. +- Change the **public surface** of `pgo.py`. The signatures of + `pgo_keyframes`, `keyframes_to_corrections`, `make_interpolator`, and + `apply_corrections` must stay the same so `eval.py` (and downstream + consumers) keep working. You can change their internals freely. +- Add new dependencies or modify other files in the repo. + +**The goal: lowest `TOTAL_SPREAD` (meters).** Secondary: lower +`WALL_TIME` (seconds) for ties. The metric is per-recording sum of +pairwise distances between PGO-corrected marker positions for the same +`marker_id`, summed across all six `hk_village*` recordings. Smaller = +tighter loop closures = the same physical marker is placed in the same +world spot every time the tracker re-acquires it. + +**Simplicity criterion**: All else being equal, simpler wins. A small +spread improvement that adds 30 lines of GTSAM hackery is probably not +worth it. Deleting code and getting equal or better results is a great +outcome. + +**The first run**: establish the baseline with `pgo.py` unmodified. + +## Investigation is part of the loop + +Twiddling knobs blindly is the slow way. Between experiments, **look at +the actual data** — this is encouraged, not a detour from "real" work. +A single discard doesn't mean a direction is dead; it means *this +specific implementation* didn't help. Before abandoning an idea, +investigate *why* the metric moved (or didn't): + +- **Run per-recording with `--verbose`** to see live loop-closure events + with scores and source/target keyframe indices: + `uv run python -m dimos.mapping.loop_closure.eval hk_village2 -v`. + A param change that, say, *adds* loop closures but worsens spread + tells a very different story than one that *removes* them. +- **Use `dimos/utils/cli/map.py`** as the visualization companion. It + runs the same PGO + marker pipeline as the eval and renders the + result in rerun. Use `--no-gui` for headless inspection or run with + GUI locally to scrub the timeline and see where markers cluster, where + the path drifts, when loop closures fire: + `uv run dimos map hk_village2 --pgo --markers --no-gui` +- **Write small throwaway scripts** in `/tmp` to interrogate state that + the eval doesn't surface — per-marker spread breakdown, loop closure + score distribution, keyframe density over time, etc. The eval is + intentionally one number for tuning; the script lets you SEE. +- **Read `pgo.py`** end-to-end before changing a knob whose effect you + can't predict. The docstrings and inline comments explain the + invariants. Same for `marker_transformer.py` if you're not sure what + `track_id` / `marker_id` mean. +- **Form a theory, then test it.** "If I raise `min_icp_inliers`, I + expect fewer accepted closures but tighter mean_score and possibly + worse spread because the graph has fewer constraints." Then run, and + *check whether the observed change matches your prediction.* When it + doesn't, that's a learning, not a failure. + +Investigation runs don't count toward the experiment log unless you +also commit a `pgo.py` change. They cost a few minutes; the +understanding compounds across the rest of the night. + +## Running the eval + +```bash +uv run python -m dimos.mapping.loop_closure.eval > run.log 2>&1 +grep "^TOTAL_\|^WALL_" run.log +``` + +Output trailer looks like: + +``` +TOTAL_PGO_TIME=37.86 +TOTAL_SPREAD=48.811 ← primary metric, lower is better +TOTAL_LOOPS=42 +TOTAL_LOOP_SCORE_MEAN=0.0584 +TOTAL_KEYFRAMES=720 +WALL_TIME=115.82 ← secondary metric, lower is better +``` + +## Logging results + +Log every experiment to `results.tsv` (tab-separated). Do **not** +commit this file — it's per-branch scratch state, in `.gitignore`-style +spirit. The header row plus seven columns: + +``` +commit spread_m wall_s n_loops mean_score n_keyframes status description +``` + +1. git commit hash (short, 7 chars) +2. `TOTAL_SPREAD` — use `0.000000` for crashes +3. `WALL_TIME` — use `0.0` for crashes +4. `TOTAL_LOOPS` +5. `TOTAL_LOOP_SCORE_MEAN` (`.4f`) +6. `TOTAL_KEYFRAMES` +7. status: `keep`, `discard`, or `crash` +8. one-line description of what this experiment tried + +Example: + +``` +commit spread_m wall_s n_loops mean_score n_keyframes status description +a1b2c3d 48.811 115.82 42 0.0584 720 keep baseline +b2c3d4e 43.220 118.31 55 0.0501 720 keep lower loop_score_thresh 0.1 -> 0.05 +c3d4e5f 47.118 119.04 30 0.0623 720 discard raise min_icp_inliers 50 -> 200 +d4e5f6g 0.000000 0.0 0 0.0000 0 crash drop GTSAM, use simple averaging +``` + +## The experiment loop + +The experiment runs on a dedicated branch (e.g. +`autoresearch/loopclose-mar5` or +`autoresearch/loopclose-mar5-gpu0`). + +LOOP FOREVER: + +1. Look at the git state: the current branch and commit. +2. Tune `pgo.py` with an experimental idea by editing the code + directly. +3. `git commit -am ""` +4. Run the eval: + `uv run python -m dimos.mapping.loop_closure.eval > run.log 2>&1` + (redirect everything — do NOT `tee` and don't dump the file into + your context). +5. Read the result: + `grep "^TOTAL_\|^WALL_" run.log` +6. If the grep output is empty, the run crashed. Run + `tail -n 80 run.log` to see the Python traceback and either fix it + (typo, missing import) or revert (idea fundamentally broken). +7. Record the result in `results.tsv` (keep it untracked). +8. Decision: + - `TOTAL_SPREAD` strictly **lower** than the current branch tip → + `keep` (advance the branch). + - `TOTAL_SPREAD` within ±0.5% of current AND `WALL_TIME` lower → + `keep` (compute simplification). + - Otherwise → `discard` (`git reset --hard HEAD~1`). + +You're a fully autonomous researcher. If something works, keep it. If +it doesn't, revert. The branch advances over time, and you iterate. + +**Timeout**: each eval should take ~2 minutes wall. If a run exceeds +10 minutes, kill it and treat it as a failure. + +**Crashes**: If a run crashes (GTSAM error, Open3D ICP failure, +import error, etc.), use judgment. Typos and missing imports → fix and +re-run. Fundamentally broken idea → log `crash` and move on. + +**NEVER STOP**: Once the loop has begun, do NOT pause to ask the human +if you should continue. Don't ask "should I keep going?" or "is this a +good stopping point?". The human might be asleep or away. You are +autonomous; the loop runs until you are manually stopped. + +**Don't give up on a direction after one bad result.** A single +`discard` is one data point about *one specific implementation*, not a +verdict on the underlying idea. If you tried "stricter ICP gating" and +it worsened spread, that doesn't mean ICP gating is the wrong knob — +maybe your value was wrong, maybe it interacted with another setting, +maybe the effect was on a recording you didn't expect. **Investigate +before you abandon** (see the Investigation section above): inspect the +per-recording numbers, run `dimos map ... --markers --no-gui` to see +where the spread actually comes from, write a 20-line script to print +loop-closure scores per recording. Often the next experiment in a +"failed" direction is the one that pays off, once you understand the +mechanism. + +**If you run out of ideas**: re-read `pgo.py` for angles you haven't +touched, look at the papers/code links in its docstrings, try +combining previous near-misses, try more radical changes (different +loop-detection radius scaling, different ICP estimator, swapping GTSAM +optimizer, etc.). Look at the recordings themselves — `dimos map +hk_villageN --pgo --markers` gives you ground truth for what the data +looks like. + +A typical use case: human leaves you running while they sleep. At +~2 min/eval you can run ~30/hour, or roughly 240 experiments over an +8-hour night. They wake up to a `results.tsv` full of attempts and a +branch advanced to the best. From 8c26042c0a9a7d8e820528dd4f6dbe0499bbe8c5 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 13:18:36 +0800 Subject: [PATCH 45/64] recordings, autoresearched pgo --- data/.lfs/go2dds_data1.tar.gz | 3 + ...go2_mid360_2026-05-29_4-45pm-PST.db.tar.gz | 3 + dimos/mapping/loop_closure/pgo_auto.py | 854 ++++++++++++++++++ 3 files changed, 860 insertions(+) create mode 100644 data/.lfs/go2dds_data1.tar.gz create mode 100644 data/.lfs/recording_go2_mid360_2026-05-29_4-45pm-PST.db.tar.gz create mode 100644 dimos/mapping/loop_closure/pgo_auto.py diff --git a/data/.lfs/go2dds_data1.tar.gz b/data/.lfs/go2dds_data1.tar.gz new file mode 100644 index 0000000000..f44d478ad5 --- /dev/null +++ b/data/.lfs/go2dds_data1.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfd237ce49cef1f96074ba02576b9a8094db6d6e0dc9ca24b63e5fdaa84acaf2 +size 224684671 diff --git a/data/.lfs/recording_go2_mid360_2026-05-29_4-45pm-PST.db.tar.gz b/data/.lfs/recording_go2_mid360_2026-05-29_4-45pm-PST.db.tar.gz new file mode 100644 index 0000000000..d30906b058 --- /dev/null +++ b/data/.lfs/recording_go2_mid360_2026-05-29_4-45pm-PST.db.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b89c53b5a32c9b262b821904031a66143132464e23413ff3f77fad98d363790 +size 1384605823 diff --git a/dimos/mapping/loop_closure/pgo_auto.py b/dimos/mapping/loop_closure/pgo_auto.py new file mode 100644 index 0000000000..d4ac46ef65 --- /dev/null +++ b/dimos/mapping/loop_closure/pgo_auto.py @@ -0,0 +1,854 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""PGO drift corrections as composable Stream stages. + +A lidar/odom stream comes in with poses that drift over time — the robot's +estimate of where it is in the world slowly diverges from ground truth as +small per-frame errors accumulate. PGO ("pose graph optimization") detects +revisited places via loop closure and pulls the trajectory back into +self-consistency. The result is a stream of *corrections*: a Transform per +keyframe that maps every drifted ("raw") pose to its corrected one. + +Pipeline: + + lidar: Stream[PointCloud2] # pose-stamped scans + -> pgo_keyframes(...) -> Stream[Keyframe] # one per kf + -> keyframes_to_corrections(...) -> Stream[Transform] # world_corrected <- world_raw + -> apply_corrections(any_stream, corrections) -> Stream[T] # obs.pose shuffled + +`pgo_keyframes` runs ISAM2 + ICP loop closure across the input, emitting +each kept keyframe with both its raw odom pose (`local`) and its optimized +pose (`optimized`) as Transforms. Pass `loop_closures_out=[]` to also +collect each accepted ICP loop edge as a `LoopClosure` (source+target +optimized poses + ICP fitness) — useful for visualizing the pose graph. + +`keyframes_to_corrections` is pure arithmetic: per keyframe, the drift +correction is `optimized @ local^-1` (Transform composition). The resulting +stream has one entry per keyframe — sparse in time. + +`make_interpolator` materializes the sparse correction stream into a +ts -> Transform function: SLERP for rotation, linear lerp for translation, +clipping out-of-range queries to the endpoints. + +`apply_corrections` shuffles `obs.pose` on any stream using the interpolated +correction at each obs.ts. It's read-only on `obs.data`, so the same lidar +stream can be re-applied — or a different recording from the same physical +space, if the corrections generalize. + +`gtsam` is imported lazily inside hot helpers so importing this module +stays cheap and gtsam-free for consumers that only need `Keyframe` / +`apply_corrections`. +""" + +from __future__ import annotations + +from collections.abc import Callable, Iterator +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Unpack + +import numpy as np +import open3d as o3d # type: ignore[import-untyped] +import open3d.core as o3c # type: ignore[import-untyped] +from scipy.spatial.transform import Rotation + +from dimos.memory2.store.memory import MemoryStore +from dimos.memory2.stream import Stream +from dimos.memory2.type.observation import Observation +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.protocol.service.spec import BaseConfig +from dimos.utils.logging_config import setup_logger + +if TYPE_CHECKING: + import gtsam # type: ignore[import-not-found,import-untyped] + +T = TypeVar("T") + +FRAME_WORLD_CORRECTED = "world_corrected" +FRAME_WORLD_RAW = "world_raw" +FRAME_BODY = "body" + +logger = setup_logger() + + +class PGOConfig(BaseConfig): + # Keyframe detection + key_pose_delta_trans: float = 0.5 + key_pose_delta_deg: float = 10.0 + + # Loop closure + loop_search_radius: float = 2.0 + loop_search_radius_local: float = 15.0 + loop_fallback_drift_thresh: float = 0.5 # min drift to trigger local fallback + loop_time_thresh: float = 20.0 + loop_time_thresh_local: float = 40.0 # stricter time gap for local fallback + loop_score_thresh: float = 0.3 + loop_score_thresh_tight: float = 0.05 # early-exit threshold: ICP rmse ~22cm + loop_submap_half_range: int = 40 + loop_candidates_per_iter: int = 7 + min_icp_inliers: int = 10 + min_keyframes_for_loop_search: int = 10 + loop_closure_extra_iterations: int = 2 + submap_resolution: float = 0.2 + min_loop_detect_duration: float = 3.0 + + # ICP + max_icp_iterations: int = 50 + max_icp_correspondence_dist: float = 0.5 + + +class PGOKwargs(TypedDict, total=False): + """Typed kwargs for `pgo_keyframes`; mirrors `PGOConfig` fields.""" + + key_pose_delta_trans: float + key_pose_delta_deg: float + loop_search_radius: float + loop_search_radius_local: float + loop_fallback_drift_thresh: float + loop_time_thresh: float + loop_time_thresh_local: float + loop_score_thresh: float + loop_score_thresh_tight: float + loop_submap_half_range: int + loop_candidates_per_iter: int + min_icp_inliers: int + min_keyframes_for_loop_search: int + loop_closure_extra_iterations: int + submap_resolution: float + min_loop_detect_duration: float + max_icp_iterations: int + max_icp_correspondence_dist: float + + +@dataclass(frozen=True) +class Keyframe: + """Keyframe emitted by `pgo_keyframes`. + + `local` is the body pose in the odom (`world_raw`) frame, `optimized` is + the body pose in the drift-corrected (`world_corrected`) frame. + """ + + ts: float + local: Transform + optimized: Transform + + +@dataclass(frozen=True) +class LoopClosure: + """An accepted ICP loop-closure edge in the pose graph. + + `source` and `target` are the final optimized body poses (in + `world_corrected`) of the two keyframes the loop connected. `score` is + the ICP fitness used as the loop's translation variance — lower is a + tighter match. + """ + + source: Transform + target: Transform + score: float + + +def pgo_keyframes( + stream: Stream[PointCloud2], + *, + on_frame: Callable[[Any], None] | None = None, + loop_closures_out: list[LoopClosure] | None = None, + **pgo_cfg: Unpack[PGOKwargs], +) -> Stream[Keyframe]: + """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe. + + If `loop_closures_out` is provided, accepted loop edges are appended to + it (in detection order) using each keyframe's final optimized pose. + """ + cfg = PGOConfig(**pgo_cfg) + pgo = _PGO(cfg) + + for obs in stream: + if on_frame is not None: + on_frame(obs) + pose = obs.pose_tuple + if pose is None: + continue + # Skip placeholder poses (origin position OR zero quaternion). + if pose[0] == 0 and pose[1] == 0 and pose[2] == 0: + continue + if pose[3] == 0 and pose[4] == 0 and pose[5] == 0 and (pose[6] == 0 or pose[6] == 1): + continue + local_pose = _obs_to_pose3(obs) + pgo.process(local_pose, obs.ts, obs.data) + + mem = MemoryStore() + out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) + for kf in pgo.finalize(): + out.append(kf, ts=kf.ts) + if loop_closures_out is not None: + loop_closures_out.extend(pgo.loop_closures()) + return out + + +def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: + """Per-keyframe drift correction as Transform(world_corrected <- world_raw).""" + mem = MemoryStore() + out: Stream[Transform] = mem.stream("corrections", Transform) + for obs in keyframes: + kf = obs.data + # world_corrected <- body <- world_raw composes to world_corrected <- world_raw. + drift = kf.optimized + kf.local.inverse() + out.append(drift, ts=kf.ts) + return out + + +def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]: + """Materialize corrections once; return a fast ts -> Transform lookup.""" + ts_list: list[float] = [] + R_list: list[np.ndarray] = [] + t_list: list[np.ndarray] = [] + for obs in corrections: + R, t = _r_t_from_transform(obs.data) + # obs.ts is authoritative; obs.data.ts can be mutated by Transform's + # ts=0.0 -> time.time() fallback in its constructor. + ts_list.append(obs.ts) + R_list.append(R) + t_list.append(t) + + if not ts_list: + raise ValueError("empty corrections stream") + + # Pad len==1 with a duplicate so the general path handles it; + # clip-to-endpoints behavior is unchanged. + if len(ts_list) == 1: + ts_list.append(ts_list[0] + 1e-6) + R_list.append(R_list[0]) + t_list.append(t_list[0]) + + ts_arr = np.array(ts_list) + R_stack = np.stack(R_list) + t_stack = np.stack(t_list) + + def interp(ts: float) -> Transform: + ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) + idx = int(np.searchsorted(ts_arr, ts_clip)) + if idx == 0: + t = t_stack[0] + R = R_stack[0] + elif idx >= len(ts_arr): + t = t_stack[-1] + R = R_stack[-1] + else: + t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] + # Nearest-neighbor in time: avoids blending two adjacent + # corrections that may straddle a loop-closure update. + near_lo = (ts_clip - t_lo) < (t_hi - ts_clip) + t = t_stack[idx - 1] if near_lo else t_stack[idx] + R = R_stack[idx - 1] if near_lo else R_stack[idx] + return _transform_from_r_t(R, t, ts=float(ts)) + + return interp + + +def apply_corrections( + stream: Stream[T], + corrections: Stream[Transform], +) -> Stream[T]: + """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts. + + `obs.data` is untouched. Frames with `obs.pose is None` pass through + unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped). + """ + interp = make_interpolator(corrections) + + def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: + for obs in upstream: + if obs.pose is None: + yield obs + continue + raw_tf = Transform.from_pose(FRAME_BODY, obs.pose_stamped) + # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw. + # Observation normalizes Transform back to 7-tuple via __post_init__. + corrected = interp(obs.ts) + raw_tf + yield obs.derive(data=obs.data, pose=corrected) + + return stream.transform(xf) + + +class PoseGraph: + """OOP view over PGO output — optimized keyframes, loop edges, and the + drift correction they imply. + + Wraps the functional pipeline (`pgo_keyframes` → `keyframes_to_corrections` + → `make_interpolator`) behind one object for callers that want a single + handle (e.g. the `dimos map` tool). + """ + + def __init__( + self, + keyframes: tuple[Keyframe, ...], + loops: tuple[LoopClosure, ...], + correct: Callable[[float], Transform], + ) -> None: + self.keyframes = keyframes + self.loops = loops + self._correct = correct + + def correction_at(self, ts: float) -> Transform: + """Drift correction (world_corrected <- world_raw) at `ts`.""" + return self._correct(ts) + + def correct(self, pose: Transform) -> Transform: + """Lift a world_raw pose into world_corrected.""" + return self._correct(pose.ts) + pose + + +class PGO: + """Transformer that runs PGO over a pose-stamped cloud stream and emits a + single `PoseGraph` observation. Thin OOP wrapper over `pgo_keyframes`. + + Use as ``stream.transform(PGO()).last().data``. + """ + + def __init__(self, **pgo_cfg: Unpack[PGOKwargs]) -> None: + self._cfg = pgo_cfg + + def __call__( + self, upstream: Iterator[Observation[PointCloud2]] + ) -> Iterator[Observation[PoseGraph]]: + loops: list[LoopClosure] = [] + kfs = pgo_keyframes(upstream, loop_closures_out=loops, **self._cfg) + keyframes = tuple(obs.data for obs in kfs) + if keyframes: + correct = make_interpolator(keyframes_to_corrections(kfs)) + last_ts = keyframes[-1].ts + else: + # No valid keyframes (e.g. pose-less input): correction is identity. + correct = lambda ts: Transform( # noqa: E731 + frame_id=FRAME_WORLD_CORRECTED, child_frame_id=FRAME_WORLD_RAW, ts=ts + ) + last_ts = 0.0 + yield Observation(ts=last_ts, _data=PoseGraph(keyframes, tuple(loops), correct)) + + +def _r_t_from_transform(tf: Transform) -> tuple[np.ndarray, np.ndarray]: + q = tf.rotation + R = Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix() + t = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) + return R, t + + +def _transform_from_r_t(R: np.ndarray, t: np.ndarray, *, ts: float) -> Transform: + return Transform( + translation=Vector3(float(t[0]), float(t[1]), float(t[2])), + rotation=Quaternion.from_rotation_matrix(R), + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_WORLD_RAW, + ts=ts, + ) + + +def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3: + """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`.""" + import gtsam # type: ignore[import-not-found,import-untyped] + + if obs.pose_tuple is None: + raise LookupError("No pose set on this observation") + x, y, z, qx, qy, qz, qw = obs.pose_tuple + return gtsam.Pose3( + gtsam.Rot3.Quaternion(float(qw), float(qx), float(qy), float(qz)), + gtsam.Point3(float(x), float(y), float(z)), + ) + + +@dataclass +class _KeyPose: + local: gtsam.Pose3 # odom-frame pose at capture + optimized: gtsam.Pose3 # drift-corrected pose + timestamp: float + body_cloud: PointCloud2 # voxel-downsampled, body frame + + +@dataclass +class _LoopPair: + source: int + target: int + offset: gtsam.Pose3 # source pose in target's frame + score: float + relocalizing: bool = False # True if this loop was found via local-frame fallback + + +class _PGO: + """Incremental PGO: gtsam ISAM2 over keyframes with ICP loop closures. + + Call `process` per frame (odom pose + body-frame points). Call + `finalize` once at the end for the sorted, deduped keyframe list. + """ + + def __init__(self, config: PGOConfig) -> None: + import gtsam # type: ignore[import-not-found,import-untyped] + + self._gtsam = gtsam + self._cfg = config + self._key_poses: list[_KeyPose] = [] + self._pending_loops: list[_LoopPair] = [] + self._accepted_loops: list[_LoopPair] = [] + self._last_loop_ts: float | None = None + self._world_correction: gtsam.Pose3 = gtsam.Pose3() # identity + + params = gtsam.ISAM2Params() + params.setRelinearizeThreshold(0.01) + params.relinearizeSkip = 1 + params.setOptimizationParams(gtsam.ISAM2DoglegParams()) + self._isam2 = gtsam.ISAM2(params) + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + def process( + self, + local_pose: gtsam.Pose3, + ts: float, + world_cloud: PointCloud2, + ) -> None: + if len(world_cloud) == 0: + return + if not self._is_keyframe(local_pose): + return + # Unregister: lift world-frame scan back into body frame using its + # odom pose, so PGO can re-project it via the optimized pose later. + body_cloud = world_cloud.transform( + _pose3_to_transform(local_pose.inverse(), ts=ts) + ).voxel_downsample(self._cfg.submap_resolution) + self._add_keyframe(local_pose, ts, body_cloud) + self._search_for_loops() + self._smooth_and_update() + + def finalize(self) -> list[Keyframe]: + """Return keyframes sorted by ts, with duplicate-ts entries dropped.""" + # Second-pass loop rescan over the converged optimized poses: + # keyframes that ended up in heavily drifted regions may have + # missed loops during incremental processing (their optimized + # pose at the moment was misleading). Re-search with the final + # poses gives us another chance to anchor those segments. + self._last_loop_ts = None + for i in range(len(self._key_poses)): + self._search_for_loops( + cur_idx=i, + enforce_time_gate=False, + no_fallback=True, + submap_half_range=38, + time_thresh_override=13.0, + ) + if self._pending_loops: + self._smooth_and_update() + kps = sorted(self._key_poses, key=lambda kp: kp.timestamp) + out: list[Keyframe] = [] + for i, kp in enumerate(kps): + if i > 0 and kp.timestamp <= kps[i - 1].timestamp: + continue + out.append( + Keyframe( + ts=kp.timestamp, + local=_pose3_to_transform( + kp.local, + ts=kp.timestamp, + frame_id=FRAME_WORLD_RAW, + child_frame_id=FRAME_BODY, + ), + optimized=_pose3_to_transform( + kp.optimized, + ts=kp.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ), + ) + ) + return out + + def loop_closures(self) -> list[LoopClosure]: + """Accepted loop edges, resolved to final optimized poses.""" + out: list[LoopClosure] = [] + for pair in self._accepted_loops: + src = self._key_poses[pair.source] + tgt = self._key_poses[pair.target] + out.append( + LoopClosure( + source=_pose3_to_transform( + src.optimized, + ts=src.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ), + target=_pose3_to_transform( + tgt.optimized, + ts=tgt.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ), + score=pair.score, + ) + ) + return out + + def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool: + if not self._key_poses: + return True + delta = self._key_poses[-1].local.inverse().compose(local_pose) + delta_trans = float(np.linalg.norm(np.asarray(delta.translation()))) + # Rotation magnitude from trace: cos(theta) = (tr(R) - 1) / 2. + R = delta.rotation().matrix() + cos_theta = float(np.clip((np.trace(R) - 1.0) / 2.0, -1.0, 1.0)) + delta_deg = float(np.degrees(np.arccos(cos_theta))) + return ( + delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg + ) + + def _add_keyframe( + self, + local_pose: gtsam.Pose3, + ts: float, + body_cloud: PointCloud2, + ) -> None: + gtsam = self._gtsam + idx = len(self._key_poses) + optimized = self._world_correction.compose(local_pose) + + self._values.insert(idx, optimized) + + if idx == 0: + noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) + self._graph.add(gtsam.PriorFactorPose3(idx, optimized, noise)) + else: + last_local = self._key_poses[-1].local + between = last_local.inverse().compose(local_pose) + # When the prior keyframe is already in a drifted region, + # loosen the chain variance for this step so loop closures + # can pull harder across the disturbance. + prev = self._key_poses[-1] + prev_drift = float( + np.linalg.norm( + np.asarray(prev.optimized.translation()) - np.asarray(prev.local.translation()) + ) + ) + trans_var = 8e-4 if prev_drift > 1.3 else 1e-4 + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([1e-6, 1e-6, 1e-6, trans_var, trans_var, 1e-6]) + ) + self._graph.add(gtsam.BetweenFactorPose3(idx - 1, idx, between, noise)) + + self._key_poses.append( + _KeyPose( + local=local_pose, + optimized=optimized, + timestamp=ts, + body_cloud=body_cloud, + ) + ) + + def _get_submap(self, idx: int, half_range: int) -> PointCloud2: + lo = max(0, idx - half_range) + hi = min(len(self._key_poses) - 1, idx + half_range) + if lo > hi: + return PointCloud2() + cloud = self._key_poses[lo].body_cloud.transform( + _pose3_to_transform(self._key_poses[lo].optimized, ts=self._key_poses[lo].timestamp) + ) + for i in range(lo + 1, hi + 1): + kp = self._key_poses[i] + cloud = cloud + kp.body_cloud.transform( + _pose3_to_transform(kp.optimized, ts=kp.timestamp) + ) + return cloud.voxel_downsample(self._cfg.submap_resolution) + + def _search_for_loops( + self, + cur_idx: int | None = None, + *, + enforce_time_gate: bool = True, + opt_radius: float | None = None, + force_fallback: bool = False, + no_fallback: bool = False, + submap_half_range: int | None = None, + icp_max_dist: float | None = None, + source_half_range: int = 0, + candidates_per_iter: int | None = None, + force_multi: bool = False, + time_thresh_override: float | None = None, + ) -> None: + if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: + return + if cur_idx is None: + cur_idx = len(self._key_poses) - 1 + + cur_kp = self._key_poses[cur_idx] + cur_ts = cur_kp.timestamp + if enforce_time_gate and ( + self._last_loop_ts is not None + and cur_ts - self._last_loop_ts < self._cfg.min_loop_detect_duration + ): + return + + cur_opt_t = np.asarray(cur_kp.optimized.translation()) + cur_loc_t = np.asarray(cur_kp.local.translation()) + + from scipy.spatial import KDTree + + # Search both optimized and local (odom) frames; exclude cur_idx + # from the candidate pool (and time-adjacent neighbors via the + # time gate filter inside _collect). + other_kps = [kp for j, kp in enumerate(self._key_poses) if j != cur_idx] + other_idx_map = [j for j in range(len(self._key_poses)) if j != cur_idx] + opt_positions = np.array([np.asarray(kp.optimized.translation()) for kp in other_kps]) + loc_positions = np.array([np.asarray(kp.local.translation()) for kp in other_kps]) + + def _collect(local_idxs: set[int], time_thresh: float) -> list[tuple[float, int]]: + # local_idxs index into other_kps; map back to global keyframe index. + cands = [] + for li in local_idxs: + gi = other_idx_map[li] + if abs(cur_ts - self._key_poses[gi].timestamp) <= time_thresh: + continue + d_opt = float( + np.linalg.norm( + np.asarray(self._key_poses[gi].optimized.translation()) - cur_opt_t + ) + ) + d_loc = float( + np.linalg.norm(np.asarray(self._key_poses[gi].local.translation()) - cur_loc_t) + ) + cands.append((min(d_opt, d_loc), gi)) + cands.sort() + return cands + + # Tight optimized-frame search first; if none survive the + # time-gap filter AND accumulated drift is large enough that we + # likely need a relocalization, fall back to wider local-frame + # search with a stricter time gap. + opt_radius_val = opt_radius if opt_radius is not None else self._cfg.loop_search_radius + opt_idxs = set(KDTree(opt_positions).query_ball_point(cur_opt_t, opt_radius_val)) + time_thresh = ( + time_thresh_override if time_thresh_override is not None else self._cfg.loop_time_thresh + ) + candidates = _collect(opt_idxs, time_thresh) + drift = float(np.linalg.norm(cur_opt_t - cur_loc_t)) + relocalizing = False + if not no_fallback and ( + force_fallback or (not candidates and drift > self._cfg.loop_fallback_drift_thresh) + ): + loc_idxs = set( + KDTree(loc_positions).query_ball_point( + cur_loc_t, self._cfg.loop_search_radius_local + ) + ) + candidates = _collect(loc_idxs - opt_idxs, self._cfg.loop_time_thresh_local) + relocalizing = True + logger.info( + "loop fallback fired", + cur_idx=cur_idx, + drift=round(drift, 2), + n_candidates=len(candidates), + ) + if not candidates: + return + + # Try top-K candidates; keep all that pass the threshold (for + # relocalization fallback, multiple loops to different points in + # the past give the optimizer more constraint to anchor the + # disturbed segment). For the regular non-fallback path keep + # only the best one — the trajectory there is well-constrained + # already and extra loops add noise. Early-exit on a very tight + # match to save redundant ICP runs in the common case. + source = self._get_submap(cur_idx, source_half_range) + accepted: list[tuple[int, Transform, float]] = [] + k = ( + candidates_per_iter + if candidates_per_iter is not None + else self._cfg.loop_candidates_per_iter + ) + for _, loop_idx in candidates[:k]: + target_hr = ( + submap_half_range + if submap_half_range is not None + else self._cfg.loop_submap_half_range + ) + target = self._get_submap(loop_idx, target_hr) + max_dist = ( + icp_max_dist if icp_max_dist is not None else self._cfg.max_icp_correspondence_dist + ) + icp_tf, fitness = _icp( + source, + target, + max_iter=self._cfg.max_icp_iterations, + max_dist=max_dist, + min_inliers=self._cfg.min_icp_inliers, + ) + if fitness <= self._cfg.loop_score_thresh: + accepted.append((loop_idx, icp_tf, fitness)) + if ( + not relocalizing + and not force_multi + and fitness <= self._cfg.loop_score_thresh_tight + ): + break + if not accepted: + return + accepted.sort(key=lambda x: x[2]) + keep = accepted if (relocalizing or force_multi) else [accepted[0]] + + for loop_idx, icp_tf, fitness in keep: + # icp_tf takes cur_kp.optimized -> refined pose (correcting the drift). + # offset = loop_kp.optimized^-1 * refined = relative pose from loop to cur. + icp_pose = _transform_to_pose3(icp_tf) + refined = icp_pose.compose(cur_kp.optimized) + offset = self._key_poses[loop_idx].optimized.between(refined) + + self._pending_loops.append( + _LoopPair( + source=cur_idx, + target=loop_idx, + offset=offset, + score=fitness, + relocalizing=relocalizing, + ) + ) + logger.info( + "Loop closure detected", + source=cur_idx, + target=loop_idx, + score=round(fitness, 4), + reloc=relocalizing, + ) + self._last_loop_ts = cur_ts + + def _smooth_and_update(self) -> None: + gtsam = self._gtsam + has_loop = bool(self._pending_loops) + + for pair in self._pending_loops: + # Pose3 noise is [rx, ry, rz, x, y, z]. The two halves have + # different units (rad² vs m²), so a uniform variance silently + # makes one half pathological. Use ICP fitness as the + # *translation* variance and a generous fixed rotation variance + # — loops shouldn't be trusted to fix rotation tightly without + # normals + p2plane. + trans_var = max(0.005, float(pair.score)) # >= sigma_trans ~7 cm + rot_var = 0.003 # sigma_rot ~ 3.1 deg (large submap p2plane ICP, tight rotation) + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) + ) + self._graph.add(gtsam.BetweenFactorPose3(pair.target, pair.source, pair.offset, noise)) + self._accepted_loops.extend(self._pending_loops) + self._pending_loops.clear() + + self._isam2.update(self._graph, self._values) + self._isam2.update() + if has_loop: + for _ in range(self._cfg.loop_closure_extra_iterations): + self._isam2.update() + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + estimates = self._isam2.calculateBestEstimate() + for i in range(len(self._key_poses)): + self._key_poses[i].optimized = estimates.atPose3(i) + + last = self._key_poses[-1] + self._world_correction = last.optimized.compose(last.local.inverse()) + + +def _pose3_to_transform( + pose: gtsam.Pose3, + *, + ts: float, + frame_id: str = "", + child_frame_id: str = "", +) -> Transform: + """PGO-internal: build a Transform from a Pose3.""" + t = np.asarray(pose.translation()) + return Transform( + translation=Vector3(float(t[0]), float(t[1]), float(t[2])), + rotation=Quaternion.from_rotation_matrix(pose.rotation().matrix()), + frame_id=frame_id, + child_frame_id=child_frame_id, + ts=ts, + ) + + +def _transform_to_pose3(tf: Transform) -> gtsam.Pose3: + """PGO-internal: build a Pose3 from a Transform.""" + import gtsam # type: ignore[import-not-found,import-untyped] + + return gtsam.Pose3(tf.to_matrix()) + + +def _icp( + source: PointCloud2, + target: PointCloud2, + max_iter: int = 50, + max_dist: float = 1.0, + tol: float = 1e-6, + min_inliers: int = 10, + init: Transform | None = None, +) -> tuple[Transform, float]: + """Point-to-plane ICP using Open3D's tensor pipeline. + + Returns ``(tf, fitness)`` where ``fitness`` is mean squared inlier + distance (m^2) — the GTSAM noise model in `_smooth_and_update` uses + this directly as sigma_trans squared. On rejection (too few points or + zero correspondences) returns the identity transform and inf fitness. + """ + + if len(source) < min_inliers or len(target) < min_inliers: + return Transform.identity(), float("inf") + + src_pcd = source.pointcloud_tensor + tgt_pcd = target.pointcloud_tensor + + # Normals on the target enable point-to-plane ICP — converges tighter + # than point-to-point on indoor scenes (walls give unambiguous normals + # that resolve the slide-along-wall ambiguity). + tgt_pcd.estimate_normals(max_nn=30, radius=0.3) + + device = src_pcd.device + init_T = o3c.Tensor( + init.to_matrix() if init is not None else np.eye(4), + dtype=o3c.float64, + device=device, + ) + + # Silence Open3D's "0 correspondence" warning — we deliberately use a + # tight max_correspondence_distance and reject loops with poor fitness. + # Treat singular-system failures as rejection rather than aborting. + try: + with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): + result = o3d.t.pipelines.registration.icp( + source=src_pcd, + target=tgt_pcd, + max_correspondence_distance=max_dist, + init_source_to_target=init_T, + estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), + criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( + relative_fitness=tol, + relative_rmse=tol, + max_iteration=max_iter, + ), + ) + except RuntimeError: + return Transform.identity(), float("inf") + + if float(result.fitness) == 0.0: + return Transform.identity(), float("inf") + + T_mat = result.transformation.numpy() + rmse = float(result.inlier_rmse) + return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse From f0b1aafdccfb998368c2059005831fbc6215f9ce Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 15:16:58 +0800 Subject: [PATCH 46/64] map CLI: consolidate verbs, add pose-fill + snippet windowing Gather the `dimos map` toolset into one package and round it out: - Rename `reconstruct` -> `global`; add `rename`, `pose-fill`, `replay` (map_rrd), and `replay-marker` (markers_rrd) as first-class verbs. - Move the six verb modules into dimos/mapping/utils/ and rename each file to match its verb (globalmap, summary, rename, pose_fill, replay, replay_marker). Heavy imports deferred into each main() so `dimos --help` stays fast. - `global`/`replay`/`replay-marker` now write a .rrd (default ./.rrd) and auto-launch rerun unless --no-gui. - Add Stream.clip(seek, duration) and wire --seek/--duration into every recording-consuming verb so you can operate on snippets. `global` also gains --lidar to pick the lidar stream. - Add subprocess integration tests over go2_short using a short snippet; they caught and fixed a pose vs pose_tuple bug in replay-marker. --- .../loop_closure/autoresearch/program.md | 2 +- dimos/mapping/loop_closure/utils/autotrim.py | 2 +- .../cli/map.py => mapping/utils/globalmap.py} | 282 ++++++++++-------- .../{loop_closure => }/utils/pose_fill.py | 67 +++-- .../rename_streams.py => utils/rename.py} | 14 +- .../utils/map_rrd.py => utils/replay.py} | 71 +++-- .../markers_rrd.py => utils/replay_marker.py} | 57 ++-- .../{loop_closure => }/utils/summary.py | 2 +- dimos/memory2/stream.py | 11 + dimos/robot/cli/dimos.py | 14 +- dimos/robot/cli/test_map_cli.py | 161 ++++++++++ 11 files changed, 479 insertions(+), 204 deletions(-) rename dimos/{utils/cli/map.py => mapping/utils/globalmap.py} (71%) rename dimos/mapping/{loop_closure => }/utils/pose_fill.py (74%) rename dimos/mapping/{loop_closure/utils/rename_streams.py => utils/rename.py} (90%) rename dimos/mapping/{loop_closure/utils/map_rrd.py => utils/replay.py} (81%) rename dimos/mapping/{loop_closure/utils/markers_rrd.py => utils/replay_marker.py} (81%) rename dimos/mapping/{loop_closure => }/utils/summary.py (96%) create mode 100644 dimos/robot/cli/test_map_cli.py diff --git a/dimos/mapping/loop_closure/autoresearch/program.md b/dimos/mapping/loop_closure/autoresearch/program.md index 965b493e5b..55791f748d 100644 --- a/dimos/mapping/loop_closure/autoresearch/program.md +++ b/dimos/mapping/loop_closure/autoresearch/program.md @@ -79,7 +79,7 @@ investigate *why* the metric moved (or didn't): `uv run python -m dimos.mapping.loop_closure.eval hk_village2 -v`. A param change that, say, *adds* loop closures but worsens spread tells a very different story than one that *removes* them. -- **Use `dimos/utils/cli/map.py`** as the visualization companion. It +- **Use `dimos/mapping/utils/globalmap.py`** as the visualization companion. It runs the same PGO + marker pipeline as the eval and renders the result in rerun. Use `--no-gui` for headless inspection or run with GUI locally to scrub the timeline and see where markers cluster, where diff --git a/dimos/mapping/loop_closure/utils/autotrim.py b/dimos/mapping/loop_closure/utils/autotrim.py index fecdbea63f..1eb16bc153 100644 --- a/dimos/mapping/loop_closure/utils/autotrim.py +++ b/dimos/mapping/loop_closure/utils/autotrim.py @@ -45,7 +45,7 @@ def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: - """Matches dimos/utils/cli/map.py:progress — kept inline to avoid pulling rerun.""" + """Matches dimos/mapping/utils/globalmap.py:progress — kept inline to avoid pulling rerun.""" seen = 0 wall_start: float | None = None last_wall: float | None = None diff --git a/dimos/utils/cli/map.py b/dimos/mapping/utils/globalmap.py similarity index 71% rename from dimos/utils/cli/map.py rename to dimos/mapping/utils/globalmap.py index c162a015ef..57a55b760a 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/mapping/utils/globalmap.py @@ -17,6 +17,7 @@ from collections.abc import Callable, Iterable import math from pathlib import Path +import subprocess import time from typing import TYPE_CHECKING, Any @@ -160,8 +161,136 @@ def _progress(obs: Observation[Any]) -> None: return _progress +def _log_reconstruction( + *, + voxel: float, + global_map: PointCloud2 | None, + path: list[tuple[float, float, float]], + pgo_map: PointCloud2 | None, + full_pgo_map: PointCloud2 | None, + pgo_path: list[tuple[float, float, float]], + graph: PoseGraph | None, + marker_dets: list[Observation[Any]], + marker_size: float, +) -> None: + """Log maps, paths, the PGO graph, and markers to the active rerun recording.""" + from dimos.memory2.vis.color import Color + from dimos.msgs.geometry_msgs.Transform import Transform + + rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) + if global_map is not None: + rr.log("world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True) + if path: + rr.log( + "world/raw_map/path", + rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[PATH_THICKNESS]), + static=True, + ) + if pgo_map is not None: + rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(voxel_size=voxel / 2), static=True) + if full_pgo_map is not None: + rr.log( + "world/full_pgo_map/pointcloud", + full_pgo_map.to_rerun(voxel_size=voxel / 2), + static=True, + ) + if pgo_path: + rr.log( + "world/pgo_map/path", + rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[PATH_THICKNESS]), + static=True, + ) + rr.log( + "world/pgo_map/pgo/keyframes", + rr.Points3D(positions=pgo_path, colors=[[255, 0, 0]], radii=[0.025]), + static=True, + ) + if graph is not None and graph.loops: + loop_strips = [ + [ + (lc.source.translation.x, lc.source.translation.y, lc.source.translation.z), + (lc.target.translation.x, lc.target.translation.y, lc.target.translation.z), + ] + for lc in graph.loops + ] + rr.log( + "world/pgo_map/pgo/loop_closures", + rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.025]), + static=True, + ) + if marker_dets: + half = marker_size / 2.0 + n = len(marker_dets) + fill_half = [(half, half, 0.005)] * n + # Outline sits just outside the fill so both stay visible. + outline_bump = marker_size * 0.05 + outline_half = [(half + outline_bump, half + outline_bump, 0.006)] * n + raw_centers = [(d.data.center.x, d.data.center.y, d.data.center.z) for d in marker_dets] + raw_quats = [ + (d.data.orientation.x, d.data.orientation.y, d.data.orientation.z, d.data.orientation.w) + for d in marker_dets + ] + # One entry per tracked marker session — color stable per track_id. + colors = [ + Color.from_cmap("tab10", (d.data.track_id % 10) / 10.0).rgb_u8() for d in marker_dets + ] + labels = [f"track={d.data.track_id} id={d.data.marker_id}" for d in marker_dets] + + _log_markers( + "world/raw_map/markers", + raw_centers, + raw_quats, + fill_half=fill_half, + outline_half=outline_half, + colors=colors, + labels=labels, + ) + + if graph is not None: + # PGO-correct each raw marker pose: lift it from world_raw into + # world_corrected so it lines up with pgo_map. + pgo_centers: list[tuple[float, float, float]] = [] + pgo_quats: list[tuple[float, float, float, float]] = [] + for d in marker_dets: + raw_tf = Transform( + translation=d.data.center, + rotation=d.data.orientation, + frame_id="world", + child_frame_id=f"marker_{d.data.marker_id}", + ts=d.ts, + ) + corrected = graph.correct(raw_tf) + pgo_centers.append( + (corrected.translation.x, corrected.translation.y, corrected.translation.z) + ) + pgo_quats.append( + ( + corrected.rotation.x, + corrected.rotation.y, + corrected.rotation.z, + corrected.rotation.w, + ) + ) + _log_markers( + "world/pgo_map/markers", + pgo_centers, + pgo_quats, + fill_half=fill_half, + outline_half=outline_half, + colors=colors, + labels=labels, + ) + + def main( dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), + lidar_stream: str = typer.Option( + "lidar", "--lidar", help="Lidar point-cloud stream to reconstruct" + ), + seek: float = typer.Option(0.0, "--seek", help="Skip the first N seconds of the recording"), + duration: float | None = typer.Option( + None, "--duration", help="Use only N seconds from --seek (default: to the end)" + ), voxel: float = typer.Option(0.05, "--voxel", help="Voxel size for the rebuild"), device: str = typer.Option( "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)" @@ -189,7 +318,10 @@ def main( "--full-pgo", help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)", ), - no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), + out: Path | None = typer.Option( + None, "--out", help="Output .rrd path (default: ./.rrd)" + ), + no_gui: bool = typer.Option(False, "--no-gui", help="Write the .rrd but don't launch rerun"), markers: bool = typer.Option( False, "--markers", @@ -224,25 +356,26 @@ def main( help="Sliding-window track buffer for marker pose averaging (s); 0 disables (one box per raw detection)", ), ) -> None: - """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" + """Rebuild a voxel map from a recorded SQLite dataset, write a .rrd, and open it in rerun.""" from dimos.mapping.loop_closure.pgo import PGO from dimos.memory2.store.sqlite import SqliteStore from dimos.memory2.transform import QualityWindow, SpeedLimit - from dimos.memory2.vis.color import Color - from dimos.msgs.geometry_msgs.Transform import Transform from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo from dimos.msgs.sensor_msgs.Image import Image + from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 from dimos.perception.fiducial.marker_transformer import DetectMarkers from dimos.robot.unitree.go2.connection import _camera_info_static from dimos.utils.data import resolve_named_path from dimos.visualization.rerun.init import rerun_init db_path = resolve_named_path(dataset, ".db") + if out is None: + out = Path.cwd() / f"{db_path.stem}.rrd" if export or full_pgo: pgo = True store = SqliteStore(path=db_path) - lidar = store.streams.lidar + lidar = store.stream(lidar_stream, PointCloud2).clip(seek, duration) print(lidar.summary()) @@ -325,7 +458,7 @@ def main( # frame_id="camera_optical", so obs.pose is already optical-in-world # (verified: matches lidar_base_pose + BASE_TO_OPTICAL to ~1mm). # No mount composition needed. - color_image = store.stream("color_image", Image) + color_image = store.stream("color_image", Image).clip(seek, duration) cam_info = CameraInfo.from_yaml(str(camera_info)) if camera_info else _camera_info_static() xf = DetectMarkers( camera_info=cam_info, @@ -361,125 +494,24 @@ def main( f"across {len(unique_ids)} unique ids {unique_ids}" ) - if not no_gui: - rerun_init("dimos map tool", spawn=True) - rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) - if global_map is not None: - rr.log( - "world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True - ) - if path: - rr.log( - "world/raw_map/path", - rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[PATH_THICKNESS]), - static=True, - ) - if pgo_map is not None: - rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(voxel_size=voxel / 2), static=True) - if full_pgo_map is not None: - rr.log( - "world/full_pgo_map/pointcloud", - full_pgo_map.to_rerun(voxel_size=voxel / 2), - static=True, - ) - if pgo_path: - rr.log( - "world/pgo_map/path", - rr.LineStrips3D( - strips=[pgo_path], colors=[[255, 255, 255]], radii=[PATH_THICKNESS] - ), - static=True, - ) - rr.log( - "world/pgo_map/pgo/keyframes", - rr.Points3D(positions=pgo_path, colors=[[255, 0, 0]], radii=[0.025]), - static=True, - ) - if graph is not None and graph.loops: - loop_strips = [ - [ - (lc.source.translation.x, lc.source.translation.y, lc.source.translation.z), - (lc.target.translation.x, lc.target.translation.y, lc.target.translation.z), - ] - for lc in graph.loops - ] - rr.log( - "world/pgo_map/pgo/loop_closures", - rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.025]), - static=True, - ) - if marker_dets: - half = marker_size / 2.0 - n = len(marker_dets) - fill_half = [(half, half, 0.005)] * n - # Outline sits just outside the fill so both stay visible. - outline_bump = marker_size * 0.05 - outline_half = [(half + outline_bump, half + outline_bump, 0.006)] * n - raw_centers = [(d.data.center.x, d.data.center.y, d.data.center.z) for d in marker_dets] - raw_quats = [ - ( - d.data.orientation.x, - d.data.orientation.y, - d.data.orientation.z, - d.data.orientation.w, - ) - for d in marker_dets - ] - # One entry per tracked marker session — color stable per track_id. - colors = [ - Color.from_cmap("tab10", (d.data.track_id % 10) / 10.0).rgb_u8() - for d in marker_dets - ] - labels = [f"track={d.data.track_id} id={d.data.marker_id}" for d in marker_dets] - - _log_markers( - "world/raw_map/markers", - raw_centers, - raw_quats, - fill_half=fill_half, - outline_half=outline_half, - colors=colors, - labels=labels, - ) - - if graph is not None: - # PGO-correct each raw marker pose: lift it from world_raw - # into world_corrected so it lines up with pgo_map. - pgo_centers: list[tuple[float, float, float]] = [] - pgo_quats: list[tuple[float, float, float, float]] = [] - for d in marker_dets: - raw_tf = Transform( - translation=d.data.center, - rotation=d.data.orientation, - frame_id="world", - child_frame_id=f"marker_{d.data.marker_id}", - ts=d.ts, - ) - corrected = graph.correct(raw_tf) - pgo_centers.append( - ( - corrected.translation.x, - corrected.translation.y, - corrected.translation.z, - ) - ) - pgo_quats.append( - ( - corrected.rotation.x, - corrected.rotation.y, - corrected.rotation.z, - corrected.rotation.w, - ) - ) - _log_markers( - "world/pgo_map/markers", - pgo_centers, - pgo_quats, - fill_half=fill_half, - outline_half=outline_half, - colors=colors, - labels=labels, - ) + rerun_init("dimos map tool") + rr.save(str(out)) + _log_reconstruction( + voxel=voxel, + global_map=global_map, + path=path, + pgo_map=pgo_map, + full_pgo_map=full_pgo_map, + pgo_path=pgo_path, + graph=graph, + marker_dets=marker_dets, + marker_size=marker_size, + ) + print(f"wrote {out}") + if no_gui: + print(f"open with: rerun {out}") + else: + subprocess.Popen(["rerun", str(out)]) if export and pgo_map is not None: out_path = Path.cwd() / f"{db_path.stem}.pc2.lcm" diff --git a/dimos/mapping/loop_closure/utils/pose_fill.py b/dimos/mapping/utils/pose_fill.py similarity index 74% rename from dimos/mapping/loop_closure/utils/pose_fill.py rename to dimos/mapping/utils/pose_fill.py index eee8ae08fa..dd0abbcf1f 100644 --- a/dimos/mapping/loop_closure/utils/pose_fill.py +++ b/dimos/mapping/utils/pose_fill.py @@ -30,11 +30,16 @@ from collections.abc import Iterable from pathlib import Path -from typing import Any, cast +from typing import TYPE_CHECKING, Any, cast -from dimos.memory2.backend import Backend -from dimos.memory2.stream import Stream -from dimos.memory2.type.observation import Observation +import typer + +# Heavy dimos imports are deferred (TYPE_CHECKING / inside functions) so that +# `dimos map --help` stays fast. See test_cli_startup.py. +if TYPE_CHECKING: + from dimos.memory2.backend import Backend + from dimos.memory2.stream import Stream + from dimos.memory2.type.observation import Observation def pose_fill( @@ -67,6 +72,8 @@ def pose_fill_db( pose_source: str = "odom", tolerance: float = 0.1, streams: list[str] | None = None, + seek: float = 0.0, + duration: float | None = None, ) -> dict[str, int]: """Copy a SQLite dataset to *dest_path*, baking *pose_source* poses into *target*. @@ -104,10 +111,12 @@ def pose_fill_db( # the source blob still resolves); plain metadata for everything else. if name == target: rows: Iterable[Observation[Any]] = pose_fill( - src.stream(name), src.stream(pose_source), tolerance=tolerance + src.stream(name).clip(seek, duration), + src.stream(pose_source).clip(seek, duration), + tolerance=tolerance, ) else: - rows = src.stream(name).order_by("ts") + rows = src.stream(name).clip(seek, duration).order_by("ts") scalar = src_b.data_type in (int, float) n = 0 @@ -131,23 +140,24 @@ def pose_fill_db( def main( - dataset: str, - out: str | None = None, - target: str = "lidar", - pose_source: str = "odom", - tolerance: float = 0.1, - streams: str | None = None, + dataset: str = typer.Argument(..., help="Source .db: bare name (cwd or data/) or path"), + out: str | None = typer.Option( + None, "--out", help="Output .db path (default: _posed.db beside the source)" + ), + target: str = typer.Option("lidar", "--target", help="Stream to bake poses into"), + pose_source: str = typer.Option( + "odom", "--pose-source", help="Stream to read the nearest poses from" + ), + tolerance: float = typer.Option(0.1, "--tolerance", help="Max |Δts| for a pose match (s)"), + streams: str | None = typer.Option( + None, "--streams", help="Comma-separated subset of streams to copy (default: all)" + ), + seek: float = typer.Option(0.0, "--seek", help="Skip the first N seconds of the recording"), + duration: float | None = typer.Option( + None, "--duration", help="Use only N seconds from --seek (default: to the end)" + ), ) -> None: - """Write a copy of *dataset* with *pose_source* poses baked into *target*. - - Run as ``python -m dimos.mapping.loop_closure.utils.pose_fill ``. - The output (``_posed.db`` by default) is then usable directly with - ``dimos map reconstruct --pgo``. - - *streams* is a comma-separated subset to copy (default: all) — restrict to - e.g. ``lidar,odom`` for a smaller map-only db instead of copying every - stream (images, fastlio, …). Blobs are copied verbatim either way. - """ + """Copy a recording, baking nearest pose_source poses into the target stream.""" from dimos.utils.data import resolve_named_path src = resolve_named_path(dataset, ".db") @@ -155,15 +165,20 @@ def main( names = [s.strip() for s in streams.split(",")] if streams else None print(f"pose-filling {src.name}: {target!r} <- nearest {pose_source!r} (±{tolerance}s)") written = pose_fill_db( - src, dest, target=target, pose_source=pose_source, tolerance=tolerance, streams=names + src, + dest, + target=target, + pose_source=pose_source, + tolerance=tolerance, + streams=names, + seek=seek, + duration=duration, ) for name, n in written.items(): print(f" {name}: {n}") print(f"wrote {dest}") - print(f"now run: dimos map reconstruct {dest.stem} --pgo") + print(f"now run: dimos map global {dest.stem} --pgo") if __name__ == "__main__": - import typer - typer.run(main) diff --git a/dimos/mapping/loop_closure/utils/rename_streams.py b/dimos/mapping/utils/rename.py similarity index 90% rename from dimos/mapping/loop_closure/utils/rename_streams.py rename to dimos/mapping/utils/rename.py index 73002aeb2a..afabff0f46 100644 --- a/dimos/mapping/loop_closure/utils/rename_streams.py +++ b/dimos/mapping/utils/rename.py @@ -20,7 +20,7 @@ immediately. Streams not mentioned in ``--rename`` are copied verbatim. Usage: - uv run python -m dimos.mapping.loop_closure.utils.rename_streams mid360 \\ + uv run python -m dimos.mapping.utils.rename mid360 \\ --out mid360_renamed.db \\ --rename go2_lidar=lidar \\ --rename lidar=fastlio_lidar \\ @@ -40,12 +40,13 @@ from dimos.memory2.codecs.base import _resolve_payload_type from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.stream import Stream from dimos.memory2.type.observation import Observation from dimos.utils.data import resolve_named_path def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: - """Matches dimos/utils/cli/map.py:progress — kept inline to avoid pulling rerun.""" + """Matches dimos/mapping/utils/globalmap.py:progress — kept inline to avoid pulling rerun.""" seen = 0 wall_start: float | None = None last_wall: float | None = None @@ -113,7 +114,12 @@ def main( drop: list[str] = typer.Option( [], "--drop", help="Stream name to omit from output (can be passed multiple times)" ), + seek: float = typer.Option(0.0, "--seek", help="Skip the first N seconds of the recording"), + duration: float | None = typer.Option( + None, "--duration", help="Use only N seconds from --seek (default: to the end)" + ), ) -> None: + """Copy a recording into a new .db, renaming and/or dropping streams.""" src_path = resolve_named_path(dataset, ".db") if out.exists(): raise typer.BadParameter(f"Output already exists: {out}") @@ -158,8 +164,8 @@ def main( with dst: for src_name, dst_name in kept.items(): ptype = payload_types[src_name] - src_s = src.stream(src_name, ptype) - dst_s = dst.stream(dst_name, ptype) + src_s: Stream[Any] = src.stream(src_name, ptype).clip(seek, duration) + dst_s: Stream[Any] = dst.stream(dst_name, ptype) total = src_s.count() cb = progress(total, f"{dst_name:>16s}") for obs in src_s: diff --git a/dimos/mapping/loop_closure/utils/map_rrd.py b/dimos/mapping/utils/replay.py similarity index 81% rename from dimos/mapping/loop_closure/utils/map_rrd.py rename to dimos/mapping/utils/replay.py index be54eb9f35..7527683f2b 100644 --- a/dimos/mapping/loop_closure/utils/map_rrd.py +++ b/dimos/mapping/utils/replay.py @@ -29,8 +29,8 @@ - ``world/camera/image`` — color_image frames Usage: - uv run python -m dimos.mapping.loop_closure.utils.map_rrd mid360 --out map.rrd - uv run python -m dimos.mapping.loop_closure.utils.map_rrd mid360 --out map.rrd --map + uv run python -m dimos.mapping.utils.replay mid360 --out map.rrd + uv run python -m dimos.mapping.utils.replay mid360 --out map.rrd --map rerun map.rrd """ @@ -38,29 +38,26 @@ from collections.abc import Callable from pathlib import Path +import subprocess import time -from typing import Any +from typing import TYPE_CHECKING, Any import rerun as rr import typer -from dimos.mapping.voxels import VoxelMapTransformer -from dimos.memory2.store.sqlite import SqliteStore -from dimos.memory2.stream import Stream -from dimos.memory2.transform import throttle -from dimos.memory2.type.observation import Observation -from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped -from dimos.msgs.nav_msgs.Odometry import Odometry -from dimos.msgs.sensor_msgs.Image import Image -from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2, register_colormap_annotation -from dimos.robot.unitree.go2.connection import _camera_info_static -from dimos.utils.data import resolve_named_path +# Heavy dimos imports (mapping/memory2 → torch, scipy, open3d) are deferred into +# main() so that `dimos map --help` stays fast. See test_cli_startup.py and the +# same pattern in dimos/mapping/utils/globalmap.py. +if TYPE_CHECKING: + from dimos.memory2.stream import Stream + from dimos.memory2.type.observation import Observation + from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 TIMELINE = "ts" def _progress(total: int, label: str) -> Callable[[Observation[Any]], None]: - """Matches dimos/utils/cli/map.py:progress.""" + """Matches dimos/mapping/utils/globalmap.py:progress.""" seen = 0 wall_start: float | None = None last_wall: float | None = None @@ -153,7 +150,14 @@ def _log_path( def main( dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), - out: Path = typer.Option(..., "--out", help="Output .rrd path"), + out: Path | None = typer.Option( + None, "--out", help="Output .rrd path (default: ./.rrd)" + ), + no_gui: bool = typer.Option(False, "--no-gui", help="Don't launch rerun on the result"), + seek: float = typer.Option(0.0, "--seek", help="Skip the first N seconds of the recording"), + duration: float | None = typer.Option( + None, "--duration", help="Use only N seconds from --seek (default: to the end)" + ), voxel: float = typer.Option( 0.05, "--voxel", help="Voxel size hint for the point cloud renderer" ), @@ -182,7 +186,20 @@ def main( help="Emit accumulated map every N frames (0 = only at end); --map only", ), ) -> None: + """Dump a recording to .rrd (lidar clouds + camera frames) and open it in rerun.""" + from dimos.mapping.voxels import VoxelMapTransformer + from dimos.memory2.store.sqlite import SqliteStore + from dimos.memory2.transform import throttle + from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped + from dimos.msgs.nav_msgs.Odometry import Odometry + from dimos.msgs.sensor_msgs.Image import Image + from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2, register_colormap_annotation + from dimos.robot.unitree.go2.connection import _camera_info_static + from dimos.utils.data import resolve_named_path + db_path = resolve_named_path(dataset, ".db") + if out is None: + out = Path.cwd() / f"{db_path.stem}.rrd" cam_info = _camera_info_static() rr.init("dimos map_rrd", recording_id=db_path.stem) @@ -208,10 +225,13 @@ def main( with store: print(store.summary()) - lidar = store.stream("lidar", PointCloud2) - color_image = store.stream("color_image", Image) + def clipped(name: str, ptype: type[Any]) -> Stream[Any]: + return store.stream(name, ptype).clip(seek, duration) + + lidar = clipped("lidar", PointCloud2) + color_image = clipped("color_image", Image) has_livox = "fastlio_lidar" in store.streams - livox = store.stream("fastlio_lidar", PointCloud2) if has_livox else None + livox = clipped("fastlio_lidar", PointCloud2) if has_livox else None # ---- per-frame raw clouds ---- _log_clouds(" lidar", lidar, "world/lidar", voxel, point_mode) @@ -251,7 +271,7 @@ def main( # ---- fastlio pose axis + path from fastlio_odometry stream ---- if "fastlio_odometry" in store.streams: - odometry = store.stream("fastlio_odometry", Odometry) + odometry = clipped("fastlio_odometry", Odometry) cb = _progress(odometry.count(), "fastlio_odometry") for obs in odometry: cb(obs) @@ -268,14 +288,14 @@ def main( ) _log_path( " fastlio_path", - store.stream("fastlio_odometry", Odometry), + clipped("fastlio_odometry", Odometry), "world/fastlio_path", color=(255, 165, 0), # orange ) # ---- Go2 native odom pose axis + path ---- if "odom" in store.streams: - odom = store.stream("odom", PoseStamped) + odom = clipped("odom", PoseStamped) cb = _progress(odom.count(), " odom") for odom_obs in odom: cb(odom_obs) @@ -292,7 +312,7 @@ def main( ) _log_path( " odom_path", - store.stream("odom", PoseStamped), + clipped("odom", PoseStamped), "world/odom_path", color=(0, 200, 100), # green ) @@ -317,7 +337,10 @@ def main( rr.log("world/camera/image", img_obs.data.to_rerun()) print(f"wrote {out}") - print(f"open with: rerun {out}") + if no_gui: + print(f"open with: rerun {out}") + else: + subprocess.Popen(["rerun", str(out)]) if __name__ == "__main__": diff --git a/dimos/mapping/loop_closure/utils/markers_rrd.py b/dimos/mapping/utils/replay_marker.py similarity index 81% rename from dimos/mapping/loop_closure/utils/markers_rrd.py rename to dimos/mapping/utils/replay_marker.py index eb5306edf7..0510792476 100644 --- a/dimos/mapping/loop_closure/utils/markers_rrd.py +++ b/dimos/mapping/utils/replay_marker.py @@ -20,7 +20,7 @@ - per detection: marker box in world frame, at the detection timestamp Usage: - uv run python -m dimos.utils.cli.markers_rrd hk_village1 --out hk.rrd + uv run python -m dimos.mapping.utils.replay_marker hk_village1 --out hk.rrd rerun hk.rrd Throwaway script next to ``map.py``; remove once the apriltag reliability work @@ -30,27 +30,31 @@ from __future__ import annotations from pathlib import Path -from typing import Any +import subprocess +from typing import TYPE_CHECKING, Any import rerun as rr import typer -from dimos.memory2.store.sqlite import SqliteStore -from dimos.memory2.stream import Stream -from dimos.memory2.transform import QualityWindow, SpeedLimit -from dimos.memory2.vis.color import Color -from dimos.msgs.sensor_msgs.Image import Image -from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 -from dimos.perception.fiducial.marker_transformer import DetectMarkers -from dimos.robot.unitree.go2.connection import _camera_info_static -from dimos.utils.data import resolve_named_path +# Heavy dimos imports (memory2/perception → torch, scipy, cv2) are deferred into +# main() so that `dimos map --help` stays fast. See test_cli_startup.py and the +# same pattern in dimos/mapping/utils/globalmap.py. +if TYPE_CHECKING: + from dimos.memory2.stream import Stream TIMELINE = "ts" def main( dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), - out: Path = typer.Option(..., "--out", help="Output .rrd path"), + out: Path | None = typer.Option( + None, "--out", help="Output .rrd path (default: ./.rrd)" + ), + no_gui: bool = typer.Option(False, "--no-gui", help="Don't launch rerun on the result"), + seek: float = typer.Option(0.0, "--seek", help="Skip the first N seconds of the recording"), + duration: float | None = typer.Option( + None, "--duration", help="Use only N seconds from --seek (default: to the end)" + ), marker_size: float = typer.Option(0.1, "--marker-size", help="Marker edge length (m)"), marker_max_speed: float = typer.Option( 0.5, "--marker-max-speed", help="Detection speed gate (m/s); 0 disables" @@ -65,7 +69,19 @@ def main( 7.5, "--smoothing-window", help="Buffer window for averaged-track pass (s); 0 disables" ), ) -> None: + """Dump an AprilTag detection replay to .rrd and open it in rerun.""" + from dimos.memory2.store.sqlite import SqliteStore + from dimos.memory2.transform import QualityWindow, SpeedLimit + from dimos.memory2.vis.color import Color + from dimos.msgs.sensor_msgs.Image import Image + from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 + from dimos.perception.fiducial.marker_transformer import DetectMarkers + from dimos.robot.unitree.go2.connection import _camera_info_static + from dimos.utils.data import resolve_named_path + db_path = resolve_named_path(dataset, ".db") + if out is None: + out = Path.cwd() / f"{db_path.stem}.rrd" cam_info = _camera_info_static() rr.init("dimos markers", recording_id=db_path.stem) @@ -79,15 +95,15 @@ def main( store = SqliteStore(path=str(db_path)) with store: - color_image = store.stream("color_image", Image) - lidar = store.stream("lidar", PointCloud2) + color_image = store.stream("color_image", Image).clip(seek, duration) + lidar = store.stream("lidar", PointCloud2).clip(seek, duration) # ---- pass 1: robot base pose over time (from lidar.pose) ---- for lidar_obs in lidar: - if lidar_obs.pose is None: + if lidar_obs.pose_tuple is None: continue rr.set_time(TIMELINE, timestamp=lidar_obs.ts) - x, y, z, qx, qy, qz, qw = lidar_obs.pose + x, y, z, qx, qy, qz, qw = lidar_obs.pose_tuple rr.log( "world/robot", rr.Transform3D( @@ -99,8 +115,8 @@ def main( n_img = color_image.count() for i, img_obs in enumerate(color_image): rr.set_time(TIMELINE, timestamp=img_obs.ts) - if img_obs.pose is not None: - x, y, z, qx, qy, qz, qw = img_obs.pose + if img_obs.pose_tuple is not None: + x, y, z, qx, qy, qz, qw = img_obs.pose_tuple rr.log( "world/camera", rr.Transform3D( @@ -226,7 +242,10 @@ def main( print(f"tracks: {len(seen_tracks)} unique, {n_updates} updates") print(f"wrote {out}") - print(f"open with: rerun {out}") + if no_gui: + print(f"open with: rerun {out}") + else: + subprocess.Popen(["rerun", str(out)]) if __name__ == "__main__": diff --git a/dimos/mapping/loop_closure/utils/summary.py b/dimos/mapping/utils/summary.py similarity index 96% rename from dimos/mapping/loop_closure/utils/summary.py rename to dimos/mapping/utils/summary.py index d34f8b902a..34a5721b37 100644 --- a/dimos/mapping/loop_closure/utils/summary.py +++ b/dimos/mapping/utils/summary.py @@ -15,7 +15,7 @@ """Print ``Store.summary()`` for a memory2 sqlite recording. Usage: - uv run python -m dimos.mapping.loop_closure.utils.summary mid360 + uv run python -m dimos.mapping.utils.summary mid360 """ from __future__ import annotations diff --git a/dimos/memory2/stream.py b/dimos/memory2/stream.py index 829f10966f..cff2e689c4 100644 --- a/dimos/memory2/stream.py +++ b/dimos/memory2/stream.py @@ -232,6 +232,17 @@ def at_relative(self, t: float, tolerance: float = 1.0) -> Stream[T, O]: t0 = self.first().ts return self.at(t0 + t, tolerance=tolerance) + def clip(self, seek: float = 0.0, duration: float | None = None) -> Stream[T, O]: + """Window to a snippet ``seek`` seconds in, ``duration`` seconds long. + + Offsets are relative to the first observation; ``duration=None`` runs to + the end. A no-op when ``seek<=0`` and ``duration is None``. + """ + if seek <= 0 and duration is None: + return self + start = self.first().ts + seek + return self.after(start) if duration is None else self.time_range(start, start + duration) + def near(self, pose: Any, radius: float) -> Stream[T, O]: # Accept Pose/PoseStamped (any object with `.position`), Vector3, # numpy arrays, or (x, y, z) tuples — Vector3() handles the rest. diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index e9b28ab9ab..4c92635570 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -38,9 +38,13 @@ from dimos.core.daemon import daemonize, install_signal_handlers from dimos.core.global_config import GlobalConfig, global_config from dimos.core.run_registry import get_most_recent, is_pid_alive, stop_entry -from dimos.mapping.loop_closure.utils.summary import main as _map_summary_main +from dimos.mapping.utils.globalmap import main as _map_main +from dimos.mapping.utils.pose_fill import main as _map_pose_fill_main +from dimos.mapping.utils.rename import main as _map_rename_main +from dimos.mapping.utils.replay import main as _map_replay_main +from dimos.mapping.utils.replay_marker import main as _map_replay_marker_main +from dimos.mapping.utils.summary import main as _map_summary_main from dimos.robot.unitree.go2.cli.go2tool import app as go2tool_app -from dimos.utils.cli.map import main as _map_main from dimos.utils.logging_config import setup_logger from dimos.visualization.rerun.constants import RerunOpenOption @@ -676,8 +680,12 @@ def send( map_app = typer.Typer(help="Voxel-map tools over recorded sqlite datasets") main.add_typer(map_app, name="map") -map_app.command("reconstruct")(_map_main) +map_app.command("global")(_map_main) map_app.command("summary")(_map_summary_main) +map_app.command("rename")(_map_rename_main) +map_app.command("pose-fill")(_map_pose_fill_main) +map_app.command("replay")(_map_replay_main) +map_app.command("replay-marker")(_map_replay_marker_main) @main.command() diff --git a/dimos/robot/cli/test_map_cli.py b/dimos/robot/cli/test_map_cli.py new file mode 100644 index 0000000000..4ec235d341 --- /dev/null +++ b/dimos/robot/cli/test_map_cli.py @@ -0,0 +1,161 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""End-to-end tests for the `dimos map` verbs, run as external subprocesses. + +Each test invokes the real CLI against the `go2_short` recording (60s, auto- +pulled via LFS) and asserts on the artifact it produces. A short `--duration` +snippet keeps every invocation to a few seconds, and `--no-gui` stops the rrd +verbs from spawning a rerun viewer. +""" + +from __future__ import annotations + +from pathlib import Path +import subprocess +import sys + +import pytest + +# A few seconds in, then a couple seconds long — small enough to stay fast, +# long enough that the robot moves (so dedup/PGO/markers have something to do). +SEEK = 4.0 +DURATION = 3.0 +# go2_short has 461 lidar frames over ~60s; a 3s snippet must be far fewer. +FULL_LIDAR_COUNT = 461 + + +def _run(*args: str, timeout: float = 300.0) -> subprocess.CompletedProcess[str]: + return subprocess.run( + [sys.executable, "-m", "dimos.robot.cli.dimos", "map", *args], + capture_output=True, + text=True, + timeout=timeout, + ) + + +def _stream_counts(db_path: Path) -> dict[str, int]: + from dimos.memory2.store.sqlite import SqliteStore + + store = SqliteStore(path=str(db_path), must_exist=True) + try: + return {name: store.stream(name).count() for name in store.list_streams()} + finally: + store.stop() + + +@pytest.fixture(scope="module") +def dataset() -> str: + """Ensure the go2_short recording is present locally; return its bare name.""" + from dimos.utils.data import get_data + + get_data("go2_short.db") + return "go2_short" + + +def test_summary(dataset: str) -> None: + res = _run("summary", dataset) + assert res.returncode == 0, res.stderr + assert "lidar" in res.stdout + assert "odom" in res.stdout + + +def test_rename_snippet(dataset: str, tmp_path: Path) -> None: + out = tmp_path / "renamed.db" + res = _run( + "rename", + dataset, + "--out", + str(out), + "--rename", + "odom=odometry", + "--duration", + str(DURATION), + ) + assert res.returncode == 0, res.stderr + assert out.exists() + + counts = _stream_counts(out) + # Stream was renamed, not duplicated. + assert "odometry" in counts + assert "odom" not in counts + assert "lidar" in counts + # --duration actually clipped the copy. + assert 0 < counts["lidar"] < FULL_LIDAR_COUNT + + +def test_pose_fill_snippet(dataset: str, tmp_path: Path) -> None: + out = tmp_path / "posed.db" + res = _run( + "pose-fill", + dataset, + "--out", + str(out), + "--streams", + "lidar,odom", + "--duration", + str(DURATION), + ) + assert res.returncode == 0, res.stderr + assert out.exists() + + from dimos.memory2.store.sqlite import SqliteStore + from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 + + store = SqliteStore(path=str(out), must_exist=True) + try: + lidar = store.stream("lidar", PointCloud2) + assert 0 < lidar.count() < FULL_LIDAR_COUNT + # The target stream carries a baked pose after pose-fill. + assert lidar.first().pose is not None + finally: + store.stop() + + +def test_replay_snippet(dataset: str, tmp_path: Path) -> None: + out = tmp_path / "replay.rrd" + res = _run("replay", dataset, "--out", str(out), "--no-gui", "--duration", str(DURATION)) + assert res.returncode == 0, res.stderr + assert out.exists() and out.stat().st_size > 0 + + +def test_replay_marker_snippet(dataset: str, tmp_path: Path) -> None: + out = tmp_path / "markers.rrd" + res = _run("replay-marker", dataset, "--out", str(out), "--no-gui", "--duration", str(DURATION)) + assert res.returncode == 0, res.stderr + assert out.exists() and out.stat().st_size > 0 + + +def test_global_snippet(dataset: str, tmp_path: Path) -> None: + out = tmp_path / "global.rrd" + # CPU device + small block budget keeps this runnable without a GPU. + # --seek exercises the snippet offset alongside --duration. + res = _run( + "global", + dataset, + "--out", + str(out), + "--no-gui", + "--seek", + str(SEEK), + "--duration", + str(DURATION), + "--device", + "CPU:0", + "--block-count", + "100000", + timeout=600.0, + ) + assert res.returncode == 0, res.stderr + assert out.exists() and out.stat().st_size > 0 From 8e51265df842e73d8cc1fd5392cdbf37ce547ec3 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 16:04:28 +0800 Subject: [PATCH 47/64] dataset validation guide --- ..._2026-05-29_4-45pm-PST_corrected.db.tar.gz | 3 + dimos/mapping/utils/assets/fastlio_lidar.png | 3 + dimos/mapping/utils/assets/go2_lidar.png | 3 + .../mapping/utils/assets/markers_fastlio.png | 3 + dimos/mapping/utils/assets/markers_go2.png | 3 + dimos/mapping/utils/dataset_validation.md | 48 ++++++++++++++++ dimos/mapping/utils/globalmap.py | 55 ++++++++++++++----- dimos/mapping/utils/pose_fill.py | 22 +++++++- 8 files changed, 123 insertions(+), 17 deletions(-) create mode 100644 data/.lfs/recording_go2_mid360_2026-05-29_4-45pm-PST_corrected.db.tar.gz create mode 100644 dimos/mapping/utils/assets/fastlio_lidar.png create mode 100644 dimos/mapping/utils/assets/go2_lidar.png create mode 100644 dimos/mapping/utils/assets/markers_fastlio.png create mode 100644 dimos/mapping/utils/assets/markers_go2.png create mode 100644 dimos/mapping/utils/dataset_validation.md diff --git a/data/.lfs/recording_go2_mid360_2026-05-29_4-45pm-PST_corrected.db.tar.gz b/data/.lfs/recording_go2_mid360_2026-05-29_4-45pm-PST_corrected.db.tar.gz new file mode 100644 index 0000000000..a91571ea98 --- /dev/null +++ b/data/.lfs/recording_go2_mid360_2026-05-29_4-45pm-PST_corrected.db.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e913ca4c640c1bfcce523d67fffbac80b3220d2ed94289cbb6bd966e1c68a78d +size 1365230944 diff --git a/dimos/mapping/utils/assets/fastlio_lidar.png b/dimos/mapping/utils/assets/fastlio_lidar.png new file mode 100644 index 0000000000..ee6bb6d17a --- /dev/null +++ b/dimos/mapping/utils/assets/fastlio_lidar.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc654b1c5837615fc14c0097f468c2337dcd17722ac41895081daf392666e62e +size 3000691 diff --git a/dimos/mapping/utils/assets/go2_lidar.png b/dimos/mapping/utils/assets/go2_lidar.png new file mode 100644 index 0000000000..2ce3d83882 --- /dev/null +++ b/dimos/mapping/utils/assets/go2_lidar.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:816287e864438b4eb951df649685db66cac7b278a8d95f5326459e6bdca78ab9 +size 1589502 diff --git a/dimos/mapping/utils/assets/markers_fastlio.png b/dimos/mapping/utils/assets/markers_fastlio.png new file mode 100644 index 0000000000..6e637a5473 --- /dev/null +++ b/dimos/mapping/utils/assets/markers_fastlio.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79045486d46148c44718bb2eaf000dfc12cf421c4db4762ffaaf12011f36e2a7 +size 3320057 diff --git a/dimos/mapping/utils/assets/markers_go2.png b/dimos/mapping/utils/assets/markers_go2.png new file mode 100644 index 0000000000..c3e93c8b5a --- /dev/null +++ b/dimos/mapping/utils/assets/markers_go2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c007ae1b46039d008620feee867e512fb81d4d39f9b3f712a380fe15e007692 +size 2260861 diff --git a/dimos/mapping/utils/dataset_validation.md b/dimos/mapping/utils/dataset_validation.md new file mode 100644 index 0000000000..ed83a39ba8 --- /dev/null +++ b/dimos/mapping/utils/dataset_validation.md @@ -0,0 +1,48 @@ +Dataset Validation + +```sh +dimos map summary recording_go2_mid360_2026-05-29_4-45pm-PST.db + +Stream("color_image"): 11141 items, 2026-05-29 23:32:57 — 2026-05-29 23:45:57 (780.1s) +Stream("fastlio_lidar"): 7240 items, 2026-05-29 23:32:56 — 2026-05-29 23:45:57 (781.7s) +Stream("fastlio_odometry"): 18737 items, 2026-05-29 23:32:56 — 2026-05-29 23:45:57 (781.8s) +Stream("lidar"): 6025 items, 2026-05-29 23:32:55 — 2026-05-29 23:45:57 (782.3s) +Stream("odom"): 14630 items, 2026-05-29 23:32:55 — 2026-05-29 23:45:57 (782.3s) +``` + +Shows which streams are in the database + +```sh +dimos map global recording_go2_mid360_2026-05-29_4-45pm-PST_corrected --voxel 0.1 --lidar lidar +``` + +validates that livox lidar observations have correct poses associated coming from livox odometry + +![output](assets/fastlio_lidar.png) + + +```sh +dimos map global recording_go2_mid360_2026-05-29_4-45pm-PST_corrected --voxel 0.1 --lidar lidar +``` + +validates that go2 lidar observations have correct poses associated coming from go2 odometry + +![output](assets/go2_lidar.png) + + +``` +dimos map global recording_go2_mid360_2026-05-29_4-45pm-PST_corrected --voxel 0.1 --lidar lidar --markers --pgo +``` + +validates that camera image has correct pose associated, `--pgo` is not needed but you can + +![output](assets/markers_go2.png) + + +```sh +dimos map global recording_go2_mid360_2026-05-29_4-45pm-PST_corrected --voxel 0.1 --lidar fastlio_lidar --markers --image-pose fastlio_odometry --duration 120 +``` + +Validates that transform between fastlio_odom and camera image is correct (below it isn't, we detect markers at the angle - mabye fastlio_odometry pose should be flat representing base_link not lidar mounting style) + +![output](assets/markers_fastlio.png) diff --git a/dimos/mapping/utils/globalmap.py b/dimos/mapping/utils/globalmap.py index 57a55b760a..6518ecf04f 100644 --- a/dimos/mapping/utils/globalmap.py +++ b/dimos/mapping/utils/globalmap.py @@ -303,7 +303,7 @@ def main( pgo_tol: float = typer.Option( 0.3, "--pgo-tol", - help="Spatial dedup tolerance (meters); applies to both raw and --pgo maps", + help="Spatial dedup tolerance (meters); applies to both raw and --pgo maps. 0 disables dedup (keep every posed frame)", ), block_count: int = typer.Option( 2_000_000, "--block-count", help="VoxelBlockGrid capacity (raw and PGO rebuilds)" @@ -332,6 +332,12 @@ def main( "--camera-info", help="YAML calibration file for --markers; defaults to Go2 builtin", ), + image_pose: str | None = typer.Option( + None, + "--image-pose", + help="Re-pose color_image from this stream's pose (composed with the camera " + "optical mount) before marker detection, instead of the image's stored pose", + ), marker_size: float = typer.Option( 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" ), @@ -364,7 +370,7 @@ def main( from dimos.msgs.sensor_msgs.Image import Image from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 from dimos.perception.fiducial.marker_transformer import DetectMarkers - from dimos.robot.unitree.go2.connection import _camera_info_static + from dimos.robot.unitree.go2.connection import BASE_TO_OPTICAL, _camera_info_static from dimos.utils.data import resolve_named_path from dimos.visualization.rerun.init import rerun_init @@ -383,9 +389,10 @@ def main( # Spatial dedup: bucket frames by 3D cell using the raw pose, keep the # latest per cell. Shared by raw and PGO rebuilds. Doesn't touch obs.data - # so it stays cheap (no pointcloud loading). - seen: dict[tuple[int, int, int], Observation[Any]] = {} - for obs in lidar: + # so it stays cheap (no pointcloud loading). With pgo_tol<=0 the bucketing + # is disabled and every posed frame is kept (keyed by index). + seen: dict[Any, Observation[Any]] = {} + for i, obs in enumerate(lidar): pose = obs.pose if pose is None: continue @@ -393,15 +400,25 @@ def main( # Same condition as pgo_keyframes so dedup and PGO see the same frames. if pose.position.is_zero() or pose.orientation.is_zero(): continue - t = pose.position - # math.floor so negative coords bucket consistently; int() truncates - # toward zero and silently folds -0.5 and 0.5 into the same cell. - cell = (math.floor(t.x / pgo_tol), math.floor(t.y / pgo_tol), math.floor(t.z / pgo_tol)) - seen[cell] = obs + if pgo_tol > 0: + t = pose.position + # math.floor so negative coords bucket consistently; int() truncates + # toward zero and silently folds -0.5 and 0.5 into the same cell. + key: Any = ( + math.floor(t.x / pgo_tol), + math.floor(t.y / pgo_tol), + math.floor(t.z / pgo_tol), + ) + else: + key = i + seen[key] = obs n_kept = len(seen) pct = 100 * n_kept / total if total else 0 - print(f"dedup: kept [{n_kept}/{total}] frames ({pct:.1f}%) at tol={pgo_tol}m") + if pgo_tol > 0: + print(f"dedup: kept [{n_kept}/{total}] frames ({pct:.1f}%) at tol={pgo_tol}m") + else: + print(f"dedup: disabled, kept all [{n_kept}/{total}] posed frames") # Dict insertion order = lidar iteration order = chronological. # `seen` only contains entries with non-None poses (filtered above). @@ -456,9 +473,17 @@ def main( if markers: # Image observations in dimos recordings are stamped with # frame_id="camera_optical", so obs.pose is already optical-in-world - # (verified: matches lidar_base_pose + BASE_TO_OPTICAL to ~1mm). - # No mount composition needed. + # (verified: matches lidar_base_pose + BASE_TO_OPTICAL to ~1mm). With + # --image-pose, swap that stored pose for a different source (e.g. + # fastlio_odometry), composing the base→optical mount onto it first. color_image = store.stream("color_image", Image).clip(seek, duration) + n_images = color_image.count() + if image_pose is not None: + from dimos.mapping.utils.pose_fill import pose_fill + + src_pose: Stream[Any] = store.stream(image_pose).clip(seek, duration) + print(f"re-posing color_image from {image_pose!r} + camera optical mount") + color_image = pose_fill(color_image, src_pose, tolerance=0.1, mount=BASE_TO_OPTICAL) cam_info = CameraInfo.from_yaml(str(camera_info)) if camera_info else _camera_info_static() xf = DetectMarkers( camera_info=cam_info, @@ -467,9 +492,9 @@ def main( ) # Keep the sharpest frame per --marker-quality-window window, then # drop frames where the robot was moving (linear + rotational) faster - # than the limits. Defaults match markers_rrd.py so positions agree. + # than the limits. Defaults match replay_marker.py so positions agree. pipeline: Stream[Image] = color_image.tap( - progress(color_image.count(), "detecting markers") + progress(n_images, "detecting markers") ).transform(QualityWindow(lambda img: img.sharpness, window=marker_quality_window)) if marker_max_speed > 0: pipeline = pipeline.transform( diff --git a/dimos/mapping/utils/pose_fill.py b/dimos/mapping/utils/pose_fill.py index dd0abbcf1f..c42072cbe1 100644 --- a/dimos/mapping/utils/pose_fill.py +++ b/dimos/mapping/utils/pose_fill.py @@ -40,10 +40,15 @@ from dimos.memory2.backend import Backend from dimos.memory2.stream import Stream from dimos.memory2.type.observation import Observation + from dimos.msgs.geometry_msgs.Transform import Transform def pose_fill( - stream: Stream[Any], pose_stream: Stream[Any], *, tolerance: float = 0.1 + stream: Stream[Any], + pose_stream: Stream[Any], + *, + tolerance: float = 0.1, + mount: Transform | None = None, ) -> Stream[Any]: """Re-pose each observation in *stream* from the nearest entry in *pose_stream*. @@ -53,13 +58,26 @@ def pose_fill( object exposing ``.position`` + ``.orientation``) because pose-message streams carry the pose in the value, not the indexed pose columns. The target stream's payload stays lazy. + + *mount* composes a static child transform onto each source pose + (``world_base ∘ mount``) before attaching — e.g. ``base_link → + camera_optical`` so a base-frame odometry source yields an optical-frame + image pose. """ + from dimos.memory2.type.observation import _to_tuple + from dimos.msgs.geometry_msgs.Quaternion import Quaternion + from dimos.msgs.geometry_msgs.Transform import Transform + from dimos.msgs.geometry_msgs.Vector3 import Vector3 def _fill(pair_obs: Observation[Any]) -> Observation[Any]: primary, secondary = cast( "tuple[Observation[Any], Observation[Any]]", pair_obs.data ) # AlignedPair(primary, secondary) - return primary.with_pose(secondary.data) + if mount is None: + return primary.with_pose(secondary.data) + x, y, z, qx, qy, qz, qw = cast("tuple[float, ...]", _to_tuple(secondary.data)) + world_base = Transform(translation=Vector3(x, y, z), rotation=Quaternion(qx, qy, qz, qw)) + return primary.with_pose(world_base + mount) return stream.align(pose_stream, tolerance=tolerance).map(_fill) From 25eecb59218bc448e54c26990ac93e274e2f8e89 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 16:11:56 +0800 Subject: [PATCH 48/64] Drop unused utils/cli/map.py (superseded by mapping.utils map verbs) --- dimos/utils/cli/map.py | 496 ----------------------------------------- 1 file changed, 496 deletions(-) delete mode 100644 dimos/utils/cli/map.py diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py deleted file mode 100644 index c162a015ef..0000000000 --- a/dimos/utils/cli/map.py +++ /dev/null @@ -1,496 +0,0 @@ -# Copyright 2026 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import annotations - -from collections.abc import Callable, Iterable -import math -from pathlib import Path -import time -from typing import TYPE_CHECKING, Any - -import rerun as rr -import rerun.blueprint as rrb -import typer - -# Heavy dimos imports (mapping/memory2 → torch, transformers, open3d, sklearn) are -# deferred into the function bodies below so that `dimos --help` — which imports this -# module just to register the `map` subcommand — stays fast. See test_cli_startup.py. -if TYPE_CHECKING: - from dimos.mapping.loop_closure.pgo import PoseGraph - from dimos.memory2.stream import Stream - from dimos.memory2.type.observation import Observation - from dimos.msgs.sensor_msgs.Image import Image - from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 - -PATH_THICKNESS = 0.01 -# Pin pattern (from dimos/memory2/vis/space/rerun.py): thin vertical line -# from each marker with the label floating at the top so multi-marker -# labels never overlap the boxes. -MARKER_STEM = 1.0 - - -def _log_markers( - prefix: str, - centers: list[tuple[float, float, float]], - quats: list[tuple[float, float, float, float]], - *, - fill_half: list[tuple[float, float, float]], - outline_half: list[tuple[float, float, float]], - colors: list[tuple[int, int, int]], - labels: list[str], -) -> None: - """Render per-marker fill + outline + pin-stem + label as four static entities.""" - n = len(centers) - pin_strips = [[(cx, cy, cz), (cx, cy, cz + MARKER_STEM)] for (cx, cy, cz) in centers] - label_positions = [(cx, cy, cz + MARKER_STEM + 0.01) for (cx, cy, cz) in centers] - rr.log( - f"{prefix}/fill", - rr.Boxes3D( - centers=centers, - half_sizes=fill_half, - quaternions=quats, - colors=colors, - fill_mode=rr.components.FillMode.Solid, - ), - static=True, - ) - rr.log( - f"{prefix}/outline", - rr.Boxes3D( - centers=centers, - half_sizes=outline_half, - quaternions=quats, - colors=[(255, 255, 255)] * n, - fill_mode=rr.components.FillMode.MajorWireframe, - radii=0.002, - ), - static=True, - ) - rr.log( - f"{prefix}/pin", - rr.LineStrips3D(strips=pin_strips, colors=colors, radii=[0.005]), - static=True, - ) - rr.log( - f"{prefix}/label", - rr.Points3D(positions=label_positions, labels=labels, colors=colors, radii=[0.001] * n), - static=True, - ) - - -def _accumulate( - obs_iter: Iterable[Observation[PointCloud2]], - *, - voxel: float, - block_count: int, - device: str, - graph: PoseGraph | None = None, - progress_cb: Callable[[Observation[Any]], None] | None = None, -) -> PointCloud2 | None: - """Accumulate a voxel map from `obs_iter`, optionally PGO-correcting each frame. - - Returns the final ``PointCloud2`` (or ``None`` if the input was empty). - Disposal of the underlying ``VoxelGrid`` is handled by ``VoxelMapTransformer``. - """ - from dimos.mapping.voxels import VoxelMapTransformer - - def prepared() -> Iterable[Observation[PointCloud2]]: - for obs in obs_iter: - if progress_cb is not None: - progress_cb(obs) - if len(obs.data) == 0: - continue - if graph is not None: - if obs.pose_tuple is None: - continue - yield obs.derive(data=obs.data.transform(graph.correction_at(obs.ts))) - else: - yield obs - - vmt = VoxelMapTransformer( - emit_every=0, # batch mode: emit once on exhaustion - voxel_size=voxel, - block_count=block_count, - device=device, - ) - result = next(iter(vmt(iter(prepared()))), None) - return result.data if result is not None else None - - -def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: - seen = 0 - wall_start: float | None = None - last_wall: float | None = None - first_ts: float | None = None - - def _progress(obs: Observation[Any]) -> None: - nonlocal seen, wall_start, last_wall, first_ts - now = time.monotonic() - if wall_start is None: - wall_start = now - first_ts = obs.ts - assert first_ts is not None # narrowed by the same `if` above - frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0 - last_wall = now - seen += 1 - pct = 100 * seen // total if total else 100 - wall = now - wall_start - data = obs.ts - first_ts - speed = data / wall if wall > 0 else 0.0 - end = "\n" if seen >= total else "" - prefix = f"{label} " if label else "" - print( - f"\r{prefix}{pct:>3}% [{seen}/{total}] {data:.1f}s ({speed:.1f} x rt) {frame_ms:.0f}ms/frame", - end=end, - flush=True, - ) - - return _progress - - -def main( - dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), - voxel: float = typer.Option(0.05, "--voxel", help="Voxel size for the rebuild"), - device: str = typer.Option( - "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)" - ), - pgo: bool = typer.Option( - False, - "--pgo", - help="Run pose graph optimization and rebuild from spatially-deduped frames", - ), - pgo_tol: float = typer.Option( - 0.3, - "--pgo-tol", - help="Spatial dedup tolerance (meters); applies to both raw and --pgo maps", - ), - block_count: int = typer.Option( - 2_000_000, "--block-count", help="VoxelBlockGrid capacity (raw and PGO rebuilds)" - ), - export: bool = typer.Option( - False, - "--export", - help="Export PGO map to ./.pc2.lcm in cwd (implies --pgo)", - ), - full_pgo: bool = typer.Option( - False, - "--full-pgo", - help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)", - ), - no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), - markers: bool = typer.Option( - False, - "--markers", - help="Detect AprilTag markers in color_image and overlay them in rerun", - ), - camera_info: Path | None = typer.Option( - None, - "--camera-info", - help="YAML calibration file for --markers; defaults to Go2 builtin", - ), - marker_size: float = typer.Option( - 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" - ), - marker_max_speed: float = typer.Option( - 0.5, - "--marker-max-speed", - help="Skip frames where robot is moving faster than this (m/s); 0 disables", - ), - marker_max_rot_rate: float = typer.Option( - 50.0, - "--marker-max-rot-rate", - help="Skip frames where robot is rotating faster than this (deg/s); 0 disables", - ), - marker_quality_window: float = typer.Option( - 0.1, - "--marker-quality-window", - help="Sharpest-frame window for marker detection (s)", - ), - marker_smoothing: float = typer.Option( - 7.5, - "--marker-smoothing", - help="Sliding-window track buffer for marker pose averaging (s); 0 disables (one box per raw detection)", - ), -) -> None: - """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" - from dimos.mapping.loop_closure.pgo import PGO - from dimos.memory2.store.sqlite import SqliteStore - from dimos.memory2.transform import QualityWindow, SpeedLimit - from dimos.memory2.vis.color import Color - from dimos.msgs.geometry_msgs.Transform import Transform - from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo - from dimos.msgs.sensor_msgs.Image import Image - from dimos.perception.fiducial.marker_transformer import DetectMarkers - from dimos.robot.unitree.go2.connection import _camera_info_static - from dimos.utils.data import resolve_named_path - from dimos.visualization.rerun.init import rerun_init - - db_path = resolve_named_path(dataset, ".db") - if export or full_pgo: - pgo = True - - store = SqliteStore(path=db_path) - lidar = store.streams.lidar - - print(lidar.summary()) - - total = lidar.count() - - # Spatial dedup: bucket frames by 3D cell using the raw pose, keep the - # latest per cell. Shared by raw and PGO rebuilds. Doesn't touch obs.data - # so it stays cheap (no pointcloud loading). - seen: dict[tuple[int, int, int], Observation[Any]] = {} - for obs in lidar: - pose = obs.pose - if pose is None: - continue - # Reject placeholder poses: zero translation OR uninitialized rotation. - # Same condition as pgo_keyframes so dedup and PGO see the same frames. - if pose.position.is_zero() or pose.orientation.is_zero(): - continue - t = pose.position - # math.floor so negative coords bucket consistently; int() truncates - # toward zero and silently folds -0.5 and 0.5 into the same cell. - cell = (math.floor(t.x / pgo_tol), math.floor(t.y / pgo_tol), math.floor(t.z / pgo_tol)) - seen[cell] = obs - - n_kept = len(seen) - pct = 100 * n_kept / total if total else 0 - print(f"dedup: kept [{n_kept}/{total}] frames ({pct:.1f}%) at tol={pgo_tol}m") - - # Dict insertion order = lidar iteration order = chronological. - # `seen` only contains entries with non-None poses (filtered above). - path: list[tuple[float, float, float]] = [ - (p[0], p[1], p[2]) for obs in seen.values() if (p := obs.pose_tuple) is not None - ] - - pgo_map = None - pgo_path: list[tuple[float, float, float]] = [] - graph: PoseGraph | None = None - if pgo: - print("running PGO twopass map...") - prog = progress(total, "pgo pass 1 (optimizing)") - graph = lidar.tap(prog).transform(PGO()).last().data - - pgo_path = [ - (kf.optimized.translation.x, kf.optimized.translation.y, kf.optimized.translation.z) - for kf in graph.keyframes - ] - - pgo_map = _accumulate( - seen.values(), - voxel=voxel, - block_count=block_count, - device=device, - graph=graph, - progress_cb=progress(n_kept, "pgo pass 2 (rebuilding)"), - ) - - full_pgo_map = None - if full_pgo: - assert graph is not None - full_pgo_map = _accumulate( - lidar, - voxel=voxel, - block_count=block_count, - device=device, - graph=graph, - progress_cb=progress(total, "full pgo (rebuilding)"), - ) - - # Raw map: same dedup'd frames, no PGO correction. - global_map = _accumulate( - seen.values(), - voxel=voxel, - block_count=block_count, - device=device, - progress_cb=progress(n_kept, "reconstructing global map"), - ) - - marker_dets: list[Observation[Any]] = [] - if markers: - # Image observations in dimos recordings are stamped with - # frame_id="camera_optical", so obs.pose is already optical-in-world - # (verified: matches lidar_base_pose + BASE_TO_OPTICAL to ~1mm). - # No mount composition needed. - color_image = store.stream("color_image", Image) - cam_info = CameraInfo.from_yaml(str(camera_info)) if camera_info else _camera_info_static() - xf = DetectMarkers( - camera_info=cam_info, - marker_length_m=marker_size, - smoothing_window=marker_smoothing, - ) - # Keep the sharpest frame per --marker-quality-window window, then - # drop frames where the robot was moving (linear + rotational) faster - # than the limits. Defaults match markers_rrd.py so positions agree. - pipeline: Stream[Image] = color_image.tap( - progress(color_image.count(), "detecting markers") - ).transform(QualityWindow(lambda img: img.sharpness, window=marker_quality_window)) - if marker_max_speed > 0: - pipeline = pipeline.transform( - SpeedLimit( - max_mps=marker_max_speed, - max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None, - ) - ) - all_dets = pipeline.transform(xf).to_list() - if marker_smoothing > 0: - # Keep only the latest emission per track_id — that's the most - # averaged pose, drawn once per tracked marker session. - by_track: dict[int, Observation[Any]] = {} - for d in all_dets: - by_track[d.data.track_id] = d - marker_dets = list(by_track.values()) - else: - marker_dets = all_dets - unique_ids = sorted({obs.data.marker_id for obs in marker_dets}) - print( - f"markers: {len(marker_dets)} entries from {len(all_dets)} raw detections " - f"across {len(unique_ids)} unique ids {unique_ids}" - ) - - if not no_gui: - rerun_init("dimos map tool", spawn=True) - rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) - if global_map is not None: - rr.log( - "world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True - ) - if path: - rr.log( - "world/raw_map/path", - rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[PATH_THICKNESS]), - static=True, - ) - if pgo_map is not None: - rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(voxel_size=voxel / 2), static=True) - if full_pgo_map is not None: - rr.log( - "world/full_pgo_map/pointcloud", - full_pgo_map.to_rerun(voxel_size=voxel / 2), - static=True, - ) - if pgo_path: - rr.log( - "world/pgo_map/path", - rr.LineStrips3D( - strips=[pgo_path], colors=[[255, 255, 255]], radii=[PATH_THICKNESS] - ), - static=True, - ) - rr.log( - "world/pgo_map/pgo/keyframes", - rr.Points3D(positions=pgo_path, colors=[[255, 0, 0]], radii=[0.025]), - static=True, - ) - if graph is not None and graph.loops: - loop_strips = [ - [ - (lc.source.translation.x, lc.source.translation.y, lc.source.translation.z), - (lc.target.translation.x, lc.target.translation.y, lc.target.translation.z), - ] - for lc in graph.loops - ] - rr.log( - "world/pgo_map/pgo/loop_closures", - rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.025]), - static=True, - ) - if marker_dets: - half = marker_size / 2.0 - n = len(marker_dets) - fill_half = [(half, half, 0.005)] * n - # Outline sits just outside the fill so both stay visible. - outline_bump = marker_size * 0.05 - outline_half = [(half + outline_bump, half + outline_bump, 0.006)] * n - raw_centers = [(d.data.center.x, d.data.center.y, d.data.center.z) for d in marker_dets] - raw_quats = [ - ( - d.data.orientation.x, - d.data.orientation.y, - d.data.orientation.z, - d.data.orientation.w, - ) - for d in marker_dets - ] - # One entry per tracked marker session — color stable per track_id. - colors = [ - Color.from_cmap("tab10", (d.data.track_id % 10) / 10.0).rgb_u8() - for d in marker_dets - ] - labels = [f"track={d.data.track_id} id={d.data.marker_id}" for d in marker_dets] - - _log_markers( - "world/raw_map/markers", - raw_centers, - raw_quats, - fill_half=fill_half, - outline_half=outline_half, - colors=colors, - labels=labels, - ) - - if graph is not None: - # PGO-correct each raw marker pose: lift it from world_raw - # into world_corrected so it lines up with pgo_map. - pgo_centers: list[tuple[float, float, float]] = [] - pgo_quats: list[tuple[float, float, float, float]] = [] - for d in marker_dets: - raw_tf = Transform( - translation=d.data.center, - rotation=d.data.orientation, - frame_id="world", - child_frame_id=f"marker_{d.data.marker_id}", - ts=d.ts, - ) - corrected = graph.correct(raw_tf) - pgo_centers.append( - ( - corrected.translation.x, - corrected.translation.y, - corrected.translation.z, - ) - ) - pgo_quats.append( - ( - corrected.rotation.x, - corrected.rotation.y, - corrected.rotation.z, - corrected.rotation.w, - ) - ) - _log_markers( - "world/pgo_map/markers", - pgo_centers, - pgo_quats, - fill_half=fill_half, - outline_half=outline_half, - colors=colors, - labels=labels, - ) - - if export and pgo_map is not None: - out_path = Path.cwd() / f"{db_path.stem}.pc2.lcm" - print(f"exporting PGO twopass map to {out_path}...") - out_path.write_bytes(pgo_map.lcm_encode()) - print(f"wrote {out_path}") - print() - print("load back with:") - print(" from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2") - print(f' pcd = PointCloud2.lcm_decode(open("{out_path.name}", "rb").read())') - - -if __name__ == "__main__": - typer.run(main) From 0cb908642acf7e6997ea2cfc4bcc50300fcf8415 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 16:15:20 +0800 Subject: [PATCH 49/64] added replay --- dimos/mapping/utils/dataset_validation.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dimos/mapping/utils/dataset_validation.md b/dimos/mapping/utils/dataset_validation.md index ed83a39ba8..599e1c5046 100644 --- a/dimos/mapping/utils/dataset_validation.md +++ b/dimos/mapping/utils/dataset_validation.md @@ -10,7 +10,12 @@ Stream("lidar"): 6025 items, 2026-05-29 23:32:55 — 2026-05-29 23:45:57 (782.3s Stream("odom"): 14630 items, 2026-05-29 23:32:55 — 2026-05-29 23:45:57 (782.3s) ``` -Shows which streams are in the database +Shows which streams are in the database. You can replay messages in rerun: + +```sh +dimos map replay recording_go2_mid360_2026-05-29_4-45pm-PST.db --duration 60 +``` + ```sh dimos map global recording_go2_mid360_2026-05-29_4-45pm-PST_corrected --voxel 0.1 --lidar lidar From 79393925091eae7bc232981c23fd16a941d48223 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 16:15:55 +0800 Subject: [PATCH 50/64] fix to cmdline --- dimos/mapping/utils/dataset_validation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/mapping/utils/dataset_validation.md b/dimos/mapping/utils/dataset_validation.md index 599e1c5046..5f32853ddf 100644 --- a/dimos/mapping/utils/dataset_validation.md +++ b/dimos/mapping/utils/dataset_validation.md @@ -18,7 +18,7 @@ dimos map replay recording_go2_mid360_2026-05-29_4-45pm-PST.db --duration 60 ```sh -dimos map global recording_go2_mid360_2026-05-29_4-45pm-PST_corrected --voxel 0.1 --lidar lidar +dimos map global recording_go2_mid360_2026-05-29_4-45pm-PST_corrected --voxel 0.1 --lidar fastlio_lidar ``` validates that livox lidar observations have correct poses associated coming from livox odometry From 5c9e8874084729634dd7101b64d19f6113233bff Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 16:41:14 +0800 Subject: [PATCH 51/64] map replay: add --map-final and --map-carve-columns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --map-final logs a single static accumulated voxel map of the whole recording (world/lidar_map, world/fastlio_map), independent of the growing --map. --map-carve-columns (off by default) clears the Z column under each surface voxel — useful for forward-facing lidar like the Go2 L1 — and applies to both --map and --map-final on the primary lidar; spherical fastlio_lidar is never carved. --- dimos/mapping/utils/replay.py | 57 ++++++++++++++++++++++++++++----- dimos/robot/cli/test_map_cli.py | 20 ++++++++++++ 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/dimos/mapping/utils/replay.py b/dimos/mapping/utils/replay.py index 7527683f2b..fddb8f815c 100644 --- a/dimos/mapping/utils/replay.py +++ b/dimos/mapping/utils/replay.py @@ -18,9 +18,11 @@ their entity path (no parent transform). Entities written: - ``world/lidar`` — Go2 L1 per-frame point cloud -- ``world/lidar_voxels`` — accumulated voxel map of the primary lidar (``--map``) +- ``world/lidar_voxels`` — growing voxel map of the primary lidar (``--map``) +- ``world/lidar_map`` — single static voxel map of the primary lidar (``--map-final``) - ``world/fastlio_lidar`` — fastlio_lidar raw cloud (if present) -- ``world/fastlio_voxels``— accumulated voxel map of fastlio_lidar (``--map``) +- ``world/fastlio_voxels``— growing voxel map of fastlio_lidar (``--map``) +- ``world/fastlio_map`` — single static voxel map of fastlio_lidar (``--map-final``) - ``world/fastlio`` — fastlio_odometry pose axis (if present) - ``world/fastlio_path`` — fastlio_odometry trajectory (growing LineStrips3D) - ``world/odom`` — Go2 onboard odom pose axis (if present) @@ -31,6 +33,7 @@ Usage: uv run python -m dimos.mapping.utils.replay mid360 --out map.rrd uv run python -m dimos.mapping.utils.replay mid360 --out map.rrd --map + uv run python -m dimos.mapping.utils.replay mid360 --out map.rrd --map-final rerun map.rrd """ @@ -172,13 +175,24 @@ def main( map: bool = typer.Option( False, "--map", - help="Accumulate each lidar stream into a VoxelGrid and log only the final map", + help="Accumulate each lidar stream into a VoxelGrid, logging a growing map over the timeline", + ), + map_final: bool = typer.Option( + False, + "--map-final", + help="Log a single static accumulated map of the whole recording (independent of --map)", + ), + map_carve_columns: bool = typer.Option( + False, + "--map-carve-columns/--no-map-carve-columns", + help="Clear the full Z column under each new voxel, keeping only the latest surface " + "(good for forward-facing lidar like the Go2 L1); --map/--map-final only", ), map_voxel: float = typer.Option( - 0.05, "--map-voxel", help="Voxel size for the accumulated map (m); --map only" + 0.05, "--map-voxel", help="Voxel size for the accumulated map (m); --map/--map-final only" ), map_device: str = typer.Option( - "CUDA:0", "--map-device", help="Open3D device for the VoxelGrid; --map only" + "CUDA:0", "--map-device", help="Open3D device for the VoxelGrid; --map/--map-final only" ), map_emit_every: int = typer.Option( 10, @@ -239,15 +253,16 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]: _log_clouds("fastlio_lidar", livox, "world/fastlio_lidar", voxel, point_mode) # ---- accumulated voxel maps (--map only) ---- - # Go2 L1 forward-facing → column carving on. - # Mid360 spherical → column carving off, just aggregate. + # --map-carve-columns clears the Z column under each surface voxel + # (good for forward-facing lidar like the Go2 L1). fastlio_lidar is + # spherical (Mid360) → never carve, just aggregate. if map: grid_kwargs = {"voxel_size": map_voxel, "device": map_device, "show_startup_log": False} _log_clouds( " lidar_voxels", lidar.transform( VoxelMapTransformer( - emit_every=map_emit_every, carve_columns=True, **grid_kwargs + emit_every=map_emit_every, carve_columns=map_carve_columns, **grid_kwargs ) ), "world/lidar_voxels", @@ -269,6 +284,32 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]: total=max(1, livox.count() // max(map_emit_every, 1)), ) + # ---- single static accumulated map (--map-final only) ---- + # Guard on the raw source's exists() (cheap limit(1)) so the voxel + # accumulation (emit_every=0 → one obs at exhaustion) runs only once. + if map_final: + grid_kwargs = {"voxel_size": map_voxel, "device": map_device, "show_startup_log": False} + if lidar.exists(): + final = lidar.transform( + VoxelMapTransformer( + emit_every=0, carve_columns=map_carve_columns, **grid_kwargs + ) + ).last() + rr.log( + "world/lidar_map", + final.data.to_rerun(voxel_size=voxel, mode=point_mode), + static=True, + ) + if livox is not None and livox.exists(): + final_livox = livox.transform( + VoxelMapTransformer(emit_every=0, carve_columns=False, **grid_kwargs) + ).last() + rr.log( + "world/fastlio_map", + final_livox.data.to_rerun(voxel_size=voxel, mode=point_mode), + static=True, + ) + # ---- fastlio pose axis + path from fastlio_odometry stream ---- if "fastlio_odometry" in store.streams: odometry = clipped("fastlio_odometry", Odometry) diff --git a/dimos/robot/cli/test_map_cli.py b/dimos/robot/cli/test_map_cli.py index 4ec235d341..3714a9619f 100644 --- a/dimos/robot/cli/test_map_cli.py +++ b/dimos/robot/cli/test_map_cli.py @@ -137,6 +137,26 @@ def test_replay_marker_snippet(dataset: str, tmp_path: Path) -> None: assert out.exists() and out.stat().st_size > 0 +def test_replay_map_final_snippet(dataset: str, tmp_path: Path) -> None: + out = tmp_path / "replay_map.rrd" + # CPU device keeps the voxel accumulation runnable without a GPU. + res = _run( + "replay", + dataset, + "--out", + str(out), + "--no-gui", + "--duration", + str(DURATION), + "--map-final", + "--map-carve-columns", + "--map-device", + "CPU:0", + ) + assert res.returncode == 0, res.stderr + assert out.exists() and out.stat().st_size > 0 + + def test_global_snippet(dataset: str, tmp_path: Path) -> None: out = tmp_path / "global.rrd" # CPU device + small block budget keeps this runnable without a GPU. From 1452327ec960babc2b15398afa3b9be7ce49d2a8 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 16:54:31 +0800 Subject: [PATCH 52/64] map replay: --voxel sets mapper grid resolution, drop --map-voxel One voxel knob: --voxel now sets the VoxelGrid resolution for --map/--map-final, and the rerun rendering uses the same size. Removes the redundant --map-voxel. --- dimos/mapping/utils/replay.py | 112 ++++++++++++++++------------------ 1 file changed, 51 insertions(+), 61 deletions(-) diff --git a/dimos/mapping/utils/replay.py b/dimos/mapping/utils/replay.py index fddb8f815c..75b9eba5c3 100644 --- a/dimos/mapping/utils/replay.py +++ b/dimos/mapping/utils/replay.py @@ -18,11 +18,9 @@ their entity path (no parent transform). Entities written: - ``world/lidar`` — Go2 L1 per-frame point cloud -- ``world/lidar_voxels`` — growing voxel map of the primary lidar (``--map``) -- ``world/lidar_map`` — single static voxel map of the primary lidar (``--map-final``) - ``world/fastlio_lidar`` — fastlio_lidar raw cloud (if present) -- ``world/fastlio_voxels``— growing voxel map of fastlio_lidar (``--map``) -- ``world/fastlio_map`` — single static voxel map of fastlio_lidar (``--map-final``) +- ``world/_voxels`` — growing voxel map, one per PointCloud2 stream (``--map``) +- ``world/_map`` — single static voxel map, one per PointCloud2 stream (``--map-final``) - ``world/fastlio`` — fastlio_odometry pose axis (if present) - ``world/fastlio_path`` — fastlio_odometry trajectory (growing LineStrips3D) - ``world/odom`` — Go2 onboard odom pose axis (if present) @@ -162,7 +160,9 @@ def main( None, "--duration", help="Use only N seconds from --seek (default: to the end)" ), voxel: float = typer.Option( - 0.05, "--voxel", help="Voxel size hint for the point cloud renderer" + 0.05, + "--voxel", + help="Voxel grid resolution (m) for --map/--map-final; rendering follows the same size", ), point_mode: str = typer.Option( "spheres", "--point-mode", help="Render mode: 'spheres', 'boxes', or 'points'" @@ -182,15 +182,17 @@ def main( "--map-final", help="Log a single static accumulated map of the whole recording (independent of --map)", ), + map_source: list[str] = typer.Option( + [], + "--map-source", + help="PointCloud2 stream(s) to map; repeatable. Default: all PointCloud2 streams", + ), map_carve_columns: bool = typer.Option( False, "--map-carve-columns/--no-map-carve-columns", help="Clear the full Z column under each new voxel, keeping only the latest surface " "(good for forward-facing lidar like the Go2 L1); --map/--map-final only", ), - map_voxel: float = typer.Option( - 0.05, "--map-voxel", help="Voxel size for the accumulated map (m); --map/--map-final only" - ), map_device: str = typer.Option( "CUDA:0", "--map-device", help="Open3D device for the VoxelGrid; --map/--map-final only" ), @@ -201,6 +203,7 @@ def main( ), ) -> None: """Dump a recording to .rrd (lidar clouds + camera frames) and open it in rerun.""" + from dimos.mapping.utils.summary import _stream_payload_types from dimos.mapping.voxels import VoxelMapTransformer from dimos.memory2.store.sqlite import SqliteStore from dimos.memory2.transform import throttle @@ -216,6 +219,13 @@ def main( out = Path.cwd() / f"{db_path.stem}.rrd" cam_info = _camera_info_static() + # Resolve which streams to voxelize: all PointCloud2 streams, or the + # explicit --map-source subset. Validate up front so typos fail fast. + pc_streams = [n for n, t in _stream_payload_types(db_path).items() if t is PointCloud2] + map_sources = list(map_source) or pc_streams + if (map or map_final) and (bad := [s for s in map_sources if s not in pc_streams]): + raise typer.BadParameter(f"--map-source: not PointCloud2 stream(s): {', '.join(bad)}") + rr.init("dimos map_rrd", recording_id=db_path.stem) rr.save(str(out)) register_colormap_annotation("turbo") @@ -252,63 +262,43 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]: if livox is not None: _log_clouds("fastlio_lidar", livox, "world/fastlio_lidar", voxel, point_mode) - # ---- accumulated voxel maps (--map only) ---- - # --map-carve-columns clears the Z column under each surface voxel - # (good for forward-facing lidar like the Go2 L1). fastlio_lidar is - # spherical (Mid360) → never carve, just aggregate. - if map: - grid_kwargs = {"voxel_size": map_voxel, "device": map_device, "show_startup_log": False} - _log_clouds( - " lidar_voxels", - lidar.transform( - VoxelMapTransformer( - emit_every=map_emit_every, carve_columns=map_carve_columns, **grid_kwargs + # ---- accumulated voxel maps over the selected PointCloud2 streams ---- + # --map logs a growing map per stream; --map-final logs one static map + # per stream. --map-carve-columns clears the Z column under each surface + # voxel (good for forward-facing lidar like the Go2 L1); off by default. + if map or map_final: + grid_kwargs = {"voxel_size": voxel, "device": map_device, "show_startup_log": False} + for name in map_sources: + src = clipped(name, PointCloud2) + if not src.exists(): + continue + if map: + _log_clouds( + f"{name}_voxels", + src.transform( + VoxelMapTransformer( + emit_every=map_emit_every, + carve_columns=map_carve_columns, + **grid_kwargs, + ) + ), + f"world/{name}_voxels", + voxel, + point_mode, + total=max(1, src.count() // max(map_emit_every, 1)), ) - ), - "world/lidar_voxels", - voxel, - point_mode, - total=max(1, lidar.count() // max(map_emit_every, 1)), - ) - if livox is not None: - _log_clouds( - "fastlio_voxels", - livox.transform( + if map_final: + # emit_every=0 → one accumulated obs at exhaustion + final = src.transform( VoxelMapTransformer( - emit_every=map_emit_every, carve_columns=False, **grid_kwargs + emit_every=0, carve_columns=map_carve_columns, **grid_kwargs ) - ), - "world/fastlio_voxels", - voxel, - point_mode, - total=max(1, livox.count() // max(map_emit_every, 1)), - ) - - # ---- single static accumulated map (--map-final only) ---- - # Guard on the raw source's exists() (cheap limit(1)) so the voxel - # accumulation (emit_every=0 → one obs at exhaustion) runs only once. - if map_final: - grid_kwargs = {"voxel_size": map_voxel, "device": map_device, "show_startup_log": False} - if lidar.exists(): - final = lidar.transform( - VoxelMapTransformer( - emit_every=0, carve_columns=map_carve_columns, **grid_kwargs + ).last() + rr.log( + f"world/{name}_map", + final.data.to_rerun(voxel_size=voxel, mode=point_mode), + static=True, ) - ).last() - rr.log( - "world/lidar_map", - final.data.to_rerun(voxel_size=voxel, mode=point_mode), - static=True, - ) - if livox is not None and livox.exists(): - final_livox = livox.transform( - VoxelMapTransformer(emit_every=0, carve_columns=False, **grid_kwargs) - ).last() - rr.log( - "world/fastlio_map", - final_livox.data.to_rerun(voxel_size=voxel, mode=point_mode), - static=True, - ) # ---- fastlio pose axis + path from fastlio_odometry stream ---- if "fastlio_odometry" in store.streams: From 8864e4fc98be09974bd544e42607160e348316c4 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 16:57:16 +0800 Subject: [PATCH 53/64] map replay: render maps at voxel/2 for see-through voxels Drawing the accumulated maps at half the grid resolution leaves gaps between boxes, reading as semi-transparent (matches globalmap.py). --- dimos/mapping/utils/replay.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dimos/mapping/utils/replay.py b/dimos/mapping/utils/replay.py index 75b9eba5c3..0645618fb4 100644 --- a/dimos/mapping/utils/replay.py +++ b/dimos/mapping/utils/replay.py @@ -283,7 +283,7 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]: ) ), f"world/{name}_voxels", - voxel, + voxel / 2, # render smaller than the grid → gaps read as transparency point_mode, total=max(1, src.count() // max(map_emit_every, 1)), ) @@ -296,7 +296,7 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]: ).last() rr.log( f"world/{name}_map", - final.data.to_rerun(voxel_size=voxel, mode=point_mode), + final.data.to_rerun(voxel_size=voxel / 2, mode=point_mode), static=True, ) From 47ca2f8376b4caa751e637312fec1ef16ab6b68c Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 16:58:12 +0800 Subject: [PATCH 54/64] map replay: render maps at voxel/4 for lighter voxels --- dimos/mapping/utils/replay.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dimos/mapping/utils/replay.py b/dimos/mapping/utils/replay.py index 0645618fb4..8de4ebc93d 100644 --- a/dimos/mapping/utils/replay.py +++ b/dimos/mapping/utils/replay.py @@ -283,7 +283,7 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]: ) ), f"world/{name}_voxels", - voxel / 2, # render smaller than the grid → gaps read as transparency + voxel / 4, # render smaller than the grid → gaps read as transparency point_mode, total=max(1, src.count() // max(map_emit_every, 1)), ) @@ -296,7 +296,7 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]: ).last() rr.log( f"world/{name}_map", - final.data.to_rerun(voxel_size=voxel / 2, mode=point_mode), + final.data.to_rerun(voxel_size=voxel / 4, mode=point_mode), static=True, ) From e7156a40a669ee9d309e8ba6bc1e09b3c14a927d Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 17:10:10 +0800 Subject: [PATCH 55/64] mapper bugfix --- dimos/perception/fiducial/marker_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py index 95b1b6b972..728baf7940 100644 --- a/dimos/perception/fiducial/marker_transformer.py +++ b/dimos/perception/fiducial/marker_transformer.py @@ -41,7 +41,7 @@ from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo from dimos.msgs.sensor_msgs.Image import Image from dimos.perception.detection.type.detection3d.marker import Detection3DMarker -from dimos.perception.fiducial.marker_tf_module import ( +from dimos.perception.fiducial.marker_pose import ( camera_info_to_cv_matrices, create_aruco_detector, estimate_marker_pose, From 0a1169ffc578172bec2ed6b5d1b92c502273a609 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 17:35:57 +0800 Subject: [PATCH 56/64] map cli: move mapping/utils CLI verbs into mapping/utils/cli Group the map CLI scripts (globalmap, pose_fill, rename, replay, replay_marker, summary) plus their assets and dataset_validation doc under a cli/ subpackage. distance.py stays in utils/ as a shared lib. The map CLI test moves alongside as cli/test_cli.py. --- dimos/mapping/loop_closure/autoresearch/program.md | 2 +- dimos/mapping/loop_closure/utils/autotrim.py | 2 +- .../mapping/utils/{ => cli}/assets/fastlio_lidar.png | 0 dimos/mapping/utils/{ => cli}/assets/go2_lidar.png | 0 .../utils/{ => cli}/assets/markers_fastlio.png | 0 dimos/mapping/utils/{ => cli}/assets/markers_go2.png | 0 dimos/mapping/utils/{ => cli}/dataset_validation.md | 0 dimos/mapping/utils/{ => cli}/globalmap.py | 2 +- dimos/mapping/utils/{ => cli}/pose_fill.py | 0 dimos/mapping/utils/{ => cli}/rename.py | 4 ++-- dimos/mapping/utils/{ => cli}/replay.py | 12 ++++++------ dimos/mapping/utils/{ => cli}/replay_marker.py | 4 ++-- dimos/mapping/utils/{ => cli}/summary.py | 2 +- .../utils/cli/test_cli.py} | 0 dimos/robot/cli/dimos.py | 12 ++++++------ 15 files changed, 20 insertions(+), 20 deletions(-) rename dimos/mapping/utils/{ => cli}/assets/fastlio_lidar.png (100%) rename dimos/mapping/utils/{ => cli}/assets/go2_lidar.png (100%) rename dimos/mapping/utils/{ => cli}/assets/markers_fastlio.png (100%) rename dimos/mapping/utils/{ => cli}/assets/markers_go2.png (100%) rename dimos/mapping/utils/{ => cli}/dataset_validation.md (100%) rename dimos/mapping/utils/{ => cli}/globalmap.py (99%) rename dimos/mapping/utils/{ => cli}/pose_fill.py (100%) rename dimos/mapping/utils/{ => cli}/rename.py (97%) rename dimos/mapping/utils/{ => cli}/replay.py (97%) rename dimos/mapping/utils/{ => cli}/replay_marker.py (98%) rename dimos/mapping/utils/{ => cli}/summary.py (96%) rename dimos/{robot/cli/test_map_cli.py => mapping/utils/cli/test_cli.py} (100%) diff --git a/dimos/mapping/loop_closure/autoresearch/program.md b/dimos/mapping/loop_closure/autoresearch/program.md index 55791f748d..e543485d6a 100644 --- a/dimos/mapping/loop_closure/autoresearch/program.md +++ b/dimos/mapping/loop_closure/autoresearch/program.md @@ -79,7 +79,7 @@ investigate *why* the metric moved (or didn't): `uv run python -m dimos.mapping.loop_closure.eval hk_village2 -v`. A param change that, say, *adds* loop closures but worsens spread tells a very different story than one that *removes* them. -- **Use `dimos/mapping/utils/globalmap.py`** as the visualization companion. It +- **Use `dimos/mapping/utils/cli/globalmap.py`** as the visualization companion. It runs the same PGO + marker pipeline as the eval and renders the result in rerun. Use `--no-gui` for headless inspection or run with GUI locally to scrub the timeline and see where markers cluster, where diff --git a/dimos/mapping/loop_closure/utils/autotrim.py b/dimos/mapping/loop_closure/utils/autotrim.py index 1eb16bc153..c93094bc1b 100644 --- a/dimos/mapping/loop_closure/utils/autotrim.py +++ b/dimos/mapping/loop_closure/utils/autotrim.py @@ -45,7 +45,7 @@ def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: - """Matches dimos/mapping/utils/globalmap.py:progress — kept inline to avoid pulling rerun.""" + """Matches dimos/mapping/utils/cli/globalmap.py:progress — kept inline to avoid pulling rerun.""" seen = 0 wall_start: float | None = None last_wall: float | None = None diff --git a/dimos/mapping/utils/assets/fastlio_lidar.png b/dimos/mapping/utils/cli/assets/fastlio_lidar.png similarity index 100% rename from dimos/mapping/utils/assets/fastlio_lidar.png rename to dimos/mapping/utils/cli/assets/fastlio_lidar.png diff --git a/dimos/mapping/utils/assets/go2_lidar.png b/dimos/mapping/utils/cli/assets/go2_lidar.png similarity index 100% rename from dimos/mapping/utils/assets/go2_lidar.png rename to dimos/mapping/utils/cli/assets/go2_lidar.png diff --git a/dimos/mapping/utils/assets/markers_fastlio.png b/dimos/mapping/utils/cli/assets/markers_fastlio.png similarity index 100% rename from dimos/mapping/utils/assets/markers_fastlio.png rename to dimos/mapping/utils/cli/assets/markers_fastlio.png diff --git a/dimos/mapping/utils/assets/markers_go2.png b/dimos/mapping/utils/cli/assets/markers_go2.png similarity index 100% rename from dimos/mapping/utils/assets/markers_go2.png rename to dimos/mapping/utils/cli/assets/markers_go2.png diff --git a/dimos/mapping/utils/dataset_validation.md b/dimos/mapping/utils/cli/dataset_validation.md similarity index 100% rename from dimos/mapping/utils/dataset_validation.md rename to dimos/mapping/utils/cli/dataset_validation.md diff --git a/dimos/mapping/utils/globalmap.py b/dimos/mapping/utils/cli/globalmap.py similarity index 99% rename from dimos/mapping/utils/globalmap.py rename to dimos/mapping/utils/cli/globalmap.py index 6518ecf04f..f53f69923a 100644 --- a/dimos/mapping/utils/globalmap.py +++ b/dimos/mapping/utils/cli/globalmap.py @@ -479,7 +479,7 @@ def main( color_image = store.stream("color_image", Image).clip(seek, duration) n_images = color_image.count() if image_pose is not None: - from dimos.mapping.utils.pose_fill import pose_fill + from dimos.mapping.utils.cli.pose_fill import pose_fill src_pose: Stream[Any] = store.stream(image_pose).clip(seek, duration) print(f"re-posing color_image from {image_pose!r} + camera optical mount") diff --git a/dimos/mapping/utils/pose_fill.py b/dimos/mapping/utils/cli/pose_fill.py similarity index 100% rename from dimos/mapping/utils/pose_fill.py rename to dimos/mapping/utils/cli/pose_fill.py diff --git a/dimos/mapping/utils/rename.py b/dimos/mapping/utils/cli/rename.py similarity index 97% rename from dimos/mapping/utils/rename.py rename to dimos/mapping/utils/cli/rename.py index afabff0f46..c628dd03d5 100644 --- a/dimos/mapping/utils/rename.py +++ b/dimos/mapping/utils/cli/rename.py @@ -20,7 +20,7 @@ immediately. Streams not mentioned in ``--rename`` are copied verbatim. Usage: - uv run python -m dimos.mapping.utils.rename mid360 \\ + uv run python -m dimos.mapping.utils.cli.rename mid360 \\ --out mid360_renamed.db \\ --rename go2_lidar=lidar \\ --rename lidar=fastlio_lidar \\ @@ -46,7 +46,7 @@ def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: - """Matches dimos/mapping/utils/globalmap.py:progress — kept inline to avoid pulling rerun.""" + """Matches dimos/mapping/utils/cli/globalmap.py:progress — kept inline to avoid pulling rerun.""" seen = 0 wall_start: float | None = None last_wall: float | None = None diff --git a/dimos/mapping/utils/replay.py b/dimos/mapping/utils/cli/replay.py similarity index 97% rename from dimos/mapping/utils/replay.py rename to dimos/mapping/utils/cli/replay.py index 8de4ebc93d..6aa5e5da3b 100644 --- a/dimos/mapping/utils/replay.py +++ b/dimos/mapping/utils/cli/replay.py @@ -29,9 +29,9 @@ - ``world/camera/image`` — color_image frames Usage: - uv run python -m dimos.mapping.utils.replay mid360 --out map.rrd - uv run python -m dimos.mapping.utils.replay mid360 --out map.rrd --map - uv run python -m dimos.mapping.utils.replay mid360 --out map.rrd --map-final + uv run python -m dimos.mapping.utils.cli.replay mid360 --out map.rrd + uv run python -m dimos.mapping.utils.cli.replay mid360 --out map.rrd --map + uv run python -m dimos.mapping.utils.cli.replay mid360 --out map.rrd --map-final rerun map.rrd """ @@ -48,7 +48,7 @@ # Heavy dimos imports (mapping/memory2 → torch, scipy, open3d) are deferred into # main() so that `dimos map --help` stays fast. See test_cli_startup.py and the -# same pattern in dimos/mapping/utils/globalmap.py. +# same pattern in dimos/mapping/utils/cli/globalmap.py. if TYPE_CHECKING: from dimos.memory2.stream import Stream from dimos.memory2.type.observation import Observation @@ -58,7 +58,7 @@ def _progress(total: int, label: str) -> Callable[[Observation[Any]], None]: - """Matches dimos/mapping/utils/globalmap.py:progress.""" + """Matches dimos/mapping/utils/cli/globalmap.py:progress.""" seen = 0 wall_start: float | None = None last_wall: float | None = None @@ -203,7 +203,7 @@ def main( ), ) -> None: """Dump a recording to .rrd (lidar clouds + camera frames) and open it in rerun.""" - from dimos.mapping.utils.summary import _stream_payload_types + from dimos.mapping.utils.cli.summary import _stream_payload_types from dimos.mapping.voxels import VoxelMapTransformer from dimos.memory2.store.sqlite import SqliteStore from dimos.memory2.transform import throttle diff --git a/dimos/mapping/utils/replay_marker.py b/dimos/mapping/utils/cli/replay_marker.py similarity index 98% rename from dimos/mapping/utils/replay_marker.py rename to dimos/mapping/utils/cli/replay_marker.py index 0510792476..ef4d80ca86 100644 --- a/dimos/mapping/utils/replay_marker.py +++ b/dimos/mapping/utils/cli/replay_marker.py @@ -20,7 +20,7 @@ - per detection: marker box in world frame, at the detection timestamp Usage: - uv run python -m dimos.mapping.utils.replay_marker hk_village1 --out hk.rrd + uv run python -m dimos.mapping.utils.cli.replay_marker hk_village1 --out hk.rrd rerun hk.rrd Throwaway script next to ``map.py``; remove once the apriltag reliability work @@ -38,7 +38,7 @@ # Heavy dimos imports (memory2/perception → torch, scipy, cv2) are deferred into # main() so that `dimos map --help` stays fast. See test_cli_startup.py and the -# same pattern in dimos/mapping/utils/globalmap.py. +# same pattern in dimos/mapping/utils/cli/globalmap.py. if TYPE_CHECKING: from dimos.memory2.stream import Stream diff --git a/dimos/mapping/utils/summary.py b/dimos/mapping/utils/cli/summary.py similarity index 96% rename from dimos/mapping/utils/summary.py rename to dimos/mapping/utils/cli/summary.py index 34a5721b37..6475f2d833 100644 --- a/dimos/mapping/utils/summary.py +++ b/dimos/mapping/utils/cli/summary.py @@ -15,7 +15,7 @@ """Print ``Store.summary()`` for a memory2 sqlite recording. Usage: - uv run python -m dimos.mapping.utils.summary mid360 + uv run python -m dimos.mapping.utils.cli.summary mid360 """ from __future__ import annotations diff --git a/dimos/robot/cli/test_map_cli.py b/dimos/mapping/utils/cli/test_cli.py similarity index 100% rename from dimos/robot/cli/test_map_cli.py rename to dimos/mapping/utils/cli/test_cli.py diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 4c92635570..126f783266 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -38,12 +38,12 @@ from dimos.core.daemon import daemonize, install_signal_handlers from dimos.core.global_config import GlobalConfig, global_config from dimos.core.run_registry import get_most_recent, is_pid_alive, stop_entry -from dimos.mapping.utils.globalmap import main as _map_main -from dimos.mapping.utils.pose_fill import main as _map_pose_fill_main -from dimos.mapping.utils.rename import main as _map_rename_main -from dimos.mapping.utils.replay import main as _map_replay_main -from dimos.mapping.utils.replay_marker import main as _map_replay_marker_main -from dimos.mapping.utils.summary import main as _map_summary_main +from dimos.mapping.utils.cli.globalmap import main as _map_main +from dimos.mapping.utils.cli.pose_fill import main as _map_pose_fill_main +from dimos.mapping.utils.cli.rename import main as _map_rename_main +from dimos.mapping.utils.cli.replay import main as _map_replay_main +from dimos.mapping.utils.cli.replay_marker import main as _map_replay_marker_main +from dimos.mapping.utils.cli.summary import main as _map_summary_main from dimos.robot.unitree.go2.cli.go2tool import app as go2tool_app from dimos.utils.logging_config import setup_logger from dimos.visualization.rerun.constants import RerunOpenOption From c91992ca45b0c894280ac9b8066d5691c4c03659 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 18:05:02 +0800 Subject: [PATCH 57/64] Revert branch-local changes to marker.py and marker_transformer.py These files were inadvertently diverged on this branch. Restore both to main; the map-marker CLI callers remain compatible with main's DetectMarkers API. --- .../detection/type/detection3d/marker.py | 26 +- .../perception/fiducial/marker_transformer.py | 298 +++++++++++++----- 2 files changed, 252 insertions(+), 72 deletions(-) diff --git a/dimos/perception/detection/type/detection3d/marker.py b/dimos/perception/detection/type/detection3d/marker.py index a162814d6f..9fd58cc6d8 100644 --- a/dimos/perception/detection/type/detection3d/marker.py +++ b/dimos/perception/detection/type/detection3d/marker.py @@ -19,6 +19,7 @@ import numpy as np +from dimos.msgs.vision_msgs.Detection3D import Detection3D from dimos.perception.detection.type.detection3d.bbox import Detection3DBBox @@ -36,7 +37,30 @@ class Detection3DMarker(Detection3DBBox): marker_id: int = -1 corners_px: np.ndarray = field(default_factory=lambda: np.zeros((4, 2), dtype=np.float32)) dictionary: str = "" + reprojection_error: float = 0.0 + + def __post_init__(self) -> None: + self.name = self.marker_label + + @property + def marker_label(self) -> str: + """Dictionary-qualified marker label used for display and wire class id.""" + return f"{self.dictionary}:{self.marker_id}" + + def to_detection3d_msg(self) -> Detection3D: + """Convert to a ROS Detection3D message, preserving marker identity.""" + msg = super().to_detection3d_msg() + msg.id = str(self.marker_id) + if msg.results: + msg.results[0].hypothesis.class_id = self.marker_label + msg.results_length = len(msg.results) + return msg def to_repr_dict(self) -> dict[str, Any]: parent = super().to_repr_dict() - return {**parent, "marker_id": str(self.marker_id), "dict": self.dictionary} + return { + **parent, + "marker_id": str(self.marker_id), + "dict": self.dictionary, + "reproj": f"{self.reprojection_error:.3f}px", + } diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py index 728baf7940..e7dcaa95bd 100644 --- a/dimos/perception/fiducial/marker_transformer.py +++ b/dimos/perception/fiducial/marker_transformer.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""ArUco / AprilTag detection as a memory2 transformer. +"""ArUco / AprilTag detection as memory2 transforms. -Wraps the pure helpers in :mod:`dimos.perception.fiducial.marker_tf_module` +Wraps :func:`dimos.perception.fiducial.marker_detect.detect_markers_in_image` and emits one :class:`Detection3DMarker` observation per detected marker, with -``.pose`` composed into world frame from the upstream observation's -camera-in-world pose. The companion module :class:`MarkerTfModule` remains -the right choice for live TF publication; this transformer is for offline / -mem2-stream composition. +``.pose`` composed into world frame from the upstream observation's camera pose. +This module also keeps marker smoothing helpers and ``MarkersPerFrame``, which +collapses marker fan-out back into one ``Detection3DArray`` per source image. +The companion module :class:`MarkerTfModule` remains the right choice for live +TF publication. Skips frames where the upstream observation has no ``.pose`` (debug log): without a camera-in-world pose, we can't honor the "always world-frame" @@ -28,9 +29,10 @@ from __future__ import annotations +from collections.abc import Callable import dataclasses import math -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, cast import numpy as np @@ -40,12 +42,16 @@ from dimos.msgs.geometry_msgs.Vector3 import Vector3 from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.vision_msgs.Detection3DArray import Detection3DArray +from dimos.perception.detection.type.detection3d.imageDetections3D import ImageDetections3D from dimos.perception.detection.type.detection3d.marker import Detection3DMarker +from dimos.perception.fiducial.marker_detect import ( + detect_markers_in_image as _detect_markers_in_image, +) from dimos.perception.fiducial.marker_pose import ( camera_info_to_cv_matrices, + camera_optical_frame_id, create_aruco_detector, - estimate_marker_pose, - rvec_tvec_to_transform, ) from dimos.types.timestamped import TimestampedBufferCollection from dimos.utils.logging_config import setup_logger @@ -58,6 +64,37 @@ logger = setup_logger() +CameraInfoSource = CameraInfo | Callable[[], CameraInfo | None] | None + + +def _camera_info_key(info: CameraInfo) -> tuple[Any, ...]: + """Return an intrinsics-only key for cached OpenCV calibration state.""" + return ( + info.width, + info.height, + info.distortion_model, + tuple(info.K), + tuple(info.D), + ) + + +def _pose_tuple_to_transform( + pose: tuple[float, float, float, float, float, float, float], + *, + frame_id: str, + child_frame_id: str, + ts: float, +) -> Transform: + x, y, z, qx, qy, qz, qw = pose + return Transform( + translation=Vector3(x, y, z), + rotation=Quaternion(qx, qy, qz, qw), + frame_id=frame_id, + child_frame_id=child_frame_id, + ts=ts, + ) + + def _average_marker_pose( buffer: TimestampedBufferCollection[Detection3DMarker], ) -> tuple[Vector3, Quaternion]: @@ -99,11 +136,12 @@ class DetectMarkers(Transformer[Image, Detection3DMarker]): def __init__( self, - camera_info: CameraInfo, + camera_info: CameraInfoSource, marker_length_m: float, aruco_dictionary: str = "DICT_APRILTAG_36h11", world_frame: str = "world", smoothing_window: float = 0.0, + emit_empty_frames: bool = False, ) -> None: if marker_length_m <= 0: raise ValueError(f"marker_length_m must be > 0, got {marker_length_m}") @@ -114,8 +152,12 @@ def __init__( self.aruco_dictionary = aruco_dictionary self.world_frame = world_frame self.smoothing_window = smoothing_window + self.emit_empty_frames = emit_empty_frames self._detector = create_aruco_detector(aruco_dictionary) - self._cam_mtx, self._dist = camera_info_to_cv_matrices(camera_info) + self._camera_info_key: tuple[Any, ...] | None = None + self._resolved_camera_info: CameraInfo | None = None + self._cam_mtx: np.ndarray | None = None + self._dist: np.ndarray | None = None # Per marker_id sliding-window buffer of raw detections, used to emit # smoothed pose updates when ``smoothing_window > 0``. self._buffers: dict[int, TimestampedBufferCollection[Detection3DMarker]] = {} @@ -125,24 +167,43 @@ def __init__( self._marker_to_track: dict[int, int] = {} self._next_track_id = 0 + def _resolve_camera_info(self) -> CameraInfo | None: + source = self.camera_info + info = source() if callable(source) else source + if info is None: + return None + + key = _camera_info_key(info) + if key != self._camera_info_key: + self._cam_mtx, self._dist = camera_info_to_cv_matrices(info) + self._detector = create_aruco_detector(self.aruco_dictionary) + self._resolved_camera_info = info + self._camera_info_key = key + return info + def __call__( self, upstream: Iterator[Observation[Image]] ) -> Iterator[Observation[Detection3DMarker]]: - info = self.camera_info - marker_size = Vector3(self.marker_length_m, self.marker_length_m, 0.0) - for obs in upstream: - pose = obs.pose - if pose is None: + pose_tuple = obs.pose_tuple + if pose_tuple is None: logger.debug("DetectMarkers: obs %s has no .pose; skipping", obs.id) continue + info = self._resolve_camera_info() + if info is None: + logger.debug("DetectMarkers: no CameraInfo for obs %s; skipping", obs.id) + continue + assert self._cam_mtx is not None + assert self._dist is not None + image = obs.data - if ( + image_size_mismatch = ( info.width and info.height and (image.width != info.width or image.height != info.height) - ): + ) + if image_size_mismatch: logger.debug( "DetectMarkers: image %sx%s != CameraInfo %sx%s; skip", image.width, @@ -152,47 +213,42 @@ def __call__( ) continue - gray = image.to_grayscale().as_numpy() - corners, ids, _ = self._detector.detectMarkers(gray) - if ids is None or len(ids) == 0: - continue - - t_world_optical = Transform( - translation=pose.position, - rotation=pose.orientation, + optical_frame = camera_optical_frame_id(image, info) + t_world_optical = _pose_tuple_to_transform( + pose_tuple, frame_id=self.world_frame, - child_frame_id="optical", + child_frame_id=optical_frame, ts=obs.ts, ) - for corner_set, mid_arr in zip(corners, ids, strict=True): - mid = int(mid_arr[0]) - marker_pose = estimate_marker_pose( - corner_set, - self.marker_length_m, - self._cam_mtx, - self._dist, - distortion_model=info.distortion_model, - ) - if marker_pose is None: - continue - rvec, tvec = marker_pose - t_optical_marker = rvec_tvec_to_transform( - rvec, - tvec, - frame_id="optical", - child_frame_id=f"marker_{mid}", - ts=obs.ts, - ) - t_world_marker = t_world_optical + t_optical_marker + detections = _detect_markers_in_image( + image, + camera_info=info, + world_T_optical=t_world_optical, + marker_length_m=self.marker_length_m, + aruco_dictionary=self.aruco_dictionary, + world_frame=self.world_frame, + detector=self._detector, + camera_matrix=self._cam_mtx, + dist_coeffs=self._dist, + ) - corners_2d = corner_set.reshape(4, 2).astype(np.float32) - xy_min = corners_2d.min(axis=0) - xy_max = corners_2d.max(axis=0) - bbox = (float(xy_min[0]), float(xy_min[1]), float(xy_max[0]), float(xy_max[1])) + if not detections: + if self.emit_empty_frames: + yield cast( + "Observation[Detection3DMarker]", + obs.derive(data=None).tag( + marker_frame_image=image, + marker_frame_count=0, + ), + ) + continue - # Decide track_id (only meaningful when smoothing is on). - # Without smoothing, track_id == marker_id (legacy behavior). + marker_count = len(detections) + for marker_index, det in enumerate(detections): + mid = det.marker_id + # track_id is only for smoothing / mem2 tags; marker identity is marker_id. + # Without smoothing, use -1 (no temporal track), same as untracked 2D detections. if self.smoothing_window > 0: prior_buf = self._buffers.get(mid) prior_last = prior_buf.last() if prior_buf is not None else None @@ -201,28 +257,18 @@ def __call__( self._marker_to_track[mid] = self._next_track_id track_id = self._marker_to_track[mid] else: - track_id = mid + track_id = -1 - det = Detection3DMarker( - bbox=bbox, - track_id=track_id, - class_id=mid, - confidence=1.0, - name=f"marker_{mid}", - ts=obs.ts, - image=image, - center=t_world_marker.translation, - size=marker_size, - transform=t_world_optical, + det = dataclasses.replace(det, track_id=track_id) + yielded_pose = Transform( + translation=det.center, + rotation=det.orientation, frame_id=self.world_frame, - orientation=t_world_marker.rotation, - marker_id=mid, - corners_px=corners_2d, - dictionary=self.aruco_dictionary, + child_frame_id=f"marker_{mid}", + ts=obs.ts, ) yielded_det = det - yielded_pose = t_world_marker if self.smoothing_window > 0: # Buffer raw detections per marker_id over a sliding # window; emit the windowed-mean pose so each successive @@ -248,5 +294,115 @@ def __call__( ) yield obs.derive(data=yielded_det, pose=yielded_pose).tag( - marker_id=mid, track_id=track_id + marker_id=mid, + track_id=track_id, + marker_frame_image=image, + marker_frame_count=marker_count, + marker_frame_index=marker_index, ) + + +class MarkersPerFrame(Transformer[Detection3DMarker | None, Detection3DArray]): + """Collapse marker fan-out back into one Detection3DArray per image frame. + + ``DetectMarkers`` normally emits one observation per decoded marker. For + live LCM semantics, downstream consumers need a single array for each + processed image, including empty arrays for frames where no marker decoded. + ``DetectMarkers(emit_empty_frames=True)`` supplies a ``None`` sentinel for + those empty frames and tags every marker observation with the source image + and frame marker count so this transformer can emit without waiting for a + later timestamp. + """ + + def __init__(self, frame_id: str = "world") -> None: + self.frame_id = frame_id + + def __call__( + self, upstream: Iterator[Observation[Detection3DMarker | None]] + ) -> Iterator[Observation[Detection3DArray]]: + pending: list[Detection3DMarker] = [] + pending_obs: Observation[Detection3DMarker | None] | None = None + pending_ts: float | None = None + + def flush() -> Observation[Detection3DArray] | None: + nonlocal pending, pending_obs, pending_ts + if pending_obs is None: + return None + result = self._to_array_observation(pending_obs, pending) + pending = [] + pending_obs = None + pending_ts = None + return result + + for obs in upstream: + det = obs.data + if det is None: + flushed = flush() + if flushed is not None: + yield flushed + yield self._to_array_observation(obs, []) + continue + + if pending_ts is not None and obs.ts != pending_ts: + flushed = flush() + if flushed is not None: + yield flushed + + if pending_obs is None: + pending_obs = obs + pending_ts = obs.ts + pending.append(det) + + expected_count = obs.tags.get("marker_frame_count") + if ( + isinstance(expected_count, int) + and expected_count > 0 + and len(pending) >= expected_count + ): + flushed = flush() + if flushed is not None: + yield flushed + + flushed = flush() + if flushed is not None: + yield flushed + + def _to_array_observation( + self, + obs: Observation[Detection3DMarker | None], + detections: list[Detection3DMarker], + ) -> Observation[Detection3DArray]: + image = self._source_image(obs, detections) + msg = ImageDetections3D(image, detections).to_ros_detection3d_array(frame_id=self.frame_id) + pose = self._source_pose(obs, detections) + return obs.derive(data=msg, pose=pose).tag(detections_length=len(detections)) + + @staticmethod + def _source_image( + obs: Observation[Detection3DMarker | None], + detections: list[Detection3DMarker], + ) -> Image: + if detections: + return detections[0].image + image = obs.tags.get("marker_frame_image") + if isinstance(image, Image): + return image + raise ValueError("MarkersPerFrame requires marker_frame_image for empty frames") + + @staticmethod + def _source_pose( + obs: Observation[Detection3DMarker | None], + detections: list[Detection3DMarker], + ) -> Any | None: + if detections and detections[0].transform is not None: + transform = detections[0].transform + return ( + transform.translation.x, + transform.translation.y, + transform.translation.z, + transform.rotation.x, + transform.rotation.y, + transform.rotation.z, + transform.rotation.w, + ) + return obs.pose From 3721fda99ceaff583a16e3475bf46ec29b05c531 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 18:09:35 +0800 Subject: [PATCH 58/64] Remove unused loop_closure/utils/autotrim.py --- dimos/mapping/loop_closure/utils/autotrim.py | 217 ------------------- 1 file changed, 217 deletions(-) delete mode 100644 dimos/mapping/loop_closure/utils/autotrim.py diff --git a/dimos/mapping/loop_closure/utils/autotrim.py b/dimos/mapping/loop_closure/utils/autotrim.py deleted file mode 100644 index c93094bc1b..0000000000 --- a/dimos/mapping/loop_closure/utils/autotrim.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright 2026 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Trim the static lead-in from a memory2 sqlite recording. - -Uses one stream's poses to find the first moment the robot displaces from -its starting pose by at least ``--tolerance``, then writes a trimmed copy of -every stream in the source, starting ``--lead-in`` seconds before that -motion. With ``--main-stream`` omitted, scans every stream that has poses -and prompts you to pick. - -Usage: - uv run python -m dimos.mapping.loop_closure.utils.autotrim mid360 \\ - --out mid360_trimmed.db --tolerance 0.20 -""" - -from __future__ import annotations - -from collections.abc import Callable -import json -import math -from pathlib import Path -import sqlite3 -import time -from typing import Any - -import typer - -from dimos.memory2.codecs.base import _resolve_payload_type -from dimos.memory2.store.sqlite import SqliteStore -from dimos.memory2.stream import Stream -from dimos.memory2.type.observation import Observation -from dimos.utils.data import resolve_named_path - - -def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: - """Matches dimos/mapping/utils/cli/globalmap.py:progress — kept inline to avoid pulling rerun.""" - seen = 0 - wall_start: float | None = None - last_wall: float | None = None - first_ts: float | None = None - - def _progress(obs: Observation[Any]) -> None: - nonlocal seen, wall_start, last_wall, first_ts - now = time.monotonic() - if wall_start is None: - wall_start = now - first_ts = obs.ts - assert first_ts is not None - frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0 - last_wall = now - seen += 1 - pct = 100 * seen // total if total else 100 - wall = now - wall_start - data = obs.ts - first_ts - speed = data / wall if wall > 0 else 0.0 - end = "\n" if seen >= total else "" - prefix = f"{label} " if label else "" - print( - f"\r{prefix}{pct:>3}% [{seen}/{total}] {data:.1f}s ({speed:.1f} x rt) {frame_ms:.0f}ms/frame", - end=end, - flush=True, - ) - - return _progress - - -def _find_motion_edge_ts( - stream: Stream[Any], tolerance: float, *, reverse: bool = False -) -> tuple[float, tuple[float, float, float]]: - """Walks a stream in ts order (or descending if reverse=True) and returns the - first ts whose pose is ≥ tolerance from the first pose seen in that walk. - - Forward: motion *start* — anchor = initial pose, first ts that moves away. - Reverse: motion *stop* — anchor = final pose, last ts that's still away. - """ - walk = stream.order_by("ts", desc=True) if reverse else stream - anchor: tuple[float, float, float] | None = None - for obs in walk: - if obs.pose is None: - continue - x, y, z = obs.pose[0], obs.pose[1], obs.pose[2] - if anchor is None: - anchor = (x, y, z) - continue - if math.dist((x, y, z), anchor) >= tolerance: - return obs.ts, anchor - raise RuntimeError( - f"No pose ever displaced ≥ {tolerance:.3f} m from {'final' if reverse else 'initial'} pose" - + (" (no poses in stream)" if anchor is None else "") - ) - - -def _stream_payload_types(db_path: Path) -> dict[str, type]: - """Read each stream's registered payload type from the _streams table.""" - conn = sqlite3.connect(str(db_path)) - try: - rows = conn.execute("SELECT name, config FROM _streams").fetchall() - finally: - conn.close() - return {name: _resolve_payload_type(json.loads(cfg)["payload_module"]) for name, cfg in rows} - - -def _pick_main_stream_interactive( - src: SqliteStore, payload_types: dict[str, type], tolerance: float -) -> str: - """Scan every stream's poses, print first/last motion times, prompt for a pick.""" - print(f"scanning streams for motion (≥{tolerance:.2f} m)...") - candidates: list[str] = [] - for name, ptype in payload_types.items(): - s = src.stream(name, ptype) - try: - start_ts, _ = _find_motion_edge_ts(s, tolerance) - stop_ts, _ = _find_motion_edge_ts(s, tolerance, reverse=True) - except RuntimeError: - print(f" {name:>12s}: no motion detected") - continue - t0, tN = s.first().ts, s.last().ts - candidates.append(name) - print( - f" {name:>12s}: motion +{start_ts - t0:6.2f} s → +{stop_ts - t0:6.2f} s" - f" (tail idle {tN - stop_ts:5.2f} s)" - ) - - if not candidates: - raise typer.BadParameter( - f"No stream has poses with ≥{tolerance:.2f} m motion. Lower --tolerance." - ) - - print("\npick main stream (number or name):") - for i, name in enumerate(candidates, 1): - print(f" {i}) {name}") - by_idx = {str(i): name for i, name in enumerate(candidates, 1)} - by_name = set(candidates) - while True: - choice = typer.prompt("choice", default="1").strip() - if choice in by_idx: - return by_idx[choice] - if choice in by_name: - return choice - print(f"invalid choice {choice!r}; try a number 1..{len(candidates)} or a stream name") - - -def main( - dataset: str = typer.Argument(..., help="Source .db: bare name (cwd or data/) or path"), - out: Path = typer.Option(..., "--out", help="Output trimmed .db path"), - main_stream: str | None = typer.Option( - None, - "--main-stream", - help="Stream whose poses define motion; omit to pick interactively", - ), - tolerance: float = typer.Option( - 0.20, "--tolerance", help="Distance (m) the main stream must travel before 'motion' starts" - ), - lead_in: float = typer.Option( - 1.0, "--lead-in", help="Seconds to keep before the first-motion timestamp" - ), -) -> None: - src_path = resolve_named_path(dataset, ".db") - if out.exists(): - raise typer.BadParameter(f"Output already exists: {out}") - - print(f"analizing dataset {src_path}") - payload_types = _stream_payload_types(src_path) - if main_stream is not None and main_stream not in payload_types: - raise typer.BadParameter( - f"Main stream {main_stream!r} not in source db. Available: {sorted(payload_types)}" - ) - - src = SqliteStore(path=str(src_path)) - with src: - if main_stream is None: - main_stream = _pick_main_stream_interactive(src, payload_types, tolerance) - - motion_ts, origin = _find_motion_edge_ts( - src.stream(main_stream, payload_types[main_stream]), tolerance - ) - cutoff = motion_ts - lead_in - ox, oy, oz = origin - print(f"\nmain stream {main_stream!r}: origin=({ox:.3f},{oy:.3f},{oz:.3f})") - print(f" motion start (≥{tolerance:.2f} m): ts={motion_ts:.3f}") - print(f" cutoff (lead-in {lead_in:.1f} s): ts={cutoff:.3f}\n") - - dst = SqliteStore(path=str(out)) - with dst: - for name, ptype in payload_types.items(): - src_s = src.stream(name, ptype) - dst_s = dst.stream(name, ptype) - total_src = src_s.count() - filtered = src_s.after(cutoff) - total_kept = filtered.count() - dropped = total_src - total_kept - cb = progress(total_kept, f"{name:>12s}") - for obs in src_s.after(cutoff): - dst_s.append(obs.data, ts=obs.ts, pose=obs.pose, tags=obs.tags or None) - cb(obs) - if total_kept == 0: - print(f"{name:>12s} kept 0/{total_src} (dropped {dropped})") - else: - print(f" ↳ dropped {dropped}/{total_src}") - - print(f"\nwrote {out}") - - -if __name__ == "__main__": - typer.run(main) From 6c31c4e38d279d1bf058245cbf73af355e4e19b8 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 18:10:33 +0800 Subject: [PATCH 59/64] map cli: rename globalmap.py to map.py --- dimos/mapping/loop_closure/autoresearch/program.md | 2 +- dimos/mapping/utils/cli/{globalmap.py => map.py} | 0 dimos/mapping/utils/cli/rename.py | 2 +- dimos/mapping/utils/cli/replay.py | 4 ++-- dimos/mapping/utils/cli/replay_marker.py | 2 +- dimos/robot/cli/dimos.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) rename dimos/mapping/utils/cli/{globalmap.py => map.py} (100%) diff --git a/dimos/mapping/loop_closure/autoresearch/program.md b/dimos/mapping/loop_closure/autoresearch/program.md index e543485d6a..61803a405c 100644 --- a/dimos/mapping/loop_closure/autoresearch/program.md +++ b/dimos/mapping/loop_closure/autoresearch/program.md @@ -79,7 +79,7 @@ investigate *why* the metric moved (or didn't): `uv run python -m dimos.mapping.loop_closure.eval hk_village2 -v`. A param change that, say, *adds* loop closures but worsens spread tells a very different story than one that *removes* them. -- **Use `dimos/mapping/utils/cli/globalmap.py`** as the visualization companion. It +- **Use `dimos/mapping/utils/cli/map.py`** as the visualization companion. It runs the same PGO + marker pipeline as the eval and renders the result in rerun. Use `--no-gui` for headless inspection or run with GUI locally to scrub the timeline and see where markers cluster, where diff --git a/dimos/mapping/utils/cli/globalmap.py b/dimos/mapping/utils/cli/map.py similarity index 100% rename from dimos/mapping/utils/cli/globalmap.py rename to dimos/mapping/utils/cli/map.py diff --git a/dimos/mapping/utils/cli/rename.py b/dimos/mapping/utils/cli/rename.py index c628dd03d5..cbda104798 100644 --- a/dimos/mapping/utils/cli/rename.py +++ b/dimos/mapping/utils/cli/rename.py @@ -46,7 +46,7 @@ def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: - """Matches dimos/mapping/utils/cli/globalmap.py:progress — kept inline to avoid pulling rerun.""" + """Matches dimos/mapping/utils/cli/map.py:progress — kept inline to avoid pulling rerun.""" seen = 0 wall_start: float | None = None last_wall: float | None = None diff --git a/dimos/mapping/utils/cli/replay.py b/dimos/mapping/utils/cli/replay.py index 6aa5e5da3b..79ef56c848 100644 --- a/dimos/mapping/utils/cli/replay.py +++ b/dimos/mapping/utils/cli/replay.py @@ -48,7 +48,7 @@ # Heavy dimos imports (mapping/memory2 → torch, scipy, open3d) are deferred into # main() so that `dimos map --help` stays fast. See test_cli_startup.py and the -# same pattern in dimos/mapping/utils/cli/globalmap.py. +# same pattern in dimos/mapping/utils/cli/map.py. if TYPE_CHECKING: from dimos.memory2.stream import Stream from dimos.memory2.type.observation import Observation @@ -58,7 +58,7 @@ def _progress(total: int, label: str) -> Callable[[Observation[Any]], None]: - """Matches dimos/mapping/utils/cli/globalmap.py:progress.""" + """Matches dimos/mapping/utils/cli/map.py:progress.""" seen = 0 wall_start: float | None = None last_wall: float | None = None diff --git a/dimos/mapping/utils/cli/replay_marker.py b/dimos/mapping/utils/cli/replay_marker.py index ef4d80ca86..c9766dab6d 100644 --- a/dimos/mapping/utils/cli/replay_marker.py +++ b/dimos/mapping/utils/cli/replay_marker.py @@ -38,7 +38,7 @@ # Heavy dimos imports (memory2/perception → torch, scipy, cv2) are deferred into # main() so that `dimos map --help` stays fast. See test_cli_startup.py and the -# same pattern in dimos/mapping/utils/cli/globalmap.py. +# same pattern in dimos/mapping/utils/cli/map.py. if TYPE_CHECKING: from dimos.memory2.stream import Stream diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 126f783266..c866a4546f 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -38,7 +38,7 @@ from dimos.core.daemon import daemonize, install_signal_handlers from dimos.core.global_config import GlobalConfig, global_config from dimos.core.run_registry import get_most_recent, is_pid_alive, stop_entry -from dimos.mapping.utils.cli.globalmap import main as _map_main +from dimos.mapping.utils.cli.map import main as _map_main from dimos.mapping.utils.cli.pose_fill import main as _map_pose_fill_main from dimos.mapping.utils.cli.rename import main as _map_rename_main from dimos.mapping.utils.cli.replay import main as _map_replay_main From 1c45fe08f3ed8e80f108a2b47a4a32c72f7c31a4 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sat, 30 May 2026 18:36:09 +0800 Subject: [PATCH 60/64] test_cli: invoke dimos map verbs in-process (4.5x faster) These end-to-end CLI tests forked python -m dimos.robot.cli.dimos per case, re-paying the heavy per-verb imports (rerun, open3d, voxel) 7 times. Run them in-process via Typer's CliRunner so those imports are paid once: 33.7s -> 7.5s, all 7 still pass. Trade-off: shared interpreter, no per-test process isolation. --- dimos/mapping/utils/cli/test_cli.py | 39 +++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/dimos/mapping/utils/cli/test_cli.py b/dimos/mapping/utils/cli/test_cli.py index 3714a9619f..3c12657c97 100644 --- a/dimos/mapping/utils/cli/test_cli.py +++ b/dimos/mapping/utils/cli/test_cli.py @@ -12,37 +12,56 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""End-to-end tests for the `dimos map` verbs, run as external subprocesses. +"""End-to-end tests for the `dimos map` verbs, run in-process. Each test invokes the real CLI against the `go2_short` recording (60s, auto- pulled via LFS) and asserts on the artifact it produces. A short `--duration` snippet keeps every invocation to a few seconds, and `--no-gui` stops the rrd verbs from spawning a rerun viewer. + +The CLI is invoked in-process via Typer's CliRunner (not a subprocess per case), +so the heavy dimos import is paid once for the whole module instead of per case. """ from __future__ import annotations from pathlib import Path -import subprocess -import sys +import traceback +from types import SimpleNamespace import pytest +from typer.testing import CliRunner + +# These drive the real CLI against an LFS recording (CPU voxel accumulation, +# multi-second runs) — self-hosted runner only. +pytestmark = pytest.mark.self_hosted # A few seconds in, then a couple seconds long — small enough to stay fast, # long enough that the robot moves (so dedup/PGO/markers have something to do). SEEK = 4.0 DURATION = 3.0 + # go2_short has 461 lidar frames over ~60s; a 3s snippet must be far fewer. FULL_LIDAR_COUNT = 461 +_runner = CliRunner() -def _run(*args: str, timeout: float = 300.0) -> subprocess.CompletedProcess[str]: - return subprocess.run( - [sys.executable, "-m", "dimos.robot.cli.dimos", "map", *args], - capture_output=True, - text=True, - timeout=timeout, - ) + +def _run(*args: str, timeout: float = 300.0) -> SimpleNamespace: + """Invoke `dimos map ` in-process and capture its result. + + Uses Typer's CliRunner so the dimos import cost is paid once (module import) + rather than per case. `timeout` is kept for call-site compatibility but is a + no-op here — pytest-timeout covers hangs. Returns a result with the + .returncode/.stdout/.stderr fields the test bodies use. + """ + from dimos.robot.cli.dimos import main as cli_app + + res = _runner.invoke(cli_app, ["map", *args]) + err = res.output + if res.exception is not None and not isinstance(res.exception, SystemExit): + err += "\n" + "".join(traceback.format_exception(res.exception)) + return SimpleNamespace(returncode=res.exit_code, stdout=res.output, stderr=err) def _stream_counts(db_path: Path) -> dict[str, int]: From 501f3e32f06d1b219a311aea3aad56b88dff16fa Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 1 Jun 2026 13:00:01 +0800 Subject: [PATCH 61/64] cleanup --- .../autoresearch/pgo_autoresearched.py | 18 +++---- dimos/mapping/utils/cli/map.py | 8 ++-- dimos/mapping/utils/cli/pose_fill.py | 6 +-- dimos/mapping/utils/cli/rename.py | 4 +- dimos/mapping/utils/cli/replay.py | 12 ++--- dimos/mapping/utils/cli/replay_marker.py | 12 ++--- dimos/memory2/module.py | 21 ++++++-- dimos/memory2/stream.py | 29 ++++++----- dimos/memory2/test_stream.py | 48 +++++++++++++++++++ 9 files changed, 114 insertions(+), 44 deletions(-) diff --git a/dimos/mapping/loop_closure/autoresearch/pgo_autoresearched.py b/dimos/mapping/loop_closure/autoresearch/pgo_autoresearched.py index ecb4735044..07215156f0 100644 --- a/dimos/mapping/loop_closure/autoresearch/pgo_autoresearched.py +++ b/dimos/mapping/loop_closure/autoresearch/pgo_autoresearched.py @@ -180,17 +180,13 @@ def pgo_keyframes( for obs in stream: if on_frame is not None: on_frame(obs) - if obs.pose is None: + pose = obs.pose_tuple + if pose is None: continue - # Skip placeholder poses (origin position OR zero quaternion). - if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: + # Skip placeholder poses (origin position OR zero/identity quaternion). + if pose[0] == 0 and pose[1] == 0 and pose[2] == 0: continue - if ( - obs.pose[3] == 0 - and obs.pose[4] == 0 - and obs.pose[5] == 0 - and (obs.pose[6] == 0 or obs.pose[6] == 1) - ): + if pose[3] == 0 and pose[4] == 0 and pose[5] == 0 and (pose[6] == 0 or pose[6] == 1): continue local_pose = _obs_to_pose3(obs) pgo.process(local_pose, obs.ts, obs.data) @@ -310,9 +306,9 @@ def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3: """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`.""" import gtsam # type: ignore[import-not-found,import-untyped] - if obs.pose is None: + if obs.pose_tuple is None: raise LookupError("No pose set on this observation") - x, y, z, qx, qy, qz, qw = obs.pose + x, y, z, qx, qy, qz, qw = obs.pose_tuple return gtsam.Pose3( gtsam.Rot3.Quaternion(float(qw), float(qx), float(qy), float(qz)), gtsam.Point3(float(x), float(y), float(z)), diff --git a/dimos/mapping/utils/cli/map.py b/dimos/mapping/utils/cli/map.py index f53f69923a..7fa2d39cc7 100644 --- a/dimos/mapping/utils/cli/map.py +++ b/dimos/mapping/utils/cli/map.py @@ -381,7 +381,7 @@ def main( pgo = True store = SqliteStore(path=db_path) - lidar = store.stream(lidar_stream, PointCloud2).clip(seek, duration) + lidar = store.stream(lidar_stream, PointCloud2).from_time(seek or None).to_time(duration) print(lidar.summary()) @@ -476,12 +476,14 @@ def main( # (verified: matches lidar_base_pose + BASE_TO_OPTICAL to ~1mm). With # --image-pose, swap that stored pose for a different source (e.g. # fastlio_odometry), composing the base→optical mount onto it first. - color_image = store.stream("color_image", Image).clip(seek, duration) + color_image = store.stream("color_image", Image).from_time(seek or None).to_time(duration) n_images = color_image.count() if image_pose is not None: from dimos.mapping.utils.cli.pose_fill import pose_fill - src_pose: Stream[Any] = store.stream(image_pose).clip(seek, duration) + src_pose: Stream[Any] = ( + store.stream(image_pose).from_time(seek or None).to_time(duration) + ) print(f"re-posing color_image from {image_pose!r} + camera optical mount") color_image = pose_fill(color_image, src_pose, tolerance=0.1, mount=BASE_TO_OPTICAL) cam_info = CameraInfo.from_yaml(str(camera_info)) if camera_info else _camera_info_static() diff --git a/dimos/mapping/utils/cli/pose_fill.py b/dimos/mapping/utils/cli/pose_fill.py index c42072cbe1..012a1fba3d 100644 --- a/dimos/mapping/utils/cli/pose_fill.py +++ b/dimos/mapping/utils/cli/pose_fill.py @@ -129,12 +129,12 @@ def pose_fill_db( # the source blob still resolves); plain metadata for everything else. if name == target: rows: Iterable[Observation[Any]] = pose_fill( - src.stream(name).clip(seek, duration), - src.stream(pose_source).clip(seek, duration), + src.stream(name).from_time(seek or None).to_time(duration), + src.stream(pose_source).from_time(seek or None).to_time(duration), tolerance=tolerance, ) else: - rows = src.stream(name).clip(seek, duration).order_by("ts") + rows = src.stream(name).from_time(seek or None).to_time(duration).order_by("ts") scalar = src_b.data_type in (int, float) n = 0 diff --git a/dimos/mapping/utils/cli/rename.py b/dimos/mapping/utils/cli/rename.py index cbda104798..95c3f671b1 100644 --- a/dimos/mapping/utils/cli/rename.py +++ b/dimos/mapping/utils/cli/rename.py @@ -164,7 +164,9 @@ def main( with dst: for src_name, dst_name in kept.items(): ptype = payload_types[src_name] - src_s: Stream[Any] = src.stream(src_name, ptype).clip(seek, duration) + src_s: Stream[Any] = ( + src.stream(src_name, ptype).from_time(seek or None).to_time(duration) + ) dst_s: Stream[Any] = dst.stream(dst_name, ptype) total = src_s.count() cb = progress(total, f"{dst_name:>16s}") diff --git a/dimos/mapping/utils/cli/replay.py b/dimos/mapping/utils/cli/replay.py index 79ef56c848..4228f6cc3c 100644 --- a/dimos/mapping/utils/cli/replay.py +++ b/dimos/mapping/utils/cli/replay.py @@ -250,19 +250,19 @@ def main( print(store.summary()) def clipped(name: str, ptype: type[Any]) -> Stream[Any]: - return store.stream(name, ptype).clip(seek, duration) + return store.stream(name, ptype).from_time(seek or None).to_time(duration) lidar = clipped("lidar", PointCloud2) color_image = clipped("color_image", Image) has_livox = "fastlio_lidar" in store.streams livox = clipped("fastlio_lidar", PointCloud2) if has_livox else None - # ---- per-frame raw clouds ---- + # Per-frame raw clouds. _log_clouds(" lidar", lidar, "world/lidar", voxel, point_mode) if livox is not None: _log_clouds("fastlio_lidar", livox, "world/fastlio_lidar", voxel, point_mode) - # ---- accumulated voxel maps over the selected PointCloud2 streams ---- + # Accumulated voxel maps over the selected PointCloud2 streams. # --map logs a growing map per stream; --map-final logs one static map # per stream. --map-carve-columns clears the Z column under each surface # voxel (good for forward-facing lidar like the Go2 L1); off by default. @@ -300,7 +300,7 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]: static=True, ) - # ---- fastlio pose axis + path from fastlio_odometry stream ---- + # fastlio pose axis + path from fastlio_odometry stream. if "fastlio_odometry" in store.streams: odometry = clipped("fastlio_odometry", Odometry) cb = _progress(odometry.count(), "fastlio_odometry") @@ -324,7 +324,7 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]: color=(255, 165, 0), # orange ) - # ---- Go2 native odom pose axis + path ---- + # Go2 native odom pose axis + path. if "odom" in store.streams: odom = clipped("odom", PoseStamped) cb = _progress(odom.count(), " odom") @@ -348,7 +348,7 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]: color=(0, 200, 100), # green ) - # ---- pass 2: camera pose + image per color_image ---- + # Pass 2: camera pose + image per color_image. cam_pipeline = ( color_image.transform(throttle(1.0 / camera_hz)) if camera_hz > 0 else color_image ) diff --git a/dimos/mapping/utils/cli/replay_marker.py b/dimos/mapping/utils/cli/replay_marker.py index c9766dab6d..881ef4cf9b 100644 --- a/dimos/mapping/utils/cli/replay_marker.py +++ b/dimos/mapping/utils/cli/replay_marker.py @@ -95,10 +95,10 @@ def main( store = SqliteStore(path=str(db_path)) with store: - color_image = store.stream("color_image", Image).clip(seek, duration) - lidar = store.stream("lidar", PointCloud2).clip(seek, duration) + color_image = store.stream("color_image", Image).from_time(seek or None).to_time(duration) + lidar = store.stream("lidar", PointCloud2).from_time(seek or None).to_time(duration) - # ---- pass 1: robot base pose over time (from lidar.pose) ---- + # Pass 1: robot base pose over time (from lidar.pose). for lidar_obs in lidar: if lidar_obs.pose_tuple is None: continue @@ -111,7 +111,7 @@ def main( ), ) - # ---- pass 2: camera pose + image per color_image frame ---- + # Pass 2: camera pose + image per color_image frame. n_img = color_image.count() for i, img_obs in enumerate(color_image): rr.set_time(TIMELINE, timestamp=img_obs.ts) @@ -133,7 +133,7 @@ def main( if (i + 1) % 50 == 0 or i + 1 == n_img: print(f"images: {i + 1}/{n_img}") - # ---- pass 3: marker detections (filtered same way as `dimos map`) ---- + # Pass 3: marker detections (filtered same way as `dimos map`). xf = DetectMarkers(camera_info=cam_info, marker_length_m=marker_size) pipeline: Stream[Any] = color_image.transform( QualityWindow(lambda img: img.sharpness, window=quality_window) @@ -191,7 +191,7 @@ def main( ) print(f"detections: {n_det}") - # ---- pass 4: averaged tracks (smoothing_window > 0 → per-track ids) ---- + # Pass 4: averaged tracks (smoothing_window > 0 → per-track ids). # Re-runs the same filtered pipeline through a smoothing detector; # each track yields one entity that updates as the windowed average # refines. Color stable per track_id for visual identity. diff --git a/dimos/memory2/module.py b/dimos/memory2/module.py index 929d241882..9a3d90e164 100644 --- a/dimos/memory2/module.py +++ b/dimos/memory2/module.py @@ -14,13 +14,14 @@ from __future__ import annotations +import enum import inspect import os from pathlib import Path import time from typing import TYPE_CHECKING, Any, Generic, TypeVar, cast -from pydantic import field_validator +from pydantic import Field, field_validator from reactivex.disposable import Disposable from dimos.agents.annotation import skill @@ -37,6 +38,7 @@ from dimos.models.embedding.clip import CLIPModel from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped from dimos.msgs.sensor_msgs.Image import Image +from dimos.utils.data import backup_file from dimos.utils.logging_config import setup_logger if TYPE_CHECKING: @@ -240,8 +242,15 @@ def _similarity(obs: Observation[Any]) -> float: return best.pose_stamped +class OnExisting(str, enum.Enum): + OVERWRITE = "overwrite" + ERROR = "error" + BACKUP = "backup" + + class RecorderConfig(MemoryModuleConfig): - overwrite: bool = True + on_existing: OnExisting = OnExisting.BACKUP + backup_keep_last: int = Field(default=10, ge=0) default_frame_id: str = "base_link" tf_tolerance: float = 0.5 db_path: str | Path = "recording.db" @@ -277,9 +286,15 @@ def start(self) -> None: # this module in a deployed blueprint. db_path = Path(self.config.db_path) if db_path.exists(): - if self.config.overwrite: + if self.config.on_existing is OnExisting.OVERWRITE: db_path.unlink() logger.info("Deleted existing recording %s", db_path) + elif self.config.on_existing is OnExisting.BACKUP: + backup = backup_file(db_path, keep_last=self.config.backup_keep_last) + if backup is None: + logger.info("Removed existing recording %s (backup_keep_last=0)", db_path) + else: + logger.info("Backed up existing recording %s -> %s", db_path, backup) else: raise FileExistsError(f"Recording already exists: {db_path}") diff --git a/dimos/memory2/stream.py b/dimos/memory2/stream.py index cff2e689c4..045ac0c386 100644 --- a/dimos/memory2/stream.py +++ b/dimos/memory2/stream.py @@ -232,17 +232,6 @@ def at_relative(self, t: float, tolerance: float = 1.0) -> Stream[T, O]: t0 = self.first().ts return self.at(t0 + t, tolerance=tolerance) - def clip(self, seek: float = 0.0, duration: float | None = None) -> Stream[T, O]: - """Window to a snippet ``seek`` seconds in, ``duration`` seconds long. - - Offsets are relative to the first observation; ``duration=None`` runs to - the end. A no-op when ``seek<=0`` and ``duration is None``. - """ - if seek <= 0 and duration is None: - return self - start = self.first().ts + seek - return self.after(start) if duration is None else self.time_range(start, start + duration) - def near(self, pose: Any, radius: float) -> Stream[T, O]: # Accept Pose/PoseStamped (any object with `.position`), Vector3, # numpy arrays, or (x, y, z) tuples — Vector3() handles the rest. @@ -262,6 +251,24 @@ def limit(self, k: int) -> Stream[T, O]: def offset(self, n: int) -> Stream[T, O]: return self._replace_query(offset_val=n) + # Time windowing — None means unbounded on that side. ``*_time`` is relative + # to the stream's first observation; ``*_timestamp`` is absolute epoch seconds. + def from_time(self, seconds: float | None) -> Stream[T, O]: + """Keep observations from ``seconds`` after the first (relative).""" + return self if seconds is None else self.after(self.first().ts + seconds) + + def to_time(self, seconds: float | None) -> Stream[T, O]: + """Keep ``seconds`` of observations from the current start (relative duration).""" + return self if seconds is None else self.before(self.first().ts + seconds) + + def from_timestamp(self, ts: float | None) -> Stream[T, O]: + """Keep observations after absolute epoch ``ts``.""" + return self if ts is None else self.after(ts) + + def to_timestamp(self, ts: float | None) -> Stream[T, O]: + """Keep observations up to absolute epoch ``ts``.""" + return self if ts is None else self.before(ts) + def search(self, query: Embedding, k: int | None = None) -> Stream[T, EmbeddedObservation[T]]: """Rank observations by cosine similarity to *query*. diff --git a/dimos/memory2/test_stream.py b/dimos/memory2/test_stream.py index 9e9d989e4f..63c21b1b86 100644 --- a/dimos/memory2/test_stream.py +++ b/dimos/memory2/test_stream.py @@ -960,3 +960,51 @@ def test_dense_secondary_pairs_correctly(self, memory_session): assert len(pairs) == 5_000 # Each primary's nearest secondary is the coincident one (Δts ≈ 0). assert max(abs(p.data[0].ts - p.data[1].ts) for p in pairs) < 1e-9 + + +class TestTimeWindowing: + """``*_time`` is relative to the first observation, ``*_timestamp`` is absolute. + + A trailing ``to_time`` measures a duration from the current start, so chaining + reads as "skip, then take the following N seconds". + """ + + def test_from_time_skips_relative_seconds(self, make_stream): + stream = make_stream(10) # ts 0..9 + assert [o.data for o in stream.from_time(3)] == [40, 50, 60, 70, 80, 90] + + def test_to_time_keeps_leading_seconds(self, make_stream): + stream = make_stream(10) + assert [o.data for o in stream.to_time(3)] == [0, 10, 20] + + def test_chained_skip_then_following_duration(self, make_stream): + stream = make_stream(10) # ts 0..9 + # skip first 2s, then the following 3s -> ts 3, 4, 5 + assert [o.data for o in stream.from_time(2).to_time(3)] == [30, 40, 50] + + def test_from_timestamp_is_absolute(self, make_stream): + stream = make_stream(10, start_ts=1000.0) # ts 1000..1009 + assert [o.data for o in stream.from_timestamp(1003.0)] == [40, 50, 60, 70, 80, 90] + + def test_to_timestamp_is_absolute(self, make_stream): + stream = make_stream(10, start_ts=1000.0) + assert [o.data for o in stream.to_timestamp(1003.0)] == [0, 10, 20] + + def test_absolute_range(self, make_stream): + stream = make_stream(10, start_ts=1000.0) + # all between 1002 and 1006 -> ts 1003, 1004, 1005 + windowed = stream.from_timestamp(1002.0).to_timestamp(1006.0) + assert [o.data for o in windowed] == [30, 40, 50] + + def test_absolute_start_relative_duration(self, make_stream): + stream = make_stream(10, start_ts=1000.0) + # seek to 1003, then the following 3s -> ts 1004, 1005, 1006 + assert [o.data for o in stream.from_timestamp(1003.0).to_time(3)] == [40, 50, 60] + + def test_none_bounds_are_noops(self, make_stream): + stream = make_stream(5) + full = [0, 10, 20, 30, 40] + assert [o.data for o in stream.from_time(None)] == full + assert [o.data for o in stream.to_time(None)] == full + assert [o.data for o in stream.from_timestamp(None)] == full + assert [o.data for o in stream.to_timestamp(None)] == full From ecdcee65fb457c1285f2600be38c3d321df47dac Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 1 Jun 2026 14:09:21 +0800 Subject: [PATCH 62/64] Address PR review: mypy fix, pose_fill ordering, tests, drop autoresearch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - pgo_keyframes: widen stream param to Iterable[Observation[PointCloud2]] (clears the mypy gate; it never uses Stream methods) - pose_fill_db: order_by("ts") on both align inputs — align requires ts-ascending iteration, but sqlite defaults to id ASC (insertion order), which would silently mis-pair out-of-order recordings - add CI-running unit tests for Observation.with_pose (lazy payload kept) and pose_fill(mount=...) composition - remove the unreferenced autoresearch/ research drop (near-duplicate of pgo_auto.py + scaffolding); pgo_auto.py kept --- .../loop_closure/autoresearch/conclusions.md | 262 ------ .../autoresearch/pgo_autoresearched.py | 798 ------------------ .../loop_closure/autoresearch/program.md | 221 ----- dimos/mapping/loop_closure/pgo_auto.py | 4 +- dimos/mapping/utils/cli/pose_fill.py | 4 +- dimos/mapping/utils/cli/test_pose_fill.py | 61 ++ dimos/memory2/test_stream.py | 36 + 7 files changed, 101 insertions(+), 1285 deletions(-) delete mode 100644 dimos/mapping/loop_closure/autoresearch/conclusions.md delete mode 100644 dimos/mapping/loop_closure/autoresearch/pgo_autoresearched.py delete mode 100644 dimos/mapping/loop_closure/autoresearch/program.md create mode 100644 dimos/mapping/utils/cli/test_pose_fill.py diff --git a/dimos/mapping/loop_closure/autoresearch/conclusions.md b/dimos/mapping/loop_closure/autoresearch/conclusions.md deleted file mode 100644 index 8504ccd731..0000000000 --- a/dimos/mapping/loop_closure/autoresearch/conclusions.md +++ /dev/null @@ -1,262 +0,0 @@ -# loop-closure autoresearch — conclusions - -## TL;DR - -Baseline `TOTAL_SPREAD = 48.655 m`. Tuned `pgo.py` to **22.289 m**, a -**-54.2%** reduction over 37 commits. - -| dataset | baseline | final | change | -|-------------|---------:|------:|---------:| -| hk_village1 | 5.83 | 6.20 | +6% | -| hk_village2 | 19.53 | 6.49 | **-67%** | -| hk_village3 | 5.00 | 3.78 | -24% | -| hk_village4 | 13.14 | 2.39 | **-82%** | -| hk_village5 | 2.80 | 2.14 | -24% | -| hk_village6 | 1.74 | 1.59 | -9% | - -hk_village2 and hk_village4 were dominating the baseline metric; both -contain a clear "robot was disturbed" event mid-trajectory that the -unmodified loop-closure code couldn't bridge. - -## The problem, after looking at it - -Two failure modes drove the baseline number, both visible from the -per-dataset breakdown but only diagnosable by looking at the actual -data: - -### 1. PGO can't relocalize through a meter-scale disturbance - -`hk_village2` had drift growing from 0 to 4.7 m across one trajectory. -Loop closure search ran against optimized-frame positions with a 2 m -radius — once optimized drift exceeded 2 m, every nearby revisit fell -outside the search window. The robot kept revisiting the same physical -area, but the optimizer never saw a match. Marker spread for the only -multi-tracked marker (id 10) was 19 m, all because detections from after -the disturbance landed wherever the unconstrained chain put them. - -Diagnostic script (`/tmp/hk2_marker10.py`) dumped per-detection raw + -corrected positions plus the nearest keyframe's drift. The drift -trace (`/tmp/drift_trace.py`) printed kf-by-kf when drift crossed -threshold. These were the two scripts that made every subsequent -tuning decision obvious. - -### 2. Chain factors were overconfident - -Inter-keyframe BetweenFactors had translation variance `1e-4` (sigma -~1cm). Accumulated over 262 keyframes that implies a total uncertainty -of ~16 cm — but actual drift in hk_village2 hit 4 m. PGO under-corrected -even when good loop closures fired, because the chain factors fought -back. Forensic comparison of marker raw positions vs. PGO-corrected -positions showed the correction matched the *measured* drift but -under-corrected the *true* drift by ~36%. - -## What worked (in order they were found) - -Each line in the table is roughly a real win — bigger pieces of the -total improvement at the top. - -| change | spread | -|----------------------------------------------------------------|--------------:| -| baseline | 48.66 | -| tighten loop rot_var 0.05 → 0.01 then → 0.003 | 47.75 → 37.98 | -| `loop_submap_half_range` 10 → 40 (more ICP context) | 39.92 | -| tighten ICP `max_correspondence_dist` 1.0 → 0.5 | 38.82 | -| drift-gated local-frame fallback (radius 15 m, drift > 0.5 m) | 34.06 | -| multi-loop accept inside fallback (K=7 candidates) | 31.40 | -| `min_loop_detect_duration` 5s → 3s | 33.66 → 31.26 | -| **drift-aware chain variance** (8e-4 if prev kf drift > 1.3 m) | 27.39 | -| nearest-neighbor interp (vs linear blend) | 27.33 | -| ISAM2 Dogleg optimizer | 27.33 | -| trans_var floor 0.01 → 0.005 | 30.24 | -| **second-pass loop rescan** over converged optimized poses | 24.62 | -| rescan opt-only (no fallback) + `submap_half_range=38` | 22.60 | -| rescan `time_thresh_override=13s` | **22.29** | - -The two structural changes (chain-variance-by-drift and second-pass -rescan) account for most of the improvement. The rest is parameter -tuning at their joint sweet spot, which is narrow — moving any one knob -±20% usually regressed by 1 m+. - -## What didn't work (a non-exhaustive list) - -So future tuners don't repeat these: - -- **Loosening trajectory variance globally** (1e-4 → 1e-3, 5e-4, etc.) — - helps hk2 dramatically (sometimes drops it to 8 m) but blows up - hk1/hk5/hk6 because chain becomes effectively unconstrained. -- **RANSAC + FPFH global registration** for fallback init — admits - wrong alignments (geometrically similar but different actual places) - that the optimizer can't reject. -- **Coarse-to-fine ICP** for fallback — same problem; coarse pass found - wrong-but-locally-tight alignments. -- **ICP init from local-frame relative pose** — marginal at best, - occasionally worse. -- **Huber robust kernel on loop factors** (both global and - fallback-only) — down-weights good loops along with bad ones. -- **Forcing multi-loop accept everywhere** (not just fallback) — adds - redundant constraints in easy datasets, increases noise. -- **Two-pass rescan** (run rescan twice) — over-constrains. -- **Tighter chain factors when many loops exist** — they keep being the - bottleneck even with abundant loop evidence. -- **Variations on the drift threshold for chain loosening** (1.0, 1.2, - 1.4, 1.5, 1.7, 1.8, 2.0, 3.0, multi-tier) — 1.3 m is sharply optimal. -- **Variations on rescan submap_half_range** (20, 30, 33, 35, 36, 37, - 39, 40, 50, 80) — 38 is sharply optimal; first-pass needs 40, not 38. -- **Rescan with wider opt search radius, force-fallback, smaller source - submap, force-multi-loop, tighter ICP correspondence, lower - candidate count, tighter early-exit** — all worse. -- **Windowed-mean / Catmull-Rom interpolation in the corrections** — - nearest-neighbor wins; the noise isn't in interp blending. -- **Final batch LM re-optimization after ISAM2** — ISAM2 was already - converged; LM finds the same answer. - -## How the process went - -The first half of the experiment was parameter twiddling — small wins -adding up to ~10 m of improvement, then a hard plateau around 27–28 m -where every knob was at its joint optimum and ±20% perturbations -regressed. - -The breakthroughs all came from **investigation runs that didn't change -`pgo.py`**: - -1. Per-dataset breakdown of spread (already in the eval table). Showed - hk_village2 + hk_village4 were 68% of the baseline metric. -2. `_diag.py` / `hk2_marker10.py` — per-marker pose dump. Revealed - marker 10 in hk_village2 had detections 5 m apart in the corrected - frame, all in the disturbance area. Concretely showed the - relocalization-after-disturbance failure mode. -3. `drift_trace.py` — kf-by-kf drift trace. Showed when each dataset - crossed which drift threshold and how long it stayed there; - directly motivated the "drift-aware chain variance" change. -4. Comparing raw vs. corrected marker positions plus the correction - translation. Showed the correction was matching the *measured* - drift but falling 36% short of the true drift — pointed straight - at the chain-factor overconfidence problem. - -Each of these scripts ran in 30–60s, didn't count toward the -experiment log, and led to a 1–3 m improvement that knob tuning hadn't -been finding. The second-pass rescan idea came from "we converged once, -the optimized poses are now actually trustworthy, what would loop -detection do if we re-ran it?" — and that single structural change -dropped the metric by ~3 m. - -The final ~2 m came from narrow-window tuning of the rescan -parameters: `submap_half_range=38` (not 37 or 39) and -`time_thresh_override=13s` (not 12 or 14). At this point most -perturbations regress by 0.1–2 m, suggesting we're at a real local -optimum and further gains likely need a different structural change -(e.g., refining the marker_transformer integration, which is out of -scope here). - -## What's still on the table - -- **Marker_transformer rotation accuracy.** Forensic of hk_village2 - marker 10 shows det 3 (the worst remaining) has 1.4 m of x-error - that doesn't move with chain or loop tuning. The robot saw the - marker from different angles at det 1 vs det 3, and small yaw drift - in the optimized pose projects to large marker-position error - because the marker is ~1 m from the camera. PGO can't fix the - camera-to-marker geometry; this is approaching the noise floor. -- **Per-recording adaptive parameters.** The current params are a - global optimum across all 6 recordings; recordings 1 and 2/4 have - pretty different dynamics. A per-recording tuner would probably - squeeze another 1–2 m total but isn't well-aligned with deployment. - -## Code shape - -The final `pgo.py` is bigger than the original (the `_search_for_loops` -function in particular took on several optional kwargs to support the -rescan code path). Worth a cleanup pass before merging — the original -inline code path stayed for the first pass; only the rescan path needs -the extra knobs. A targeted refactor could pull rescan into its own -method and drop most of the kwargs. - -## What data would have helped next time - -Some signals would have shortened the loop substantially: - -- **Ground-truth marker positions.** A `markers.csv` with the true - world position of each `marker_id` per recording. Right now the eval - scores "consistency across detections of the same marker", which - only tells you when corrections diverge — not whether they converge - to the *right* place. Half my forensic time was spent inferring true - positions from the most consistent cluster. With ground truth you - could also distinguish "PGO under-correcting" from "marker detection - is wrong" without having to reason about it. -- **Ground-truth trajectories.** Even just one or two recordings with - pose ground truth (e.g., logged from a more accurate localizer) - would let you compute keyframe-pose error directly, instead of using - marker-spread as a proxy. The proxy is informative but indirect: - there were several experiments where loop count and quality both - improved but spread stayed flat because the underlying poses moved - along the marker viewing rays. -- **Annotation of disturbance events.** Marking the timestamps in each - recording where the robot got pushed / lifted / kicked would have - saved a lot of inference. I built `drift_trace.py` to detect these - from PGO output, but the timestamps would have been cleaner ground - truth and would let an automated tuner segment "easy" from - "disturbed" trajectory regions. -- **Loop closure log structured beyond `[inf]` log lines.** Per-loop - records of (source kf, target kf, score, was-fallback, accepted-vs- - rejected-by-ICP) emitted as JSONL or a sqlite table would have made - many forensics one-liner queries instead of grep + Python parsing. -- **The eval's per-marker breakdown printed by default**, not derived - from a custom diag script. The dataset row tells you which recording - is hurting; one more level of detail (which marker, how many - detections, max pair distance) would have pointed at hk_village2 - marker 10 in the first 5 minutes instead of after several - knob-tuning rounds. - -## What approach would help next time - -Roughly in order of expected payoff: - -1. **Spend the first iteration on tooling, not tuning.** I tuned for - ~20 commits before writing the first diagnostic script and got the - biggest single wins (drift-aware chain, second-pass rescan) only - *after* the forensics existed. The program.md update mid-experiment - ("Investigation is part of the loop") was the right correction. The - diagnostics paid back their cost in the first iteration they were - used. -2. **Run dimos map with rerun for the worst recording before - touching anything.** Even a 5-minute look at the actual trajectory, - marker positions, and loop closure edges would have made the - disturbance failure mode obvious and saved several speculative - tuning rounds. I never actually opened rerun this session and the - most important visualization was reconstructed badly from log - parsing. -3. **State a hypothesis before each change, log the prediction, then - check.** I did this informally in the responses to the user but - the experiment log records *what changed and the resulting - spread*, not *what was predicted*. When a change regresses, the - prediction-vs-reality gap is the actual learning; without it you - only learn "that one didn't work." -4. **Search the loose-chain / drift-threshold parameter space jointly - with a grid, not greedy.** The threshold and the loose variance - value interact (1.5/5e-4 worked, 1.3/8e-4 worked better; some - combinations weren't tested because the greedy walk didn't go - there). A 3×3 or 5×5 grid would have taken 25 evals (≈40 min) and - probably found the joint optimum directly. I spent more time than - that on greedy walks that revisited the same region. -5. **Try the structural change ("rescan with converged poses") much - earlier.** It was obvious in retrospect — if loop search depended - on the optimized poses being accurate, and those poses are most - accurate after PGO converges, then re-searching after convergence - *had to* find more loops. I sat on this for many iterations because - it felt like a bigger change. The cost was ~30 minutes of - refactoring `_search_for_loops` to take a `cur_idx`, and the - payoff was the largest single improvement. -6. **Keep a "did not work" register that's easier to grep than the - results.tsv `discard` rows.** Several ideas resurfaced under - different names (e.g., "loosen rot_var in fallback" got tried - three times across iterations). Cross-referencing against past - attempts before each new experiment is cheap; not doing it is - compounding waste. -7. **Resist twiddling the same knob multiple times.** Once a - parameter is at a local optimum, ±1 unit perturbations are usually - in the noise; the energy is better spent on a structural change - somewhere else. I did several rounds of "try 1.3 / try 1.4 / try - 1.5" once a knob hit its sweet spot, and the wins from those - sub-percent moves were almost always reverted by the next - experiment downstream. diff --git a/dimos/mapping/loop_closure/autoresearch/pgo_autoresearched.py b/dimos/mapping/loop_closure/autoresearch/pgo_autoresearched.py deleted file mode 100644 index 07215156f0..0000000000 --- a/dimos/mapping/loop_closure/autoresearch/pgo_autoresearched.py +++ /dev/null @@ -1,798 +0,0 @@ -# Copyright 2026 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""PGO drift corrections as composable Stream stages. - -A lidar/odom stream comes in with poses that drift over time — the robot's -estimate of where it is in the world slowly diverges from ground truth as -small per-frame errors accumulate. PGO ("pose graph optimization") detects -revisited places via loop closure and pulls the trajectory back into -self-consistency. The result is a stream of *corrections*: a Transform per -keyframe that maps every drifted ("raw") pose to its corrected one. - -Pipeline: - - lidar: Stream[PointCloud2] # pose-stamped scans - -> pgo_keyframes(...) -> Stream[Keyframe] # one per kf - -> keyframes_to_corrections(...) -> Stream[Transform] # world_corrected <- world_raw - -> apply_corrections(any_stream, corrections) -> Stream[T] # obs.pose shuffled - -`pgo_keyframes` runs ISAM2 + ICP loop closure across the input, emitting -each kept keyframe with both its raw odom pose (`local`) and its optimized -pose (`optimized`) as Transforms. Pass `loop_closures_out=[]` to also -collect each accepted ICP loop edge as a `LoopClosure` (source+target -optimized poses + ICP fitness) — useful for visualizing the pose graph. - -`keyframes_to_corrections` is pure arithmetic: per keyframe, the drift -correction is `optimized @ local^-1` (Transform composition). The resulting -stream has one entry per keyframe — sparse in time. - -`make_interpolator` materializes the sparse correction stream into a -ts -> Transform function: SLERP for rotation, linear lerp for translation, -clipping out-of-range queries to the endpoints. - -`apply_corrections` shuffles `obs.pose` on any stream using the interpolated -correction at each obs.ts. It's read-only on `obs.data`, so the same lidar -stream can be re-applied — or a different recording from the same physical -space, if the corrections generalize. - -`gtsam` is imported lazily inside hot helpers so importing this module -stays cheap and gtsam-free for consumers that only need `Keyframe` / -`apply_corrections`. -""" - -from __future__ import annotations - -from collections.abc import Callable, Iterator -from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Unpack - -import numpy as np -import open3d as o3d # type: ignore[import-untyped] -import open3d.core as o3c # type: ignore[import-untyped] -from scipy.spatial.transform import Rotation - -from dimos.memory2.store.memory import MemoryStore -from dimos.memory2.stream import Stream -from dimos.memory2.type.observation import Observation -from dimos.msgs.geometry_msgs.Quaternion import Quaternion -from dimos.msgs.geometry_msgs.Transform import Transform -from dimos.msgs.geometry_msgs.Vector3 import Vector3 -from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 -from dimos.protocol.service.spec import BaseConfig -from dimos.utils.logging_config import setup_logger - -if TYPE_CHECKING: - import gtsam # type: ignore[import-not-found,import-untyped] - -T = TypeVar("T") - -FRAME_WORLD_CORRECTED = "world_corrected" -FRAME_WORLD_RAW = "world_raw" -FRAME_BODY = "body" - -logger = setup_logger() - - -class PGOConfig(BaseConfig): - # Keyframe detection - key_pose_delta_trans: float = 0.5 - key_pose_delta_deg: float = 10.0 - - # Loop closure - loop_search_radius: float = 2.0 - loop_search_radius_local: float = 15.0 - loop_fallback_drift_thresh: float = 0.5 # min drift to trigger local fallback - loop_time_thresh: float = 20.0 - loop_time_thresh_local: float = 40.0 # stricter time gap for local fallback - loop_score_thresh: float = 0.3 - loop_score_thresh_tight: float = 0.05 # early-exit threshold: ICP rmse ~22cm - loop_submap_half_range: int = 40 - loop_candidates_per_iter: int = 7 - min_icp_inliers: int = 10 - min_keyframes_for_loop_search: int = 10 - loop_closure_extra_iterations: int = 2 - submap_resolution: float = 0.2 - min_loop_detect_duration: float = 3.0 - - # ICP - max_icp_iterations: int = 50 - max_icp_correspondence_dist: float = 0.5 - - -class PGOKwargs(TypedDict, total=False): - """Typed kwargs for `pgo_keyframes`; mirrors `PGOConfig` fields.""" - - key_pose_delta_trans: float - key_pose_delta_deg: float - loop_search_radius: float - loop_search_radius_local: float - loop_fallback_drift_thresh: float - loop_time_thresh: float - loop_time_thresh_local: float - loop_score_thresh: float - loop_score_thresh_tight: float - loop_submap_half_range: int - loop_candidates_per_iter: int - min_icp_inliers: int - min_keyframes_for_loop_search: int - loop_closure_extra_iterations: int - submap_resolution: float - min_loop_detect_duration: float - max_icp_iterations: int - max_icp_correspondence_dist: float - - -@dataclass(frozen=True) -class Keyframe: - """Keyframe emitted by `pgo_keyframes`. - - `local` is the body pose in the odom (`world_raw`) frame, `optimized` is - the body pose in the drift-corrected (`world_corrected`) frame. - """ - - ts: float - local: Transform - optimized: Transform - - -@dataclass(frozen=True) -class LoopClosure: - """An accepted ICP loop-closure edge in the pose graph. - - `source` and `target` are the final optimized body poses (in - `world_corrected`) of the two keyframes the loop connected. `score` is - the ICP fitness used as the loop's translation variance — lower is a - tighter match. - """ - - source: Transform - target: Transform - score: float - - -def pgo_keyframes( - stream: Stream[PointCloud2], - *, - on_frame: Callable[[Any], None] | None = None, - loop_closures_out: list[LoopClosure] | None = None, - **pgo_cfg: Unpack[PGOKwargs], -) -> Stream[Keyframe]: - """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe. - - If `loop_closures_out` is provided, accepted loop edges are appended to - it (in detection order) using each keyframe's final optimized pose. - """ - cfg = PGOConfig(**pgo_cfg) - pgo = _PGO(cfg) - - for obs in stream: - if on_frame is not None: - on_frame(obs) - pose = obs.pose_tuple - if pose is None: - continue - # Skip placeholder poses (origin position OR zero/identity quaternion). - if pose[0] == 0 and pose[1] == 0 and pose[2] == 0: - continue - if pose[3] == 0 and pose[4] == 0 and pose[5] == 0 and (pose[6] == 0 or pose[6] == 1): - continue - local_pose = _obs_to_pose3(obs) - pgo.process(local_pose, obs.ts, obs.data) - - mem = MemoryStore() - out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) - for kf in pgo.finalize(): - out.append(kf, ts=kf.ts) - if loop_closures_out is not None: - loop_closures_out.extend(pgo.loop_closures()) - return out - - -def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: - """Per-keyframe drift correction as Transform(world_corrected <- world_raw).""" - mem = MemoryStore() - out: Stream[Transform] = mem.stream("corrections", Transform) - for obs in keyframes: - kf = obs.data - # world_corrected <- body <- world_raw composes to world_corrected <- world_raw. - drift = kf.optimized + kf.local.inverse() - out.append(drift, ts=kf.ts) - return out - - -def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]: - """Materialize corrections once; return a fast ts -> Transform lookup.""" - ts_list: list[float] = [] - R_list: list[np.ndarray] = [] - t_list: list[np.ndarray] = [] - for obs in corrections: - R, t = _r_t_from_transform(obs.data) - # obs.ts is authoritative; obs.data.ts can be mutated by Transform's - # ts=0.0 -> time.time() fallback in its constructor. - ts_list.append(obs.ts) - R_list.append(R) - t_list.append(t) - - if not ts_list: - raise ValueError("empty corrections stream") - - # Pad len==1 with a duplicate so the general path handles it; - # clip-to-endpoints behavior is unchanged. - if len(ts_list) == 1: - ts_list.append(ts_list[0] + 1e-6) - R_list.append(R_list[0]) - t_list.append(t_list[0]) - - ts_arr = np.array(ts_list) - R_stack = np.stack(R_list) - t_stack = np.stack(t_list) - - def interp(ts: float) -> Transform: - ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) - idx = int(np.searchsorted(ts_arr, ts_clip)) - if idx == 0: - t = t_stack[0] - R = R_stack[0] - elif idx >= len(ts_arr): - t = t_stack[-1] - R = R_stack[-1] - else: - t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] - # Nearest-neighbor in time: avoids blending two adjacent - # corrections that may straddle a loop-closure update. - near_lo = (ts_clip - t_lo) < (t_hi - ts_clip) - t = t_stack[idx - 1] if near_lo else t_stack[idx] - R = R_stack[idx - 1] if near_lo else R_stack[idx] - return _transform_from_r_t(R, t, ts=float(ts)) - - return interp - - -def apply_corrections( - stream: Stream[T], - corrections: Stream[Transform], -) -> Stream[T]: - """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts. - - `obs.data` is untouched. Frames with `obs.pose is None` pass through - unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped). - """ - interp = make_interpolator(corrections) - - def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: - for obs in upstream: - if obs.pose is None: - yield obs - continue - raw_tf = Transform.from_pose(FRAME_BODY, obs.pose_stamped) - # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw. - # Observation normalizes Transform back to 7-tuple via __post_init__. - corrected = interp(obs.ts) + raw_tf - yield obs.derive(data=obs.data, pose=corrected) - - return stream.transform(xf) - - -def _r_t_from_transform(tf: Transform) -> tuple[np.ndarray, np.ndarray]: - q = tf.rotation - R = Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix() - t = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) - return R, t - - -def _transform_from_r_t(R: np.ndarray, t: np.ndarray, *, ts: float) -> Transform: - return Transform( - translation=Vector3(float(t[0]), float(t[1]), float(t[2])), - rotation=Quaternion.from_rotation_matrix(R), - frame_id=FRAME_WORLD_CORRECTED, - child_frame_id=FRAME_WORLD_RAW, - ts=ts, - ) - - -def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3: - """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`.""" - import gtsam # type: ignore[import-not-found,import-untyped] - - if obs.pose_tuple is None: - raise LookupError("No pose set on this observation") - x, y, z, qx, qy, qz, qw = obs.pose_tuple - return gtsam.Pose3( - gtsam.Rot3.Quaternion(float(qw), float(qx), float(qy), float(qz)), - gtsam.Point3(float(x), float(y), float(z)), - ) - - -@dataclass -class _KeyPose: - local: gtsam.Pose3 # odom-frame pose at capture - optimized: gtsam.Pose3 # drift-corrected pose - timestamp: float - body_cloud: PointCloud2 # voxel-downsampled, body frame - - -@dataclass -class _LoopPair: - source: int - target: int - offset: gtsam.Pose3 # source pose in target's frame - score: float - relocalizing: bool = False # True if this loop was found via local-frame fallback - - -class _PGO: - """Incremental PGO: gtsam ISAM2 over keyframes with ICP loop closures. - - Call `process` per frame (odom pose + body-frame points). Call - `finalize` once at the end for the sorted, deduped keyframe list. - """ - - def __init__(self, config: PGOConfig) -> None: - import gtsam # type: ignore[import-not-found,import-untyped] - - self._gtsam = gtsam - self._cfg = config - self._key_poses: list[_KeyPose] = [] - self._pending_loops: list[_LoopPair] = [] - self._accepted_loops: list[_LoopPair] = [] - self._last_loop_ts: float | None = None - self._world_correction: gtsam.Pose3 = gtsam.Pose3() # identity - - params = gtsam.ISAM2Params() - params.setRelinearizeThreshold(0.01) - params.relinearizeSkip = 1 - params.setOptimizationParams(gtsam.ISAM2DoglegParams()) - self._isam2 = gtsam.ISAM2(params) - self._graph = gtsam.NonlinearFactorGraph() - self._values = gtsam.Values() - - def process( - self, - local_pose: gtsam.Pose3, - ts: float, - world_cloud: PointCloud2, - ) -> None: - if len(world_cloud) == 0: - return - if not self._is_keyframe(local_pose): - return - # Unregister: lift world-frame scan back into body frame using its - # odom pose, so PGO can re-project it via the optimized pose later. - body_cloud = world_cloud.transform( - _pose3_to_transform(local_pose.inverse(), ts=ts) - ).voxel_downsample(self._cfg.submap_resolution) - self._add_keyframe(local_pose, ts, body_cloud) - self._search_for_loops() - self._smooth_and_update() - - def finalize(self) -> list[Keyframe]: - """Return keyframes sorted by ts, with duplicate-ts entries dropped.""" - # Second-pass loop rescan over the converged optimized poses: - # keyframes that ended up in heavily drifted regions may have - # missed loops during incremental processing (their optimized - # pose at the moment was misleading). Re-search with the final - # poses gives us another chance to anchor those segments. - self._last_loop_ts = None - for i in range(len(self._key_poses)): - self._search_for_loops( - cur_idx=i, - enforce_time_gate=False, - no_fallback=True, - submap_half_range=38, - time_thresh_override=13.0, - ) - if self._pending_loops: - self._smooth_and_update() - kps = sorted(self._key_poses, key=lambda kp: kp.timestamp) - out: list[Keyframe] = [] - for i, kp in enumerate(kps): - if i > 0 and kp.timestamp <= kps[i - 1].timestamp: - continue - out.append( - Keyframe( - ts=kp.timestamp, - local=_pose3_to_transform( - kp.local, - ts=kp.timestamp, - frame_id=FRAME_WORLD_RAW, - child_frame_id=FRAME_BODY, - ), - optimized=_pose3_to_transform( - kp.optimized, - ts=kp.timestamp, - frame_id=FRAME_WORLD_CORRECTED, - child_frame_id=FRAME_BODY, - ), - ) - ) - return out - - def loop_closures(self) -> list[LoopClosure]: - """Accepted loop edges, resolved to final optimized poses.""" - out: list[LoopClosure] = [] - for pair in self._accepted_loops: - src = self._key_poses[pair.source] - tgt = self._key_poses[pair.target] - out.append( - LoopClosure( - source=_pose3_to_transform( - src.optimized, - ts=src.timestamp, - frame_id=FRAME_WORLD_CORRECTED, - child_frame_id=FRAME_BODY, - ), - target=_pose3_to_transform( - tgt.optimized, - ts=tgt.timestamp, - frame_id=FRAME_WORLD_CORRECTED, - child_frame_id=FRAME_BODY, - ), - score=pair.score, - ) - ) - return out - - def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool: - if not self._key_poses: - return True - delta = self._key_poses[-1].local.inverse().compose(local_pose) - delta_trans = float(np.linalg.norm(np.asarray(delta.translation()))) - # Rotation magnitude from trace: cos(theta) = (tr(R) - 1) / 2. - R = delta.rotation().matrix() - cos_theta = float(np.clip((np.trace(R) - 1.0) / 2.0, -1.0, 1.0)) - delta_deg = float(np.degrees(np.arccos(cos_theta))) - return ( - delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg - ) - - def _add_keyframe( - self, - local_pose: gtsam.Pose3, - ts: float, - body_cloud: PointCloud2, - ) -> None: - gtsam = self._gtsam - idx = len(self._key_poses) - optimized = self._world_correction.compose(local_pose) - - self._values.insert(idx, optimized) - - if idx == 0: - noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) - self._graph.add(gtsam.PriorFactorPose3(idx, optimized, noise)) - else: - last_local = self._key_poses[-1].local - between = last_local.inverse().compose(local_pose) - # When the prior keyframe is already in a drifted region, - # loosen the chain variance for this step so loop closures - # can pull harder across the disturbance. - prev = self._key_poses[-1] - prev_drift = float( - np.linalg.norm( - np.asarray(prev.optimized.translation()) - np.asarray(prev.local.translation()) - ) - ) - trans_var = 8e-4 if prev_drift > 1.3 else 1e-4 - noise = gtsam.noiseModel.Diagonal.Variances( - np.array([1e-6, 1e-6, 1e-6, trans_var, trans_var, 1e-6]) - ) - self._graph.add(gtsam.BetweenFactorPose3(idx - 1, idx, between, noise)) - - self._key_poses.append( - _KeyPose( - local=local_pose, - optimized=optimized, - timestamp=ts, - body_cloud=body_cloud, - ) - ) - - def _get_submap(self, idx: int, half_range: int) -> PointCloud2: - lo = max(0, idx - half_range) - hi = min(len(self._key_poses) - 1, idx + half_range) - if lo > hi: - return PointCloud2() - cloud = self._key_poses[lo].body_cloud.transform( - _pose3_to_transform(self._key_poses[lo].optimized, ts=self._key_poses[lo].timestamp) - ) - for i in range(lo + 1, hi + 1): - kp = self._key_poses[i] - cloud = cloud + kp.body_cloud.transform( - _pose3_to_transform(kp.optimized, ts=kp.timestamp) - ) - return cloud.voxel_downsample(self._cfg.submap_resolution) - - def _search_for_loops( - self, - cur_idx: int | None = None, - *, - enforce_time_gate: bool = True, - opt_radius: float | None = None, - force_fallback: bool = False, - no_fallback: bool = False, - submap_half_range: int | None = None, - icp_max_dist: float | None = None, - source_half_range: int = 0, - candidates_per_iter: int | None = None, - force_multi: bool = False, - time_thresh_override: float | None = None, - ) -> None: - if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: - return - if cur_idx is None: - cur_idx = len(self._key_poses) - 1 - - cur_kp = self._key_poses[cur_idx] - cur_ts = cur_kp.timestamp - if enforce_time_gate and ( - self._last_loop_ts is not None - and cur_ts - self._last_loop_ts < self._cfg.min_loop_detect_duration - ): - return - - cur_opt_t = np.asarray(cur_kp.optimized.translation()) - cur_loc_t = np.asarray(cur_kp.local.translation()) - - from scipy.spatial import KDTree - - # Search both optimized and local (odom) frames; exclude cur_idx - # from the candidate pool (and time-adjacent neighbors via the - # time gate filter inside _collect). - other_kps = [kp for j, kp in enumerate(self._key_poses) if j != cur_idx] - other_idx_map = [j for j in range(len(self._key_poses)) if j != cur_idx] - opt_positions = np.array([np.asarray(kp.optimized.translation()) for kp in other_kps]) - loc_positions = np.array([np.asarray(kp.local.translation()) for kp in other_kps]) - - def _collect(local_idxs: set[int], time_thresh: float) -> list[tuple[float, int]]: - # local_idxs index into other_kps; map back to global keyframe index. - cands = [] - for li in local_idxs: - gi = other_idx_map[li] - if abs(cur_ts - self._key_poses[gi].timestamp) <= time_thresh: - continue - d_opt = float( - np.linalg.norm( - np.asarray(self._key_poses[gi].optimized.translation()) - cur_opt_t - ) - ) - d_loc = float( - np.linalg.norm(np.asarray(self._key_poses[gi].local.translation()) - cur_loc_t) - ) - cands.append((min(d_opt, d_loc), gi)) - cands.sort() - return cands - - # Tight optimized-frame search first; if none survive the - # time-gap filter AND accumulated drift is large enough that we - # likely need a relocalization, fall back to wider local-frame - # search with a stricter time gap. - opt_radius_val = opt_radius if opt_radius is not None else self._cfg.loop_search_radius - opt_idxs = set(KDTree(opt_positions).query_ball_point(cur_opt_t, opt_radius_val)) - time_thresh = ( - time_thresh_override if time_thresh_override is not None else self._cfg.loop_time_thresh - ) - candidates = _collect(opt_idxs, time_thresh) - drift = float(np.linalg.norm(cur_opt_t - cur_loc_t)) - relocalizing = False - if not no_fallback and ( - force_fallback or (not candidates and drift > self._cfg.loop_fallback_drift_thresh) - ): - loc_idxs = set( - KDTree(loc_positions).query_ball_point( - cur_loc_t, self._cfg.loop_search_radius_local - ) - ) - candidates = _collect(loc_idxs - opt_idxs, self._cfg.loop_time_thresh_local) - relocalizing = True - logger.info( - "loop fallback fired", - cur_idx=cur_idx, - drift=round(drift, 2), - n_candidates=len(candidates), - ) - if not candidates: - return - - # Try top-K candidates; keep all that pass the threshold (for - # relocalization fallback, multiple loops to different points in - # the past give the optimizer more constraint to anchor the - # disturbed segment). For the regular non-fallback path keep - # only the best one — the trajectory there is well-constrained - # already and extra loops add noise. Early-exit on a very tight - # match to save redundant ICP runs in the common case. - source = self._get_submap(cur_idx, source_half_range) - accepted: list[tuple[int, Transform, float]] = [] - k = ( - candidates_per_iter - if candidates_per_iter is not None - else self._cfg.loop_candidates_per_iter - ) - for _, loop_idx in candidates[:k]: - target_hr = ( - submap_half_range - if submap_half_range is not None - else self._cfg.loop_submap_half_range - ) - target = self._get_submap(loop_idx, target_hr) - max_dist = ( - icp_max_dist if icp_max_dist is not None else self._cfg.max_icp_correspondence_dist - ) - icp_tf, fitness = _icp( - source, - target, - max_iter=self._cfg.max_icp_iterations, - max_dist=max_dist, - min_inliers=self._cfg.min_icp_inliers, - ) - if fitness <= self._cfg.loop_score_thresh: - accepted.append((loop_idx, icp_tf, fitness)) - if ( - not relocalizing - and not force_multi - and fitness <= self._cfg.loop_score_thresh_tight - ): - break - if not accepted: - return - accepted.sort(key=lambda x: x[2]) - keep = accepted if (relocalizing or force_multi) else [accepted[0]] - - for loop_idx, icp_tf, fitness in keep: - # icp_tf takes cur_kp.optimized -> refined pose (correcting the drift). - # offset = loop_kp.optimized^-1 * refined = relative pose from loop to cur. - icp_pose = _transform_to_pose3(icp_tf) - refined = icp_pose.compose(cur_kp.optimized) - offset = self._key_poses[loop_idx].optimized.between(refined) - - self._pending_loops.append( - _LoopPair( - source=cur_idx, - target=loop_idx, - offset=offset, - score=fitness, - relocalizing=relocalizing, - ) - ) - logger.info( - "Loop closure detected", - source=cur_idx, - target=loop_idx, - score=round(fitness, 4), - reloc=relocalizing, - ) - self._last_loop_ts = cur_ts - - def _smooth_and_update(self) -> None: - gtsam = self._gtsam - has_loop = bool(self._pending_loops) - - for pair in self._pending_loops: - # Pose3 noise is [rx, ry, rz, x, y, z]. The two halves have - # different units (rad² vs m²), so a uniform variance silently - # makes one half pathological. Use ICP fitness as the - # *translation* variance and a generous fixed rotation variance - # — loops shouldn't be trusted to fix rotation tightly without - # normals + p2plane. - trans_var = max(0.005, float(pair.score)) # >= sigma_trans ~7 cm - rot_var = 0.003 # sigma_rot ~ 3.1 deg (large submap p2plane ICP, tight rotation) - noise = gtsam.noiseModel.Diagonal.Variances( - np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) - ) - self._graph.add(gtsam.BetweenFactorPose3(pair.target, pair.source, pair.offset, noise)) - self._accepted_loops.extend(self._pending_loops) - self._pending_loops.clear() - - self._isam2.update(self._graph, self._values) - self._isam2.update() - if has_loop: - for _ in range(self._cfg.loop_closure_extra_iterations): - self._isam2.update() - self._graph = gtsam.NonlinearFactorGraph() - self._values = gtsam.Values() - - estimates = self._isam2.calculateBestEstimate() - for i in range(len(self._key_poses)): - self._key_poses[i].optimized = estimates.atPose3(i) - - last = self._key_poses[-1] - self._world_correction = last.optimized.compose(last.local.inverse()) - - -def _pose3_to_transform( - pose: gtsam.Pose3, - *, - ts: float, - frame_id: str = "", - child_frame_id: str = "", -) -> Transform: - """PGO-internal: build a Transform from a Pose3.""" - t = np.asarray(pose.translation()) - return Transform( - translation=Vector3(float(t[0]), float(t[1]), float(t[2])), - rotation=Quaternion.from_rotation_matrix(pose.rotation().matrix()), - frame_id=frame_id, - child_frame_id=child_frame_id, - ts=ts, - ) - - -def _transform_to_pose3(tf: Transform) -> gtsam.Pose3: - """PGO-internal: build a Pose3 from a Transform.""" - import gtsam # type: ignore[import-not-found,import-untyped] - - return gtsam.Pose3(tf.to_matrix()) - - -def _icp( - source: PointCloud2, - target: PointCloud2, - max_iter: int = 50, - max_dist: float = 1.0, - tol: float = 1e-6, - min_inliers: int = 10, - init: Transform | None = None, -) -> tuple[Transform, float]: - """Point-to-plane ICP using Open3D's tensor pipeline. - - Returns ``(tf, fitness)`` where ``fitness`` is mean squared inlier - distance (m^2) — the GTSAM noise model in `_smooth_and_update` uses - this directly as sigma_trans squared. On rejection (too few points or - zero correspondences) returns the identity transform and inf fitness. - """ - - if len(source) < min_inliers or len(target) < min_inliers: - return Transform.identity(), float("inf") - - src_pcd = source.pointcloud_tensor - tgt_pcd = target.pointcloud_tensor - - # Normals on the target enable point-to-plane ICP — converges tighter - # than point-to-point on indoor scenes (walls give unambiguous normals - # that resolve the slide-along-wall ambiguity). - tgt_pcd.estimate_normals(max_nn=30, radius=0.3) - - device = src_pcd.device - init_T = o3c.Tensor( - init.to_matrix() if init is not None else np.eye(4), - dtype=o3c.float64, - device=device, - ) - - # Silence Open3D's "0 correspondence" warning — we deliberately use a - # tight max_correspondence_distance and reject loops with poor fitness. - # Treat singular-system failures as rejection rather than aborting. - try: - with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): - result = o3d.t.pipelines.registration.icp( - source=src_pcd, - target=tgt_pcd, - max_correspondence_distance=max_dist, - init_source_to_target=init_T, - estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), - criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( - relative_fitness=tol, - relative_rmse=tol, - max_iteration=max_iter, - ), - ) - except RuntimeError: - return Transform.identity(), float("inf") - - if float(result.fitness) == 0.0: - return Transform.identity(), float("inf") - - T_mat = result.transformation.numpy() - rmse = float(result.inlier_rmse) - return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse diff --git a/dimos/mapping/loop_closure/autoresearch/program.md b/dimos/mapping/loop_closure/autoresearch/program.md deleted file mode 100644 index 61803a405c..0000000000 --- a/dimos/mapping/loop_closure/autoresearch/program.md +++ /dev/null @@ -1,221 +0,0 @@ -# loop-closure autoresearch - -Adapted from [karpathy/autoresearch](https://github.com/karpathy/autoresearch). -The agent iteratively tunes ICP / pose-graph code to minimize marker -position drift across recorded sessions. - -| nanochat | here | -|---|---| -| `train.py` (edit) | `pgo.py` | -| `prepare.py` (read-only) | `eval.py` | -| `val_bpb` (lower better) | `TOTAL_SPREAD` (lower better) | -| 5-min wall budget | data-bounded, ~2 min sequential | - -## Setup - -1. **Agree on a run tag**: propose one based on today's date (e.g. - `mar5`). The branch `autoresearch/loopclose-` must not already - exist — this is a fresh run. -2. **Create the branch**: `git checkout -b autoresearch/loopclose-` - from current `main`. -3. **Read the in-scope files**: - - `dimos/mapping/loop_closure/pgo.py` — PGO + ICP + loop detection. - This is the file you edit. - - `dimos/mapping/loop_closure/eval.py` — the eval harness. Read it to - understand the metric; do not modify. - - `dimos/perception/fiducial/marker_transformer.py` — fiducial - detector. Held fixed for fair comparison; do not modify. -4. **Verify LFS data**: the first eval run will pull - `hk_village1..6.db` from LFS automatically via `get_data`. If LFS - isn't configured, tell the human to run `git lfs install && git lfs pull`. -5. **Initialize `results.tsv`**: header row only. The baseline goes in - after the first eval. -6. **Confirm and go**: confirm setup looks good. - -Once you get confirmation, kick off experimentation. - -## Experimentation - -**What you CAN do:** -- Modify `dimos/mapping/loop_closure/pgo.py` — anything in this file is - fair game: `PGOConfig` defaults, loop-detection logic, ICP setup, - GTSAM noise models, keyframe selection rules, the inner classes, - whatever. - -**What you CANNOT do:** -- Modify `eval.py` or any of its dependencies (`marker_transformer.py`, - detection types, the recordings). The eval harness is the ground - truth metric. -- Change the **public surface** of `pgo.py`. The signatures of - `pgo_keyframes`, `keyframes_to_corrections`, `make_interpolator`, and - `apply_corrections` must stay the same so `eval.py` (and downstream - consumers) keep working. You can change their internals freely. -- Add new dependencies or modify other files in the repo. - -**The goal: lowest `TOTAL_SPREAD` (meters).** Secondary: lower -`WALL_TIME` (seconds) for ties. The metric is per-recording sum of -pairwise distances between PGO-corrected marker positions for the same -`marker_id`, summed across all six `hk_village*` recordings. Smaller = -tighter loop closures = the same physical marker is placed in the same -world spot every time the tracker re-acquires it. - -**Simplicity criterion**: All else being equal, simpler wins. A small -spread improvement that adds 30 lines of GTSAM hackery is probably not -worth it. Deleting code and getting equal or better results is a great -outcome. - -**The first run**: establish the baseline with `pgo.py` unmodified. - -## Investigation is part of the loop - -Twiddling knobs blindly is the slow way. Between experiments, **look at -the actual data** — this is encouraged, not a detour from "real" work. -A single discard doesn't mean a direction is dead; it means *this -specific implementation* didn't help. Before abandoning an idea, -investigate *why* the metric moved (or didn't): - -- **Run per-recording with `--verbose`** to see live loop-closure events - with scores and source/target keyframe indices: - `uv run python -m dimos.mapping.loop_closure.eval hk_village2 -v`. - A param change that, say, *adds* loop closures but worsens spread - tells a very different story than one that *removes* them. -- **Use `dimos/mapping/utils/cli/map.py`** as the visualization companion. It - runs the same PGO + marker pipeline as the eval and renders the - result in rerun. Use `--no-gui` for headless inspection or run with - GUI locally to scrub the timeline and see where markers cluster, where - the path drifts, when loop closures fire: - `uv run dimos map hk_village2 --pgo --markers --no-gui` -- **Write small throwaway scripts** in `/tmp` to interrogate state that - the eval doesn't surface — per-marker spread breakdown, loop closure - score distribution, keyframe density over time, etc. The eval is - intentionally one number for tuning; the script lets you SEE. -- **Read `pgo.py`** end-to-end before changing a knob whose effect you - can't predict. The docstrings and inline comments explain the - invariants. Same for `marker_transformer.py` if you're not sure what - `track_id` / `marker_id` mean. -- **Form a theory, then test it.** "If I raise `min_icp_inliers`, I - expect fewer accepted closures but tighter mean_score and possibly - worse spread because the graph has fewer constraints." Then run, and - *check whether the observed change matches your prediction.* When it - doesn't, that's a learning, not a failure. - -Investigation runs don't count toward the experiment log unless you -also commit a `pgo.py` change. They cost a few minutes; the -understanding compounds across the rest of the night. - -## Running the eval - -```bash -uv run python -m dimos.mapping.loop_closure.eval > run.log 2>&1 -grep "^TOTAL_\|^WALL_" run.log -``` - -Output trailer looks like: - -``` -TOTAL_PGO_TIME=37.86 -TOTAL_SPREAD=48.811 ← primary metric, lower is better -TOTAL_LOOPS=42 -TOTAL_LOOP_SCORE_MEAN=0.0584 -TOTAL_KEYFRAMES=720 -WALL_TIME=115.82 ← secondary metric, lower is better -``` - -## Logging results - -Log every experiment to `results.tsv` (tab-separated). Do **not** -commit this file — it's per-branch scratch state, in `.gitignore`-style -spirit. The header row plus seven columns: - -``` -commit spread_m wall_s n_loops mean_score n_keyframes status description -``` - -1. git commit hash (short, 7 chars) -2. `TOTAL_SPREAD` — use `0.000000` for crashes -3. `WALL_TIME` — use `0.0` for crashes -4. `TOTAL_LOOPS` -5. `TOTAL_LOOP_SCORE_MEAN` (`.4f`) -6. `TOTAL_KEYFRAMES` -7. status: `keep`, `discard`, or `crash` -8. one-line description of what this experiment tried - -Example: - -``` -commit spread_m wall_s n_loops mean_score n_keyframes status description -a1b2c3d 48.811 115.82 42 0.0584 720 keep baseline -b2c3d4e 43.220 118.31 55 0.0501 720 keep lower loop_score_thresh 0.1 -> 0.05 -c3d4e5f 47.118 119.04 30 0.0623 720 discard raise min_icp_inliers 50 -> 200 -d4e5f6g 0.000000 0.0 0 0.0000 0 crash drop GTSAM, use simple averaging -``` - -## The experiment loop - -The experiment runs on a dedicated branch (e.g. -`autoresearch/loopclose-mar5` or -`autoresearch/loopclose-mar5-gpu0`). - -LOOP FOREVER: - -1. Look at the git state: the current branch and commit. -2. Tune `pgo.py` with an experimental idea by editing the code - directly. -3. `git commit -am ""` -4. Run the eval: - `uv run python -m dimos.mapping.loop_closure.eval > run.log 2>&1` - (redirect everything — do NOT `tee` and don't dump the file into - your context). -5. Read the result: - `grep "^TOTAL_\|^WALL_" run.log` -6. If the grep output is empty, the run crashed. Run - `tail -n 80 run.log` to see the Python traceback and either fix it - (typo, missing import) or revert (idea fundamentally broken). -7. Record the result in `results.tsv` (keep it untracked). -8. Decision: - - `TOTAL_SPREAD` strictly **lower** than the current branch tip → - `keep` (advance the branch). - - `TOTAL_SPREAD` within ±0.5% of current AND `WALL_TIME` lower → - `keep` (compute simplification). - - Otherwise → `discard` (`git reset --hard HEAD~1`). - -You're a fully autonomous researcher. If something works, keep it. If -it doesn't, revert. The branch advances over time, and you iterate. - -**Timeout**: each eval should take ~2 minutes wall. If a run exceeds -10 minutes, kill it and treat it as a failure. - -**Crashes**: If a run crashes (GTSAM error, Open3D ICP failure, -import error, etc.), use judgment. Typos and missing imports → fix and -re-run. Fundamentally broken idea → log `crash` and move on. - -**NEVER STOP**: Once the loop has begun, do NOT pause to ask the human -if you should continue. Don't ask "should I keep going?" or "is this a -good stopping point?". The human might be asleep or away. You are -autonomous; the loop runs until you are manually stopped. - -**Don't give up on a direction after one bad result.** A single -`discard` is one data point about *one specific implementation*, not a -verdict on the underlying idea. If you tried "stricter ICP gating" and -it worsened spread, that doesn't mean ICP gating is the wrong knob — -maybe your value was wrong, maybe it interacted with another setting, -maybe the effect was on a recording you didn't expect. **Investigate -before you abandon** (see the Investigation section above): inspect the -per-recording numbers, run `dimos map ... --markers --no-gui` to see -where the spread actually comes from, write a 20-line script to print -loop-closure scores per recording. Often the next experiment in a -"failed" direction is the one that pays off, once you understand the -mechanism. - -**If you run out of ideas**: re-read `pgo.py` for angles you haven't -touched, look at the papers/code links in its docstrings, try -combining previous near-misses, try more radical changes (different -loop-detection radius scaling, different ICP estimator, swapping GTSAM -optimizer, etc.). Look at the recordings themselves — `dimos map -hk_villageN --pgo --markers` gives you ground truth for what the data -looks like. - -A typical use case: human leaves you running while they sleep. At -~2 min/eval you can run ~30/hour, or roughly 240 experiments over an -8-hour night. They wake up to a `results.tsv` full of attempts and a -branch advanced to the best. diff --git a/dimos/mapping/loop_closure/pgo_auto.py b/dimos/mapping/loop_closure/pgo_auto.py index d4ac46ef65..37dbb7d36b 100644 --- a/dimos/mapping/loop_closure/pgo_auto.py +++ b/dimos/mapping/loop_closure/pgo_auto.py @@ -54,7 +54,7 @@ from __future__ import annotations -from collections.abc import Callable, Iterator +from collections.abc import Callable, Iterable, Iterator from dataclasses import dataclass from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Unpack @@ -163,7 +163,7 @@ class LoopClosure: def pgo_keyframes( - stream: Stream[PointCloud2], + stream: Iterable[Observation[PointCloud2]], *, on_frame: Callable[[Any], None] | None = None, loop_closures_out: list[LoopClosure] | None = None, diff --git a/dimos/mapping/utils/cli/pose_fill.py b/dimos/mapping/utils/cli/pose_fill.py index 012a1fba3d..1a775bcd74 100644 --- a/dimos/mapping/utils/cli/pose_fill.py +++ b/dimos/mapping/utils/cli/pose_fill.py @@ -129,8 +129,8 @@ def pose_fill_db( # the source blob still resolves); plain metadata for everything else. if name == target: rows: Iterable[Observation[Any]] = pose_fill( - src.stream(name).from_time(seek or None).to_time(duration), - src.stream(pose_source).from_time(seek or None).to_time(duration), + src.stream(name).from_time(seek or None).to_time(duration).order_by("ts"), + src.stream(pose_source).from_time(seek or None).to_time(duration).order_by("ts"), tolerance=tolerance, ) else: diff --git a/dimos/mapping/utils/cli/test_pose_fill.py b/dimos/mapping/utils/cli/test_pose_fill.py new file mode 100644 index 0000000000..7b4b39e7d7 --- /dev/null +++ b/dimos/mapping/utils/cli/test_pose_fill.py @@ -0,0 +1,61 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for the stream-level `pose_fill` (no LFS data, runs in normal CI). + +The end-to-end `dimos map pose-fill` path against a real recording lives in +`test_cli.py` (self-hosted). These cover the pure stream transform directly. +""" + +from __future__ import annotations + +from dimos.mapping.utils.cli.pose_fill import pose_fill +from dimos.memory2.store.memory import MemoryStore +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 + + +def test_pose_fill_attaches_nearest_pose() -> None: + """Each target obs gets the nearest pose-source pose; payload is preserved.""" + with MemoryStore() as store: + target = store.stream("image", str) + poses = store.stream("odom", Transform) + target.append("img0", ts=0.0) + poses.append(Transform(translation=Vector3(1.0, 0.0, 0.0)), ts=0.001) + + out = pose_fill(target, poses, tolerance=0.05).to_list() + + assert len(out) == 1 + assert out[0].data == "img0" + assert out[0].pose_tuple is not None + assert out[0].pose_tuple[:3] == (1.0, 0.0, 0.0) + + +def test_pose_fill_mount_composes_static_child_transform() -> None: + """`mount` composes ``world_base + mount`` onto each attached pose.""" + with MemoryStore() as store: + target = store.stream("image", str) + poses = store.stream("odom", Transform) + target.append("img0", ts=0.0) + # Base pose 1m forward in x; mount offsets 1m in y (identity rotations). + poses.append(Transform(translation=Vector3(1.0, 0.0, 0.0)), ts=0.001) + mount = Transform(translation=Vector3(0.0, 1.0, 0.0), rotation=Quaternion()) + + out = pose_fill(target, poses, tolerance=0.05, mount=mount).to_list() + + assert len(out) == 1 + assert out[0].pose_tuple is not None + # Identity rotations → composed translation is the component-wise sum. + assert out[0].pose_tuple[:3] == (1.0, 1.0, 0.0) diff --git a/dimos/memory2/test_stream.py b/dimos/memory2/test_stream.py index 63c21b1b86..7ce8e496ac 100644 --- a/dimos/memory2/test_stream.py +++ b/dimos/memory2/test_stream.py @@ -962,6 +962,42 @@ def test_dense_secondary_pairs_correctly(self, memory_session): assert max(abs(p.data[0].ts - p.data[1].ts) for p in pairs) < 1e-9 +class TestWithPose: + """``Observation.with_pose`` attaches/clears a pose without touching the payload.""" + + def test_attaches_pose_and_keeps_payload_lazy(self): + """Pose is set, but the payload loader stays untouched until accessed.""" + loads = 0 + + def loader() -> str: + nonlocal loads + loads += 1 + return "payload" + + obs: Observation[str] = Observation(id=1, ts=5.0, data_type=str, _loader=loader) + posed = obs.with_pose((1.0, 2.0, 3.0)) + + # 3-tuple padded with the identity quaternion; payload not materialized. + assert posed.pose_tuple == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0) + assert posed.ts == 5.0 + assert loads == 0 + # The payload still resolves lazily on first access. + assert posed.data == "payload" + assert loads == 1 + + def test_pose_none_clears(self): + obs: Observation[str] = Observation(id=1, ts=0.0, data_type=str, pose=(1, 2, 3), _data="x") + cleared = obs.with_pose(None) + assert cleared.pose_tuple is None + assert cleared.data == "x" + + def test_overwrites_existing_pose(self): + obs: Observation[str] = Observation(id=1, ts=0.0, data_type=str, pose=(1, 2, 3), _data="x") + moved = obs.with_pose((7, 8, 9)) + assert moved.pose_tuple is not None + assert moved.pose_tuple[:3] == (7.0, 8.0, 9.0) + + class TestTimeWindowing: """``*_time`` is relative to the first observation, ``*_timestamp`` is absolute. From c70598bfa24dd5fe4c66ec3113f08792d8e654cc Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 1 Jun 2026 18:13:38 +0800 Subject: [PATCH 63/64] data: add go2_mid360_stairs recording from go2stairs branch Pulls the stairs dataset LFS pointer from feat/ivan/go2stairs so the map tooling can use it; object already on remote LFS store. --- data/.lfs/go2_mid360_stairs.db.tar.gz | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 data/.lfs/go2_mid360_stairs.db.tar.gz diff --git a/data/.lfs/go2_mid360_stairs.db.tar.gz b/data/.lfs/go2_mid360_stairs.db.tar.gz new file mode 100644 index 0000000000..615aad3a17 --- /dev/null +++ b/data/.lfs/go2_mid360_stairs.db.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d8d2194332291cf71988458965aa9f8019b8d61661ff153e93886dd72e85e9 +size 54082131 From a1782e53997efd6a4cb939b2fa4f618786345f33 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 1 Jun 2026 20:51:26 +0800 Subject: [PATCH 64/64] test_cli: skip JPEG-decoding map verbs when libturbojpeg is unavailable The self-hosted CI image's published :dev tag is stale (predates libturbojpeg0-dev in docker/python/Dockerfile), so TurboJPEG() raises at runtime and the four verbs that decode the color_image stream fail. Guard them with skipif so they skip on a lib-less image and still run where the native lib is present. --- dimos/mapping/utils/cli/test_cli.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/dimos/mapping/utils/cli/test_cli.py b/dimos/mapping/utils/cli/test_cli.py index 3c12657c97..cb9ece1f3c 100644 --- a/dimos/mapping/utils/cli/test_cli.py +++ b/dimos/mapping/utils/cli/test_cli.py @@ -47,6 +47,25 @@ _runner = CliRunner() +def _turbojpeg_available() -> bool: + """True if the native libturbojpeg can be loaded (decodes the color_image stream).""" + try: + from turbojpeg import TurboJPEG + + TurboJPEG() + except Exception: + return False + return True + + +# The color_image stream is JPEG; decoding it needs the native libturbojpeg, which +# is missing from stale CI images. Skip the verbs that touch images when it's absent. +requires_turbojpeg = pytest.mark.skipif( + not _turbojpeg_available(), + reason="native libturbojpeg unavailable; cannot decode the JPEG color_image stream", +) + + def _run(*args: str, timeout: float = 300.0) -> SimpleNamespace: """Invoke `dimos map ` in-process and capture its result. @@ -90,6 +109,7 @@ def test_summary(dataset: str) -> None: assert "odom" in res.stdout +@requires_turbojpeg def test_rename_snippet(dataset: str, tmp_path: Path) -> None: out = tmp_path / "renamed.db" res = _run( @@ -142,6 +162,7 @@ def test_pose_fill_snippet(dataset: str, tmp_path: Path) -> None: store.stop() +@requires_turbojpeg def test_replay_snippet(dataset: str, tmp_path: Path) -> None: out = tmp_path / "replay.rrd" res = _run("replay", dataset, "--out", str(out), "--no-gui", "--duration", str(DURATION)) @@ -149,6 +170,7 @@ def test_replay_snippet(dataset: str, tmp_path: Path) -> None: assert out.exists() and out.stat().st_size > 0 +@requires_turbojpeg def test_replay_marker_snippet(dataset: str, tmp_path: Path) -> None: out = tmp_path / "markers.rrd" res = _run("replay-marker", dataset, "--out", str(out), "--no-gui", "--duration", str(DURATION)) @@ -156,6 +178,7 @@ def test_replay_marker_snippet(dataset: str, tmp_path: Path) -> None: assert out.exists() and out.stat().st_size > 0 +@requires_turbojpeg def test_replay_map_final_snippet(dataset: str, tmp_path: Path) -> None: out = tmp_path / "replay_map.rrd" # CPU device keeps the voxel accumulation runnable without a GPU.