From ceb08bacf278753497f7726b0e1957c3635dea78 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 17:26:36 +0800 Subject: [PATCH 01/36] map tool first pass --- dimos/mapping/relocalization/pgo.py | 32 ++++++- dimos/msgs/sensor_msgs/PointCloud2.py | 5 +- dimos/robot/cli/dimos.py | 20 ++++ dimos/utils/cli/map.py | 132 ++++++++++++++++++++++++++ 4 files changed, 183 insertions(+), 6 deletions(-) create mode 100644 dimos/utils/cli/map.py diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index 1c2ee8aa6c..b04cb61ebc 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -18,6 +18,7 @@ from __future__ import annotations +from collections.abc import Callable from dataclasses import dataclass from typing import Any @@ -375,6 +376,9 @@ def pgo_then_voxels( voxel_size: float = 0.05, block_count: int = 2_000_000, device: str = "CUDA:0", + corrected_path_out: list[tuple[float, float, float]] | None = None, + pass1_on_frame: Callable[[Any], None] | None = None, + pass2_on_frame: Callable[[Any], None] | None = None, **pgo_cfg: Any, ) -> PointCloud2: """Two-pass PGO mapping (eliminates duplicate-wall artifacts). @@ -401,9 +405,16 @@ def pgo_then_voxels( n_frames = 0 for obs in stream: + if pass1_on_frame is not None: + pass1_on_frame(obs) if obs.pose is None: continue x, y, z, qx, qy, qz, qw = obs.pose + # Skip placeholder poses: zero translation, or invalid (zero-norm) quaternion. + if x == 0 and y == 0 and z == 0: + continue + if qx == 0 and qy == 0 and qz == 0 and qw == 0: + continue r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() t = np.array([x, y, z]) points, _ = obs.data.as_numpy() @@ -420,24 +431,35 @@ def pgo_then_voxels( n_kf = pgo.num_key_poses print(f" Pass 1: {n_frames} frames, {n_kf} keyframes") + if corrected_path_out is not None: + corrected_path_out.extend( + (float(kp.t_global[0]), float(kp.t_global[1]), float(kp.t_global[2])) + for kp in pgo._key_poses + ) + grid = VoxelGrid(voxel_size=voxel_size, block_count=block_count, device=device) try: if n_kf < 2: for obs in stream: + if pass2_on_frame is not None: + pass2_on_frame(obs) grid.add_frame(obs.data) return grid.get_global_pointcloud2() - kf_ts = np.array([kp.timestamp for kp in pgo._key_poses]) + # Sort + dedupe by timestamp: Slerp requires strictly increasing ts. + kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp) + kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp] + kf_ts = np.array([kp.timestamp for kp in kps]) # Per-keyframe rigid drift correction: T_corr = T_global @ T_local.inv() - R_corr_list = [kp.r_global @ kp.r_local.T for kp in pgo._key_poses] - t_corr_list = [ - kp.t_global - (kp.r_global @ kp.r_local.T) @ kp.t_local for kp in pgo._key_poses - ] + R_corr_list = [kp.r_global @ kp.r_local.T for kp in kps] + t_corr_list = [kp.t_global - (kp.r_global @ kp.r_local.T) @ kp.t_local for kp in kps] t_corrs = np.stack(t_corr_list) rot_slerp = Slerp(kf_ts, Rotation.from_matrix(np.stack(R_corr_list))) n_inserted = 0 for obs in stream: + if pass2_on_frame is not None: + pass2_on_frame(obs) if obs.pose is None: continue ts = float(np.clip(obs.ts, kf_ts[0], kf_ts[-1])) diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index 20d6b09354..598cb0b094 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -729,7 +729,10 @@ def to_rerun( point_colors = colors else: z = points[:, 2] - class_ids = ((z - z.min()) / (z.max() - z.min() + 1e-8) * 255).astype(np.uint8) + + # Clip to robust percentiles so a few outliers don't squash the colormap range. + z_lo, z_hi = np.percentile(z, [1, 99]) + class_ids = (np.clip((z - z_lo) / (z_hi - z_lo + 1e-8), 0, 1) * 255).astype(np.uint8) if mode == "points": return rr.Points3D( diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 34b74b7441..b344d9be10 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -685,6 +685,26 @@ def send( topic_send(topic, message_expr) +@main.command(name="map") +def map_cmd( + dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), + voxel: float = typer.Option(0.05, "--voxel", help="Voxel size for the rebuild"), + device: str = typer.Option( + "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)" + ), + pgo: bool = typer.Option( + False, "--pgo", help="Run pose graph optimization before rebuilding (twopass)" + ), + block_count: int = typer.Option( + 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" + ), +) -> None: + """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" + from dimos.utils.cli.map import main as map_main + + map_main(dataset=dataset, voxel=voxel, device=device, pgo=pgo, block_count=block_count) + + @main.command(name="export-premap") def export_premap_cmd( dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py new file mode 100644 index 0000000000..1f82617871 --- /dev/null +++ b/dimos/utils/cli/map.py @@ -0,0 +1,132 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time + +import rerun as rr +import rerun.blueprint as rrb +import typer + +from dimos.mapping.voxels import VoxelMapTransformer +from dimos.memory2.store.sqlite import SqliteStore +from dimos.utils.data import resolve_named_path +from dimos.visualization.rerun.init import rerun_init + + +def progress(total: int, label: str = ""): + seen = 0 + wall_start: float | None = None + last_wall: float | None = None + first_ts: float | None = None + + def _progress(obs): + nonlocal seen, wall_start, last_wall, first_ts + now = time.monotonic() + if wall_start is None: + wall_start = now + first_ts = obs.ts + frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0 + last_wall = now + seen += 1 + pct = 100 * seen // total if total else 100 + wall = now - wall_start + data = obs.ts - first_ts + speed = data / wall if wall > 0 else 0.0 + end = "\n" if seen >= total else "" + prefix = f"{label} " if label else "" + print( + f"\r{prefix}{pct:>3}% [{seen}/{total}] {data:.1f}s ({speed:.1f} x rt) {frame_ms:.0f}ms/frame", + end=end, + flush=True, + ) + + return _progress + + +def main( + dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), + voxel: float = typer.Option(0.05, "--voxel", help="Voxel size for the rebuild"), + device: str = typer.Option( + "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)" + ), + pgo: bool = typer.Option( + False, "--pgo", help="Run pose graph optimization before rebuilding (twopass)" + ), + block_count: int = typer.Option( + 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" + ), +) -> None: + db_path = resolve_named_path(dataset, ".db") + + store = SqliteStore(path=db_path) + lidar = store.streams.lidar + + print(lidar.summary()) + + path: list[tuple[float, float, float]] = [] + + def collect_path(obs): + if obs.pose is None: + return + # Reject placeholder poses at the world origin (translation = 0,0,0). + if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: + return + path.append((obs.pose[0], obs.pose[1], obs.pose[2])) + + pgo_map = None + pgo_path: list[tuple[float, float, float]] = [] + if pgo: + from dimos.mapping.relocalization.pgo import pgo_then_voxels + + total = lidar.count() + print("running PGO twopass map...") + pgo_map = pgo_then_voxels( + lidar, + voxel_size=voxel, + block_count=block_count, + device=device, + corrected_path_out=pgo_path, + pass1_on_frame=progress(total, "pgo pass 1 (optimizing)"), + pass2_on_frame=progress(total, "pgo pass 2 (rebuilding)"), + ) + + global_map = ( + lidar.tap(collect_path) + .transform(VoxelMapTransformer(voxel_size=voxel, device=device)) + .tap(progress(lidar.count(), "reconstructing global map")) + .last() + .data + ) + + rerun_init("coloring", spawn=True) + rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) + rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True) + if path: + rr.log( + "world/raw_map/path", + rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[0.08]), + static=True, + ) + if pgo_map is not None: + rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True) + if pgo_path: + rr.log( + "world/pgo_map/path", + rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.08]), + static=True, + ) + + +if __name__ == "__main__": + typer.run(main) From 4e0cec34c3e31989c1fb6e3e2995130bc01564e0 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 17:45:21 +0800 Subject: [PATCH 02/36] map tool wrap --- dimos/mapping/relocalization/pgo.py | 2 +- dimos/robot/cli/dimos.py | 57 +++++---------------- dimos/utils/cli/map.py | 55 ++++++++++++++------ docs/capabilities/mapping/relocalization.md | 30 +++++------ 4 files changed, 66 insertions(+), 78 deletions(-) diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index b04cb61ebc..3b95ba426f 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -13,7 +13,7 @@ # limitations under the License. # NOTE: This lives under mapping/relocalization/ for now because the only -# consumer is the premap export pipeline (`dimos export-premap`). It is +# consumer is the premap export pipeline (`dimos map --export`). It is # temporary and can be moved/split out later when PGO grows other consumers. from __future__ import annotations diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index b344d9be10..5607c8b1ca 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -698,55 +698,26 @@ def map_cmd( block_count: int = typer.Option( 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" ), + export: bool = typer.Option( + False, + "--export", + help="Export PGO twopass map to ./.pc2.lcm in cwd (implies --pgo)", + ), + no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), ) -> None: """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" from dimos.utils.cli.map import main as map_main - map_main(dataset=dataset, voxel=voxel, device=device, pgo=pgo, block_count=block_count) - - -@main.command(name="export-premap") -def export_premap_cmd( - dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), - output: Path | None = typer.Option(None, "-o", "--output", help="Output .pc2.lcm path"), - voxel_size: float = typer.Option(0.05, "--voxel-size", help="Voxel size for the rebuild"), - duration: float | None = typer.Option( - None, "--duration", help="Limit to first N seconds (default: full log)" - ), - device: str = typer.Option( - "CUDA:0", - "--device", - help="Open3D compute device (e.g. CUDA:0, CPU:0); fallback to CPU if unavailable", - ), - block_count: int = typer.Option( - 2_000_000, - "--block-count", - help="VoxelBlockGrid capacity", - ), -) -> None: - """Export a twopass relocalization premap (.pc2.lcm) from a recorded SQLite dataset.""" - from dimos.mapping.relocalization.pgo import pgo_then_voxels - from dimos.memory2.store.sqlite import SqliteStore - from dimos.utils.data import get_data_dir, resolve_named_path - - db_path = resolve_named_path(dataset, ".db") - - store = SqliteStore(path=db_path) - lidar = store.streams.lidar - if duration is not None: - lidar = lidar.before(lidar.first().ts + duration) - - typer.echo(f"computing twopass map from {db_path} (voxel_size={voxel_size})...") - twopass_map = pgo_then_voxels( - lidar, voxel_size=voxel_size, block_count=block_count, device=device + map_main( + dataset=dataset, + voxel=voxel, + device=device, + pgo=pgo, + block_count=block_count, + export=export, + no_gui=no_gui, ) - if output is None: - output = get_data_dir() / f"{db_path.stem}_twopass_map.pc2.lcm" - output.parent.mkdir(parents=True, exist_ok=True) - output.write_bytes(twopass_map.lcm_encode()) - typer.echo(f"wrote {output}") - @main.command() def cameracalibrate( diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 1f82617871..9c7ac61794 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -66,8 +66,16 @@ def main( block_count: int = typer.Option( 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" ), + export: bool = typer.Option( + False, + "--export", + help="Export PGO twopass map to ./.pc2.lcm in cwd (implies --pgo)", + ), + no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), ) -> None: db_path = resolve_named_path(dataset, ".db") + if export: + pgo = True store = SqliteStore(path=db_path) lidar = store.streams.lidar @@ -109,23 +117,36 @@ def collect_path(obs): .data ) - rerun_init("coloring", spawn=True) - rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) - rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True) - if path: - rr.log( - "world/raw_map/path", - rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[0.08]), - static=True, - ) - if pgo_map is not None: - rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True) - if pgo_path: - rr.log( - "world/pgo_map/path", - rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.08]), - static=True, - ) + if not no_gui: + rerun_init("coloring", spawn=True) + rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) + rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True) + if path: + rr.log( + "world/raw_map/path", + rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[0.05]), + static=True, + ) + if pgo_map is not None: + rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True) + if pgo_path: + rr.log( + "world/pgo_map/path", + rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.05]), + static=True, + ) + + if export and pgo_map is not None: + from pathlib import Path + + out_path = Path.cwd() / f"{db_path.stem}.pc2.lcm" + print(f"exporting PGO twopass map to {out_path}...") + out_path.write_bytes(pgo_map.lcm_encode()) + print(f"wrote {out_path}") + print() + print("load back with:") + print(" from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2") + print(f' pcd = PointCloud2.lcm_decode(open("{out_path.name}", "rb").read())') if __name__ == "__main__": diff --git a/docs/capabilities/mapping/relocalization.md b/docs/capabilities/mapping/relocalization.md index 51a966a1c6..a794f27f04 100644 --- a/docs/capabilities/mapping/relocalization.md +++ b/docs/capabilities/mapping/relocalization.md @@ -33,12 +33,16 @@ Convert the recording to a relocalization premap (`.pc2.lcm`): ```bash # default name from step 1: -dimos export-premap recording_go2 +dimos map recording_go2 --export # renamed: -dimos export-premap {DB_NAME} +dimos map {DB_NAME} --export ``` +`--export` implies `--pgo` (runs pose graph optimization) and writes +`./{DB_NAME}.pc2.lcm` to the current working directory. Add `--no-gui` +to skip the rerun viewer for headless runs. + `{DB_NAME}` can be a file name (with or without extension), or a relative / absolute path. When a bare file name is given, the tool searches in: @@ -49,28 +53,20 @@ When a bare file name is given, the tool searches in: Examples: ```bash -dimos export-premap go2_hongkong_office -dimos export-premap ./go2_hongkong_office.db -dimos export-premap data/go2_hongkong_office.db -dimos export-premap /abs/path/to/scan.db -``` - -Output defaults to `data/{basename}_twopass_map.pc2.lcm`. Pass `-o -` to save elsewhere. Relative paths resolve against the current -working directory; parent directories are created on demand. - -```bash -dimos export-premap recording_go2 -o /tmp/scan.pc2.lcm -dimos export-premap recording_go2 -o ./recording_go2_twopass_map.pc2.lcm +dimos map go2_hongkong_office --export +dimos map ./go2_hongkong_office.db --export +dimos map data/go2_hongkong_office.db --export +dimos map /abs/path/to/scan.db --export ``` Sample log: ``` -computing twopass map from /Users/dimos/Desktop/dimos-reloc/recording_go2.db (voxel_size=0.05)... +running PGO twopass map... Pass 1: 908 frames, 1 keyframes 12:54:32.025[inf][dimos/mapping/voxels.py ] VoxelGrid using device: CPU:0 -wrote /Users/dimos/Desktop/dimos-reloc/data/recording_go2_twopass_map.pc2.lcm +exporting PGO twopass map to /Users/dimos/Desktop/dimos-reloc/recording_go2.pc2.lcm... +wrote /Users/dimos/Desktop/dimos-reloc/recording_go2.pc2.lcm ``` ## 3. Relocalize against replay From 2099e006bd8b6ab2c6dd0848b6b2bbf4bc581fb7 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 18:16:05 +0800 Subject: [PATCH 03/36] renamed rerun blueprint to dimos map tool --- dimos/utils/cli/map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 9c7ac61794..9ef30b24b2 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -118,7 +118,7 @@ def collect_path(obs): ) if not no_gui: - rerun_init("coloring", spawn=True) + rerun_init("dimos map tool", spawn=True) rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True) if path: From 47848d44e881ce8779e9488743b343807275a8ba Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 18:49:39 +0800 Subject: [PATCH 04/36] readme cleanup --- docs/capabilities/mapping/relocalization.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/capabilities/mapping/relocalization.md b/docs/capabilities/mapping/relocalization.md index a794f27f04..a7fef58ccd 100644 --- a/docs/capabilities/mapping/relocalization.md +++ b/docs/capabilities/mapping/relocalization.md @@ -77,16 +77,16 @@ standard `unitree-go2` stack plus `RelocalizationModule`: ```bash dimos --replay --replay-db {DB_NAME} run unitree-go2-relocalization \ - -o relocalizationmodule.map_file={DB_NAME}_twopass_map + -o relocalizationmodule.map_file={DB_NAME} ``` -`{DB_NAME}_twopass_map` is resolved the same way as in section 2: cwd +`{DB_NAME}` is resolved the same way as in section 2: cwd first, then `data/`. Sample log: ``` -12:58:51.469[inf][imos/mapping/relocalization.py] Relocalization module started: map_file='recording_go2_twopass_map' loaded_map.frame_id='map' placeholder TF 'world' -> 'map' z_offset=20.0 +12:58:51.469[inf][imos/mapping/relocalization.py] Relocalization module started: map_file='recording_go2' loaded_map.frame_id='map' placeholder TF 'world' -> 'map' z_offset=20.0 12:58:56.528[war][imos/mapping/relocalization.py] relocalize skipped: n_pts=14198 < MIN_LOCAL_POINTS=20000 12:59:04.777[war][imos/mapping/relocalization.py] relocalize rejected: fitness=0.466 < threshold=0.6 time_cost=5.3s n_pts=20231 12:59:14.880[war][imos/mapping/relocalization.py] relocalize rejected: fitness=0.433 < threshold=0.6 time_cost=8.1s n_pts=37770 @@ -125,5 +125,5 @@ recorded `.db`: ```bash dimos --robot-ip {YOUR_ROBOT_IP} run unitree-go2-relocalization \ - -o relocalizationmodule.map_file={DB_NAME}_twopass_map + -o relocalizationmodule.map_file={DB_NAME} ``` From d829c0067de680ddf9a613f7bd4d84b7cde43486 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 18:35:40 +0800 Subject: [PATCH 05/36] mapping/relocalization: split PGO into composable Stream stages (pgo2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the monolithic pgo_then_voxels with four primitives over mem2 Streams and Transforms: pgo_keyframes(lidar) -> Stream[Keyframe] keyframes_to_corrections(kfs) -> Stream[Transform] (world_corrected <- world_raw) make_interpolator(corrections) -> (ts) -> Transform (SLERP + linear, endpoint-clipped) apply_corrections(stream, corr) -> Stream[T] (shuffles obs.pose) Drift correction is now a first-class, reusable Stream[Transform] that any pose-stamped consumer can apply — same math as before (rigid T_corr = T_global @ T_local^-1 per keyframe, SLERP + linear interpolation between bracketing keyframes), just composable. dimos map --pgo migrates to the new primitives; pgo_then_voxels is deleted. The internal _SimplePGO / PGOConfig / _KeyPose machinery stays in pgo.py and is imported by pgo2. --- dimos/mapping/relocalization/pgo.py | 126 +------------- dimos/mapping/relocalization/pgo2.py | 235 +++++++++++++++++++++++++++ dimos/utils/cli/map.py | 48 ++++-- 3 files changed, 277 insertions(+), 132 deletions(-) create mode 100644 dimos/mapping/relocalization/pgo2.py diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index 3b95ba426f..74a48c7236 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -# NOTE: This lives under mapping/relocalization/ for now because the only -# consumer is the premap export pipeline (`dimos map --export`). It is -# temporary and can be moved/split out later when PGO grows other consumers. +# Internal PGO/ICP machinery (gtsam-backed). Public consumers should use +# the Stream-shaped API in pgo2.py — pgo_keyframes, keyframes_to_corrections, +# make_interpolator, apply_corrections. The classes here (_SimplePGO, PGOConfig, +# _KeyPose) are imported from pgo2 to do the heavy lifting. from __future__ import annotations -from collections.abc import Callable from dataclasses import dataclass from typing import Any @@ -30,7 +30,6 @@ from scipy.spatial.transform import Rotation from dimos.core.module import ModuleConfig -from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 from dimos.utils.logging_config import setup_logger FRAME_MAP = "world" @@ -368,120 +367,3 @@ def build_global_map(self, voxel_size: float) -> np.ndarray: @property def num_key_poses(self) -> int: return len(self._key_poses) - - -def pgo_then_voxels( - stream: Any, - *, - voxel_size: float = 0.05, - block_count: int = 2_000_000, - device: str = "CUDA:0", - corrected_path_out: list[tuple[float, float, float]] | None = None, - pass1_on_frame: Callable[[Any], None] | None = None, - pass2_on_frame: Callable[[Any], None] | None = None, - **pgo_cfg: Any, -) -> PointCloud2: - """Two-pass PGO mapping (eliminates duplicate-wall artifacts). - - Pass 1 runs PGO over the lidar stream to build its corrected - keyframe trajectory. - - Pass 2 re-streams every lidar frame through ``VoxelGrid``, but each - frame's world-frame cloud is first transformed by the rigid drift - correction interpolated (SLERP for rotation, linear for translation) - from the keyframe corrections at the frame's timestamp. - - Each frame is therefore inserted exactly once at its converged - corrected pose, so walls collapse to a single layer instead of the - "smear of slightly-offset re-projections" that the single-pass - ``_SimplePGO.build_global_map`` produces. - """ - from scipy.spatial.transform import Slerp - - from dimos.mapping.voxels import VoxelGrid - - cfg = PGOConfig(**pgo_cfg) - pgo = _SimplePGO(cfg) - - n_frames = 0 - for obs in stream: - if pass1_on_frame is not None: - pass1_on_frame(obs) - if obs.pose is None: - continue - x, y, z, qx, qy, qz, qw = obs.pose - # Skip placeholder poses: zero translation, or invalid (zero-norm) quaternion. - if x == 0 and y == 0 and z == 0: - continue - if qx == 0 and qy == 0 and qz == 0 and qw == 0: - continue - r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() - t = np.array([x, y, z]) - points, _ = obs.data.as_numpy() - if len(points) == 0: - continue - body_pts = ( - (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3] - ) - if pgo.add_key_pose(r, t, obs.ts, body_pts): - pgo.search_for_loops() - pgo.smooth_and_update() - n_frames += 1 - - n_kf = pgo.num_key_poses - print(f" Pass 1: {n_frames} frames, {n_kf} keyframes") - - if corrected_path_out is not None: - corrected_path_out.extend( - (float(kp.t_global[0]), float(kp.t_global[1]), float(kp.t_global[2])) - for kp in pgo._key_poses - ) - - grid = VoxelGrid(voxel_size=voxel_size, block_count=block_count, device=device) - try: - if n_kf < 2: - for obs in stream: - if pass2_on_frame is not None: - pass2_on_frame(obs) - grid.add_frame(obs.data) - return grid.get_global_pointcloud2() - - # Sort + dedupe by timestamp: Slerp requires strictly increasing ts. - kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp) - kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp] - kf_ts = np.array([kp.timestamp for kp in kps]) - # Per-keyframe rigid drift correction: T_corr = T_global @ T_local.inv() - R_corr_list = [kp.r_global @ kp.r_local.T for kp in kps] - t_corr_list = [kp.t_global - (kp.r_global @ kp.r_local.T) @ kp.t_local for kp in kps] - t_corrs = np.stack(t_corr_list) - rot_slerp = Slerp(kf_ts, Rotation.from_matrix(np.stack(R_corr_list))) - - n_inserted = 0 - for obs in stream: - if pass2_on_frame is not None: - pass2_on_frame(obs) - if obs.pose is None: - continue - ts = float(np.clip(obs.ts, kf_ts[0], kf_ts[-1])) - r_correction = rot_slerp([ts])[0].as_matrix() - idx = int(np.searchsorted(kf_ts, ts)) - if idx == 0: - t_correction = t_corrs[0] - elif idx >= len(kf_ts): - t_correction = t_corrs[-1] - else: - t_lo, t_hi = kf_ts[idx - 1], kf_ts[idx] - alpha = (ts - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 - t_correction = (1 - alpha) * t_corrs[idx - 1] + alpha * t_corrs[idx] - - points, _ = obs.data.as_numpy() - if len(points) == 0: - continue - corrected_pts = (r_correction @ points[:, :3].T).T + t_correction - grid.add_frame(PointCloud2.from_numpy(corrected_pts.astype(np.float32))) - n_inserted += 1 - - print(f" Pass 2: {n_inserted} frames inserted with PGO-corrected poses") - return grid.get_global_pointcloud2() - finally: - grid.dispose() diff --git a/dimos/mapping/relocalization/pgo2.py b/dimos/mapping/relocalization/pgo2.py new file mode 100644 index 0000000000..3aec9cb0eb --- /dev/null +++ b/dimos/mapping/relocalization/pgo2.py @@ -0,0 +1,235 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""PGO drift corrections as composable Stream stages. + +Pipeline: + + lidar: Stream[PointCloud2] + -> pgo_keyframes(...) -> Stream[Keyframe] + -> keyframes_to_corrections(...) -> Stream[Transform] (world_corrected <- world_raw) + -> apply_corrections(any_stream, corrections) -> Stream[T] (obs.pose shuffled) + +The math: per keyframe, the drift correction is + R_corr = R_global @ R_local.T + t_corr = t_global - R_corr @ t_local +and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t, +clipping out-of-range to endpoints. +""" + +from __future__ import annotations + +from collections.abc import Callable, Iterator +from dataclasses import dataclass +from typing import Any, TypeVar + +import numpy as np +from scipy.spatial.transform import Rotation, Slerp + +from dimos.memory2.store.memory import MemoryStore +from dimos.memory2.stream import Stream +from dimos.memory2.type.observation import Observation +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 + +T = TypeVar("T") + + +@dataclass(frozen=True) +class Keyframe: + ts: float + r_local: np.ndarray # 3x3 + t_local: np.ndarray # (3,) + r_global: np.ndarray # 3x3 + t_global: np.ndarray # (3,) + + +def pgo_keyframes( + stream: Stream[PointCloud2], + *, + on_frame: Callable[[Any], None] | None = None, + **pgo_cfg: Any, +) -> Stream[Keyframe]: + """Run PGO across a pose-stamped point-cloud stream; return one obs per keyframe.""" + # Imported here to keep pgo.py the only place that imports gtsam at module scope. + from dimos.mapping.relocalization.pgo import PGOConfig, _SimplePGO + + cfg = PGOConfig(**pgo_cfg) + pgo = _SimplePGO(cfg) + + for obs in stream: + if on_frame is not None: + on_frame(obs) + if obs.pose is None: + continue + x, y, z, qx, qy, qz, qw = obs.pose + if x == 0 and y == 0 and z == 0: + continue + if qx == 0 and qy == 0 and qz == 0 and qw == 0: + continue + r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() + t = np.array([x, y, z]) + points, _ = obs.data.as_numpy() + if len(points) == 0: + continue + body_pts = ( + (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3] + ) + if pgo.add_key_pose(r, t, obs.ts, body_pts): + pgo.search_for_loops() + pgo.smooth_and_update() + + kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp) + kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp] + + mem = MemoryStore() + out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) + for kp in kps: + out.append( + Keyframe( + ts=kp.timestamp, + r_local=np.ascontiguousarray(kp.r_local), + t_local=np.ascontiguousarray(kp.t_local), + r_global=np.ascontiguousarray(kp.r_global), + t_global=np.ascontiguousarray(kp.t_global), + ), + ts=kp.timestamp, + ) + return out + + +def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: + """Per-keyframe drift correction as Transform(world_corrected <- world_raw).""" + mem = MemoryStore() + out: Stream[Transform] = mem.stream("corrections", Transform) + for obs in keyframes: + kp = obs.data + R_corr = kp.r_global @ kp.r_local.T + t_corr = kp.t_global - R_corr @ kp.t_local + tf = Transform( + translation=Vector3(float(t_corr[0]), float(t_corr[1]), float(t_corr[2])), + rotation=Quaternion.from_rotation_matrix(R_corr), + frame_id="world_corrected", + child_frame_id="world_raw", + ts=kp.ts, + ) + out.append(tf, ts=kp.ts) + return out + + +def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]: + """Materialize corrections once; return a fast ts -> Transform lookup.""" + ts_list: list[float] = [] + R_list: list[np.ndarray] = [] + t_list: list[np.ndarray] = [] + for obs in corrections: + tf = obs.data + ts_list.append(tf.ts) + q = tf.rotation + R_list.append(Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix()) + t_list.append(np.array([tf.translation.x, tf.translation.y, tf.translation.z])) + + if not ts_list: + raise ValueError("empty corrections stream") + + if len(ts_list) == 1: + only_ts = ts_list[0] + only_R = R_list[0] + only_t = t_list[0] + + def _const(ts: float) -> Transform: + return Transform( + translation=Vector3(float(only_t[0]), float(only_t[1]), float(only_t[2])), + rotation=Quaternion.from_rotation_matrix(only_R), + frame_id="world_corrected", + child_frame_id="world_raw", + ts=ts if ts is not None else only_ts, + ) + + return _const + + ts_arr = np.array(ts_list) + R_stack = np.stack(R_list) + t_stack = np.stack(t_list) + slerp = Slerp(ts_arr, Rotation.from_matrix(R_stack)) + + def interp(ts: float) -> Transform: + ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) + R = slerp([ts_clip])[0].as_matrix() + idx = int(np.searchsorted(ts_arr, ts_clip)) + if idx == 0: + t = t_stack[0] + elif idx >= len(ts_arr): + t = t_stack[-1] + else: + t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] + alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 + t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx] + return Transform( + translation=Vector3(float(t[0]), float(t[1]), float(t[2])), + rotation=Quaternion.from_rotation_matrix(R), + frame_id="world_corrected", + child_frame_id="world_raw", + ts=float(ts), + ) + + return interp + + +def correction_at(corrections: Stream[Transform], ts: float) -> Transform: + """One-off lookup. For hot paths build `make_interpolator` once and reuse.""" + return make_interpolator(corrections)(ts) + + +def apply_corrections( + stream: Stream[T], + corrections: Stream[Transform], +) -> Stream[T]: + """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts. + + `obs.data` is untouched. Frames with `obs.pose is None` pass through + unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped). + """ + interp = make_interpolator(corrections) + + def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: + for obs in upstream: + if obs.pose is None: + yield obs + continue + x, y, z, qx, qy, qz, qw = obs.pose + tf = interp(obs.ts) + R_corr = Rotation.from_quat( + [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w] + ).as_matrix() + t_corr = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) + R_raw = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() + t_raw = np.array([x, y, z]) + R_new = R_corr @ R_raw + t_new = R_corr @ t_raw + t_corr + q_new = Rotation.from_matrix(R_new).as_quat() # xyzw + new_pose = ( + float(t_new[0]), + float(t_new[1]), + float(t_new[2]), + float(q_new[0]), + float(q_new[1]), + float(q_new[2]), + float(q_new[3]), + ) + yield obs.derive(data=obs.data, pose=new_pose) + + return stream.transform(xf) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 9ef30b24b2..dc8f934347 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -95,19 +95,47 @@ def collect_path(obs): pgo_map = None pgo_path: list[tuple[float, float, float]] = [] if pgo: - from dimos.mapping.relocalization.pgo import pgo_then_voxels + import numpy as np + from scipy.spatial.transform import Rotation + + from dimos.mapping.relocalization.pgo2 import ( + keyframes_to_corrections, + make_interpolator, + pgo_keyframes, + ) + from dimos.mapping.voxels import VoxelGrid + from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 total = lidar.count() print("running PGO twopass map...") - pgo_map = pgo_then_voxels( - lidar, - voxel_size=voxel, - block_count=block_count, - device=device, - corrected_path_out=pgo_path, - pass1_on_frame=progress(total, "pgo pass 1 (optimizing)"), - pass2_on_frame=progress(total, "pgo pass 2 (rebuilding)"), - ) + keyframes = pgo_keyframes(lidar, on_frame=progress(total, "pgo pass 1 (optimizing)")) + corrections = keyframes_to_corrections(keyframes) + interp = make_interpolator(corrections) + + for obs in keyframes: + kp = obs.data + pgo_path.append((float(kp.t_global[0]), float(kp.t_global[1]), float(kp.t_global[2]))) + + pass2_pb = progress(total, "pgo pass 2 (rebuilding)") + grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) + try: + for obs in lidar: + pass2_pb(obs) + if obs.pose is None: + continue + pts, _ = obs.data.as_numpy() + if len(pts) == 0: + continue + tf = interp(obs.ts) + R = Rotation.from_quat( + [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w] + ).as_matrix() + t = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) + corrected = (R @ pts[:, :3].T).T + t + grid.add_frame(PointCloud2.from_numpy(corrected.astype(np.float32))) + pgo_map = grid.get_global_pointcloud2() + finally: + grid.dispose() global_map = ( lidar.tap(collect_path) From ebef675f98249bc429c8c636e855042520f063aa Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 18:39:44 +0800 Subject: [PATCH 06/36] mapping/relocalization: reclaim pgo.py for the public API Renames: pgo.py -> pgo_internals.py (gtsam/ICP machinery) pgo2.py -> pgo.py (public Stream-shaped API) `dimos.mapping.relocalization.pgo` is now the canonical import path (`pgo_keyframes`, `keyframes_to_corrections`, `make_interpolator`, `apply_corrections`, `correction_at`, `Keyframe`). The internal _SimplePGO / PGOConfig / _KeyPose / _icp / _voxel_downsample helpers live in pgo_internals.py and are imported lazily inside pgo.py to keep gtsam off the public-API import path. --- dimos/mapping/relocalization/pgo.py | 540 +++++++----------- dimos/mapping/relocalization/pgo2.py | 235 -------- dimos/mapping/relocalization/pgo_internals.py | 369 ++++++++++++ dimos/utils/cli/map.py | 2 +- 4 files changed, 573 insertions(+), 573 deletions(-) delete mode 100644 dimos/mapping/relocalization/pgo2.py create mode 100644 dimos/mapping/relocalization/pgo_internals.py diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index 74a48c7236..653c7158d2 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -12,358 +12,224 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Internal PGO/ICP machinery (gtsam-backed). Public consumers should use -# the Stream-shaped API in pgo2.py — pgo_keyframes, keyframes_to_corrections, -# make_interpolator, apply_corrections. The classes here (_SimplePGO, PGOConfig, -# _KeyPose) are imported from pgo2 to do the heavy lifting. +"""PGO drift corrections as composable Stream stages. + +Pipeline: + + lidar: Stream[PointCloud2] + -> pgo_keyframes(...) -> Stream[Keyframe] + -> keyframes_to_corrections(...) -> Stream[Transform] (world_corrected <- world_raw) + -> apply_corrections(any_stream, corrections) -> Stream[T] (obs.pose shuffled) + +The math: per keyframe, the drift correction is + R_corr = R_global @ R_local.T + t_corr = t_global - R_corr @ t_local +and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t, +clipping out-of-range to endpoints. +""" from __future__ import annotations +from collections.abc import Callable, Iterator from dataclasses import dataclass -from typing import Any +from typing import Any, TypeVar -import gtsam # type: ignore[import-not-found,import-untyped] import numpy as np -import open3d as o3d # type: ignore[import-untyped] -import open3d.core as o3c # type: ignore[import-untyped] -from scipy.spatial import KDTree -from scipy.spatial.transform import Rotation - -from dimos.core.module import ModuleConfig -from dimos.utils.logging_config import setup_logger - -FRAME_MAP = "world" - -logger = setup_logger() - - -class PGOConfig(ModuleConfig): - world_frame: str = FRAME_MAP - - # Keyframe detection - key_pose_delta_trans: float = 0.5 - key_pose_delta_deg: float = 10.0 - - # Loop closure - loop_search_radius: float = 2.0 - loop_time_thresh: float = 20.0 - loop_score_thresh: float = 0.3 - loop_submap_half_range: int = 10 - min_icp_inliers: int = 10 - min_keyframes_for_loop_search: int = 10 - loop_closure_extra_iterations: int = 4 - submap_resolution: float = 0.2 - min_loop_detect_duration: float = 5.0 - - # Input mode - unregister_input: bool = True # Transform world-frame scans to body-frame using odom - - # Global map - publish_global_map: bool = True - global_map_publish_rate: float = 0.5 - global_map_voxel_size: float = 0.15 - - # ICP - max_icp_iterations: int = 50 - max_icp_correspondence_dist: float = 1.0 - - -@dataclass -class _KeyPose: - r_local: np.ndarray # 3x3 rotation in local/odom frame - t_local: np.ndarray # 3-vec translation in local/odom frame - r_global: np.ndarray # 3x3 corrected rotation - t_global: np.ndarray # 3-vec corrected translation - timestamp: float - body_cloud: np.ndarray # Nx3 points in body frame - - -def _icp( - source: np.ndarray, - target: np.ndarray, - max_iter: int = 50, - max_dist: float = 1.0, - tol: float = 1e-6, - min_inliers: int = 10, - init: np.ndarray | None = None, -) -> tuple[np.ndarray, float]: - """Point-to-point ICP using Open3D's tensor pipeline. - - Returns ``(T, fitness)`` where ``fitness`` is mean squared inlier - distance (m²) — same semantic as the previous SVD implementation, so - the GTSAM noise model in ``smooth_and_update`` keeps working. - """ - if len(source) < min_inliers or len(target) < min_inliers: - return np.eye(4), float("inf") - - cpu = o3c.Device("CPU:0") - src_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(source.astype(np.float32), device=cpu)) - tgt_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(target.astype(np.float32), device=cpu)) - - # Normals on the target enable point-to-plane ICP, which converges - # tighter than point-to-point on indoor scenes (walls give unambiguous - # normals that resolve the slide-along-wall ambiguity). - tgt_pcd.estimate_normals(max_nn=30, radius=0.3) - - init_T = ( - o3c.Tensor(init.astype(np.float64), dtype=o3c.float64, device=cpu) - if init is not None - else o3c.Tensor.eye(4, dtype=o3c.float64, device=cpu) - ) - - # Silence Open3D's "0 correspondence" warning — we deliberately use a - # tight max_correspondence_distance and reject loops with poor fitness; - # the warning is informational, not an error. - with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): - result = o3d.t.pipelines.registration.icp( - source=src_pcd, - target=tgt_pcd, - max_correspondence_distance=max_dist, - init_source_to_target=init_T, - estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), - criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( - relative_fitness=tol, - relative_rmse=tol, - max_iteration=max_iter, +from scipy.spatial.transform import Rotation, Slerp + +from dimos.memory2.store.memory import MemoryStore +from dimos.memory2.stream import Stream +from dimos.memory2.type.observation import Observation +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 + +T = TypeVar("T") + + +@dataclass(frozen=True) +class Keyframe: + ts: float + r_local: np.ndarray # 3x3 + t_local: np.ndarray # (3,) + r_global: np.ndarray # 3x3 + t_global: np.ndarray # (3,) + + +def pgo_keyframes( + stream: Stream[PointCloud2], + *, + on_frame: Callable[[Any], None] | None = None, + **pgo_cfg: Any, +) -> Stream[Keyframe]: + """Run PGO across a pose-stamped point-cloud stream; return one obs per keyframe.""" + # Imported here to keep pgo_internals.py the only place that imports gtsam at module scope. + from dimos.mapping.relocalization.pgo_internals import PGOConfig, _SimplePGO + + cfg = PGOConfig(**pgo_cfg) + pgo = _SimplePGO(cfg) + + for obs in stream: + if on_frame is not None: + on_frame(obs) + if obs.pose is None: + continue + x, y, z, qx, qy, qz, qw = obs.pose + if x == 0 and y == 0 and z == 0: + continue + if qx == 0 and qy == 0 and qz == 0 and qw == 0: + continue + r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() + t = np.array([x, y, z]) + points, _ = obs.data.as_numpy() + if len(points) == 0: + continue + body_pts = ( + (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3] + ) + if pgo.add_key_pose(r, t, obs.ts, body_pts): + pgo.search_for_loops() + pgo.smooth_and_update() + + kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp) + kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp] + + mem = MemoryStore() + out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) + for kp in kps: + out.append( + Keyframe( + ts=kp.timestamp, + r_local=np.ascontiguousarray(kp.r_local), + t_local=np.ascontiguousarray(kp.t_local), + r_global=np.ascontiguousarray(kp.r_global), + t_global=np.ascontiguousarray(kp.t_global), ), + ts=kp.timestamp, ) - - fitness_inlier_frac = float(result.fitness) - if fitness_inlier_frac == 0.0: - return np.eye(4), float("inf") - - rmse = float(result.inlier_rmse) - T = result.transformation.numpy() - # Return mean squared inlier distance (m²) to match prior _icp contract. - return T, rmse * rmse - - -def _voxel_downsample(pts: np.ndarray, voxel_size: float) -> np.ndarray: - if len(pts) == 0 or voxel_size <= 0: - return pts - keys = np.floor(pts / voxel_size).astype(np.int32) - _, idx = np.unique(keys, axis=0, return_index=True) - return pts[idx] - - -class _SimplePGO: - def __init__(self, config: PGOConfig) -> None: - self._cfg = config - self._key_poses: list[_KeyPose] = [] - self._history_pairs: list[tuple[int, int]] = [] - self._cache_pairs: list[dict[str, Any]] = [] - self._r_offset = np.eye(3) - self._t_offset = np.zeros(3) - - params = gtsam.ISAM2Params() - params.setRelinearizeThreshold(0.01) - params.relinearizeSkip = 1 - self._isam2 = gtsam.ISAM2(params) - self._graph = gtsam.NonlinearFactorGraph() - self._values = gtsam.Values() - - def is_key_pose(self, r: np.ndarray, t: np.ndarray) -> bool: - if not self._key_poses: - return True - last = self._key_poses[-1] - delta_trans = np.linalg.norm(t - last.t_local) - # Angular distance via quaternion dot product - q_cur = Rotation.from_matrix(r).as_quat() # [x,y,z,w] - q_last = Rotation.from_matrix(last.r_local).as_quat() - dot = abs(np.dot(q_cur, q_last)) - delta_deg = np.degrees(2.0 * np.arccos(min(dot, 1.0))) - return bool( - delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg + return out + + +def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: + """Per-keyframe drift correction as Transform(world_corrected <- world_raw).""" + mem = MemoryStore() + out: Stream[Transform] = mem.stream("corrections", Transform) + for obs in keyframes: + kp = obs.data + R_corr = kp.r_global @ kp.r_local.T + t_corr = kp.t_global - R_corr @ kp.t_local + tf = Transform( + translation=Vector3(float(t_corr[0]), float(t_corr[1]), float(t_corr[2])), + rotation=Quaternion.from_rotation_matrix(R_corr), + frame_id="world_corrected", + child_frame_id="world_raw", + ts=kp.ts, ) + out.append(tf, ts=kp.ts) + return out + + +def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]: + """Materialize corrections once; return a fast ts -> Transform lookup.""" + ts_list: list[float] = [] + R_list: list[np.ndarray] = [] + t_list: list[np.ndarray] = [] + for obs in corrections: + tf = obs.data + ts_list.append(tf.ts) + q = tf.rotation + R_list.append(Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix()) + t_list.append(np.array([tf.translation.x, tf.translation.y, tf.translation.z])) + + if not ts_list: + raise ValueError("empty corrections stream") + + if len(ts_list) == 1: + only_ts = ts_list[0] + only_R = R_list[0] + only_t = t_list[0] + + def _const(ts: float) -> Transform: + return Transform( + translation=Vector3(float(only_t[0]), float(only_t[1]), float(only_t[2])), + rotation=Quaternion.from_rotation_matrix(only_R), + frame_id="world_corrected", + child_frame_id="world_raw", + ts=ts if ts is not None else only_ts, + ) - def add_key_pose( - self, r_local: np.ndarray, t_local: np.ndarray, timestamp: float, body_cloud: np.ndarray - ) -> bool: - if not self.is_key_pose(r_local, t_local): - return False - - idx = len(self._key_poses) - init_r = self._r_offset @ r_local - init_t = self._r_offset @ t_local + self._t_offset + return _const - pose = gtsam.Pose3(gtsam.Rot3(init_r), gtsam.Point3(init_t)) - self._values.insert(idx, pose) + ts_arr = np.array(ts_list) + R_stack = np.stack(R_list) + t_stack = np.stack(t_list) + slerp = Slerp(ts_arr, Rotation.from_matrix(R_stack)) + def interp(ts: float) -> Transform: + ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) + R = slerp([ts_clip])[0].as_matrix() + idx = int(np.searchsorted(ts_arr, ts_clip)) if idx == 0: - noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) - self._graph.add(gtsam.PriorFactorPose3(idx, pose, noise)) + t = t_stack[0] + elif idx >= len(ts_arr): + t = t_stack[-1] else: - last = self._key_poses[-1] - r_between = last.r_local.T @ r_local - t_between = last.r_local.T @ (t_local - last.t_local) - noise = gtsam.noiseModel.Diagonal.Variances( - np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6]) - ) - self._graph.add( - gtsam.BetweenFactorPose3( - idx - 1, idx, gtsam.Pose3(gtsam.Rot3(r_between), gtsam.Point3(t_between)), noise - ) - ) - - kp = _KeyPose( - r_local=r_local.copy(), - t_local=t_local.copy(), - r_global=init_r.copy(), - t_global=init_t.copy(), - timestamp=timestamp, - body_cloud=_voxel_downsample(body_cloud, self._cfg.submap_resolution), - ) - self._key_poses.append(kp) - return True - - def _get_submap(self, idx: int, half_range: int) -> np.ndarray: - lo = max(0, idx - half_range) - hi = min(len(self._key_poses) - 1, idx + half_range) - parts = [] - for i in range(lo, hi + 1): - kp = self._key_poses[i] - world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global - parts.append(world) - if not parts: - return np.empty((0, 3)) - cloud = np.vstack(parts) - return _voxel_downsample(cloud, self._cfg.submap_resolution) - - def search_for_loops(self) -> None: - if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: - return - - # Rate limit - if self._history_pairs: - cur_time = self._key_poses[-1].timestamp - last_time = self._key_poses[self._history_pairs[-1][1]].timestamp - if cur_time - last_time < self._cfg.min_loop_detect_duration: - return - - cur_idx = len(self._key_poses) - 1 - cur_kp = self._key_poses[-1] - - # Build KD-tree of previous keyframe positions - positions = np.array([kp.t_global for kp in self._key_poses[:-1]]) - tree = KDTree(positions) - - idxs = tree.query_ball_point(cur_kp.t_global, self._cfg.loop_search_radius) - if not idxs: - return - - # Pick the spatially closest keyframe that's also old enough in time. - # query_ball_point doesn't sort, so we sort by distance ourselves. - candidates = [ - (float(np.linalg.norm(self._key_poses[i].t_global - cur_kp.t_global)), i) - for i in idxs - if abs(cur_kp.timestamp - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh - ] - if not candidates: - return - candidates.sort() - loop_idx = candidates[0][1] - - # ICP verification - target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range) - source = self._get_submap(cur_idx, 0) - - transform, fitness = _icp( - source, - target, - max_iter=self._cfg.max_icp_iterations, - max_dist=self._cfg.max_icp_correspondence_dist, - min_inliers=self._cfg.min_icp_inliers, - ) - if fitness > self._cfg.loop_score_thresh: - return - - # Compute relative pose - R_icp = transform[:3, :3] - t_icp = transform[:3, 3] - r_refined = R_icp @ cur_kp.r_global - t_refined = R_icp @ cur_kp.t_global + t_icp - r_offset = self._key_poses[loop_idx].r_global.T @ r_refined - t_offset = self._key_poses[loop_idx].r_global.T @ ( - t_refined - self._key_poses[loop_idx].t_global + t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] + alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 + t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx] + return Transform( + translation=Vector3(float(t[0]), float(t[1]), float(t[2])), + rotation=Quaternion.from_rotation_matrix(R), + frame_id="world_corrected", + child_frame_id="world_raw", + ts=float(ts), ) - self._cache_pairs.append( - { - "source": cur_idx, - "target": loop_idx, - "r_offset": r_offset, - "t_offset": t_offset, - "score": fitness, - } - ) - self._history_pairs.append((loop_idx, cur_idx)) - logger.info( - "Loop closure detected", - source=cur_idx, - target=loop_idx, - score=round(fitness, 4), - ) + return interp - def smooth_and_update(self) -> None: - has_loop = bool(self._cache_pairs) - - for pair in self._cache_pairs: - # Pose3 noise model is [rx, ry, rz, x, y, z]. The two halves - # have different units (rad² vs m²), so a uniform variance — - # the original behaviour — silently makes one half pathological - # (e.g. score=0.07 → σ_rot ≈ 15° AND σ_trans ≈ 26 cm; one of - # those is too tight, one is too loose, depending on the loop). - # Use ICP fitness as the *translation* variance and a - # generous fixed rotation variance — loops shouldn't be - # trusted to fix rotation tightly without normals + p2plane. - trans_var = max(0.01, float(pair["score"])) # ≥ σ_trans = 10 cm - rot_var = 0.05 # σ_rot ≈ 13° - noise = gtsam.noiseModel.Diagonal.Variances( - np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) - ) - self._graph.add( - gtsam.BetweenFactorPose3( - pair["target"], - pair["source"], - gtsam.Pose3(gtsam.Rot3(pair["r_offset"]), gtsam.Point3(pair["t_offset"])), - noise, - ) + +def correction_at(corrections: Stream[Transform], ts: float) -> Transform: + """One-off lookup. For hot paths build `make_interpolator` once and reuse.""" + return make_interpolator(corrections)(ts) + + +def apply_corrections( + stream: Stream[T], + corrections: Stream[Transform], +) -> Stream[T]: + """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts. + + `obs.data` is untouched. Frames with `obs.pose is None` pass through + unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped). + """ + interp = make_interpolator(corrections) + + def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: + for obs in upstream: + if obs.pose is None: + yield obs + continue + x, y, z, qx, qy, qz, qw = obs.pose + tf = interp(obs.ts) + R_corr = Rotation.from_quat( + [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w] + ).as_matrix() + t_corr = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) + R_raw = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() + t_raw = np.array([x, y, z]) + R_new = R_corr @ R_raw + t_new = R_corr @ t_raw + t_corr + q_new = Rotation.from_matrix(R_new).as_quat() # xyzw + new_pose = ( + float(t_new[0]), + float(t_new[1]), + float(t_new[2]), + float(q_new[0]), + float(q_new[1]), + float(q_new[2]), + float(q_new[3]), ) - self._cache_pairs.clear() - - self._isam2.update(self._graph, self._values) - self._isam2.update() - if has_loop: - for _ in range(self._cfg.loop_closure_extra_iterations): - self._isam2.update() - self._graph = gtsam.NonlinearFactorGraph() - self._values = gtsam.Values() - - estimates = self._isam2.calculateBestEstimate() - for i in range(len(self._key_poses)): - pose = estimates.atPose3(i) - self._key_poses[i].r_global = pose.rotation().matrix() - self._key_poses[i].t_global = pose.translation() - - last = self._key_poses[-1] - self._r_offset = last.r_global @ last.r_local.T - self._t_offset = last.t_global - self._r_offset @ last.t_local - - def get_corrected_pose( - self, r_local: np.ndarray, t_local: np.ndarray - ) -> tuple[np.ndarray, np.ndarray]: - return self._r_offset @ r_local, self._r_offset @ t_local + self._t_offset - - def build_global_map(self, voxel_size: float) -> np.ndarray: - if not self._key_poses: - return np.empty((0, 3), dtype=np.float32) - parts = [] - for kp in self._key_poses: - world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global - parts.append(world) - cloud = np.vstack(parts).astype(np.float32) - return _voxel_downsample(cloud, voxel_size) - - @property - def num_key_poses(self) -> int: - return len(self._key_poses) + yield obs.derive(data=obs.data, pose=new_pose) + + return stream.transform(xf) diff --git a/dimos/mapping/relocalization/pgo2.py b/dimos/mapping/relocalization/pgo2.py deleted file mode 100644 index 3aec9cb0eb..0000000000 --- a/dimos/mapping/relocalization/pgo2.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright 2026 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""PGO drift corrections as composable Stream stages. - -Pipeline: - - lidar: Stream[PointCloud2] - -> pgo_keyframes(...) -> Stream[Keyframe] - -> keyframes_to_corrections(...) -> Stream[Transform] (world_corrected <- world_raw) - -> apply_corrections(any_stream, corrections) -> Stream[T] (obs.pose shuffled) - -The math: per keyframe, the drift correction is - R_corr = R_global @ R_local.T - t_corr = t_global - R_corr @ t_local -and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t, -clipping out-of-range to endpoints. -""" - -from __future__ import annotations - -from collections.abc import Callable, Iterator -from dataclasses import dataclass -from typing import Any, TypeVar - -import numpy as np -from scipy.spatial.transform import Rotation, Slerp - -from dimos.memory2.store.memory import MemoryStore -from dimos.memory2.stream import Stream -from dimos.memory2.type.observation import Observation -from dimos.msgs.geometry_msgs.Quaternion import Quaternion -from dimos.msgs.geometry_msgs.Transform import Transform -from dimos.msgs.geometry_msgs.Vector3 import Vector3 -from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 - -T = TypeVar("T") - - -@dataclass(frozen=True) -class Keyframe: - ts: float - r_local: np.ndarray # 3x3 - t_local: np.ndarray # (3,) - r_global: np.ndarray # 3x3 - t_global: np.ndarray # (3,) - - -def pgo_keyframes( - stream: Stream[PointCloud2], - *, - on_frame: Callable[[Any], None] | None = None, - **pgo_cfg: Any, -) -> Stream[Keyframe]: - """Run PGO across a pose-stamped point-cloud stream; return one obs per keyframe.""" - # Imported here to keep pgo.py the only place that imports gtsam at module scope. - from dimos.mapping.relocalization.pgo import PGOConfig, _SimplePGO - - cfg = PGOConfig(**pgo_cfg) - pgo = _SimplePGO(cfg) - - for obs in stream: - if on_frame is not None: - on_frame(obs) - if obs.pose is None: - continue - x, y, z, qx, qy, qz, qw = obs.pose - if x == 0 and y == 0 and z == 0: - continue - if qx == 0 and qy == 0 and qz == 0 and qw == 0: - continue - r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() - t = np.array([x, y, z]) - points, _ = obs.data.as_numpy() - if len(points) == 0: - continue - body_pts = ( - (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3] - ) - if pgo.add_key_pose(r, t, obs.ts, body_pts): - pgo.search_for_loops() - pgo.smooth_and_update() - - kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp) - kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp] - - mem = MemoryStore() - out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) - for kp in kps: - out.append( - Keyframe( - ts=kp.timestamp, - r_local=np.ascontiguousarray(kp.r_local), - t_local=np.ascontiguousarray(kp.t_local), - r_global=np.ascontiguousarray(kp.r_global), - t_global=np.ascontiguousarray(kp.t_global), - ), - ts=kp.timestamp, - ) - return out - - -def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: - """Per-keyframe drift correction as Transform(world_corrected <- world_raw).""" - mem = MemoryStore() - out: Stream[Transform] = mem.stream("corrections", Transform) - for obs in keyframes: - kp = obs.data - R_corr = kp.r_global @ kp.r_local.T - t_corr = kp.t_global - R_corr @ kp.t_local - tf = Transform( - translation=Vector3(float(t_corr[0]), float(t_corr[1]), float(t_corr[2])), - rotation=Quaternion.from_rotation_matrix(R_corr), - frame_id="world_corrected", - child_frame_id="world_raw", - ts=kp.ts, - ) - out.append(tf, ts=kp.ts) - return out - - -def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]: - """Materialize corrections once; return a fast ts -> Transform lookup.""" - ts_list: list[float] = [] - R_list: list[np.ndarray] = [] - t_list: list[np.ndarray] = [] - for obs in corrections: - tf = obs.data - ts_list.append(tf.ts) - q = tf.rotation - R_list.append(Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix()) - t_list.append(np.array([tf.translation.x, tf.translation.y, tf.translation.z])) - - if not ts_list: - raise ValueError("empty corrections stream") - - if len(ts_list) == 1: - only_ts = ts_list[0] - only_R = R_list[0] - only_t = t_list[0] - - def _const(ts: float) -> Transform: - return Transform( - translation=Vector3(float(only_t[0]), float(only_t[1]), float(only_t[2])), - rotation=Quaternion.from_rotation_matrix(only_R), - frame_id="world_corrected", - child_frame_id="world_raw", - ts=ts if ts is not None else only_ts, - ) - - return _const - - ts_arr = np.array(ts_list) - R_stack = np.stack(R_list) - t_stack = np.stack(t_list) - slerp = Slerp(ts_arr, Rotation.from_matrix(R_stack)) - - def interp(ts: float) -> Transform: - ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) - R = slerp([ts_clip])[0].as_matrix() - idx = int(np.searchsorted(ts_arr, ts_clip)) - if idx == 0: - t = t_stack[0] - elif idx >= len(ts_arr): - t = t_stack[-1] - else: - t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] - alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 - t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx] - return Transform( - translation=Vector3(float(t[0]), float(t[1]), float(t[2])), - rotation=Quaternion.from_rotation_matrix(R), - frame_id="world_corrected", - child_frame_id="world_raw", - ts=float(ts), - ) - - return interp - - -def correction_at(corrections: Stream[Transform], ts: float) -> Transform: - """One-off lookup. For hot paths build `make_interpolator` once and reuse.""" - return make_interpolator(corrections)(ts) - - -def apply_corrections( - stream: Stream[T], - corrections: Stream[Transform], -) -> Stream[T]: - """Shuffle obs.pose on `stream` by the interpolated correction at each obs.ts. - - `obs.data` is untouched. Frames with `obs.pose is None` pass through - unchanged. Out-of-range `obs.ts` get the endpoint correction (clipped). - """ - interp = make_interpolator(corrections) - - def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: - for obs in upstream: - if obs.pose is None: - yield obs - continue - x, y, z, qx, qy, qz, qw = obs.pose - tf = interp(obs.ts) - R_corr = Rotation.from_quat( - [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w] - ).as_matrix() - t_corr = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) - R_raw = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() - t_raw = np.array([x, y, z]) - R_new = R_corr @ R_raw - t_new = R_corr @ t_raw + t_corr - q_new = Rotation.from_matrix(R_new).as_quat() # xyzw - new_pose = ( - float(t_new[0]), - float(t_new[1]), - float(t_new[2]), - float(q_new[0]), - float(q_new[1]), - float(q_new[2]), - float(q_new[3]), - ) - yield obs.derive(data=obs.data, pose=new_pose) - - return stream.transform(xf) diff --git a/dimos/mapping/relocalization/pgo_internals.py b/dimos/mapping/relocalization/pgo_internals.py new file mode 100644 index 0000000000..c3465d8ee5 --- /dev/null +++ b/dimos/mapping/relocalization/pgo_internals.py @@ -0,0 +1,369 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Internal PGO/ICP machinery (gtsam-backed). Public consumers should use +# the Stream-shaped API in pgo.py — pgo_keyframes, keyframes_to_corrections, +# make_interpolator, apply_corrections. The classes here (_SimplePGO, +# PGOConfig, _KeyPose) are imported from pgo to do the heavy lifting. + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +import gtsam # type: ignore[import-not-found,import-untyped] +import numpy as np +import open3d as o3d # type: ignore[import-untyped] +import open3d.core as o3c # type: ignore[import-untyped] +from scipy.spatial import KDTree +from scipy.spatial.transform import Rotation + +from dimos.core.module import ModuleConfig +from dimos.utils.logging_config import setup_logger + +FRAME_MAP = "world" + +logger = setup_logger() + + +class PGOConfig(ModuleConfig): + world_frame: str = FRAME_MAP + + # Keyframe detection + key_pose_delta_trans: float = 0.5 + key_pose_delta_deg: float = 10.0 + + # Loop closure + loop_search_radius: float = 2.0 + loop_time_thresh: float = 20.0 + loop_score_thresh: float = 0.3 + loop_submap_half_range: int = 10 + min_icp_inliers: int = 10 + min_keyframes_for_loop_search: int = 10 + loop_closure_extra_iterations: int = 4 + submap_resolution: float = 0.2 + min_loop_detect_duration: float = 5.0 + + # Input mode + unregister_input: bool = True # Transform world-frame scans to body-frame using odom + + # Global map + publish_global_map: bool = True + global_map_publish_rate: float = 0.5 + global_map_voxel_size: float = 0.15 + + # ICP + max_icp_iterations: int = 50 + max_icp_correspondence_dist: float = 1.0 + + +@dataclass +class _KeyPose: + r_local: np.ndarray # 3x3 rotation in local/odom frame + t_local: np.ndarray # 3-vec translation in local/odom frame + r_global: np.ndarray # 3x3 corrected rotation + t_global: np.ndarray # 3-vec corrected translation + timestamp: float + body_cloud: np.ndarray # Nx3 points in body frame + + +def _icp( + source: np.ndarray, + target: np.ndarray, + max_iter: int = 50, + max_dist: float = 1.0, + tol: float = 1e-6, + min_inliers: int = 10, + init: np.ndarray | None = None, +) -> tuple[np.ndarray, float]: + """Point-to-point ICP using Open3D's tensor pipeline. + + Returns ``(T, fitness)`` where ``fitness`` is mean squared inlier + distance (m²) — same semantic as the previous SVD implementation, so + the GTSAM noise model in ``smooth_and_update`` keeps working. + """ + if len(source) < min_inliers or len(target) < min_inliers: + return np.eye(4), float("inf") + + cpu = o3c.Device("CPU:0") + src_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(source.astype(np.float32), device=cpu)) + tgt_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(target.astype(np.float32), device=cpu)) + + # Normals on the target enable point-to-plane ICP, which converges + # tighter than point-to-point on indoor scenes (walls give unambiguous + # normals that resolve the slide-along-wall ambiguity). + tgt_pcd.estimate_normals(max_nn=30, radius=0.3) + + init_T = ( + o3c.Tensor(init.astype(np.float64), dtype=o3c.float64, device=cpu) + if init is not None + else o3c.Tensor.eye(4, dtype=o3c.float64, device=cpu) + ) + + # Silence Open3D's "0 correspondence" warning — we deliberately use a + # tight max_correspondence_distance and reject loops with poor fitness; + # the warning is informational, not an error. + with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): + result = o3d.t.pipelines.registration.icp( + source=src_pcd, + target=tgt_pcd, + max_correspondence_distance=max_dist, + init_source_to_target=init_T, + estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), + criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( + relative_fitness=tol, + relative_rmse=tol, + max_iteration=max_iter, + ), + ) + + fitness_inlier_frac = float(result.fitness) + if fitness_inlier_frac == 0.0: + return np.eye(4), float("inf") + + rmse = float(result.inlier_rmse) + T = result.transformation.numpy() + # Return mean squared inlier distance (m²) to match prior _icp contract. + return T, rmse * rmse + + +def _voxel_downsample(pts: np.ndarray, voxel_size: float) -> np.ndarray: + if len(pts) == 0 or voxel_size <= 0: + return pts + keys = np.floor(pts / voxel_size).astype(np.int32) + _, idx = np.unique(keys, axis=0, return_index=True) + return pts[idx] + + +class _SimplePGO: + def __init__(self, config: PGOConfig) -> None: + self._cfg = config + self._key_poses: list[_KeyPose] = [] + self._history_pairs: list[tuple[int, int]] = [] + self._cache_pairs: list[dict[str, Any]] = [] + self._r_offset = np.eye(3) + self._t_offset = np.zeros(3) + + params = gtsam.ISAM2Params() + params.setRelinearizeThreshold(0.01) + params.relinearizeSkip = 1 + self._isam2 = gtsam.ISAM2(params) + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + def is_key_pose(self, r: np.ndarray, t: np.ndarray) -> bool: + if not self._key_poses: + return True + last = self._key_poses[-1] + delta_trans = np.linalg.norm(t - last.t_local) + # Angular distance via quaternion dot product + q_cur = Rotation.from_matrix(r).as_quat() # [x,y,z,w] + q_last = Rotation.from_matrix(last.r_local).as_quat() + dot = abs(np.dot(q_cur, q_last)) + delta_deg = np.degrees(2.0 * np.arccos(min(dot, 1.0))) + return bool( + delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg + ) + + def add_key_pose( + self, r_local: np.ndarray, t_local: np.ndarray, timestamp: float, body_cloud: np.ndarray + ) -> bool: + if not self.is_key_pose(r_local, t_local): + return False + + idx = len(self._key_poses) + init_r = self._r_offset @ r_local + init_t = self._r_offset @ t_local + self._t_offset + + pose = gtsam.Pose3(gtsam.Rot3(init_r), gtsam.Point3(init_t)) + self._values.insert(idx, pose) + + if idx == 0: + noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) + self._graph.add(gtsam.PriorFactorPose3(idx, pose, noise)) + else: + last = self._key_poses[-1] + r_between = last.r_local.T @ r_local + t_between = last.r_local.T @ (t_local - last.t_local) + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6]) + ) + self._graph.add( + gtsam.BetweenFactorPose3( + idx - 1, idx, gtsam.Pose3(gtsam.Rot3(r_between), gtsam.Point3(t_between)), noise + ) + ) + + kp = _KeyPose( + r_local=r_local.copy(), + t_local=t_local.copy(), + r_global=init_r.copy(), + t_global=init_t.copy(), + timestamp=timestamp, + body_cloud=_voxel_downsample(body_cloud, self._cfg.submap_resolution), + ) + self._key_poses.append(kp) + return True + + def _get_submap(self, idx: int, half_range: int) -> np.ndarray: + lo = max(0, idx - half_range) + hi = min(len(self._key_poses) - 1, idx + half_range) + parts = [] + for i in range(lo, hi + 1): + kp = self._key_poses[i] + world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global + parts.append(world) + if not parts: + return np.empty((0, 3)) + cloud = np.vstack(parts) + return _voxel_downsample(cloud, self._cfg.submap_resolution) + + def search_for_loops(self) -> None: + if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: + return + + # Rate limit + if self._history_pairs: + cur_time = self._key_poses[-1].timestamp + last_time = self._key_poses[self._history_pairs[-1][1]].timestamp + if cur_time - last_time < self._cfg.min_loop_detect_duration: + return + + cur_idx = len(self._key_poses) - 1 + cur_kp = self._key_poses[-1] + + # Build KD-tree of previous keyframe positions + positions = np.array([kp.t_global for kp in self._key_poses[:-1]]) + tree = KDTree(positions) + + idxs = tree.query_ball_point(cur_kp.t_global, self._cfg.loop_search_radius) + if not idxs: + return + + # Pick the spatially closest keyframe that's also old enough in time. + # query_ball_point doesn't sort, so we sort by distance ourselves. + candidates = [ + (float(np.linalg.norm(self._key_poses[i].t_global - cur_kp.t_global)), i) + for i in idxs + if abs(cur_kp.timestamp - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh + ] + if not candidates: + return + candidates.sort() + loop_idx = candidates[0][1] + + # ICP verification + target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range) + source = self._get_submap(cur_idx, 0) + + transform, fitness = _icp( + source, + target, + max_iter=self._cfg.max_icp_iterations, + max_dist=self._cfg.max_icp_correspondence_dist, + min_inliers=self._cfg.min_icp_inliers, + ) + if fitness > self._cfg.loop_score_thresh: + return + + # Compute relative pose + R_icp = transform[:3, :3] + t_icp = transform[:3, 3] + r_refined = R_icp @ cur_kp.r_global + t_refined = R_icp @ cur_kp.t_global + t_icp + r_offset = self._key_poses[loop_idx].r_global.T @ r_refined + t_offset = self._key_poses[loop_idx].r_global.T @ ( + t_refined - self._key_poses[loop_idx].t_global + ) + + self._cache_pairs.append( + { + "source": cur_idx, + "target": loop_idx, + "r_offset": r_offset, + "t_offset": t_offset, + "score": fitness, + } + ) + self._history_pairs.append((loop_idx, cur_idx)) + logger.info( + "Loop closure detected", + source=cur_idx, + target=loop_idx, + score=round(fitness, 4), + ) + + def smooth_and_update(self) -> None: + has_loop = bool(self._cache_pairs) + + for pair in self._cache_pairs: + # Pose3 noise model is [rx, ry, rz, x, y, z]. The two halves + # have different units (rad² vs m²), so a uniform variance — + # the original behaviour — silently makes one half pathological + # (e.g. score=0.07 → σ_rot ≈ 15° AND σ_trans ≈ 26 cm; one of + # those is too tight, one is too loose, depending on the loop). + # Use ICP fitness as the *translation* variance and a + # generous fixed rotation variance — loops shouldn't be + # trusted to fix rotation tightly without normals + p2plane. + trans_var = max(0.01, float(pair["score"])) # ≥ σ_trans = 10 cm + rot_var = 0.05 # σ_rot ≈ 13° + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) + ) + self._graph.add( + gtsam.BetweenFactorPose3( + pair["target"], + pair["source"], + gtsam.Pose3(gtsam.Rot3(pair["r_offset"]), gtsam.Point3(pair["t_offset"])), + noise, + ) + ) + self._cache_pairs.clear() + + self._isam2.update(self._graph, self._values) + self._isam2.update() + if has_loop: + for _ in range(self._cfg.loop_closure_extra_iterations): + self._isam2.update() + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + estimates = self._isam2.calculateBestEstimate() + for i in range(len(self._key_poses)): + pose = estimates.atPose3(i) + self._key_poses[i].r_global = pose.rotation().matrix() + self._key_poses[i].t_global = pose.translation() + + last = self._key_poses[-1] + self._r_offset = last.r_global @ last.r_local.T + self._t_offset = last.t_global - self._r_offset @ last.t_local + + def get_corrected_pose( + self, r_local: np.ndarray, t_local: np.ndarray + ) -> tuple[np.ndarray, np.ndarray]: + return self._r_offset @ r_local, self._r_offset @ t_local + self._t_offset + + def build_global_map(self, voxel_size: float) -> np.ndarray: + if not self._key_poses: + return np.empty((0, 3), dtype=np.float32) + parts = [] + for kp in self._key_poses: + world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global + parts.append(world) + cloud = np.vstack(parts).astype(np.float32) + return _voxel_downsample(cloud, voxel_size) + + @property + def num_key_poses(self) -> int: + return len(self._key_poses) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index dc8f934347..645844b735 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -98,7 +98,7 @@ def collect_path(obs): import numpy as np from scipy.spatial.transform import Rotation - from dimos.mapping.relocalization.pgo2 import ( + from dimos.mapping.relocalization.pgo import ( keyframes_to_corrections, make_interpolator, pgo_keyframes, From 25cf609bdc4dd56e8e207c37744507b49ccef86b Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 19:58:19 +0800 Subject: [PATCH 07/36] pgo rewrite --- dimos/mapping/relocalization/pgo.py | 558 +++++++++++++++--- dimos/mapping/relocalization/pgo_internals.py | 369 ------------ dimos/memory2/type/observation.py | 38 +- dimos/utils/cli/map.py | 6 +- 4 files changed, 502 insertions(+), 469 deletions(-) delete mode 100644 dimos/mapping/relocalization/pgo_internals.py diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index 653c7158d2..e006cc1b7b 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -26,15 +26,20 @@ t_corr = t_global - R_corr @ t_local and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t, clipping out-of-range to endpoints. + +`gtsam` and `open3d` are imported lazily inside hot helpers so importing this module +stays cheap and gtsam-free for consumers that only need `Keyframe` / `apply_corrections`. """ from __future__ import annotations from collections.abc import Callable, Iterator from dataclasses import dataclass -from typing import Any, TypeVar +from typing import TYPE_CHECKING, Any, TypeVar import numpy as np +import open3d as o3d # type: ignore[import-untyped] +import open3d.core as o3c # type: ignore[import-untyped] from scipy.spatial.transform import Rotation, Slerp from dimos.memory2.store.memory import MemoryStore @@ -44,17 +49,53 @@ from dimos.msgs.geometry_msgs.Transform import Transform from dimos.msgs.geometry_msgs.Vector3 import Vector3 from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.protocol.service.spec import BaseConfig +from dimos.utils.logging_config import setup_logger + +if TYPE_CHECKING: + import gtsam # type: ignore[import-not-found,import-untyped] T = TypeVar("T") +FRAME_WORLD_CORRECTED = "world_corrected" +FRAME_WORLD_RAW = "world_raw" +FRAME_BODY = "body" + +logger = setup_logger() + + +class PGOConfig(BaseConfig): + # Keyframe detection + key_pose_delta_trans: float = 0.5 + key_pose_delta_deg: float = 10.0 + + # Loop closure + loop_search_radius: float = 2.0 + loop_time_thresh: float = 20.0 + loop_score_thresh: float = 0.3 + loop_submap_half_range: int = 10 + min_icp_inliers: int = 10 + min_keyframes_for_loop_search: int = 10 + loop_closure_extra_iterations: int = 4 + submap_resolution: float = 0.2 + min_loop_detect_duration: float = 5.0 + + # ICP + max_icp_iterations: int = 50 + max_icp_correspondence_dist: float = 1.0 + @dataclass(frozen=True) class Keyframe: + """Keyframe emitted by `pgo_keyframes`. + + `local` is the body pose in the odom (`world_raw`) frame, `optimized` is + the body pose in the drift-corrected (`world_corrected`) frame. + """ + ts: float - r_local: np.ndarray # 3x3 - t_local: np.ndarray # (3,) - r_global: np.ndarray # 3x3 - t_global: np.ndarray # (3,) + local: Transform + optimized: Transform def pgo_keyframes( @@ -63,51 +104,32 @@ def pgo_keyframes( on_frame: Callable[[Any], None] | None = None, **pgo_cfg: Any, ) -> Stream[Keyframe]: - """Run PGO across a pose-stamped point-cloud stream; return one obs per keyframe.""" - # Imported here to keep pgo_internals.py the only place that imports gtsam at module scope. - from dimos.mapping.relocalization.pgo_internals import PGOConfig, _SimplePGO - + """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe.""" cfg = PGOConfig(**pgo_cfg) - pgo = _SimplePGO(cfg) + pgo = _PGO(cfg) for obs in stream: if on_frame is not None: on_frame(obs) if obs.pose is None: continue - x, y, z, qx, qy, qz, qw = obs.pose - if x == 0 and y == 0 and z == 0: - continue - if qx == 0 and qy == 0 and qz == 0 and qw == 0: + # Skip placeholder poses (origin position OR zero quaternion). + if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: continue - r = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() - t = np.array([x, y, z]) - points, _ = obs.data.as_numpy() - if len(points) == 0: + if ( + obs.pose[3] == 0 + and obs.pose[4] == 0 + and obs.pose[5] == 0 + and (obs.pose[6] == 0 or obs.pose[6] == 1) + ): continue - body_pts = ( - (r.T @ (points[:, :3].T - t[:, None])).T if cfg.unregister_input else points[:, :3] - ) - if pgo.add_key_pose(r, t, obs.ts, body_pts): - pgo.search_for_loops() - pgo.smooth_and_update() - - kps = sorted(pgo._key_poses, key=lambda kp: kp.timestamp) - kps = [kp for i, kp in enumerate(kps) if i == 0 or kp.timestamp > kps[i - 1].timestamp] + local_pose = _obs_to_pose3(obs) + pgo.process(local_pose, obs.ts, obs.data) mem = MemoryStore() out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) - for kp in kps: - out.append( - Keyframe( - ts=kp.timestamp, - r_local=np.ascontiguousarray(kp.r_local), - t_local=np.ascontiguousarray(kp.t_local), - r_global=np.ascontiguousarray(kp.r_global), - t_global=np.ascontiguousarray(kp.t_global), - ), - ts=kp.timestamp, - ) + for kf in pgo.finalize(): + out.append(kf, ts=kf.ts) return out @@ -116,17 +138,10 @@ def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: mem = MemoryStore() out: Stream[Transform] = mem.stream("corrections", Transform) for obs in keyframes: - kp = obs.data - R_corr = kp.r_global @ kp.r_local.T - t_corr = kp.t_global - R_corr @ kp.t_local - tf = Transform( - translation=Vector3(float(t_corr[0]), float(t_corr[1]), float(t_corr[2])), - rotation=Quaternion.from_rotation_matrix(R_corr), - frame_id="world_corrected", - child_frame_id="world_raw", - ts=kp.ts, - ) - out.append(tf, ts=kp.ts) + kf = obs.data + # world_corrected <- body <- world_raw composes to world_corrected <- world_raw. + drift = kf.optimized + kf.local.inverse() + out.append(drift, ts=kf.ts) return out @@ -136,30 +151,22 @@ def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Trans R_list: list[np.ndarray] = [] t_list: list[np.ndarray] = [] for obs in corrections: - tf = obs.data - ts_list.append(tf.ts) - q = tf.rotation - R_list.append(Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix()) - t_list.append(np.array([tf.translation.x, tf.translation.y, tf.translation.z])) + R, t = _r_t_from_transform(obs.data) + # obs.ts is authoritative; obs.data.ts can be mutated by Transform's + # ts=0.0 -> time.time() fallback in its constructor. + ts_list.append(obs.ts) + R_list.append(R) + t_list.append(t) if not ts_list: raise ValueError("empty corrections stream") + # Slerp needs ≥2 keyframes. Pad len==1 with a duplicate so the + # general path handles it; clip-to-endpoints behavior is unchanged. if len(ts_list) == 1: - only_ts = ts_list[0] - only_R = R_list[0] - only_t = t_list[0] - - def _const(ts: float) -> Transform: - return Transform( - translation=Vector3(float(only_t[0]), float(only_t[1]), float(only_t[2])), - rotation=Quaternion.from_rotation_matrix(only_R), - frame_id="world_corrected", - child_frame_id="world_raw", - ts=ts if ts is not None else only_ts, - ) - - return _const + ts_list.append(ts_list[0] + 1e-6) + R_list.append(R_list[0]) + t_list.append(t_list[0]) ts_arr = np.array(ts_list) R_stack = np.stack(R_list) @@ -178,13 +185,7 @@ def interp(ts: float) -> Transform: t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx] - return Transform( - translation=Vector3(float(t[0]), float(t[1]), float(t[2])), - rotation=Quaternion.from_rotation_matrix(R), - frame_id="world_corrected", - child_frame_id="world_raw", - ts=float(ts), - ) + return _transform_from_r_t(R, t, ts=float(ts)) return interp @@ -210,26 +211,391 @@ def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: if obs.pose is None: yield obs continue - x, y, z, qx, qy, qz, qw = obs.pose - tf = interp(obs.ts) - R_corr = Rotation.from_quat( - [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w] - ).as_matrix() - t_corr = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) - R_raw = Rotation.from_quat([qx, qy, qz, qw]).as_matrix() - t_raw = np.array([x, y, z]) - R_new = R_corr @ R_raw - t_new = R_corr @ t_raw + t_corr - q_new = Rotation.from_matrix(R_new).as_quat() # xyzw - new_pose = ( - float(t_new[0]), - float(t_new[1]), - float(t_new[2]), - float(q_new[0]), - float(q_new[1]), - float(q_new[2]), - float(q_new[3]), - ) - yield obs.derive(data=obs.data, pose=new_pose) + raw_tf = Transform.from_pose(FRAME_BODY, obs.pose_stamped) + # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw. + # Observation normalizes Transform back to 7-tuple via __post_init__. + corrected = interp(obs.ts) + raw_tf + yield obs.derive(data=obs.data, pose=corrected) return stream.transform(xf) + + +# --------------------------------------------------------------------------- +# Transform <-> (R, t) and obs.pose helpers (private) +# --------------------------------------------------------------------------- + + +def _r_t_from_transform(tf: Transform) -> tuple[np.ndarray, np.ndarray]: + q = tf.rotation + R = Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix() + t = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) + return R, t + + +def _transform_from_r_t(R: np.ndarray, t: np.ndarray, *, ts: float) -> Transform: + return Transform( + translation=Vector3(float(t[0]), float(t[1]), float(t[2])), + rotation=Quaternion.from_rotation_matrix(R), + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_WORLD_RAW, + ts=ts, + ) + + +def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3: + """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`.""" + import gtsam # type: ignore[import-not-found,import-untyped] + + if obs.pose is None: + raise LookupError("No pose set on this observation") + x, y, z, qx, qy, qz, qw = obs.pose + return gtsam.Pose3( + gtsam.Rot3.Quaternion(float(qw), float(qx), float(qy), float(qz)), + gtsam.Point3(float(x), float(y), float(z)), + ) + + +@dataclass +class _KeyPose: + local: gtsam.Pose3 # odom-frame pose at capture + optimized: gtsam.Pose3 # drift-corrected pose + timestamp: float + body_cloud: PointCloud2 # voxel-downsampled, body frame + + +@dataclass +class _LoopPair: + source: int + target: int + offset: gtsam.Pose3 # source pose in target's frame + score: float + + +class _PGO: + """Incremental PGO: gtsam ISAM2 over keyframes with ICP loop closures. + + Call `process` per frame (odom pose + body-frame points). Call + `finalize` once at the end for the sorted, deduped keyframe list. + """ + + def __init__(self, config: PGOConfig) -> None: + import gtsam # type: ignore[import-not-found,import-untyped] + + self._gtsam = gtsam + self._cfg = config + self._key_poses: list[_KeyPose] = [] + self._pending_loops: list[_LoopPair] = [] + self._last_loop_ts: float | None = None + self._world_correction: gtsam.Pose3 = gtsam.Pose3() # identity + + params = gtsam.ISAM2Params() + params.setRelinearizeThreshold(0.01) + params.relinearizeSkip = 1 + self._isam2 = gtsam.ISAM2(params) + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + def process( + self, + local_pose: gtsam.Pose3, + ts: float, + world_cloud: PointCloud2, + ) -> None: + if len(world_cloud) == 0: + return + if not self._is_keyframe(local_pose): + return + # Unregister: lift world-frame scan back into body frame using its + # odom pose, so PGO can re-project it via the optimized pose later. + body_cloud = world_cloud.transform( + _pose3_to_transform(local_pose.inverse(), ts=ts) + ).voxel_downsample(self._cfg.submap_resolution) + self._add_keyframe(local_pose, ts, body_cloud) + self._search_for_loops() + self._smooth_and_update() + + def finalize(self) -> list[Keyframe]: + """Return keyframes sorted by ts, with duplicate-ts entries dropped.""" + kps = sorted(self._key_poses, key=lambda kp: kp.timestamp) + out: list[Keyframe] = [] + for i, kp in enumerate(kps): + if i > 0 and kp.timestamp <= kps[i - 1].timestamp: + continue + out.append( + Keyframe( + ts=kp.timestamp, + local=_pose3_to_transform( + kp.local, + ts=kp.timestamp, + frame_id=FRAME_WORLD_RAW, + child_frame_id=FRAME_BODY, + ), + optimized=_pose3_to_transform( + kp.optimized, + ts=kp.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ), + ) + ) + return out + + # ---------- internal ---------- + + def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool: + if not self._key_poses: + return True + delta = self._key_poses[-1].local.inverse().compose(local_pose) + delta_trans = float(np.linalg.norm(np.asarray(delta.translation()))) + # Rotation magnitude from trace: cos(theta) = (tr(R) - 1) / 2. + R = delta.rotation().matrix() + cos_theta = float(np.clip((np.trace(R) - 1.0) / 2.0, -1.0, 1.0)) + delta_deg = float(np.degrees(np.arccos(cos_theta))) + return ( + delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg + ) + + def _add_keyframe( + self, + local_pose: gtsam.Pose3, + ts: float, + body_cloud: PointCloud2, + ) -> None: + gtsam = self._gtsam + idx = len(self._key_poses) + optimized = self._world_correction.compose(local_pose) + + self._values.insert(idx, optimized) + + if idx == 0: + noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) + self._graph.add(gtsam.PriorFactorPose3(idx, optimized, noise)) + else: + last_local = self._key_poses[-1].local + between = last_local.inverse().compose(local_pose) + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6]) + ) + self._graph.add(gtsam.BetweenFactorPose3(idx - 1, idx, between, noise)) + + self._key_poses.append( + _KeyPose( + local=local_pose, + optimized=optimized, + timestamp=ts, + body_cloud=body_cloud, + ) + ) + + def _get_submap(self, idx: int, half_range: int) -> PointCloud2: + lo = max(0, idx - half_range) + hi = min(len(self._key_poses) - 1, idx + half_range) + if lo > hi: + return PointCloud2() + cloud = self._key_poses[lo].body_cloud.transform( + _pose3_to_transform(self._key_poses[lo].optimized, ts=self._key_poses[lo].timestamp) + ) + for i in range(lo + 1, hi + 1): + kp = self._key_poses[i] + cloud = cloud + kp.body_cloud.transform( + _pose3_to_transform(kp.optimized, ts=kp.timestamp) + ) + return cloud.voxel_downsample(self._cfg.submap_resolution) + + def _search_for_loops(self) -> None: + if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: + return + + cur_ts = self._key_poses[-1].timestamp + if ( + self._last_loop_ts is not None + and cur_ts - self._last_loop_ts < self._cfg.min_loop_detect_duration + ): + return + + cur_idx = len(self._key_poses) - 1 + cur_kp = self._key_poses[-1] + cur_t = np.asarray(cur_kp.optimized.translation()) + + from scipy.spatial import KDTree + + positions = np.array( + [np.asarray(kp.optimized.translation()) for kp in self._key_poses[:-1]] + ) + tree = KDTree(positions) + idxs = tree.query_ball_point(cur_t, self._cfg.loop_search_radius) + if not idxs: + return + + # query_ball_point doesn't sort by distance — do it ourselves and + # filter by min time gap to avoid closing loops to recent keyframes. + candidates = [ + ( + float( + np.linalg.norm(np.asarray(self._key_poses[i].optimized.translation()) - cur_t) + ), + i, + ) + for i in idxs + if abs(cur_ts - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh + ] + if not candidates: + return + candidates.sort() + loop_idx = candidates[0][1] + + target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range) + source = self._get_submap(cur_idx, 0) + + icp_tf, fitness = _icp( + source, + target, + max_iter=self._cfg.max_icp_iterations, + max_dist=self._cfg.max_icp_correspondence_dist, + min_inliers=self._cfg.min_icp_inliers, + ) + if fitness > self._cfg.loop_score_thresh: + return + + # icp_tf takes cur_kp.optimized -> refined pose (correcting the drift). + # offset = loop_kp.optimized^-1 * refined = relative pose from loop to cur. + icp_pose = _transform_to_pose3(icp_tf) + refined = icp_pose.compose(cur_kp.optimized) + offset = self._key_poses[loop_idx].optimized.between(refined) + + self._pending_loops.append( + _LoopPair( + source=cur_idx, + target=loop_idx, + offset=offset, + score=fitness, + ) + ) + self._last_loop_ts = cur_ts + logger.info( + "Loop closure detected", + source=cur_idx, + target=loop_idx, + score=round(fitness, 4), + ) + + def _smooth_and_update(self) -> None: + gtsam = self._gtsam + has_loop = bool(self._pending_loops) + + for pair in self._pending_loops: + # Pose3 noise is [rx, ry, rz, x, y, z]. The two halves have + # different units (rad² vs m²), so a uniform variance silently + # makes one half pathological. Use ICP fitness as the + # *translation* variance and a generous fixed rotation variance + # — loops shouldn't be trusted to fix rotation tightly without + # normals + p2plane. + trans_var = max(0.01, float(pair.score)) # >= sigma_trans = 10 cm + rot_var = 0.05 # sigma_rot ~ 13 deg + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) + ) + self._graph.add(gtsam.BetweenFactorPose3(pair.target, pair.source, pair.offset, noise)) + self._pending_loops.clear() + + self._isam2.update(self._graph, self._values) + self._isam2.update() + if has_loop: + for _ in range(self._cfg.loop_closure_extra_iterations): + self._isam2.update() + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + estimates = self._isam2.calculateBestEstimate() + for i in range(len(self._key_poses)): + self._key_poses[i].optimized = estimates.atPose3(i) + + last = self._key_poses[-1] + self._world_correction = last.optimized.compose(last.local.inverse()) + + +def _pose3_to_transform( + pose: gtsam.Pose3, + *, + ts: float, + frame_id: str = "", + child_frame_id: str = "", +) -> Transform: + """PGO-internal: build a Transform from a Pose3.""" + t = np.asarray(pose.translation()) + return Transform( + translation=Vector3(float(t[0]), float(t[1]), float(t[2])), + rotation=Quaternion.from_rotation_matrix(pose.rotation().matrix()), + frame_id=frame_id, + child_frame_id=child_frame_id, + ts=ts, + ) + + +def _transform_to_pose3(tf: Transform) -> gtsam.Pose3: + """PGO-internal: build a Pose3 from a Transform.""" + import gtsam # type: ignore[import-not-found,import-untyped] + + return gtsam.Pose3(tf.to_matrix()) + + +def _icp( + source: PointCloud2, + target: PointCloud2, + max_iter: int = 50, + max_dist: float = 1.0, + tol: float = 1e-6, + min_inliers: int = 10, + init: Transform | None = None, +) -> tuple[Transform, float]: + """Point-to-plane ICP using Open3D's tensor pipeline. + + Returns ``(tf, fitness)`` where ``fitness`` is mean squared inlier + distance (m^2) — the GTSAM noise model in `_smooth_and_update` uses + this directly as sigma_trans squared. On rejection (too few points or + zero correspondences) returns the identity transform and inf fitness. + """ + + if len(source) < min_inliers or len(target) < min_inliers: + return Transform.identity(), float("inf") + + src_pcd = source.pointcloud_tensor + tgt_pcd = target.pointcloud_tensor + + # Normals on the target enable point-to-plane ICP — converges tighter + # than point-to-point on indoor scenes (walls give unambiguous normals + # that resolve the slide-along-wall ambiguity). + tgt_pcd.estimate_normals(max_nn=30, radius=0.3) + + device = src_pcd.device + init_T = o3c.Tensor( + init.to_matrix() if init is not None else np.eye(4), + dtype=o3c.float64, + device=device, + ) + + # Silence Open3D's "0 correspondence" warning — we deliberately use a + # tight max_correspondence_distance and reject loops with poor fitness. + with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): + result = o3d.t.pipelines.registration.icp( + source=src_pcd, + target=tgt_pcd, + max_correspondence_distance=max_dist, + init_source_to_target=init_T, + estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), + criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( + relative_fitness=tol, + relative_rmse=tol, + max_iteration=max_iter, + ), + ) + + if float(result.fitness) == 0.0: + return Transform.identity(), float("inf") + + T_mat = result.transformation.numpy() + rmse = float(result.inlier_rmse) + return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse + T_mat = result.transformation.numpy() + rmse = float(result.inlier_rmse) + return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse diff --git a/dimos/mapping/relocalization/pgo_internals.py b/dimos/mapping/relocalization/pgo_internals.py deleted file mode 100644 index c3465d8ee5..0000000000 --- a/dimos/mapping/relocalization/pgo_internals.py +++ /dev/null @@ -1,369 +0,0 @@ -# Copyright 2026 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Internal PGO/ICP machinery (gtsam-backed). Public consumers should use -# the Stream-shaped API in pgo.py — pgo_keyframes, keyframes_to_corrections, -# make_interpolator, apply_corrections. The classes here (_SimplePGO, -# PGOConfig, _KeyPose) are imported from pgo to do the heavy lifting. - -from __future__ import annotations - -from dataclasses import dataclass -from typing import Any - -import gtsam # type: ignore[import-not-found,import-untyped] -import numpy as np -import open3d as o3d # type: ignore[import-untyped] -import open3d.core as o3c # type: ignore[import-untyped] -from scipy.spatial import KDTree -from scipy.spatial.transform import Rotation - -from dimos.core.module import ModuleConfig -from dimos.utils.logging_config import setup_logger - -FRAME_MAP = "world" - -logger = setup_logger() - - -class PGOConfig(ModuleConfig): - world_frame: str = FRAME_MAP - - # Keyframe detection - key_pose_delta_trans: float = 0.5 - key_pose_delta_deg: float = 10.0 - - # Loop closure - loop_search_radius: float = 2.0 - loop_time_thresh: float = 20.0 - loop_score_thresh: float = 0.3 - loop_submap_half_range: int = 10 - min_icp_inliers: int = 10 - min_keyframes_for_loop_search: int = 10 - loop_closure_extra_iterations: int = 4 - submap_resolution: float = 0.2 - min_loop_detect_duration: float = 5.0 - - # Input mode - unregister_input: bool = True # Transform world-frame scans to body-frame using odom - - # Global map - publish_global_map: bool = True - global_map_publish_rate: float = 0.5 - global_map_voxel_size: float = 0.15 - - # ICP - max_icp_iterations: int = 50 - max_icp_correspondence_dist: float = 1.0 - - -@dataclass -class _KeyPose: - r_local: np.ndarray # 3x3 rotation in local/odom frame - t_local: np.ndarray # 3-vec translation in local/odom frame - r_global: np.ndarray # 3x3 corrected rotation - t_global: np.ndarray # 3-vec corrected translation - timestamp: float - body_cloud: np.ndarray # Nx3 points in body frame - - -def _icp( - source: np.ndarray, - target: np.ndarray, - max_iter: int = 50, - max_dist: float = 1.0, - tol: float = 1e-6, - min_inliers: int = 10, - init: np.ndarray | None = None, -) -> tuple[np.ndarray, float]: - """Point-to-point ICP using Open3D's tensor pipeline. - - Returns ``(T, fitness)`` where ``fitness`` is mean squared inlier - distance (m²) — same semantic as the previous SVD implementation, so - the GTSAM noise model in ``smooth_and_update`` keeps working. - """ - if len(source) < min_inliers or len(target) < min_inliers: - return np.eye(4), float("inf") - - cpu = o3c.Device("CPU:0") - src_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(source.astype(np.float32), device=cpu)) - tgt_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(target.astype(np.float32), device=cpu)) - - # Normals on the target enable point-to-plane ICP, which converges - # tighter than point-to-point on indoor scenes (walls give unambiguous - # normals that resolve the slide-along-wall ambiguity). - tgt_pcd.estimate_normals(max_nn=30, radius=0.3) - - init_T = ( - o3c.Tensor(init.astype(np.float64), dtype=o3c.float64, device=cpu) - if init is not None - else o3c.Tensor.eye(4, dtype=o3c.float64, device=cpu) - ) - - # Silence Open3D's "0 correspondence" warning — we deliberately use a - # tight max_correspondence_distance and reject loops with poor fitness; - # the warning is informational, not an error. - with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): - result = o3d.t.pipelines.registration.icp( - source=src_pcd, - target=tgt_pcd, - max_correspondence_distance=max_dist, - init_source_to_target=init_T, - estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), - criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( - relative_fitness=tol, - relative_rmse=tol, - max_iteration=max_iter, - ), - ) - - fitness_inlier_frac = float(result.fitness) - if fitness_inlier_frac == 0.0: - return np.eye(4), float("inf") - - rmse = float(result.inlier_rmse) - T = result.transformation.numpy() - # Return mean squared inlier distance (m²) to match prior _icp contract. - return T, rmse * rmse - - -def _voxel_downsample(pts: np.ndarray, voxel_size: float) -> np.ndarray: - if len(pts) == 0 or voxel_size <= 0: - return pts - keys = np.floor(pts / voxel_size).astype(np.int32) - _, idx = np.unique(keys, axis=0, return_index=True) - return pts[idx] - - -class _SimplePGO: - def __init__(self, config: PGOConfig) -> None: - self._cfg = config - self._key_poses: list[_KeyPose] = [] - self._history_pairs: list[tuple[int, int]] = [] - self._cache_pairs: list[dict[str, Any]] = [] - self._r_offset = np.eye(3) - self._t_offset = np.zeros(3) - - params = gtsam.ISAM2Params() - params.setRelinearizeThreshold(0.01) - params.relinearizeSkip = 1 - self._isam2 = gtsam.ISAM2(params) - self._graph = gtsam.NonlinearFactorGraph() - self._values = gtsam.Values() - - def is_key_pose(self, r: np.ndarray, t: np.ndarray) -> bool: - if not self._key_poses: - return True - last = self._key_poses[-1] - delta_trans = np.linalg.norm(t - last.t_local) - # Angular distance via quaternion dot product - q_cur = Rotation.from_matrix(r).as_quat() # [x,y,z,w] - q_last = Rotation.from_matrix(last.r_local).as_quat() - dot = abs(np.dot(q_cur, q_last)) - delta_deg = np.degrees(2.0 * np.arccos(min(dot, 1.0))) - return bool( - delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg - ) - - def add_key_pose( - self, r_local: np.ndarray, t_local: np.ndarray, timestamp: float, body_cloud: np.ndarray - ) -> bool: - if not self.is_key_pose(r_local, t_local): - return False - - idx = len(self._key_poses) - init_r = self._r_offset @ r_local - init_t = self._r_offset @ t_local + self._t_offset - - pose = gtsam.Pose3(gtsam.Rot3(init_r), gtsam.Point3(init_t)) - self._values.insert(idx, pose) - - if idx == 0: - noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) - self._graph.add(gtsam.PriorFactorPose3(idx, pose, noise)) - else: - last = self._key_poses[-1] - r_between = last.r_local.T @ r_local - t_between = last.r_local.T @ (t_local - last.t_local) - noise = gtsam.noiseModel.Diagonal.Variances( - np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6]) - ) - self._graph.add( - gtsam.BetweenFactorPose3( - idx - 1, idx, gtsam.Pose3(gtsam.Rot3(r_between), gtsam.Point3(t_between)), noise - ) - ) - - kp = _KeyPose( - r_local=r_local.copy(), - t_local=t_local.copy(), - r_global=init_r.copy(), - t_global=init_t.copy(), - timestamp=timestamp, - body_cloud=_voxel_downsample(body_cloud, self._cfg.submap_resolution), - ) - self._key_poses.append(kp) - return True - - def _get_submap(self, idx: int, half_range: int) -> np.ndarray: - lo = max(0, idx - half_range) - hi = min(len(self._key_poses) - 1, idx + half_range) - parts = [] - for i in range(lo, hi + 1): - kp = self._key_poses[i] - world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global - parts.append(world) - if not parts: - return np.empty((0, 3)) - cloud = np.vstack(parts) - return _voxel_downsample(cloud, self._cfg.submap_resolution) - - def search_for_loops(self) -> None: - if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: - return - - # Rate limit - if self._history_pairs: - cur_time = self._key_poses[-1].timestamp - last_time = self._key_poses[self._history_pairs[-1][1]].timestamp - if cur_time - last_time < self._cfg.min_loop_detect_duration: - return - - cur_idx = len(self._key_poses) - 1 - cur_kp = self._key_poses[-1] - - # Build KD-tree of previous keyframe positions - positions = np.array([kp.t_global for kp in self._key_poses[:-1]]) - tree = KDTree(positions) - - idxs = tree.query_ball_point(cur_kp.t_global, self._cfg.loop_search_radius) - if not idxs: - return - - # Pick the spatially closest keyframe that's also old enough in time. - # query_ball_point doesn't sort, so we sort by distance ourselves. - candidates = [ - (float(np.linalg.norm(self._key_poses[i].t_global - cur_kp.t_global)), i) - for i in idxs - if abs(cur_kp.timestamp - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh - ] - if not candidates: - return - candidates.sort() - loop_idx = candidates[0][1] - - # ICP verification - target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range) - source = self._get_submap(cur_idx, 0) - - transform, fitness = _icp( - source, - target, - max_iter=self._cfg.max_icp_iterations, - max_dist=self._cfg.max_icp_correspondence_dist, - min_inliers=self._cfg.min_icp_inliers, - ) - if fitness > self._cfg.loop_score_thresh: - return - - # Compute relative pose - R_icp = transform[:3, :3] - t_icp = transform[:3, 3] - r_refined = R_icp @ cur_kp.r_global - t_refined = R_icp @ cur_kp.t_global + t_icp - r_offset = self._key_poses[loop_idx].r_global.T @ r_refined - t_offset = self._key_poses[loop_idx].r_global.T @ ( - t_refined - self._key_poses[loop_idx].t_global - ) - - self._cache_pairs.append( - { - "source": cur_idx, - "target": loop_idx, - "r_offset": r_offset, - "t_offset": t_offset, - "score": fitness, - } - ) - self._history_pairs.append((loop_idx, cur_idx)) - logger.info( - "Loop closure detected", - source=cur_idx, - target=loop_idx, - score=round(fitness, 4), - ) - - def smooth_and_update(self) -> None: - has_loop = bool(self._cache_pairs) - - for pair in self._cache_pairs: - # Pose3 noise model is [rx, ry, rz, x, y, z]. The two halves - # have different units (rad² vs m²), so a uniform variance — - # the original behaviour — silently makes one half pathological - # (e.g. score=0.07 → σ_rot ≈ 15° AND σ_trans ≈ 26 cm; one of - # those is too tight, one is too loose, depending on the loop). - # Use ICP fitness as the *translation* variance and a - # generous fixed rotation variance — loops shouldn't be - # trusted to fix rotation tightly without normals + p2plane. - trans_var = max(0.01, float(pair["score"])) # ≥ σ_trans = 10 cm - rot_var = 0.05 # σ_rot ≈ 13° - noise = gtsam.noiseModel.Diagonal.Variances( - np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) - ) - self._graph.add( - gtsam.BetweenFactorPose3( - pair["target"], - pair["source"], - gtsam.Pose3(gtsam.Rot3(pair["r_offset"]), gtsam.Point3(pair["t_offset"])), - noise, - ) - ) - self._cache_pairs.clear() - - self._isam2.update(self._graph, self._values) - self._isam2.update() - if has_loop: - for _ in range(self._cfg.loop_closure_extra_iterations): - self._isam2.update() - self._graph = gtsam.NonlinearFactorGraph() - self._values = gtsam.Values() - - estimates = self._isam2.calculateBestEstimate() - for i in range(len(self._key_poses)): - pose = estimates.atPose3(i) - self._key_poses[i].r_global = pose.rotation().matrix() - self._key_poses[i].t_global = pose.translation() - - last = self._key_poses[-1] - self._r_offset = last.r_global @ last.r_local.T - self._t_offset = last.t_global - self._r_offset @ last.t_local - - def get_corrected_pose( - self, r_local: np.ndarray, t_local: np.ndarray - ) -> tuple[np.ndarray, np.ndarray]: - return self._r_offset @ r_local, self._r_offset @ t_local + self._t_offset - - def build_global_map(self, voxel_size: float) -> np.ndarray: - if not self._key_poses: - return np.empty((0, 3), dtype=np.float32) - parts = [] - for kp in self._key_poses: - world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global - parts.append(world) - cloud = np.vstack(parts).astype(np.float32) - return _voxel_downsample(cloud, voxel_size) - - @property - def num_key_poses(self) -> int: - return len(self._key_poses) diff --git a/dimos/memory2/type/observation.py b/dimos/memory2/type/observation.py index 63b10a6dbf..179b48744e 100644 --- a/dimos/memory2/type/observation.py +++ b/dimos/memory2/type/observation.py @@ -46,9 +46,42 @@ def __repr__(self) -> str: _UNLOADED = _Unloaded() +def _normalize_pose(p: Any) -> tuple[float, float, float, float, float, float, float] | None: + """Coerce common pose shapes to the storage 7-tuple `(x, y, z, qx, qy, qz, qw)`. + + Accepts: `None`, an already-normalized tuple, or any object with + `translation`+`rotation` (Transform) or `position`+`orientation` + (Pose / PoseStamped) attributes. Duck-typed to avoid importing + `dimos.msgs.geometry_msgs` at module load. + """ + if p is None or isinstance(p, tuple): + return p + trans = getattr(p, "translation", None) or getattr(p, "position", None) + rot = getattr(p, "rotation", None) or getattr(p, "orientation", None) + if trans is None or rot is None: + raise TypeError( + f"Cannot coerce {type(p).__name__} to a pose tuple — expected " + "tuple, None, Transform, Pose, or PoseStamped." + ) + return ( + float(trans.x), + float(trans.y), + float(trans.z), + float(rot.x), + float(rot.y), + float(rot.z), + float(rot.w), + ) + + @dataclass class Observation(Generic[T]): - """A single timestamped observation with optional spatial pose and metadata.""" + """A single timestamped observation with optional spatial pose and metadata. + + `pose` is stored as a 7-tuple `(x, y, z, qx, qy, qz, qw)`. Passing a + `Transform`, `Pose`, or `PoseStamped` to the constructor or to `derive` + is normalized to the 7-tuple form automatically. + """ id: int ts: float @@ -59,6 +92,9 @@ class Observation(Generic[T]): _loader: Callable[[], T] | None = field(default=None, repr=False) _data_lock: threading.Lock = field(default_factory=threading.Lock, repr=False) + def __post_init__(self) -> None: + self.pose = _normalize_pose(self.pose) + @property def pose_stamped(self) -> PoseStamped: from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 645844b735..543a898f6a 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -112,9 +112,9 @@ def collect_path(obs): corrections = keyframes_to_corrections(keyframes) interp = make_interpolator(corrections) - for obs in keyframes: - kp = obs.data - pgo_path.append((float(kp.t_global[0]), float(kp.t_global[1]), float(kp.t_global[2]))) + for kf_obs in keyframes: + kf_t = kf_obs.data.optimized.translation + pgo_path.append((kf_t.x, kf_t.y, kf_t.z)) pass2_pb = progress(total, "pgo pass 2 (rebuilding)") grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) From d11ef1654ffee88acee9329e95cb4818647d4f94 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 20:09:56 +0800 Subject: [PATCH 08/36] pgo: pytest + map.py type annotations + drop section markers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds test_pgo.py covering pose normalization on Observation, the Transform↔Pose3 conversion helpers, the interpolator edge cases, apply_corrections behavior, and a real-recording smoke test (skipped when data/go2_short.db is absent). Annotates the two map.py helpers so they pass mypy. Removes leftover section divider comments to satisfy the no-section-markers project rule. --- dimos/mapping/relocalization/pgo.py | 7 - dimos/mapping/relocalization/test_pgo.py | 365 +++++++++++++++++++++++ dimos/utils/cli/map.py | 10 +- 3 files changed, 372 insertions(+), 10 deletions(-) create mode 100644 dimos/mapping/relocalization/test_pgo.py diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index e006cc1b7b..9b48aa6437 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -220,11 +220,6 @@ def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: return stream.transform(xf) -# --------------------------------------------------------------------------- -# Transform <-> (R, t) and obs.pose helpers (private) -# --------------------------------------------------------------------------- - - def _r_t_from_transform(tf: Transform) -> tuple[np.ndarray, np.ndarray]: q = tf.rotation R = Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix() @@ -340,8 +335,6 @@ def finalize(self) -> list[Keyframe]: ) return out - # ---------- internal ---------- - def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool: if not self._key_poses: return True diff --git a/dimos/mapping/relocalization/test_pgo.py b/dimos/mapping/relocalization/test_pgo.py new file mode 100644 index 0000000000..ce35cbd25a --- /dev/null +++ b/dimos/mapping/relocalization/test_pgo.py @@ -0,0 +1,365 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from pathlib import Path + +import numpy as np +import pytest +from scipy.spatial.transform import Rotation + +from dimos.mapping.relocalization.pgo import ( + Keyframe, + PGOConfig, + _obs_to_pose3, + _pose3_to_transform, + _r_t_from_transform, + _transform_from_r_t, + apply_corrections, + correction_at, + keyframes_to_corrections, + make_interpolator, + pgo_keyframes, +) +from dimos.memory2.store.memory import MemoryStore +from dimos.memory2.stream import Stream +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 + + +def _random_R(rng: np.random.Generator) -> np.ndarray: + """Random uniform rotation matrix via random quaternion.""" + q = rng.standard_normal(4) + q /= np.linalg.norm(q) + return np.asarray(Rotation.from_quat(q).as_matrix()) + + +class TestPGOConfig: + def test_accepts_known_fields(self) -> None: + cfg = PGOConfig(key_pose_delta_trans=0.7, max_icp_iterations=42) + assert cfg.key_pose_delta_trans == 0.7 + assert cfg.max_icp_iterations == 42 + + def test_rejects_unknown_fields(self) -> None: + # The plan deleted these; BaseConfig has extra="forbid" so they raise. + for dead in ( + "world_frame", + "publish_global_map", + "global_map_publish_rate", + "global_map_voxel_size", + "unregister_input", + ): + with pytest.raises(Exception): + PGOConfig(**{dead: True}) + + +class TestTransformHelpers: + def test_r_t_transform_roundtrip(self) -> None: + rng = np.random.default_rng(0) + R_in = _random_R(rng) + t_in = rng.uniform(-5, 5, size=3) + tf = _transform_from_r_t(R_in, t_in, ts=1.23) + R_out, t_out = _r_t_from_transform(tf) + np.testing.assert_allclose(R_out, R_in, atol=1e-10) + np.testing.assert_allclose(t_out, t_in, atol=1e-10) + assert tf.ts == 1.23 + + def test_observation_normalizes_transform_pose(self) -> None: + """Constructing/deriving with pose=Transform should coerce to 7-tuple.""" + from dimos.memory2.type.observation import Observation + + tf = Transform( + translation=Vector3(1.5, -2.0, 0.7), + rotation=Quaternion(0.1, 0.2, 0.3, 0.927), + ts=1.0, + ) + obs: Observation[int] = Observation(id=0, ts=1.0, pose=tf, _data=0) + assert isinstance(obs.pose, tuple) + assert obs.pose[0] == pytest.approx(1.5) + assert obs.pose[6] == pytest.approx(0.927) + + # derive() also runs the normalization via __post_init__. + derived = obs.derive(data=0, pose=tf) + assert isinstance(derived.pose, tuple) + assert derived.pose == obs.pose + + def test_observation_normalizes_posestamped(self) -> None: + from dimos.memory2.type.observation import Observation + from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped + + ps = PoseStamped(ts=1.0, position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0)) + obs: Observation[int] = Observation(id=0, ts=1.0, pose=ps, _data=0) + assert isinstance(obs.pose, tuple) + assert obs.pose == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0) + + def test_obs_to_pose3_roundtrip(self) -> None: + from dimos.memory2.type.observation import Observation + + rng = np.random.default_rng(4) + R = _random_R(rng) + t = rng.uniform(-3, 3, size=3) + tf = _transform_from_r_t(R, t, ts=1.0) + obs: Observation[int] = Observation(id=0, ts=1.0, pose=tf, _data=0) + p = _obs_to_pose3(obs) + np.testing.assert_allclose(p.rotation().matrix(), R, atol=1e-9) + np.testing.assert_allclose(np.asarray(p.translation()), t, atol=1e-9) + + def test_pose3_to_transform(self) -> None: + import gtsam # type: ignore[import-not-found,import-untyped] + + rng = np.random.default_rng(2) + R = _random_R(rng) + t = rng.uniform(-3, 3, size=3) + p = gtsam.Pose3(gtsam.Rot3(R), gtsam.Point3(t)) + tf = _pose3_to_transform(p, ts=7.89) + R_out, t_out = _r_t_from_transform(tf) + np.testing.assert_allclose(R_out, R, atol=1e-10) + np.testing.assert_allclose(t_out, t, atol=1e-10) + + def test_pose3_to_transform_with_frames(self) -> None: + import gtsam + + rng = np.random.default_rng(3) + R = _random_R(rng) + t = rng.uniform(-3, 3, size=3) + p = gtsam.Pose3(gtsam.Rot3(R), gtsam.Point3(t)) + tf = _pose3_to_transform(p, ts=1.0, frame_id="world_corrected", child_frame_id="body") + assert tf.frame_id == "world_corrected" + assert tf.child_frame_id == "body" + R_out, t_out = _r_t_from_transform(tf) + np.testing.assert_allclose(R_out, R, atol=1e-10) + np.testing.assert_allclose(t_out, t, atol=1e-10) + + +def _make_lidar_stream(n_frames: int = 12, points_per_frame: int = 500) -> Stream[PointCloud2]: + """Straight-line trajectory along +x with small yaw, random body points. + + Note: `pgo_keyframes` skips poses with zero translation OR identity + rotation as placeholders, so we use a constant non-identity yaw. + """ + rng = np.random.default_rng(0) + mem = MemoryStore() + lidar: Stream[PointCloud2] = mem.stream("lidar", PointCloud2) + # Small yaw (~6 deg) -> non-identity quaternion that survives the + # placeholder filter. + q = Rotation.from_euler("z", 0.1).as_quat() # xyzw + qx, qy, qz, qw = float(q[0]), float(q[1]), float(q[2]), float(q[3]) + R_world = Rotation.from_euler("z", 0.1).as_matrix() + for i in range(1, n_frames + 1): + body = rng.uniform(-1, 1, size=(points_per_frame, 3)).astype(np.float32) + world = (R_world @ body.T).T + np.array([i, 0, 0], dtype=np.float32) + lidar.append( + PointCloud2.from_numpy(world.astype(np.float32)), + ts=float(i), + pose=(float(i), 0.0, 0.0, qx, qy, qz, qw), + ) + return lidar + + +class TestPipelineEndToEnd: + def test_straight_line_produces_keyframes(self) -> None: + lidar = _make_lidar_stream(n_frames=12) + kfs = pgo_keyframes(lidar) + # 12 frames spaced 1m apart with key_pose_delta_trans=0.5 -> every frame + # after the first triggers a keyframe; some may dedupe but ~11 emitted. + n = kfs.count() + assert 10 <= n <= 12 + + def test_keyframes_to_corrections_size(self) -> None: + lidar = _make_lidar_stream(n_frames=12) + kfs = pgo_keyframes(lidar) + corr = keyframes_to_corrections(kfs) + assert corr.count() == kfs.count() + + def test_apply_identity_corrections_preserves_poses(self) -> None: + # With no loop closures the optimization is a no-op -> drift = identity -> + # apply_corrections is a no-op on input poses. + lidar = _make_lidar_stream(n_frames=12) + kfs = pgo_keyframes(lidar) + corr = keyframes_to_corrections(kfs) + corrected = apply_corrections(lidar, corr) + in_poses = [o.pose for o in lidar if o.pose is not None] + out_poses = [o.pose for o in corrected if o.pose is not None] + assert len(in_poses) == len(out_poses) + for p_in, p_out in zip(in_poses, out_poses, strict=True): + for a, b in zip(p_in, p_out, strict=True): + assert a == pytest.approx(b, abs=1e-6) + + +def _make_corrections(transforms: list[Transform]) -> Stream[Transform]: + mem = MemoryStore() + stream: Stream[Transform] = mem.stream("corrections", Transform) + for tf in transforms: + stream.append(tf, ts=tf.ts) + return stream + + +class TestInterpolator: + def test_empty_stream_raises(self) -> None: + empty = _make_corrections([]) + with pytest.raises(ValueError): + make_interpolator(empty) + + def test_single_keyframe_returns_constant(self) -> None: + R = Rotation.from_euler("z", np.pi / 4).as_matrix() + t = np.array([1.0, 2.0, 3.0]) + only = _transform_from_r_t(R, t, ts=10.0) + interp = make_interpolator(_make_corrections([only])) + for query_ts in (0.0, 10.0, 100.0): + out = interp(query_ts) + assert out.translation.x == pytest.approx(1.0, abs=1e-10) + assert out.translation.y == pytest.approx(2.0, abs=1e-10) + assert out.translation.z == pytest.approx(3.0, abs=1e-10) + + def test_out_of_range_clips_to_endpoints(self) -> None: + # Note: Transform's constructor maps ts=0.0 -> time.time(); use ts>0 + # so test timestamps are deterministic. + R = np.eye(3) + a = _transform_from_r_t(R, np.array([0.0, 0.0, 0.0]), ts=1.0) + b = _transform_from_r_t(R, np.array([10.0, 0.0, 0.0]), ts=11.0) + # _make_corrections uses tf.ts; obs.ts ends up the same. + mem = MemoryStore() + stream: Stream[Transform] = mem.stream("corrections", Transform) + stream.append(a, ts=1.0) + stream.append(b, ts=11.0) + interp = make_interpolator(stream) + # Below range -> clipped to a + assert interp(-5.0).translation.x == pytest.approx(0.0, abs=1e-10) + # Above range -> clipped to b + assert interp(100.0).translation.x == pytest.approx(10.0, abs=1e-10) + # In-range midpoint + assert interp(6.0).translation.x == pytest.approx(5.0, abs=1e-10) + + def test_correction_at_oneshot(self) -> None: + R = np.eye(3) + a = _transform_from_r_t(R, np.array([0.0, 0.0, 0.0]), ts=1.0) + b = _transform_from_r_t(R, np.array([10.0, 0.0, 0.0]), ts=11.0) + mem = MemoryStore() + stream: Stream[Transform] = mem.stream("corrections", Transform) + stream.append(a, ts=1.0) + stream.append(b, ts=11.0) + result = correction_at(stream, 6.0) + assert result.translation.x == pytest.approx(5.0, abs=1e-10) + + +class TestApplyCorrections: + def test_pure_translation_shifts_poses(self) -> None: + # Build a stream of 3 frames at the origin (identity pose) with a known + # correction that shifts everything by +5 in x. Expected: corrected + # poses sit at x=5. + mem = MemoryStore() + lidar: Stream[PointCloud2] = mem.stream("lidar", PointCloud2) + for i in range(3): + lidar.append( + PointCloud2.from_numpy(np.zeros((1, 3), dtype=np.float32)), + ts=float(i + 1), + pose=(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0), + ) + + # Constant correction: translate by (5, 0, 0), identity rotation. + c_a = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=1.0) + c_b = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=3.0) + mem2 = MemoryStore() + corr: Stream[Transform] = mem2.stream("corrections", Transform) + corr.append(c_a, ts=1.0) + corr.append(c_b, ts=3.0) + + corrected = apply_corrections(lidar, corr) + for obs in corrected: + assert obs.pose is not None + assert obs.pose[0] == pytest.approx(5.0, abs=1e-9) + assert obs.pose[1] == pytest.approx(0.0, abs=1e-9) + assert obs.pose[2] == pytest.approx(0.0, abs=1e-9) + + def test_passes_through_pose_none(self) -> None: + mem = MemoryStore() + lidar: Stream[PointCloud2] = mem.stream("lidar", PointCloud2) + lidar.append( + PointCloud2.from_numpy(np.zeros((1, 3), dtype=np.float32)), + ts=1.0, + pose=None, + ) + c_a = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=1.0) + c_b = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=2.0) + mem2 = MemoryStore() + corr: Stream[Transform] = mem2.stream("corrections", Transform) + corr.append(c_a, ts=1.0) + corr.append(c_b, ts=2.0) + corrected = apply_corrections(lidar, corr) + for obs in corrected: + assert obs.pose is None + + +class TestKeyframeType: + def test_keyframe_is_frozen(self) -> None: + identity = Transform( + translation=Vector3(0.0, 0.0, 0.0), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + ts=1.0, + ) + kf = Keyframe(ts=1.0, local=identity, optimized=identity) + with pytest.raises(Exception): + kf.ts = 2.0 # type: ignore[misc] + assert isinstance(kf.local, Transform) + assert isinstance(kf.optimized, Transform) + + +# Real-recording smoke test. ~45-60s on go2_short.db. Skipped when the LFS +# file isn't present (CI without LFS pulled, fresh clone, etc.). +_DATA_DB = Path(__file__).resolve().parents[3] / "data" / "go2_short.db" + + +@pytest.mark.skipif(not _DATA_DB.exists(), reason=f"requires LFS file {_DATA_DB}") +class TestRealRecording: + def test_pgo_pipeline_against_go2_short(self) -> None: + """Run the full PGO pipeline on a real 60-second go2 recording. + + Asserts: keyframes produced, drift correction actually corrects (some + optimized poses differ from local), correction stream length matches + keyframes, apply_corrections preserves input frame count. + """ + from dimos.memory2.store.sqlite import SqliteStore + + store = SqliteStore(path=_DATA_DB) + lidar = store.streams.lidar + in_count = lidar.count() + assert in_count > 0, "recording is empty" + + kfs = pgo_keyframes(lidar) + n_kf = kfs.count() + assert n_kf > 0, "PGO emitted no keyframes" + # 60s recording at ~0.5m keyframe spacing -> at least a handful. + assert n_kf >= 5 + + # Loop closure detection: at least one optimized pose should differ + # from its odom-frame counterpart. Without loops, the optimization is + # a no-op and local == optimized for every keyframe. + drifted = sum( + 1 + for obs in kfs + if obs.data.local.translation != obs.data.optimized.translation + or obs.data.local.rotation != obs.data.optimized.rotation + ) + assert drifted > 0, "expected loop closures to drift at least one keyframe" + + corr = keyframes_to_corrections(kfs) + assert corr.count() == n_kf + + # apply_corrections preserves frame count, including pose=None rows. + corrected = apply_corrections(lidar, corr) + out_count = sum(1 for _ in corrected) + assert out_count == in_count diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 543a898f6a..3ae12b5ad9 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections.abc import Callable import time +from typing import Any import rerun as rr import rerun.blueprint as rrb @@ -20,22 +22,24 @@ from dimos.mapping.voxels import VoxelMapTransformer from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.type.observation import Observation from dimos.utils.data import resolve_named_path from dimos.visualization.rerun.init import rerun_init -def progress(total: int, label: str = ""): +def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: seen = 0 wall_start: float | None = None last_wall: float | None = None first_ts: float | None = None - def _progress(obs): + def _progress(obs: Observation[Any]) -> None: nonlocal seen, wall_start, last_wall, first_ts now = time.monotonic() if wall_start is None: wall_start = now first_ts = obs.ts + assert first_ts is not None # narrowed by the same `if` above frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0 last_wall = now seen += 1 @@ -84,7 +88,7 @@ def main( path: list[tuple[float, float, float]] = [] - def collect_path(obs): + def collect_path(obs: Observation[Any]) -> None: if obs.pose is None: return # Reject placeholder poses at the world origin (translation = 0,0,0). From 45e72da4ff6a391a2534202c6e6cc4f717ff2887 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 21:37:13 +0800 Subject: [PATCH 09/36] pgo cleanup --- dimos/mapping/relocalization/pgo.py | 68 +++++++++++++++++------- dimos/mapping/relocalization/test_pgo.py | 12 ----- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index 9b48aa6437..3f5dbbd4ec 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -14,28 +14,47 @@ """PGO drift corrections as composable Stream stages. +A lidar/odom stream comes in with poses that drift over time — the robot's +estimate of where it is in the world slowly diverges from ground truth as +small per-frame errors accumulate. PGO ("pose graph optimization") detects +revisited places via loop closure and pulls the trajectory back into +self-consistency. The result is a stream of *corrections*: a Transform per +keyframe that maps every drifted ("raw") pose to its corrected one. + Pipeline: - lidar: Stream[PointCloud2] - -> pgo_keyframes(...) -> Stream[Keyframe] - -> keyframes_to_corrections(...) -> Stream[Transform] (world_corrected <- world_raw) - -> apply_corrections(any_stream, corrections) -> Stream[T] (obs.pose shuffled) + lidar: Stream[PointCloud2] # pose-stamped scans + -> pgo_keyframes(...) -> Stream[Keyframe] # one per kf + -> keyframes_to_corrections(...) -> Stream[Transform] # world_corrected <- world_raw + -> apply_corrections(any_stream, corrections) -> Stream[T] # obs.pose shuffled + +`pgo_keyframes` runs ISAM2 + ICP loop closure across the input, emitting +each kept keyframe with both its raw odom pose (`local`) and its optimized +pose (`optimized`) as Transforms. + +`keyframes_to_corrections` is pure arithmetic: per keyframe, the drift +correction is `optimized @ local^-1` (Transform composition). The resulting +stream has one entry per keyframe — sparse in time. -The math: per keyframe, the drift correction is - R_corr = R_global @ R_local.T - t_corr = t_global - R_corr @ t_local -and at arbitrary ts we SLERP R between the two bracketing keyframes and linear-lerp t, -clipping out-of-range to endpoints. +`make_interpolator` materializes the sparse correction stream into a +ts -> Transform function: SLERP for rotation, linear lerp for translation, +clipping out-of-range queries to the endpoints. -`gtsam` and `open3d` are imported lazily inside hot helpers so importing this module -stays cheap and gtsam-free for consumers that only need `Keyframe` / `apply_corrections`. +`apply_corrections` shuffles `obs.pose` on any stream using the interpolated +correction at each obs.ts. It's read-only on `obs.data`, so the same lidar +stream can be re-applied — or a different recording from the same physical +space, if the corrections generalize. + +`gtsam` is imported lazily inside hot helpers so importing this module +stays cheap and gtsam-free for consumers that only need `Keyframe` / +`apply_corrections`. """ from __future__ import annotations from collections.abc import Callable, Iterator from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, TypeVar +from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Unpack import numpy as np import open3d as o3d # type: ignore[import-untyped] @@ -85,6 +104,24 @@ class PGOConfig(BaseConfig): max_icp_correspondence_dist: float = 1.0 +class PGOKwargs(TypedDict, total=False): + """Typed kwargs for `pgo_keyframes`; mirrors `PGOConfig` fields.""" + + key_pose_delta_trans: float + key_pose_delta_deg: float + loop_search_radius: float + loop_time_thresh: float + loop_score_thresh: float + loop_submap_half_range: int + min_icp_inliers: int + min_keyframes_for_loop_search: int + loop_closure_extra_iterations: int + submap_resolution: float + min_loop_detect_duration: float + max_icp_iterations: int + max_icp_correspondence_dist: float + + @dataclass(frozen=True) class Keyframe: """Keyframe emitted by `pgo_keyframes`. @@ -102,7 +139,7 @@ def pgo_keyframes( stream: Stream[PointCloud2], *, on_frame: Callable[[Any], None] | None = None, - **pgo_cfg: Any, + **pgo_cfg: Unpack[PGOKwargs], ) -> Stream[Keyframe]: """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe.""" cfg = PGOConfig(**pgo_cfg) @@ -190,11 +227,6 @@ def interp(ts: float) -> Transform: return interp -def correction_at(corrections: Stream[Transform], ts: float) -> Transform: - """One-off lookup. For hot paths build `make_interpolator` once and reuse.""" - return make_interpolator(corrections)(ts) - - def apply_corrections( stream: Stream[T], corrections: Stream[Transform], diff --git a/dimos/mapping/relocalization/test_pgo.py b/dimos/mapping/relocalization/test_pgo.py index ce35cbd25a..76f8b378eb 100644 --- a/dimos/mapping/relocalization/test_pgo.py +++ b/dimos/mapping/relocalization/test_pgo.py @@ -28,7 +28,6 @@ _r_t_from_transform, _transform_from_r_t, apply_corrections, - correction_at, keyframes_to_corrections, make_interpolator, pgo_keyframes, @@ -244,17 +243,6 @@ def test_out_of_range_clips_to_endpoints(self) -> None: # In-range midpoint assert interp(6.0).translation.x == pytest.approx(5.0, abs=1e-10) - def test_correction_at_oneshot(self) -> None: - R = np.eye(3) - a = _transform_from_r_t(R, np.array([0.0, 0.0, 0.0]), ts=1.0) - b = _transform_from_r_t(R, np.array([10.0, 0.0, 0.0]), ts=11.0) - mem = MemoryStore() - stream: Stream[Transform] = mem.stream("corrections", Transform) - stream.append(a, ts=1.0) - stream.append(b, ts=11.0) - result = correction_at(stream, 6.0) - assert result.translation.x == pytest.approx(5.0, abs=1e-10) - class TestApplyCorrections: def test_pure_translation_shifts_poses(self) -> None: From df4435f21caf9a66445a5e8062cea0b544f3a27e Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 21:49:53 +0800 Subject: [PATCH 10/36] visualizing loop closures --- dimos/mapping/relocalization/pgo.py | 55 +++++++++++++++++++++++++++-- dimos/utils/cli/map.py | 26 +++++++++++++- 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/relocalization/pgo.py index 3f5dbbd4ec..af94921c9d 100644 --- a/dimos/mapping/relocalization/pgo.py +++ b/dimos/mapping/relocalization/pgo.py @@ -30,7 +30,9 @@ `pgo_keyframes` runs ISAM2 + ICP loop closure across the input, emitting each kept keyframe with both its raw odom pose (`local`) and its optimized -pose (`optimized`) as Transforms. +pose (`optimized`) as Transforms. Pass `loop_closures_out=[]` to also +collect each accepted ICP loop edge as a `LoopClosure` (source+target +optimized poses + ICP fitness) — useful for visualizing the pose graph. `keyframes_to_corrections` is pure arithmetic: per keyframe, the drift correction is `optimized @ local^-1` (Transform composition). The resulting @@ -135,13 +137,33 @@ class Keyframe: optimized: Transform +@dataclass(frozen=True) +class LoopClosure: + """An accepted ICP loop-closure edge in the pose graph. + + `source` and `target` are the final optimized body poses (in + `world_corrected`) of the two keyframes the loop connected. `score` is + the ICP fitness used as the loop's translation variance — lower is a + tighter match. + """ + + source: Transform + target: Transform + score: float + + def pgo_keyframes( stream: Stream[PointCloud2], *, on_frame: Callable[[Any], None] | None = None, + loop_closures_out: list[LoopClosure] | None = None, **pgo_cfg: Unpack[PGOKwargs], ) -> Stream[Keyframe]: - """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe.""" + """Run PGO across a pose-stamped point-cloud stream; emit one obs per keyframe. + + If `loop_closures_out` is provided, accepted loop edges are appended to + it (in detection order) using each keyframe's final optimized pose. + """ cfg = PGOConfig(**pgo_cfg) pgo = _PGO(cfg) @@ -167,6 +189,8 @@ def pgo_keyframes( out: Stream[Keyframe] = mem.stream("keyframes", Keyframe) for kf in pgo.finalize(): out.append(kf, ts=kf.ts) + if loop_closures_out is not None: + loop_closures_out.extend(pgo.loop_closures()) return out @@ -312,6 +336,7 @@ def __init__(self, config: PGOConfig) -> None: self._cfg = config self._key_poses: list[_KeyPose] = [] self._pending_loops: list[_LoopPair] = [] + self._accepted_loops: list[_LoopPair] = [] self._last_loop_ts: float | None = None self._world_correction: gtsam.Pose3 = gtsam.Pose3() # identity @@ -367,6 +392,31 @@ def finalize(self) -> list[Keyframe]: ) return out + def loop_closures(self) -> list[LoopClosure]: + """Accepted loop edges, resolved to final optimized poses.""" + out: list[LoopClosure] = [] + for pair in self._accepted_loops: + src = self._key_poses[pair.source] + tgt = self._key_poses[pair.target] + out.append( + LoopClosure( + source=_pose3_to_transform( + src.optimized, + ts=src.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ), + target=_pose3_to_transform( + tgt.optimized, + ts=tgt.timestamp, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_BODY, + ), + score=pair.score, + ) + ) + return out + def _is_keyframe(self, local_pose: gtsam.Pose3) -> bool: if not self._key_poses: return True @@ -521,6 +571,7 @@ def _smooth_and_update(self) -> None: np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) ) self._graph.add(gtsam.BetweenFactorPose3(pair.target, pair.source, pair.offset, noise)) + self._accepted_loops.extend(self._pending_loops) self._pending_loops.clear() self._isam2.update(self._graph, self._values) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 3ae12b5ad9..5bc4d29c52 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -103,6 +103,7 @@ def collect_path(obs: Observation[Any]) -> None: from scipy.spatial.transform import Rotation from dimos.mapping.relocalization.pgo import ( + LoopClosure, keyframes_to_corrections, make_interpolator, pgo_keyframes, @@ -112,7 +113,12 @@ def collect_path(obs: Observation[Any]) -> None: total = lidar.count() print("running PGO twopass map...") - keyframes = pgo_keyframes(lidar, on_frame=progress(total, "pgo pass 1 (optimizing)")) + loops: list[LoopClosure] = [] + keyframes = pgo_keyframes( + lidar, + on_frame=progress(total, "pgo pass 1 (optimizing)"), + loop_closures_out=loops, + ) corrections = keyframes_to_corrections(keyframes) interp = make_interpolator(corrections) @@ -167,6 +173,24 @@ def collect_path(obs: Observation[Any]) -> None: rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.05]), static=True, ) + rr.log( + "world/pgo_map/keyframes", + rr.Points3D(positions=pgo_path, colors=[[255, 200, 0]], radii=[0.05]), + static=True, + ) + if pgo and loops: + loop_strips = [ + [ + (lc.source.translation.x, lc.source.translation.y, lc.source.translation.z), + (lc.target.translation.x, lc.target.translation.y, lc.target.translation.z), + ] + for lc in loops + ] + rr.log( + "world/pgo_map/loop_closures", + rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.02]), + static=True, + ) if export and pgo_map is not None: from pathlib import Path From ebdf1ec6315d6f17a0543041b9fc7a72b218cc58 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 21:58:14 +0800 Subject: [PATCH 11/36] pgo vis fixed --- dimos/utils/cli/map.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 5bc4d29c52..440d824111 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -167,28 +167,38 @@ def collect_path(obs: Observation[Any]) -> None: ) if pgo_map is not None: rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True) + STEM_HEIGHT = 2.0 # lift pose-graph viz above the map for legibility if pgo_path: rr.log( "world/pgo_map/path", rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.05]), static=True, ) + hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path] rr.log( "world/pgo_map/keyframes", - rr.Points3D(positions=pgo_path, colors=[[255, 200, 0]], radii=[0.05]), + rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.05]), static=True, ) if pgo and loops: loop_strips = [ [ - (lc.source.translation.x, lc.source.translation.y, lc.source.translation.z), - (lc.target.translation.x, lc.target.translation.y, lc.target.translation.z), + ( + lc.source.translation.x, + lc.source.translation.y, + lc.source.translation.z + STEM_HEIGHT, + ), + ( + lc.target.translation.x, + lc.target.translation.y, + lc.target.translation.z + STEM_HEIGHT, + ), ] for lc in loops ] rr.log( "world/pgo_map/loop_closures", - rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.02]), + rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.05]), static=True, ) From 71762c1c97788ca35514b5ea915d93300d9b9f1f Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 22:25:07 +0800 Subject: [PATCH 12/36] much faster pgo --- dimos/robot/cli/dimos.py | 16 +++++++-- dimos/utils/cli/map.py | 78 +++++++++++++++++++++++++++++----------- 2 files changed, 72 insertions(+), 22 deletions(-) diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 5607c8b1ca..730509180b 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -693,7 +693,12 @@ def map_cmd( "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)" ), pgo: bool = typer.Option( - False, "--pgo", help="Run pose graph optimization before rebuilding (twopass)" + False, + "--pgo", + help="Run pose graph optimization and rebuild from spatially-deduped frames", + ), + pgo_tol: float = typer.Option( + 0.3, "--pgo-tol", help="Spatial dedup tolerance for --pgo (meters)" ), block_count: int = typer.Option( 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" @@ -701,7 +706,12 @@ def map_cmd( export: bool = typer.Option( False, "--export", - help="Export PGO twopass map to ./.pc2.lcm in cwd (implies --pgo)", + help="Export PGO map to ./.pc2.lcm in cwd (implies --pgo)", + ), + full_pgo: bool = typer.Option( + False, + "--full-pgo", + help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)", ), no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), ) -> None: @@ -713,8 +723,10 @@ def map_cmd( voxel=voxel, device=device, pgo=pgo, + pgo_tol=pgo_tol, block_count=block_count, export=export, + full_pgo=full_pgo, no_gui=no_gui, ) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 440d824111..8b23df1e25 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -65,7 +65,12 @@ def main( "CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)" ), pgo: bool = typer.Option( - False, "--pgo", help="Run pose graph optimization before rebuilding (twopass)" + False, + "--pgo", + help="Run pose graph optimization and rebuild from spatially-deduped frames", + ), + pgo_tol: float = typer.Option( + 0.3, "--pgo-tol", help="Spatial dedup tolerance for --pgo (meters)" ), block_count: int = typer.Option( 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" @@ -73,12 +78,17 @@ def main( export: bool = typer.Option( False, "--export", - help="Export PGO twopass map to ./.pc2.lcm in cwd (implies --pgo)", + help="Export PGO map to ./.pc2.lcm in cwd (implies --pgo)", + ), + full_pgo: bool = typer.Option( + False, + "--full-pgo", + help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)", ), no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), ) -> None: db_path = resolve_named_path(dataset, ".db") - if export: + if export or full_pgo: pgo = True store = SqliteStore(path=db_path) @@ -99,9 +109,6 @@ def collect_path(obs: Observation[Any]) -> None: pgo_map = None pgo_path: list[tuple[float, float, float]] = [] if pgo: - import numpy as np - from scipy.spatial.transform import Rotation - from dimos.mapping.relocalization.pgo import ( LoopClosure, keyframes_to_corrections, @@ -109,7 +116,6 @@ def collect_path(obs: Observation[Any]) -> None: pgo_keyframes, ) from dimos.mapping.voxels import VoxelGrid - from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 total = lidar.count() print("running PGO twopass map...") @@ -126,27 +132,53 @@ def collect_path(obs: Observation[Any]) -> None: kf_t = kf_obs.data.optimized.translation pgo_path.append((kf_t.x, kf_t.y, kf_t.z)) - pass2_pb = progress(total, "pgo pass 2 (rebuilding)") + # Canonical PGO rebuild: bucket frames by spatial cell using the raw + # odom pose, keep the latest per cell, transform with the interpolated + # correction. obs.data is not touched in the dedup loop so it stays + # cheap (no pointcloud loading). + seen: dict[tuple[int, int, int], Any] = {} + for obs in lidar: + if obs.pose is None: + continue + if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: + continue + cell = ( + int(obs.pose[0] / pgo_tol), + int(obs.pose[1] / pgo_tol), + int(obs.pose[2] / pgo_tol), + ) + seen[cell] = obs + + n_kept = len(seen) + pct = 100 * n_kept / total if total else 0 + print(f"pgo rebuild: kept [{n_kept}/{total}] frames ({pct:.1f}%) at tol={pgo_tol}m") + + pass2_pb = progress(n_kept, "pgo pass 2 (rebuilding)") grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) try: - for obs in lidar: + for obs in seen.values(): pass2_pb(obs) - if obs.pose is None: + if len(obs.data) == 0: continue - pts, _ = obs.data.as_numpy() - if len(pts) == 0: - continue - tf = interp(obs.ts) - R = Rotation.from_quat( - [tf.rotation.x, tf.rotation.y, tf.rotation.z, tf.rotation.w] - ).as_matrix() - t = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) - corrected = (R @ pts[:, :3].T).T + t - grid.add_frame(PointCloud2.from_numpy(corrected.astype(np.float32))) + grid.add_frame(obs.data.transform(interp(obs.ts))) pgo_map = grid.get_global_pointcloud2() finally: grid.dispose() + full_pgo_map = None + if full_pgo: + full_pb = progress(total, "full pgo (rebuilding)") + full_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) + try: + for obs in lidar: + full_pb(obs) + if obs.pose is None or len(obs.data) == 0: + continue + full_grid.add_frame(obs.data.transform(interp(obs.ts))) + full_pgo_map = full_grid.get_global_pointcloud2() + finally: + full_grid.dispose() + global_map = ( lidar.tap(collect_path) .transform(VoxelMapTransformer(voxel_size=voxel, device=device)) @@ -167,6 +199,12 @@ def collect_path(obs: Observation[Any]) -> None: ) if pgo_map is not None: rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True) + if full_pgo_map is not None: + rr.log( + "world/full_pgo_map/pointcloud", + full_pgo_map.to_rerun(size=voxel), + static=True, + ) STEM_HEIGHT = 2.0 # lift pose-graph viz above the map for legibility if pgo_path: rr.log( From d1218361267c65733f7cfbae869af9ea693cb60f Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Sun, 24 May 2026 22:46:17 +0800 Subject: [PATCH 13/36] PointCloud2: drop has_colors() side effect from transform The colors-copy block in `PointCloud2.transform` calls `self.pointcloud` to check `has_colors()`, which forces a tensor->legacy conversion on every invocation. With `pgo --full-pgo` rebuilding from hundreds of lidar frames, that hidden allocation dominates the per-frame cost. The colors path was lidar-irrelevant (lidar clouds have no colors anyway) and the feature it was meant to support never landed; remove it so transform() stays a clean numpy round-trip. --- dimos/msgs/sensor_msgs/PointCloud2.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index 598cb0b094..570d33f772 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -373,10 +373,6 @@ def transform(self, tf: Transform) -> PointCloud2: new_pcd = o3d.geometry.PointCloud() new_pcd.points = o3d.utility.Vector3dVector(transformed_xyz) - # Copy colors if available - if self.pointcloud.has_colors(): - new_pcd.colors = self.pointcloud.colors - return PointCloud2( pointcloud=new_pcd, frame_id=tf.frame_id, From 626a4fc854bb8bbcb97c053747676b1c9fd41459 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 11:56:57 +0800 Subject: [PATCH 14/36] ci: install gtsam-extended in tests group so import gtsam works --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1125d433f3..a6e32f409e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -402,7 +402,7 @@ tests = [ # Deps the lint job swaps for stubs (`stubs//` or `types-*`): # the real packages are needed at test time, stubs cover mypy. - "dimos[apriltag,psql,drone,cpu]", + "dimos[apriltag,mapping,psql,drone,cpu]", "mujoco>=3.3.4", "pygame>=2.6.1", "python-can>=4", diff --git a/uv.lock b/uv.lock index 7d50e3d818..8039736ee7 100644 --- a/uv.lock +++ b/uv.lock @@ -2271,7 +2271,7 @@ project-deps = [ ] tests = [ { name = "coverage" }, - { name = "dimos", extra = ["apriltag", "cpu", "drone", "psql", "visualization", "web"] }, + { name = "dimos", extra = ["apriltag", "cpu", "drone", "mapping", "psql", "visualization", "web"] }, { name = "gdown" }, { name = "googlemaps" }, { name = "hydra-core" }, @@ -2312,7 +2312,7 @@ tests = [ ] tests-self-hosted = [ { name = "coverage" }, - { name = "dimos", extra = ["agents", "apriltag", "cpu", "drone", "manipulation", "misc", "perception", "psql", "sim", "unitree", "visualization", "web"] }, + { name = "dimos", extra = ["agents", "apriltag", "cpu", "drone", "manipulation", "mapping", "misc", "perception", "psql", "sim", "unitree", "visualization", "web"] }, { name = "gdown" }, { name = "googlemaps" }, { name = "hydra-core" }, @@ -2558,7 +2558,7 @@ project-deps = [ ] tests = [ { name = "coverage", specifier = ">=7.0" }, - { name = "dimos", extras = ["apriltag", "psql", "drone", "cpu"] }, + { name = "dimos", extras = ["apriltag", "mapping", "psql", "drone", "cpu"] }, { name = "dimos", extras = ["web", "visualization"] }, { name = "gdown", specifier = "==6.0.0" }, { name = "googlemaps", specifier = ">=4.10.0" }, @@ -2601,7 +2601,7 @@ tests = [ tests-self-hosted = [ { name = "coverage", specifier = ">=7.0" }, { name = "dimos", extras = ["agents", "perception", "manipulation", "sim", "unitree", "misc"] }, - { name = "dimos", extras = ["apriltag", "psql", "drone", "cpu"] }, + { name = "dimos", extras = ["apriltag", "mapping", "psql", "drone", "cpu"] }, { name = "dimos", extras = ["web", "visualization"] }, { name = "gdown", specifier = "==6.0.0" }, { name = "googlemaps", specifier = ">=4.10.0" }, From 2f46b38443c180d711f89a24135cd041844be072 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 15:05:07 +0800 Subject: [PATCH 15/36] markers on dimos map --- dimos/msgs/sensor_msgs/PointCloud2.py | 9 +- .../detection/type/detection3d/marker.py | 42 ++++ .../perception/fiducial/marker_transformer.py | 176 +++++++++++++++ dimos/robot/cli/dimos.py | 10 + dimos/utils/cli/map.py | 207 +++++++++++++----- 5 files changed, 388 insertions(+), 56 deletions(-) create mode 100644 dimos/perception/detection/type/detection3d/marker.py create mode 100644 dimos/perception/fiducial/marker_transformer.py diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index 570d33f772..add056b1a3 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -686,7 +686,6 @@ def to_rerun( voxel_size: float = 0.05, colors: list[int] | None = None, mode: str = "spheres", - size: float | None = None, fill_mode: str = "solid", bottom_cutoff: float | None = None, **kwargs: object, @@ -699,7 +698,6 @@ def to_rerun( If None, uses height-based turbo colormap via class_ids (requires register_colormap_annotation() called once). mode: "points" for raw points, "boxes" for cubes (default), or "spheres" for sized spheres - size: Box size for mode="boxes" (e.g., voxel_size). Defaults to radii*2. fill_mode: Fill mode for boxes - "solid", "majorwireframe", or "densewireframe" **kwargs: Additional args (ignored for compatibility) @@ -732,13 +730,10 @@ def to_rerun( if mode == "points": return rr.Points3D( - positions=points, - colors=point_colors, - class_ids=class_ids, + positions=points, colors=point_colors, class_ids=class_ids, sizes=voxel_size ) elif mode == "boxes": - box_size = size if size is not None else voxel_size - half = box_size / 2 + half = voxel_size / 2 return rr.Boxes3D( centers=points, half_sizes=[half, half, half], diff --git a/dimos/perception/detection/type/detection3d/marker.py b/dimos/perception/detection/type/detection3d/marker.py new file mode 100644 index 0000000000..a162814d6f --- /dev/null +++ b/dimos/perception/detection/type/detection3d/marker.py @@ -0,0 +1,42 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + +import numpy as np + +from dimos.perception.detection.type.detection3d.bbox import Detection3DBBox + + +@dataclass +class Detection3DMarker(Detection3DBBox): + """Fiducial marker (ArUco / AprilTag) detection with a world-frame pose. + + ``bbox`` is the axis-aligned 2D bbox around the four detected corners so + crop/draw helpers from ``Detection2DBBox`` work as-is. ``center`` and + ``orientation`` carry the marker pose in ``frame_id`` (typically + ``"world"``); ``transform`` is the camera-in-world transform used to + compose it. + """ + + marker_id: int = -1 + corners_px: np.ndarray = field(default_factory=lambda: np.zeros((4, 2), dtype=np.float32)) + dictionary: str = "" + + def to_repr_dict(self) -> dict[str, Any]: + parent = super().to_repr_dict() + return {**parent, "marker_id": str(self.marker_id), "dict": self.dictionary} diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py new file mode 100644 index 0000000000..a3839ed9bb --- /dev/null +++ b/dimos/perception/fiducial/marker_transformer.py @@ -0,0 +1,176 @@ +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""ArUco / AprilTag detection as a memory2 transformer. + +Wraps the pure helpers in :mod:`dimos.perception.fiducial.marker_tf_module` +and emits one :class:`Detection3DMarker` observation per detected marker, with +``.pose`` composed into world frame from the upstream observation's +camera-in-world pose. The companion module :class:`MarkerTfModule` remains +the right choice for live TF publication; this transformer is for offline / +mem2-stream composition. + +Skips frames where the upstream observation has no ``.pose`` (debug log): +without a camera-in-world pose, we can't honor the "always world-frame" +output contract. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +from dimos.memory2.transform import Transformer +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo +from dimos.msgs.sensor_msgs.Image import Image +from dimos.perception.detection.type.detection3d.marker import Detection3DMarker +from dimos.perception.fiducial.marker_tf_module import ( + camera_info_to_cv_matrices, + create_aruco_detector, + estimate_marker_pose, + rvec_tvec_to_transform, +) +from dimos.utils.logging_config import setup_logger + +if TYPE_CHECKING: + from collections.abc import Iterator + + from dimos.memory2.type.observation import Observation + +logger = setup_logger() + + +def _pose_tuple_to_transform( + pose: tuple[float, float, float, float, float, float, float], + *, + frame_id: str, + child_frame_id: str, + ts: float, +) -> Transform: + x, y, z, qx, qy, qz, qw = pose + return Transform( + translation=Vector3(x, y, z), + rotation=Quaternion(qx, qy, qz, qw), + frame_id=frame_id, + child_frame_id=child_frame_id, + ts=ts, + ) + + +class DetectMarkers(Transformer[Image, Detection3DMarker]): + """Detect fiducial markers and emit one world-pose observation per marker.""" + + def __init__( + self, + camera_info: CameraInfo, + marker_length_m: float, + aruco_dictionary: str = "DICT_APRILTAG_36h11", + world_frame: str = "world", + ) -> None: + if marker_length_m <= 0: + raise ValueError(f"marker_length_m must be > 0, got {marker_length_m}") + self.camera_info = camera_info + self.marker_length_m = marker_length_m + self.aruco_dictionary = aruco_dictionary + self.world_frame = world_frame + self._detector = create_aruco_detector(aruco_dictionary) + self._cam_mtx, self._dist = camera_info_to_cv_matrices(camera_info) + + def __call__( + self, upstream: Iterator[Observation[Image]] + ) -> Iterator[Observation[Detection3DMarker]]: + info = self.camera_info + marker_size = Vector3(self.marker_length_m, self.marker_length_m, 0.0) + + for obs in upstream: + if obs.pose is None: + logger.debug("DetectMarkers: obs %s has no .pose; skipping", obs.id) + continue + + image = obs.data + if ( + info.width + and info.height + and (image.width != info.width or image.height != info.height) + ): + logger.debug( + "DetectMarkers: image %sx%s != CameraInfo %sx%s; skip", + image.width, + image.height, + info.width, + info.height, + ) + continue + + gray = image.to_grayscale().as_numpy() + corners, ids, _ = self._detector.detectMarkers(gray) + if ids is None or len(ids) == 0: + continue + + t_world_optical = _pose_tuple_to_transform( + obs.pose, + frame_id=self.world_frame, + child_frame_id="optical", + ts=obs.ts, + ) + + for corner_set, mid_arr in zip(corners, ids, strict=True): + mid = int(mid_arr[0]) + pose = estimate_marker_pose( + corner_set, + self.marker_length_m, + self._cam_mtx, + self._dist, + distortion_model=info.distortion_model, + ) + if pose is None: + continue + rvec, tvec = pose + t_optical_marker = rvec_tvec_to_transform( + rvec, + tvec, + frame_id="optical", + child_frame_id=f"marker_{mid}", + ts=obs.ts, + ) + t_world_marker = t_world_optical + t_optical_marker + + corners_2d = corner_set.reshape(4, 2).astype(np.float32) + xy_min = corners_2d.min(axis=0) + xy_max = corners_2d.max(axis=0) + bbox = (float(xy_min[0]), float(xy_min[1]), float(xy_max[0]), float(xy_max[1])) + + det = Detection3DMarker( + bbox=bbox, + track_id=mid, + class_id=mid, + confidence=1.0, + name=f"marker_{mid}", + ts=obs.ts, + image=image, + center=t_world_marker.translation, + size=marker_size, + transform=t_world_optical, + frame_id=self.world_frame, + orientation=t_world_marker.rotation, + marker_id=mid, + corners_px=corners_2d, + dictionary=self.aruco_dictionary, + ) + + yield obs.derive(data=det, pose=t_world_marker).tag(marker_id=mid) diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 730509180b..23147e9380 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -714,6 +714,14 @@ def map_cmd( help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)", ), no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), + markers: bool = typer.Option( + False, + "--markers", + help="Detect AprilTag markers in color_image and overlay them in rerun", + ), + marker_size: float = typer.Option( + 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" + ), ) -> None: """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" from dimos.utils.cli.map import main as map_main @@ -728,6 +736,8 @@ def map_cmd( export=export, full_pgo=full_pgo, no_gui=no_gui, + markers=markers, + marker_size=marker_size, ) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 8b23df1e25..bea304a538 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -20,12 +20,22 @@ import rerun.blueprint as rrb import typer -from dimos.mapping.voxels import VoxelMapTransformer +from dimos.mapping.voxels import VoxelGrid from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.transform import QualityWindow from dimos.memory2.type.observation import Observation +from dimos.memory2.vis.color import Color +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.sensor_msgs.Image import Image +from dimos.perception.fiducial.marker_transformer import DetectMarkers +from dimos.robot.unitree.go2.connection import BASE_TO_OPTICAL, _camera_info_static from dimos.utils.data import resolve_named_path from dimos.visualization.rerun.init import rerun_init +PATH_THICKNESS = 0.01 + def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: seen = 0 @@ -70,11 +80,11 @@ def main( help="Run pose graph optimization and rebuild from spatially-deduped frames", ), pgo_tol: float = typer.Option( - 0.3, "--pgo-tol", help="Spatial dedup tolerance for --pgo (meters)" - ), - block_count: int = typer.Option( - 2_000_000, "--block-count", help="VoxelBlockGrid capacity (--pgo only)" + 0.3, + "--pgo-tol", + help="Spatial dedup tolerance (meters); applies to both raw and --pgo maps", ), + block_count: int = typer.Option(2_000_000, "--block-count", help="VoxelBlockGrid capacity"), export: bool = typer.Option( False, "--export", @@ -86,6 +96,14 @@ def main( help="Also build a full-replay PGO map (every frame) for comparison (implies --pgo)", ), no_gui: bool = typer.Option(False, "--no-gui", help="Skip rerun visualization"), + markers: bool = typer.Option( + False, + "--markers", + help="Detect AprilTag markers in color_image and overlay them in rerun", + ), + marker_size: float = typer.Option( + 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" + ), ) -> None: db_path = resolve_named_path(dataset, ".db") if export or full_pgo: @@ -96,18 +114,39 @@ def main( print(lidar.summary()) - path: list[tuple[float, float, float]] = [] + total = lidar.count() - def collect_path(obs: Observation[Any]) -> None: + # Spatial dedup: bucket frames by 3D cell using the raw pose, keep the + # latest per cell. Shared by raw and PGO rebuilds. Doesn't touch obs.data + # so it stays cheap (no pointcloud loading). + seen: dict[tuple[int, int, int], Observation[Any]] = {} + for obs in lidar: if obs.pose is None: - return - # Reject placeholder poses at the world origin (translation = 0,0,0). + continue + # Reject placeholder poses at the world origin. if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: - return - path.append((obs.pose[0], obs.pose[1], obs.pose[2])) + continue + cell = ( + int(obs.pose[0] / pgo_tol), + int(obs.pose[1] / pgo_tol), + int(obs.pose[2] / pgo_tol), + ) + seen[cell] = obs + + n_kept = len(seen) + pct = 100 * n_kept / total if total else 0 + print(f"dedup: kept [{n_kept}/{total}] frames ({pct:.1f}%) at tol={pgo_tol}m") + + # Dict insertion order = lidar iteration order = chronological. + # `seen` only contains entries with non-None poses (filtered above). + path: list[tuple[float, float, float]] = [ + (obs.pose[0], obs.pose[1], obs.pose[2]) for obs in seen.values() if obs.pose is not None + ] pgo_map = None pgo_path: list[tuple[float, float, float]] = [] + loops: list[Any] = [] + interp: Any | None = None if pgo: from dimos.mapping.relocalization.pgo import ( LoopClosure, @@ -115,16 +154,15 @@ def collect_path(obs: Observation[Any]) -> None: make_interpolator, pgo_keyframes, ) - from dimos.mapping.voxels import VoxelGrid - total = lidar.count() print("running PGO twopass map...") - loops: list[LoopClosure] = [] + pgo_loops: list[LoopClosure] = [] keyframes = pgo_keyframes( lidar, on_frame=progress(total, "pgo pass 1 (optimizing)"), - loop_closures_out=loops, + loop_closures_out=pgo_loops, ) + loops = list(pgo_loops) corrections = keyframes_to_corrections(keyframes) interp = make_interpolator(corrections) @@ -132,27 +170,6 @@ def collect_path(obs: Observation[Any]) -> None: kf_t = kf_obs.data.optimized.translation pgo_path.append((kf_t.x, kf_t.y, kf_t.z)) - # Canonical PGO rebuild: bucket frames by spatial cell using the raw - # odom pose, keep the latest per cell, transform with the interpolated - # correction. obs.data is not touched in the dedup loop so it stays - # cheap (no pointcloud loading). - seen: dict[tuple[int, int, int], Any] = {} - for obs in lidar: - if obs.pose is None: - continue - if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: - continue - cell = ( - int(obs.pose[0] / pgo_tol), - int(obs.pose[1] / pgo_tol), - int(obs.pose[2] / pgo_tol), - ) - seen[cell] = obs - - n_kept = len(seen) - pct = 100 * n_kept / total if total else 0 - print(f"pgo rebuild: kept [{n_kept}/{total}] frames ({pct:.1f}%) at tol={pgo_tol}m") - pass2_pb = progress(n_kept, "pgo pass 2 (rebuilding)") grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) try: @@ -167,6 +184,7 @@ def collect_path(obs: Observation[Any]) -> None: full_pgo_map = None if full_pgo: + assert interp is not None full_pb = progress(total, "full pgo (rebuilding)") full_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) try: @@ -179,43 +197,83 @@ def collect_path(obs: Observation[Any]) -> None: finally: full_grid.dispose() - global_map = ( - lidar.tap(collect_path) - .transform(VoxelMapTransformer(voxel_size=voxel, device=device)) - .tap(progress(lidar.count(), "reconstructing global map")) - .last() - .data - ) + # Raw map: same dedup'd frames, no PGO correction. + raw_pb = progress(n_kept, "reconstructing global map") + raw_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) + try: + for obs in seen.values(): + raw_pb(obs) + if len(obs.data) == 0: + continue + raw_grid.add_frame(obs.data) + global_map = raw_grid.get_global_pointcloud2() + finally: + raw_grid.dispose() + + marker_dets: list[Observation[Any]] = [] + if markers: + color_image = store.stream("color_image", Image) + + def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: + # obs.pose is base_link-in-world; DetectMarkers wants optical-in-world. + if obs.pose is None: + return obs.derive(data=obs.data) + x, y, z, qx, qy, qz, qw = obs.pose + t_world_base = Transform( + translation=Vector3(x, y, z), + rotation=Quaternion(qx, qy, qz, qw), + frame_id="world", + child_frame_id="base_link", + ts=obs.ts, + ) + return obs.derive(data=obs.data, pose=t_world_base + BASE_TO_OPTICAL) + + xf = DetectMarkers( + camera_info=_camera_info_static(), + marker_length_m=marker_size, + ) + # 2Hz quality-gated: keep only the sharpest frame per 0.5s window. + marker_dets = ( + color_image.tap(progress(color_image.count(), "filtering frames")) + .transform(QualityWindow(lambda img: img.sharpness, window=0.5)) + .map(_lift_pose_to_optical) + .transform(xf) + .to_list() + ) + unique = sorted({obs.data.marker_id for obs in marker_dets}) + print(f"markers: {len(marker_dets)} detections across {len(unique)} unique ids {unique}") if not no_gui: rerun_init("dimos map tool", spawn=True) rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) - rr.log("world/raw_map/pointcloud", global_map.to_rerun(size=voxel), static=True) + rr.log("world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True) if path: rr.log( "world/raw_map/path", - rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[0.05]), + rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[PATH_THICKNESS]), static=True, ) if pgo_map is not None: - rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(size=voxel), static=True) + rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(voxel_size=voxel / 2), static=True) if full_pgo_map is not None: rr.log( "world/full_pgo_map/pointcloud", - full_pgo_map.to_rerun(size=voxel), + full_pgo_map.to_rerun(voxel_size=voxel / 2), static=True, ) STEM_HEIGHT = 2.0 # lift pose-graph viz above the map for legibility if pgo_path: rr.log( "world/pgo_map/path", - rr.LineStrips3D(strips=[pgo_path], colors=[[255, 255, 255]], radii=[0.05]), + rr.LineStrips3D( + strips=[pgo_path], colors=[[255, 255, 255]], radii=[PATH_THICKNESS] + ), static=True, ) hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path] rr.log( "world/pgo_map/keyframes", - rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.05]), + rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.025]), static=True, ) if pgo and loops: @@ -239,6 +297,54 @@ def collect_path(obs: Observation[Any]) -> None: rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.05]), static=True, ) + if marker_dets: + half = marker_size / 2.0 + n = len(marker_dets) + fill_half = [(half, half, 0.005)] * n + # Outline sits just outside the fill so both stay visible. + outline_bump = marker_size * 0.05 + outline_half = [(half + outline_bump, half + outline_bump, 0.006)] * n + centers = [(d.data.center.x, d.data.center.y, d.data.center.z) for d in marker_dets] + quaternions = [ + ( + d.data.orientation.x, + d.data.orientation.y, + d.data.orientation.z, + d.data.orientation.w, + ) + for d in marker_dets + ] + unique_ids = sorted({d.data.marker_id for d in marker_dets}) + id_to_color = { + mid: Color.from_cmap("tab10", (i % 10) / 10.0).rgb_u8() + for i, mid in enumerate(unique_ids) + } + colors = [id_to_color[d.data.marker_id] for d in marker_dets] + labels = [f"id={d.data.marker_id}" for d in marker_dets] + rr.log( + "world/markers/fill", + rr.Boxes3D( + centers=centers, + half_sizes=fill_half, + quaternions=quaternions, + colors=colors, + fill_mode=rr.components.FillMode.Solid, + labels=labels, + ), + static=True, + ) + rr.log( + "world/markers/outline", + rr.Boxes3D( + centers=centers, + half_sizes=outline_half, + quaternions=quaternions, + colors=[(255, 255, 255)] * n, + fill_mode=rr.components.FillMode.MajorWireframe, + radii=0.002, + ), + static=True, + ) if export and pgo_map is not None: from pathlib import Path @@ -255,3 +361,6 @@ def collect_path(obs: Observation[Any]) -> None: if __name__ == "__main__": typer.run(main) + typer.run(main) + typer.run(main) + typer.run(main) From 82e2dab13fad12eee902c86575fd61734b19b018 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 15:12:13 +0800 Subject: [PATCH 16/36] map detects markers correctly --- dimos/utils/cli/map.py | 93 ++++++++++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 26 deletions(-) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index bea304a538..6fb129a4bd 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -25,16 +25,14 @@ from dimos.memory2.transform import QualityWindow from dimos.memory2.type.observation import Observation from dimos.memory2.vis.color import Color -from dimos.msgs.geometry_msgs.Quaternion import Quaternion from dimos.msgs.geometry_msgs.Transform import Transform -from dimos.msgs.geometry_msgs.Vector3 import Vector3 from dimos.msgs.sensor_msgs.Image import Image from dimos.perception.fiducial.marker_transformer import DetectMarkers -from dimos.robot.unitree.go2.connection import BASE_TO_OPTICAL, _camera_info_static +from dimos.robot.unitree.go2.connection import _camera_info_static from dimos.utils.data import resolve_named_path from dimos.visualization.rerun.init import rerun_init -PATH_THICKNESS = 0.01 +PATH_THICKNESS = 0.005 def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: @@ -212,31 +210,19 @@ def main( marker_dets: list[Observation[Any]] = [] if markers: + # Image observations in dimos recordings are stamped with + # frame_id="camera_optical", so obs.pose is already optical-in-world + # (verified: matches lidar_base_pose + BASE_TO_OPTICAL to ~1mm). + # No mount composition needed. color_image = store.stream("color_image", Image) - - def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: - # obs.pose is base_link-in-world; DetectMarkers wants optical-in-world. - if obs.pose is None: - return obs.derive(data=obs.data) - x, y, z, qx, qy, qz, qw = obs.pose - t_world_base = Transform( - translation=Vector3(x, y, z), - rotation=Quaternion(qx, qy, qz, qw), - frame_id="world", - child_frame_id="base_link", - ts=obs.ts, - ) - return obs.derive(data=obs.data, pose=t_world_base + BASE_TO_OPTICAL) - xf = DetectMarkers( camera_info=_camera_info_static(), marker_length_m=marker_size, ) # 2Hz quality-gated: keep only the sharpest frame per 0.5s window. marker_dets = ( - color_image.tap(progress(color_image.count(), "filtering frames")) + color_image.tap(progress(color_image.count(), "detecting markers")) .transform(QualityWindow(lambda img: img.sharpness, window=0.5)) - .map(_lift_pose_to_optical) .transform(xf) .to_list() ) @@ -272,7 +258,7 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: ) hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path] rr.log( - "world/pgo_map/keyframes", + "world/pgo_map/pgo/keyframes", rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.025]), static=True, ) @@ -293,8 +279,8 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: for lc in loops ] rr.log( - "world/pgo_map/loop_closures", - rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.05]), + "world/pgo_map/pgo/loop_closures", + rr.LineStrips3D(strips=loop_strips, colors=[[231, 76, 60]], radii=[0.025]), static=True, ) if marker_dets: @@ -322,7 +308,7 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: colors = [id_to_color[d.data.marker_id] for d in marker_dets] labels = [f"id={d.data.marker_id}" for d in marker_dets] rr.log( - "world/markers/fill", + "world/raw_map/markers/fill", rr.Boxes3D( centers=centers, half_sizes=fill_half, @@ -334,7 +320,7 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: static=True, ) rr.log( - "world/markers/outline", + "world/raw_map/markers/outline", rr.Boxes3D( centers=centers, half_sizes=outline_half, @@ -346,6 +332,61 @@ def _lift_pose_to_optical(obs: Observation[Image]) -> Observation[Image]: static=True, ) + if interp is not None: + # PGO-corrected marker poses. interp(ts) maps raw_world → + # pgo_world; composing with each raw marker transform lifts + # it into the corrected frame so it lines up with pgo_map. + pgo_centers: list[tuple[float, float, float]] = [] + pgo_quaternions: list[tuple[float, float, float, float]] = [] + for d in marker_dets: + raw_tf = Transform( + translation=d.data.center, + rotation=d.data.orientation, + frame_id="world", + child_frame_id=f"marker_{d.data.marker_id}", + ts=d.ts, + ) + corrected = interp(d.ts) + raw_tf + pgo_centers.append( + ( + corrected.translation.x, + corrected.translation.y, + corrected.translation.z, + ) + ) + pgo_quaternions.append( + ( + corrected.rotation.x, + corrected.rotation.y, + corrected.rotation.z, + corrected.rotation.w, + ) + ) + rr.log( + "world/pgo_map/markers/fill", + rr.Boxes3D( + centers=pgo_centers, + half_sizes=fill_half, + quaternions=pgo_quaternions, + colors=colors, + fill_mode=rr.components.FillMode.Solid, + labels=labels, + ), + static=True, + ) + rr.log( + "world/pgo_map/markers/outline", + rr.Boxes3D( + centers=pgo_centers, + half_sizes=outline_half, + quaternions=pgo_quaternions, + colors=[(255, 255, 255)] * n, + fill_mode=rr.components.FillMode.MajorWireframe, + radii=0.002, + ), + static=True, + ) + if export and pgo_map is not None: from pathlib import Path From b98b506c31fe35eee1c50759c8205f26beb538ab Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 15:29:36 +0800 Subject: [PATCH 17/36] maptool speed limits --- dimos/memory2/transform.py | 61 ++++++++++++++++++++++++++++++++++++++ dimos/robot/cli/dimos.py | 12 ++++++++ dimos/utils/cli/map.py | 36 ++++++++++++++++------ 3 files changed, 100 insertions(+), 9 deletions(-) diff --git a/dimos/memory2/transform.py b/dimos/memory2/transform.py index 6894807627..a79ccf36b8 100644 --- a/dimos/memory2/transform.py +++ b/dimos/memory2/transform.py @@ -191,6 +191,67 @@ def _speed(upstream: Iterator[Observation[Any]]) -> Iterator[Observation[float]] return FnIterTransformer(_speed) +class SpeedLimit(Transformer[T, T]): + """Pass through observations whose linear (and optionally angular) speed is low. + + Linear: ``||Δtranslation|| / Δt`` in m/s, must be ≤ ``max_mps``. + Angular: rotation-angle-between-quaternions / Δt in deg/s, must be + ≤ ``max_dps`` when set. + + Both metrics are computed between consecutive observations in stream + order. Use to drop motion-blurry frames from fiducial / OCR / + SLAM-frontend pipelines. + + First observation is dropped (no reference). Observations with no + ``.pose`` are skipped. ``Δt ≤ 0`` (duplicate / non-monotonic + timestamps) is treated as unknown and dropped. + + Note: the effective sampling interval is whatever upstream provides. + Chain after ``QualityWindow(window=Δt)`` to measure motion over a + fixed time window instead of per-frame. + """ + + def __init__(self, max_mps: float, max_dps: float | None = None) -> None: + if max_mps <= 0: + raise ValueError(f"max_mps must be > 0, got {max_mps}") + if max_dps is not None and max_dps <= 0: + raise ValueError(f"max_dps must be > 0 when set, got {max_dps}") + self.max_mps = max_mps + self.max_dps = max_dps + + def __call__(self, upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: + prev: Observation[T] | None = None + max_mps = self.max_mps + max_dps = self.max_dps + max_rps = math.radians(max_dps) if max_dps is not None else None + for obs in upstream: + if obs.pose is None: + continue + if prev is not None and prev.pose is not None: + dt = obs.ts - prev.ts + if dt > 0: + dx = obs.pose[0] - prev.pose[0] + dy = obs.pose[1] - prev.pose[1] + dz = obs.pose[2] - prev.pose[2] + v = math.sqrt(dx * dx + dy * dy + dz * dz) / dt + ok = v <= max_mps + if ok and max_rps is not None: + # rotation angle between two unit quaternions. + # |q · q'| collapses the double-cover sign ambiguity; + # clamp guards against numerical drift past 1. + dot = ( + obs.pose[3] * prev.pose[3] + + obs.pose[4] * prev.pose[4] + + obs.pose[5] * prev.pose[5] + + obs.pose[6] * prev.pose[6] + ) + angle = 2.0 * math.acos(min(1.0, abs(dot))) + ok = (angle / dt) <= max_rps + if ok: + yield obs + prev = obs + + def smooth(window: int) -> FnIterTransformer[float, float]: """Sliding window average over obs.data (must be numeric).""" diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 23147e9380..d26d28d771 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -722,6 +722,16 @@ def map_cmd( marker_size: float = typer.Option( 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" ), + marker_max_speed: float = typer.Option( + 0.05, + "--marker-max-speed", + help="Skip frames where robot is moving faster than this (m/s); 0 disables", + ), + marker_max_rot_rate: float = typer.Option( + 15.0, + "--marker-max-rot-rate", + help="Skip frames where robot is rotating faster than this (deg/s); 0 disables", + ), ) -> None: """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" from dimos.utils.cli.map import main as map_main @@ -738,6 +748,8 @@ def map_cmd( no_gui=no_gui, markers=markers, marker_size=marker_size, + marker_max_speed=marker_max_speed, + marker_max_rot_rate=marker_max_rot_rate, ) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 6fb129a4bd..a6cafccd09 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -22,7 +22,8 @@ from dimos.mapping.voxels import VoxelGrid from dimos.memory2.store.sqlite import SqliteStore -from dimos.memory2.transform import QualityWindow +from dimos.memory2.stream import Stream +from dimos.memory2.transform import QualityWindow, SpeedLimit from dimos.memory2.type.observation import Observation from dimos.memory2.vis.color import Color from dimos.msgs.geometry_msgs.Transform import Transform @@ -32,7 +33,7 @@ from dimos.utils.data import resolve_named_path from dimos.visualization.rerun.init import rerun_init -PATH_THICKNESS = 0.005 +PATH_THICKNESS = 0.01 def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: @@ -102,6 +103,16 @@ def main( marker_size: float = typer.Option( 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" ), + marker_max_speed: float = typer.Option( + 0.3, + "--marker-max-speed", + help="Skip frames where robot is moving faster than this (m/s); 0 disables", + ), + marker_max_rot_rate: float = typer.Option( + 30.0, + "--marker-max-rot-rate", + help="Skip frames where robot is rotating faster than this (deg/s); 0 disables", + ), ) -> None: db_path = resolve_named_path(dataset, ".db") if export or full_pgo: @@ -219,13 +230,20 @@ def main( camera_info=_camera_info_static(), marker_length_m=marker_size, ) - # 2Hz quality-gated: keep only the sharpest frame per 0.5s window. - marker_dets = ( - color_image.tap(progress(color_image.count(), "detecting markers")) - .transform(QualityWindow(lambda img: img.sharpness, window=0.5)) - .transform(xf) - .to_list() - ) + # 2Hz quality-gated: keep only the sharpest frame per 0.5s window, + # then drop frames where the robot was moving (linear + rotational) + # faster than the limits — speed/rate are averaged across the window. + pipeline: Stream[Image] = color_image.tap( + progress(color_image.count(), "detecting markers") + ).transform(QualityWindow(lambda img: img.sharpness, window=0.5)) + if marker_max_speed > 0: + pipeline = pipeline.transform( + SpeedLimit( + max_mps=marker_max_speed, + max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None, + ) + ) + marker_dets = pipeline.transform(xf).to_list() unique = sorted({obs.data.marker_id for obs in marker_dets}) print(f"markers: {len(marker_dets)} detections across {len(unique)} unique ids {unique}") From 4d5e9b66cbdea13b4df7690e52d48e0791bdd50b Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 16:11:19 +0800 Subject: [PATCH 18/36] markers_rrd renderer --- dimos/utils/cli/map.py | 27 +++-- dimos/utils/cli/markers_rrd.py | 187 +++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 8 deletions(-) create mode 100644 dimos/utils/cli/markers_rrd.py diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index a6cafccd09..554e1878d2 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -104,12 +104,12 @@ def main( 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" ), marker_max_speed: float = typer.Option( - 0.3, + 0.1, "--marker-max-speed", help="Skip frames where robot is moving faster than this (m/s); 0 disables", ), marker_max_rot_rate: float = typer.Option( - 30.0, + 15, "--marker-max-rot-rate", help="Skip frames where robot is rotating faster than this (deg/s); 0 disables", ), @@ -318,12 +318,23 @@ def main( ) for d in marker_dets ] - unique_ids = sorted({d.data.marker_id for d in marker_dets}) - id_to_color = { - mid: Color.from_cmap("tab10", (i % 10) / 10.0).rgb_u8() - for i, mid in enumerate(unique_ids) - } - colors = [id_to_color[d.data.marker_id] for d in marker_dets] + # Color mode: turbo over detection time vs. tab10 over marker id. + COLOR_BY_TIME = True + if COLOR_BY_TIME: + ts_min = min(d.ts for d in marker_dets) + ts_max = max(d.ts for d in marker_dets) + ts_span = ts_max - ts_min if ts_max > ts_min else 1.0 + colors = [ + Color.from_cmap("turbo", (d.ts - ts_min) / ts_span).rgb_u8() + for d in marker_dets + ] + else: + unique_ids = sorted({d.data.marker_id for d in marker_dets}) + id_to_color = { + mid: Color.from_cmap("tab10", (i % 10) / 10.0).rgb_u8() + for i, mid in enumerate(unique_ids) + } + colors = [id_to_color[d.data.marker_id] for d in marker_dets] labels = [f"id={d.data.marker_id}" for d in marker_dets] rr.log( "world/raw_map/markers/fill", diff --git a/dimos/utils/cli/markers_rrd.py b/dimos/utils/cli/markers_rrd.py new file mode 100644 index 0000000000..3b67f9c2ea --- /dev/null +++ b/dimos/utils/cli/markers_rrd.py @@ -0,0 +1,187 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Temporary: dump apriltag detector replay (.rrd) for reliability investigation. + +Walks a recorded SQLite dataset and writes an rrd containing: +- per-image: camera pose + pinhole + image data +- per-lidar: base_link pose (a moving axis) +- per detection: marker box in world frame, at the detection timestamp + +Usage: + uv run python -m dimos.utils.cli.markers_rrd hk_village1 --out hk.rrd + rerun hk.rrd + +Throwaway script next to ``map.py``; remove once the apriltag reliability work +lands. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import rerun as rr +import typer + +from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.stream import Stream +from dimos.memory2.transform import QualityWindow, SpeedLimit +from dimos.memory2.vis.color import Color +from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.perception.fiducial.marker_transformer import DetectMarkers +from dimos.robot.unitree.go2.connection import _camera_info_static +from dimos.utils.data import resolve_named_path + +TIMELINE = "ts" + + +def main( + dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), + out: Path = typer.Option(..., "--out", help="Output .rrd path"), + marker_size: float = typer.Option(0.1, "--marker-size", help="Marker edge length (m)"), + marker_max_speed: float = typer.Option( + 0.15, "--marker-max-speed", help="Detection speed gate (m/s); 0 disables" + ), + marker_max_rot_rate: float = typer.Option( + 30.0, "--marker-max-rot-rate", help="Detection rot gate (deg/s); 0 disables" + ), + quality_window: float = typer.Option( + 0.25, "--quality-window", help="Sharpest-frame window for detection (s)" + ), +) -> None: + db_path = resolve_named_path(dataset, ".db") + cam_info = _camera_info_static() + + rr.init("dimos markers", recording_id=db_path.stem) + rr.save(str(out)) + + # Static pinhole on the camera entity; per-frame Transform3D goes on the + # same entity. Image is the child so it projects through the pinhole. + pinhole = cam_info.to_rerun() + assert not isinstance(pinhole, list) + rr.log("world/camera", pinhole, static=True) + + store = SqliteStore(path=str(db_path)) + with store: + color_image = store.stream("color_image", Image) + lidar = store.stream("lidar", PointCloud2) + + # ---- pass 1: robot base pose over time (from lidar.pose) ---- + for lidar_obs in lidar: + if lidar_obs.pose is None: + continue + rr.set_time(TIMELINE, timestamp=lidar_obs.ts) + x, y, z, qx, qy, qz, qw = lidar_obs.pose + rr.log( + "world/robot", + rr.Transform3D( + translation=[x, y, z], quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw]) + ), + ) + + # ---- pass 2: camera pose + image per color_image frame ---- + n_img = color_image.count() + for i, img_obs in enumerate(color_image): + rr.set_time(TIMELINE, timestamp=img_obs.ts) + if img_obs.pose is not None: + x, y, z, qx, qy, qz, qw = img_obs.pose + rr.log( + "world/camera", + rr.Transform3D( + translation=[x, y, z], quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw]) + ), + ) + rr.log("world/camera/image", img_obs.data.to_rerun()) + # Clear any prior detection box at this ts; pass 3 will overwrite + # this with the actual detection iff one fires for this frame. + rr.log( + "world/camera/image/detections", + rr.Boxes2D(array=[], array_format=rr.Box2DFormat.XYWH), + ) + if (i + 1) % 50 == 0 or i + 1 == n_img: + print(f"images: {i + 1}/{n_img}") + + # ---- pass 3: marker detections (filtered same way as `dimos map`) ---- + xf = DetectMarkers(camera_info=cam_info, marker_length_m=marker_size) + pipeline: Stream[Any] = color_image.transform( + QualityWindow(lambda img: img.sharpness, window=quality_window) + ) + if marker_max_speed > 0: + pipeline = pipeline.transform( + SpeedLimit( + max_mps=marker_max_speed, + max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None, + ) + ) + # Each 3D detection gets its own entity path so it persists from its + # ts onward — earlier detections stay visible as new ones appear. + # Color by ts (turbo) across the recording's full image-time range. + ts_min = color_image.first().ts + ts_max = color_image.last().ts + ts_span = ts_max - ts_min if ts_max > ts_min else 1.0 + n_det = 0 + for det_obs in pipeline.transform(xf): + d = det_obs.data + rr.set_time(TIMELINE, timestamp=det_obs.ts) + color = Color.from_cmap("turbo", (det_obs.ts - ts_min) / ts_span).rgb_u8() + rr.log( + f"world/markers/det_{n_det:05d}", + rr.Boxes3D( + centers=[(d.center.x, d.center.y, d.center.z)], + half_sizes=[(marker_size / 2, marker_size / 2, 0.005)], + quaternions=[ + rr.Quaternion( + xyzw=[ + d.orientation.x, + d.orientation.y, + d.orientation.z, + d.orientation.w, + ] + ) + ], + colors=[color], + fill_mode=rr.components.FillMode.Solid, + labels=[f"id={d.marker_id}"], + ), + ) + n_det += 1 + xs = d.corners_px[:, 0] + ys = d.corners_px[:, 1] + x1, y1 = float(xs.min()), float(ys.min()) + x2, y2 = float(xs.max()), float(ys.max()) + rr.log( + "world/camera/image/detections", + rr.Boxes2D( + array=[[x1, y1, x2 - x1, y2 - y1]], + array_format=rr.Box2DFormat.XYWH, + labels=[f"id={d.marker_id}"], + ), + ) + print(f"detections: {n_det}") + + print(f"wrote {out}") + print(f"open with: rerun {out}") + + +if __name__ == "__main__": + typer.run(main) + + print(f"wrote {out}") + print(f"open with: rerun {out}") + + +if __name__ == "__main__": + typer.run(main) From b3163bc117dbf932d6d2085c7c51c31f93748215 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 16:53:58 +0800 Subject: [PATCH 19/36] average window for markers --- .../perception/fiducial/marker_transformer.py | 89 ++++++++++++++++++- dimos/utils/cli/markers_rrd.py | 64 +++++++++++-- 2 files changed, 142 insertions(+), 11 deletions(-) diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py index a3839ed9bb..e6c3d3c02f 100644 --- a/dimos/perception/fiducial/marker_transformer.py +++ b/dimos/perception/fiducial/marker_transformer.py @@ -28,6 +28,8 @@ from __future__ import annotations +import dataclasses +import math from typing import TYPE_CHECKING import numpy as np @@ -45,6 +47,7 @@ estimate_marker_pose, rvec_tvec_to_transform, ) +from dimos.types.timestamped import TimestampedBufferCollection from dimos.utils.logging_config import setup_logger if TYPE_CHECKING: @@ -72,6 +75,39 @@ def _pose_tuple_to_transform( ) +def _average_marker_pose( + buffer: TimestampedBufferCollection[Detection3DMarker], +) -> tuple[Vector3, Quaternion]: + """Mean translation; quaternion mean with hemisphere alignment. + + Quaternions q and -q encode the same rotation, so naive averaging can + cancel. We pick the first sample as a hemisphere reference and flip the + sign of any sample whose dot product against it is negative before + summing. For closely-spaced rotations within a short window this is + indistinguishable from a proper SLERP-style average. + """ + items = list(buffer) + n = len(items) + cx = sum(d.center.x for d in items) / n + cy = sum(d.center.y for d in items) / n + cz = sum(d.center.z for d in items) / n + + ref = items[0].orientation + qsx = qsy = qsz = qsw = 0.0 + for d in items: + q = d.orientation + s = -1.0 if (q.x * ref.x + q.y * ref.y + q.z * ref.z + q.w * ref.w) < 0 else 1.0 + qsx += s * q.x + qsy += s * q.y + qsz += s * q.z + qsw += s * q.w + norm = math.sqrt(qsx * qsx + qsy * qsy + qsz * qsz + qsw * qsw) + return ( + Vector3(cx, cy, cz), + Quaternion(qsx / norm, qsy / norm, qsz / norm, qsw / norm), + ) + + class DetectMarkers(Transformer[Image, Detection3DMarker]): """Detect fiducial markers and emit one world-pose observation per marker.""" @@ -81,15 +117,27 @@ def __init__( marker_length_m: float, aruco_dictionary: str = "DICT_APRILTAG_36h11", world_frame: str = "world", + smoothing_window: float = 0.0, ) -> None: if marker_length_m <= 0: raise ValueError(f"marker_length_m must be > 0, got {marker_length_m}") + if smoothing_window < 0: + raise ValueError(f"smoothing_window must be >= 0, got {smoothing_window}") self.camera_info = camera_info self.marker_length_m = marker_length_m self.aruco_dictionary = aruco_dictionary self.world_frame = world_frame + self.smoothing_window = smoothing_window self._detector = create_aruco_detector(aruco_dictionary) self._cam_mtx, self._dist = camera_info_to_cv_matrices(camera_info) + # Per marker_id sliding-window buffer of raw detections, used to emit + # smoothed pose updates when ``smoothing_window > 0``. + self._buffers: dict[int, TimestampedBufferCollection[Detection3DMarker]] = {} + # Tracking (smoothing only): if a marker_id reappears after a gap + # larger than the buffer window, treat it as a new track. track_id + # increments monotonically across the whole stream so it's unique. + self._marker_to_track: dict[int, int] = {} + self._next_track_id = 0 def __call__( self, upstream: Iterator[Observation[Image]] @@ -155,9 +203,21 @@ def __call__( xy_max = corners_2d.max(axis=0) bbox = (float(xy_min[0]), float(xy_min[1]), float(xy_max[0]), float(xy_max[1])) + # Decide track_id (only meaningful when smoothing is on). + # Without smoothing, track_id == marker_id (legacy behavior). + if self.smoothing_window > 0: + prior_buf = self._buffers.get(mid) + prior_last = prior_buf.last() if prior_buf is not None else None + if prior_last is None or (obs.ts - prior_last.ts) > self.smoothing_window: + self._next_track_id += 1 + self._marker_to_track[mid] = self._next_track_id + track_id = self._marker_to_track[mid] + else: + track_id = mid + det = Detection3DMarker( bbox=bbox, - track_id=mid, + track_id=track_id, class_id=mid, confidence=1.0, name=f"marker_{mid}", @@ -173,4 +233,29 @@ def __call__( dictionary=self.aruco_dictionary, ) - yield obs.derive(data=det, pose=t_world_marker).tag(marker_id=mid) + yielded_det = det + yielded_pose = t_world_marker + if self.smoothing_window > 0: + # Buffer raw detections per marker_id over a sliding + # window; emit the windowed-mean pose so each successive + # detection refines the same marker's estimate instead + # of producing a fresh independent observation. + buf = self._buffers.setdefault( + mid, TimestampedBufferCollection(self.smoothing_window) + ) + buf.add(det) + avg_center, avg_orient = _average_marker_pose(buf) + yielded_det = dataclasses.replace( + det, center=avg_center, orientation=avg_orient + ) + yielded_pose = Transform( + translation=avg_center, + rotation=avg_orient, + frame_id=self.world_frame, + child_frame_id=f"marker_{mid}", + ts=obs.ts, + ) + + yield obs.derive(data=yielded_det, pose=yielded_pose).tag( + marker_id=mid, track_id=track_id + ) diff --git a/dimos/utils/cli/markers_rrd.py b/dimos/utils/cli/markers_rrd.py index 3b67f9c2ea..b1e072d1e1 100644 --- a/dimos/utils/cli/markers_rrd.py +++ b/dimos/utils/cli/markers_rrd.py @@ -53,13 +53,16 @@ def main( out: Path = typer.Option(..., "--out", help="Output .rrd path"), marker_size: float = typer.Option(0.1, "--marker-size", help="Marker edge length (m)"), marker_max_speed: float = typer.Option( - 0.15, "--marker-max-speed", help="Detection speed gate (m/s); 0 disables" + 0.5, "--marker-max-speed", help="Detection speed gate (m/s); 0 disables" ), marker_max_rot_rate: float = typer.Option( - 30.0, "--marker-max-rot-rate", help="Detection rot gate (deg/s); 0 disables" + 50.0, "--marker-max-rot-rate", help="Detection rot gate (deg/s); 0 disables" ), quality_window: float = typer.Option( - 0.25, "--quality-window", help="Sharpest-frame window for detection (s)" + 0.1, "--quality-window", help="Sharpest-frame window for detection (s)" + ), + smoothing_window: float = typer.Option( + 5.0, "--smoothing-window", help="Buffer window for averaged-track pass (s); 0 disables" ), ) -> None: db_path = resolve_named_path(dataset, ".db") @@ -172,12 +175,55 @@ def main( ) print(f"detections: {n_det}") - print(f"wrote {out}") - print(f"open with: rerun {out}") - - -if __name__ == "__main__": - typer.run(main) + # ---- pass 4: averaged tracks (smoothing_window > 0 → per-track ids) ---- + # Re-runs the same filtered pipeline through a smoothing detector; + # each track yields one entity that updates as the windowed average + # refines. Color stable per track_id for visual identity. + if smoothing_window > 0: + xf_tracked = DetectMarkers( + camera_info=cam_info, + marker_length_m=marker_size, + smoothing_window=smoothing_window, + ) + pipeline_tracked: Stream[Any] = color_image.transform( + QualityWindow(lambda img: img.sharpness, window=quality_window) + ) + if marker_max_speed > 0: + pipeline_tracked = pipeline_tracked.transform( + SpeedLimit( + max_mps=marker_max_speed, + max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None, + ) + ) + seen_tracks: set[int] = set() + n_updates = 0 + for det_obs in pipeline_tracked.transform(xf_tracked): + d = det_obs.data + rr.set_time(TIMELINE, timestamp=det_obs.ts) + color = Color.from_cmap("tab10", (d.track_id % 10) / 10.0).rgb_u8() + rr.log( + f"world/tracks/track_{d.track_id:04d}", + rr.Boxes3D( + centers=[(d.center.x, d.center.y, d.center.z)], + half_sizes=[(marker_size / 2, marker_size / 2, 0.005)], + quaternions=[ + rr.Quaternion( + xyzw=[ + d.orientation.x, + d.orientation.y, + d.orientation.z, + d.orientation.w, + ] + ) + ], + colors=[color], + fill_mode=rr.components.FillMode.Solid, + labels=[f"track={d.track_id} id={d.marker_id}"], + ), + ) + seen_tracks.add(d.track_id) + n_updates += 1 + print(f"tracks: {len(seen_tracks)} unique, {n_updates} updates") print(f"wrote {out}") print(f"open with: rerun {out}") From e529913c9de461f0ced54d38e33edad8bcd37d59 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 17:00:06 +0800 Subject: [PATCH 20/36] sliding window 7.5s --- dimos/utils/cli/markers_rrd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/utils/cli/markers_rrd.py b/dimos/utils/cli/markers_rrd.py index b1e072d1e1..eb5306edf7 100644 --- a/dimos/utils/cli/markers_rrd.py +++ b/dimos/utils/cli/markers_rrd.py @@ -62,7 +62,7 @@ def main( 0.1, "--quality-window", help="Sharpest-frame window for detection (s)" ), smoothing_window: float = typer.Option( - 5.0, "--smoothing-window", help="Buffer window for averaged-track pass (s); 0 disables" + 7.5, "--smoothing-window", help="Buffer window for averaged-track pass (s); 0 disables" ), ) -> None: db_path = resolve_named_path(dataset, ".db") From 5751d48deb2cbd123d9965536b5d3273da354dae Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 17:28:36 +0800 Subject: [PATCH 21/36] detector tuned --- dimos/robot/cli/dimos.py | 16 ++++++++-- dimos/utils/cli/map.py | 65 +++++++++++++++++++++++----------------- 2 files changed, 52 insertions(+), 29 deletions(-) diff --git a/dimos/robot/cli/dimos.py b/dimos/robot/cli/dimos.py index 041891a135..5c67c3e0e1 100644 --- a/dimos/robot/cli/dimos.py +++ b/dimos/robot/cli/dimos.py @@ -710,15 +710,25 @@ def map_cmd( 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" ), marker_max_speed: float = typer.Option( - 0.05, + 0.5, "--marker-max-speed", help="Skip frames where robot is moving faster than this (m/s); 0 disables", ), marker_max_rot_rate: float = typer.Option( - 15.0, + 50.0, "--marker-max-rot-rate", help="Skip frames where robot is rotating faster than this (deg/s); 0 disables", ), + marker_quality_window: float = typer.Option( + 0.1, + "--marker-quality-window", + help="Sharpest-frame window for marker detection (s)", + ), + marker_smoothing: float = typer.Option( + 7.5, + "--marker-smoothing", + help="Sliding-window track buffer for marker pose averaging (s); 0 disables (one box per raw detection)", + ), ) -> None: """Rebuild a voxel map from a recorded SQLite dataset and view it in rerun.""" from dimos.utils.cli.map import main as map_main @@ -737,6 +747,8 @@ def map_cmd( marker_size=marker_size, marker_max_speed=marker_max_speed, marker_max_rot_rate=marker_max_rot_rate, + marker_quality_window=marker_quality_window, + marker_smoothing=marker_smoothing, ) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 554e1878d2..3c59c9088b 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -104,15 +104,25 @@ def main( 0.1, "--marker-size", help="Physical marker edge length in meters (--markers only)" ), marker_max_speed: float = typer.Option( - 0.1, + 0.5, "--marker-max-speed", help="Skip frames where robot is moving faster than this (m/s); 0 disables", ), marker_max_rot_rate: float = typer.Option( - 15, + 50.0, "--marker-max-rot-rate", help="Skip frames where robot is rotating faster than this (deg/s); 0 disables", ), + marker_quality_window: float = typer.Option( + 0.1, + "--marker-quality-window", + help="Sharpest-frame window for marker detection (s)", + ), + marker_smoothing: float = typer.Option( + 7.5, + "--marker-smoothing", + help="Sliding-window track buffer for marker pose averaging (s); 0 disables (one box per raw detection)", + ), ) -> None: db_path = resolve_named_path(dataset, ".db") if export or full_pgo: @@ -229,13 +239,14 @@ def main( xf = DetectMarkers( camera_info=_camera_info_static(), marker_length_m=marker_size, + smoothing_window=marker_smoothing, ) - # 2Hz quality-gated: keep only the sharpest frame per 0.5s window, - # then drop frames where the robot was moving (linear + rotational) - # faster than the limits — speed/rate are averaged across the window. + # Keep the sharpest frame per --marker-quality-window window, then + # drop frames where the robot was moving (linear + rotational) faster + # than the limits. Defaults match markers_rrd.py so positions agree. pipeline: Stream[Image] = color_image.tap( progress(color_image.count(), "detecting markers") - ).transform(QualityWindow(lambda img: img.sharpness, window=0.5)) + ).transform(QualityWindow(lambda img: img.sharpness, window=marker_quality_window)) if marker_max_speed > 0: pipeline = pipeline.transform( SpeedLimit( @@ -243,9 +254,21 @@ def main( max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None, ) ) - marker_dets = pipeline.transform(xf).to_list() - unique = sorted({obs.data.marker_id for obs in marker_dets}) - print(f"markers: {len(marker_dets)} detections across {len(unique)} unique ids {unique}") + all_dets = pipeline.transform(xf).to_list() + if marker_smoothing > 0: + # Keep only the latest emission per track_id — that's the most + # averaged pose, drawn once per tracked marker session. + by_track: dict[int, Observation[Any]] = {} + for d in all_dets: + by_track[d.data.track_id] = d + marker_dets = list(by_track.values()) + else: + marker_dets = all_dets + unique_ids = sorted({obs.data.marker_id for obs in marker_dets}) + print( + f"markers: {len(marker_dets)} entries from {len(all_dets)} raw detections " + f"across {len(unique_ids)} unique ids {unique_ids}" + ) if not no_gui: rerun_init("dimos map tool", spawn=True) @@ -318,24 +341,12 @@ def main( ) for d in marker_dets ] - # Color mode: turbo over detection time vs. tab10 over marker id. - COLOR_BY_TIME = True - if COLOR_BY_TIME: - ts_min = min(d.ts for d in marker_dets) - ts_max = max(d.ts for d in marker_dets) - ts_span = ts_max - ts_min if ts_max > ts_min else 1.0 - colors = [ - Color.from_cmap("turbo", (d.ts - ts_min) / ts_span).rgb_u8() - for d in marker_dets - ] - else: - unique_ids = sorted({d.data.marker_id for d in marker_dets}) - id_to_color = { - mid: Color.from_cmap("tab10", (i % 10) / 10.0).rgb_u8() - for i, mid in enumerate(unique_ids) - } - colors = [id_to_color[d.data.marker_id] for d in marker_dets] - labels = [f"id={d.data.marker_id}" for d in marker_dets] + # One entry per tracked marker session — color stable per track_id. + colors = [ + Color.from_cmap("tab10", (d.data.track_id % 10) / 10.0).rgb_u8() + for d in marker_dets + ] + labels = [f"track={d.data.track_id} id={d.data.marker_id}" for d in marker_dets] rr.log( "world/raw_map/markers/fill", rr.Boxes3D( From dcd7f7b9b746b2b40a1f3264b8dbe0618d2870c1 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 21:22:34 +0800 Subject: [PATCH 22/36] nicer map --- dimos/utils/cli/map.py | 45 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 3c59c9088b..e6fd428428 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -288,7 +288,7 @@ def main( full_pgo_map.to_rerun(voxel_size=voxel / 2), static=True, ) - STEM_HEIGHT = 2.0 # lift pose-graph viz above the map for legibility + STEM_HEIGHT = 0 # lift pose-graph viz above the map for legibility if pgo_path: rr.log( "world/pgo_map/path", @@ -300,7 +300,7 @@ def main( hovered = [(x, y, z + STEM_HEIGHT) for (x, y, z) in pgo_path] rr.log( "world/pgo_map/pgo/keyframes", - rr.Points3D(positions=hovered, colors=[[255, 255, 255]], radii=[0.025]), + rr.Points3D(positions=hovered, colors=[[255, 0, 0]], radii=[0.025]), static=True, ) if pgo and loops: @@ -347,6 +347,12 @@ def main( for d in marker_dets ] labels = [f"track={d.data.track_id} id={d.data.marker_id}" for d in marker_dets] + # Pin pattern (from dimos/memory2/vis/space/rerun.py): thin + # vertical line from each marker with the label floating at the + # top so multi-marker labels never overlap the boxes. + MARKER_STEM = 1.0 + pin_strips = [[(cx, cy, cz), (cx, cy, cz + MARKER_STEM)] for (cx, cy, cz) in centers] + label_positions = [(cx, cy, cz + MARKER_STEM + 0.01) for (cx, cy, cz) in centers] rr.log( "world/raw_map/markers/fill", rr.Boxes3D( @@ -355,7 +361,6 @@ def main( quaternions=quaternions, colors=colors, fill_mode=rr.components.FillMode.Solid, - labels=labels, ), static=True, ) @@ -371,6 +376,18 @@ def main( ), static=True, ) + rr.log( + "world/raw_map/markers/pin", + rr.LineStrips3D(strips=pin_strips, colors=colors, radii=[0.005]), + static=True, + ) + rr.log( + "world/raw_map/markers/label", + rr.Points3D( + positions=label_positions, labels=labels, colors=colors, radii=[0.001] * n + ), + static=True, + ) if interp is not None: # PGO-corrected marker poses. interp(ts) maps raw_world → @@ -402,6 +419,12 @@ def main( corrected.rotation.w, ) ) + pgo_pin_strips = [ + [(cx, cy, cz), (cx, cy, cz + MARKER_STEM)] for (cx, cy, cz) in pgo_centers + ] + pgo_label_positions = [ + (cx, cy, cz + MARKER_STEM + 0.01) for (cx, cy, cz) in pgo_centers + ] rr.log( "world/pgo_map/markers/fill", rr.Boxes3D( @@ -410,7 +433,6 @@ def main( quaternions=pgo_quaternions, colors=colors, fill_mode=rr.components.FillMode.Solid, - labels=labels, ), static=True, ) @@ -426,6 +448,21 @@ def main( ), static=True, ) + rr.log( + "world/pgo_map/markers/pin", + rr.LineStrips3D(strips=pgo_pin_strips, colors=colors, radii=[0.005]), + static=True, + ) + rr.log( + "world/pgo_map/markers/label", + rr.Points3D( + positions=pgo_label_positions, + labels=labels, + colors=colors, + radii=[0.001] * n, + ), + static=True, + ) if export and pgo_map is not None: from pathlib import Path From a6d4dcc98f5671bd3e685546f8c994c93e71e411 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 21:24:32 +0800 Subject: [PATCH 23/36] marker eval first pass --- dimos/utils/cli/marker_eval.py | 225 +++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 dimos/utils/cli/marker_eval.py diff --git a/dimos/utils/cli/marker_eval.py b/dimos/utils/cli/marker_eval.py new file mode 100644 index 0000000000..47aa834c64 --- /dev/null +++ b/dimos/utils/cli/marker_eval.py @@ -0,0 +1,225 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Loop-closure tuning eval — runs the dimos map pipeline over all +hk_village recordings and reports two totals to minimize: + +- TOTAL_PGO_TIME (s): wall-clock for the PGO loop across all recordings +- TOTAL_SPREAD (m): per-recording sum of pairwise distances between + final-smoothed marker positions of the same marker_id (PGO-corrected), + summed across all marker_ids across all recordings. Smaller = tighter + loop closures. + +Usage: + uv run python -m dimos.utils.cli.marker_eval + uv run python -m dimos.utils.cli.marker_eval hk_village1 +""" + +from __future__ import annotations + +from concurrent.futures import ProcessPoolExecutor, as_completed +import multiprocessing as mp +import os +import time +from typing import Any + +from rich.console import Console +from rich.table import Table +import typer + +from dimos.mapping.relocalization.pgo import ( + LoopClosure, + keyframes_to_corrections, + make_interpolator, + pgo_keyframes, +) +from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.stream import Stream +from dimos.memory2.transform import QualityWindow, SpeedLimit +from dimos.memory2.type.observation import Observation +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.sensor_msgs.Image import Image +from dimos.perception.fiducial.marker_transformer import DetectMarkers +from dimos.robot.unitree.go2.connection import _camera_info_static +from dimos.utils.data import get_data + +DEFAULT_DATASETS = [f"hk_village{i}" for i in range(1, 7)] + + +def _pairwise_sum(pts: list[tuple[float, float, float]]) -> float: + """Sum of euclidean distances over all unordered pairs.""" + total = 0.0 + for i, a in enumerate(pts): + for b in pts[i + 1 :]: + dx, dy, dz = a[0] - b[0], a[1] - b[1], a[2] - b[2] + total += (dx * dx + dy * dy + dz * dz) ** 0.5 + return total + + +def _eval_recording( + name: str, + *, + marker_size: float, + marker_max_speed: float, + marker_max_rot_rate: float, + marker_quality_window: float, + marker_smoothing: float, +) -> tuple[float, float]: + """Returns (pgo_time_s, spread_m) for one recording.""" + db_path = get_data(f"{name}.db") + cam_info = _camera_info_static() + + store = SqliteStore(path=str(db_path)) + with store: + lidar = store.streams.lidar + + # --- PGO (timed) --- + # .tap() captures each keyframe as make_interpolator iterates the + # corrections stream, so no extra pass over lidar. + keyframes: list[Any] = [] + loops_out: list[LoopClosure] = [] + t0 = time.perf_counter() + kf_stream = pgo_keyframes(lidar, loop_closures_out=loops_out).tap( + lambda obs: keyframes.append(obs.data) + ) + interp = make_interpolator(keyframes_to_corrections(kf_stream)) + pgo_time = time.perf_counter() - t0 + + # --- Marker detection (same pipeline as dimos map / markers_rrd) --- + color_image = store.stream("color_image", Image) + xf = DetectMarkers( + camera_info=cam_info, + marker_length_m=marker_size, + smoothing_window=marker_smoothing, + ) + pipeline: Stream[Image] = color_image.transform( + QualityWindow(lambda img: img.sharpness, window=marker_quality_window) + ) + if marker_max_speed > 0: + pipeline = pipeline.transform( + SpeedLimit( + max_mps=marker_max_speed, + max_dps=marker_max_rot_rate if marker_max_rot_rate > 0 else None, + ) + ) + all_dets = pipeline.transform(xf).to_list() + + # Dedup by track_id → final smoothed pose per track. + by_track: dict[int, Observation[Any]] = {} + for d in all_dets: + by_track[d.data.track_id] = d + tracks = list(by_track.values()) + + # PGO-correct each track's pose; group by marker_id. + by_marker: dict[int, list[tuple[float, float, float]]] = {} + for d in tracks: + raw_tf = Transform( + translation=d.data.center, + rotation=d.data.orientation, + frame_id="world", + child_frame_id=f"marker_{d.data.marker_id}", + ts=d.ts, + ) + corrected = interp(d.ts) + raw_tf + t = corrected.translation + by_marker.setdefault(d.data.marker_id, []).append((t.x, t.y, t.z)) + + spread = sum(_pairwise_sum(v) for v in by_marker.values()) + return pgo_time, spread + + +def main( + datasets: list[str] = typer.Argument( + None, help="Recordings to eval; defaults to hk_village1..6" + ), + marker_size: float = typer.Option(0.1, "--marker-size"), + marker_max_speed: float = typer.Option(0.5, "--marker-max-speed"), + marker_max_rot_rate: float = typer.Option(50.0, "--marker-max-rot-rate"), + marker_quality_window: float = typer.Option(0.1, "--marker-quality-window"), + marker_smoothing: float = typer.Option(7.5, "--marker-smoothing"), + workers: int = typer.Option( + 0, "--workers", "-j", help="Parallel workers (0 = min(len(datasets), cpu_count))" + ), +) -> None: + names = datasets or DEFAULT_DATASETS + console = Console() + + n_workers = workers or min(len(names), os.cpu_count() or 1) + wall_start = time.perf_counter() + results: dict[str, tuple[float, float]] = {} + + if n_workers <= 1 or len(names) <= 1: + for name in names: + console.print(f"[dim]eval {name}...[/dim]") + results[name] = _eval_recording( + name, + marker_size=marker_size, + marker_max_speed=marker_max_speed, + marker_max_rot_rate=marker_max_rot_rate, + marker_quality_window=marker_quality_window, + marker_smoothing=marker_smoothing, + ) + else: + console.print(f"[dim]running {len(names)} recordings on {n_workers} workers[/dim]") + # "spawn" — workers are fresh interpreters. Forking after cv2/openmp + # have spun threads in the parent deadlocks because the threads + # don't survive fork; spawn sidesteps it entirely. + with ProcessPoolExecutor(max_workers=n_workers, mp_context=mp.get_context("spawn")) as ex: + futures = { + ex.submit( + _eval_recording, + name, + marker_size=marker_size, + marker_max_speed=marker_max_speed, + marker_max_rot_rate=marker_max_rot_rate, + marker_quality_window=marker_quality_window, + marker_smoothing=marker_smoothing, + ): name + for name in names + } + for f in as_completed(futures): + name = futures[f] + results[name] = f.result() + pgo_time, spread = results[name] + console.print(f" done {name:>14} ({pgo_time:5.2f}s, spread {spread:7.3f}m)") + + wall = time.perf_counter() - wall_start + + table = Table(title="loop-closure eval") + table.add_column("recording") + table.add_column("pgo_time_s", justify="right") + table.add_column("spread_m", justify="right") + + total_pgo = 0.0 + total_spread = 0.0 + for name in names: # original order + pgo_time, spread = results[name] + table.add_row(name, f"{pgo_time:.2f}", f"{spread:.3f}") + total_pgo += pgo_time + total_spread += spread + + table.add_section() + table.add_row( + "[bold]TOTAL[/bold]", + f"[bold]{total_pgo:.2f}[/bold]", + f"[bold]{total_spread:.3f}[/bold]", + ) + console.print(table) + print(f"TOTAL_PGO_TIME={total_pgo:.2f}") + print(f"TOTAL_SPREAD={total_spread:.3f}") + print(f"WALL_TIME={wall:.2f}") + + +if __name__ == "__main__": + typer.run(main) From 89eb996e05724c35a91edbcb87c7457bf9f859ce Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 21:43:50 +0800 Subject: [PATCH 24/36] extracted loop closure code from relocalization code --- .../loop_closure/eval.py} | 36 +++++++------------ .../{relocalization => loop_closure}/pgo.py | 0 .../test_pgo.py | 2 +- .../loop_closure/utils}/markers_rrd.py | 0 dimos/utils/cli/map.py | 2 +- 5 files changed, 15 insertions(+), 25 deletions(-) rename dimos/{utils/cli/marker_eval.py => mapping/loop_closure/eval.py} (88%) rename dimos/mapping/{relocalization => loop_closure}/pgo.py (100%) rename dimos/mapping/{relocalization => loop_closure}/test_pgo.py (99%) rename dimos/{utils/cli => mapping/loop_closure/utils}/markers_rrd.py (100%) diff --git a/dimos/utils/cli/marker_eval.py b/dimos/mapping/loop_closure/eval.py similarity index 88% rename from dimos/utils/cli/marker_eval.py rename to dimos/mapping/loop_closure/eval.py index 47aa834c64..5fe3f6ae4d 100644 --- a/dimos/utils/cli/marker_eval.py +++ b/dimos/mapping/loop_closure/eval.py @@ -22,8 +22,8 @@ loop closures. Usage: - uv run python -m dimos.utils.cli.marker_eval - uv run python -m dimos.utils.cli.marker_eval hk_village1 + uv run python -m dimos.mapping.loop_closure.marker_eval + uv run python -m dimos.mapping.loop_closure.marker_eval hk_village1 """ from __future__ import annotations @@ -34,11 +34,9 @@ import time from typing import Any -from rich.console import Console -from rich.table import Table import typer -from dimos.mapping.relocalization.pgo import ( +from dimos.mapping.loop_closure.pgo import ( LoopClosure, keyframes_to_corrections, make_interpolator, @@ -153,7 +151,6 @@ def main( ), ) -> None: names = datasets or DEFAULT_DATASETS - console = Console() n_workers = workers or min(len(names), os.cpu_count() or 1) wall_start = time.perf_counter() @@ -161,7 +158,7 @@ def main( if n_workers <= 1 or len(names) <= 1: for name in names: - console.print(f"[dim]eval {name}...[/dim]") + print(f"eval {name}...") results[name] = _eval_recording( name, marker_size=marker_size, @@ -171,7 +168,7 @@ def main( marker_smoothing=marker_smoothing, ) else: - console.print(f"[dim]running {len(names)} recordings on {n_workers} workers[/dim]") + print(f"running {len(names)} recordings on {n_workers} workers") # "spawn" — workers are fresh interpreters. Forking after cv2/openmp # have spun threads in the parent deadlocks because the threads # don't survive fork; spawn sidesteps it entirely. @@ -192,30 +189,23 @@ def main( name = futures[f] results[name] = f.result() pgo_time, spread = results[name] - console.print(f" done {name:>14} ({pgo_time:5.2f}s, spread {spread:7.3f}m)") + print(f" done {name:>14} pgo={pgo_time:5.2f}s spread={spread:7.3f}m") wall = time.perf_counter() - wall_start - table = Table(title="loop-closure eval") - table.add_column("recording") - table.add_column("pgo_time_s", justify="right") - table.add_column("spread_m", justify="right") - total_pgo = 0.0 total_spread = 0.0 + print() + print(f"{'recording':<14} {'pgo_time_s':>10} {'spread_m':>10}") + print("-" * 40) for name in names: # original order pgo_time, spread = results[name] - table.add_row(name, f"{pgo_time:.2f}", f"{spread:.3f}") + print(f"{name:<14} {pgo_time:>10.2f} {spread:>10.3f}") total_pgo += pgo_time total_spread += spread - - table.add_section() - table.add_row( - "[bold]TOTAL[/bold]", - f"[bold]{total_pgo:.2f}[/bold]", - f"[bold]{total_spread:.3f}[/bold]", - ) - console.print(table) + print("-" * 40) + print(f"{'TOTAL':<14} {total_pgo:>10.2f} {total_spread:>10.3f}") + print() print(f"TOTAL_PGO_TIME={total_pgo:.2f}") print(f"TOTAL_SPREAD={total_spread:.3f}") print(f"WALL_TIME={wall:.2f}") diff --git a/dimos/mapping/relocalization/pgo.py b/dimos/mapping/loop_closure/pgo.py similarity index 100% rename from dimos/mapping/relocalization/pgo.py rename to dimos/mapping/loop_closure/pgo.py diff --git a/dimos/mapping/relocalization/test_pgo.py b/dimos/mapping/loop_closure/test_pgo.py similarity index 99% rename from dimos/mapping/relocalization/test_pgo.py rename to dimos/mapping/loop_closure/test_pgo.py index 76f8b378eb..b6f5ea3d8e 100644 --- a/dimos/mapping/relocalization/test_pgo.py +++ b/dimos/mapping/loop_closure/test_pgo.py @@ -20,7 +20,7 @@ import pytest from scipy.spatial.transform import Rotation -from dimos.mapping.relocalization.pgo import ( +from dimos.mapping.loop_closure.pgo import ( Keyframe, PGOConfig, _obs_to_pose3, diff --git a/dimos/utils/cli/markers_rrd.py b/dimos/mapping/loop_closure/utils/markers_rrd.py similarity index 100% rename from dimos/utils/cli/markers_rrd.py rename to dimos/mapping/loop_closure/utils/markers_rrd.py diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 3c59c9088b..07555b375d 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -167,7 +167,7 @@ def main( loops: list[Any] = [] interp: Any | None = None if pgo: - from dimos.mapping.relocalization.pgo import ( + from dimos.mapping.loop_closure.pgo import ( LoopClosure, keyframes_to_corrections, make_interpolator, From 186a4bef1380777a9f53292ab37f33a258375a82 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 25 May 2026 21:46:46 +0800 Subject: [PATCH 25/36] better eval --- dimos/mapping/loop_closure/eval.py | 60 ++++++++---------------------- 1 file changed, 15 insertions(+), 45 deletions(-) diff --git a/dimos/mapping/loop_closure/eval.py b/dimos/mapping/loop_closure/eval.py index 5fe3f6ae4d..c41817b032 100644 --- a/dimos/mapping/loop_closure/eval.py +++ b/dimos/mapping/loop_closure/eval.py @@ -22,15 +22,12 @@ loop closures. Usage: - uv run python -m dimos.mapping.loop_closure.marker_eval - uv run python -m dimos.mapping.loop_closure.marker_eval hk_village1 + uv run python -m dimos.mapping.loop_closure.eval + uv run python -m dimos.mapping.loop_closure.eval hk_village1 """ from __future__ import annotations -from concurrent.futures import ProcessPoolExecutor, as_completed -import multiprocessing as mp -import os import time from typing import Any @@ -146,51 +143,24 @@ def main( marker_max_rot_rate: float = typer.Option(50.0, "--marker-max-rot-rate"), marker_quality_window: float = typer.Option(0.1, "--marker-quality-window"), marker_smoothing: float = typer.Option(7.5, "--marker-smoothing"), - workers: int = typer.Option( - 0, "--workers", "-j", help="Parallel workers (0 = min(len(datasets), cpu_count))" - ), ) -> None: names = datasets or DEFAULT_DATASETS - n_workers = workers or min(len(names), os.cpu_count() or 1) + # Sequential: PGO (Open3D) and marker detection (cv2/numpy) already + # saturate available cores internally via OpenMP — process-level + # parallelism only adds contention. wall_start = time.perf_counter() results: dict[str, tuple[float, float]] = {} - - if n_workers <= 1 or len(names) <= 1: - for name in names: - print(f"eval {name}...") - results[name] = _eval_recording( - name, - marker_size=marker_size, - marker_max_speed=marker_max_speed, - marker_max_rot_rate=marker_max_rot_rate, - marker_quality_window=marker_quality_window, - marker_smoothing=marker_smoothing, - ) - else: - print(f"running {len(names)} recordings on {n_workers} workers") - # "spawn" — workers are fresh interpreters. Forking after cv2/openmp - # have spun threads in the parent deadlocks because the threads - # don't survive fork; spawn sidesteps it entirely. - with ProcessPoolExecutor(max_workers=n_workers, mp_context=mp.get_context("spawn")) as ex: - futures = { - ex.submit( - _eval_recording, - name, - marker_size=marker_size, - marker_max_speed=marker_max_speed, - marker_max_rot_rate=marker_max_rot_rate, - marker_quality_window=marker_quality_window, - marker_smoothing=marker_smoothing, - ): name - for name in names - } - for f in as_completed(futures): - name = futures[f] - results[name] = f.result() - pgo_time, spread = results[name] - print(f" done {name:>14} pgo={pgo_time:5.2f}s spread={spread:7.3f}m") - + for name in names: + print(f"eval {name}...") + results[name] = _eval_recording( + name, + marker_size=marker_size, + marker_max_speed=marker_max_speed, + marker_max_rot_rate=marker_max_rot_rate, + marker_quality_window=marker_quality_window, + marker_smoothing=marker_smoothing, + ) wall = time.perf_counter() - wall_start total_pgo = 0.0 From b2b7afbcdbc2519e8956b625b98599cb48c2b9a0 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 15:40:32 +0800 Subject: [PATCH 26/36] observation pose refactor --- dimos/mapping/loop_closure/pgo.py | 26 ++--- dimos/mapping/loop_closure/test_pgo.py | 27 +++-- dimos/memory2/module.py | 5 +- dimos/memory2/observationstore/sqlite.py | 31 ++--- dimos/memory2/test_save.py | 2 +- dimos/memory2/test_stream.py | 2 +- dimos/memory2/transform.py | 32 ++--- dimos/memory2/type/filter.py | 10 +- dimos/memory2/type/observation.py | 141 ++++++++++++++++++----- dimos/memory2/vis/space/rerun.py | 13 ++- dimos/memory2/vis/space/space.py | 5 +- dimos/memory2/vis/space/svg.py | 7 +- dimos/msgs/geometry_msgs/Quaternion.py | 13 +++ dimos/msgs/sensor_msgs/PointCloud2.py | 2 +- dimos/utils/cli/map.py | 15 +-- 15 files changed, 200 insertions(+), 131 deletions(-) diff --git a/dimos/mapping/loop_closure/pgo.py b/dimos/mapping/loop_closure/pgo.py index af94921c9d..84a8a0fa24 100644 --- a/dimos/mapping/loop_closure/pgo.py +++ b/dimos/mapping/loop_closure/pgo.py @@ -170,17 +170,15 @@ def pgo_keyframes( for obs in stream: if on_frame is not None: on_frame(obs) - if obs.pose is None: + p = obs.pose_tuple + if p is None: continue - # Skip placeholder poses (origin position OR zero quaternion). - if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: + # Skip placeholder poses written before odom converges: + # all-zero translation OR all-zero (uninitialized) quaternion. + # An identity quaternion (qw=1) is valid and must not be filtered. + if p[0] == 0 and p[1] == 0 and p[2] == 0: continue - if ( - obs.pose[3] == 0 - and obs.pose[4] == 0 - and obs.pose[5] == 0 - and (obs.pose[6] == 0 or obs.pose[6] == 1) - ): + if p[3] == 0 and p[4] == 0 and p[5] == 0 and p[6] == 0: continue local_pose = _obs_to_pose3(obs) pgo.process(local_pose, obs.ts, obs.data) @@ -264,10 +262,11 @@ def apply_corrections( def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: for obs in upstream: - if obs.pose is None: + ps = obs.pose_stamped + if ps is None: yield obs continue - raw_tf = Transform.from_pose(FRAME_BODY, obs.pose_stamped) + raw_tf = Transform.from_pose(FRAME_BODY, ps) # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw. # Observation normalizes Transform back to 7-tuple via __post_init__. corrected = interp(obs.ts) + raw_tf @@ -297,9 +296,10 @@ def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3: """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`.""" import gtsam # type: ignore[import-not-found,import-untyped] - if obs.pose is None: + p = obs.pose_tuple + if p is None: raise LookupError("No pose set on this observation") - x, y, z, qx, qy, qz, qw = obs.pose + x, y, z, qx, qy, qz, qw = p return gtsam.Pose3( gtsam.Rot3.Quaternion(float(qw), float(qx), float(qy), float(qz)), gtsam.Point3(float(x), float(y), float(z)), diff --git a/dimos/mapping/loop_closure/test_pgo.py b/dimos/mapping/loop_closure/test_pgo.py index b6f5ea3d8e..0042c19b2c 100644 --- a/dimos/mapping/loop_closure/test_pgo.py +++ b/dimos/mapping/loop_closure/test_pgo.py @@ -87,14 +87,13 @@ def test_observation_normalizes_transform_pose(self) -> None: ts=1.0, ) obs: Observation[int] = Observation(id=0, ts=1.0, pose=tf, _data=0) - assert isinstance(obs.pose, tuple) - assert obs.pose[0] == pytest.approx(1.5) - assert obs.pose[6] == pytest.approx(0.927) + assert obs.pose_tuple is not None + assert obs.pose_tuple[0] == pytest.approx(1.5) + assert obs.pose_tuple[6] == pytest.approx(0.927) - # derive() also runs the normalization via __post_init__. + # derive() also re-runs the normalization. derived = obs.derive(data=0, pose=tf) - assert isinstance(derived.pose, tuple) - assert derived.pose == obs.pose + assert derived.pose_tuple == obs.pose_tuple def test_observation_normalizes_posestamped(self) -> None: from dimos.memory2.type.observation import Observation @@ -102,8 +101,7 @@ def test_observation_normalizes_posestamped(self) -> None: ps = PoseStamped(ts=1.0, position=(1.0, 2.0, 3.0), orientation=(0.0, 0.0, 0.0, 1.0)) obs: Observation[int] = Observation(id=0, ts=1.0, pose=ps, _data=0) - assert isinstance(obs.pose, tuple) - assert obs.pose == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0) + assert obs.pose_tuple == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0) def test_obs_to_pose3_roundtrip(self) -> None: from dimos.memory2.type.observation import Observation @@ -191,8 +189,8 @@ def test_apply_identity_corrections_preserves_poses(self) -> None: kfs = pgo_keyframes(lidar) corr = keyframes_to_corrections(kfs) corrected = apply_corrections(lidar, corr) - in_poses = [o.pose for o in lidar if o.pose is not None] - out_poses = [o.pose for o in corrected if o.pose is not None] + in_poses = [o.pose_tuple for o in lidar if o.pose_tuple is not None] + out_poses = [o.pose_tuple for o in corrected if o.pose_tuple is not None] assert len(in_poses) == len(out_poses) for p_in, p_out in zip(in_poses, out_poses, strict=True): for a, b in zip(p_in, p_out, strict=True): @@ -268,10 +266,11 @@ def test_pure_translation_shifts_poses(self) -> None: corrected = apply_corrections(lidar, corr) for obs in corrected: - assert obs.pose is not None - assert obs.pose[0] == pytest.approx(5.0, abs=1e-9) - assert obs.pose[1] == pytest.approx(0.0, abs=1e-9) - assert obs.pose[2] == pytest.approx(0.0, abs=1e-9) + p = obs.pose_tuple + assert p is not None + assert p[0] == pytest.approx(5.0, abs=1e-9) + assert p[1] == pytest.approx(0.0, abs=1e-9) + assert p[2] == pytest.approx(0.0, abs=1e-9) def test_passes_through_pose_none(self) -> None: mem = MemoryStore() diff --git a/dimos/memory2/module.py b/dimos/memory2/module.py index b584553bae..929d241882 100644 --- a/dimos/memory2/module.py +++ b/dimos/memory2/module.py @@ -234,7 +234,10 @@ def search(self, query: str) -> PoseStamped: def _similarity(obs: Observation[Any]) -> float: return cast("EmbeddedObservation[Any]", obs).similarity or 0.0 - return results.transform(peaks(key=_similarity, distance=1.0)).last().pose_stamped + best = results.transform(peaks(key=_similarity, distance=1.0)).last() + if best.pose_stamped is None: + raise LookupError("No pose on best search result") + return best.pose_stamped class RecorderConfig(MemoryModuleConfig): diff --git a/dimos/memory2/observationstore/sqlite.py b/dimos/memory2/observationstore/sqlite.py index 42f4193581..e82e867dc0 100644 --- a/dimos/memory2/observationstore/sqlite.py +++ b/dimos/memory2/observationstore/sqlite.py @@ -33,7 +33,7 @@ TimeRangeFilter, _xyz, ) -from dimos.memory2.type.observation import _UNLOADED, Observation +from dimos.memory2.type.observation import _UNLOADED, Observation, PoseTuple from dimos.memory2.utils.sqlite import open_disposable_sqlite_connection if TYPE_CHECKING: @@ -46,24 +46,6 @@ _IDENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") -def _decompose_pose(pose: Any) -> tuple[float, ...] | None: - if pose is None: - return None - if hasattr(pose, "position"): - pos = pose.position - orient = getattr(pose, "orientation", None) - x, y, z = float(pos.x), float(pos.y), float(getattr(pos, "z", 0.0)) - if orient is not None: - return (x, y, z, float(orient.x), float(orient.y), float(orient.z), float(orient.w)) - return (x, y, z, 0.0, 0.0, 0.0, 1.0) - if isinstance(pose, (list, tuple)): - vals = [float(v) for v in pose] - while len(vals) < 7: - vals.append(0.0 if len(vals) < 6 else 1.0) - return tuple(vals[:7]) - return None - - def _reconstruct_pose( x: float | None, y: float | None, @@ -72,7 +54,7 @@ def _reconstruct_pose( qy: float | None, qz: float | None, qw: float | None, -) -> tuple[float, ...] | None: +) -> PoseTuple | None: if x is None: return None return (x, y or 0.0, z or 0.0, qx or 0.0, qy or 0.0, qz or 0.0, qw or 1.0) @@ -328,17 +310,17 @@ def _row_to_obs(self, row: tuple[Any, ...], *, has_blob: bool = False) -> Observ # Scalar data stored inline in value column if value is not None: - return Observation(id=row_id, ts=ts, pose=pose, tags=tags, _data=value) + return Observation(id=row_id, ts=ts, pose_tuple=pose, tags=tags, _data=value) if has_blob and blob_data is not None: assert self._codec is not None, "codec is required for data loading" data = self._codec.decode(blob_data) - return Observation(id=row_id, ts=ts, pose=pose, tags=tags, _data=data) + return Observation(id=row_id, ts=ts, pose_tuple=pose, tags=tags, _data=data) return Observation( id=row_id, ts=ts, - pose=pose, + pose_tuple=pose, tags=tags, _data=_UNLOADED, ) @@ -353,7 +335,8 @@ def _ensure_tag_indexes(self, tags: dict[str, Any]) -> None: self._tag_indexes.add(key) def insert(self, obs: Observation[T]) -> int: - pose = _decompose_pose(obs.pose) + # Observation already normalizes pose to the storage 7-tuple — skip _decompose_pose. + pose = obs.pose_tuple tags_json = json.dumps(obs.tags) if obs.tags else "{}" value = obs._data if isinstance(obs._data, (int, float)) else None diff --git a/dimos/memory2/test_save.py b/dimos/memory2/test_save.py index eba8905974..ec4d627fc3 100644 --- a/dimos/memory2/test_save.py +++ b/dimos/memory2/test_save.py @@ -87,7 +87,7 @@ def test_save_preserves_data(self) -> None: obs = target.first() assert obs.data == 42 assert obs.ts == 1.0 - assert obs.pose == (1, 2, 3) + assert obs.pose_tuple == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0) assert obs.tags == {"label": "cat"} def test_save_with_transform(self) -> None: diff --git a/dimos/memory2/test_stream.py b/dimos/memory2/test_stream.py index f0aaf70137..32cb542b83 100644 --- a/dimos/memory2/test_stream.py +++ b/dimos/memory2/test_stream.py @@ -485,7 +485,7 @@ def test_derive_preserves_metadata(self): derived = obs.derive(data="transformed") assert derived.id == 42 assert derived.ts == 1.5 - assert derived.pose == (1, 2, 3) + assert derived.pose_tuple == (1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0) assert derived.tags == {"k": "v"} assert derived.data == "transformed" diff --git a/dimos/memory2/transform.py b/dimos/memory2/transform.py index a79ccf36b8..328c1de647 100644 --- a/dimos/memory2/transform.py +++ b/dimos/memory2/transform.py @@ -179,12 +179,11 @@ def speed() -> FnIterTransformer[Any, float]: def _speed(upstream: Iterator[Observation[Any]]) -> Iterator[Observation[float]]: prev: Observation[Any] | None = None for obs in upstream: - if prev is not None and obs.pose is not None and prev.pose is not None: - dx = obs.pose[0] - prev.pose[0] - dy = obs.pose[1] - prev.pose[1] - dz = obs.pose[2] - prev.pose[2] + p = obs.pose + pp = prev.pose if prev is not None else None + if prev is not None and p is not None and pp is not None: dt = obs.ts - prev.ts - v = math.sqrt(dx * dx + dy * dy + dz * dz) / dt if dt > 0 else 0.0 + v = (p.position - pp.position).length() / dt if dt > 0 else 0.0 yield obs.derive(data=v) prev = obs @@ -225,28 +224,17 @@ def __call__(self, upstream: Iterator[Observation[T]]) -> Iterator[Observation[T max_dps = self.max_dps max_rps = math.radians(max_dps) if max_dps is not None else None for obs in upstream: - if obs.pose is None: + p = obs.pose + if p is None: continue - if prev is not None and prev.pose is not None: + pp = prev.pose if prev is not None else None + if prev is not None and pp is not None: dt = obs.ts - prev.ts if dt > 0: - dx = obs.pose[0] - prev.pose[0] - dy = obs.pose[1] - prev.pose[1] - dz = obs.pose[2] - prev.pose[2] - v = math.sqrt(dx * dx + dy * dy + dz * dz) / dt + v = (p.position - pp.position).length() / dt ok = v <= max_mps if ok and max_rps is not None: - # rotation angle between two unit quaternions. - # |q · q'| collapses the double-cover sign ambiguity; - # clamp guards against numerical drift past 1. - dot = ( - obs.pose[3] * prev.pose[3] - + obs.pose[4] * prev.pose[4] - + obs.pose[5] * prev.pose[5] - + obs.pose[6] * prev.pose[6] - ) - angle = 2.0 * math.acos(min(1.0, abs(dot))) - ok = (angle / dt) <= max_rps + ok = (p.orientation.angle_to(pp.orientation) / dt) <= max_rps if ok: yield obs prev = obs diff --git a/dimos/memory2/type/filter.py b/dimos/memory2/type/filter.py index af453498fd..ddc0f86eb5 100644 --- a/dimos/memory2/type/filter.py +++ b/dimos/memory2/type/filter.py @@ -79,17 +79,15 @@ class NearFilter(Filter): radius: float = 0.0 def matches(self, obs: Observation[Any]) -> bool: - if obs.pose is None or self.pose is None: + p2 = obs.pose_tuple + if p2 is None or self.pose is None: return False p1 = self.pose - p2 = obs.pose - # Support both raw (x,y,z) tuples and PoseStamped objects + # NearFilter.pose accepts raw (x,y,z) tuples or PoseStamped-like objects. if hasattr(p1, "position"): p1 = p1.position - if hasattr(p2, "position"): - p2 = p2.position x1, y1, z1 = _xyz(p1) - x2, y2, z2 = _xyz(p2) + x2, y2, z2 = p2[0], p2[1], p2[2] dist_sq = (x1 - x2) ** 2 + (y1 - y2) ** 2 + (z1 - z2) ** 2 return dist_sq <= self.radius**2 diff --git a/dimos/memory2/type/observation.py b/dimos/memory2/type/observation.py index 179b48744e..038d05f161 100644 --- a/dimos/memory2/type/observation.py +++ b/dimos/memory2/type/observation.py @@ -14,7 +14,7 @@ from __future__ import annotations -from dataclasses import dataclass, field, fields +from dataclasses import dataclass, fields import sys import threading from typing import TYPE_CHECKING, Any, Generic, TypeVar, cast @@ -28,11 +28,14 @@ from collections.abc import Callable from dimos.models.embedding.base import Embedding + from dimos.msgs.geometry_msgs.Pose import Pose from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped T = TypeVar("T") R = TypeVar("R") +PoseTuple = tuple[float, float, float, float, float, float, float] + class _Unloaded: """Sentinel indicating data has not been loaded yet.""" @@ -46,18 +49,33 @@ def __repr__(self) -> str: _UNLOADED = _Unloaded() -def _normalize_pose(p: Any) -> tuple[float, float, float, float, float, float, float] | None: +def _to_tuple(p: Any) -> PoseTuple | None: """Coerce common pose shapes to the storage 7-tuple `(x, y, z, qx, qy, qz, qw)`. - Accepts: `None`, an already-normalized tuple, or any object with + Accepts: `None`, a 3- or 7-tuple, or any object with `translation`+`rotation` (Transform) or `position`+`orientation` - (Pose / PoseStamped) attributes. Duck-typed to avoid importing - `dimos.msgs.geometry_msgs` at module load. + (Pose / PoseStamped) attributes. 3-tuples are padded with the identity + quaternion. Duck-typed to avoid importing `dimos.msgs.geometry_msgs` + at module load. """ - if p is None or isinstance(p, tuple): - return p - trans = getattr(p, "translation", None) or getattr(p, "position", None) - rot = getattr(p, "rotation", None) or getattr(p, "orientation", None) + if p is None: + return None + if isinstance(p, tuple): + if len(p) == 7: + return cast("PoseTuple", tuple(float(v) for v in p)) + if len(p) == 3: + x, y, z = p + return (float(x), float(y), float(z), 0.0, 0.0, 0.0, 1.0) + raise TypeError(f"Pose tuple must have length 3 or 7, got {len(p)}") + # Use ``is not None`` rather than ``or`` — a Vector3 at the origin is + # falsy but valid, and falling through to ``position`` on a Transform + # would crash. + trans = getattr(p, "translation", None) + if trans is None: + trans = getattr(p, "position", None) + rot = getattr(p, "rotation", None) + if rot is None: + rot = getattr(p, "orientation", None) if trans is None or rot is None: raise TypeError( f"Cannot coerce {type(p).__name__} to a pose tuple — expected " @@ -74,36 +92,70 @@ def _normalize_pose(p: Any) -> tuple[float, float, float, float, float, float, f ) -@dataclass +@dataclass(init=False) class Observation(Generic[T]): """A single timestamped observation with optional spatial pose and metadata. - `pose` is stored as a 7-tuple `(x, y, z, qx, qy, qz, qw)`. Passing a - `Transform`, `Pose`, or `PoseStamped` to the constructor or to `derive` - is normalized to the 7-tuple form automatically. + Pose is stored internally as a 7-tuple ``(x, y, z, qx, qy, qz, qw)``. + Use :attr:`pose` for a typed :class:`Pose` object (lazy), or + :attr:`pose_tuple` for direct field access in hot loops. """ id: int ts: float - data_type: type = object - pose: Any | None = None - tags: dict[str, Any] = field(default_factory=dict) - _data: T | _Unloaded = field(default=_UNLOADED, repr=False) - _loader: Callable[[], T] | None = field(default=None, repr=False) - _data_lock: threading.Lock = field(default_factory=threading.Lock, repr=False) + data_type: type + pose_tuple: PoseTuple | None + tags: dict[str, Any] + _data: T | _Unloaded + _loader: Callable[[], T] | None + _data_lock: threading.Lock + + def __init__( + self, + id: int = 0, + ts: float = 0.0, + *, + data_type: type = object, + pose: Any | None = None, + pose_tuple: PoseTuple | None = None, + tags: dict[str, Any] | None = None, + _data: T | _Unloaded = _UNLOADED, + _loader: Callable[[], T] | None = None, + _data_lock: threading.Lock | None = None, + ) -> None: + self.id = id + self.ts = ts + self.data_type = data_type + # `pose` wins if explicitly supplied: derive()/tag() always re-pass + # the current `pose_tuple` from fields(), so an override needs + # to take precedence over it. + self.pose_tuple = _to_tuple(pose) if pose is not None else pose_tuple + self.tags = tags if tags is not None else {} + self._data = _data + self._loader = _loader + self._data_lock = _data_lock if _data_lock is not None else threading.Lock() + + @property + def pose(self) -> Pose | None: + """Typed :class:`Pose` (or None). Allocates per access — read :attr:`pose_tuple` in hot loops.""" + if self.pose_tuple is None: + return None + from dimos.msgs.geometry_msgs.Pose import Pose - def __post_init__(self) -> None: - self.pose = _normalize_pose(self.pose) + return Pose(*self.pose_tuple) @property - def pose_stamped(self) -> PoseStamped: + def pose_stamped(self) -> PoseStamped | None: + """Typed :class:`PoseStamped` (or None) carrying this observation's ts.""" + if self.pose_tuple is None: + return None from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped - if self.pose is None: - raise LookupError("No pose set on this observation") - x, y, z, qx, qy, qz, qw = self.pose - ps: PoseStamped = PoseStamped(ts=self.ts, position=(x, y, z), orientation=(qx, qy, qz, qw)) - return ps + x, y, z, qx, qy, qz, qw = self.pose_tuple + return cast( + "PoseStamped", + PoseStamped(ts=self.ts, position=(x, y, z), orientation=(qx, qy, qz, qw)), + ) @property def data(self) -> T: @@ -150,9 +202,38 @@ def tag(self, **tags: Any) -> Self: return type(self)(**kwargs) -@dataclass +@dataclass(init=False) class EmbeddedObservation(Observation[T]): """Observation enriched with a vector embedding and optional similarity score.""" - embedding: Embedding | None = None - similarity: float | None = None + embedding: Embedding | None + similarity: float | None + + def __init__( + self, + id: int = 0, + ts: float = 0.0, + *, + data_type: type = object, + pose: Any | None = None, + pose_tuple: PoseTuple | None = None, + tags: dict[str, Any] | None = None, + embedding: Embedding | None = None, + similarity: float | None = None, + _data: T | _Unloaded = _UNLOADED, + _loader: Callable[[], T] | None = None, + _data_lock: threading.Lock | None = None, + ) -> None: + super().__init__( + id=id, + ts=ts, + data_type=data_type, + pose=pose, + pose_tuple=pose_tuple, + tags=tags, + _data=_data, + _loader=_loader, + _data_lock=_data_lock, + ) + self.embedding = embedding + self.similarity = similarity diff --git a/dimos/memory2/vis/space/rerun.py b/dimos/memory2/vis/space/rerun.py index 435097e77c..1929ecef71 100644 --- a/dimos/memory2/vis/space/rerun.py +++ b/dimos/memory2/vis/space/rerun.py @@ -238,10 +238,13 @@ def render(space: Space, app_id: str = "space", spawn: bool = True) -> None: for i, obs in enumerate(observations): path = f"scene/observations/{i}" data = obs.data + ps = obs.pose_stamped + if ps is None: + continue img = _as_image(data) if img is not None: # Apply base→optical extrinsics for camera frustum rendering - world_T_optical = Transform.from_pose("world", obs.pose_stamped) + _BASE_TO_OPTICAL + world_T_optical = Transform.from_pose("world", ps) + _BASE_TO_OPTICAL rr.log(path, world_T_optical.to_pose().to_rerun(), static=True) h, w = img.shape[:2] focal = max(w, h) @@ -257,13 +260,13 @@ def render(space: Space, app_id: str = "space", spawn: bool = True) -> None: ) rr.log(f"{path}/image", img.to_rerun(), static=True) elif isinstance(data, PointCloud2): - rr.log(path, obs.pose_stamped.to_rerun(), static=True) + rr.log(path, ps.to_rerun(), static=True) rr.log(f"{path}/pointcloud", data.to_rerun(), static=True) elif isinstance(data, (int, float)): rr.log( path, rr.Points3D( - positions=[[obs.pose_stamped.x, obs.pose_stamped.y, 0]], + positions=[[ps.x, ps.y, 0]], labels=[str(data)], radii=[0.025], ), @@ -283,7 +286,7 @@ def render(space: Space, app_id: str = "space", spawn: bool = True) -> None: if line: lines.append(line) label = "\n".join(lines) - x, y = obs.pose_stamped.x, obs.pose_stamped.y + x, y = ps.x, ps.y # Pin: line from ground up, label at the tip rr.log( f"{path}/pin", @@ -306,7 +309,7 @@ def render(space: Space, app_id: str = "space", spawn: bool = True) -> None: else: rr.log( path, - rr.Points3D(positions=[[obs.pose_stamped.x, obs.pose_stamped.y, 0]], radii=[0.05]), + rr.Points3D(positions=[[ps.x, ps.y, 0]], radii=[0.05]), static=True, ) diff --git a/dimos/memory2/vis/space/space.py b/dimos/memory2/vis/space/space.py index 4ce973b133..cf7d8be0b3 100644 --- a/dimos/memory2/vis/space/space.py +++ b/dimos/memory2/vis/space/space.py @@ -96,7 +96,10 @@ def add(self, element: Any, **kwargs: Any) -> Space: for item in element: v = _autocolor_value(item) if v is not None and isinstance(item, Observation): - self._elements.append(Arrow(msg=item.pose_stamped, color=color_range(v))) + ps = item.pose_stamped + if ps is None: + continue + self._elements.append(Arrow(msg=ps, color=color_range(v))) else: self.add(item, **kwargs) else: diff --git a/dimos/memory2/vis/space/svg.py b/dimos/memory2/vis/space/svg.py index e54da044da..f95119973b 100644 --- a/dimos/memory2/vis/space/svg.py +++ b/dimos/memory2/vis/space/svg.py @@ -291,12 +291,13 @@ def _render_element(el: SpaceElement, b: Bounds) -> str: return _render_occupancy_grid(simple_inflate(height_cost_occupancy(el), 0.05), b) elif isinstance(el, Observation): - if el.pose is None: + ps = el.pose_stamped + if ps is None: return "" if el.data_type == float: - return _render_arrow(Arrow(msg=el.pose_stamped, color="#ff0000"), b) + return _render_arrow(Arrow(msg=ps, color="#ff0000"), b) else: - return _render_arrow(Arrow(msg=el.pose_stamped), b) + return _render_arrow(Arrow(msg=ps), b) else: return f"" diff --git a/dimos/msgs/geometry_msgs/Quaternion.py b/dimos/msgs/geometry_msgs/Quaternion.py index e3d64d4bab..42e4dc730d 100644 --- a/dimos/msgs/geometry_msgs/Quaternion.py +++ b/dimos/msgs/geometry_msgs/Quaternion.py @@ -16,6 +16,7 @@ from collections.abc import Sequence from io import BytesIO +import math import struct from typing import TYPE_CHECKING, BinaryIO, TypeAlias @@ -221,6 +222,18 @@ def __mul__(self, other: Quaternion) -> Quaternion: return Quaternion(x, y, z, w) + def dot(self, other: Quaternion) -> float: + """4D inner product. ``|q.dot(q')| == 1`` iff same orientation (up to sign).""" + return float(self.x * other.x + self.y * other.y + self.z * other.z + self.w * other.w) + + def angle_to(self, other: Quaternion) -> float: + """Smallest rotation angle (radians) between two unit quaternions. + + ``abs(self.dot(other))`` collapses the double-cover sign ambiguity; + the ``min(1.0, ...)`` clamps against numerical drift past 1. + """ + return 2.0 * math.acos(min(1.0, abs(self.dot(other)))) + def conjugate(self) -> Quaternion: """Return the conjugate of the quaternion. diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index add056b1a3..7617f09e58 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -730,7 +730,7 @@ def to_rerun( if mode == "points": return rr.Points3D( - positions=points, colors=point_colors, class_ids=class_ids, sizes=voxel_size + positions=points, colors=point_colors, class_ids=class_ids, radii=voxel_size / 2 ) elif mode == "boxes": half = voxel_size / 2 diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 5030c4fb05..b84d6e8dad 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -140,16 +140,13 @@ def main( # so it stays cheap (no pointcloud loading). seen: dict[tuple[int, int, int], Observation[Any]] = {} for obs in lidar: - if obs.pose is None: + p = obs.pose_tuple + if p is None: continue # Reject placeholder poses at the world origin. - if obs.pose[0] == 0 and obs.pose[1] == 0 and obs.pose[2] == 0: + if p[0] == 0 and p[1] == 0 and p[2] == 0: continue - cell = ( - int(obs.pose[0] / pgo_tol), - int(obs.pose[1] / pgo_tol), - int(obs.pose[2] / pgo_tol), - ) + cell = (int(p[0] / pgo_tol), int(p[1] / pgo_tol), int(p[2] / pgo_tol)) seen[cell] = obs n_kept = len(seen) @@ -159,7 +156,7 @@ def main( # Dict insertion order = lidar iteration order = chronological. # `seen` only contains entries with non-None poses (filtered above). path: list[tuple[float, float, float]] = [ - (obs.pose[0], obs.pose[1], obs.pose[2]) for obs in seen.values() if obs.pose is not None + (p[0], p[1], p[2]) for obs in seen.values() if (p := obs.pose_tuple) is not None ] pgo_map = None @@ -209,7 +206,7 @@ def main( try: for obs in lidar: full_pb(obs) - if obs.pose is None or len(obs.data) == 0: + if obs.pose_tuple is None or len(obs.data) == 0: continue full_grid.add_frame(obs.data.transform(interp(obs.ts))) full_pgo_map = full_grid.get_global_pointcloud2() From ea2c9f4b05f0edbd58a698e6ecd23abe0c0266d4 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 16:11:25 +0800 Subject: [PATCH 27/36] pgo matrices refactor --- dimos/mapping/loop_closure/pgo.py | 51 ++++++++++---------------- dimos/mapping/loop_closure/test_pgo.py | 44 ++++++++-------------- dimos/msgs/geometry_msgs/Quaternion.py | 8 ++++ dimos/msgs/geometry_msgs/Transform.py | 19 ++++++++++ 4 files changed, 63 insertions(+), 59 deletions(-) diff --git a/dimos/mapping/loop_closure/pgo.py b/dimos/mapping/loop_closure/pgo.py index 84a8a0fa24..de6c56547a 100644 --- a/dimos/mapping/loop_closure/pgo.py +++ b/dimos/mapping/loop_closure/pgo.py @@ -207,15 +207,15 @@ def keyframes_to_corrections(keyframes: Stream[Keyframe]) -> Stream[Transform]: def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Transform]: """Materialize corrections once; return a fast ts -> Transform lookup.""" ts_list: list[float] = [] - R_list: list[np.ndarray] = [] + quat_list: list[np.ndarray] = [] t_list: list[np.ndarray] = [] for obs in corrections: - R, t = _r_t_from_transform(obs.data) + tf = obs.data # obs.ts is authoritative; obs.data.ts can be mutated by Transform's # ts=0.0 -> time.time() fallback in its constructor. ts_list.append(obs.ts) - R_list.append(R) - t_list.append(t) + quat_list.append(tf.rotation.to_numpy()) + t_list.append(tf.translation.to_numpy()) if not ts_list: raise ValueError("empty corrections stream") @@ -224,13 +224,12 @@ def make_interpolator(corrections: Stream[Transform]) -> Callable[[float], Trans # general path handles it; clip-to-endpoints behavior is unchanged. if len(ts_list) == 1: ts_list.append(ts_list[0] + 1e-6) - R_list.append(R_list[0]) + quat_list.append(quat_list[0]) t_list.append(t_list[0]) ts_arr = np.array(ts_list) - R_stack = np.stack(R_list) t_stack = np.stack(t_list) - slerp = Slerp(ts_arr, Rotation.from_matrix(R_stack)) + slerp = Slerp(ts_arr, Rotation.from_quat(np.stack(quat_list))) def interp(ts: float) -> Transform: ts_clip = float(np.clip(ts, ts_arr[0], ts_arr[-1])) @@ -244,7 +243,13 @@ def interp(ts: float) -> Transform: t_lo, t_hi = ts_arr[idx - 1], ts_arr[idx] alpha = (ts_clip - t_lo) / (t_hi - t_lo) if t_hi > t_lo else 0.0 t = (1 - alpha) * t_stack[idx - 1] + alpha * t_stack[idx] - return _transform_from_r_t(R, t, ts=float(ts)) + return Transform( + translation=Vector3(t), + rotation=Quaternion.from_rotation_matrix(R), + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_WORLD_RAW, + ts=float(ts), + ) return interp @@ -268,30 +273,12 @@ def xf(upstream: Iterator[Observation[T]]) -> Iterator[Observation[T]]: continue raw_tf = Transform.from_pose(FRAME_BODY, ps) # Transform.__add__ composes: (T_corr + T_raw) applies T_corr after T_raw. - # Observation normalizes Transform back to 7-tuple via __post_init__. corrected = interp(obs.ts) + raw_tf yield obs.derive(data=obs.data, pose=corrected) return stream.transform(xf) -def _r_t_from_transform(tf: Transform) -> tuple[np.ndarray, np.ndarray]: - q = tf.rotation - R = Rotation.from_quat([q.x, q.y, q.z, q.w]).as_matrix() - t = np.array([tf.translation.x, tf.translation.y, tf.translation.z]) - return R, t - - -def _transform_from_r_t(R: np.ndarray, t: np.ndarray, *, ts: float) -> Transform: - return Transform( - translation=Vector3(float(t[0]), float(t[1]), float(t[2])), - rotation=Quaternion.from_rotation_matrix(R), - frame_id=FRAME_WORLD_CORRECTED, - child_frame_id=FRAME_WORLD_RAW, - ts=ts, - ) - - def _obs_to_pose3(obs: Observation[Any]) -> gtsam.Pose3: """Convert an observation's stored pose tuple directly to a `gtsam.Pose3`.""" import gtsam # type: ignore[import-not-found,import-untyped] @@ -669,9 +656,11 @@ def _icp( if float(result.fitness) == 0.0: return Transform.identity(), float("inf") - T_mat = result.transformation.numpy() - rmse = float(result.inlier_rmse) - return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse - T_mat = result.transformation.numpy() + tf = Transform.from_matrix( + result.transformation.numpy(), + ts=source.ts, + frame_id=FRAME_WORLD_CORRECTED, + child_frame_id=FRAME_WORLD_RAW, + ) rmse = float(result.inlier_rmse) - return _transform_from_r_t(T_mat[:3, :3], T_mat[:3, 3], ts=source.ts), rmse * rmse + return tf, rmse * rmse diff --git a/dimos/mapping/loop_closure/test_pgo.py b/dimos/mapping/loop_closure/test_pgo.py index 0042c19b2c..61a744c893 100644 --- a/dimos/mapping/loop_closure/test_pgo.py +++ b/dimos/mapping/loop_closure/test_pgo.py @@ -25,8 +25,6 @@ PGOConfig, _obs_to_pose3, _pose3_to_transform, - _r_t_from_transform, - _transform_from_r_t, apply_corrections, keyframes_to_corrections, make_interpolator, @@ -67,16 +65,6 @@ def test_rejects_unknown_fields(self) -> None: class TestTransformHelpers: - def test_r_t_transform_roundtrip(self) -> None: - rng = np.random.default_rng(0) - R_in = _random_R(rng) - t_in = rng.uniform(-5, 5, size=3) - tf = _transform_from_r_t(R_in, t_in, ts=1.23) - R_out, t_out = _r_t_from_transform(tf) - np.testing.assert_allclose(R_out, R_in, atol=1e-10) - np.testing.assert_allclose(t_out, t_in, atol=1e-10) - assert tf.ts == 1.23 - def test_observation_normalizes_transform_pose(self) -> None: """Constructing/deriving with pose=Transform should coerce to 7-tuple.""" from dimos.memory2.type.observation import Observation @@ -109,7 +97,7 @@ def test_obs_to_pose3_roundtrip(self) -> None: rng = np.random.default_rng(4) R = _random_R(rng) t = rng.uniform(-3, 3, size=3) - tf = _transform_from_r_t(R, t, ts=1.0) + tf = Transform(translation=Vector3(t), rotation=Quaternion.from_rotation_matrix(R), ts=1.0) obs: Observation[int] = Observation(id=0, ts=1.0, pose=tf, _data=0) p = _obs_to_pose3(obs) np.testing.assert_allclose(p.rotation().matrix(), R, atol=1e-9) @@ -123,9 +111,8 @@ def test_pose3_to_transform(self) -> None: t = rng.uniform(-3, 3, size=3) p = gtsam.Pose3(gtsam.Rot3(R), gtsam.Point3(t)) tf = _pose3_to_transform(p, ts=7.89) - R_out, t_out = _r_t_from_transform(tf) - np.testing.assert_allclose(R_out, R, atol=1e-10) - np.testing.assert_allclose(t_out, t, atol=1e-10) + np.testing.assert_allclose(tf.rotation.to_rotation_matrix(), R, atol=1e-10) + np.testing.assert_allclose(tf.translation.to_numpy(), t, atol=1e-10) def test_pose3_to_transform_with_frames(self) -> None: import gtsam @@ -137,9 +124,8 @@ def test_pose3_to_transform_with_frames(self) -> None: tf = _pose3_to_transform(p, ts=1.0, frame_id="world_corrected", child_frame_id="body") assert tf.frame_id == "world_corrected" assert tf.child_frame_id == "body" - R_out, t_out = _r_t_from_transform(tf) - np.testing.assert_allclose(R_out, R, atol=1e-10) - np.testing.assert_allclose(t_out, t, atol=1e-10) + np.testing.assert_allclose(tf.rotation.to_rotation_matrix(), R, atol=1e-10) + np.testing.assert_allclose(tf.translation.to_numpy(), t, atol=1e-10) def _make_lidar_stream(n_frames: int = 12, points_per_frame: int = 500) -> Stream[PointCloud2]: @@ -213,8 +199,11 @@ def test_empty_stream_raises(self) -> None: def test_single_keyframe_returns_constant(self) -> None: R = Rotation.from_euler("z", np.pi / 4).as_matrix() - t = np.array([1.0, 2.0, 3.0]) - only = _transform_from_r_t(R, t, ts=10.0) + only = Transform( + translation=Vector3(1.0, 2.0, 3.0), + rotation=Quaternion.from_rotation_matrix(R), + ts=10.0, + ) interp = make_interpolator(_make_corrections([only])) for query_ts in (0.0, 10.0, 100.0): out = interp(query_ts) @@ -225,9 +214,8 @@ def test_single_keyframe_returns_constant(self) -> None: def test_out_of_range_clips_to_endpoints(self) -> None: # Note: Transform's constructor maps ts=0.0 -> time.time(); use ts>0 # so test timestamps are deterministic. - R = np.eye(3) - a = _transform_from_r_t(R, np.array([0.0, 0.0, 0.0]), ts=1.0) - b = _transform_from_r_t(R, np.array([10.0, 0.0, 0.0]), ts=11.0) + a = Transform(translation=Vector3(0.0, 0.0, 0.0), ts=1.0) + b = Transform(translation=Vector3(10.0, 0.0, 0.0), ts=11.0) # _make_corrections uses tf.ts; obs.ts ends up the same. mem = MemoryStore() stream: Stream[Transform] = mem.stream("corrections", Transform) @@ -257,8 +245,8 @@ def test_pure_translation_shifts_poses(self) -> None: ) # Constant correction: translate by (5, 0, 0), identity rotation. - c_a = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=1.0) - c_b = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=3.0) + c_a = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=1.0) + c_b = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=3.0) mem2 = MemoryStore() corr: Stream[Transform] = mem2.stream("corrections", Transform) corr.append(c_a, ts=1.0) @@ -280,8 +268,8 @@ def test_passes_through_pose_none(self) -> None: ts=1.0, pose=None, ) - c_a = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=1.0) - c_b = _transform_from_r_t(np.eye(3), np.array([5.0, 0.0, 0.0]), ts=2.0) + c_a = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=1.0) + c_b = Transform(translation=Vector3(5.0, 0.0, 0.0), ts=2.0) mem2 = MemoryStore() corr: Stream[Transform] = mem2.stream("corrections", Transform) corr.append(c_a, ts=1.0) diff --git a/dimos/msgs/geometry_msgs/Quaternion.py b/dimos/msgs/geometry_msgs/Quaternion.py index 42e4dc730d..e98e7bcc19 100644 --- a/dimos/msgs/geometry_msgs/Quaternion.py +++ b/dimos/msgs/geometry_msgs/Quaternion.py @@ -164,6 +164,14 @@ def from_rotation_matrix(cls, matrix: np.ndarray) -> Quaternion: quat = rotation.as_quat() # Returns [x, y, z, w] return cls(quat[0], quat[1], quat[2], quat[3]) + def to_rotation_matrix(self) -> np.ndarray: + """Convert quaternion to a 3x3 rotation matrix.""" + from scipy.spatial.transform import ( + Rotation, # ~330ms: deferred to avoid startup cost + ) + + return np.asarray(Rotation.from_quat([self.x, self.y, self.z, self.w]).as_matrix()) + def to_euler(self) -> Vector3: """Convert quaternion to Euler angles (roll, pitch, yaw) in radians. diff --git a/dimos/msgs/geometry_msgs/Transform.py b/dimos/msgs/geometry_msgs/Transform.py index 9b08c8dadd..44bd6b2440 100644 --- a/dimos/msgs/geometry_msgs/Transform.py +++ b/dimos/msgs/geometry_msgs/Transform.py @@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, BinaryIO if TYPE_CHECKING: + import numpy as np import rerun as rr from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped @@ -216,6 +217,24 @@ def to_pose(self, **kwargs: object) -> PoseStamped: ) return result + @classmethod + def from_matrix( + cls, + matrix: np.ndarray, + *, + ts: float = 0.0, + frame_id: str = "world", + child_frame_id: str = "unset", + ) -> Transform: + """Build a Transform from a 4x4 homogeneous transformation matrix.""" + return cls( + translation=Vector3(matrix[:3, 3]), + rotation=Quaternion.from_rotation_matrix(matrix[:3, :3]), + frame_id=frame_id, + child_frame_id=child_frame_id, + ts=ts, + ) + def to_matrix(self) -> np.ndarray: # type: ignore[name-defined] """Convert Transform to a 4x4 transformation matrix. From 6e7825f7e182575817905cb9d9080a69cf4b16bd Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 16:25:48 +0800 Subject: [PATCH 28/36] pc2 fix --- dimos/msgs/sensor_msgs/PointCloud2.py | 4 ++++ dimos/msgs/sensor_msgs/test_PointCloud2.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index 7617f09e58..d31c20d43c 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -373,6 +373,10 @@ def transform(self, tf: Transform) -> PointCloud2: new_pcd = o3d.geometry.PointCloud() new_pcd.points = o3d.utility.Vector3dVector(transformed_xyz) + # Colors are frame-independent — carry them through. + if self.pointcloud.has_colors(): + new_pcd.colors = self.pointcloud.colors + return PointCloud2( pointcloud=new_pcd, frame_id=tf.frame_id, diff --git a/dimos/msgs/sensor_msgs/test_PointCloud2.py b/dimos/msgs/sensor_msgs/test_PointCloud2.py index 56ae622a8d..797e266ed3 100644 --- a/dimos/msgs/sensor_msgs/test_PointCloud2.py +++ b/dimos/msgs/sensor_msgs/test_PointCloud2.py @@ -32,8 +32,8 @@ def test_lcm_encode_decode() -> None: decoded = PointCloud2.lcm_decode(binary_msg) # 1. Check number of points - original_points, _ = lidar_msg.as_numpy() - decoded_points, _ = decoded.as_numpy() + original_points = lidar_msg.as_numpy() + decoded_points = decoded.as_numpy() assert len(original_points) == len(decoded_points), ( f"Point count mismatch: {len(original_points)} vs {len(decoded_points)}" From 9ac43a249bc75056043f06fa4aece047b482c532 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 16:35:20 +0800 Subject: [PATCH 29/36] map.py: use VoxelMapTransformer for all three accumulations Replaces the three hand-rolled VoxelGrid loops (pgo pass 2, full_pgo, raw) with a small _accumulate() helper that drives VoxelMapTransformer in batch mode. Same behavior, less repetition, and disposal is managed by the transformer. --- dimos/utils/cli/map.py | 104 +++++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 36 deletions(-) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index b84d6e8dad..8f0e65e9f0 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from collections.abc import Callable +from collections.abc import Callable, Iterable import time from typing import Any @@ -20,7 +20,7 @@ import rerun.blueprint as rrb import typer -from dimos.mapping.voxels import VoxelGrid +from dimos.mapping.voxels import VoxelMapTransformer from dimos.memory2.store.sqlite import SqliteStore from dimos.memory2.stream import Stream from dimos.memory2.transform import QualityWindow, SpeedLimit @@ -28,6 +28,7 @@ from dimos.memory2.vis.color import Color from dimos.msgs.geometry_msgs.Transform import Transform from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 from dimos.perception.fiducial.marker_transformer import DetectMarkers from dimos.robot.unitree.go2.connection import _camera_info_static from dimos.utils.data import resolve_named_path @@ -36,6 +37,44 @@ PATH_THICKNESS = 0.01 +def _accumulate( + obs_iter: Iterable[Observation[PointCloud2]], + *, + voxel: float, + block_count: int, + device: str, + interp: Callable[[float], Transform] | None = None, + progress_cb: Callable[[Observation[Any]], None] | None = None, +) -> PointCloud2 | None: + """Accumulate a voxel map from `obs_iter`, optionally PGO-correcting each frame. + + Returns the final ``PointCloud2`` (or ``None`` if the input was empty). + Disposal of the underlying ``VoxelGrid`` is handled by ``VoxelMapTransformer``. + """ + + def prepared() -> Iterable[Observation[PointCloud2]]: + for obs in obs_iter: + if progress_cb is not None: + progress_cb(obs) + if len(obs.data) == 0: + continue + if interp is not None: + if obs.pose_tuple is None: + continue + yield obs.derive(data=obs.data.transform(interp(obs.ts))) + else: + yield obs + + vmt = VoxelMapTransformer( + emit_every=0, # batch mode: emit once on exhaustion + voxel_size=voxel, + block_count=block_count, + device=device, + ) + result = next(iter(vmt(iter(prepared()))), None) + return result.data if result is not None else None + + def progress(total: int, label: str = "") -> Callable[[Observation[Any]], None]: seen = 0 wall_start: float | None = None @@ -186,45 +225,35 @@ def main( kf_t = kf_obs.data.optimized.translation pgo_path.append((kf_t.x, kf_t.y, kf_t.z)) - pass2_pb = progress(n_kept, "pgo pass 2 (rebuilding)") - grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) - try: - for obs in seen.values(): - pass2_pb(obs) - if len(obs.data) == 0: - continue - grid.add_frame(obs.data.transform(interp(obs.ts))) - pgo_map = grid.get_global_pointcloud2() - finally: - grid.dispose() + pgo_map = _accumulate( + seen.values(), + voxel=voxel, + block_count=block_count, + device=device, + interp=interp, + progress_cb=progress(n_kept, "pgo pass 2 (rebuilding)"), + ) full_pgo_map = None if full_pgo: assert interp is not None - full_pb = progress(total, "full pgo (rebuilding)") - full_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) - try: - for obs in lidar: - full_pb(obs) - if obs.pose_tuple is None or len(obs.data) == 0: - continue - full_grid.add_frame(obs.data.transform(interp(obs.ts))) - full_pgo_map = full_grid.get_global_pointcloud2() - finally: - full_grid.dispose() + full_pgo_map = _accumulate( + lidar, + voxel=voxel, + block_count=block_count, + device=device, + interp=interp, + progress_cb=progress(total, "full pgo (rebuilding)"), + ) # Raw map: same dedup'd frames, no PGO correction. - raw_pb = progress(n_kept, "reconstructing global map") - raw_grid = VoxelGrid(voxel_size=voxel, block_count=block_count, device=device) - try: - for obs in seen.values(): - raw_pb(obs) - if len(obs.data) == 0: - continue - raw_grid.add_frame(obs.data) - global_map = raw_grid.get_global_pointcloud2() - finally: - raw_grid.dispose() + global_map = _accumulate( + seen.values(), + voxel=voxel, + block_count=block_count, + device=device, + progress_cb=progress(n_kept, "reconstructing global map"), + ) marker_dets: list[Observation[Any]] = [] if markers: @@ -270,7 +299,10 @@ def main( if not no_gui: rerun_init("dimos map tool", spawn=True) rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world"))) - rr.log("world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True) + if global_map is not None: + rr.log( + "world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True + ) if path: rr.log( "world/raw_map/path", From ab687a239ac0c7081398ffcfdf529e167db120eb Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 16:47:43 +0800 Subject: [PATCH 30/36] mapper bugfix --- .../perception/fiducial/marker_transformer.py | 25 ++++--------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py index e6c3d3c02f..fa114b8b8b 100644 --- a/dimos/perception/fiducial/marker_transformer.py +++ b/dimos/perception/fiducial/marker_transformer.py @@ -58,23 +58,6 @@ logger = setup_logger() -def _pose_tuple_to_transform( - pose: tuple[float, float, float, float, float, float, float], - *, - frame_id: str, - child_frame_id: str, - ts: float, -) -> Transform: - x, y, z, qx, qy, qz, qw = pose - return Transform( - translation=Vector3(x, y, z), - rotation=Quaternion(qx, qy, qz, qw), - frame_id=frame_id, - child_frame_id=child_frame_id, - ts=ts, - ) - - def _average_marker_pose( buffer: TimestampedBufferCollection[Detection3DMarker], ) -> tuple[Vector3, Quaternion]: @@ -146,7 +129,8 @@ def __call__( marker_size = Vector3(self.marker_length_m, self.marker_length_m, 0.0) for obs in upstream: - if obs.pose is None: + pose = obs.pose + if pose is None: logger.debug("DetectMarkers: obs %s has no .pose; skipping", obs.id) continue @@ -170,8 +154,9 @@ def __call__( if ids is None or len(ids) == 0: continue - t_world_optical = _pose_tuple_to_transform( - obs.pose, + t_world_optical = Transform( + translation=pose.position, + rotation=pose.orientation, frame_id=self.world_frame, child_frame_id="optical", ts=obs.ts, From fcd53f1cde6a857a54793a7ccc62eb1dcad65f43 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 16:59:57 +0800 Subject: [PATCH 31/36] fix mypy: rename shadowed `pose` in DetectMarkers The outer `pose = obs.pose` (Pose | None) was being shadowed by `pose = estimate_marker_pose(...)` (tuple[rvec, tvec] | None), which mypy flags as an incompatible reassignment. Renamed the inner one to `marker_pose`. --- dimos/perception/fiducial/marker_transformer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dimos/perception/fiducial/marker_transformer.py b/dimos/perception/fiducial/marker_transformer.py index fa114b8b8b..36fc2bd6bc 100644 --- a/dimos/perception/fiducial/marker_transformer.py +++ b/dimos/perception/fiducial/marker_transformer.py @@ -164,16 +164,16 @@ def __call__( for corner_set, mid_arr in zip(corners, ids, strict=True): mid = int(mid_arr[0]) - pose = estimate_marker_pose( + marker_pose = estimate_marker_pose( corner_set, self.marker_length_m, self._cam_mtx, self._dist, distortion_model=info.distortion_model, ) - if pose is None: + if marker_pose is None: continue - rvec, tvec = pose + rvec, tvec = marker_pose t_optical_marker = rvec_tvec_to_transform( rvec, tvec, From 3aaaeb98d7633fad101acb455548c6507d902675 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 17:06:12 +0800 Subject: [PATCH 32/36] NearFilter: store position as Vector3, drop _xyz extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NearFilter now holds a typed `Vector3` instead of an `Any` named `pose`. `Stream.near()` does the one-time normalization at construction (Pose/ PoseStamped → .position, then Vector3(...) which accepts tuple/ndarray/ Vector3). `matches()` uses Vector3 subtraction + length_squared(); the sqlite filter compiler reads f.position.x/.y/.z directly. The _xyz() helper is gone. --- dimos/memory2/observationstore/sqlite.py | 8 +------- dimos/memory2/stream.py | 7 ++++++- dimos/memory2/type/filter.py | 25 +++++++----------------- 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/dimos/memory2/observationstore/sqlite.py b/dimos/memory2/observationstore/sqlite.py index e82e867dc0..3dd9a34948 100644 --- a/dimos/memory2/observationstore/sqlite.py +++ b/dimos/memory2/observationstore/sqlite.py @@ -31,7 +31,6 @@ NearFilter, TagsFilter, TimeRangeFilter, - _xyz, ) from dimos.memory2.type.observation import _UNLOADED, Observation, PoseTuple from dimos.memory2.utils.sqlite import open_disposable_sqlite_connection @@ -84,12 +83,7 @@ def _compile_filter(f: Filter, stream: str, prefix: str = "") -> tuple[str, list params.append(v) return (" AND ".join(clauses), params) if isinstance(f, NearFilter): - pose = f.pose - if pose is None: - return None - if hasattr(pose, "position"): - pose = pose.position - cx, cy, cz = _xyz(pose) + cx, cy, cz = f.position.x, f.position.y, f.position.z r = f.radius # R*Tree bounding-box pre-filter + exact squared-distance check rtree_sql = ( diff --git a/dimos/memory2/stream.py b/dimos/memory2/stream.py index 3a92f7708f..8f0a767aa7 100644 --- a/dimos/memory2/stream.py +++ b/dimos/memory2/stream.py @@ -38,6 +38,7 @@ TimeRangeFilter, ) from dimos.memory2.type.observation import EmbeddedObservation, Observation +from dimos.msgs.geometry_msgs.Vector3 import Vector3 from dimos.utils.logging_config import setup_logger if TYPE_CHECKING: @@ -177,7 +178,11 @@ def at_relative(self, t: float, tolerance: float = 1.0) -> Stream[T, O]: return self.at(t0 + t, tolerance=tolerance) def near(self, pose: Any, radius: float) -> Stream[T, O]: - return self._with_filter(NearFilter(pose, radius)) + # Accept Pose/PoseStamped (any object with `.position`), Vector3, + # numpy arrays, or (x, y, z) tuples — Vector3() handles the rest. + if hasattr(pose, "position"): + pose = pose.position + return self._with_filter(NearFilter(Vector3(pose), radius)) def tags(self, **tags: Any) -> Stream[T, O]: return self._with_filter(TagsFilter(tags)) diff --git a/dimos/memory2/type/filter.py b/dimos/memory2/type/filter.py index ddc0f86eb5..1250c4c31f 100644 --- a/dimos/memory2/type/filter.py +++ b/dimos/memory2/type/filter.py @@ -19,6 +19,8 @@ from itertools import islice from typing import TYPE_CHECKING, Any +from dimos.msgs.geometry_msgs.Vector3 import Vector3 + if TYPE_CHECKING: from collections.abc import Callable, Iterator @@ -75,28 +77,15 @@ def matches(self, obs: Observation[Any]) -> bool: @dataclass(frozen=True) class NearFilter(Filter): - pose: Any = field(hash=False) + position: Vector3 = field(hash=False) radius: float = 0.0 def matches(self, obs: Observation[Any]) -> bool: - p2 = obs.pose_tuple - if p2 is None or self.pose is None: + p = obs.pose_tuple + if p is None: return False - p1 = self.pose - # NearFilter.pose accepts raw (x,y,z) tuples or PoseStamped-like objects. - if hasattr(p1, "position"): - p1 = p1.position - x1, y1, z1 = _xyz(p1) - x2, y2, z2 = p2[0], p2[1], p2[2] - dist_sq = (x1 - x2) ** 2 + (y1 - y2) ** 2 + (z1 - z2) ** 2 - return dist_sq <= self.radius**2 - - -def _xyz(p: Any) -> tuple[float, float, float]: - """Extract (x, y, z) from various pose representations.""" - if isinstance(p, (list, tuple)): - return (float(p[0]), float(p[1]), float(p[2]) if len(p) > 2 else 0.0) - return (float(p.x), float(p.y), float(getattr(p, "z", 0.0))) + delta = self.position - Vector3(p[0], p[1], p[2]) + return delta.length_squared() <= self.radius * self.radius @dataclass(frozen=True) From 5d7f573c4ed766b61cc74fb4568016024605aa81 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 18:17:31 +0800 Subject: [PATCH 33/36] go2 stairs --- data/.lfs/go2_mid360_stairs.db.tar.gz | 3 + dimos/mapping/loop_closure/utils/map_rrd.py | 366 ++++++++++++++++++++ dimos/memory2/store/base.py | 11 + 3 files changed, 380 insertions(+) create mode 100644 data/.lfs/go2_mid360_stairs.db.tar.gz create mode 100644 dimos/mapping/loop_closure/utils/map_rrd.py diff --git a/data/.lfs/go2_mid360_stairs.db.tar.gz b/data/.lfs/go2_mid360_stairs.db.tar.gz new file mode 100644 index 0000000000..e094ccb1f2 --- /dev/null +++ b/data/.lfs/go2_mid360_stairs.db.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32adaeab92baf7780915600ee04b01aac9fa5a4b5965deabe72f539b6633e606 +size 514688073 diff --git a/dimos/mapping/loop_closure/utils/map_rrd.py b/dimos/mapping/loop_closure/utils/map_rrd.py new file mode 100644 index 0000000000..6856dadef7 --- /dev/null +++ b/dimos/mapping/loop_closure/utils/map_rrd.py @@ -0,0 +1,366 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Dump a recorded dataset to .rrd: lidar point clouds + camera frames. + +Lidar clouds are assumed to be in world frame and logged directly under +their entity path (no parent transform). Entities written: + +- ``world/lidar`` — Go2 L1 per-frame point cloud +- ``world/lidar_voxels`` — accumulated voxel map of the primary lidar (``--map``) +- ``world/fastlio_lidar`` — fastlio_lidar raw cloud (if present) +- ``world/fastlio_voxels``— accumulated voxel map of fastlio_lidar (``--map``) +- ``world/fastlio`` — fastlio_odometry pose axis (if present) +- ``world/fastlio_path`` — fastlio_odometry trajectory (growing LineStrips3D) +- ``world/odom`` — Go2 onboard odom pose axis (if present) +- ``world/odom_path`` — Go2 onboard odom trajectory (growing LineStrips3D) +- ``world/camera`` — color_image camera pose (static pinhole + Transform3D) +- ``world/camera/image`` — color_image frames + +Usage: + uv run python -m dimos.mapping.loop_closure.utils.map_rrd mid360 --out map.rrd + uv run python -m dimos.mapping.loop_closure.utils.map_rrd mid360 --out map.rrd --map + rerun map.rrd +""" + +from __future__ import annotations + +from collections.abc import Callable +from pathlib import Path +import time +from typing import Any + +import rerun as rr +import typer + +from dimos.mapping.voxels import VoxelMapTransformer +from dimos.memory2.store.sqlite import SqliteStore +from dimos.memory2.stream import Stream +from dimos.memory2.transform import throttle +from dimos.memory2.type.observation import Observation +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2, register_colormap_annotation +from dimos.robot.unitree.go2.connection import BASE_TO_OPTICAL, _camera_info_static +from dimos.utils.data import resolve_named_path + +TIMELINE = "ts" + + +def _progress(total: int, label: str) -> Callable[[Observation[Any]], None]: + """Matches dimos/utils/cli/map.py:progress.""" + seen = 0 + wall_start: float | None = None + last_wall: float | None = None + first_ts: float | None = None + + def tick(obs: Observation[Any]) -> None: + nonlocal seen, wall_start, last_wall, first_ts + now = time.monotonic() + if wall_start is None: + wall_start = now + first_ts = obs.ts + assert first_ts is not None + frame_ms = (now - last_wall) * 1000 if last_wall is not None else 0.0 + last_wall = now + seen += 1 + pct = 100 * seen // total if total else 100 + wall = now - wall_start + data = obs.ts - first_ts + speed = data / wall if wall > 0 else 0.0 + end = "\n" if seen >= total else "" + print( + f"\r{label} {pct:>3}% [{seen}/{total}] {data:.1f}s ({speed:.1f} x rt) {frame_ms:.0f}ms/frame", + end=end, + flush=True, + ) + + return tick + + +def _log_clouds( + label: str, + stream: Stream[PointCloud2], + entity: str, + voxel: float, + point_mode: str, + *, + total: int | None = None, +) -> None: + """Iterate a PointCloud2 stream and log each obs to ``entity``. + + ``total`` overrides the progress denominator — useful for transform + pipelines where calling :py:meth:`Stream.count` would materialize the + whole pipeline. + """ + n = total if total is not None else stream.count() + cb = _progress(n, label) + for obs in stream: + cb(obs) + rr.set_time(TIMELINE, timestamp=obs.ts) + rr.log(entity, obs.data.to_rerun(voxel_size=voxel, mode=point_mode)) + + +def _log_path( + label: str, + stream: Stream[Any], + entity: str, + color: tuple[int, int, int], + *, + emit_every: int = 10, +) -> None: + """Iterate a pose-bearing stream and log a growing :class:`LineStrips3D` to + ``entity`` every ``emit_every`` poses (and once more at the end). Frames + without a pose are skipped. + """ + n = stream.count() + cb = _progress(n, label) + points: list[tuple[float, float, float]] = [] + last_ts: float | None = None + emit_count = 0 + for obs in stream: + cb(obs) + p = obs.pose_tuple + if p is None: + continue + points.append((float(p[0]), float(p[1]), float(p[2]))) + last_ts = obs.ts + emit_count += 1 + if emit_every > 0 and emit_count % emit_every == 0 and len(points) >= 2: + rr.set_time(TIMELINE, timestamp=obs.ts) + rr.log(entity, rr.LineStrips3D([points], colors=[color])) + if ( + last_ts is not None + and len(points) >= 2 + and (emit_every <= 0 or emit_count % emit_every != 0) + ): + rr.set_time(TIMELINE, timestamp=last_ts) + rr.log(entity, rr.LineStrips3D([points], colors=[color])) + + +def main( + dataset: str = typer.Argument(..., help="Dataset .db: bare name (cwd or data/) or path"), + out: Path = typer.Option(..., "--out", help="Output .rrd path"), + voxel: float = typer.Option( + 0.05, "--voxel", help="Voxel size hint for the point cloud renderer" + ), + point_mode: str = typer.Option( + "spheres", "--point-mode", help="Render mode: 'spheres', 'boxes', or 'points'" + ), + camera_hz: float = typer.Option( + 2.0, "--camera-hz", help="Throttle color_image to at most this rate; 0 disables" + ), + map: bool = typer.Option( + False, + "--map", + help="Accumulate each lidar stream into a VoxelGrid and log only the final map", + ), + map_voxel: float = typer.Option( + 0.05, "--map-voxel", help="Voxel size for the accumulated map (m); --map only" + ), + map_device: str = typer.Option( + "CUDA:0", "--map-device", help="Open3D device for the VoxelGrid; --map only" + ), + map_emit_every: int = typer.Option( + 10, + "--map-emit-every", + help="Emit accumulated map every N frames (0 = only at end); --map only", + ), + image_pose_from: str = typer.Option( + "own", + "--image-pose-from", + help="Pose authority for color_image frames: 'own' (image pose) or 'fastlio_odom' " + "(nearest fastlio_odometry frame in time)", + ), + image_pose_tol: float = typer.Option( + 0.1, + "--image-pose-tol", + help="Max time gap (s) when matching --image-pose-from fastlio_odom", + ), +) -> None: + db_path = resolve_named_path(dataset, ".db") + cam_info = _camera_info_static() + + rr.init("dimos map_rrd", recording_id=db_path.stem) + rr.save(str(out)) + register_colormap_annotation("turbo") + + # Static pinhole on the camera entity; per-frame Transform3D goes on the + # same entity. Image is the child so it projects through the pinhole. + pinhole = cam_info.to_rerun() + assert not isinstance(pinhole, list) + rr.log("world/camera", pinhole, static=True) + + # Static axis triads as children of each moving Transform3D, so the + # transforms are actually visible in the 3D view. + axes = rr.Arrows3D( + vectors=[[0.3, 0, 0], [0, 0.3, 0], [0, 0, 0.3]], + colors=[[255, 0, 0], [0, 255, 0], [0, 0, 255]], + ) + rr.log("world/fastlio/axes", axes, static=True) + rr.log("world/odom/axes", axes, static=True) + + store = SqliteStore(path=str(db_path)) + with store: + print(store.summary()) + + lidar = store.stream("lidar", PointCloud2) + color_image = store.stream("color_image", Image) + has_livox = "fastlio_lidar" in store.streams + livox = store.stream("fastlio_lidar", PointCloud2) if has_livox else None + + # ---- per-frame raw clouds ---- + _log_clouds(" lidar", lidar, "world/lidar", voxel, point_mode) + if livox is not None: + _log_clouds("fastlio_lidar", livox, "world/fastlio_lidar", voxel, point_mode) + + # ---- accumulated voxel maps (--map only) ---- + # Go2 L1 forward-facing → column carving on. + # Mid360 spherical → column carving off, just aggregate. + if map: + grid_kwargs = {"voxel_size": map_voxel, "device": map_device, "show_startup_log": False} + _log_clouds( + " lidar_voxels", + lidar.transform( + VoxelMapTransformer( + emit_every=map_emit_every, carve_columns=True, **grid_kwargs + ) + ), + "world/lidar_voxels", + voxel, + point_mode, + total=max(1, lidar.count() // max(map_emit_every, 1)), + ) + if livox is not None: + _log_clouds( + "fastlio_voxels", + livox.transform( + VoxelMapTransformer( + emit_every=map_emit_every, carve_columns=False, **grid_kwargs + ) + ), + "world/fastlio_voxels", + voxel, + point_mode, + total=max(1, livox.count() // max(map_emit_every, 1)), + ) + + # ---- fastlio pose axis + path from fastlio_odometry stream ---- + if "fastlio_odometry" in store.streams: + odometry = store.stream("fastlio_odometry", Odometry) + cb = _progress(odometry.count(), "fastlio_odometry") + for obs in odometry: + cb(obs) + p = obs.pose_tuple + if p is None: + continue + rr.set_time(TIMELINE, timestamp=obs.ts) + x, y, z, qx, qy, qz, qw = p + rr.log( + "world/fastlio", + rr.Transform3D( + translation=[x, y, z], + quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw]), + ), + ) + _log_path( + " fastlio_path", + store.stream("fastlio_odometry", Odometry), + "world/fastlio_path", + color=(255, 165, 0), # orange + ) + + # ---- Go2 native odom pose axis + path ---- + if "odom" in store.streams: + odom = store.stream("odom", PoseStamped) + cb = _progress(odom.count(), " odom") + for obs in odom: + cb(obs) + p = obs.pose_tuple + if p is None: + continue + rr.set_time(TIMELINE, timestamp=obs.ts) + x, y, z, qx, qy, qz, qw = p + rr.log( + "world/odom", + rr.Transform3D( + translation=[x, y, z], + quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw]), + ), + ) + _log_path( + " odom_path", + store.stream("odom", PoseStamped), + "world/odom_path", + color=(0, 200, 100), # green + ) + + # ---- pass 2: camera pose + image per color_image ---- + cam_pipeline = ( + color_image.transform(throttle(1.0 / camera_hz)) if camera_hz > 0 else color_image + ) + n_img = cam_pipeline.count() + cb = _progress(n_img, " color_image") + pose_authority: Stream[Any] | None = None + if image_pose_from == "fastlio_odom": + if "fastlio_odometry" not in store.streams: + raise typer.BadParameter( + "--image-pose-from=fastlio_odom but no fastlio_odometry stream in dataset" + ) + pose_authority = store.stream("fastlio_odometry", Odometry) + elif image_pose_from != "own": + raise typer.BadParameter( + f"--image-pose-from must be 'own' or 'fastlio_odom', got {image_pose_from!r}" + ) + + for img_obs in cam_pipeline: + cb(img_obs) + rr.set_time(TIMELINE, timestamp=img_obs.ts) + if pose_authority is not None: + # fastlio_odom is body-in-world; compose with base→optical so the + # camera entity lands where the camera actually is, not the body. + matches = pose_authority.at(img_obs.ts, tolerance=image_pose_tol).to_list() + if matches: + nearest = min(matches, key=lambda o: abs(o.ts - img_obs.ts)) + ps = nearest.pose_stamped + cam_tf = ( + Transform.from_pose("world", ps) + BASE_TO_OPTICAL + if ps is not None + else None + ) + else: + cam_tf = None + else: + # The image's own recorded pose is already optical-in-world. + ps = img_obs.pose_stamped + cam_tf = Transform.from_pose("world", ps) if ps is not None else None + if cam_tf is not None: + t, q = cam_tf.translation, cam_tf.rotation + rr.log( + "world/camera", + rr.Transform3D( + translation=[t.x, t.y, t.z], + quaternion=rr.Quaternion(xyzw=[q.x, q.y, q.z, q.w]), + ), + ) + rr.log("world/camera/image", img_obs.data.to_rerun()) + + print(f"wrote {out}") + print(f"open with: rerun {out}") + + +if __name__ == "__main__": + typer.run(main) diff --git a/dimos/memory2/store/base.py b/dimos/memory2/store/base.py index fb6d76379d..3c07d6ae6d 100644 --- a/dimos/memory2/store/base.py +++ b/dimos/memory2/store/base.py @@ -14,6 +14,7 @@ from __future__ import annotations +from collections.abc import Iterator from typing import TYPE_CHECKING, Any, Generic, Protocol, TypeVar, cast from dimos.core.resource import CompositeResource @@ -65,6 +66,12 @@ def __getitem__(self, name: str) -> S: raise KeyError(name) return self._container.stream(name) + def __contains__(self, name: object) -> bool: + return isinstance(name, str) and name in self._container.list_streams() + + def __iter__(self) -> Iterator[str]: + return iter(self._container.list_streams()) + def __dir__(self) -> list[str]: return self._container.list_streams() @@ -200,6 +207,10 @@ def list_streams(self) -> list[str]: """Return names of all streams in this store.""" return list(self._streams.keys()) + def summary(self) -> str: + """One line per opened stream — name, count, ts range. See :meth:`Stream.summary`.""" + return "\n".join(s.summary() for _, s in self.streams.items()) + def delete_stream(self, name: str) -> None: """Delete a stream by name (from cache and underlying storage).""" stream = self._streams.pop(name, None) From f7f09e24164e61b7c32b516e66315376d6de0f31 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 18:38:10 +0800 Subject: [PATCH 34/36] recording update --- data/.lfs/go2_mid360_stairs.db.tar.gz | 4 ++-- dimos/utils/data.py | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/data/.lfs/go2_mid360_stairs.db.tar.gz b/data/.lfs/go2_mid360_stairs.db.tar.gz index e094ccb1f2..615aad3a17 100644 --- a/data/.lfs/go2_mid360_stairs.db.tar.gz +++ b/data/.lfs/go2_mid360_stairs.db.tar.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32adaeab92baf7780915600ee04b01aac9fa5a4b5965deabe72f539b6633e606 -size 514688073 +oid sha256:02d8d2194332291cf71988458965aa9f8019b8d61661ff153e93886dd72e85e9 +size 54082131 diff --git a/dimos/utils/data.py b/dimos/utils/data.py index bcdc08fd6a..82afc4c1fd 100644 --- a/dimos/utils/data.py +++ b/dimos/utils/data.py @@ -111,7 +111,13 @@ def get_data_dir(extra_path: str | None = None) -> Path: return get_project_root() / "data" -def resolve_named_path(name: str | Path, suffix: str = "") -> Path: +def resolve_named_path(name: str | Path, suffix: str = "", *, pull: bool = True) -> Path: + """Resolve a bare name / partial path to an absolute Path. + + If ``pull`` is True (default), fall back to :func:`get_data` (which may + download from LFS) when the file isn't found locally. Set ``pull=False`` + to fail fast with ``FileNotFoundError`` instead. + """ s = str(name) p = Path(s) if p.is_absolute() or p.exists(): @@ -120,6 +126,8 @@ def resolve_named_path(name: str | Path, suffix: str = "") -> Path: p = Path(s + suffix) if p.is_absolute() or p.exists(): return p + if not pull: + raise FileNotFoundError(f"{name!r} not found locally (pull=False)") return get_data(p.name) From b140cea1bb97806b3eb7160769d200911052ae12 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Thu, 28 May 2026 18:39:43 +0800 Subject: [PATCH 35/36] oops --- dimos/utils/cli/map.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/dimos/utils/cli/map.py b/dimos/utils/cli/map.py index 8f0e65e9f0..af19300549 100644 --- a/dimos/utils/cli/map.py +++ b/dimos/utils/cli/map.py @@ -508,6 +508,3 @@ def main( if __name__ == "__main__": typer.run(main) - typer.run(main) - typer.run(main) - typer.run(main) From 6afa4bc01afa1430962f34c80e7015063641e536 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 1 Jun 2026 20:24:22 +0800 Subject: [PATCH 36/36] map_rrd: rename pose loop var to fix mypy Observation type clash --- dimos/mapping/loop_closure/utils/map_rrd.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dimos/mapping/loop_closure/utils/map_rrd.py b/dimos/mapping/loop_closure/utils/map_rrd.py index 6856dadef7..13c2081d5a 100644 --- a/dimos/mapping/loop_closure/utils/map_rrd.py +++ b/dimos/mapping/loop_closure/utils/map_rrd.py @@ -287,12 +287,12 @@ def main( if "odom" in store.streams: odom = store.stream("odom", PoseStamped) cb = _progress(odom.count(), " odom") - for obs in odom: - cb(obs) - p = obs.pose_tuple + for pose_obs in odom: + cb(pose_obs) + p = pose_obs.pose_tuple if p is None: continue - rr.set_time(TIMELINE, timestamp=obs.ts) + rr.set_time(TIMELINE, timestamp=pose_obs.ts) x, y, z, qx, qy, qz, qw = p rr.log( "world/odom",