Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions data/.lfs/go2_china_office_indoor.db.tar.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions data/.lfs/go2_china_office_indoor.mcap.tar.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions data/.lfs/go2_china_office_indoor_bestz.txt.tar.gz
Git LFS file not shown
15 changes: 12 additions & 3 deletions dimos/mapping/utils/cli/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ def _progress(obs: Observation[Any]) -> None:
def _log_reconstruction(
*,
voxel: float,
cubes: bool = False,
global_map: PointCloud2 | None,
path: list[tuple[float, float, float]],
pgo_map: PointCloud2 | None,
Expand All @@ -178,20 +179,24 @@ def _log_reconstruction(
from dimos.msgs.geometry_msgs.Transform import Transform

rr.send_blueprint(rrb.Blueprint(rrb.Spatial3DView(origin="world")))
# Cubes tile the grid at full voxel size; dots (spheres) stay at half.
map_kw: dict[str, Any] = (
{"voxel_size": voxel, "mode": "boxes"} if cubes else {"voxel_size": voxel / 2}
)
if global_map is not None:
rr.log("world/raw_map/pointcloud", global_map.to_rerun(voxel_size=voxel / 2), static=True)
rr.log("world/raw_map/pointcloud", global_map.to_rerun(**map_kw), static=True)
if path:
rr.log(
"world/raw_map/path",
rr.LineStrips3D(strips=[path], colors=[[231, 76, 60]], radii=[PATH_THICKNESS]),
static=True,
)
if pgo_map is not None:
rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(voxel_size=voxel / 2), static=True)
rr.log("world/pgo_map/pointcloud", pgo_map.to_rerun(**map_kw), static=True)
if full_pgo_map is not None:
rr.log(
"world/full_pgo_map/pointcloud",
full_pgo_map.to_rerun(voxel_size=voxel / 2),
full_pgo_map.to_rerun(**map_kw),
static=True,
)
if pgo_path:
Expand Down Expand Up @@ -292,6 +297,9 @@ def main(
None, "--duration", help="Use only N seconds from --seek (default: to the end)"
),
voxel: float = typer.Option(0.05, "--voxel", help="Voxel size for the rebuild"),
cubes: bool = typer.Option(
False, "--cubes", help="Render the map as voxel-sized cubes instead of dots"
),
device: str = typer.Option(
"CUDA:0", "--device", help="Open3D compute device (e.g. CUDA:0, CPU:0)"
),
Expand Down Expand Up @@ -523,6 +531,7 @@ def main(
rr.save(str(out))
_log_reconstruction(
voxel=voxel,
cubes=cubes,
global_map=global_map,
path=path,
pgo_map=pgo_map,
Expand Down
65 changes: 37 additions & 28 deletions dimos/mapping/utils/cli/replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,10 @@ def main(
"--voxel",
help="Voxel grid resolution (m) for --map/--map-final; rendering follows the same size",
),
point_mode: str = typer.Option(
"spheres", "--point-mode", help="Render mode: 'spheres', 'boxes', or 'points'"
cube: bool = typer.Option(
False,
"--cube",
help="Render the accumulated --map/--map-final as voxel-sized cubes instead of spheres",
),
camera_hz: float = typer.Option(
0.0,
Expand Down Expand Up @@ -201,6 +203,7 @@ def main(
"--map-emit-every",
help="Emit accumulated map every N frames (0 = only at end); --map only",
),
no_image: bool = typer.Option(False, "--no-image", help="Skip logging color_image frames"),
) -> None:
"""Dump a recording to .rrd (lidar clouds + camera frames) and open it in rerun."""
from dimos.mapping.utils.cli.summary import _stream_payload_types
Expand Down Expand Up @@ -232,9 +235,10 @@ def main(

# Static pinhole on the camera entity; per-frame Transform3D goes on the
# same entity. Image is the child so it projects through the pinhole.
pinhole = cam_info.to_rerun()
assert not isinstance(pinhole, list)
rr.log("world/camera", pinhole, static=True)
if not no_image:
pinhole = cam_info.to_rerun()
assert not isinstance(pinhole, list)
rr.log("world/camera", pinhole, static=True)

# Static axis triads as children of each moving Transform3D, so the
# transforms are actually visible in the 3D view.
Expand All @@ -257,17 +261,21 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]:
has_livox = "fastlio_lidar" in store.streams
livox = clipped("fastlio_lidar", PointCloud2) if has_livox else None

# ---- per-frame raw clouds ----
_log_clouds(" lidar", lidar, "world/lidar", voxel, point_mode)
# ---- per-frame raw clouds (always spheres) ----
_log_clouds(" lidar", lidar, "world/lidar", voxel, "spheres")
if livox is not None:
_log_clouds("fastlio_lidar", livox, "world/fastlio_lidar", voxel, point_mode)
_log_clouds("fastlio_lidar", livox, "world/fastlio_lidar", voxel, "spheres")

# ---- accumulated voxel maps over the selected PointCloud2 streams ----
# --map logs a growing map per stream; --map-final logs one static map
# per stream. --map-carve-columns clears the Z column under each surface
# voxel (good for forward-facing lidar like the Go2 L1); off by default.
if map or map_final:
grid_kwargs = {"voxel_size": voxel, "device": map_device, "show_startup_log": False}
# --cube tiles the grid at the true voxel size; spheres render
# smaller so the gaps between them read as transparency.
map_mode = "boxes" if cube else "spheres"
map_render = voxel if cube else voxel / 4
for name in map_sources:
src = clipped(name, PointCloud2)
if not src.exists():
Expand All @@ -283,8 +291,8 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]:
)
),
f"world/{name}_voxels",
voxel / 4, # render smaller than the grid → gaps read as transparency
point_mode,
map_render,
map_mode,
total=max(1, src.count() // max(map_emit_every, 1)),
)
if map_final:
Expand All @@ -296,7 +304,7 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]:
).last()
rr.log(
f"world/{name}_map",
final.data.to_rerun(voxel_size=voxel / 4, mode=point_mode),
final.data.to_rerun(voxel_size=map_render, mode=map_mode),
static=True,
)

Expand Down Expand Up @@ -349,23 +357,24 @@ def clipped(name: str, ptype: type[Any]) -> Stream[Any]:
)

# ---- pass 2: camera pose + image per color_image ----
cam_pipeline = (
color_image.transform(throttle(1.0 / camera_hz)) if camera_hz > 0 else color_image
)
n_img = cam_pipeline.count()
cb = _progress(n_img, " color_image")
for img_obs in cam_pipeline:
cb(img_obs)
rr.set_time(TIMELINE, timestamp=img_obs.ts)
if img_obs.pose_tuple is not None:
x, y, z, qx, qy, qz, qw = img_obs.pose_tuple
rr.log(
"world/camera",
rr.Transform3D(
translation=[x, y, z], quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw])
),
)
rr.log("world/camera/image", img_obs.data.to_rerun())
if not no_image:
cam_pipeline = (
color_image.transform(throttle(1.0 / camera_hz)) if camera_hz > 0 else color_image
)
n_img = cam_pipeline.count()
cb = _progress(n_img, " color_image")
for img_obs in cam_pipeline:
cb(img_obs)
rr.set_time(TIMELINE, timestamp=img_obs.ts)
if img_obs.pose_tuple is not None:
x, y, z, qx, qy, qz, qw = img_obs.pose_tuple
rr.log(
"world/camera",
rr.Transform3D(
translation=[x, y, z], quaternion=rr.Quaternion(xyzw=[qx, qy, qz, qw])
),
)
rr.log("world/camera/image", img_obs.data.to_rerun())

print(f"wrote {out}")
if no_gui:
Expand Down
8 changes: 7 additions & 1 deletion dimos/mapping/utils/cli/replay_marker.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ def main(
None, "--duration", help="Use only N seconds from --seek (default: to the end)"
),
marker_size: float = typer.Option(0.1, "--marker-size", help="Marker edge length (m)"),
camera_info: Path | None = typer.Option(
None,
"--camera-info",
help="YAML calibration file for detection; defaults to Go2 builtin",
),
marker_max_speed: float = typer.Option(
0.5, "--marker-max-speed", help="Detection speed gate (m/s); 0 disables"
),
Expand All @@ -73,6 +78,7 @@ def main(
from dimos.memory2.store.sqlite import SqliteStore
from dimos.memory2.transform import QualityWindow, SpeedLimit
from dimos.memory2.vis.color import Color
from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo
from dimos.msgs.sensor_msgs.Image import Image
from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2
from dimos.perception.fiducial.marker_transformer import DetectMarkers
Expand All @@ -82,7 +88,7 @@ def main(
db_path = resolve_named_path(dataset, ".db")
if out is None:
out = Path.cwd() / f"{db_path.stem}.rrd"
cam_info = _camera_info_static()
cam_info = CameraInfo.from_yaml(str(camera_info)) if camera_info else _camera_info_static()

rr.init("dimos markers", recording_id=db_path.stem)
rr.save(str(out))
Expand Down
54 changes: 54 additions & 0 deletions dimos/robot/unitree/go2/front_camera_1080.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
image_width: 1920
image_height: 1080
camera_name: go2_front_camera_1080p
distortion_model: equidistant
camera_matrix:
rows: 3
cols: 3
data:
- 1196.2134247297394
- 0.0
- 965.3028251731779
- 0.0
- 1194.7308169154975
- 523.917540801463
- 0.0
- 0.0
- 1.0
distortion_coefficients:
rows: 1
cols: 4
data:
- -0.07309428880537933
- -0.02341140740909078
- -0.0069305931780026956
- 0.009238684474464793
rectification_matrix:
rows: 3
cols: 3
data:
- 1.0
- 0.0
- 0.0
- 0.0
- 1.0
- 0.0
- 0.0
- 0.0
- 1.0
projection_matrix:
rows: 3
cols: 4
data:
- 1196.2134247297394
- 0.0
- 965.3028251731779
- 0.0
- 0.0
- 1194.7308169154975
- 523.917540801463
- 0.0
- 0.0
- 0.0
- 1.0
- 0.0
83 changes: 83 additions & 0 deletions dimos/robot/unitree/go2/mcap/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Go2 MCAP → dataset ingest

Turn a Go2 onboard MCAP recording into a memory2 SQLite dataset (`.db`) that the
`dimos map` CLI reads. Lidar scans are deskewed into the **world frame** (Go2
convention); two trajectories are emitted for comparison.

## Build

```sh
uv run python -m dimos.robot.unitree.go2.mcap.ingest \
data/go2_china_office_indoor.mcap --out data/go2_china_office_indoor.db --seconds 60
```
Drop `--seconds` for the full recording. `mcap` must be installed (`uv pip install mcap`).
The best-z trajectory defaults to `<mcap>_bestz.txt` (data dir, via data utils);
override with `--bestz`.

## Streams

| stream | type | content |
|---|---|---|
| `color_image` | Image | RGB, posed at `camera_optical` in world |
| `odom` | PoseStamped | Go2 leg-inertial odometry |
| `odom_bestz` | PoseStamped | leg xy/yaw + pitch-reconstructed z |
| `lidar` | PointCloud2 | per-scan world cloud (deskewed by `odom`) |
| `lidar_bestz` | PointCloud2 | per-scan world cloud (deskewed by `odom_bestz`) |
| `lidar_1s` / `lidar_bestz_1s` | PointCloud2 | 1 s accumulation per trajectory |

## Validate

```sh
dimos map summary data/go2_china_office_indoor.db
dimos map global data/go2_china_office_indoor.db --lidar lidar --voxel 0.1
dimos map global data/go2_china_office_indoor.db --lidar lidar_bestz --voxel 0.1 # z-corrected
dimos map replay data/go2_china_office_indoor.db --duration 60
```

Extrinsic (`extrinsics.py`): the L1 is mounted nearly upside-down; `EXT_R` is the
official `pitch=2.88` flip + `rotate_yaw_bias` (~-123°), ground-leveled.
`data/<mcap>_bestz.txt` is the reconstructed trajectory (column 6 = z).

## Full-length run (go2_china_office_indoor, 1157 s ≈ 19 min)

Ingest: `python -m ...ingest data/go2_china_office_indoor.mcap` (no `--seconds`)
→ **3 min 26 s**, `data/go2_china_office_indoor.db` ≈ **2.3 GB**.

```
anchor=1780066187180359363 odom poses=173616 span=1157.1s
wrote 34724 odom poses (x2)
wrote 17776 lidar scans (x2)
wrote 1158 1s accumulations (x2)
wrote 5703 color_image frames
```

`dimos map summary data/go2_china_office_indoor.db`:

```
Stream("color_image"): 5703 items, 2026-05-29 14:49:47 — 15:09:04 (1156.8s)
Stream("lidar"): 17776 items, 2026-05-29 14:49:47 — 15:09:04 (1157.0s)
Stream("lidar_1s"): 1158 items, 2026-05-29 14:49:47 — 15:09:04 (1157.0s)
Stream("lidar_bestz"): 17776 items, 2026-05-29 14:49:47 — 15:09:04 (1157.0s)
Stream("lidar_bestz_1s"): 1158 items, 2026-05-29 14:49:47 — 15:09:04 (1157.0s)
Stream("odom"): 34724 items, 2026-05-29 14:49:47 — 15:09:04 (1157.1s)
Stream("odom_bestz"): 34724 items, 2026-05-29 14:49:47 — 15:09:04 (1157.1s)
```

`dimos map global … --lidar lidar_bestz` reconstructs the z-corrected world map
(GPU voxel grid, pose-dedup) and writes an `.rrd`. Notes: odom is downsampled to
~30 Hz; a few onboard JPEG frames log a harmless "Corrupt JPEG data" warning and
are skipped.

# marker validation

for debugging just camera

`dimos map replay-marker go2_china_office_indoor --camera-info dimos/robot/unitree/go2/front_camera_1080.yaml --duration 30`

for global map that includes markers

`dimos map global go2_china_office_indoor --lidar lidar_bestz --voxel 0.1 --duration 60 --markers --camera-info dimos/robot/unitree/go2/front_camera_1080.yaml --image-pose odom_bestz`

for replay of global map creation

`dimos map replay go2_china_office_indoor --duration 100 --voxel 0.05 --map --cube --no-image`
5 changes: 5 additions & 0 deletions dimos/robot/unitree/go2/mcap/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright 2026 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License").

"""Go2 MCAP → memory2 SQLite dataset ingest (lidar / rgb / odom in world frame)."""
33 changes: 33 additions & 0 deletions dimos/robot/unitree/go2/mcap/extrinsics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright 2026 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License").

"""Go2 L1 lidar extrinsic (base_link <- lidar) and camera mount.

The L1 is mounted nearly upside-down: raw lidar +z points at the floor. The
official Unitree config (go2_l1_lidar.yaml) encodes this as base->lidar
``[0.28216, 0, -0.02467, roll=0, pitch=2.88, yaw=0]`` plus a separate
``rotate_yaw_bias`` (~-123 deg, calibrated to the front-leg position). EXT_R below
is that flip, leveled to the averaged ground normal over several stationary windows
(floor tilt ~1-2 deg, floor below the robot) and yawed so the map heading matches
the trajectory. Validated against the official "default imu reading"
[yaw -57.9, pitch -8.1, roll -167.3] (agrees to a few degrees).
"""

import numpy as np

# base_link <- lidar rotation (lidar points -> base frame: p_base = EXT_R @ p_lidar + EXT_T)
EXT_R = np.array(
[
[0.504486, -0.843018, 0.186588],
[-0.853668, -0.519391, -0.038544],
[0.129405, -0.139840, -0.981682],
],
dtype=np.float64,
)
EXT_T = np.array([0.28216, 0.0, -0.02467], dtype=np.float64)

# base_link -> camera_optical (from dimos GO2 connection BASE_TO_OPTICAL):
# translate 0.3m forward, then rotate into the optical frame.
CAM_T = np.array([0.30, 0.0, 0.0], dtype=np.float64)
CAM_Q = np.array([-0.5, 0.5, -0.5, 0.5], dtype=np.float64) # xyzw
Loading
Loading