Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions autonomy/il/humanoid_act/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from humanoid_act.config import ACTConfig
from humanoid_act.dataset import ACTBatch, ACTChunkDataset, make_act_dataloaders
from humanoid_act.normalize import NormStats, load_or_compute_stats
from humanoid_act.eval import LocalCustomACTPolicy
from humanoid_act.policy import ACTPolicy

# Public names re-exported at package level; every entry is imported above.
# The list is kept in sorted order (uppercase names before lowercase ones).
__all__ = [
    "ACTBatch",
    "ACTChunkDataset",
    "ACTConfig",
    "ACTPolicy",
    "LocalCustomACTPolicy",
    "NormStats",
    "load_or_compute_stats",
    "make_act_dataloaders",
]
46 changes: 46 additions & 0 deletions autonomy/il/humanoid_act/checkpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Save/load humanoid ACT checkpoints."""

from __future__ import annotations

from pathlib import Path

import torch

from humanoid_act.config import ACTConfig
from humanoid_act.normalize import NormStats, save_stats
from humanoid_act.policy import ACTPolicy


def save_checkpoint(
    path: Path,
    *,
    policy: ACTPolicy,
    config: ACTConfig,
    stats: NormStats,
    step: int,
    val_loss: float | None = None,
) -> None:
    """Write a checkpoint file plus its ``config.json`` / ``stats.json`` sidecars.

    The torch payload is written to ``path``; the config and normalization
    stats are written next to it, inside ``path.parent`` (created on demand).

    Args:
        path: Destination file for the torch payload.
        policy: Policy whose ``state_dict()`` and optimizer state are stored.
        config: Configuration saved as ``config.json`` beside ``path``.
        stats: Normalization statistics saved as ``stats.json`` beside ``path``.
        step: Training step recorded in the payload.
        val_loss: Optional validation loss recorded in the payload.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    # Keyword form keeps the same keys and the same evaluation order as a
    # dict literal: policy weights first, then the optimizer state.
    checkpoint = dict(
        step=step,
        val_loss=val_loss,
        policy_state_dict=policy.state_dict(),
        optimizer_state_dict=policy.configure_optimizers().state_dict(),
    )
    torch.save(checkpoint, path)

    # Sidecar files live next to the checkpoint so load_policy can find them.
    config.save(target_dir / "config.json")
    save_stats(stats, target_dir / "stats.json")


def load_policy(checkpoint_path: Path, device: torch.device) -> tuple[ACTPolicy, ACTConfig, NormStats]:
    """Rebuild a policy, in eval mode, from a checkpoint and its sidecar files.

    ``config.json`` and ``stats.json`` are read from the directory that
    contains ``checkpoint_path``.

    Args:
        checkpoint_path: File holding a payload with a ``policy_state_dict`` entry.
        device: Device the weights are mapped to and the policy is moved onto.

    Returns:
        ``(policy, config, stats)``.
    """
    from humanoid_act.normalize import load_stats

    run_dir = checkpoint_path.parent
    cfg = ACTConfig.load(run_dir / "config.json")
    norm_stats = load_stats(run_dir / "stats.json")

    model = ACTPolicy(cfg)
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    saved = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(saved["policy_state_dict"])
    model.to(device)
    model.eval()
    return model, cfg, norm_stats
62 changes: 62 additions & 0 deletions autonomy/il/humanoid_act/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Training / model configuration for humanoid ACT."""

from __future__ import annotations

import json
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any


@dataclass
class ACTConfig:
    """Training / model configuration for one humanoid ACT run.

    The first four fields describe the dataset and have no defaults; the
    remaining fields are hyper-parameters with defaults. Instances round-trip
    through JSON via :meth:`save` and :meth:`load`.
    """

    # Dataset identity and dimensions (filled in by ``from_dataset_meta``).
    repo_id: str
    state_dim: int
    action_dim: int
    camera_names: list[str]
    # NOTE(review): the dataset helpers in this package default to
    # chunk_size=100 while this defaults to 50; pass the same value to both.
    chunk_size: int = 50
    kl_weight: float = 10.0
    # Optimizer settings.
    lr: float = 1e-5
    lr_backbone: float = 1e-5
    weight_decay: float = 1e-4
    # Model architecture settings.
    backbone: str = "resnet18"
    position_embedding: str = "sine"
    hidden_dim: int = 512
    dim_feedforward: int = 3200
    enc_layers: int = 4
    dec_layers: int = 7
    nheads: int = 8
    dropout: float = 0.1
    seed: int = 42

    @classmethod
    def from_dataset_meta(
        cls,
        repo_id: str,
        meta: Any,
        *,
        chunk_size: int = 50,
        **overrides: Any,
    ) -> ACTConfig:
        """Build a config whose dimensions are read from dataset metadata.

        Args:
            repo_id: Dataset identifier stored on the config.
            meta: Object exposing ``features`` (a mapping with
                ``"observation.state"`` and ``"action"`` entries that each
                carry a ``"shape"``) and ``camera_keys``.
            chunk_size: Value stored in ``chunk_size``.
            **overrides: Extra field values. Only names that are dataclass
                fields are applied; any other name is ignored.

        Returns:
            The populated configuration.
        """
        from dataclasses import fields

        cfg = cls(
            repo_id=repo_id,
            state_dim=int(meta.features["observation.state"]["shape"][0]),
            action_dim=int(meta.features["action"]["shape"][0]),
            camera_names=list(meta.camera_keys),
            chunk_size=chunk_size,
        )
        # Filter on real dataclass fields rather than hasattr(): hasattr also
        # matches methods, so an override named "save" or "load" would
        # otherwise replace the method on this instance.
        field_names = {f.name for f in fields(cls)}
        for key, value in overrides.items():
            if key in field_names:
                setattr(cfg, key, value)
        return cfg

    def save(self, path: Path) -> None:
        """Write the config to ``path`` as indented JSON, creating parent dirs."""
        path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding so the file does not depend on the platform locale.
        path.write_text(json.dumps(asdict(self), indent=2), encoding="utf-8")

    @classmethod
    def load(cls, path: Path) -> ACTConfig:
        """Read a config previously written by :meth:`save`."""
        return cls(**json.loads(path.read_text(encoding="utf-8")))
202 changes: 202 additions & 0 deletions autonomy/il/humanoid_act/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
"""LeRobot dataset → ACT training batches (action chunks + padding mask)."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any

import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset

from humanoid_act.normalize import NormStats, STATE_KEY, ACTION_KEY


@dataclass
class ACTBatch:
    """One collated ACT training batch (``B`` is the batch size).

    Instances are produced by ``collate_act_batch``, which stacks the
    per-sample tensors along a new leading dimension.
    """

    images: torch.Tensor # (B, num_cams, C, H, W), float32 in [0, 1]
    qpos: torch.Tensor # (B, state_dim)
    actions: torch.Tensor # (B, chunk_size, action_dim)
    is_pad: torch.Tensor # (B, chunk_size), True where padded


def _action_chunk_offsets(fps: float, chunk_size: int, camera_keys: list[str]) -> dict[str, list[float]]:
    """Build the per-key time offsets passed to the dataset as ``delta_timestamps``.

    The state key and every camera key request only offset ``0.0``; the action
    key requests ``chunk_size`` consecutive offsets spaced ``1 / fps`` apart,
    starting at ``0.0``.
    """
    frame_period = 1.0 / fps
    return {
        STATE_KEY: [0.0],
        ACTION_KEY: [k * frame_period for k in range(chunk_size)],
        # Each camera gets its own fresh single-offset list.
        **{name: [0.0] for name in camera_keys},
    }


class ACTChunkDataset(Dataset):
    """
    Random-frame ACT dataset backed by LeRobot.

    Each sample:
    - images at timestep t (all cameras), float32 in [0, 1]
    - proprio at t
    - action chunk [a_t, a_{t+1}, ...] padded to chunk_size
    - is_pad mask for positions after episode end
    """

    def __init__(
        self,
        repo_id: str,
        *,
        root: str | None = None,
        episodes: list[int] | None = None,
        chunk_size: int = 100,
        camera_keys: list[str] | None = None,
        stats: NormStats | None = None,
    ) -> None:
        """Open the LeRobot dataset and prepare chunked action loading.

        Args:
            repo_id: Dataset identifier forwarded to ``LeRobotDataset``.
            root: Optional dataset root forwarded to ``LeRobotDataset``.
            episodes: Episode indices to use; ``None`` selects every episode.
            chunk_size: Number of action steps returned per sample.
            camera_keys: Cameras to load; defaults to the cameras listed in
                the dataset metadata.
            stats: Optional normalization statistics applied to state/actions.

        Raises:
            ValueError: If no camera key is given or found in the metadata.
        """
        from lerobot.datasets.lerobot_dataset import LeRobotDataset

        # First pass without delta_timestamps: only its metadata (cameras,
        # fps) is used, to build the offsets for the real dataset below.
        meta_ds = LeRobotDataset(repo_id, root=root, episodes=episodes)
        self.camera_keys = camera_keys or list(meta_ds.meta.camera_keys)
        if not self.camera_keys:
            raise ValueError("No camera keys found in dataset metadata")

        self.chunk_size = chunk_size
        self.stats = stats
        self.fps = float(meta_ds.meta.fps)

        offsets = _action_chunk_offsets(self.fps, chunk_size, self.camera_keys)
        self._ds = LeRobotDataset(
            repo_id,
            root=root,
            episodes=episodes,
            delta_timestamps=offsets,
        )
        selected = episodes if episodes is not None else list(range(self._ds.meta.total_episodes))
        # Episode index -> episode length, used to build the padding mask.
        self._episode_lengths = {
            ep: int(self._ds.meta.episodes[ep]["length"]) for ep in selected
        }

    def __len__(self) -> int:
        """Return the number of frames in the underlying dataset."""
        return len(self._ds)

    @staticmethod
    def _as_unit_float(img: np.ndarray) -> np.ndarray:
        """Return ``img`` as a new float32 array scaled to [0, 1].

        Integer-typed frames are treated as 0..255 and divided by 255.
        Floating-point frames are taken to be in [0, 1] already and are only
        cast, so they are not scaled down a second time.
        """
        scaled = img.astype(np.float32)  # astype copies; input is untouched
        if np.issubdtype(img.dtype, np.integer):
            scaled /= 255.0
        return scaled

    def __getitem__(self, idx: int) -> dict[str, torch.Tensor]:
        """Return the ``images`` / ``qpos`` / ``actions`` / ``is_pad`` sample at ``idx``."""
        sample = self._ds[idx]
        ep = int(_tensor_scalar(sample["episode_index"]))
        frame = int(_tensor_scalar(sample["frame_index"]))

        # Chunk positions at or past the end of the episode are padding.
        steps_left = max(self._episode_lengths[ep] - frame, 0)
        is_pad = torch.arange(self.chunk_size) >= steps_left

        frames = []
        for cam in self.camera_keys:
            img = _to_numpy(sample[cam])
            # LeRobot: (T, C, H, W) with T=1 when delta is [0]
            if img.ndim == 4:
                img = img[0]
            # NOTE(review): float frames are no longer divided by 255; if the
            # inference path applies the same division, update it to match.
            frames.append(self._as_unit_float(img))
        image_stack = np.stack(frames, axis=0)  # (num_cams, C, H, W)

        # reshape(-1) always yields a 1-D vector, so no further squeeze is needed.
        qpos = _to_numpy(sample[STATE_KEY]).astype(np.float32).reshape(-1)
        actions = _to_numpy(sample[ACTION_KEY]).astype(np.float32)

        if self.stats is not None:
            qpos = self.stats.normalize_state(qpos)
            actions = self.stats.normalize_action(actions)

        return {
            "images": torch.from_numpy(image_stack),
            "qpos": torch.from_numpy(qpos),
            "actions": torch.from_numpy(actions),
            "is_pad": is_pad,
        }


def collate_act_batch(items: list[dict[str, torch.Tensor]]) -> ACTBatch:
    """Stack per-sample tensors along a new leading batch dimension.

    Args:
        items: Sample dicts, each with ``images``, ``qpos``, ``actions`` and
            ``is_pad`` entries.

    Returns:
        An ``ACTBatch`` whose fields are the stacked tensors.
    """
    field_names = ("images", "qpos", "actions", "is_pad")
    stacked = {
        name: torch.stack([item[name] for item in items], dim=0)
        for name in field_names
    }
    return ACTBatch(**stacked)


def split_episodes(num_episodes: int, train_ratio: float = 0.8, seed: int = 42) -> tuple[list[int], list[int]]:
    """Shuffle episode indices with a seeded RNG and cut them into train/val.

    Args:
        num_episodes: Total number of episodes; indices ``0..num_episodes-1``.
        train_ratio: Fraction of episodes assigned to the training list
            (the count is truncated toward zero).
        seed: Seed for the NumPy generator, so the split is reproducible.

    Returns:
        ``(train_indices, val_indices)`` as lists of Python ints.
    """
    shuffled = np.random.default_rng(seed).permutation(num_episodes).tolist()
    n_train = int(train_ratio * num_episodes)
    train_eps, val_eps = shuffled[:n_train], shuffled[n_train:]
    return train_eps, val_eps


def make_act_dataloaders(
    repo_id: str,
    *,
    root: str | None = None,
    chunk_size: int = 100,
    batch_size: int = 8,
    train_ratio: float = 0.8,
    seed: int = 42,
    stats: NormStats | None = None,
    num_workers: int = 0,
) -> tuple[DataLoader, DataLoader, NormStats]:
    """Create train/validation loaders over an episode-level split.

    Episodes are divided with ``split_episodes``. When ``stats`` is not given,
    normalization statistics are obtained via ``load_or_compute_stats`` from a
    dataset restricted to the training episodes.

    Args:
        repo_id: Dataset identifier forwarded to LeRobot.
        root: Optional dataset root forwarded to LeRobot.
        chunk_size: Action chunk length for both datasets.
        batch_size: Batch size for both loaders.
        train_ratio: Fraction of episodes used for training.
        seed: Seed for the episode shuffle.
        stats: Pre-computed normalization statistics, if available.
        num_workers: Worker count for both loaders.

    Returns:
        ``(train_loader, val_loader, stats)``.
    """
    from lerobot.datasets.lerobot_dataset import LeRobotDataset

    # Unfiltered dataset, opened only to learn how many episodes exist.
    episode_count = LeRobotDataset(repo_id, root=root).num_episodes
    train_eps, val_eps = split_episodes(episode_count, train_ratio=train_ratio, seed=seed)

    if stats is None:
        from humanoid_act.normalize import load_or_compute_stats

        train_only = LeRobotDataset(repo_id, root=root, episodes=train_eps)
        stats = load_or_compute_stats(train_only)

    def _dataset(eps: list[int]) -> ACTChunkDataset:
        return ACTChunkDataset(
            repo_id,
            root=root,
            episodes=eps,
            chunk_size=chunk_size,
            stats=stats,
        )

    def _loader(dataset: ACTChunkDataset, *, shuffle: bool) -> DataLoader:
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
            collate_fn=collate_act_batch,
            pin_memory=torch.cuda.is_available(),
        )

    # Both datasets are built before either loader.
    train_ds = _dataset(train_eps)
    val_ds = _dataset(val_eps)
    # Only the training loader shuffles.
    train_loader = _loader(train_ds, shuffle=True)
    val_loader = _loader(val_ds, shuffle=False)
    return train_loader, val_loader, stats


def _tensor_scalar(value: Any) -> int | float:
    """Unwrap a tensor via ``.item()``; return any other value unchanged."""
    return value.item() if isinstance(value, torch.Tensor) else value


def _to_numpy(value: Any) -> np.ndarray:
    """Convert a tensor or array-like to a NumPy array.

    Tensors are detached and moved to the CPU first; everything else goes
    through ``np.asarray``.
    """
    if not isinstance(value, torch.Tensor):
        return np.asarray(value)
    return value.detach().cpu().numpy()
5 changes: 5 additions & 0 deletions autonomy/il/humanoid_act/detr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Vendored ACT transformer backbone (from tonyzhaozh/act)."""

from humanoid_act.detr.build import build_act_model_and_optimizer

__all__ = ["build_act_model_and_optimizer"]
48 changes: 48 additions & 0 deletions autonomy/il/humanoid_act/detr/build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Build ACT model + optimizer without argparse side effects."""

from __future__ import annotations

from types import SimpleNamespace

import torch

from humanoid_act.detr.models import build_ACT_model


def build_act_model_and_optimizer(config) -> tuple[torch.nn.Module, torch.optim.Optimizer]:
    """Build the ACT model and an AdamW optimizer from a config object.

    Trainable parameters whose name contains ``"backbone"`` form a separate
    parameter group that uses ``config.lr_backbone``; all other trainable
    parameters use ``config.lr``.

    Args:
        config: Object exposing the attributes read below (e.g. ``ACTConfig``).

    Returns:
        ``(model, optimizer)``.
    """
    # NOTE(review): config.action_dim is not forwarded here; confirm the
    # vendored model gets its action size from elsewhere.
    args = SimpleNamespace(
        # Optimizer settings.
        lr=config.lr,
        lr_backbone=config.lr_backbone,
        weight_decay=config.weight_decay,
        # Vision backbone settings.
        backbone=config.backbone,
        dilation=False,
        position_embedding=config.position_embedding,
        camera_names=list(config.camera_names),
        masks=False,
        # Transformer settings.
        enc_layers=config.enc_layers,
        dec_layers=config.dec_layers,
        dim_feedforward=config.dim_feedforward,
        hidden_dim=config.hidden_dim,
        dropout=config.dropout,
        nheads=config.nheads,
        num_queries=config.chunk_size,
        pre_norm=False,
        state_dim=config.state_dim,
    )

    model = build_ACT_model(args)

    # Single pass over the parameters; frozen ones are left out of both groups.
    backbone_params = []
    other_params = []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        if "backbone" in name:
            backbone_params.append(param)
        else:
            other_params.append(param)

    param_groups = [
        {"params": other_params},
        {"params": backbone_params, "lr": args.lr_backbone},
    ]
    optimizer = torch.optim.AdamW(param_groups, lr=args.lr, weight_decay=args.weight_decay)
    return model, optimizer
9 changes: 9 additions & 0 deletions autonomy/il/humanoid_act/detr/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .detr_vae import build, build_cnnmlp


def build_ACT_model(args):
    """Return the model produced by ``detr_vae.build`` for ``args``."""
    model = build(args)
    return model


def build_CNNMLP_model(args):
    """Return the model produced by ``detr_vae.build_cnnmlp`` for ``args``."""
    model = build_cnnmlp(args)
    return model
Loading
Loading