Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,6 @@ htmlcov/

# Memory2 autorecord
recording*.db

# DogOps local run state and hardware logs
/.dogops/
46 changes: 40 additions & 6 deletions dimos/agents/mcp/mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import json
import os
import time
from typing import TYPE_CHECKING, Any
from typing import Any

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
Expand All @@ -31,13 +31,10 @@
from dimos.agents.annotation import skill
from dimos.agents.mcp import tool_stream
from dimos.core.core import rpc
from dimos.core.module import Module
from dimos.core.module import Module, SkillInfo
from dimos.core.rpc_client import RpcCall, RPCClient
from dimos.utils.logging_config import setup_logger

if TYPE_CHECKING:
from dimos.core.module import SkillInfo

logger = setup_logger()


Expand Down Expand Up @@ -280,7 +277,7 @@ def on_system_modules(self, modules: list[RPCClient]) -> None:
# TODO: this is a bit hacky, also not thread-safe
assert self.rpc is not None
app.state.skills = [
skill_info for module in modules for skill_info in (module.get_skills() or [])
skill_info for module in modules for skill_info in _module_class_skills(module)
]
app.state.rpc_calls = {
skill_info.func_name: RpcCall(
Expand Down Expand Up @@ -342,3 +339,40 @@ def _start_server(self, port: int | None = None) -> None:
loop = self._loop
assert loop is not None
self._serve_future = asyncio.run_coroutine_threadsafe(server.serve(), loop)


def _module_class_skills(module: RPCClient) -> list[SkillInfo]:
    """Build skill metadata for ``module`` from its class, without any RPC.

    Startup discovery runs inside the MCP server's own RPC call. Issuing a
    remote ``get_skills`` RPC to every module from there can block module
    startup long enough that MCP comes up with no tools. The proxy already
    carries the module class and skill schemas are static, so the metadata
    is derived locally; tool execution still goes through RPC.

    Args:
        module: RPC proxy whose ``actor_class`` attribute (if present) is
            the class that gets inspected.

    Returns:
        One ``SkillInfo`` per callable class attribute that carries a
        ``__skill__`` marker, in ``dir()`` order. Empty when the proxy has
        no ``actor_class``.
    """
    from langchain_core.tools import tool

    actor_class = getattr(module, "actor_class", None)
    if actor_class is None:
        return []

    discovered: list[SkillInfo] = []
    for attr_name in dir(actor_class):
        candidate = getattr(actor_class, attr_name, None)
        if not (callable(candidate) and hasattr(candidate, "__skill__")):
            continue

        schema = tool(candidate).args_schema.model_json_schema()

        # Attributes are looked up on the class, so a method's ``self``
        # parameter can show up in the generated schema; remove it from both
        # the property map and the required list.
        props = schema.get("properties")
        if isinstance(props, dict):
            props.pop("self", None)
        required_args = schema.get("required")
        if isinstance(required_args, list) and "self" in required_args:
            schema["required"] = [arg for arg in required_args if arg != "self"]

        discovered.append(
            SkillInfo(
                class_name=actor_class.__name__,
                func_name=attr_name,
                args_schema=json.dumps(schema),
            )
        )
    return discovered
Comment on lines +360 to +378
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 dir() iteration may expose inherited / unrelated __skill__-marked attributes

dir(actor_class) walks the entire MRO — including all Module base-class methods and any mixin that happens to carry __skill__. If a base class ever gains a @skill-decorated method, it will appear as a skill for every deployed module rather than just the one that defines it, inflating the tool list and potentially registering a duplicate func_name key in app.state.rpc_calls. Consider restricting to vars(actor_class) to capture only the methods defined directly on the concrete class.

6 changes: 6 additions & 0 deletions dimos/experimental/dogops/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""DogOps SiteOps Agent offline core."""

from dimos.experimental.dogops.config_loader import load_dogops_config
from dimos.experimental.dogops.mission_engine import run_offline_simulation

# Names re-exported at the package root.
__all__ = ["load_dogops_config", "run_offline_simulation"]
48 changes: 48 additions & 0 deletions dimos/experimental/dogops/blueprints.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from __future__ import annotations

from dataclasses import dataclass

from dimos.experimental.dogops.dashboard import DogOpsDashboardModule
from dimos.experimental.dogops.nav_eval import DogOpsNavEvalModule
from dimos.experimental.dogops.observation_module import DogOpsObservationModule
from dimos.experimental.dogops.skills import DogOpsSkillContainer


@dataclass(frozen=True)
class DogOpsBlueprintMetadata:
    """Immutable, import-free description of a DogOps blueprint.

    ``build_unitree_go2_dogops_blueprint`` returns an instance of this class
    in place of a real blueprint when the DimOS modules it needs cannot be
    imported.
    """

    # Blueprint identifier, e.g. "unitree-go2-dogops".
    name: str
    # Names of the modules the blueprint is composed of.
    modules: tuple[str, ...]
    # Robot model the blueprint targets.
    robot_model: str = "unitree_go2"
    # NOTE(review): presumably whether an MCP client must be attached - confirm.
    requires_mcp_client: bool = False
    # NOTE(review): presumably marks this object as a stand-in for the real
    # blueprint - confirm against consumers.
    fallback: bool = True


def build_unitree_go2_dogops_blueprint() -> object:
    """Assemble the Unitree Go2 DogOps blueprint.

    Returns:
        The blueprint produced by ``autoconnect(...)`` when the required
        DimOS modules import cleanly; otherwise a ``DogOpsBlueprintMetadata``
        placeholder that only lists the module names.
    """
    try:  # pragma: no cover - exercised only inside a full DimOS checkout.
        from dimos.agents.mcp.mcp_server import McpServer
        from dimos.core.coordination.blueprints import autoconnect
        from dimos.robot.unitree.go2.blueprints.smart.unitree_go2 import unitree_go2_markers
    except ModuleNotFoundError:
        # A required module is missing: describe the blueprint by name only.
        fallback_modules = (
            "unitree_go2_markers",
            "DogOpsObservationModule",
            "DogOpsSkillContainer",
            "McpServer",
            "DogOpsDashboardModule",
            "DogOpsNavEvalModule",
        )
        return DogOpsBlueprintMetadata(
            name="unitree-go2-dogops",
            modules=fallback_modules,
        )

    components = (
        unitree_go2_markers,
        DogOpsObservationModule.blueprint(),
        DogOpsSkillContainer.blueprint(),
        DogOpsDashboardModule.blueprint(),
        DogOpsNavEvalModule.blueprint(),
        McpServer.blueprint(),
    )
    connected = autoconnect(*components)
    return connected.global_config(n_workers=12, robot_model="unitree_go2")


# Evaluated at import time: importing this module calls the factory, which in
# turn attempts the DimOS imports inside it.
unitree_go2_dogops = build_unitree_go2_dogops_blueprint()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Module-level blueprint construction triggers imports at list time

build_unitree_go2_dogops_blueprint() is called at module-import time, which means that importing dimos.experimental.dogops.blueprints (e.g. when dimos list walks all_blueprints) immediately executes every import inside the try block, including dimos.agents.mcp.mcp_server and dimos.core.coordination.blueprints. This is the same pattern that the MCP change in this PR is trying to fix in order to avoid deadlocks. The preferred DimOS pattern is a lazy factory function.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

96 changes: 96 additions & 0 deletions dimos/experimental/dogops/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from __future__ import annotations

import argparse
from pathlib import Path

from dimos.experimental.dogops.config_loader import (
DEFAULT_MANIFEST,
DEFAULT_MISSION,
DEFAULT_POLICY,
DEFAULT_SITE,
load_dogops_config,
)
from dimos.experimental.dogops.dashboard import serve_dashboard
from dimos.experimental.dogops.mission_engine import run_offline_simulation
from dimos.experimental.dogops.report import render_report_markdown
from dimos.experimental.dogops.store import DogOpsStore


def build_parser() -> argparse.ArgumentParser:
    """Create the ``dogops`` argument parser.

    The parser requires one of four subcommands, stored on ``command``:
    ``validate`` and ``simulate`` take the shared config-path options,
    while ``report`` and ``serve`` operate on an existing run directory.

    Returns:
        The fully configured top-level parser.
    """
    root = argparse.ArgumentParser(prog="dogops", description="DogOps offline CLI")
    commands = root.add_subparsers(dest="command", required=True)

    # Default run directory shared by simulate/report/serve.
    latest_run = ".dogops/runs/latest"

    validate_cmd = commands.add_parser("validate", help="Validate DogOps YAML configs")
    _add_config_args(validate_cmd)

    simulate_cmd = commands.add_parser("simulate", help="Run the offline DogOps mission")
    _add_config_args(simulate_cmd)
    simulate_cmd.add_argument("--out", default=latest_run)

    report_cmd = commands.add_parser(
        "report", help="Regenerate a report from a run directory"
    )
    report_cmd.add_argument("--run", default=latest_run)
    report_cmd.add_argument("--out", default=None)

    serve_cmd = commands.add_parser(
        "serve", help="Serve a local dashboard for a run directory"
    )
    serve_cmd.add_argument("--run", default=latest_run)
    serve_cmd.add_argument("--host", default="127.0.0.1")
    serve_cmd.add_argument("--port", type=int, default=8765)

    return root


def _add_config_args(parser: argparse.ArgumentParser) -> None:
    """Register the config-path options shared by ``validate`` and ``simulate``.

    Adds ``--site``, ``--manifest``, ``--mission`` and ``--policy``, each
    defaulting to the string form of the matching ``DEFAULT_*`` path.
    """
    config_defaults = (
        ("--site", DEFAULT_SITE),
        ("--manifest", DEFAULT_MANIFEST),
        ("--mission", DEFAULT_MISSION),
        ("--policy", DEFAULT_POLICY),
    )
    for flag, default_path in config_defaults:
        parser.add_argument(flag, default=str(default_path))


def main(argv: list[str] | None = None) -> int:
    """Run the DogOps command-line interface.

    Args:
        argv: Arguments to parse. ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when the selected subcommand completes.

    Raises:
        SystemExit: Raised by argparse on invalid usage, and by the
            ``report`` subcommand when the run directory holds no state.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    if args.command == "validate":
        config = load_dogops_config(args.site, args.manifest, args.mission, args.policy)
        print(
            "validated "
            f"site={config.site.site_id} "
            f"manifest={config.manifest.manifest_id} "
            f"mission={config.mission.mission_id}"
        )
        return 0

    if args.command == "simulate":
        state = run_offline_simulation(
            site=args.site,
            manifest=args.manifest,
            mission=args.mission,
            policy=args.policy,
            out=args.out,
        )
        print(f"run_id={state.run.id}")
        # The run state may be an enum or a plain value; print its value either way.
        print(f"state={getattr(state.run.state, 'value', state.run.state)}")
        print(f"report={Path(args.out) / 'report.md'}")
        return 0

    if args.command == "report":
        store = DogOpsStore.load_existing(args.run)
        state = store.state
        if state is None:
            # Explicit check rather than ``assert`` so the guard still runs
            # under ``python -O``; parser.error() exits and never returns.
            parser.error(f"no DogOps run state found in {args.run}")
        content = render_report_markdown(state)
        out_path = Path(args.out) if args.out else Path(args.run) / "report.md"
        # A custom --out may point into a directory that does not exist yet.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(content, encoding="utf-8")
        store.write_report(state.run.id)
        print(f"report={out_path}")
        return 0

    if args.command == "serve":
        serve_dashboard(args.run, host=args.host, port=args.port)
        return 0

    # Defensive: the subparser is required, so argparse normally rejects
    # unknown commands before this point. parser.error() exits the process;
    # the return only matters if error() is overridden to return.
    parser.error(f"unknown command: {args.command}")
    return 2


if __name__ == "__main__":
    # Script entry point: use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
69 changes: 69 additions & 0 deletions dimos/experimental/dogops/config_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from __future__ import annotations

from pathlib import Path
from typing import Any

import yaml

from dimos.experimental.dogops.models import (
DogOpsConfig,
Manifest,
MissionConfig,
PolicyConfig,
SiteConfig,
)

# Default locations of the demo configs. These are relative paths, so they
# resolve against the current working directory when opened.
DEFAULT_SITE = Path("examples/dogops/site_demo.yaml")
DEFAULT_MANIFEST = Path("examples/dogops/manifest_demo.yaml")
DEFAULT_POLICY = Path("examples/dogops/policy_demo.yaml")
DEFAULT_MISSION = Path("examples/dogops/mission_demo.yaml")


def _read_yaml(path: Path) -> dict[str, Any]:
if not path.exists():
raise FileNotFoundError(f"DogOps config not found: {path}")
with path.open("r", encoding="utf-8") as handle:
data = yaml.safe_load(handle) or {}
if not isinstance(data, dict):
raise ValueError(f"DogOps config must be a mapping: {path}")
return data


def load_site_config(path: str | Path = DEFAULT_SITE) -> SiteConfig:
    """Read the YAML file at ``path`` and validate it as a ``SiteConfig``."""
    raw = _read_yaml(Path(path))
    return SiteConfig.model_validate(raw)


def load_manifest(path: str | Path = DEFAULT_MANIFEST) -> Manifest:
    """Read the YAML file at ``path`` and validate it as a ``Manifest``."""
    raw = _read_yaml(Path(path))
    return Manifest.model_validate(raw)


def load_policy(path: str | Path = DEFAULT_POLICY) -> PolicyConfig:
    """Read the YAML file at ``path`` and validate it as a ``PolicyConfig``."""
    raw = _read_yaml(Path(path))
    return PolicyConfig.model_validate(raw)


def load_mission(path: str | Path = DEFAULT_MISSION) -> MissionConfig:
    """Read the YAML file at ``path`` and validate it as a ``MissionConfig``."""
    raw = _read_yaml(Path(path))
    return MissionConfig.model_validate(raw)


def load_dogops_config(
    site_path: str | Path = DEFAULT_SITE,
    manifest_path: str | Path = DEFAULT_MANIFEST,
    mission_path: str | Path = DEFAULT_MISSION,
    policy_path: str | Path = DEFAULT_POLICY,
) -> DogOpsConfig:
    """Load all four DogOps config files into one ``DogOpsConfig``.

    Args:
        site_path: Site YAML file.
        manifest_path: Manifest YAML file.
        mission_path: Mission YAML file.
        policy_path: Policy YAML file.

    Returns:
        A ``DogOpsConfig`` holding the validated models plus a ``paths``
        mapping that records where each one was loaded from.
    """
    # Files are loaded in the order site, manifest, policy, mission; an error
    # from any loader propagates and stops the remaining loads.
    loaded_site = load_site_config(site_path)
    loaded_manifest = load_manifest(manifest_path)
    loaded_policy = load_policy(policy_path)
    loaded_mission = load_mission(mission_path)

    labelled_sources = (
        ("site", site_path),
        ("manifest", manifest_path),
        ("policy", policy_path),
        ("mission", mission_path),
    )
    source_paths = {label: Path(source) for label, source in labelled_sources}

    return DogOpsConfig(
        site=loaded_site,
        manifest=loaded_manifest,
        policy=loaded_policy,
        mission=loaded_mission,
        paths=source_paths,
    )
8 changes: 8 additions & 0 deletions dimos/experimental/dogops/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import pytest

from dimos.experimental.dogops.config_loader import load_site_config


@pytest.fixture
def dogops_site():
    """Provide the site config loaded from ``load_site_config``'s default path."""
    site_config = load_site_config()
    return site_config
Loading