diff --git a/dimos/agents/skills/speak_skill.py b/dimos/agents/skills/speak_skill.py index b46de157c4..027019a781 100644 --- a/dimos/agents/skills/speak_skill.py +++ b/dimos/agents/skills/speak_skill.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import threading import time @@ -28,6 +29,21 @@ logger = setup_logger() +def openai_api_key_set() -> str | None: + """Blueprint requirement check: SpeakSkill uses OpenAI text-to-speech, which needs + OPENAI_API_KEY. Returns None if set, else a clear message (mirrors ollama_installed). + Lets a missing key fail fast at blueprint build instead of crashing later in start().""" + if os.environ.get("OPENAI_API_KEY"): + return None + return ( + "OPENAI_API_KEY is not set. The agentic blueprint uses OpenAI for text-to-speech " + "(SpeakSkill), and by default for the LLM agent too.\n" + "\n" + " Set it with: export OPENAI_API_KEY=\n" + " Get a key at https://platform.openai.com/api-keys" + ) + + class SpeakSkill(Module): _tts_node: OpenAITTSNode | None = None _audio_output: SounddeviceAudioOutput | None = None diff --git a/dimos/robot/unitree/go2/blueprints/agentic/_common_agentic.py b/dimos/robot/unitree/go2/blueprints/agentic/_common_agentic.py index 93312225bc..c51ef77bd5 100644 --- a/dimos/robot/unitree/go2/blueprints/agentic/_common_agentic.py +++ b/dimos/robot/unitree/go2/blueprints/agentic/_common_agentic.py @@ -15,7 +15,7 @@ from dimos.agents.skills.navigation import NavigationSkillContainer from dimos.agents.skills.person_follow import PersonFollowSkillContainer -from dimos.agents.skills.speak_skill import SpeakSkill +from dimos.agents.skills.speak_skill import SpeakSkill, openai_api_key_set from dimos.agents.web_human_input import WebInput from dimos.core.coordination.blueprints import autoconnect from dimos.robot.unitree.go2.connection import GO2Connection @@ -27,6 +27,11 @@ UnitreeSkillContainer.blueprint(), WebInput.blueprint(), SpeakSkill.blueprint(), +).requirements( + # SpeakSkill (OpenAI TTS) needs OPENAI_API_KEY — fail fast at blueprint build with a + # clear message instead of an ExceptionGroup deep in start_all_modules. Mirrors the + # ollama_installed gate used by the -ollama variant. + openai_api_key_set, ) __all__ = ["_common_agentic"] diff --git a/docs/installation/osx.md b/docs/installation/osx.md index 8f190c1c5e..33e418be93 100644 --- a/docs/installation/osx.md +++ b/docs/installation/osx.md @@ -4,10 +4,12 @@ # install homebrew /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" # install dependencies -brew install gnu-sed gcc portaudio git-lfs libjpeg-turbo python pre-commit +brew install gnu-sed gcc portaudio git-lfs libjpeg-turbo python # install uv curl -LsSf https://astral.sh/uv/install.sh | sh && export PATH="$HOME/.local/bin:$PATH" +# NOTE: the `export` above only affects the current shell. Open a new terminal so `uv` is on +# PATH in later sessions — the installer adds it to your shell profile (e.g. ~/.zprofile). ``` ## Using DimOS as a library diff --git a/docs/installation/ubuntu.md b/docs/installation/ubuntu.md index 60c5b986fe..f9b8fc5f5f 100644 --- a/docs/installation/ubuntu.md +++ b/docs/installation/ubuntu.md @@ -2,10 +2,20 @@ ```sh skip sudo apt-get update -sudo apt-get install -y curl g++ portaudio19-dev git-lfs libturbojpeg python3-dev pre-commit +# Required system libraries. libturbojpeg + portaudio19-dev cover image/audio; libgl1 + +# libegl1 are the OpenGL runtime for open3d and rerun-sdk (both always-installed core deps). +# Without libgl1/libegl1 the visualizer fails at runtime with +# "libGL.so.1: cannot open shared object file" (e.g. on minimal/headless/Docker installs). +sudo apt-get install -y curl g++ portaudio19-dev git-lfs libturbojpeg libgl1 libegl1 python3-dev + +# optional: graphviz enables blueprint-graph visualization. Without it dimos logs +# "graphviz not found, skipping blueprint graph" at startup (everything else still works). +# sudo apt-get install -y graphviz # install uv curl -LsSf https://astral.sh/uv/install.sh | sh && export PATH="$HOME/.local/bin:$PATH" +# NOTE: the `export` above only affects the current shell. Open a new terminal (or run +# `source ~/.bashrc`) so `uv` is on PATH in later sessions — the installer also adds it there. ``` ## Using DimOS as a library diff --git a/docs/platforms/quadruped/go2/index.md b/docs/platforms/quadruped/go2/index.md index 4e392f06ca..f02f46756e 100644 --- a/docs/platforms/quadruped/go2/index.md +++ b/docs/platforms/quadruped/go2/index.md @@ -27,7 +27,7 @@ uv pip install 'dimos[base,unitree]' ```bash # Replay a recorded Go2 navigation session -# First run downloads ~2.4 GB of LiDAR/video data from LFS +# First run downloads ~85 MB from LFS (extracts to a ~220 MB SQLite replay DB) dimos --replay run unitree-go2 ``` @@ -110,6 +110,8 @@ From the command center ([localhost:7779](http://localhost:7779)): ```bash uv pip install 'dimos[base,unitree,sim]' +# First sim run also downloads assets: it git-clones mujoco_menagerie and pulls a +# ~60 MB mujoco_sim asset bundle from LFS (one-time; in addition to the replay data above). dimos --simulation run unitree-go2 ```