oops-rs · WendellXY · Mar 26, 2026 · Mar 26, 2026 · Mar 26, 2026 · Mar 26, 2026
diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml
@@ -24,6 +24,7 @@ jobs:
         os:
           - ubuntu-latest
           - macos-latest
+          - windows-latest
 
     steps:
       - name: Check out repository

diff --git a/README.md b/README.md
@@ -106,6 +106,11 @@ including:
 * [`openai_oauth`](./examples/openai_oauth.rs): OpenAI OAuth-backed provider
   setup.
 
+The builtin runtime shell uses `/bin/sh` on Unix hosts and `cmd.exe` on
+Windows hosts. The OpenAI OAuth example's `PersistentTokenStoreKind::Auto`
+is platform-dependent as well: macOS uses the Keychain, while Windows and
+Linux use the file-backed store.
+
 ## Getting Started
 
 If you want to explore the workspace after cloning the repository, the quickest

diff --git a/mentra/Cargo.toml b/mentra/Cargo.toml
@@ -41,3 +41,6 @@ libc = "0.2"
 regex = "1.12.2"
 rand = { version = "0.9.2", optional = true }
 ring = { version = "0.17.14", optional = true }
+
+[target.'cfg(windows)'.dependencies]
+windows-sys = { version = "0.61.2", features = ["Win32_Foundation", "Win32_System_Threading"] }
diff --git a/mentra/README.md b/mentra/README.md
@@ -143,11 +143,14 @@ Mentra's builtin runtime tools are available by default, but command execution i
 - foreground shell execution is disabled by default
 - background command execution is disabled by default
 - `RuntimePolicy::permissive()` enables both shell and background command execution
+- builtin shell commands run through `/bin/sh -c` on Unix and `cmd.exe /C` on Windows
 - runtime policy still enforces hard limits such as working-directory roots, file read/write roots, allowed environment variables, timeouts, output caps, and background task limits
 - semantic review is opt-in through `RuntimeBuilder::with_tool_authorizer(...)`
 
 Use the default policy when you want a safer runtime surface, and opt into `RuntimePolicy::permissive()` only when you are intentionally building a coding-agent or automation workflow that should be able to act on the local workspace.
 
+If you need different command semantics, such as PowerShell on Windows or a sandboxed executor, replace the default local executor with `RuntimeBuilder::with_executor(...)`.
+
 ## Tool Authorization
 
 Mentra can run a caller-provided authorization pass before any tool executes. This is the recommended integration point for LLM-based security review, human approval, or custom policy engines.

diff --git a/mentra/src/agent/config.rs b/mentra/src/agent/config.rs
@@ -262,11 +262,17 @@ mod tests {
 
     use crate::provider::{ReasoningEffort, ReasoningOptions};
 
+    fn test_path(label: &str) -> PathBuf {
+        std::env::temp_dir()
+            .join("mentra-agent-config-tests")
+            .join(label)
+    }
+
     #[test]
     fn explicit_paths_override_defaults() {
-        let tasks_dir = PathBuf::from("/tmp/custom-tasks");
-        let team_dir = PathBuf::from("/tmp/custom-team");
-        let transcript_dir = PathBuf::from("/tmp/custom-transcripts");
+        let tasks_dir = test_path("custom-tasks");
+        let team_dir = test_path("custom-team");
+        let transcript_dir = test_path("custom-transcripts");
 
         let config = AgentConfig {
             task: TaskConfig {

diff --git a/mentra/src/agent/tests/runtime_snapshot.rs b/mentra/src/agent/tests/runtime_snapshot.rs
@@ -1,13 +1,24 @@
-use tokio::sync::watch;
+use std::{
+    sync::atomic::{AtomicU64, Ordering},
+    time::{SystemTime, UNIX_EPOCH},
+};
+
+use tokio::{
+    sync::watch,
+    time::{Duration, timeout},
+};
 
 use crate::{
     BackgroundTaskStatus, BuiltinProvider, ContentBlock, Role,
     agent::{AgentSnapshot, AgentStatus},
     provider::{ContentBlockDelta, ContentBlockStart, ProviderEvent},
-    runtime::{Runtime, RuntimePolicy},
+    runtime::{Runtime, RuntimePolicy, SqliteRuntimeStore},
 };
 
-use super::support::{ScriptedProvider, controlled_stream, model_info, ok_stream};
+use super::support::{
+    ScriptedProvider, background_success_command, command_input_json, controlled_stream,
+    model_info, ok_stream,
+};
 
 #[tokio::test]
 async fn snapshot_progresses_during_streaming() {
@@ -85,6 +96,7 @@ async fn snapshot_progresses_during_streaming() {
 
 #[tokio::test]
 async fn snapshot_updates_when_background_task_finishes() {
+    let command = background_success_command("bg-done", 50);
     let model = model_info("model", BuiltinProvider::Anthropic);
     let provider = ScriptedProvider::new(
         BuiltinProvider::Anthropic,
@@ -105,9 +117,7 @@ async fn snapshot_updates_when_background_task_finishes() {
                 },
                 ProviderEvent::ContentBlockDelta {
                     index: 0,
-                    delta: ContentBlockDelta::ToolUseInputJson(
-                        r#"{"command":"sleep 0.05; printf bg-done"}"#.to_string(),
-                    ),
+                    delta: ContentBlockDelta::ToolUseInputJson(command_input_json(&command)),
                 },
                 ProviderEvent::ContentBlockStopped { index: 0 },
                 ProviderEvent::MessageStopped,
@@ -133,6 +143,7 @@ async fn snapshot_updates_when_background_task_finishes() {
     );
 
     let runtime = Runtime::builder()
+        .with_store(temp_store("snapshot-background-finish"))
         .with_policy(RuntimePolicy::permissive())
         .with_provider_instance(provider)
         .build()
@@ -147,39 +158,59 @@ async fn snapshot_updates_when_background_task_finishes() {
         .await
         .unwrap();
 
-    wait_for_background_status(&mut snapshot, BackgroundTaskStatus::Running).await;
     wait_for_background_status(&mut snapshot, BackgroundTaskStatus::Finished).await;
     assert_eq!(snapshot.borrow().background_tasks.len(), 1);
-    assert_eq!(
+    assert!(
         snapshot.borrow().background_tasks[0]
             .output_preview
-            .as_deref(),
-        Some("bg-done")
+            .as_deref()
+            .is_some_and(|preview| preview.contains("bg-done"))
     );
 }
 
+static NEXT_TEMP_ID: AtomicU64 = AtomicU64::new(1);
+
+fn temp_store(label: &str) -> SqliteRuntimeStore {
+    let unique = NEXT_TEMP_ID.fetch_add(1, Ordering::Relaxed);
+    let timestamp = SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .expect("system time")
+        .as_nanos();
+    SqliteRuntimeStore::new(std::env::temp_dir().join(format!(
+        "mentra-runtime-store-{label}-{timestamp}-{unique}.sqlite"
+    )))
+}
+
 async fn wait_for_status(receiver: &mut watch::Receiver<AgentSnapshot>, status: AgentStatus) {
-    loop {
-        if receiver.borrow().status == status {
-            return;
+    timeout(Duration::from_secs(90), async {
+        loop {
+            if receiver.borrow().status == status {
+                return;
+            }
+            receiver.changed().await.unwrap();
         }
-        receiver.changed().await.unwrap();
-    }
+    })
+    .await
+    .unwrap_or_else(|_| panic!("timed out waiting for agent status {status:?}"));
 }
 
 async fn wait_for_background_status(
     receiver: &mut watch::Receiver<AgentSnapshot>,
     status: BackgroundTaskStatus,
 ) {
-    loop {
-        if receiver
-            .borrow()
-            .background_tasks
-            .iter()
-            .any(|task| task.status == status)
-        {
-            return;
+    timeout(Duration::from_secs(90), async {
+        loop {
+            if receiver
+                .borrow()
+                .background_tasks
+                .iter()
+                .any(|task| task.status == status)
+            {
+                return;
+            }
+            receiver.changed().await.unwrap();
         }
-        receiver.changed().await.unwrap();
-    }
+    })
+    .await
+    .unwrap_or_else(|_| panic!("timed out waiting for background status {status:?}"));
 }