Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/rust-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ jobs:
os:
- ubuntu-latest
- macos-latest
- windows-latest

steps:
- name: Check out repository
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ including:
* [`openai_oauth`](./examples/openai_oauth.rs): OpenAI OAuth-backed provider
setup.

The builtin runtime shell uses `/bin/sh` on Unix hosts and `cmd.exe` on
Windows hosts. In the OpenAI OAuth example, `PersistentTokenStoreKind::Auto` is
likewise resolved per platform: macOS uses Keychain, while Windows and Linux use
the file-backed store.

## Getting Started

If you want to explore the workspace after cloning the repository, the quickest
Expand Down
3 changes: 3 additions & 0 deletions mentra/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,6 @@ libc = "0.2"
regex = "1.12.2"
rand = { version = "0.9.2", optional = true }
ring = { version = "0.17.14", optional = true }

[target.'cfg(windows)'.dependencies]
windows-sys = { version = "0.61.2", features = ["Win32_Foundation", "Win32_System_Threading"] }
3 changes: 3 additions & 0 deletions mentra/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,14 @@ Mentra's builtin runtime tools are available by default, but command execution i
- foreground shell execution is disabled by default
- background command execution is disabled by default
- `RuntimePolicy::permissive()` enables both shell and background command execution
- builtin shell commands run through `/bin/sh -c` on Unix and `cmd.exe /C` on Windows
- runtime policy still enforces hard limits such as working-directory roots, file read/write roots, allowed environment variables, timeouts, output caps, and background task limits
- semantic review is opt-in through `RuntimeBuilder::with_tool_authorizer(...)`

Use the default policy when you want a safer runtime surface, and opt into `RuntimePolicy::permissive()` only when you are intentionally building a coding-agent or automation workflow that should be able to act on the local workspace.

If you need different command semantics, such as PowerShell on Windows or a sandboxed executor, replace the default local executor with `RuntimeBuilder::with_executor(...)`.

## Tool Authorization

Mentra can run a caller-provided authorization pass before any tool executes. This is the recommended integration point for LLM-based security review, human approval, or custom policy engines.
Expand Down
12 changes: 9 additions & 3 deletions mentra/src/agent/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,11 +262,17 @@ mod tests {

use crate::provider::{ReasoningEffort, ReasoningOptions};

/// Returns `<system temp dir>/mentra-agent-config-tests/<label>`.
///
/// Building the path from `std::env::temp_dir()` keeps the tests free of
/// hard-coded, platform-specific locations.
fn test_path(label: &str) -> PathBuf {
    let mut location = std::env::temp_dir();
    // Group every path produced by these tests under one shared folder.
    location.push("mentra-agent-config-tests");
    location.push(label);
    location
}

#[test]
fn explicit_paths_override_defaults() {
let tasks_dir = PathBuf::from("/tmp/custom-tasks");
let team_dir = PathBuf::from("/tmp/custom-team");
let transcript_dir = PathBuf::from("/tmp/custom-transcripts");
let tasks_dir = test_path("custom-tasks");
let team_dir = test_path("custom-team");
let transcript_dir = test_path("custom-transcripts");

let config = AgentConfig {
task: TaskConfig {
Expand Down
81 changes: 56 additions & 25 deletions mentra/src/agent/tests/runtime_snapshot.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
use tokio::sync::watch;
use std::{
sync::atomic::{AtomicU64, Ordering},
time::{SystemTime, UNIX_EPOCH},
};

use tokio::{
sync::watch,
time::{Duration, timeout},
};

use crate::{
BackgroundTaskStatus, BuiltinProvider, ContentBlock, Role,
agent::{AgentSnapshot, AgentStatus},
provider::{ContentBlockDelta, ContentBlockStart, ProviderEvent},
runtime::{Runtime, RuntimePolicy},
runtime::{Runtime, RuntimePolicy, SqliteRuntimeStore},
};

use super::support::{ScriptedProvider, controlled_stream, model_info, ok_stream};
use super::support::{
ScriptedProvider, background_success_command, command_input_json, controlled_stream,
model_info, ok_stream,
};

#[tokio::test]
async fn snapshot_progresses_during_streaming() {
Expand Down Expand Up @@ -85,6 +96,7 @@ async fn snapshot_progresses_during_streaming() {

#[tokio::test]
async fn snapshot_updates_when_background_task_finishes() {
let command = background_success_command("bg-done", 50);
let model = model_info("model", BuiltinProvider::Anthropic);
let provider = ScriptedProvider::new(
BuiltinProvider::Anthropic,
Expand All @@ -105,9 +117,7 @@ async fn snapshot_updates_when_background_task_finishes() {
},
ProviderEvent::ContentBlockDelta {
index: 0,
delta: ContentBlockDelta::ToolUseInputJson(
r#"{"command":"sleep 0.05; printf bg-done"}"#.to_string(),
),
delta: ContentBlockDelta::ToolUseInputJson(command_input_json(&command)),
},
ProviderEvent::ContentBlockStopped { index: 0 },
ProviderEvent::MessageStopped,
Expand All @@ -133,6 +143,7 @@ async fn snapshot_updates_when_background_task_finishes() {
);

let runtime = Runtime::builder()
.with_store(temp_store("snapshot-background-finish"))
.with_policy(RuntimePolicy::permissive())
.with_provider_instance(provider)
.build()
Expand All @@ -147,39 +158,59 @@ async fn snapshot_updates_when_background_task_finishes() {
.await
.unwrap();

wait_for_background_status(&mut snapshot, BackgroundTaskStatus::Running).await;
wait_for_background_status(&mut snapshot, BackgroundTaskStatus::Finished).await;
assert_eq!(snapshot.borrow().background_tasks.len(), 1);
assert_eq!(
assert!(
snapshot.borrow().background_tasks[0]
.output_preview
.as_deref(),
Some("bg-done")
.as_deref()
.is_some_and(|preview| preview.contains("bg-done"))
);
}

/// Counter that `temp_store` increments once per call; the value it reads is
/// embedded in the generated store file name alongside a timestamp.
static NEXT_TEMP_ID: AtomicU64 = AtomicU64::new(1);

/// Creates a `SqliteRuntimeStore` pointing at a freshly named file in the
/// system temp directory.
///
/// The file name is made of `label`, the current time in nanoseconds since the
/// Unix epoch, and the next value of `NEXT_TEMP_ID`.
///
/// # Panics
///
/// Panics if the system clock reports a time before the Unix epoch.
fn temp_store(label: &str) -> SqliteRuntimeStore {
    let sequence = NEXT_TEMP_ID.fetch_add(1, Ordering::Relaxed);
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system time")
        .as_nanos();
    let file_name = format!("mentra-runtime-store-{label}-{nanos}-{sequence}.sqlite");
    let location = std::env::temp_dir().join(file_name);
    SqliteRuntimeStore::new(location)
}

async fn wait_for_status(receiver: &mut watch::Receiver<AgentSnapshot>, status: AgentStatus) {
loop {
if receiver.borrow().status == status {
return;
timeout(Duration::from_secs(90), async {
loop {
if receiver.borrow().status == status {
return;
}
receiver.changed().await.unwrap();
}
receiver.changed().await.unwrap();
}
})
.await
.unwrap_or_else(|_| panic!("timed out waiting for agent status {status:?}"));
}

async fn wait_for_background_status(
receiver: &mut watch::Receiver<AgentSnapshot>,
status: BackgroundTaskStatus,
) {
loop {
if receiver
.borrow()
.background_tasks
.iter()
.any(|task| task.status == status)
{
return;
timeout(Duration::from_secs(90), async {
loop {
if receiver
.borrow()
.background_tasks
.iter()
.any(|task| task.status == status)
{
return;
}
receiver.changed().await.unwrap();
}
receiver.changed().await.unwrap();
}
})
.await
.unwrap_or_else(|_| panic!("timed out waiting for background status {status:?}"));
}
Loading
Loading