synthetic-sciences · Aayam Bansal (aayambansal) · Jul 5, 2026 · Jul 5, 2026 · Jul 5, 2026 · Jul 5, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -23,6 +23,15 @@ jobs:
       - uses: ./.github/actions/setup-bun
       - run: bun run typecheck
 
+  format:
+    name: Format
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
+      - uses: ./.github/actions/setup-bun
+      - run: bun run format:check
+
   test:
     name: Test
     runs-on: ubuntu-latest

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -162,4 +162,3 @@ jobs:
               environment_url: `https://www.npmjs.com/package/@synsci/openscience/v/${version}`,
               description: `Published @synsci/openscience@${version}`,
             })
-
diff --git a/.prettierignore b/.prettierignore
@@ -1 +1,14 @@
-sst-env.d.ts
+sst-env.d.ts
+.claude/
+# generated output
+tooling/sdk/js/src/gen/
+tooling/sdk/js/src/v2/gen/
+tooling/sdk/openapi.json
+backend/cli/src/provider/models-snapshot.ts
+# build output
+dist/
+# prettier's mdx parser is deprecated and can mangle JSX-in-markdown
+*.mdx
+# curated skill content consumed verbatim by agents
+backend/cli/skills/
+.astro/
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -52,36 +52,36 @@ User request with agent name (e.g., "research")
 
 ### Session prompts (`src/session/prompt/`) (6 provider + 4 utility)
 
-| File | Purpose |
-|------|---------|
-| `anthropic.txt` | Claude models |
-| `beast.txt` | GPT-4o / o1 / o3 |
-| `codex_header.txt` | GPT-5 / Codex |
-| `gemini.txt` | Gemini models |
-| `qwen.txt` | Qwen / fallback |
-| `copilot-gpt-5.txt` | Copilot GPT-5 |
-| `plan.txt`, `plan-reminder-anthropic.txt` | Plan mode |
-| `build-switch.txt`, `max-steps.txt` | Utility |
+| File                                      | Purpose          |
+| ----------------------------------------- | ---------------- |
+| `anthropic.txt`                           | Claude models    |
+| `beast.txt`                               | GPT-4o / o1 / o3 |
+| `codex_header.txt`                        | GPT-5 / Codex    |
+| `gemini.txt`                              | Gemini models    |
+| `qwen.txt`                                | Qwen / fallback  |
+| `copilot-gpt-5.txt`                       | Copilot GPT-5    |
+| `plan.txt`, `plan-reminder-anthropic.txt` | Plan mode        |
+| `build-switch.txt`, `max-steps.txt`       | Utility          |
 
 Routing logic: `src/session/system.ts` → `SystemPrompt.provider(model)`.
 
 ### Agent prompts (`src/agent/prompt/`)
 
-| File | Agent(s) |
-|------|----------|
-| `research.txt` | `research` (default harness) |
-| `biology.txt` | `biology` (specialist) |
-| `physics.txt` | `physics` (specialist) |
-| `ml.txt` | `ml` (specialist) |
-| `physics-critique.txt` | `physics-critique` (subagent) |
-| `critique.txt` | `critique` (subagent) |
-| `reviewer.txt` | `reviewer` (subagent) |
-| `literature-review.txt` | `literature-review` (subagent) |
-| `write.txt` | `write` (subagent) |
-| `explore.txt` | `explore` (subagent) |
-| `plan.txt` | `plan` (mode, in `src/session/prompt/`) |
-| `compaction.txt` | `compaction` (system) |
-| `title.txt` | `title` (system) |
+| File                    | Agent(s)                                |
+| ----------------------- | --------------------------------------- |
+| `research.txt`          | `research` (default harness)            |
+| `biology.txt`           | `biology` (specialist)                  |
+| `physics.txt`           | `physics` (specialist)                  |
+| `ml.txt`                | `ml` (specialist)                       |
+| `physics-critique.txt`  | `physics-critique` (subagent)           |
+| `critique.txt`          | `critique` (subagent)                   |
+| `reviewer.txt`          | `reviewer` (subagent)                   |
+| `literature-review.txt` | `literature-review` (subagent)          |
+| `write.txt`             | `write` (subagent)                      |
+| `explore.txt`           | `explore` (subagent)                    |
+| `plan.txt`              | `plan` (mode, in `src/session/prompt/`) |
+| `compaction.txt`        | `compaction` (system)                   |
+| `title.txt`             | `title` (system)                        |
 
 Routing logic: `src/session/prompt.ts` injects agent workflow prompts by agent name (an if-chain in `insertReminders`).
 
@@ -107,14 +107,14 @@ Custom agents can be added via config file (`openscience.json` → `agent` key).
 
 ### Common failure patterns:
 
-| Symptom | Likely cause | Where to look |
-|---------|-------------|---------------|
-| Agent ignores skills | Skill catalog missing/truncated in prompt | `src/agent/prompt/{agent}.txt`, check toolkit section |
-| Wrong model used | Agent/model config incorrect | `src/agent/agent.ts` + `openscience.json` `agent` config |
-| Agent skips stages | Stage gates not mandatory in prompt | `src/agent/prompt/{agent}.txt`, check BLOCKING vs advisory language |
-| Critique not triggered | Critique is advisory, not mandatory | `src/agent/prompt/critique.txt` + parent prompt's critique section |
-| Sub-agent returns empty | Context window exhaustion or bad prompt | `src/agent/agent.ts`, check subagent's `steps` limit |
-| Custom agent not appearing | Config not in `openscience.json` or wrong `mode` | Config file `agent` key → `src/agent/agent.ts` |
+| Symptom                    | Likely cause                                     | Where to look                                                       |
+| -------------------------- | ------------------------------------------------ | ------------------------------------------------------------------- |
+| Agent ignores skills       | Skill catalog missing/truncated in prompt        | `src/agent/prompt/{agent}.txt`, check toolkit section               |
+| Wrong model used           | Agent/model config incorrect                     | `src/agent/agent.ts` + `openscience.json` `agent` config            |
+| Agent skips stages         | Stage gates not mandatory in prompt              | `src/agent/prompt/{agent}.txt`, check BLOCKING vs advisory language |
+| Critique not triggered     | Critique is advisory, not mandatory              | `src/agent/prompt/critique.txt` + parent prompt's critique section  |
+| Sub-agent returns empty    | Context window exhaustion or bad prompt          | `src/agent/agent.ts`, check subagent's `steps` limit                |
+| Custom agent not appearing | Config not in `openscience.json` or wrong `mode` | Config file `agent` key → `src/agent/agent.ts`                      |
 
 ### Key files for prompt debugging (read these first):
 
@@ -128,6 +128,7 @@ src/session/system.ts       # Provider routing, which system prompt for which mo
 ## Style Guide
 
 See `AGENTS.md` for full style guide. Key points:
+
 - Prefer `const` over `let`, avoid `else`, single-word variable names
 - Use Bun APIs (`Bun.file()`, etc.)
 - Rely on type inference, avoid explicit annotations

diff --git a/README.md b/README.md
@@ -78,13 +78,13 @@ Bring-your-own-key usage is always free and is never gated. Atlas only meters th
 
 OpenScience runs a local server that hosts the workspace UI, the agent runtime, and the tool layer. The agent plans with a research harness, calls tools (shell, editor, LSP, MCP servers, scientific connectors, and skills), and streams its work back to the browser. Models are routed per request, so you can switch between providers or run local models without changing anything else. Sessions, artifacts, and provenance are stored on disk and can be shared as links.
 
-| Path | Contents |
-| --- | --- |
-| `backend/cli` | The CLI, server, provider integrations, sessions, and skills |
-| `frontend/workspace` | The browser workspace UI, served by the CLI |
-| `frontend/docs` | The documentation and session-share site |
-| `tooling/sdk/js` | The TypeScript SDK |
-| `tooling/plugin` | The plugin runtime |
+| Path                 | Contents                                                     |
+| -------------------- | ------------------------------------------------------------ |
+| `backend/cli`        | The CLI, server, provider integrations, sessions, and skills |
+| `frontend/workspace` | The browser workspace UI, served by the CLI                  |
+| `frontend/docs`      | The documentation and session-share site                     |
+| `tooling/sdk/js`     | The TypeScript SDK                                           |
+| `tooling/plugin`     | The plugin runtime                                           |
 
 ## Configuration
 

diff --git a/SECURITY.md b/SECURITY.md
@@ -14,13 +14,13 @@ Server mode is opt-in. The server binds to localhost (127.0.0.1) only and enforc
 
 ### Out of scope
 
-| Category | Why |
-| --- | --- |
-| Server access when opted in | If you enable server mode, API access is expected behavior. |
-| Sandbox escapes | The permission system is not a sandbox. |
-| LLM provider data handling | Data you send to a provider is governed by that provider's policies. |
-| MCP server behavior | External MCP servers you configure are outside the trust boundary. |
-| Malicious config files | You control your own config; editing it is not an attack. |
+| Category                    | Why                                                                  |
+| --------------------------- | -------------------------------------------------------------------- |
+| Server access when opted in | If you enable server mode, API access is expected behavior.          |
+| Sandbox escapes             | The permission system is not a sandbox.                              |
+| LLM provider data handling  | Data you send to a provider is governed by that provider's policies. |
+| MCP server behavior         | External MCP servers you configure are outside the trust boundary.   |
+| Malicious config files      | You control your own config; editing it is not an attack.            |
 
 ## Reporting a vulnerability
 

diff --git a/backend/cli/AGENTS.md b/backend/cli/AGENTS.md
@@ -7,67 +7,80 @@ This file provides default instructions for the OpenScience when working in ML/A
 Load skills proactively based on the task at hand:
 
 ### Training & Post-Training
+
 - **RLHF/GRPO/DPO** → `grpo-rl-training`, `trl-fine-tuning`, `openrlhf`, `simpo`
 - **Fine-tuning** → `axolotl`, `unsloth`, `llama-factory`, `torchtune`
 - **Distributed** → `deepspeed-training`, `fsdp`, `megatron-core`, `accelerate`
 
 ### Inference & Serving
+
 - **High-throughput** → `vllm-inference`, `sglang`, `tensorrt-llm`
 - **Local/Edge** → `llama-cpp`, `gguf-quantization`
 - **Optimization** → `flash-attention`, `gptq`, `awq`, `bitsandbytes`
 
 ### Evaluation & Analysis
+
 - **Benchmarking** → `lm-eval-harness`, `bigcode-eval`, `nemo-evaluator`
 - **Interpretability** → `transformer-lens`, `saelens`, `nnsight`, `pyvene`
 
 ### RAG & Retrieval
+
 - **Vector stores** → `chroma`, `faiss`, `pinecone`, `qdrant`
 - **Embeddings** → `sentence-transformers`
 - **Orchestration** → `langchain`, `llamaindex`
 
 ### Agents & Structured Output
+
 - **Agent frameworks** → `langchain`, `llamaindex`, `crewai`
 - **Structured output** → `dspy`, `instructor`, `guidance`, `outlines`
 
 ### Multimodal
+
 - **Vision** → `clip`, `llava`, `segment-anything`, `stable-diffusion`
 - **Audio** → `whisper`, `audiocraft`
 - **Document** → `blip-2`
 
 ### Data & Infrastructure
+
 - **Data processing** → `ray-data`, `nemo-curator`
 - **Cloud compute** → `modal`, `skypilot`, `lambda-labs`
 - **Experiment tracking** → `weights-and-biases`, `mlflow`, `tensorboard`
 
 ### Emerging Techniques
+
 - **Scaling** → `moe-training`, `speculative-decoding`, `long-context`
 - **Compression** → `knowledge-distillation`, `model-pruning`, `model-merging`
 
 ## ML Workflow Standards
 
 ### Before Training
+
 - [ ] Check GPU availability: `nvidia-smi` or `torch.cuda.is_available()`
 - [ ] Verify CUDA version compatibility with frameworks
 - [ ] Estimate memory requirements for model + optimizer + gradients
 - [ ] Set up experiment tracking (W&B or MLflow)
 - [ ] Validate dataset format and tokenization
 
 ### Training Best Practices
+
 - Use bf16 on Ampere+ GPUs (A100, H100, RTX 30xx+), fp16 otherwise
 - Enable gradient checkpointing for memory-constrained setups
-- Save checkpoints every N steps (N = training_time_hours * 2)
+- Save checkpoints every N steps (N = training_time_hours \* 2)
 - Log learning rate, loss, and gradient norms
 - Set random seeds for reproducibility: `torch.manual_seed(42)`
 
 ### Memory Optimization Priority
+
 1. Gradient checkpointing (free, ~30% memory reduction)
 2. Mixed precision training (free, ~50% memory reduction)
 3. Gradient accumulation (free, enables larger effective batch)
 4. DeepSpeed ZeRO Stage 2 (minimal overhead)
 5. Model sharding / FSDP (for multi-GPU)
 
 ### OOM Error Handling
+
 When encountering CUDA OOM:
+
 1. Reduce batch size by 50%
 2. Enable gradient checkpointing
 3. Switch to 8-bit optimizer (bitsandbytes)
@@ -77,12 +90,14 @@ When encountering CUDA OOM:
 ## Code Style Guidelines
 
 ### Preferred Patterns
+
 - Use HuggingFace Transformers for model loading
 - Use `accelerate` for device management
 - Use `datasets` library for data loading
 - Use `peft` for parameter-efficient fine-tuning
 
 ### Example Setup
+
 ```python
 import torch
 from accelerate import Accelerator
@@ -99,6 +114,7 @@ model = AutoModelForCausalLM.from_pretrained(
 ## Environment Variables
 
 Key environment variables for ML workflows:
+
 - `CUDA_VISIBLE_DEVICES` - GPU selection
 - `WANDB_PROJECT` - W&B project name
 - `HF_TOKEN` - HuggingFace API token

diff --git a/backend/cli/bin/openscience b/backend/cli/bin/openscience
@@ -10,12 +10,18 @@ function run(target) {
   // Install dir stays as ~/.openscience/ until the path-migration follow-up PR.
   const openscienceDir = path.join(os.homedir(), ".openscience")
   for (const poison of ["package.json", ".gitignore", "bun.lockb", "bunfig.toml"]) {
-    try { fs.unlinkSync(path.join(openscienceDir, poison)) } catch {}
+    try {
+      fs.unlinkSync(path.join(openscienceDir, poison))
+    } catch {}
   }
-  try { fs.rmSync(path.join(openscienceDir, "node_modules"), { recursive: true }) } catch {}
+  try {
+    fs.rmSync(path.join(openscienceDir, "node_modules"), { recursive: true })
+  } catch {}
   // Clear macOS extended attributes that cause Bun binaries to hang
   if (os.platform() === "darwin") {
-    try { childProcess.spawnSync("xattr", ["-rc", openscienceDir], { stdio: "ignore" }) } catch {}
+    try {
+      childProcess.spawnSync("xattr", ["-rc", openscienceDir], { stdio: "ignore" })
+    } catch {}
   }
 
   const result = childProcess.spawnSync(target, process.argv.slice(2), {
@@ -66,17 +72,19 @@ const scopedBase = "openscience-" + platform + "-" + arch
 // variants like -baseline, wrong libc only as a last resort.
 const musl = (() => {
   if (platform !== "linux") return false
-  try { return fs.readdirSync("/lib").some((f) => f.startsWith("ld-musl-")) } catch { return false }
+  try {
+    return fs.readdirSync("/lib").some((f) => f.startsWith("ld-musl-"))
+  } catch {
+    return false
+  }
 })()
 function variantRank(prefix, entry) {
   const entryMusl = entry.includes("-musl")
   if (entryMusl !== musl) return 0
   return entry === prefix || entry === prefix + "-musl" ? 2 : 1
 }
 function matchingVariants(prefix, entries) {
-  return entries
-    .filter((e) => e.startsWith(prefix))
-    .sort((a, b) => variantRank(prefix, b) - variantRank(prefix, a))
+  return entries.filter((e) => e.startsWith(prefix)).sort((a, b) => variantRank(prefix, b) - variantRank(prefix, a))
 }
 
 // 2. Search this install's node_modules for the matching platform package.

diff --git a/backend/cli/script/generate-web-assets.ts b/backend/cli/script/generate-web-assets.ts
@@ -12,7 +12,9 @@ const distDir = path.resolve(repoRoot, "frontend/workspace/dist")
 const outFile = path.resolve(cliDir, "src/web/assets.generated.ts")
 
 if (!fs.existsSync(distDir)) {
-  throw new Error(`frontend/workspace/dist not found at ${distDir} — run \`cd frontend/workspace && bun run build\` first`)
+  throw new Error(
+    `frontend/workspace/dist not found at ${distDir} — run \`cd frontend/workspace && bun run build\` first`,
+  )
 }
 
 function walk(dir: string, out: string[] = []): string[] {
@@ -46,13 +48,13 @@ files.forEach((absPath, i) => {
 })
 
 lines.push("")
-lines.push("// Each value is the runtime path injected by Bun's `with { type: \"file\" }` loader.")
+lines.push('// Each value is the runtime path injected by Bun\'s `with { type: "file" }` loader.')
 lines.push("// Typed loosely because tsgo doesn't model that loader's return type uniformly.")
 lines.push("export const WEB_ASSETS: Record<string, string> = {")
 for (const e of entries) lines.push(e.replace(",", " as unknown as string,"))
 lines.push("}")
 lines.push("")
-lines.push("export const WEB_INDEX: string | undefined = WEB_ASSETS[\"/index.html\"]")
+lines.push('export const WEB_INDEX: string | undefined = WEB_ASSETS["/index.html"]')
 lines.push("")
 
 fs.writeFileSync(outFile, lines.join("\n"))

diff --git a/backend/cli/script/postinstall.mjs b/backend/cli/script/postinstall.mjs
@@ -52,10 +52,7 @@ function findBinary() {
   const binaryName = platform === "windows" ? "openscience.exe" : "openscience"
 
   // Try scoped package first (@synsci/openscience-darwin-arm64), then unscoped (openscience-darwin-arm64)
-  const packageNames = [
-    `@synsci/openscience-${platform}-${arch}`,
-    `openscience-${platform}-${arch}`,
-  ]
+  const packageNames = [`@synsci/openscience-${platform}-${arch}`, `openscience-${platform}-${arch}`]
 
   for (const packageName of packageNames) {
     try {
Original file line number	Diff line number	Diff line change
Expand Up		@@ -162,4 +162,3 @@ jobs:
		environment_url: `https://www.npmjs.com/package/@synsci/openscience/v/${version}`,
		description: `Published @synsci/openscience@${version}`,
		})