diff --git a/apps/backend/.env.example b/apps/backend/.env.example
index 0c728bab..b8197606 100644
--- a/apps/backend/.env.example
+++ b/apps/backend/.env.example
@@ -49,4 +49,21 @@ PLATYPUS_SANDBOX_DOCKER_ENABLED=false
 # PLATYPUS_SANDBOX_DOCKER_ALLOWED_NETWORKS=shared-services,public-tools
 
 # Frontend URL for generating resource links in tool responses
-FRONTEND_URL=http://localhost:3001
\ No newline at end of file
+FRONTEND_URL=http://localhost:3001
+
+# Context compaction (ADR-0012 §Config & kill switch).
+# Compaction behavior is global; window/output size stays per-model.
+# Set COMPACTION_ENABLED=false to disable proactive compaction (recovery still runs).
+# COMPACTION_ENABLED=true
+#
+# Optional overrides for the global ceiling. Unset = built-in defaults
+# (trigger 0.8, target 0.5, reserve 0.05, keepRecent 10, minPrunable 2000,
+#  minRecentPrunable 10000).
+# Lower the trigger to exercise auto-compaction on test deployments.
+# Keep COMPACTION_TARGET_RATIO below COMPACTION_TRIGGER_RATIO, or compaction re-fires every turn.
+# COMPACTION_TRIGGER_RATIO=0.8
+# COMPACTION_TARGET_RATIO=0.5
+# COMPACTION_RESERVE_RATIO=0.05
+# COMPACTION_KEEP_RECENT=10
+# COMPACTION_MIN_PRUNABLE_CHARS=2000
+# COMPACTION_MIN_RECENT_PRUNABLE_CHARS=10000
\ No newline at end of file
diff --git a/apps/backend/drizzle/0046_context_compaction.sql b/apps/backend/drizzle/0046_context_compaction.sql
new file mode 100644
index 00000000..573845c6
--- /dev/null
+++ b/apps/backend/drizzle/0046_context_compaction.sql
@@ -0,0 +1,5 @@
+ALTER TABLE "chat" ADD COLUMN "context_summary" text;--> statement-breakpoint
+ALTER TABLE "chat" ADD COLUMN "summary_watermark" text;--> statement-breakpoint
+ALTER TABLE "chat" ADD COLUMN "compaction_dirty" boolean DEFAULT false NOT NULL;--> statement-breakpoint
+ALTER TABLE "chat" ADD COLUMN "version" integer DEFAULT 0 NOT NULL;--> statement-breakpoint
+ALTER TABLE "provider" ADD COLUMN "model_meta" jsonb;
diff --git a/apps/backend/drizzle/meta/0046_snapshot.json b/apps/backend/drizzle/meta/0046_snapshot.json
new file mode 100644
index 00000000..cda086e7
--- /dev/null
+++ b/apps/backend/drizzle/meta/0046_snapshot.json
@@ -0,0 +1,4323 @@
+{
+  "id": "c302529b-3427-4f45-a87d-6615109ab2eb",
+  "prevId": "668db7b6-9bad-46e6-b6bc-2533fce5ce32",
+  "version": "7",
+  "dialect": "postgresql",
+  "tables": {
+    "public.agent": {
+      "name": "agent",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "organization_id": {
+          "name": "organization_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "provider_id": {
+          "name": "provider_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "system_prompt": {
+          "name": "system_prompt",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "model_id": {
+          "name": "model_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "max_steps": {
+          "name": "max_steps",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "temperature": {
+          "name": "temperature",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "top_p": {
+          "name": "top_p",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "top_k": {
+          "name": "top_k",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "seed": {
+          "name": "seed",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "presence_penalty": {
+          "name": "presence_penalty",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "frequency_penalty": {
+          "name": "frequency_penalty",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "tool_set_ids": {
+          "name": "tool_set_ids",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'[]'::jsonb"
+        },
+        "skill_ids": {
+          "name": "skill_ids",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'[]'::jsonb"
+        },
+        "sub_agent_ids": {
+          "name": "sub_agent_ids",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'[]'::jsonb"
+        },
+        "input_placeholder": {
+          "name": "input_placeholder",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "avatar_key": {
+          "name": "avatar_key",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_agent_workspace_id": {
+          "name": "idx_agent_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_organization_id": {
+          "name": "idx_agent_organization_id",
+          "columns": [
+            {
+              "expression": "organization_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_agent_provider_id": {
+          "name": "idx_agent_provider_id",
+          "columns": [
+            {
+              "expression": "provider_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "agent_organization_id_organization_id_fk": {
+          "name": "agent_organization_id_organization_id_fk",
+          "tableFrom": "agent",
+          "tableTo": "organization",
+          "columnsFrom": [
+            "organization_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "agent_workspace_id_workspace_id_fk": {
+          "name": "agent_workspace_id_workspace_id_fk",
+          "tableFrom": "agent",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "agent_provider_id_provider_id_fk": {
+          "name": "agent_provider_id_provider_id_fk",
+          "tableFrom": "agent",
+          "tableTo": "provider",
+          "columnsFrom": [
+            "provider_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "restrict",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_agent_name_org": {
+          "name": "unique_agent_name_org",
+          "nullsNotDistinct": false,
+          "columns": [
+            "organization_id",
+            "name"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.attachment": {
+      "name": "attachment",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "resource_type": {
+          "name": "resource_type",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "resource_id": {
+          "name": "resource_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_attachment_workspace": {
+          "name": "idx_attachment_workspace",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "resource_type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_attachment_resource": {
+          "name": "idx_attachment_resource",
+          "columns": [
+            {
+              "expression": "resource_type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "resource_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "attachment_workspace_id_workspace_id_fk": {
+          "name": "attachment_workspace_id_workspace_id_fk",
+          "tableFrom": "attachment",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_attachment": {
+          "name": "unique_attachment",
+          "nullsNotDistinct": false,
+          "columns": [
+            "workspace_id",
+            "resource_type",
+            "resource_id"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.blueprint": {
+      "name": "blueprint",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "organization_id": {
+          "name": "organization_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "task_model_provider_id": {
+          "name": "task_model_provider_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "memory_extraction_provider_id": {
+          "name": "memory_extraction_provider_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "memory_embedding_provider_id": {
+          "name": "memory_embedding_provider_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "context": {
+          "name": "context",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_blueprint_organization_id": {
+          "name": "idx_blueprint_organization_id",
+          "columns": [
+            {
+              "expression": "organization_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "blueprint_organization_id_organization_id_fk": {
+          "name": "blueprint_organization_id_organization_id_fk",
+          "tableFrom": "blueprint",
+          "tableTo": "organization",
+          "columnsFrom": [
+            "organization_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "blueprint_task_model_provider_id_provider_id_fk": {
+          "name": "blueprint_task_model_provider_id_provider_id_fk",
+          "tableFrom": "blueprint",
+          "tableTo": "provider",
+          "columnsFrom": [
+            "task_model_provider_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "set null",
+          "onUpdate": "no action"
+        },
+        "blueprint_memory_extraction_provider_id_provider_id_fk": {
+          "name": "blueprint_memory_extraction_provider_id_provider_id_fk",
+          "tableFrom": "blueprint",
+          "tableTo": "provider",
+          "columnsFrom": [
+            "memory_extraction_provider_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "set null",
+          "onUpdate": "no action"
+        },
+        "blueprint_memory_embedding_provider_id_provider_id_fk": {
+          "name": "blueprint_memory_embedding_provider_id_provider_id_fk",
+          "tableFrom": "blueprint",
+          "tableTo": "provider",
+          "columnsFrom": [
+            "memory_embedding_provider_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "set null",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_blueprint_name_org": {
+          "name": "unique_blueprint_name_org",
+          "nullsNotDistinct": false,
+          "columns": [
+            "organization_id",
+            "name"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.blueprint_item": {
+      "name": "blueprint_item",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "blueprint_id": {
+          "name": "blueprint_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "resource_type": {
+          "name": "resource_type",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "resource_id": {
+          "name": "resource_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_blueprint_item_blueprint": {
+          "name": "idx_blueprint_item_blueprint",
+          "columns": [
+            {
+              "expression": "blueprint_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_blueprint_item_resource": {
+          "name": "idx_blueprint_item_resource",
+          "columns": [
+            {
+              "expression": "resource_type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "resource_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "blueprint_item_blueprint_id_blueprint_id_fk": {
+          "name": "blueprint_item_blueprint_id_blueprint_id_fk",
+          "tableFrom": "blueprint_item",
+          "tableTo": "blueprint",
+          "columnsFrom": [
+            "blueprint_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_blueprint_item": {
+          "name": "unique_blueprint_item",
+          "nullsNotDistinct": false,
+          "columns": [
+            "blueprint_id",
+            "resource_type",
+            "resource_id"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.chat": {
+      "name": "chat",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "title": {
+          "name": "title",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "messages": {
+          "name": "messages",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "status": {
+          "name": "status",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'succeeded'"
+        },
+        "is_pinned": {
+          "name": "is_pinned",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "tags": {
+          "name": "tags",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'[]'::jsonb"
+        },
+        "agent_id": {
+          "name": "agent_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "provider_id": {
+          "name": "provider_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "model_id": {
+          "name": "model_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "system_prompt": {
+          "name": "system_prompt",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "temperature": {
+          "name": "temperature",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "top_p": {
+          "name": "top_p",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "top_k": {
+          "name": "top_k",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "seed": {
+          "name": "seed",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "presence_penalty": {
+          "name": "presence_penalty",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "frequency_penalty": {
+          "name": "frequency_penalty",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "context_summary": {
+          "name": "context_summary",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "summary_watermark": {
+          "name": "summary_watermark",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "compaction_dirty": {
+          "name": "compaction_dirty",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "version": {
+          "name": "version",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 0
+        },
+        "last_memory_processed_at": {
+          "name": "last_memory_processed_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "memory_extraction_status": {
+          "name": "memory_extraction_status",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'pending'"
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_chat_workspace_id": {
+          "name": "idx_chat_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_chat_tags": {
+          "name": "idx_chat_tags",
+          "columns": [
+            {
+              "expression": "tags",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "gin",
+          "with": {}
+        },
+        "idx_chat_memory_processing": {
+          "name": "idx_chat_memory_processing",
+          "columns": [
+            {
+              "expression": "memory_extraction_status",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "last_memory_processed_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "updated_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "chat_workspace_id_workspace_id_fk": {
+          "name": "chat_workspace_id_workspace_id_fk",
+          "tableFrom": "chat",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.context": {
+      "name": "context",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "content": {
+          "name": "content",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_context_user_id": {
+          "name": "idx_context_user_id",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_context_workspace_id": {
+          "name": "idx_context_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "context_user_id_user_id_fk": {
+          "name": "context_user_id_user_id_fk",
+          "tableFrom": "context",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "context_workspace_id_workspace_id_fk": {
+          "name": "context_workspace_id_workspace_id_fk",
+          "tableFrom": "context",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_context_user_workspace": {
+          "name": "unique_context_user_workspace",
+          "nullsNotDistinct": false,
+          "columns": [
+            "user_id",
+            "workspace_id"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.dashboard": {
+      "name": "dashboard",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "desktop_layout": {
+          "name": "desktop_layout",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'[]'::jsonb"
+        },
+        "mobile_layout": {
+          "name": "mobile_layout",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'[]'::jsonb"
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_dashboard_workspace_id": {
+          "name": "idx_dashboard_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "uq_dashboard_workspace_name": {
+          "name": "uq_dashboard_workspace_name",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "name",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": true,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "dashboard_workspace_id_workspace_id_fk": {
+          "name": "dashboard_workspace_id_workspace_id_fk",
+          "tableFrom": "dashboard",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.invitation": {
+      "name": "invitation",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "email": {
+          "name": "email",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "organization_id": {
+          "name": "organization_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "invited_by": {
+          "name": "invited_by",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "status": {
+          "name": "status",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'pending'"
+        },
+        "workspace_name": {
+          "name": "workspace_name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_invitation_email": {
+          "name": "idx_invitation_email",
+          "columns": [
+            {
+              "expression": "email",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_invitation_org_id": {
+          "name": "idx_invitation_org_id",
+          "columns": [
+            {
+              "expression": "organization_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "invitation_organization_id_organization_id_fk": {
+          "name": "invitation_organization_id_organization_id_fk",
+          "tableFrom": "invitation",
+          "tableTo": "organization",
+          "columnsFrom": [
+            "organization_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "invitation_invited_by_user_id_fk": {
+          "name": "invitation_invited_by_user_id_fk",
+          "tableFrom": "invitation",
+          "tableTo": "user",
+          "columnsFrom": [
+            "invited_by"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_invitation_org_email": {
+          "name": "unique_invitation_org_email",
+          "nullsNotDistinct": false,
+          "columns": [
+            "organization_id",
+            "email"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.invitation_blueprint": {
+      "name": "invitation_blueprint",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "invitation_id": {
+          "name": "invitation_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "blueprint_id": {
+          "name": "blueprint_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "position": {
+          "name": "position",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_invitation_blueprint_invitation": {
+          "name": "idx_invitation_blueprint_invitation",
+          "columns": [
+            {
+              "expression": "invitation_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_invitation_blueprint_blueprint": {
+          "name": "idx_invitation_blueprint_blueprint",
+          "columns": [
+            {
+              "expression": "blueprint_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "invitation_blueprint_invitation_id_invitation_id_fk": {
+          "name": "invitation_blueprint_invitation_id_invitation_id_fk",
+          "tableFrom": "invitation_blueprint",
+          "tableTo": "invitation",
+          "columnsFrom": [
+            "invitation_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "invitation_blueprint_blueprint_id_blueprint_id_fk": {
+          "name": "invitation_blueprint_blueprint_id_blueprint_id_fk",
+          "tableFrom": "invitation_blueprint",
+          "tableTo": "blueprint",
+          "columnsFrom": [
+            "blueprint_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_invitation_blueprint": {
+          "name": "unique_invitation_blueprint",
+          "nullsNotDistinct": false,
+          "columns": [
+            "invitation_id",
+            "blueprint_id"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.kanban_board": {
+      "name": "kanban_board",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "labels": {
+          "name": "labels",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'[]'::jsonb"
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_kanban_board_workspace_id": {
+          "name": "idx_kanban_board_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "kanban_board_workspace_id_workspace_id_fk": {
+          "name": "kanban_board_workspace_id_workspace_id_fk",
+          "tableFrom": "kanban_board",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_kanban_board_name_workspace": {
+          "name": "unique_kanban_board_name_workspace",
+          "nullsNotDistinct": false,
+          "columns": [
+            "workspace_id",
+            "name"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.kanban_card": {
+      "name": "kanban_card",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "column_id": {
+          "name": "column_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "title": {
+          "name": "title",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "body": {
+          "name": "body",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "label_ids": {
+          "name": "label_ids",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'[]'::jsonb"
+        },
+        "assignees": {
+          "name": "assignees",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'[]'::jsonb"
+        },
+        "due_date": {
+          "name": "due_date",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "priority": {
+          "name": "priority",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'none'"
+        },
+        "position": {
+          "name": "position",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_by_user_id": {
+          "name": "created_by_user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_by_agent_id": {
+          "name": "created_by_agent_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "last_edited_by_user_id": {
+          "name": "last_edited_by_user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "last_edited_by_agent_id": {
+          "name": "last_edited_by_agent_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_kanban_card_column_id": {
+          "name": "idx_kanban_card_column_id",
+          "columns": [
+            {
+              "expression": "column_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_kanban_card_label_ids": {
+          "name": "idx_kanban_card_label_ids",
+          "columns": [
+            {
+              "expression": "label_ids",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "gin",
+          "with": {}
+        },
+        "idx_kanban_card_assignees": {
+          "name": "idx_kanban_card_assignees",
+          "columns": [
+            {
+              "expression": "assignees",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "gin",
+          "with": {}
+        },
+        "idx_kanban_card_due_date": {
+          "name": "idx_kanban_card_due_date",
+          "columns": [
+            {
+              "expression": "due_date",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_kanban_card_priority": {
+          "name": "idx_kanban_card_priority",
+          "columns": [
+            {
+              "expression": "priority",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_kanban_card_column_position": {
+          "name": "idx_kanban_card_column_position",
+          "columns": [
+            {
+              "expression": "column_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "position",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "kanban_card_column_id_kanban_column_id_fk": {
+          "name": "kanban_card_column_id_kanban_column_id_fk",
+          "tableFrom": "kanban_card",
+          "tableTo": "kanban_column",
+          "columnsFrom": [
+            "column_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "kanban_card_created_by_user_id_user_id_fk": {
+          "name": "kanban_card_created_by_user_id_user_id_fk",
+          "tableFrom": "kanban_card",
+          "tableTo": "user",
+          "columnsFrom": [
+            "created_by_user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "set null",
+          "onUpdate": "no action"
+        },
+        "kanban_card_created_by_agent_id_agent_id_fk": {
+          "name": "kanban_card_created_by_agent_id_agent_id_fk",
+          "tableFrom": "kanban_card",
+          "tableTo": "agent",
+          "columnsFrom": [
+            "created_by_agent_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "set null",
+          "onUpdate": "no action"
+        },
+        "kanban_card_last_edited_by_user_id_user_id_fk": {
+          "name": "kanban_card_last_edited_by_user_id_user_id_fk",
+          "tableFrom": "kanban_card",
+          "tableTo": "user",
+          "columnsFrom": [
+            "last_edited_by_user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "set null",
+          "onUpdate": "no action"
+        },
+        "kanban_card_last_edited_by_agent_id_agent_id_fk": {
+          "name": "kanban_card_last_edited_by_agent_id_agent_id_fk",
+          "tableFrom": "kanban_card",
+          "tableTo": "agent",
+          "columnsFrom": [
+            "last_edited_by_agent_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "set null",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.kanban_card_comment": {
+      "name": "kanban_card_comment",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "card_id": {
+          "name": "card_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "body": {
+          "name": "body",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_by_user_id": {
+          "name": "created_by_user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_by_agent_id": {
+          "name": "created_by_agent_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_kanban_card_comment_card_id": {
+          "name": "idx_kanban_card_comment_card_id",
+          "columns": [
+            {
+              "expression": "card_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "kanban_card_comment_card_id_kanban_card_id_fk": {
+          "name": "kanban_card_comment_card_id_kanban_card_id_fk",
+          "tableFrom": "kanban_card_comment",
+          "tableTo": "kanban_card",
+          "columnsFrom": [
+            "card_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "kanban_card_comment_created_by_user_id_user_id_fk": {
+          "name": "kanban_card_comment_created_by_user_id_user_id_fk",
+          "tableFrom": "kanban_card_comment",
+          "tableTo": "user",
+          "columnsFrom": [
+            "created_by_user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "set null",
+          "onUpdate": "no action"
+        },
+        "kanban_card_comment_created_by_agent_id_agent_id_fk": {
+          "name": "kanban_card_comment_created_by_agent_id_agent_id_fk",
+          "tableFrom": "kanban_card_comment",
+          "tableTo": "agent",
+          "columnsFrom": [
+            "created_by_agent_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "set null",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.kanban_column": {
+      "name": "kanban_column",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "board_id": {
+          "name": "board_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "position": {
+          "name": "position",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_kanban_column_board_id": {
+          "name": "idx_kanban_column_board_id",
+          "columns": [
+            {
+              "expression": "board_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "kanban_column_board_id_kanban_board_id_fk": {
+          "name": "kanban_column_board_id_kanban_board_id_fk",
+          "tableFrom": "kanban_column",
+          "tableTo": "kanban_board",
+          "columnsFrom": [
+            "board_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.mcp": {
+      "name": "mcp",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "organization_id": {
+          "name": "organization_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "url": {
+          "name": "url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "headers": {
+          "name": "headers",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "auth_type": {
+          "name": "auth_type",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "bearer_token": {
+          "name": "bearer_token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "oauth_access_token": {
+          "name": "oauth_access_token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "oauth_refresh_token": {
+          "name": "oauth_refresh_token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "oauth_token_expires_at": {
+          "name": "oauth_token_expires_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "oauth_scope": {
+          "name": "oauth_scope",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "oauth_requested_scope": {
+          "name": "oauth_requested_scope",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "oauth_client_id": {
+          "name": "oauth_client_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "oauth_client_secret": {
+          "name": "oauth_client_secret",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_mcp_workspace_id": {
+          "name": "idx_mcp_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_mcp_organization_id": {
+          "name": "idx_mcp_organization_id",
+          "columns": [
+            {
+              "expression": "organization_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "mcp_organization_id_organization_id_fk": {
+          "name": "mcp_organization_id_organization_id_fk",
+          "tableFrom": "mcp",
+          "tableTo": "organization",
+          "columnsFrom": [
+            "organization_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "mcp_workspace_id_workspace_id_fk": {
+          "name": "mcp_workspace_id_workspace_id_fk",
+          "tableFrom": "mcp",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_mcp_name_org": {
+          "name": "unique_mcp_name_org",
+          "nullsNotDistinct": false,
+          "columns": [
+            "organization_id",
+            "name"
+          ]
+        },
+        "unique_mcp_name_workspace": {
+          "name": "unique_mcp_name_workspace",
+          "nullsNotDistinct": false,
+          "columns": [
+            "workspace_id",
+            "name"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.mcp_oauth_state": {
+      "name": "mcp_oauth_state",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "mcp_id": {
+          "name": "mcp_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "code_verifier": {
+          "name": "code_verifier",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "redirect_uri": {
+          "name": "redirect_uri",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {
+        "idx_mcp_oauth_state_mcp_id": {
+          "name": "idx_mcp_oauth_state_mcp_id",
+          "columns": [
+            {
+              "expression": "mcp_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "mcp_oauth_state_mcp_id_mcp_id_fk": {
+          "name": "mcp_oauth_state_mcp_id_mcp_id_fk",
+          "tableFrom": "mcp_oauth_state",
+          "tableTo": "mcp",
+          "columnsFrom": [
+            "mcp_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.memory_daily_summary": {
+      "name": "memory_daily_summary",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "summary_date": {
+          "name": "summary_date",
+          "type": "date",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "summary": {
+          "name": "summary",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "embedding": {
+          "name": "embedding",
+          "type": "vector",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_daily_summary_user_workspace": {
+          "name": "idx_daily_summary_user_workspace",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_daily_summary_date": {
+          "name": "idx_daily_summary_date",
+          "columns": [
+            {
+              "expression": "summary_date",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "memory_daily_summary_user_id_user_id_fk": {
+          "name": "memory_daily_summary_user_id_user_id_fk",
+          "tableFrom": "memory_daily_summary",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "memory_daily_summary_workspace_id_workspace_id_fk": {
+          "name": "memory_daily_summary_workspace_id_workspace_id_fk",
+          "tableFrom": "memory_daily_summary",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_daily_summary_user_workspace_date": {
+          "name": "unique_daily_summary_user_workspace_date",
+          "nullsNotDistinct": false,
+          "columns": [
+            "user_id",
+            "workspace_id",
+            "summary_date"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.notification": {
+      "name": "notification",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "agent_id": {
+          "name": "agent_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "title": {
+          "name": "title",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "body": {
+          "name": "body",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_notification_workspace_id": {
+          "name": "idx_notification_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_notification_agent_id": {
+          "name": "idx_notification_agent_id",
+          "columns": [
+            {
+              "expression": "agent_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_notification_created_at": {
+          "name": "idx_notification_created_at",
+          "columns": [
+            {
+              "expression": "created_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "notification_workspace_id_workspace_id_fk": {
+          "name": "notification_workspace_id_workspace_id_fk",
+          "tableFrom": "notification",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "notification_agent_id_agent_id_fk": {
+          "name": "notification_agent_id_agent_id_fk",
+          "tableFrom": "notification",
+          "tableTo": "agent",
+          "columnsFrom": [
+            "agent_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.notification_read": {
+      "name": "notification_read",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "notification_id": {
+          "name": "notification_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "read_at": {
+          "name": "read_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_notification_read_user_id": {
+          "name": "idx_notification_read_user_id",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_notification_read_notification_id": {
+          "name": "idx_notification_read_notification_id",
+          "columns": [
+            {
+              "expression": "notification_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "notification_read_notification_id_notification_id_fk": {
+          "name": "notification_read_notification_id_notification_id_fk",
+          "tableFrom": "notification_read",
+          "tableTo": "notification",
+          "columnsFrom": [
+            "notification_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "notification_read_user_id_user_id_fk": {
+          "name": "notification_read_user_id_user_id_fk",
+          "tableFrom": "notification_read",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_notification_read": {
+          "name": "unique_notification_read",
+          "nullsNotDistinct": false,
+          "columns": [
+            "notification_id",
+            "user_id"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.organization": {
+      "name": "organization",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.organization_member": {
+      "name": "organization_member",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "organization_id": {
+          "name": "organization_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "role": {
+          "name": "role",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'member'"
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_org_member_org_id": {
+          "name": "idx_org_member_org_id",
+          "columns": [
+            {
+              "expression": "organization_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_org_member_user_id": {
+          "name": "idx_org_member_user_id",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "organization_member_organization_id_organization_id_fk": {
+          "name": "organization_member_organization_id_organization_id_fk",
+          "tableFrom": "organization_member",
+          "tableTo": "organization",
+          "columnsFrom": [
+            "organization_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "organization_member_user_id_user_id_fk": {
+          "name": "organization_member_user_id_user_id_fk",
+          "tableFrom": "organization_member",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.provider": {
+      "name": "provider",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "organization_id": {
+          "name": "organization_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "provider_type": {
+          "name": "provider_type",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "api_key": {
+          "name": "api_key",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "region": {
+          "name": "region",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "base_url": {
+          "name": "base_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "headers": {
+          "name": "headers",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "extraBody": {
+          "name": "extraBody",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "organization": {
+          "name": "organization",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "project": {
+          "name": "project",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "api_mode": {
+          "name": "api_mode",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'responses'"
+        },
+        "native_search_enabled": {
+          "name": "native_search_enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "modelIds": {
+          "name": "modelIds",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "task_model_id": {
+          "name": "task_model_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "memory_extraction_model_id": {
+          "name": "memory_extraction_model_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "embedding_model_id": {
+          "name": "embedding_model_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "embedding_dimensions": {
+          "name": "embedding_dimensions",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "model_meta": {
+          "name": "model_meta",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_provider_workspace_id": {
+          "name": "idx_provider_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_provider_organization_id": {
+          "name": "idx_provider_organization_id",
+          "columns": [
+            {
+              "expression": "organization_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "provider_organization_id_organization_id_fk": {
+          "name": "provider_organization_id_organization_id_fk",
+          "tableFrom": "provider",
+          "tableTo": "organization",
+          "columnsFrom": [
+            "organization_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_provider_name_org": {
+          "name": "unique_provider_name_org",
+          "nullsNotDistinct": false,
+          "columns": [
+            "organization_id",
+            "name"
+          ]
+        },
+        "unique_provider_name_workspace": {
+          "name": "unique_provider_name_workspace",
+          "nullsNotDistinct": false,
+          "columns": [
+            "workspace_id",
+            "name"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.sandbox": {
+      "name": "sandbox",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "backend": {
+          "name": "backend",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "config": {
+          "name": "config",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'{}'::jsonb"
+        },
+        "credentials": {
+          "name": "credentials",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'{}'::jsonb"
+        },
+        "admin_env": {
+          "name": "admin_env",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'{}'::jsonb"
+        },
+        "user_env": {
+          "name": "user_env",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'{}'::jsonb"
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "unique_sandbox_workspace_id": {
+          "name": "unique_sandbox_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": true,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "sandbox_workspace_id_workspace_id_fk": {
+          "name": "sandbox_workspace_id_workspace_id_fk",
+          "tableFrom": "sandbox",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.sandbox_teardown_failure": {
+      "name": "sandbox_teardown_failure",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "backend": {
+          "name": "backend",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "config": {
+          "name": "config",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "error": {
+          "name": "error",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "attempted_at": {
+          "name": "attempted_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_sandbox_teardown_failure_workspace_id": {
+          "name": "idx_sandbox_teardown_failure_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.skill": {
+      "name": "skill",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "organization_id": {
+          "name": "organization_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "body": {
+          "name": "body",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_skill_workspace_id": {
+          "name": "idx_skill_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_skill_organization_id": {
+          "name": "idx_skill_organization_id",
+          "columns": [
+            {
+              "expression": "organization_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "skill_organization_id_organization_id_fk": {
+          "name": "skill_organization_id_organization_id_fk",
+          "tableFrom": "skill",
+          "tableTo": "organization",
+          "columnsFrom": [
+            "organization_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "skill_workspace_id_workspace_id_fk": {
+          "name": "skill_workspace_id_workspace_id_fk",
+          "tableFrom": "skill",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "unique_skill_name_workspace": {
+          "name": "unique_skill_name_workspace",
+          "nullsNotDistinct": false,
+          "columns": [
+            "workspace_id",
+            "name"
+          ]
+        },
+        "unique_skill_name_org": {
+          "name": "unique_skill_name_org",
+          "nullsNotDistinct": false,
+          "columns": [
+            "organization_id",
+            "name"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.trigger": {
+      "name": "trigger",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "agent_id": {
+          "name": "agent_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "type": {
+          "name": "type",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "description": {
+          "name": "description",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "instruction": {
+          "name": "instruction",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "enabled": {
+          "name": "enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "max_runs_to_keep": {
+          "name": "max_runs_to_keep",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 10
+        },
+        "search": {
+          "name": "search",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "config": {
+          "name": "config",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "last_run_at": {
+          "name": "last_run_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "next_run_at": {
+          "name": "next_run_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_trigger_workspace_id": {
+          "name": "idx_trigger_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_trigger_next_run_at": {
+          "name": "idx_trigger_next_run_at",
+          "columns": [
+            {
+              "expression": "next_run_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_trigger_type": {
+          "name": "idx_trigger_type",
+          "columns": [
+            {
+              "expression": "type",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "trigger_workspace_id_workspace_id_fk": {
+          "name": "trigger_workspace_id_workspace_id_fk",
+          "tableFrom": "trigger",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "trigger_agent_id_agent_id_fk": {
+          "name": "trigger_agent_id_agent_id_fk",
+          "tableFrom": "trigger",
+          "tableTo": "agent",
+          "columnsFrom": [
+            "agent_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "restrict",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.trigger_run": {
+      "name": "trigger_run",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "trigger_id": {
+          "name": "trigger_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "status": {
+          "name": "status",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'pending'"
+        },
+        "event_type": {
+          "name": "event_type",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "event_data": {
+          "name": "event_data",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "started_at": {
+          "name": "started_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "completed_at": {
+          "name": "completed_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "error_message": {
+          "name": "error_message",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "stats": {
+          "name": "stats",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_trigger_run_trigger_id": {
+          "name": "idx_trigger_run_trigger_id",
+          "columns": [
+            {
+              "expression": "trigger_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_trigger_run_started_at": {
+          "name": "idx_trigger_run_started_at",
+          "columns": [
+            {
+              "expression": "started_at",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "trigger_run_trigger_id_trigger_id_fk": {
+          "name": "trigger_run_trigger_id_trigger_id_fk",
+          "tableFrom": "trigger_run",
+          "tableTo": "trigger",
+          "columnsFrom": [
+            "trigger_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.webhook": {
+      "name": "webhook",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "workspace_id": {
+          "name": "workspace_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'Webhook'"
+        },
+        "url": {
+          "name": "url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "signing_secret": {
+          "name": "signing_secret",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "headers": {
+          "name": "headers",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "enabled": {
+          "name": "enabled",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": true
+        },
+        "events": {
+          "name": "events",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_webhook_workspace_id": {
+          "name": "idx_webhook_workspace_id",
+          "columns": [
+            {
+              "expression": "workspace_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "webhook_workspace_id_workspace_id_fk": {
+          "name": "webhook_workspace_id_workspace_id_fk",
+          "tableFrom": "webhook",
+          "tableTo": "workspace",
+          "columnsFrom": [
+            "workspace_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.widget": {
+      "name": "widget",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "dashboard_id": {
+          "name": "dashboard_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "type": {
+          "name": "type",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "title": {
+          "name": "title",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "data": {
+          "name": "data",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_widget_dashboard_id": {
+          "name": "idx_widget_dashboard_id",
+          "columns": [
+            {
+              "expression": "dashboard_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "uq_widget_dashboard_title": {
+          "name": "uq_widget_dashboard_title",
+          "columns": [
+            {
+              "expression": "dashboard_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            },
+            {
+              "expression": "title",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": true,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "widget_dashboard_id_dashboard_id_fk": {
+          "name": "widget_dashboard_id_dashboard_id_fk",
+          "tableFrom": "widget",
+          "tableTo": "dashboard",
+          "columnsFrom": [
+            "dashboard_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.workspace": {
+      "name": "workspace",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "organization_id": {
+          "name": "organization_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "owner_id": {
+          "name": "owner_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "context": {
+          "name": "context",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "task_model_provider_id": {
+          "name": "task_model_provider_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "memory_extraction_provider_id": {
+          "name": "memory_extraction_provider_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "memory_embedding_provider_id": {
+          "name": "memory_embedding_provider_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "max_daily_summaries": {
+          "name": "max_daily_summaries",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "default": 90
+        },
+        "provider_self_management": {
+          "name": "provider_self_management",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "mcp_self_management": {
+          "name": "mcp_self_management",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "idx_workspace_organization_id": {
+          "name": "idx_workspace_organization_id",
+          "columns": [
+            {
+              "expression": "organization_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        },
+        "idx_workspace_owner_id": {
+          "name": "idx_workspace_owner_id",
+          "columns": [
+            {
+              "expression": "owner_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "workspace_organization_id_organization_id_fk": {
+          "name": "workspace_organization_id_organization_id_fk",
+          "tableFrom": "workspace",
+          "tableTo": "organization",
+          "columnsFrom": [
+            "organization_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "workspace_owner_id_user_id_fk": {
+          "name": "workspace_owner_id_user_id_fk",
+          "tableFrom": "workspace",
+          "tableTo": "user",
+          "columnsFrom": [
+            "owner_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        },
+        "workspace_task_model_provider_id_provider_id_fk": {
+          "name": "workspace_task_model_provider_id_provider_id_fk",
+          "tableFrom": "workspace",
+          "tableTo": "provider",
+          "columnsFrom": [
+            "task_model_provider_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "set null",
+          "onUpdate": "no action"
+        },
+        "workspace_memory_extraction_provider_id_provider_id_fk": {
+          "name": "workspace_memory_extraction_provider_id_provider_id_fk",
+          "tableFrom": "workspace",
+          "tableTo": "provider",
+          "columnsFrom": [
+            "memory_extraction_provider_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "set null",
+          "onUpdate": "no action"
+        },
+        "workspace_memory_embedding_provider_id_provider_id_fk": {
+          "name": "workspace_memory_embedding_provider_id_provider_id_fk",
+          "tableFrom": "workspace",
+          "tableTo": "provider",
+          "columnsFrom": [
+            "memory_embedding_provider_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "set null",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.account": {
+      "name": "account",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "account_id": {
+          "name": "account_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "provider_id": {
+          "name": "provider_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "access_token": {
+          "name": "access_token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "refresh_token": {
+          "name": "refresh_token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "id_token": {
+          "name": "id_token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "access_token_expires_at": {
+          "name": "access_token_expires_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "refresh_token_expires_at": {
+          "name": "refresh_token_expires_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "scope": {
+          "name": "scope",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "password": {
+          "name": "password",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "account_userId_idx": {
+          "name": "account_userId_idx",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "account_user_id_user_id_fk": {
+          "name": "account_user_id_user_id_fk",
+          "tableFrom": "account",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.session": {
+      "name": "session",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "token": {
+          "name": "token",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "ip_address": {
+          "name": "ip_address",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "user_agent": {
+          "name": "user_agent",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "user_id": {
+          "name": "user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {
+        "session_userId_idx": {
+          "name": "session_userId_idx",
+          "columns": [
+            {
+              "expression": "user_id",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {
+        "session_user_id_user_id_fk": {
+          "name": "session_user_id_user_id_fk",
+          "tableFrom": "session",
+          "tableTo": "user",
+          "columnsFrom": [
+            "user_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "cascade",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "session_token_unique": {
+          "name": "session_token_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "token"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.user": {
+      "name": "user",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "email": {
+          "name": "email",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "email_verified": {
+          "name": "email_verified",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "image": {
+          "name": "image",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "role": {
+          "name": "role",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'user'"
+        },
+        "banned": {
+          "name": "banned",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": false,
+          "default": false
+        },
+        "ban_reason": {
+          "name": "ban_reason",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "ban_expires": {
+          "name": "ban_expires",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "user_email_unique": {
+          "name": "user_email_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "email"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.verification": {
+      "name": "verification",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "identifier": {
+          "name": "identifier",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "value": {
+          "name": "value",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {
+        "verification_identifier_idx": {
+          "name": "verification_identifier_idx",
+          "columns": [
+            {
+              "expression": "identifier",
+              "isExpression": false,
+              "asc": true,
+              "nulls": "last"
+            }
+          ],
+          "isUnique": false,
+          "concurrently": false,
+          "method": "btree",
+          "with": {}
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    }
+  },
+  "enums": {},
+  "schemas": {},
+  "sequences": {},
+  "roles": {},
+  "policies": {},
+  "views": {},
+  "_meta": {
+    "columns": {},
+    "schemas": {},
+    "tables": {}
+  }
+}
\ No newline at end of file
diff --git a/apps/backend/drizzle/meta/_journal.json b/apps/backend/drizzle/meta/_journal.json
index 44a49c37..847f0e0f 100644
--- a/apps/backend/drizzle/meta/_journal.json
+++ b/apps/backend/drizzle/meta/_journal.json
@@ -323,6 +323,13 @@
       "when": 1780816408681,
       "tag": "0045_married_roxanne_simpson",
       "breakpoints": true
+    },
+    {
+      "idx": 46,
+      "version": "7",
+      "when": 1781201728242,
+      "tag": "0046_context_compaction",
+      "breakpoints": true
     }
   ]
 }
\ No newline at end of file
diff --git a/apps/backend/src/db/schema.ts b/apps/backend/src/db/schema.ts
index 21a1d6e7..52015213 100644
--- a/apps/backend/src/db/schema.ts
+++ b/apps/backend/src/db/schema.ts
@@ -67,6 +67,13 @@ export const provider = pgTable(
     memoryExtractionModelId: t.text("memory_extraction_model_id").notNull(),
     embeddingModelId: t.text("embedding_model_id"),
     embeddingDimensions: t.integer("embedding_dimensions"),
+    // Per-model context-window / output overrides (ADR-0012 §Window resolution).
+    // Keyed by model id; resolveContextWindow consults this before API/registry.
+    modelMeta: t
+      .jsonb("model_meta")
+      .$type<
+        Record<string, { contextWindow?: number; maxOutputTokens?: number }>
+      >(),
     createdAt: t.timestamp("created_at").notNull().defaultNow(),
     updatedAt: t.timestamp("updated_at").notNull().defaultNow(),
   }),
@@ -162,6 +169,16 @@ export const chat = pgTable(
     presencePenalty: t.real("presence_penalty"),
     frequencyPenalty: t.real("frequency_penalty"),
 
+    // Context-compaction state (docs/adr/0012). Every column here is additive: nullable or defaulted.
+    // View-not-delete (ADR-0012 §View, not delete): these columns change what is sent to the model,
+    // never the stored `messages`. `summaryWatermark` is the id of the last summarized message.
+    // All mutations go through the single versioned CAS writer (ADR-0012 §One durable writer);
+    // `version` is its compare-and-swap token.
+    contextSummary: t.text("context_summary"),
+    summaryWatermark: t.text("summary_watermark"),
+    compactionDirty: t.boolean("compaction_dirty").notNull().default(false),
+    version: t.integer("version").notNull().default(0),
+
     // Memory processing tracking
     lastMemoryProcessedAt: t.timestamp("last_memory_processed_at"),
     memoryExtractionStatus: t
diff --git a/apps/backend/src/routes/chat.test.ts b/apps/backend/src/routes/chat.test.ts
index f69e879f..43b3161e 100644
--- a/apps/backend/src/routes/chat.test.ts
+++ b/apps/backend/src/routes/chat.test.ts
@@ -6,8 +6,9 @@ import {
   resetMockDb,
 } from "../test-utils.ts";
 
-const { mockPrepareChatTurn } = vi.hoisted(() => ({
+const { mockPrepareChatTurn, mockForceCompactChat } = vi.hoisted(() => ({
   mockPrepareChatTurn: vi.fn(),
+  mockForceCompactChat: vi.fn(),
 }));
 
 vi.mock("../services/chat-execution.ts", () => {
@@ -25,12 +26,20 @@ vi.mock("../services/chat-execution.ts", () => {
   }
   return {
     prepareChatTurn: mockPrepareChatTurn,
+    forceCompactChat: mockForceCompactChat,
+    // loadChatMessages is called by agent-runner before onStart (ADR-0012 §Summary invalidation baseline).
+    loadChatMessages: vi.fn().mockResolvedValue([]),
     ValidationError,
     NotFoundError,
     drizzleChatTurnQueries: {},
   };
 });
 
+import { runRegistry } from "../runs/run-registry.ts";
+// Mocked above — resolves to the mock's NotFoundError class, the same one the
+// route checks with `instanceof`.
+import { NotFoundError } from "../services/chat-execution.ts";
+
 import app from "../server.ts";
 
 // Mock AI SDK
@@ -88,6 +97,10 @@ describe("Chat Routes", () => {
   beforeEach(() => {
     resetMockDb();
     vi.clearAllMocks();
+    // The `POST /` test starts a (mocked) run that registers chat-1 and never
+    // finalizes, leaving it in the process-wide registry. Clear it so the
+    // compact route's in-progress guard sees a clean slate.
+    runRegistry.unregister("chat-1");
     mockDb.where.mockReturnValue(mockDb);
     mockDb.orderBy.mockReturnValue(mockDb);
     mockDb.limit.mockReturnValue(mockDb);
@@ -239,6 +252,7 @@ describe("Chat Routes", () => {
       mockDb.limit.mockResolvedValueOnce([
         { ownerId: "user-1", organizationId: "org-1" },
       ]); // requireWorkspaceAccess
+      mockDb.limit.mockResolvedValueOnce([{ workspaceId: "ws-1" }]); // ADR-0012 §Consequences (cross-tenant safety) chat workspace check
 
       // ChatSink.onStart upserts the chat row with status=running before
       // prepareChatTurn runs. Returning a non-empty array skips the insert
@@ -279,6 +293,36 @@ describe("Chat Routes", () => {
       expect(res.status).toBe(200);
       expect(await res.text()).toBe("stream");
     });
+
+    it("returns 404 when the submitted chat id belongs to another workspace (ADR-0012 §Consequences cross-tenant safety)", async () => {
+      mockSession({
+        id: "user-1",
+        name: "Test User",
+        email: "test@example.com",
+      });
+      mockDb.limit.mockResolvedValueOnce([{ role: "member" }]); // requireOrgAccess
+      mockDb.limit.mockResolvedValueOnce([
+        { ownerId: "user-1", organizationId: "org-1" },
+      ]); // requireWorkspaceAccess
+      // Cross-tenant check: the chat exists but in a DIFFERENT workspace.
+      mockDb.limit.mockResolvedValueOnce([{ workspaceId: "ws-other" }]);
+
+      const res = await app.request(baseUrl, {
+        method: "POST",
+        body: JSON.stringify({
+          id: "chat-1",
+          workspaceId,
+          providerId: "p1",
+          modelId: "m1",
+          messages: [{ role: "user", content: "hello" }],
+        }),
+        headers: { "Content-Type": "application/json" },
+      });
+
+      expect(res.status).toBe(404);
+      // The run must never start — no compaction-store mutation on another tenant's chat.
+      expect(mockPrepareChatTurn).not.toHaveBeenCalled();
+    });
   });
 
   describe("DELETE /:chatId", () => {
@@ -469,4 +513,75 @@ describe("Chat Routes", () => {
       expect(await res.json()).toEqual(mockUpdatedChat);
     });
   });
+
+  describe("POST /:chatId/compact", () => {
+    const ownerAccess = () => {
+      mockSession();
+      mockDb.limit.mockResolvedValueOnce([{ role: "member" }]); // requireOrgAccess
+      mockDb.limit.mockResolvedValueOnce([
+        { ownerId: "user-1", organizationId: "org-1" },
+      ]); // requireWorkspaceAccess + owner
+    };
+
+    it("force-compacts and returns the refreshed usage", async () => {
+      ownerAccess();
+      mockForceCompactChat.mockResolvedValueOnce({
+        estimatedTokens: 1234,
+        contextWindow: 8192,
+        contextWindowIsDefault: false,
+      });
+
+      const res = await app.request(`${baseUrl}/chat-1/compact`, {
+        method: "POST",
+      });
+
+      expect(res.status).toBe(200);
+      expect(await res.json()).toEqual({
+        inputTokens: 1234,
+        contextWindow: 8192,
+        contextWindowIsDefault: false,
+      });
+      expect(mockForceCompactChat).toHaveBeenCalledWith(
+        "chat-1",
+        workspaceId,
+        orgId,
+      );
+    });
+
+    it("returns 409 when a run is in progress (does not compact)", async () => {
+      ownerAccess();
+      // runRegistry is keyed by runId, which equals the chatId for top-level
+      // chat runs — so an in-flight run on this chat blocks the compact.
+      runRegistry.register("chat-1");
+      try {
+        const res = await app.request(`${baseUrl}/chat-1/compact`, {
+          method: "POST",
+        });
+        expect(res.status).toBe(409);
+        expect(mockForceCompactChat).not.toHaveBeenCalled();
+      } finally {
+        runRegistry.unregister("chat-1");
+      }
+    });
+
+    it("returns 404 when the chat is not found / not in the workspace", async () => {
+      ownerAccess();
+      mockForceCompactChat.mockRejectedValueOnce(
+        new NotFoundError("Chat not found"),
+      );
+
+      const res = await app.request(`${baseUrl}/chat-other/compact`, {
+        method: "POST",
+      });
+      expect(res.status).toBe(404);
+    });
+
+    it("returns 401 without a session", async () => {
+      mockNoSession();
+      const res = await app.request(`${baseUrl}/chat-1/compact`, {
+        method: "POST",
+      });
+      expect(res.status).toBe(401);
+    });
+  });
 });
diff --git a/apps/backend/src/routes/chat.ts b/apps/backend/src/routes/chat.ts
index 8acaa199..b0129ad4 100644
--- a/apps/backend/src/routes/chat.ts
+++ b/apps/backend/src/routes/chat.ts
@@ -9,7 +9,11 @@ import {
   provider as providerTable,
   workspace as workspaceTable,
 } from "../db/schema.ts";
-import { NotFoundError, ValidationError } from "../services/chat-execution.ts";
+import {
+  forceCompactChat,
+  NotFoundError,
+  ValidationError,
+} from "../services/chat-execution.ts";
 import { openProvider } from "../services/provider.ts";
 import {
   chatGenerateMetadataSchema,
@@ -29,6 +33,7 @@ import { type PlatypusUIMessage } from "../types.ts";
 import { rewriteStorageUrls, deleteFiles } from "../storage/utils.ts";
 import { getOrigin } from "../utils/get-origin.ts";
 import { agentRunner } from "../runs/agent-runner.ts";
+import { runRegistry } from "../runs/run-registry.ts";
 import { ChatSink } from "../runs/sinks/chat-sink.ts";
 import type { RunInput } from "../runs/types.ts";
 
@@ -143,6 +148,23 @@ chat.post(
     const scope = c.get("workspaceScope")!;
     const data = c.req.valid("json");
 
+    // ADR-0012 §Consequences (cross-tenant safety): verify the submitted chat id (if any) belongs to this workspace.
+    // Without this check a workspace-A user could supply a workspace-B chat id
+    // and corrupt B's compaction state via the unscoped store writes.
+    if (data.id) {
+      const existing = await db
+        .select({ workspaceId: chatTable.workspaceId })
+        .from(chatTable)
+        .where(eq(chatTable.id, data.id))
+        .limit(1);
+      if (
+        existing.length > 0 &&
+        existing[0].workspaceId !== scope.workspaceId
+      ) {
+        return c.json({ message: "Chat not found" }, 404);
+      }
+    }
+
     const input: RunInput = {
       runId: data.id,
       request: data,
@@ -417,4 +439,52 @@ chat.post(
   },
 );
 
+chat.post(
+  "/:chatId/compact",
+  requireAuth,
+  requireOrgAccess(),
+  requireWorkspaceAccess,
+  requireWorkspaceOwner,
+  async (c) => {
+    const chatId = c.req.param("chatId");
+    const workspaceId = c.req.param("workspaceId")!;
+    const orgId = c.req.param("orgId")!;
+
+    // ADR-0012 §Force-compact on demand: the frontend already defers the click
+    // until streaming finishes; this server-side guard is belt-and-suspenders so
+    // a forced compaction never hits CAS races with an in-progress writer.
+    if (runRegistry.has(chatId)) {
+      const error = "Run in progress; retry after the response finishes";
+      return c.json({ error }, 409);
+    }
+
+    try {
+      const compaction = await forceCompactChat(chatId, workspaceId, orgId);
+      // ADR-0012 §Force-compact on demand: tokensBefore, messagesDropped and
+      // keepRecentMessages let the client confirm only when the drop is
+      // significant (messagesDropped > keepRecentMessages OR reduction > 30%).
+      // ADR-0012 §Compaction trace in the timeline: traceMessage is the persisted
+      // synthetic trace message (when a summary ran), so the frontend can append
+      // it to the timeline without a full refetch.
+      return c.json({
+        inputTokens: compaction.estimatedTokens,
+        tokensBefore: compaction.tokensBefore,
+        messagesDropped: compaction.messagesDropped,
+        keepRecentMessages: compaction.keepRecentMessages,
+        contextWindow: compaction.contextWindow,
+        contextWindowIsDefault: compaction.contextWindowIsDefault,
+        traceMessage: compaction.traceMessage,
+      });
+    } catch (error) {
+      if (error instanceof NotFoundError) {
+        return c.json({ error: error.message }, 404);
+      }
+      if (error instanceof ValidationError) {
+        return c.json({ error: error.message }, 400);
+      }
+      throw error;
+    }
+  },
+);
+
 export { chat };
diff --git a/apps/backend/src/routes/org-provider.ts b/apps/backend/src/routes/org-provider.ts
index 9777fe75..d037152e 100644
--- a/apps/backend/src/routes/org-provider.ts
+++ b/apps/backend/src/routes/org-provider.ts
@@ -7,6 +7,7 @@ import { providerCreateSchema, providerUpdateSchema } from "@platypus/schemas";
 import { eq, and } from "drizzle-orm";
 import { handleEmbeddingConfigChange } from "../services/embedding-invalidation.ts";
 import { dedupeArray } from "../utils.ts";
+import { contextWindowResolver } from "../runs/context-window.ts";
 import { requireAuth } from "../middleware/authentication.ts";
 import { requireOrgAccess } from "../middleware/authorization.ts";
 import { requireSharedDeletable } from "../services/scoped-resource.ts";
@@ -117,6 +118,10 @@ orgProvider.put(
       throw new NotFoundError("Provider not found");
     }
 
+    // ADR-0012 §Window resolution (caching & eviction): evict the cached context
+    // window so a modelMeta override applies immediately, not after the 1-hour TTL.
+    contextWindowResolver.evict(providerId);
+
     return c.json(record[0], 200);
   },
 );
diff --git a/apps/backend/src/routes/provider.ts b/apps/backend/src/routes/provider.ts
index b437c335..74170291 100644
--- a/apps/backend/src/routes/provider.ts
+++ b/apps/backend/src/routes/provider.ts
@@ -7,6 +7,8 @@ import { providerCreateSchema, providerUpdateSchema } from "@platypus/schemas";
 import { eq, and } from "drizzle-orm";
 import { handleEmbeddingConfigChange } from "../services/embedding-invalidation.ts";
 import { dedupeArray } from "../utils.ts";
+import { contextWindowResolver } from "../runs/context-window.ts";
+import { resolveCompactionConfig } from "../services/chat-execution.ts";
 import { requireAuth } from "../middleware/authentication.ts";
 import {
   requireOrgAccess,
@@ -135,6 +137,10 @@ provider.put(
       )
       .returning();
 
+    // ADR-0012 §Window resolution (caching & eviction): evict the cached context
+    // window so a modelMeta override applies immediately, not after the 1-hour TTL.
+    contextWindowResolver.evict(providerId);
+
     return c.json(record[0], 200);
   },
 );
@@ -171,4 +177,49 @@ provider.delete(
   },
 );
 
+/**
+ * GET `/:providerId/context-window?modelId=…`: resolved context window for one
+ * model on this provider (ADR-0012 §Context-usage ring), via the cached resolver.
+ * `contextWindow` is `null` when resolution fell back to the conservative
+ * default or the resolver rejected, so the frontend can render the ring neutral.
+ */
+provider.get(
+  "/:providerId/context-window",
+  requireAuth,
+  requireOrgAccess(),
+  requireWorkspaceAccess,
+  async (c) => {
+    const modelId = c.req.query("modelId");
+    if (!modelId) {
+      return c.json({ error: "modelId query parameter required" }, 400);
+    }
+
+    const orgId = c.req.param("orgId")!;
+    const workspaceId = c.req.param("workspaceId")!;
+    const providerId = c.req.param("providerId");
+    const scoped = await requireScoped(db, "provider", providerId, {
+      orgId,
+      wsId: workspaceId,
+    });
+
+    // Best-effort: a resolver failure is reported like the default window.
+    const resolved = await contextWindowResolver
+      .resolve(scoped.row, modelId)
+      .catch(() => null);
+
+    return c.json({
+      contextWindow:
+        resolved && resolved.source !== "default"
+          ? resolved.contextWindow
+          : null,
+      source: resolved?.source ?? "default",
+      // ADR-0012 §Force-compact on demand: the client shows the confirm dialog only
+      // for a significant drop. messagesDropped ≈ total − keepRecent, so
+      // "messagesDropped > keepRecent" ⟺ "total > 2 × keepRecent", a pre-run
+      // proxy the client can compute from its message count.
+      keepRecentMessages: resolveCompactionConfig().keepRecentMessages,
+    });
+  },
+);
+
 export { provider };
diff --git a/apps/backend/src/runs/agent-runner.test.ts b/apps/backend/src/runs/agent-runner.test.ts
index 45660386..c7e73e00 100644
--- a/apps/backend/src/runs/agent-runner.test.ts
+++ b/apps/backend/src/runs/agent-runner.test.ts
@@ -80,9 +80,17 @@ vi.mock("../logger.ts", () => ({
   },
 }));
 
-import { AgentRunner } from "./agent-runner.ts";
+import {
+  AgentRunner,
+  prependCompactionChunks,
+  stripCompactionTraceParts,
+  withToolTimestamps,
+} from "./agent-runner.ts";
+import { buildTier2PrepareStep } from "./compaction.ts";
+import type { UIMessageChunk } from "ai";
 import { runRegistry, TimeoutError } from "./run-registry.ts";
 import type { ResolvedRunPlan, RunInput, RunSink } from "./types.ts";
+import type { PlatypusUIMessage } from "../types.ts";
 import type { WorkspaceScope } from "../scope.ts";
 
 type LifecycleEvent =
@@ -161,6 +169,14 @@ const fakeTurn = (overrides?: { dispose?: () => Promise<void> }) => {
       providerId: "p1",
       modelId: "m1",
     },
+    recovery: {
+      imageProvider: "default" as const,
+      targetTokens: 1000,
+      keepRecentMessages: 10,
+      minPrunableChars: 2000,
+      summarize: (t: string) => Promise.resolve(t),
+    },
+    tier2: null,
     dispose,
   };
 };
@@ -406,7 +422,15 @@ describe("AgentRunner.stream — success & interruption", () => {
             onFinish: (ctx: { messages: unknown[] }) => Promise<void> | void;
           }) => {
             streamHarness.onFinish = uiOpts.onFinish;
-            return { tee: () => [{}, {}] };
+            // The runner pipes this through withToolTimestamps (pipeThrough) and
+            // tees it, so it must be a real ReadableStream. Its contents are
+            // irrelevant — the snapshot branch is driven via the mocked
+            // readUIMessageStream (streamHarness.queue), not this stream.
+            return new ReadableStream<UIMessageChunk>({
+              start(controller) {
+                controller.close();
+              },
+            });
           },
         };
       },
@@ -434,16 +458,19 @@ describe("AgentRunner.stream — success & interruption", () => {
       usage: { inputTokens: 3, outputTokens: 4 },
       toolCalls: [],
     });
-    // A partial snapshot streams in over the server-side branch.
-    queue.push({ id: "m1", role: "assistant", parts: [] });
+    // The server-side snapshot branch delivers the final assistant message,
+    // updating state.messages; ending the queue drains the consumer, which
+    // finalises the run (the runner does not use toUIMessageStream's onFinish).
+    const finalMessage = {
+      id: "m1",
+      role: "assistant",
+      parts: [{ type: "text", text: "hi" }],
+    };
+    queue.push(finalMessage);
     await tick();
-    // Natural completion delivers the final assistant message.
-    const finalMessages = [
-      { id: "m1", role: "assistant", parts: [{ type: "text", text: "hi" }] },
-    ];
-    await streamHarness.onFinish!({ messages: finalMessages });
     queue.end();
     await tick();
+    const finalMessages = [finalMessage];
 
     expect(sink.names()).toEqual([
       "onStart",
@@ -485,8 +512,7 @@ describe("AgentRunner.stream — success & interruption", () => {
     await tick();
 
     expect(runner.cancel("s-cancel")).toBe(true);
-    // The SDK observes the abort and finishes the UI stream.
-    await streamHarness.onFinish!({ messages: [partial] });
+    // The abort ends the UI stream; the snapshot consumer drains and finalises.
     queue.end();
     await tick();
 
@@ -547,3 +573,420 @@ describe("AgentRunner timeout types", () => {
     expect(e.kind).toBe("run");
   });
 });
+
+describe("withToolTimestamps", () => {
+  const FIXED_NOW = "2026-05-30T12:00:00.000Z"; // returned by the injected now()
+
+  const collect = async <T>(stream: ReadableStream<T>): Promise<T[]> => {
+    const out: T[] = [];
+    const reader = stream.getReader();
+    for (;;) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      out.push(value);
+    }
+    return out;
+  };
+
+  const sourceOf = (chunks: UIMessageChunk[]): ReadableStream<UIMessageChunk> =>
+    new ReadableStream<UIMessageChunk>({
+      start(controller) {
+        for (const chunk of chunks) controller.enqueue(chunk);
+        controller.close();
+      },
+    });
+
+  const toolInputAvailable = (
+    overrides: Partial<
+      Extract<UIMessageChunk, { type: "tool-input-available" }>
+    > = {},
+  ): UIMessageChunk => ({
+    type: "tool-input-available",
+    toolCallId: "t1",
+    toolName: "foo",
+    input: { x: 1 },
+    ...overrides, // spread last so a test can replace any default field
+  });
+
+  it("injects startedAt on tool-input-available chunks", async () => {
+    const { stream } = withToolTimestamps(
+      sourceOf([toolInputAvailable()]),
+      () => FIXED_NOW,
+    );
+    const result = await collect(stream);
+
+    expect(result).toHaveLength(1);
+    expect(
+      (result[0] as { toolMetadata?: Record<string, unknown> }).toolMetadata,
+    ).toEqual({ startedAt: FIXED_NOW });
+  });
+
+  it("preserves existing toolMetadata fields", async () => {
+    const { stream } = withToolTimestamps(
+      sourceOf([toolInputAvailable({ toolMetadata: { custom: "value" } })]),
+      () => FIXED_NOW,
+    );
+    const result = await collect(stream);
+
+    expect(
+      (result[0] as { toolMetadata?: Record<string, unknown> }).toolMetadata,
+    ).toEqual({
+      custom: "value",
+      startedAt: FIXED_NOW,
+    });
+  });
+
+  it("passes other chunks through unchanged", async () => {
+    const chunks: UIMessageChunk[] = [
+      { type: "text-delta", id: "a", delta: "hello" },
+      {
+        type: "tool-output-available",
+        toolCallId: "t1",
+        output: { ok: true },
+      },
+      { type: "finish", finishReason: "stop" },
+    ];
+
+    const { stream } = withToolTimestamps(sourceOf(chunks), () => FIXED_NOW);
+    const result = await collect(stream);
+
+    expect(result).toEqual(chunks);
+  });
+
+  it("records completedAt for tool-output-available chunks", async () => {
+    const { stream, completions } = withToolTimestamps(
+      sourceOf([
+        toolInputAvailable(),
+        {
+          type: "tool-output-available",
+          toolCallId: "t1",
+          output: { ok: true },
+        },
+      ]),
+      () => FIXED_NOW,
+    );
+    // `completions` only fills in while the stream is read, so drain it first.
+    await collect(stream);
+
+    expect(completions.get("t1")).toBe(FIXED_NOW);
+  });
+
+  it("records completedAt for tool-output-error chunks", async () => {
+    const { stream, completions } = withToolTimestamps(
+      sourceOf([
+        toolInputAvailable(),
+        {
+          type: "tool-output-error",
+          toolCallId: "t1",
+          errorText: "boom",
+        },
+      ]),
+      () => FIXED_NOW,
+    );
+    await collect(stream); // drain first, same reason as the success case
+
+    expect(completions.get("t1")).toBe(FIXED_NOW);
+  });
+
+  // Mirrors AgentRunner.stream's pipeline (transform -> tee -> readUIMessageStream
+  // draining the snapshot branch). Checks that completions are populated AND that
+  // the built message's tool part carries the same toolCallId, which is the key
+  // applyToolCompletions matches on when it stamps completedAt.
+  it("integration: completions + built tool part share toolCallId after tee+read", async () => {
+    const { readUIMessageStream } =
+      await vi.importActual<typeof import("ai")>("ai");
+
+    const chunks: UIMessageChunk[] = [
+      { type: "start", messageId: "m1" },
+      { type: "start-step" },
+      {
+        type: "tool-input-available",
+        toolCallId: "call_xyz",
+        toolName: "foo",
+        input: { a: 1 },
+      },
+      {
+        type: "tool-output-available",
+        toolCallId: "call_xyz",
+        output: { ok: true },
+      },
+      { type: "finish-step" },
+      { type: "finish" },
+    ];
+
+    const { stream, completions } = withToolTimestamps(
+      sourceOf(chunks),
+      () => FIXED_NOW,
+    );
+    const [forResponse, forSnapshot] = stream.tee();
+
+    let lastMessage: { parts?: Array<Record<string, unknown>> } | undefined;
+    for await (const message of readUIMessageStream({ stream: forSnapshot })) {
+      lastMessage = message; // keep only the latest snapshot of the built message
+    }
+    await collect(forResponse);
+
+    expect(completions.get("call_xyz")).toBe(FIXED_NOW);
+
+    const toolPart = lastMessage?.parts?.find(
+      (p) => (p as { toolCallId?: string }).toolCallId === "call_xyz",
+    ) as { toolMetadata?: Record<string, unknown>; toolCallId?: string };
+    expect(toolPart).toBeDefined();
+    expect(toolPart.toolCallId).toBe("call_xyz");
+    expect(toolPart.toolMetadata).toMatchObject({ startedAt: FIXED_NOW });
+  });
+});
+
+describe("prependCompactionChunks", () => {
+  type ToolInputChunk = Extract<
+    UIMessageChunk,
+    { type: "tool-input-available" }
+  >;
+  type ToolOutputChunk = Extract<
+    UIMessageChunk,
+    { type: "tool-output-available" }
+  >;
+
+  /** Drains a chunk stream into an array, preserving emission order. */
+  const collect = async (
+    stream: ReadableStream<UIMessageChunk>,
+  ): Promise<UIMessageChunk[]> => {
+    const seen: UIMessageChunk[] = [];
+    const reader = stream.getReader();
+    let step = await reader.read();
+    while (!step.done) {
+      seen.push(step.value);
+      step = await reader.read();
+    }
+    return seen;
+  };
+
+  /** Builds a stream that emits `chunks` in order and then closes. */
+  const sourceOf = (chunks: UIMessageChunk[]): ReadableStream<UIMessageChunk> =>
+    new ReadableStream<UIMessageChunk>({
+      start(controller) {
+        chunks.forEach((chunk) => controller.enqueue(chunk));
+        controller.close();
+      },
+    });
+
+  it("injects a compact_context tool-call/result pair right after start, before any text", async () => {
+    const source = sourceOf([
+      { type: "start" },
+      { type: "text-start", id: "t" },
+      { type: "text-delta", id: "t", delta: "hi" },
+    ]);
+    const trace = { messagesDropped: 12, summaryExcerpt: "the user did X" };
+
+    const out = await collect(
+      prependCompactionChunks(source, trace, () => "cc1"),
+    );
+
+    expect(out.map((c) => c.type)).toEqual([
+      "start",
+      "tool-input-available",
+      "tool-output-available",
+      "text-start",
+      "text-delta",
+    ]);
+
+    const input = out[1] as ToolInputChunk;
+    expect(input.toolName).toBe("compact_context");
+    expect(input.toolCallId).toBe("cc1");
+    const output = out[2] as ToolOutputChunk;
+    expect(output.toolCallId).toBe("cc1");
+    expect(output.output).toEqual({
+      messagesDropped: 12,
+      summaryExcerpt: "the user did X",
+    });
+  });
+
+  it("omits summaryExcerpt when absent", async () => {
+    const source = sourceOf([{ type: "start" }]);
+
+    const out = await collect(
+      prependCompactionChunks(source, { messagesDropped: 3 }, () => "cc2"),
+    );
+
+    // out[0] is `start`, out[1] the synthetic call, out[2] the synthetic result.
+    const output = out[2] as ToolOutputChunk;
+    expect(output.output).toEqual({ messagesDropped: 3 });
+  });
+
+  it("injects only once even if multiple start events appear", async () => {
+    const source = sourceOf([{ type: "start" }, { type: "start" }]);
+
+    const out = await collect(
+      prependCompactionChunks(source, { messagesDropped: 1 }, () => "cc3"),
+    );
+
+    const injectedCalls = out.filter((c) => c.type === "tool-input-available");
+    expect(injectedCalls).toHaveLength(1);
+  });
+});
+
+describe("stripCompactionTraceParts", () => {
+  // Fixtures are loose literals cast to PlatypusUIMessage; only `id`, `role` and
+  // the part `type`s matter to these assertions.
+  const asMessages = (raw: unknown[]) => raw as unknown as PlatypusUIMessage[];
+
+  // A synthetic compact_context tool part in its output-available state.
+  const tracePart = (toolCallId: string, payload: object) => ({
+    type: "tool-compact_context",
+    toolCallId,
+    state: "output-available",
+    input: payload,
+    output: payload,
+  });
+
+  const traceMessage = (id: string) => ({
+    id,
+    role: "assistant",
+    parts: [tracePart(`${id}-call`, { messagesDropped: 2 })],
+  });
+
+  const userMessage = {
+    id: "u1",
+    role: "user",
+    parts: [{ type: "text", text: "hi" }],
+  };
+
+  it("drops a trace-only assistant message entirely (never replayed to the model)", () => {
+    const messages = asMessages([userMessage, traceMessage("t1")]);
+
+    const kept = stripCompactionTraceParts(messages);
+
+    expect(kept.map((m) => m.id)).toEqual(["u1"]);
+  });
+
+  it("strips only the trace part from an assistant message with real content", () => {
+    const answer = {
+      id: "a1",
+      role: "assistant",
+      parts: [tracePart("a1-call", {}), { type: "text", text: "answer" }],
+    };
+
+    const kept = stripCompactionTraceParts(asMessages([answer]));
+
+    expect(kept).toHaveLength(1);
+    expect(kept[0].parts.map((p) => p.type)).toEqual(["text"]);
+  });
+
+  it("returns the same array reference when nothing to strip", () => {
+    const messages = asMessages([userMessage]);
+
+    const kept = stripCompactionTraceParts(messages);
+
+    // toBe checks identity: the input array itself must come back.
+    expect(kept).toBe(messages);
+  });
+});
+
+describe("buildTier2PrepareStep", () => {
+  const makeCtx = (triggerTokens = 100) => ({
+    triggerTokens,
+    targetTokens: 50,
+    keepRecentMessages: 4,
+    minPrunableChars: 100,
+    imageProvider: "default" as const,
+    summarize: vi.fn().mockResolvedValue("summary"),
+    summarizerWindow: undefined,
+  });
+
+  // Calls a PrepareStepFunction with only `messages` (the field under test)
+  // populated; steps/stepNumber/model/experimental_context are placeholders.
+  const callStep = (
+    fn: ReturnType<typeof buildTier2PrepareStep>,
+    messages: import("ai").ModelMessage[],
+  ) =>
+    fn({
+      messages,
+      steps: [],
+      stepNumber: 0,
+      model: {} as never,
+      experimental_context: undefined,
+    });
+
+  const shortMessages: import("ai").ModelMessage[] = [
+    { role: "user", content: [{ type: "text", text: "hi" }] },
+    {
+      role: "assistant",
+      content: [{ type: "text", text: "hello" }],
+    },
+  ];
+
+  // 6 assistant/tool pairs where each tool result carries 1200 chars of text
+  // (≈ 300 tokens each via char/4). Total ≈ 1800+ tokens, far above the
+  // triggerTokens of 1 passed by the only test that uses this fixture.
+  const longMessages = (): import("ai").ModelMessage[] => {
+    const msgs: import("ai").ModelMessage[] = [
+      { role: "user", content: [{ type: "text", text: "start" }] },
+    ];
+    for (let i = 0; i < 6; i++) {
+      msgs.push({
+        role: "assistant",
+        content: [
+          {
+            type: "tool-call",
+            toolCallId: `tc${i}`,
+            toolName: "tool",
+            input: {},
+          },
+        ],
+      });
+      msgs.push({
+        role: "tool",
+        content: [
+          {
+            type: "tool-result",
+            toolCallId: `tc${i}`,
+            toolName: "tool",
+            // Must use typed output shape so tokenEstimator counts the value.
+            output: { type: "text" as const, value: "x".repeat(1200) },
+          },
+        ],
+      });
+    }
+    return msgs;
+  };
+
+  it("returns undefined when messages are below triggerTokens (ADR-0012 §Sub-agents)", async () => {
+    const fn = buildTier2PrepareStep(makeCtx(10_000));
+    const result = await callStep(fn, shortMessages);
+    expect(result).toBeUndefined();
+  });
+
+  it("compacts when messages exceed triggerTokens", async () => {
+    const msgs = longMessages();
+    const ctx = makeCtx(1);
+    const fn = buildTier2PrepareStep(ctx);
+    const result = await callStep(fn, msgs);
+    expect(result?.messages).toBeDefined();
+    const out = result!.messages!;
+    expect(out.length).toBeLessThan(msgs.length);
+    // Stage 2 summarizes the dropped prefix, so the summarizer must have run.
+    expect(ctx.summarize).toHaveBeenCalled();
+    // First surviving message is the synthetic summary (role "user"); the one
+    // after it starts the kept tail and must not be an orphaned tool result
+    // (its assistant tool-call would have been dropped into the prefix).
+    expect(out[1]?.role).not.toBe("tool");
+  });
+
+  it("returns undefined when prefix is empty (no-op, ADR-0012 §Sub-agents)", async () => {
+    // Two messages with keepRecentMessages 4 → no prefix to summarize →
+    // compactModelMessages drops nothing → prepareStep returns undefined so the
+    // SDK proceeds unchanged, and the summarizer is never called.
+    const ctx = makeCtx(1);
+    const fn = buildTier2PrepareStep(ctx);
+    const result = await callStep(fn, shortMessages);
+    expect(result).toBeUndefined();
+    expect(ctx.summarize).not.toHaveBeenCalled();
+  });
+
+  it("does not call summarize when estimate is below triggerTokens", async () => {
+    const ctx = makeCtx(10_000);
+    const fn = buildTier2PrepareStep(ctx);
+    await callStep(fn, shortMessages);
+    expect(ctx.summarize).not.toHaveBeenCalled();
+  });
+});
diff --git a/apps/backend/src/runs/agent-runner.ts b/apps/backend/src/runs/agent-runner.ts
index 0959bf9f..51efe020 100644
--- a/apps/backend/src/runs/agent-runner.ts
+++ b/apps/backend/src/runs/agent-runner.ts
@@ -8,8 +8,21 @@ import {
   readUIMessageStream,
   stepCountIs,
   streamText,
+  wrapLanguageModel,
+  type LanguageModel,
+  type UIMessageChunk,
 } from "ai";
 import {
+  contextOverflowRecoveryMiddleware,
+  isContextOverflowError,
+} from "./recovery.ts";
+import {
+  buildTier2PrepareStep,
+  COMPACT_CONTEXT_TOOL_NAME,
+  type CompactionTrace,
+} from "./compaction.ts";
+import {
+  loadChatMessages,
   prepareChatTurn,
   type ChatTurn,
   type ToolActivityEvent,
@@ -32,6 +45,212 @@ import type {
   RunStatus,
 } from "./types.ts";
 
+/**
+ * Return value of {@link withToolTimestamps}: the transformed stream and a
+ * `toolCallId` → completion ISO timestamp map. The map starts empty and gains
+ * an entry each time a tool-output chunk passes through the stream.
+ */
+export type ToolTimestampStream<TChunk extends UIMessageChunk> = {
+  stream: ReadableStream<TChunk>; // tool-input-available chunks gain `startedAt`
+  /** toolCallId → completedAt ISO timestamp; only populated as the stream drains. */
+  completions: Map<string, string>;
+};
+
+/**
+ * Adds tool-call timing to a UI message stream so the UI can display how long
+ * each tool ran.
+ *
+ * - `startedAt` is written into the `toolMetadata` of every
+ *   `tool-input-available` chunk. It has to ride the input chunk because the AI
+ *   SDK's tool-output handlers ignore `chunk.toolMetadata` and reuse the
+ *   metadata captured during the input-available phase.
+ * - `completedAt` cannot be carried on the output chunk for that same reason, so
+ *   it is stored in the returned `completions` map under the chunk's
+ *   `toolCallId`; the run loop later copies it onto the built message with
+ *   {@link applyToolCompletions} before the sink persists it.
+ *
+ * @param stream Source chunk stream; it is piped through a transform.
+ * @param now Timestamp source, an ISO `Date` string by default (injectable for tests).
+ * @returns The stamped stream plus the `completions` map. Exported for unit testing.
+ */
+export function withToolTimestamps<TChunk extends UIMessageChunk>(
+  stream: ReadableStream<TChunk>,
+  now: () => string = () => new Date().toISOString(),
+): ToolTimestampStream<TChunk> {
+  const completions = new Map<string, string>();
+  const stamper = new TransformStream<TChunk, TChunk>({
+    transform(chunk, controller) {
+      switch (chunk.type) {
+        case "tool-input-available": {
+          // startedAt is merged last, after any metadata already on the chunk.
+          const toolMetadata = { ...chunk.toolMetadata, startedAt: now() };
+          controller.enqueue({ ...chunk, toolMetadata });
+          break;
+        }
+        case "tool-output-available":
+        case "tool-output-error":
+          // Only recorded here; the chunk itself is forwarded untouched.
+          completions.set(chunk.toolCallId, now());
+          controller.enqueue(chunk);
+          break;
+        default:
+          // Every other chunk type passes through unchanged.
+          controller.enqueue(chunk);
+      }
+    },
+  });
+  return { stream: stream.pipeThrough(stamper), completions };
+}
+
+/**
+ * Makes a Tier 1 compaction visible in the chat timeline (ADR-0012 §Compaction
+ * trace in the timeline) by emitting a synthetic `compact_context` tool-call and
+ * tool-result chunk pair directly after the stream's first `start` chunk. The
+ * existing tool-call expander renders the pair, so no custom renderer is needed.
+ * If the stream never emits `start`, nothing is injected.
+ *
+ * Exported for unit testing.
+ */
+export function prependCompactionChunks(
+  stream: ReadableStream<UIMessageChunk>,
+  trace: CompactionTrace,
+  generateId: () => string = createIdGenerator({ prefix: "cc", size: 12 }),
+): ReadableStream<UIMessageChunk> {
+  const toolCallId = generateId();
+  const { messagesDropped, summaryExcerpt } = trace;
+  const callChunk: UIMessageChunk = {
+    type: "tool-input-available",
+    toolCallId,
+    toolName: COMPACT_CONTEXT_TOOL_NAME,
+    title: "Context compaction",
+    input: { messagesDropped },
+  };
+
+  const resultChunk: UIMessageChunk = {
+    type: "tool-output-available",
+    toolCallId,
+    // The excerpt key is left out entirely when the trace has no (or an empty) excerpt.
+    output: summaryExcerpt
+      ? { messagesDropped, summaryExcerpt }
+      : { messagesDropped },
+  };
+
+  let pending = true;
+  const injector = new TransformStream<UIMessageChunk, UIMessageChunk>({
+    transform(chunk, controller) {
+      controller.enqueue(chunk);
+      if (!pending || chunk.type !== "start") return;
+      // One-shot: later `start` chunks must not produce a second trace.
+      pending = false;
+      controller.enqueue(callChunk);
+      controller.enqueue(resultChunk);
+    },
+  });
+  return stream.pipeThrough(injector);
+}
+
+const COMPACT_CONTEXT_PART_TYPE = `tool-${COMPACT_CONTEXT_TOOL_NAME}`;
+
+/**
+ * Strips the synthetic `compact_context` trace parts (ADR-0012 §Compaction trace
+ * in the timeline) from a message list before it is converted to ModelMessages.
+ * The trace is a UI-only marker persisted on assistant messages for the chat
+ * timeline and must NEVER be replayed to the provider: it would look like a
+ * phantom call to a tool the model was never given (provider rejection / model
+ * confusion). An assistant message with no parts left after stripping (the
+ * ADR-0012 §Force-compact on demand standalone trace message) is dropped rather
+ * than sent empty. Input messages are not mutated; when nothing needs stripping
+ * the original array is returned as-is. Exported for unit testing.
+ */
+export function stripCompactionTraceParts(
+  messages: PlatypusUIMessage[],
+): PlatypusUIMessage[] {
+  const isTracePart = (part: { type: string }): boolean =>
+    part.type === COMPACT_CONTEXT_PART_TYPE;
+  const carriesTrace = (message: PlatypusUIMessage): boolean =>
+    message.role === "assistant" && message.parts.some(isTracePart);
+
+  // Nothing to strip: hand back the very same array (reference identity),
+  // without allocating a copy.
+  if (!messages.some(carriesTrace)) return messages;
+
+  return messages.flatMap<PlatypusUIMessage>((message) => {
+    if (!carriesTrace(message)) return [message];
+    const parts = message.parts.filter((part) => !isTracePart(part));
+    if (parts.length === 0) {
+      // Trace-only message (ADR-0012 §Force-compact on demand): drop it from the model payload.
+      return [];
+    }
+    return [{ ...message, parts }];
+  });
+}
+
+/** Per-run stats stored under `metadata.stats` of the last assistant message after each stream (ADR-0012 §Context-usage ring / §Per-message stats). */
+export type MessageStats = {
+  /** Run-wide input-token total, summed across every step (ADR-0012 §Per-message stats cost popover). */
+  inputTokens: number;
+  outputTokens: number; // run-wide output-token total, summed like inputTokens
+  /**
+   * Input tokens of the LAST model call, i.e. peak context fullness (ADR-0012 §Context-usage ring).
+   * Deliberately NOT the run-wide sum, which over-counts on multi-step tool loops.
+   */
+  contextTokens: number;
+  startedAt: string; // run timing; presumably an ISO timestamp (confirm at the call site)
+  firstTokenAt?: string; // optional; presumably absent when no token was produced (confirm)
+  finishedAt: string; // run timing; presumably an ISO timestamp (confirm at the call site)
+  contextWindow: number; // resolved context window for the run's model
+  contextWindowIsDefault: boolean; // presumably true when the window is the fallback default (confirm)
+};
+
+/**
+ * Writes the run's stats (token counts, timing, resolved context window) to
+ * `metadata.stats` of the LAST assistant message, in place; without an assistant
+ * message nothing changes. Runs at the same point as {@link applyToolCompletions}
+ * so both mutations land before the sink persists the final state (ADR-0012 §Context-usage ring / §Per-message stats).
+ */
+function applyMessageStats(
+  messages: PlatypusUIMessage[],
+  stats: MessageStats,
+): void {
+  type WithMetadata = PlatypusUIMessage & { metadata?: Record<string, unknown> };
+  let index = messages.length;
+  while (index-- > 0) {
+    if (messages[index].role !== "assistant") continue;
+    const target = messages[index] as WithMetadata;
+    // Existing metadata keys are kept; only `stats` is (over)written.
+    target.metadata = { ...target.metadata, stats };
+    return;
+  }
+}
+
+/**
+ * Copies each tool call's `completedAt` from the `completions` map built by
+ * {@link withToolTimestamps} onto the matching tool part (matched by
+ * `toolCallId`), mutating the parts in place. This runs on the built message
+ * just before it is persisted because the AI SDK strips `toolMetadata` from
+ * tool-output chunks, so the end time cannot be injected inline. Together with
+ * the injected `startedAt` it lets the UI compute each tool's run duration.
+ */
+function applyToolCompletions(
+  messages: PlatypusUIMessage[],
+  completions: Map<string, string>,
+): void {
+  if (completions.size === 0) return;
+  for (const { parts } of messages) {
+    for (const part of parts ?? []) {
+      const timed = part as {
+        toolCallId?: string;
+        toolMetadata?: Record<string, unknown>;
+      };
+      const { toolCallId } = timed;
+      if (!toolCallId) continue;
+      const completedAt = completions.get(toolCallId);
+      if (!completedAt) continue;
+      timed.toolMetadata = { ...timed.toolMetadata, completedAt };
+    }
+  }
+}
+
 export type StreamOptions = {
   origin: string;
   frontendUrl?: string;
@@ -153,6 +372,12 @@ type RunState = {
   stats: RunStats;
   messages: PlatypusUIMessage[];
   terminated: boolean;
+  /**
+   * Input tokens reported by the most recent model step = peak context
+   * fullness for the ADR-0012 §Context-usage ring. Tracked separately from `stats.inputTokens`,
+   * which is the run-wide SUM and over-counts multi-step tool loops.
+   */
+  lastStepInputTokens: number;
 };
 
 /**
@@ -177,6 +402,8 @@ export class AgentRunner {
     origin: string | undefined,
     frontendUrl?: string,
     onActivity?: (event?: ToolActivityEvent) => void,
+    priorMessages?: PlatypusUIMessage[],
+    signal?: AbortSignal,
   ): Promise<ChatTurn> {
     return prepareChatTurn({
       orgId: scope.orgId,
@@ -188,6 +415,8 @@ export class AgentRunner {
       frontendUrl,
       runMode: scope.principal.kind === "user" ? "interactive" : "headless",
       onActivity,
+      priorMessages,
+      signal,
     });
   }
 
@@ -218,12 +447,29 @@ export class AgentRunner {
     timeouts?: Pick<RegisterOptions, "perStepTimeoutMs" | "perRunTimeoutMs">;
   }) {
     const { scope, input, sink } = params;
+
+    // ADR-0012 §Summary invalidation: snapshot the DB state BEFORE onStart overwrites
+    // it, so applyTier1IfNeeded gets the correct edit-detection baseline. Only
+    // interactive chats carry a `request.id`; headless runs (triggers, sub-agents) have none.
+    const priorMessages = input.request.id
+      ? await loadChatMessages(input.request.id).catch((err) => {
+          // Falls back to the post-overwrite DB read inside applyTier1IfNeeded,
+          // which cannot detect edits below the watermark — log the degradation.
+          logger.warn(
+            { err, chatId: input.request.id },
+            "ADR-0012 §Summary invalidation: failed to snapshot prior messages; ADR-0012 §Summary invalidation edit-detection degraded this turn",
+          );
+          return undefined;
+        })
+      : undefined;
+
     await sink.onStart({ runId: input.runId, messages: input.messages });
 
     const state: RunState = {
       stats: {},
       messages: input.messages,
       terminated: false,
+      lastStepInputTokens: 0,
     };
 
     const finalize = async (
@@ -276,6 +522,8 @@ export class AgentRunner {
         params.origin,
         params.frontendUrl,
         onActivity,
+        priorMessages,
+        handle.signal,
       );
     } catch (error) {
       const err = error instanceof Error ? error : new Error(String(error));
@@ -296,6 +544,8 @@ export class AgentRunner {
     }): void => {
       handle.bumpStep();
       accumulateStepStats(state.stats, step);
+      state.lastStepInputTokens =
+        step.usage?.inputTokens ?? state.lastStepInputTokens;
       logger.info(
         {
           runId: input.runId,
@@ -322,12 +572,26 @@ export class AgentRunner {
     // an `undefined` value identically, and the streaming path has always
     // passed them this way in production.
     const modelArgs = {
-      model: state.turn.stream.model,
-      messages: await convertToModelMessages(state.turn.stream.messages),
+      // Recovery middleware (ADR-0012 §Recovery): every model call — first call and every
+      // tool-loop step, stream and generate alike — gets one trim-and-retry on
+      // a provider "context too long" rejection. Always on; not gated by ADR-0012 §Config & kill switch.
+      model: withOverflowRecovery(state.turn),
+      // Strip the UI-only synthetic compact_context trace parts (ADR-0012 §Compaction trace in the timeline) before
+      // sending history to the provider — replaying them surfaces a phantom tool
+      // call for a tool the model was never given. Applied here so both the
+      // streaming and generate paths (which share modelArgs) are covered.
+      messages: await convertToModelMessages(
+        stripCompactionTraceParts(state.turn.stream.messages),
+      ),
       system: state.turn.stream.system,
       tools: state.turn.stream.tools,
       stopWhen: [stepCountIs(state.turn.stream.maxSteps)],
       abortSignal: handle.signal,
+      // Tier 2 (ADR-0012 §Tier 2): in-turn compaction before each step when the live window
+      // nears the limit. Undefined when the turn has no Tier 2 runtime.
+      prepareStep: state.turn.tier2
+        ? buildTier2PrepareStep(state.turn.tier2)
+        : undefined,
       temperature: state.turn.stream.temperature,
       topP: state.turn.stream.topP,
       topK: state.turn.stream.topK,
@@ -357,66 +621,154 @@ export class AgentRunner {
 
     logger.debug({ systemPrompt: modelArgs.system }, "System prompt for chat");
 
+    const startedAt = new Date().toISOString();
+    let firstTokenAt: string | undefined;
+    // Set when the ADR-0012 §Context-usage ring / §Per-message stats are first emitted (messageMetadata `finish`), so
+    // the post-stream persist stamp reuses the same value rather than a slightly
+    // later one — streamed and reloaded stats then match.
+    let finishedAt: string | undefined;
+
+    // Single source of truth for the per-message stats, so the live-streamed
+    // copy (messageMetadata, below) and the persisted copy (applyMessageStats in
+    // the finally) are identical. Reads the mutable state at call time.
+    const buildMessageStats = (
+      finishedAtValue: string,
+    ): MessageStats | undefined => {
+      if (!state.turn) return undefined;
+      return {
+        inputTokens: state.stats.inputTokens ?? 0,
+        outputTokens: state.stats.outputTokens ?? 0,
+        contextTokens: state.lastStepInputTokens,
+        startedAt,
+        firstTokenAt,
+        finishedAt: finishedAtValue,
+        contextWindow: state.turn.resolved.contextWindow,
+        contextWindowIsDefault: state.turn.resolved.contextWindowIsDefault,
+      };
+    };
+
     const result = streamText({
       ...modelArgs,
       onStepFinish: (step) => onStep(step),
+      // TTFT: stamp the first text token here (fires before the `finish` event),
+      // so the stats are complete by the time messageMetadata emits them.
+      onChunk: ({ chunk }) => {
+        if (!firstTokenAt && chunk.type === "text-delta") {
+          firstTokenAt = new Date().toISOString();
+        }
+      },
     });
 
     // Build the UI message stream and tee it. The response body consumes
     // one branch; we drain the other server-side so a disconnected
     // client (cancelling the response branch) doesn't propagate back to
     // the source. The source keeps pulling as long as the snapshot
-    // branch is being read, so `onFinish` only fires on natural
-    // completion — not when the consumer cancels with partial state.
+    // branch is being read.
     const uiStream = result.toUIMessageStream<PlatypusUIMessage>({
       originalMessages: input.messages,
       generateMessageId: createIdGenerator({ prefix: "msg", size: 16 }),
-      messageMetadata: () =>
-        state.turn?.resolved.agentId
+      // Emit the ADR-0012 §Context-usage ring / §Per-message stats with the `finish` event so the client gets them on
+      // the final stream chunk — the (i) stats action then appears the instant
+      // the answer completes, not a DB-refetch round-trip later. `start` carries
+      // only agentId (timing/usage don't exist yet). The post-stream stamp in
+      // the finally still writes them to the persisted message for reload.
+      messageMetadata: ({ part }) => {
+        const agentId = state.turn?.resolved.agentId
           ? { agentId: state.turn.resolved.agentId }
-          : undefined,
-      onError: (error) => formatStreamError(error),
-      onFinish: async ({ messages: finalMessages }) => {
-        state.messages = finalMessages;
-        let status: RunStatus = "succeeded";
-        let err: Error | undefined;
-        if (handle.signal.aborted) {
-          const reason: unknown = handle.signal.reason;
-          if (reason instanceof TimeoutError) {
-            status = "failed";
-            err = reason;
-          } else {
-            status = "cancelled";
-          }
+          : undefined;
+        if (part.type === "finish") {
+          finishedAt = new Date().toISOString();
+          const stats = buildMessageStats(finishedAt);
+          return stats ? { ...agentId, stats } : agentId;
         }
-        await finalize(status, err);
+        return agentId;
       },
+      onError: (error) => formatStreamError(error),
     });
 
-    const [forResponse, forSnapshot] = uiStream.tee();
+    // ADR-0012 §Compaction trace in the timeline: if Tier 1 compaction fired this turn, prepend synthetic
+    // compact_context tool-call + tool-result chunks so the compaction is
+    // visible in the chat timeline. Injected after the 'start' event so the
+    // AI SDK builds them into the same assistant message as the response.
+    const tracedStream: ReadableStream<UIMessageChunk> = state.turn
+      ?.compactionTrace
+      ? prependCompactionChunks(
+          uiStream as ReadableStream<UIMessageChunk>,
+          state.turn.compactionTrace,
+        )
+      : (uiStream as ReadableStream<UIMessageChunk>);
+
+    const { stream: timedStream, completions } =
+      withToolTimestamps(tracedStream);
+    const [forResponse, forSnapshot] = timedStream.tee();
 
     // Read the snapshot branch as message snapshots and keep `state.messages`
     // up to date. ChatSink's FlushScheduler then writes the in-progress
     // assistant message to the DB on each onProgress bump, so a user who
     // reconnects mid-run sees the partial answer (not just their own
     // input message).
+    //
+    // finalize is called here (not in toUIMessageStream's onFinish) so that
+    // state.messages reflects the fully-drained stream — including the tool
+    // `completedAt` timestamps and ADR-0012 §Context-usage ring / §Per-message stats applied below — before the sink
+    // persists it.
+    // An error chunk (model/tool failure surfaced via formatStreamError) or
+    // an internal stream fault ends the for-await without throwing, because
+    // readUIMessageStream defaults terminateOnError=false. Capture it so the
+    // finally finalizes "failed" instead of silently persisting a partial
+    // message as "succeeded".
+    let streamError: unknown;
     void (async () => {
       try {
         for await (const message of readUIMessageStream<PlatypusUIMessage>({
           stream: forSnapshot,
-          onError: (err) =>
+          onError: (err) => {
+            streamError = err;
             logger.error(
               { err, runId: input.runId },
               "Snapshot stream parse error",
-            ),
+            );
+          },
         })) {
           state.messages = [...input.messages, message];
         }
       } catch (err) {
+        streamError = err;
         logger.error(
           { err, runId: input.runId },
           "Server-side UI stream consumer error",
         );
+      } finally {
+        // Reuse the finish-event timestamp when present so the persisted stats
+        // match what was streamed; fall back if the stream ended without one.
+        const finishedAtFinal = finishedAt ?? new Date().toISOString();
+        applyToolCompletions(state.messages, completions);
+        const stats = buildMessageStats(finishedAtFinal);
+        if (stats) applyMessageStats(state.messages, stats);
+        let status: RunStatus = "succeeded";
+        let err: Error | undefined;
+        if (handle.signal.aborted) {
+          const reason: unknown = handle.signal.reason;
+          if (reason instanceof TimeoutError) {
+            status = "failed";
+            err = reason;
+          } else {
+            status = "cancelled";
+          }
+        } else if (streamError !== undefined) {
+          // The stream errored (model/tool rejection or internal fault) but did
+          // not abort — record the run as failed rather than succeeded.
+          status = "failed";
+          err =
+            streamError instanceof Error
+              ? streamError
+              : new Error(
+                  typeof streamError === "string"
+                    ? streamError
+                    : "Server-side UI stream error",
+                );
+        }
+        await finalize(status, err);
       }
     })();
 
@@ -488,6 +840,22 @@ export class AgentRunner {
   }
 }
 
+/**
+ * Returns the turn's model wrapped in the context-overflow recovery middleware
+ * (ADR-0012 §Recovery), so every model call — first call and every tool-loop
+ * step, stream and generate alike — gets one trim-and-retry on a provider
+ * "context too long" rejection. Always on; not gated by ADR-0012 §Config & kill switch.
+ */
+const withOverflowRecovery = (turn: ChatTurn): LanguageModel => {
+  // `turn.stream.model` is typed `LanguageModel` (string | model spec), but by
+  // now it is always a resolved model object rather than a string id, so it is
+  // narrowed to the spec form that wrapLanguageModel requires.
+  type WrappableModel = Parameters<typeof wrapLanguageModel>[0]["model"];
+  const model = turn.stream.model as WrappableModel;
+  const middleware = contextOverflowRecoveryMiddleware(turn.recovery);
+  return wrapLanguageModel({ model, middleware });
+};
+
 /**
  * Converts AI SDK errors into user-facing strings for the UI message stream.
  * Behaviour-preserving copy of the previous inline `onError` handler.
@@ -497,6 +865,11 @@ const formatStreamError = (error: unknown): string => {
   if (LoadAPIKeyError.isInstance(error)) {
     return "AI provider API key is missing or not configured.";
   }
+  // Reaching here means recovery (ADR-0012 §Recovery) already trimmed and retried once and the
+  // provider still rejected the prompt — surface the actionable dead end.
+  if (isContextOverflowError(error)) {
+    return "Conversation too large for the model's context window even after trimming — start a new chat or reduce attachments.";
+  }
   if (APICallError.isInstance(error)) {
     if (error.statusCode === 401 || error.statusCode === 403) {
       return "AI provider authentication failed. Your API key may be invalid or expired.";
diff --git a/apps/backend/src/runs/compaction.test.ts b/apps/backend/src/runs/compaction.test.ts
new file mode 100644
index 00000000..49ff6e5d
--- /dev/null
+++ b/apps/backend/src/runs/compaction.test.ts
@@ -0,0 +1,1335 @@
+import { describe, it, expect, vi } from "vitest";
+
+vi.mock("../index.ts", () => ({ db: {} })); // drizzle store unused in these tests
+vi.mock("../logger.ts", () => ({
+  logger: { warn: vi.fn(), info: vi.fn(), error: vi.fn(), debug: vi.fn() },
+}));
+
+import {
+  commitWatermark,
+  compactUIMessages,
+  compactModelMessages,
+  editToolResults,
+  elidedToolPlaceholder,
+  pickKeepBoundary,
+  softTrim,
+  type CompactionStore,
+  type CompactionState,
+  type WatermarkPatch,
+} from "./compaction.ts";
+import { logger } from "../logger.ts";
+import type { ModelMessage } from "ai";
+import type { PlatypusUIMessage } from "../types.ts";
+
+/**
+ * In-memory {@link CompactionStore} double. JS is single-threaded, so the
+ * version check in `casWrite` is atomic per call — the guarantee Postgres gives
+ * via the `version` predicate. `readState` returns a copy, so a version bump
+ * after the read (a racing winner) leaves that snapshot stale → CAS fails.
+ */
+class FakeStore implements CompactionStore {
+  state: CompactionState;
+  casCalls = 0;
+
+  constructor(init: Partial<CompactionState> = {}) {
+    const defaults = {
+      version: 0,
+      summaryWatermark: null,
+      contextSummary: null,
+      compactionDirty: false,
+    };
+    this.state = { ...defaults, ...init };
+  }
+
+  readState() {
+    return Promise.resolve({ ...this.state });
+  }
+
+  casWrite(
+    _chatId: string,
+    expectVersion: number,
+    patch: WatermarkPatch,
+  ): Promise<boolean> {
+    this.casCalls += 1;
+    const row = this.state;
+    if (row.version !== expectVersion) return Promise.resolve(false);
+    if ("watermark" in patch) row.summaryWatermark = patch.watermark ?? null;
+    if ("summary" in patch) row.contextSummary = patch.summary ?? null;
+    if ("dirty" in patch) row.compactionDirty = patch.dirty ?? false;
+    row.version = expectVersion + 1;
+    return Promise.resolve(true);
+  }
+}
+
+describe("casWrite — version-gated CAS (ADR-0012 §One durable writer)", () => {
+  it("applies and bumps version when the expected version matches", async () => {
+    const fake = new FakeStore({ version: 3 });
+    const patch = { summary: "s", watermark: "m1" };
+    expect(await fake.casWrite("c", 3, patch)).toBe(true);
+    expect(fake.state.version).toBe(4);
+    expect(fake.state.contextSummary).toBe("s");
+    expect(fake.state.summaryWatermark).toBe("m1");
+  });
+
+  it("two writers on the same version: one wins, the other loses", async () => {
+    const fake = new FakeStore({ version: 0 });
+    const winner = await fake.casWrite("c", 0, { summary: "A" });
+    const loser = await fake.casWrite("c", 0, { summary: "B" });
+    expect(winner).toBe(true);
+    expect(loser).toBe(false); // row already at v1, writer still expected v0
+    expect(fake.state.contextSummary).toBe("A");
+  });
+
+  it("an explicit null clears a field; an absent key leaves it untouched", async () => {
+    const seeded = { version: 1, contextSummary: "old", summaryWatermark: "m5" };
+    const fake = new FakeStore(seeded);
+
+    // Only `summary` appears in the patch, so the watermark must survive.
+    await fake.casWrite("c", 1, { summary: null });
+    const { contextSummary, summaryWatermark } = fake.state;
+    expect(contextSummary).toBeNull(); // explicit null → cleared
+    expect(summaryWatermark).toBe("m5"); // absent key → untouched
+  });
+});
+
+describe("commitWatermark — loser logic (ADR-0012 §One durable writer)", () => {
+  it("applies a write on an uncontended commit", async () => {
+    const store = new FakeStore({ version: 2 });
+    const res = await commitWatermark(store, "c", () => ({
+      kind: "write",
+      patch: { summary: "sum", watermark: "m9" },
+    }));
+    expect(res).toEqual({ status: "applied", version: 3 }); // bumped from v2
+    expect(store.state.summaryWatermark).toBe("m9");
+  });
+
+  it("skips immediately when the decision is a no-op", async () => {
+    const store = new FakeStore({ version: 0 });
+    const res = await commitWatermark(store, "c", () => ({
+      kind: "skip",
+      reason: "no-op",
+    }));
+    expect(res).toEqual({ status: "skipped", reason: "no-op" });
+    expect(store.casCalls).toBe(0); // a skip decision never attempts a CAS write
+  });
+
+  it("re-reads after a CAS conflict and succeeds on the retry", async () => {
+    const store = new FakeStore({ version: 0 });
+    let firstDecision = true;
+    const res = await commitWatermark(store, "c", (state) => {
+      if (firstDecision) {
+        firstDecision = false;
+        // Simulate a racing winner: change the row after `state` was read from it.
+        store.state.version = 1;
+        store.state.summaryWatermark = "winner";
+      }
+      // Decide by the (re-read) version, not the watermark value.
+      return { kind: "write", patch: { summary: `at-v${state.version}` } };
+    });
+    expect(res.status).toBe("applied");
+    // First attempt CAS expected v0 but row is v1 → lost; retry expects v1 → wins.
+    expect(store.state.version).toBe(2);
+    expect(store.state.contextSummary).toBe("at-v1");
+  });
+
+  it("decides 'covered' on the retry and skips (winner already did the work)", async () => {
+    const store = new FakeStore({ version: 0, summaryWatermark: "m1" });
+    let first = true;
+    const res = await commitWatermark(store, "c", (state) => {
+      if (first) {
+        first = false;
+        store.state.version = 1;
+        store.state.summaryWatermark = "m20"; // winner advanced past our prefix
+        return { kind: "write", patch: { summary: "mine", watermark: "m10" } };
+      }
+      // On re-read we see the winner covered us → skip (decide by version).
+      expect(state.version).toBe(1);
+      return { kind: "skip", reason: "covered" };
+    });
+    expect(res).toEqual({ status: "skipped", reason: "covered" });
+    expect(store.state.summaryWatermark).toBe("m20"); // winner's value preserved
+  });
+
+  it("gives up as 'contended' after two conflicts — no livelock", async () => {
+    const store = new FakeStore({ version: 0 });
+    let decideCalls = 0;
+    const res = await commitWatermark(store, "c", (state) => {
+      decideCalls++;
+      // Bump the row on every decision so each CAS attempt expects a stale version.
+      store.state.version = state.version + 1;
+      return { kind: "write", patch: { summary: "x" } };
+    });
+    expect(res).toEqual({ status: "skipped", reason: "contended" });
+    expect(decideCalls).toBe(2); // exactly MAX_ATTEMPTS, then stop
+  });
+});
+
+// --- Slice 2b: compaction primitives ------------------------------------
+
+// Test helper: builds a UI message with the given id and role whose only
+// part is a text part containing `text`.
+function uiText(id: string, role: "user" | "assistant", text: string) {
+  const textPart = { type: "text" as const, text };
+  const message: PlatypusUIMessage = { id, role, parts: [textPart] };
+  return message;
+}
+
+/**
+ * Test helper: an assistant UI message carrying one `tool-doThing` part in the
+ * "output-available" state, with `output` as its result.
+ */
+function uiTool(id: string, output: unknown): PlatypusUIMessage {
+  const toolPart = {
+    type: "tool-doThing",
+    toolCallId: `${id}-call`,
+    state: "output-available",
+    input: {},
+    output,
+  };
+  const message = { id, role: "assistant", parts: [toolPart] };
+  return message as unknown as PlatypusUIMessage;
+}
+
+const noopSummarize = async () => "SUMMARY"; // fixed-output summarizer stub
+
+describe("softTrim", () => {
+  it("keeps short text untouched", () => {
+    expect(softTrim("short", 500)).toBe("short"); // returned unchanged
+  });
+  it("trims long text to head+tail with a marker", () => {
+    const trimmed = softTrim("a".repeat(2000), 100);
+    expect(trimmed.slice(0, 100)).toBe("a".repeat(100)); // head kept verbatim
+    expect(trimmed).toContain("elided 1800 chars");
+    expect(trimmed.length).toBeLessThan(2000);
+  });
+});
+
+describe("pickKeepBoundary", () => {
+  it("UIMessage: any split is safe", () => {
+    expect(pickKeepBoundary(5, 2, () => true)).toBe(3); // 5 - 2, accepted as-is
+  });
+  it("ModelMessage: walks back so recent does not start on an orphan tool result", () => {
+    const roles = ["user", "assistant", "tool", "user"];
+    const splitOk = (k: number) => !(k < roles.length && roles[k] === "tool");
+    // First candidate 4-2=2 is the "tool" message (unsafe) → expect 1 (assistant).
+    expect(pickKeepBoundary(roles.length, 2, splitOk)).toBe(1);
+  });
+});
+
+describe("compactUIMessages (Tier 1)", () => {
+  const baseOpts = { // shared defaults; each case adds its own targetTokens
+    keepRecentMessages: 2,
+    minPrunableChars: 2000,
+    summarize: noopSummarize,
+  };
+
+  it("is a no-op when already within target (hysteresis precondition)", async () => {
+    const msgs = [uiText("a", "user", "hi"), uiText("b", "assistant", "yo")];
+    const res = await compactUIMessages(msgs, {
+      ...baseOpts,
+      targetTokens: 1000,
+    });
+    expect(res.usedModelCall).toBe(false);
+    expect(res.messagesDropped).toBe(0);
+    expect(res.keptMessages).toBe(msgs); // identity: the input array is returned as-is
+  });
+
+  it("Stage 1 prune reaches target WITHOUT a model call", async () => {
+    const summarize = vi.fn(noopSummarize);
+    const msgs = [
+      uiTool("big", "X".repeat(4000)), // ~1000 tokens, prunes to ~250
+      uiText("r1", "user", "hello"),
+      uiText("r2", "assistant", "world"),
+    ];
+    const res = await compactUIMessages(msgs, {
+      ...baseOpts,
+      summarize,
+      targetTokens: 300,
+    });
+    expect(res.usedModelCall).toBe(false);
+    expect(summarize).not.toHaveBeenCalled();
+    expect(res.watermarkId).toBeNull(); // no summary was produced → no watermark
+    expect(res.keptMessages).toHaveLength(3); // pruned prefix stays visible
+    expect(res.estimatedTokens).toBeLessThanOrEqual(300);
+  });
+
+  it("Stage 2 summarizes when pruning is insufficient (text-heavy prefix)", async () => {
+    const summarize = vi.fn(noopSummarize);
+    const msgs = [
+      uiText("p1", "user", "P".repeat(4000)),
+      uiText("p2", "assistant", "Q".repeat(4000)),
+      uiText("r1", "user", "hello"),
+      uiText("r2", "assistant", "world"),
+    ];
+    const res = await compactUIMessages(msgs, {
+      ...baseOpts,
+      summarize,
+      targetTokens: 300,
+    });
+    expect(res.usedModelCall).toBe(true);
+    expect(summarize).toHaveBeenCalledOnce();
+    expect(res.summaryText).toBe("SUMMARY");
+    expect(res.watermarkId).toBe("p2"); // last folded message
+    expect(res.keptMessages).toHaveLength(2); // only recent kept
+    expect(res.estimatedTokens).toBeLessThanOrEqual(300);
+  });
+
+  it("does NOT re-fire next turn: feeding the result back is a no-op (ADR-0012 §Tier 1 (hysteresis))", async () => {
+    const msgs = [
+      uiText("p1", "user", "P".repeat(4000)),
+      uiText("p2", "assistant", "Q".repeat(4000)),
+      uiText("r1", "user", "hello"),
+      uiText("r2", "assistant", "world"),
+    ];
+    const target = 300;
+    const first = await compactUIMessages(msgs, {
+      ...baseOpts,
+      targetTokens: target,
+    });
+    expect(first.usedModelCall).toBe(true);
+
+    const second = await compactUIMessages(first.keptMessages, {
+      ...baseOpts,
+      targetTokens: target,
+      priorSummary: first.summaryText, // feed the first pass's summary back in
+    });
+    expect(second.usedModelCall).toBe(false); // already within target
+    expect(second.messagesDropped).toBe(0);
+  });
+
+  it("map-reduces an oversized prefix (ADR-0012 §Tier 1 (summarizer model & map-reduce))", async () => {
+    const summarize = vi.fn(noopSummarize);
+    const msgs = [
+      uiText("p1", "user", "Z".repeat(4000)), // ~1000 tokens of transcript
+      uiText("r1", "user", "hello"),
+      uiText("r2", "assistant", "world"),
+    ];
+    await compactUIMessages(msgs, {
+      ...baseOpts,
+      summarize,
+      targetTokens: 50,
+      summarizerWindow: 100, // 400-char chunks → several chunk calls + 1 reduce
+    });
+    expect(summarize.mock.calls.length).toBeGreaterThan(1);
+  });
+
+  it("Stage 2 prunes large tool results in kept (recent) messages", async () => {
+    const msgs = [
+      uiText("p1", "user", "P".repeat(4000)),
+      uiText("p2", "assistant", "Q".repeat(4000)),
+      uiTool("r1", "X".repeat(12000)), // big tool result in recent
+      uiText("r2", "user", "done"),
+    ];
+    const res = await compactUIMessages(msgs, {
+      ...baseOpts,
+      summarize: noopSummarize,
+      targetTokens: 300,
+      minRecentPrunableChars: 5000, // 12000-char output exceeds threshold
+    });
+    expect(res.usedModelCall).toBe(true);
+    expect(res.keptMessages).toHaveLength(2); // r1 + r2
+    // Tool result in r1 should be trimmed (soft-trim produces head+tail, not full string)
+    const toolPart = res.keptMessages[0].parts?.find((p) =>
+      (p as { type: string }).type.startsWith("tool-"),
+    ) as { output?: string } | undefined;
+    expect(typeof toolPart?.output).toBe("string");
+    expect((toolPart?.output as string).length).toBeLessThan(12000);
+  });
+
+  it("Stage 2 does not prune recent tool results below minRecentPrunableChars", async () => {
+    const msgs = [
+      uiText("p1", "user", "P".repeat(4000)),
+      uiText("p2", "assistant", "Q".repeat(4000)),
+      uiTool("r1", "X".repeat(3000)), // below threshold of 20000
+      uiText("r2", "user", "done"),
+    ];
+    const res = await compactUIMessages(msgs, {
+      ...baseOpts,
+      summarize: noopSummarize,
+      targetTokens: 300,
+      minRecentPrunableChars: 20000, // threshold above 3000 → no pruning
+    });
+    expect(res.usedModelCall).toBe(true);
+    const toolPart = res.keptMessages[0].parts?.find((p) =>
+      (p as { type: string }).type.startsWith("tool-"),
+    ) as { output?: string } | undefined;
+    // Output unchanged — 3000 chars below threshold
+    expect(toolPart?.output).toBe("X".repeat(3000));
+  });
+
+  it("prunes large recent tool results when the prefix is empty (no summary)", async () => {
+    // Whole history fits within keepRecentMessages (2), yet one huge tool result
+    // pushes it over target. boundary=0 → empty prefix → no model call, but the
+    // outlier in recent must still be trimmed (the gap flagged as "Finding 1").
+    const msgs = [
+      uiTool("r1", "X".repeat(12000)), // big tool result, no prefix to summarize
+      uiText("r2", "user", "done"),
+    ];
+    const res = await compactUIMessages(msgs, {
+      ...baseOpts,
+      summarize: noopSummarize,
+      targetTokens: 300,
+      minRecentPrunableChars: 5000,
+    });
+    expect(res.usedModelCall).toBe(false); // empty prefix → no summarize
+    const toolPart = res.keptMessages[0].parts?.find((p) =>
+      (p as { type: string }).type.startsWith("tool-"),
+    ) as { output?: string } | undefined;
+    expect(typeof toolPart?.output).toBe("string");
+    expect((toolPart?.output as string).length).toBeLessThan(12000);
+  });
+
+  it("ADR-0012 §Hard window wall: keeps recent VERBATIM in the empty-prefix path when within inputBudget", async () => {
+    // Whole history fits within keepRecentMessages (2) → empty prefix, no model
+    // call. Over the soft target but under the wall → outlier must stay untouched.
+    const msgs = [
+      uiTool("r1", "X".repeat(12000)),
+      uiText("r2", "user", "done"),
+    ];
+    const res = await compactUIMessages(msgs, {
+      ...baseOpts,
+      summarize: noopSummarize,
+      targetTokens: 300,
+      minRecentPrunableChars: 5000,
+      inputBudget: 100000, // wall far above → no recent trim
+    });
+    expect(res.usedModelCall).toBe(false);
+    const toolPart = res.keptMessages[0].parts?.find((p) =>
+      (p as { type: string }).type.startsWith("tool-"),
+    ) as { output?: string } | undefined;
+    expect(toolPart?.output).toBe("X".repeat(12000)); // untouched
+  });
+
+  it("warns (no wall) when Stage 2 result still exceeds 2× targetTokens after pruning", async () => {
+    const warn = vi.spyOn(logger, "warn").mockReturnValue(undefined);
+    const msgs = [
+      uiText("p1", "user", "P".repeat(4000)),
+      uiText("p2", "assistant", "Q".repeat(4000)),
+      // recent messages are huge text (not tool), cannot be pruned
+      uiText("r1", "user", "R".repeat(8000)),
+      uiText("r2", "assistant", "S".repeat(8000)),
+    ];
+    await compactUIMessages(msgs, {
+      ...baseOpts,
+      summarize: noopSummarize,
+      targetTokens: 50, // recent alone is ~4000 tokens → well over 2×50
+      // no inputBudget → warn falls back to the target*2 heuristic
+    });
+    expect(warn).toHaveBeenCalledWith(
+      expect.objectContaining({ targetTokens: 50 }),
+      expect.stringContaining("recent messages exceed the window"),
+    );
+    warn.mockRestore();
+  });
+
+  it("ADR-0012 §Hard window wall: does NOT warn on a soft-target miss when recent is under the wall", async () => {
+    const warn = vi.spyOn(logger, "warn").mockReturnValue(undefined);
+    const msgs = [
+      uiText("p1", "user", "P".repeat(4000)),
+      uiText("p2", "assistant", "Q".repeat(4000)),
+      uiText("r1", "user", "R".repeat(8000)),
+      uiText("r2", "assistant", "S".repeat(8000)),
+    ];
+    await compactUIMessages(msgs, {
+      ...baseOpts,
+      summarize: noopSummarize,
+      targetTokens: 50, // way over target...
+      inputBudget: 100000, // ...but well under the hard wall → no warn
+    });
+    expect(warn).not.toHaveBeenCalled();
+    warn.mockRestore();
+  });
+
+  it("ADR-0012 §Hard window wall: keeps recent tool results VERBATIM when within inputBudget", async () => {
+    // Over the soft target (300) so Stage 2 fires, but the kept view (summary +
+    // recent) stays under the hard wall → recent must NOT be trimmed.
+    const msgs = [
+      uiText("p1", "user", "P".repeat(4000)),
+      uiText("p2", "assistant", "Q".repeat(4000)),
+      uiTool("r1", "X".repeat(12000)), // ~3000 tokens in recent
+      uiText("r2", "user", "done"),
+    ];
+    const res = await compactUIMessages(msgs, {
+      ...baseOpts,
+      summarize: noopSummarize,
+      targetTokens: 300,
+      minRecentPrunableChars: 5000,
+      inputBudget: 100000, // wall far above the kept view → no recent trim
+    });
+    expect(res.usedModelCall).toBe(true);
+    const toolPart = res.keptMessages[0].parts?.find((p) =>
+      (p as { type: string }).type.startsWith("tool-"),
+    ) as { output?: string } | undefined;
+    expect(toolPart?.output).toBe("X".repeat(12000)); // untouched
+  });
+
+  it("ADR-0012 §Hard window wall: trims recent (except newest) when the kept view breaches inputBudget", async () => {
+    // Two big tool results in recent; the kept view breaches the wall → trim the
+    // older one, exempt the single newest message even though it is bulky.
+    const msgs = [
+      uiText("p1", "user", "P".repeat(4000)),
+      uiText("p2", "assistant", "Q".repeat(4000)),
+      uiTool("r1", "X".repeat(12000)), // older recent → trimmed
+      uiTool("r2", "Y".repeat(12000)), // newest → exempt
+    ];
+    const res = await compactUIMessages(msgs, {
+      ...baseOpts,
+      summarize: noopSummarize,
+      targetTokens: 300,
+      minRecentPrunableChars: 5000,
+      inputBudget: 100, // wall well below the kept view → trim
+    });
+    expect(res.usedModelCall).toBe(true);
+    const out = (i: number) =>
+      (
+        res.keptMessages[i].parts?.find((p) =>
+          (p as { type: string }).type.startsWith("tool-"),
+        ) as { output?: string } | undefined
+      )?.output;
+    expect((out(0) as string).length).toBeLessThan(12000); // r1 trimmed
+    expect(out(1)).toBe("Y".repeat(12000)); // r2 (newest) exempt
+  });
+});
+
+describe("compactModelMessages (Tier 2 / recovery)", () => {
+  const baseOpts = {
+    keepRecentMessages: 2,
+    minPrunableChars: 2000,
+    summarize: noopSummarize,
+  };
+
+  it("is a no-op when within target", async () => {
+    const msgs: ModelMessage[] = [
+      { role: "user", content: "hi" },
+      { role: "assistant", content: "yo" },
+    ];
+    const res = await compactModelMessages(msgs, {
+      ...baseOpts,
+      targetTokens: 1000,
+    });
+    expect(res.usedModelCall).toBe(false);
+    expect(res.messages).toBe(msgs);
+  });
+
+  it("summarizes and prepends one synthetic message, preserving tool pairing", async () => {
+    const msgs: ModelMessage[] = [
+      { role: "user", content: "P".repeat(4000) },
+      {
+        role: "assistant",
+        content: [
+          { type: "tool-call", toolCallId: "t1", toolName: "f", input: {} },
+        ],
+      },
+      {
+        role: "tool",
+        content: [
+          {
+            type: "tool-result",
+            toolCallId: "t1",
+            toolName: "f",
+            output: { type: "json", value: { ok: true } },
+          },
+        ],
+      },
+      { role: "user", content: "recent" },
+    ];
+    const res = await compactModelMessages(msgs, {
+      ...baseOpts,
+      targetTokens: 50,
+    });
+    expect(res.usedModelCall).toBe(true);
+    // First message is the synthetic summary (user-framed).
+    expect(res.messages[0].role).toBe("user");
+    expect(JSON.stringify(res.messages[0].content)).toContain(
+      "Summary of earlier conversation",
+    );
+    // The assistant tool-call and its tool result stay adjacent (not split).
+    const roles = res.messages.map((m) => m.role);
+    const toolIdx = roles.indexOf("tool");
+    expect(roles[toolIdx - 1]).toBe("assistant");
+  });
+
+  it("force bypasses BOTH no-op gates so recovery never retries byte-identically (ADR-0012 §Recovery)", async () => {
+    // Estimator says we are within target AND nothing is prunable (small,
+    // non-bulky messages). Without force both the whole-message gate and the
+    // post-prune gate would no-op → recovery would retry the exact same prompt
+    // and fail again. force must push through to a real summarize.
+    const msgs: ModelMessage[] = [
+      { role: "user", content: "a" },
+      { role: "assistant", content: "b" },
+      { role: "user", content: "recent-1" },
+      { role: "assistant", content: "recent-2" },
+    ];
+    const res = await compactModelMessages(msgs, {
+      ...baseOpts,
+      targetTokens: 100000, // estimator is well under target
+      force: true,
+    });
+    expect(res.usedModelCall).toBe(true);
+    expect(res.messagesDropped).toBeGreaterThan(0);
+    expect(res.messages).not.toBe(msgs);
+  });
+
+  it("force with an empty prefix is a no-op, not a prompt-growing summary (ADR-0012 §Tier 1, model-side)", async () => {
+    // The whole history fits in keepRecentMessages → prefix is empty.
+    // Summarizing nothing would ADD a synthetic message and grow the prompt,
+    // never converging. Surface the overflow instead.
+    const msgs: ModelMessage[] = [
+      { role: "user", content: "only-1" },
+      { role: "assistant", content: "only-2" },
+    ];
+    const res = await compactModelMessages(msgs, {
+      ...baseOpts,
+      keepRecentMessages: 2,
+      targetTokens: 1,
+      force: true,
+    });
+    expect(res.usedModelCall).toBe(false);
+    expect(res.messages.length).toBe(msgs.length);
+  });
+});
+
+// --- Slice 2c: Tier 1 orchestration -------------------------------------
+
+import {
+  applyTier1Compaction,
+  buildCompactionTraceMessage,
+  computeBudget,
+  invalidateCompaction,
+  affectedBelowWatermark,
+  summaryUIMessage,
+  DEFAULT_COMPACTION_CONFIG,
+  type Budget,
+  type CompactionConfig,
+} from "./compaction.ts";
+
+describe("buildCompactionTraceMessage (ADR-0012 §Force-compact on demand)", () => {
+  it("builds an assistant message with a completed compact_context tool part", () => {
+    const msg = buildCompactionTraceMessage(
+      { messagesDropped: 7, summaryExcerpt: "did things" },
+      "msg-abc",
+    );
+    expect(msg.id).toBe("msg-abc");
+    expect(msg.role).toBe("assistant");
+    expect(msg.parts).toHaveLength(1);
+    const part = msg.parts[0] as {
+      type: string;
+      state: string;
+      toolCallId: string;
+      output: unknown;
+    };
+    expect(part.type).toBe("tool-compact_context");
+    expect(part.state).toBe("output-available");
+    expect(part.toolCallId).toBe("msg-abc-call");
+    expect(part.output).toEqual({
+      messagesDropped: 7,
+      summaryExcerpt: "did things",
+    });
+  });
+
+  it("omits summaryExcerpt from the output when absent", () => {
+    const msg = buildCompactionTraceMessage({ messagesDropped: 1 }, "msg-x");
+    const part = msg.parts[0] as { output: unknown };
+    expect(part.output).toEqual({ messagesDropped: 1 });
+  });
+});
+
+function storeFromState(state: Partial<CompactionState>): FakeStore {
+  return new FakeStore(state);
+}
+
+const cfg = (over: Partial<CompactionConfig> = {}): CompactionConfig => ({
+  ...DEFAULT_COMPACTION_CONFIG,
+  keepRecentMessages: 2,
+  ...over,
+});
+
+describe("computeBudget (ADR-0012 §Tier 1 (budget math) — subtract both reserves)", () => {
+  it("subtracts output + safety reserve before applying ratios", () => {
+    const b = computeBudget(
+      10000,
+      2000,
+      cfg({ reserveRatio: 0.05, triggerRatio: 0.8, targetRatio: 0.5 }),
+    );
+    expect(b.inputBudget).toBe(7500); // 10000 - 2000 - 500
+    expect(b.triggerTokens).toBe(6000);
+    expect(b.targetTokens).toBe(3750);
+  });
+  it("uses a conservative output reserve when maxOutputTokens is unknown", () => {
+    const b = computeBudget(10000, undefined, cfg({ reserveRatio: 0.05 }));
+    expect(b.inputBudget).toBe(7000); // 10000 - min(4096, 2500) - 500
+  });
+
+  it("caps the output reserve at half the window so inputBudget can't collapse (ADR-0012 §Tier 1 (budget math))", () => {
+    // A bogus registry entry where max_output >= the input-scoped window would
+    // otherwise drive inputBudget toward 1 and thrash. The cap keeps it sane.
+    const b = computeBudget(10000, 20000, cfg({ reserveRatio: 0.05 }));
+    // reserve capped at 5000 (half), safety 500 → 10000 - 5000 - 500 = 4500.
+    expect(b.inputBudget).toBe(4500);
+  });
+});
+
+const bigText = (id: string, role: "user" | "assistant") =>
+  uiText(id, role, "X".repeat(4000));
+
+describe("applyTier1Compaction", () => {
+  const baseBudget: Budget = {
+    inputBudget: 100,
+    triggerTokens: 50,
+    targetTokens: 50,
+  };
+
+  it("under trigger: reconstructs the persisted view, no write", async () => {
+    const store = storeFromState({
+      version: 2,
+      summaryWatermark: "m2",
+      contextSummary: "PRIOR",
+    });
+    const messages = ["m1", "m2", "m3", "m4"].map((id) =>
+      uiText(id, "user", "hi"),
+    );
+    const out = await applyTier1Compaction({
+      chatId: "c",
+      messages,
+      state: {
+        version: 2,
+        summaryWatermark: "m2",
+        contextSummary: "PRIOR",
+        compactionDirty: false,
+      },
+      budget: {
+        inputBudget: 100000,
+        triggerTokens: 100000,
+        targetTokens: 50000,
+      },
+      config: cfg(),
+      imageProvider: "default",
+      summarize: noopSummarize,
+      store,
+    });
+    expect(out.compacted).toBe(false);
+    expect(out.messages[0]).toEqual(summaryUIMessage("PRIOR")); // re-injected summary
+    expect(out.messages.map((m) => m.id)).toEqual([
+      "context-summary",
+      "m3",
+      "m4",
+    ]); // dropped ≤ watermark
+    expect(store.casCalls).toBe(0); // nothing persisted
+  });
+
+  it("over trigger: compacts, persists summary+watermark, clears dirty, fires event", async () => {
+    const store = storeFromState({ version: 0 });
+    const onEvent = vi.fn();
+    const messages = [
+      bigText("p1", "user"),
+      bigText("p2", "assistant"),
+      uiText("r1", "user", "a"),
+      uiText("r2", "assistant", "b"),
+    ];
+    const out = await applyTier1Compaction({
+      chatId: "c",
+      messages,
+      state: {
+        version: 0,
+        summaryWatermark: null,
+        contextSummary: null,
+        compactionDirty: false,
+      },
+      budget: baseBudget,
+      config: cfg(),
+      imageProvider: "default",
+      summarize: noopSummarize,
+      store,
+      onEvent,
+    });
+    expect(out.compacted).toBe(true);
+    expect(store.state.contextSummary).toBe("SUMMARY");
+    expect(store.state.summaryWatermark).toBe("p2");
+    expect(store.state.compactionDirty).toBe(false);
+    expect(store.state.version).toBe(1);
+    expect(out.messages[0].id).toBe("context-summary");
+    expect(onEvent).toHaveBeenCalledOnce();
+    // ADR-0012 §Compaction trace in the timeline: a summary ran → a trace is surfaced with the dropped count and a
+    // summary excerpt.
+    expect(out.compactionTrace).toEqual({
+      messagesDropped: 2,
+      summaryExcerpt: "SUMMARY",
+    });
+  });
+
+  it("disabled + not dirty: no compaction even when over the trigger", async () => {
+    const store = storeFromState({ version: 0 });
+    const messages = [
+      bigText("p1", "user"),
+      bigText("p2", "assistant"),
+      uiText("r1", "user", "a"),
+    ];
+    const out = await applyTier1Compaction({
+      chatId: "c",
+      messages,
+      state: {
+        version: 0,
+        summaryWatermark: null,
+        contextSummary: null,
+        compactionDirty: false,
+      },
+      budget: baseBudget,
+      config: cfg({ compactionEnabled: false }),
+      imageProvider: "default",
+      summarize: noopSummarize,
+      store,
+    });
+    expect(out.compacted).toBe(false);
+    expect(store.casCalls).toBe(0);
+  });
+
+  it("dirty forces compaction even when proactive is disabled (ADR-0012 §Recovery is the net recovery hand-off)", async () => {
+    const store = storeFromState({ version: 0, compactionDirty: true });
+    const messages = [
+      bigText("p1", "user"),
+      bigText("p2", "assistant"),
+      uiText("r1", "user", "a"),
+      uiText("r2", "assistant", "b"),
+    ];
+    const out = await applyTier1Compaction({
+      chatId: "c",
+      messages,
+      state: {
+        version: 0,
+        summaryWatermark: null,
+        contextSummary: null,
+        compactionDirty: true,
+      },
+      budget: baseBudget,
+      config: cfg({ compactionEnabled: false }),
+      imageProvider: "default",
+      summarize: noopSummarize,
+      store,
+    });
+    expect(out.compacted).toBe(true);
+    expect(store.state.compactionDirty).toBe(false);
+  });
+
+  it("dirty but already within target: just clears the flag (no summary)", async () => {
+    const store = storeFromState({ version: 0, compactionDirty: true });
+    const messages = [
+      uiText("r1", "user", "a"),
+      uiText("r2", "assistant", "b"),
+    ];
+    const out = await applyTier1Compaction({
+      chatId: "c",
+      messages,
+      state: {
+        version: 0,
+        summaryWatermark: null,
+        contextSummary: null,
+        compactionDirty: true,
+      },
+      budget: {
+        inputBudget: 100000,
+        triggerTokens: 100000,
+        targetTokens: 100000,
+      },
+      config: cfg(),
+      imageProvider: "default",
+      summarize: noopSummarize,
+      store,
+    });
+    expect(out.compacted).toBe(false);
+    expect(store.state.compactionDirty).toBe(false); // flag cleared
+    expect(store.state.contextSummary).toBeNull(); // no summary written
+    expect(store.state.version).toBe(1);
+    // ADR-0012 §Compaction trace in the timeline: no model summary ran → no trace (would be an empty timeline entry).
+    expect(out.compactionTrace).toBeUndefined();
+  });
+
+  it("under trigger: no trace surfaced", async () => {
+    const store = storeFromState({ version: 0 });
+    const messages = [uiText("r1", "user", "a")];
+    const out = await applyTier1Compaction({
+      chatId: "c",
+      messages,
+      state: {
+        version: 0,
+        summaryWatermark: null,
+        contextSummary: null,
+        compactionDirty: false,
+      },
+      budget: {
+        inputBudget: 100000,
+        triggerTokens: 100000,
+        targetTokens: 50000,
+      },
+      config: cfg(),
+      imageProvider: "default",
+      summarize: noopSummarize,
+      store,
+    });
+    expect(out.compacted).toBe(false);
+    expect(out.compactionTrace).toBeUndefined();
+  });
+});
+
+describe("invalidateCompaction (ADR-0012 §Summary invalidation)", () => {
+  const ordered = ["m1", "m2", "m3", "m4"];
+
+  it("resets summary + watermark when a message at/below the watermark changes", async () => {
+    const store = storeFromState({
+      version: 5,
+      summaryWatermark: "m2",
+      contextSummary: "S",
+    });
+    const res = await invalidateCompaction(store, "c", ["m2"], ordered);
+    expect(res.status).toBe("applied");
+    expect(store.state.summaryWatermark).toBeNull();
+    expect(store.state.contextSummary).toBeNull();
+    expect(store.state.version).toBe(6); // bumped so a racing compaction loses (ADR-0012 §One durable writer)
+  });
+
+  it("is a no-op when the edit is entirely above the watermark", async () => {
+    const store = storeFromState({
+      version: 5,
+      summaryWatermark: "m2",
+      contextSummary: "S",
+    });
+    const res = await invalidateCompaction(store, "c", ["m4"], ordered);
+    expect(res).toEqual({ status: "skipped", reason: "no-op" });
+    expect(store.state.contextSummary).toBe("S");
+  });
+
+  it("resets when an affected message was deleted (missing from ordering)", async () => {
+    const store = storeFromState({
+      version: 1,
+      summaryWatermark: "m3",
+      contextSummary: "S",
+    });
+    const res = await invalidateCompaction(store, "c", ["gone"], ordered);
+    expect(res.status).toBe("applied");
+    expect(store.state.summaryWatermark).toBeNull();
+  });
+
+  it("is a no-op when there is no summary/watermark to invalidate", async () => {
+    const store = storeFromState({ version: 0 });
+    const res = await invalidateCompaction(store, "c", ["m1"], ordered);
+    expect(res).toEqual({ status: "skipped", reason: "no-op" });
+  });
+});
+
+describe("affectedBelowWatermark (ADR-0012 §Summary invalidation divergence detection)", () => {
+  const persisted = [
+    uiText("m1", "user", "one"),
+    uiText("m2", "assistant", "two"),
+    uiText("m3", "user", "three"),
+  ];
+
+  it("returns [] when the prefix is unchanged", () => {
+    const incoming = [
+      uiText("m1", "user", "one"),
+      uiText("m2", "assistant", "two"),
+      uiText("m3", "user", "x"),
+    ];
+    expect(affectedBelowWatermark(persisted, incoming, "m2")).toEqual([]);
+  });
+
+  it("flags a content edit at/below the watermark", () => {
+    const incoming = [
+      uiText("m1", "user", "EDITED"),
+      uiText("m2", "assistant", "two"),
+      uiText("m3", "user", "three"),
+    ];
+    expect(affectedBelowWatermark(persisted, incoming, "m2")).toEqual(["m1"]);
+  });
+
+  it("flags a deleted message below the watermark", () => {
+    const incoming = [
+      uiText("m2", "assistant", "two"),
+      uiText("m3", "user", "three"),
+    ];
+    expect(affectedBelowWatermark(persisted, incoming, "m2")).toEqual(["m1"]);
+  });
+
+  it("flags when the watermark message itself is gone from canonical history", () => {
+    expect(affectedBelowWatermark(persisted, persisted, "ghost")).toEqual([
+      "ghost",
+    ]);
+  });
+
+  it("ignores edits strictly above the watermark", () => {
+    const incoming = [
+      uiText("m1", "user", "one"),
+      uiText("m2", "assistant", "two"),
+      uiText("m3", "user", "CHANGED"),
+    ];
+    expect(affectedBelowWatermark(persisted, incoming, "m2")).toEqual([]);
+  });
+});
+
+// --- ADR-0012 §Tier 1 (trigger projection) / §Token estimation (cold-start margin): trigger projection + recovery dirty-flag producer -----
+
+import {
+  projectTier1Tokens,
+  setCompactionDirty,
+  COLD_START_MARGIN,
+} from "./compaction.ts";
+
+describe("projectTier1Tokens (ADR-0012 §Tier 1 (trigger projection) / §Token estimation (cold-start margin))", () => {
+  it("applies the cold-start margin when no provider baseline exists (ADR-0012 §Token estimation (cold-start margin))", () => {
+    expect(
+      projectTier1Tokens({ messageTokens: 100, priorSummaryTokens: 0 }),
+    ).toBe(Math.ceil(100 * COLD_START_MARGIN));
+  });
+
+  it("counts the per-turn overhead toward the trigger (ADR-0012 §Tier 1 (trigger projection))", () => {
+    expect(
+      projectTier1Tokens({
+        messageTokens: 100,
+        priorSummaryTokens: 20,
+        overheadTokens: 50,
+      }),
+    ).toBe(Math.ceil(170 * COLD_START_MARGIN));
+  });
+
+  it("uses the provider-reported count as a floor when available", () => {
+    // The observed live gap: char/4 said ~986, the provider said 8888.
+    expect(
+      projectTier1Tokens({
+        messageTokens: 986,
+        priorSummaryTokens: 0,
+        lastInputTokens: 8888,
+      }),
+    ).toBe(8888);
+  });
+
+  it("drops the margin when a provider baseline is present", () => {
+    expect(
+      projectTier1Tokens({
+        messageTokens: 100,
+        priorSummaryTokens: 0,
+        lastInputTokens: 50,
+      }),
+    ).toBe(100);
+  });
+
+  it("treats a 0 provider count as no baseline and keeps the margin (ADR-0012 §Tier 1 (trigger projection))", () => {
+    // Usage-less providers persist contextTokens=0; a bare `== null` check would
+    // skip the margin AND no-op the max(), leaving the raw char/4 with no buffer.
+    expect(
+      projectTier1Tokens({
+        messageTokens: 100,
+        priorSummaryTokens: 0,
+        lastInputTokens: 0,
+      }),
+    ).toBe(Math.ceil(100 * COLD_START_MARGIN));
+  });
+});
+
+describe("applyTier1Compaction — overhead in the trigger (ADR-0012 §Tier 1 (trigger projection))", () => {
+  it("fires on system/tool overhead even when messages alone are under trigger", async () => {
+    const store = storeFromState({ version: 0 });
+    // ~4 tokens of messages — far under the 50-token trigger on their own.
+    const messages = [
+      uiText("p1", "user", "aaaa"),
+      uiText("p2", "assistant", "bbbb"),
+      uiText("r1", "user", "cccc"),
+      uiText("r2", "assistant", "dddd"),
+    ];
+    const out = await applyTier1Compaction({
+      chatId: "c",
+      messages,
+      state: {
+        version: 0,
+        summaryWatermark: null,
+        contextSummary: null,
+        compactionDirty: false,
+      },
+      budget: { inputBudget: 100, triggerTokens: 50, targetTokens: 25 },
+      config: cfg(),
+      imageProvider: "default",
+      summarize: noopSummarize,
+      store,
+      overheadTokens: 60, // tool schemas + system prompt dominate
+    });
+    expect(out.compacted).toBe(true);
+    expect(store.state.summaryWatermark).toBe("p2");
+  });
+
+  it("does not fire when messages + overhead stay under the trigger", async () => {
+    const store = storeFromState({ version: 0 });
+    const messages = [
+      uiText("r1", "user", "cccc"),
+      uiText("r2", "assistant", "dddd"),
+    ];
+    const out = await applyTier1Compaction({
+      chatId: "c",
+      messages,
+      state: {
+        version: 0,
+        summaryWatermark: null,
+        contextSummary: null,
+        compactionDirty: false,
+      },
+      budget: { inputBudget: 100, triggerTokens: 50, targetTokens: 25 },
+      config: cfg(),
+      imageProvider: "default",
+      summarize: noopSummarize,
+      store,
+      overheadTokens: 10,
+    });
+    expect(out.compacted).toBe(false);
+    expect(store.casCalls).toBe(0);
+  });
+});
+
+describe("setCompactionDirty (ADR-0012 §Recovery producer)", () => {
+  it("sets the flag through the CAS writer", async () => {
+    const store = storeFromState({ version: 3 });
+    const res = await setCompactionDirty(store, "c");
+    expect(res).toEqual({ status: "applied", version: 4 });
+    expect(store.state.compactionDirty).toBe(true);
+  });
+
+  it("is a no-op when already dirty (no version churn)", async () => {
+    const store = storeFromState({ version: 3, compactionDirty: true });
+    const res = await setCompactionDirty(store, "c");
+    expect(res).toEqual({ status: "skipped", reason: "no-op" });
+    expect(store.casCalls).toBe(0);
+    expect(store.state.version).toBe(3);
+  });
+
+  it("never touches summary or watermark (recovery only flags)", async () => {
+    const store = storeFromState({
+      version: 1,
+      contextSummary: "KEEP",
+      summaryWatermark: "m7",
+    });
+    await setCompactionDirty(store, "c");
+    expect(store.state.contextSummary).toBe("KEEP");
+    expect(store.state.summaryWatermark).toBe("m7");
+  });
+});
+
+// --- ADR-0012 §Stage 0 — context editing ---------------------------
+
+/** Assistant message with one completed `tool-<name>` part and its output. */
+const toolMsg = (
+  id: string,
+  name: string,
+  output: unknown,
+): PlatypusUIMessage =>
+  ({
+    id,
+    role: "assistant",
+    parts: [
+      {
+        type: `tool-${name}`,
+        toolCallId: `${id}-call`,
+        state: "output-available",
+        input: { q: "x" },
+        output,
+      },
+    ],
+  }) as unknown as PlatypusUIMessage;
+
+const bigOut = (n = 200) => "D".repeat(n);
+const outputOf = (m: PlatypusUIMessage) =>
+  (m.parts[0] as { output?: unknown }).output;
+
+describe("editToolResults (Stage 0 — context editing)", () => {
+  const opts = { keepRecentToolResults: 1, minEditableToolChars: 100 };
+
+  it("elides OLD bulky results past the keep-window; keeps recent + all text", () => {
+    const messages = [
+      toolMsg("t1", "search", bigOut()),
+      uiText("u1", "user", "carry on"),
+      toolMsg("t2", "search", bigOut()),
+      toolMsg("t3", "search", bigOut()),
+    ];
+    const res = editToolResults(messages, opts);
+    // 3 results, keep last 1 (t3) → t1, t2 are candidates and both bulky.
+    expect(res.resultsElided).toBe(2);
+    expect(outputOf(res.messages[0])).toBe(
+      elidedToolPlaceholder("search", 200),
+    );
+    expect(outputOf(res.messages[2])).toBe(
+      elidedToolPlaceholder("search", 200),
+    );
+    expect(outputOf(res.messages[3])).toBe(bigOut()); // t3 within keep-window
+    expect(res.messages[1]).toBe(messages[1]); // text untouched (same ref)
+  });
+
+  it("keeps results within keepRecentToolResults verbatim", () => {
+    const messages = [
+      toolMsg("t1", "f", bigOut()),
+      toolMsg("t2", "f", bigOut()),
+      toolMsg("t3", "f", bigOut()),
+    ];
+    const res = editToolResults(messages, {
+      keepRecentToolResults: 2,
+      minEditableToolChars: 100,
+    });
+    expect(res.resultsElided).toBe(1); // only t1
+    expect(outputOf(res.messages[0])).toBe(elidedToolPlaceholder("f", 200));
+    expect(outputOf(res.messages[1])).toBe(bigOut());
+    expect(outputOf(res.messages[2])).toBe(bigOut());
+  });
+
+  it("exempts the newest message even with keepRecentToolResults=0", () => {
+    const messages = [
+      toolMsg("t1", "f", bigOut()),
+      toolMsg("t2", "f", bigOut()),
+    ];
+    const res = editToolResults(messages, {
+      keepRecentToolResults: 0,
+      minEditableToolChars: 100,
+    });
+    expect(res.resultsElided).toBe(1); // t1 only; t2 is the newest message
+    expect(outputOf(res.messages[0])).toBe(elidedToolPlaceholder("f", 200));
+    expect(outputOf(res.messages[1])).toBe(bigOut());
+  });
+
+  it("size gate: leaves results at/under minEditableToolChars untouched", () => {
+    const messages = [
+      toolMsg("small", "f", bigOut(50)), // ≤ gate
+      toolMsg("big", "f", bigOut(200)), // > gate
+      uiText("u1", "user", "tail"), // newest, so both tools are candidates
+    ];
+    const res = editToolResults(messages, {
+      keepRecentToolResults: 0,
+      minEditableToolChars: 100,
+    });
+    expect(res.resultsElided).toBe(1);
+    expect(outputOf(res.messages[0])).toBe(bigOut(50)); // small kept
+    expect(outputOf(res.messages[1])).toBe(elidedToolPlaceholder("f", 200));
+  });
+
+  it("pairing: keeps the tool-call part, swaps only the output body", () => {
+    const messages = [
+      toolMsg("t1", "search", bigOut()),
+      uiText("u1", "user", "x"),
+    ];
+    const res = editToolResults(messages, {
+      keepRecentToolResults: 0,
+      minEditableToolChars: 100,
+    });
+    const part = res.messages[0].parts[0] as Record<string, unknown>;
+    expect(part.type).toBe("tool-search");
+    expect(part.toolCallId).toBe("t1-call");
+    expect(part.input).toEqual({ q: "x" });
+    expect(part.state).toBe("output-available");
+    expect(part.output).toBe(elidedToolPlaceholder("search", 200));
+  });
+
+  it("is deterministic/monotonic: feeding the edited view back elides nothing new", () => {
+    const messages = [
+      toolMsg("t1", "f", bigOut()),
+      toolMsg("t2", "f", bigOut()),
+      uiText("u1", "user", "tail"),
+    ];
+    const first = editToolResults(messages, opts);
+    expect(first.resultsElided).toBeGreaterThan(0);
+    const second = editToolResults(first.messages, opts);
+    expect(second.resultsElided).toBe(0);
+    expect(second.messages).toBe(first.messages); // stable ⇒ cache-friendly
+  });
+
+  it("grow-guard: never elides when the placeholder would be longer than the output", () => {
+    // Tiny gate picks a result just over it, but shorter than the ~140-char
+    // placeholder ⇒ eliding would inflate the prompt. Must skip (no negative
+    // reclaim, no churn, no-op identity).
+    const shortOut = "D".repeat(30); // > gate 10, < placeholder length
+    const messages = [
+      toolMsg("t1", "f", shortOut),
+      uiText("u1", "user", "tail"),
+    ];
+    const res = editToolResults(messages, {
+      keepRecentToolResults: 0,
+      minEditableToolChars: 10,
+    });
+    expect(res.resultsElided).toBe(0);
+    expect(res.charsReclaimed).toBe(0);
+    expect(res.messages).toBe(messages);
+  });
+
+  it("no-op identity: returns the same array reference when nothing qualifies", () => {
+    const messages = [
+      toolMsg("t1", "f", bigOut(50)), // under gate
+      uiText("u1", "user", "hi"),
+    ];
+    const res = editToolResults(messages, opts);
+    expect(res.resultsElided).toBe(0);
+    expect(res.charsReclaimed).toBe(0);
+    expect(res.messages).toBe(messages);
+  });
+});
+
+describe("applyTier1Compaction — Stage 0 avoids summarization (ADR-0012 §Stage 0 — context editing)", () => {
+  const hugeTool = (id: string) => toolMsg(id, "dump", "Z".repeat(8000));
+  // High minPrunableChars so Stage 1 prefix-pruning does NOT rescue the no-edit
+  // case — it must reach Stage 2 (the model call) to make Stage 0's avoidance of
+  // it the real discriminator.
+  const editCfg = cfg({
+    keepRecentToolResults: 1,
+    minEditableToolChars: 100,
+    keepRecentMessages: 2,
+    minPrunableChars: 100000,
+  });
+  // Trigger sits between the post-edit size (~one big tool left) and the
+  // pre-edit size (~two big tools).
+  const budget: Budget = {
+    inputBudget: 100000,
+    triggerTokens: 3000,
+    targetTokens: 1500,
+  };
+  const state: CompactionState = {
+    version: 0,
+    summaryWatermark: null,
+    contextSummary: null,
+    compactionDirty: false,
+  };
+  const messages = () => [
+    hugeTool("bt1"),
+    hugeTool("bt2"),
+    uiText("r1", "user", "ok"),
+    uiText("r2", "assistant", "done"),
+  ];
+
+  it("elides the old dump, drops under trigger, skips the model call", async () => {
+    const summarize = vi.fn(() => Promise.resolve("SUMMARY"));
+    const out = await applyTier1Compaction({
+      chatId: "c",
+      messages: messages(),
+      state,
+      budget,
+      config: editCfg,
+      imageProvider: "default",
+      summarize,
+      store: storeFromState({ version: 0 }),
+    });
+    expect(summarize).not.toHaveBeenCalled();
+    expect(out.compacted).toBe(false);
+    // Stage 0 still slimmed the view: the old dump (bt1) is a placeholder, the
+    // recent dump (bt2, within keep) stays verbatim.
+    expect(outputOf(out.messages[0])).toBe(elidedToolPlaceholder("dump", 8000));
+    expect(outputOf(out.messages[1])).toBe("Z".repeat(8000));
+  });
+
+  it("without context editing the same chat triggers summarization", async () => {
+    const summarize = vi.fn(() => Promise.resolve("SUMMARY"));
+    const out = await applyTier1Compaction({
+      chatId: "c",
+      messages: messages(),
+      state,
+      budget,
+      config: cfg({
+        contextEditingEnabled: false,
+        keepRecentMessages: 2,
+        minPrunableChars: 100000,
+      }),
+      imageProvider: "default",
+      summarize,
+      store: storeFromState({ version: 0 }),
+    });
+    expect(summarize).toHaveBeenCalledOnce();
+    expect(out.compacted).toBe(true);
+  });
+});
diff --git a/apps/backend/src/runs/compaction.ts b/apps/backend/src/runs/compaction.ts
new file mode 100644
index 00000000..bc13df15
--- /dev/null
+++ b/apps/backend/src/runs/compaction.ts
@@ -0,0 +1,1497 @@
+/**
+ * Context compaction (ADR-0012 §Tier 1 / §Tier 2).
+ *
+ * This module owns durable compaction state and the message-shaping primitives.
+ * Slice 2a (this section) is the **single durable writer** (principle ADR-0012 §One durable writer): every
+ * mutation of `summaryWatermark` / `contextSummary` / `compactionDirty` flows
+ * through {@link CompactionStore.casWrite}, a version-gated compare-and-swap.
+ *
+ * Why versioned CAS and not "compare the watermark value" (ADR-0012 §One durable writer): history
+ * edits (ADR-0012 §Tier 1 invalidation) move the watermark **backward**. A loser that compared
+ * watermark values could mistake a reset for "not yet advanced" and write a stale
+ * summary over mutated history. Deciding by `version` removes the monotonicity
+ * assumption entirely — any concurrent mutation bumps the version, so a racing
+ * write simply loses the CAS and re-reads the truth.
+ */
+
+import { and, eq } from "drizzle-orm";
+import type { ModelMessage, PrepareStepFunction } from "ai";
+import { db } from "../index.ts";
+import { chat as chatTable } from "../db/schema.ts";
+import { logger } from "../logger.ts";
+import type { PlatypusUIMessage } from "../types.ts";
+import {
+  estimateTokens,
+  stableStringify,
+  uiMessagesToCountUnits,
+  modelMessagesToCountUnits,
+  CHARS_PER_TOKEN,
+  type ImageProvider,
+} from "./token-estimate.ts";
+
+/**
+ * Durable compaction state on the chat row — exactly the four columns that
+ * `drizzleCompactionStore.readState` selects.
+ */
+export type CompactionState = {
+  /** CAS gate: a winning `casWrite` stores `expectVersion + 1` here. */
+  version: number;
+  /** Watermark column; null when no watermark is set. Written via `WatermarkPatch.watermark`. */
+  summaryWatermark: string | null;
+  /** Durable summary text; null when there is none. Written via `WatermarkPatch.summary`. */
+  contextSummary: string | null;
+  /** Dirty flag. Written via `WatermarkPatch.dirty`. */
+  compactionDirty: boolean;
+};
+
+/**
+ * A patch to the compaction fields. Only the keys present are written; absent
+ * keys are left untouched. `version` is always bumped by the writer (not here).
+ *
+ * Presence is tested with `in`, so an explicit `null` value is a real write
+ * (clear / reset), while a missing key means "leave the column alone".
+ */
+export type WatermarkPatch = {
+  /** New value for the row's `summaryWatermark`; `null` resets it. */
+  watermark?: string | null;
+  /** New value for the row's `contextSummary`; `null` clears it. */
+  summary?: string | null;
+  /** New value for the row's `compactionDirty` flag. */
+  dirty?: boolean;
+};
+
+/**
+ * The durable-state seam. Production wires this to Drizzle
+ * ({@link drizzleCompactionStore}); tests pass an in-memory implementation so
+ * the CAS algorithm is exercised without Postgres.
+ */
+export type CompactionStore = {
+  /**
+   * Reads the current compaction state for `chatId`. Resolves to `null` when
+   * there is no such chat row ({@link commitWatermark} treats that as a no-op).
+   */
+  readState(chatId: string): Promise<CompactionState | null>;
+  /**
+   * Version-gated compare-and-swap. Applies `patch` and sets
+   * `version = expectVersion + 1` **only if** the row's current version still
+   * equals `expectVersion`. Returns true iff exactly one row was updated
+   * (i.e. this writer won). The single durable writer (ADR-0012 §One durable writer).
+   */
+  casWrite(
+    chatId: string,
+    expectVersion: number,
+    patch: WatermarkPatch,
+  ): Promise<boolean>;
+};
+
+/**
+ * Drizzle-backed {@link CompactionStore} over the `chat` table.
+ *
+ * `readState` selects the four compaction columns for one chat id;
+ * `casWrite` is a single conditional UPDATE gated on the expected version.
+ */
+export const drizzleCompactionStore: CompactionStore = {
+  async readState(chatId) {
+    const [row] = await db
+      .select({
+        version: chatTable.version,
+        summaryWatermark: chatTable.summaryWatermark,
+        contextSummary: chatTable.contextSummary,
+        compactionDirty: chatTable.compactionDirty,
+      })
+      .from(chatTable)
+      .where(eq(chatTable.id, chatId))
+      .limit(1);
+    // No matching chat row ⇒ null.
+    return row ?? null;
+  },
+
+  async casWrite(chatId, expectVersion, patch) {
+    // Key PRESENCE (`in`), not value, decides what is written: an explicit null
+    // (clear summary / reset watermark) must stay distinguishable from an
+    // absent key ("leave alone").
+    const changes: Record<string, unknown> = {
+      version: expectVersion + 1,
+      updatedAt: new Date(),
+      ...("watermark" in patch ? { summaryWatermark: patch.watermark } : {}),
+      ...("summary" in patch ? { contextSummary: patch.summary } : {}),
+      ...("dirty" in patch ? { compactionDirty: patch.dirty } : {}),
+    };
+
+    // The version predicate is the compare half of the compare-and-swap.
+    const versionGate = and(
+      eq(chatTable.id, chatId),
+      eq(chatTable.version, expectVersion),
+    );
+    const touched = await db
+      .update(chatTable)
+      .set(changes)
+      .where(versionGate)
+      .returning({ id: chatTable.id });
+    return touched.length === 1;
+  },
+};
+
+/**
+ * Outcome of {@link commitWatermark}.
+ *
+ * - `applied`: the CAS write won; `version` is the new row version.
+ * - `skipped` / `"no-op"`: the chat row was missing, or `decide` chose no-op.
+ * - `skipped` / `"covered"`: `decide` reported the work as already done.
+ * - `skipped` / `"contended"`: every CAS attempt lost to a concurrent writer.
+ */
+export type CommitResult =
+  | { status: "applied"; version: number }
+  | { status: "skipped"; reason: "no-op" | "covered" | "contended" };
+
+/**
+ * Decision an attempt makes against the freshly-read state: either write a patch
+ * or skip (a no-op, or because a concurrent winner already covered this work).
+ *
+ * Returned by the `decide` callback of {@link commitWatermark}; a `skip` reason
+ * is passed straight through as the `CommitResult` reason.
+ */
+export type WatermarkDecision =
+  | { kind: "write"; patch: WatermarkPatch }
+  | { kind: "skip"; reason: "no-op" | "covered" };
+
+/**
+ * Sole entry point for changing compaction state (ADR-0012 §One durable writer).
+ *
+ * Each attempt reads the row, hands the fresh state to `decide`, and CAS-writes
+ * the resulting patch against the version it just read. A lost CAS triggers
+ * exactly one more read → decide → write round; losing again ends the call
+ * with `skipped: "contended"`. There is no open-ended recompute loop, hence no
+ * livelock. Because `decide` always sees the re-read state, a racing
+ * invalidation (version bump + watermark reset) is visible on the retry and
+ * `decide` may skip instead of writing a stale summary.
+ *
+ * A `decide` result of `skip: "covered"` means another writer already did the
+ * work. If the caller also wants `compactionDirty` cleared in that situation
+ * it must return a write patch carrying `dirty` — it is just another field.
+ *
+ * @param store  Durable-state seam (Drizzle in production, in-memory in tests).
+ * @param chatId Chat whose compaction state is being mutated.
+ * @param decide Pure decision over the freshly-read state.
+ * @returns `applied` with the new version, or `skipped` with the reason.
+ */
+export async function commitWatermark(
+  store: CompactionStore,
+  chatId: string,
+  decide: (state: CompactionState) => WatermarkDecision,
+): Promise<CommitResult> {
+  const MAX_ATTEMPTS = 2;
+  let attempt = 0;
+
+  while (attempt < MAX_ATTEMPTS) {
+    const current = await store.readState(chatId);
+    if (!current) {
+      // Missing chat row: nothing to mutate.
+      return { status: "skipped", reason: "no-op" };
+    }
+
+    const verdict = decide(current);
+    if (verdict.kind !== "write") {
+      return { status: "skipped", reason: verdict.reason };
+    }
+
+    const { version } = current;
+    if (await store.casWrite(chatId, version, verdict.patch)) {
+      return { status: "applied", version: version + 1 };
+    }
+
+    // CAS lost: some concurrent writer bumped the version. The next round
+    // re-reads and re-decides; the comparison is on VERSION, never on watermark
+    // values, so a backward watermark reset cannot be misread. The metric shows
+    // whether read→summarize→write contention ever needs further work.
+    logger.info(
+      { metric: "cas.conflict", chatId, attempt, version },
+      "cas.conflict",
+    );
+    attempt += 1;
+  }
+
+  logger.warn(
+    { metric: "cas.conflict", chatId, contended: true },
+    "compaction CAS contended past retry — skipping (safe no-op)",
+  );
+  return { status: "skipped", reason: "contended" };
+}
+
+// ===========================================================================
+// Slice 2b — compaction primitives (the message-shaping leaves)
+//
+// Two adapters share the same staged, cheap-first strategy (LibreChat pattern):
+//   Stage 1 — prune bulky tool results (no model call). Often enough.
+//   Stage 2 — summarize the older prefix into one synthetic summary (model call).
+// `compactUIMessages` (Tier 1, durable) and `compactModelMessages` (Tier 2 +
+// recovery, throwaway) differ only in message shape and the tool-pairing rule.
+// Token counting is the ONE estimator from token-estimate.ts (ADR-0012 §One estimator).
+// ===========================================================================
+
+/** Summarizes a transcript into a compact paragraph. Injected (the task model). */
+export type Summarize = (text: string) => Promise<string>;
+
+/**
+ * Rough token count of a bare string (e.g. summary text): its character count
+ * divided by `CHARS_PER_TOKEN`, rounded up.
+ */
+function textTokens(text: string): number {
+  const charCount = text.length;
+  return Math.ceil(charCount / CHARS_PER_TOKEN);
+}
+
+/**
+ * Soft-trims an over-long string to head+tail with an elision marker, so a bulky
+ * tool result keeps some signal instead of vanishing entirely.
+ *
+ * Guarantees the result is never longer than the input: when the marker would
+ * cost at least as much as the elided span, the original text is returned.
+ *
+ * @param text         String to trim.
+ * @param keepEachSide Characters kept at each end (default 500). Negative,
+ *                     fractional or NaN values are normalised to a
+ *                     non-negative integer.
+ * @returns `text` unchanged when it is short enough (or trimming would not
+ *          shrink it), otherwise `head + "\n…[elided N chars]…\n" + tail`.
+ */
+export function softTrim(text: string, keepEachSide = 500): string {
+  const keep = Math.max(0, Math.trunc(keepEachSide) || 0);
+  if (text.length <= keep * 2) return text;
+  const head = text.slice(0, keep);
+  // Index from the end explicitly: `slice(-keep)` with keep === 0 is `slice(-0)`,
+  // which returns the WHOLE string rather than an empty tail.
+  const tail = text.slice(text.length - keep);
+  const elided = text.length - keep * 2;
+  const trimmed = `${head}\n…[elided ${elided} chars]…\n${tail}`;
+  // Grow-guard: just past the threshold the marker outweighs the elided span.
+  return trimmed.length < text.length ? trimmed : text;
+}
+
+/**
+ * Chooses the split index between `prefix = [0, boundary)` and
+ * `recent = [boundary, total)`.
+ *
+ * The first candidate is `total - keepRecent` (floored at 0). While the
+ * candidate is above 0 and `isSafeBoundary` rejects it, the candidate moves one
+ * step toward 0, so a tool-call/result pair is never cut apart
+ * (ADR-0012 §Tier 1). Index 0 is accepted without consulting `isSafeBoundary`.
+ *
+ * @param total          Number of messages.
+ * @param keepRecent     How many trailing messages the caller wants to keep.
+ * @param isSafeBoundary Whether splitting just before `index` is allowed.
+ * @returns The chosen boundary index.
+ */
+export function pickKeepBoundary(
+  total: number,
+  keepRecent: number,
+  isSafeBoundary: (index: number) => boolean,
+): number {
+  let candidate = Math.max(0, total - keepRecent);
+  for (; candidate > 0; candidate -= 1) {
+    if (isSafeBoundary(candidate)) break;
+  }
+  return candidate;
+}
+
+// --- Tier 1: UIMessage shape ---------------------------------------------
+
+/**
+ * Soft-trims bulky tool-result outputs of one UIMessage, working on a shallow
+ * copy. Tool parts are never removed (the assistant tool message is atomic,
+ * ADR-0012 §Tier 1); only an `output` whose serialized form is longer than
+ * `minPrunableChars` is replaced by its soft-trimmed string.
+ *
+ * @returns The original `message` object when nothing qualified, otherwise a
+ *          copy with a new `parts` array, plus a `changed` flag.
+ */
+function pruneUIMessage(
+  message: PlatypusUIMessage,
+  minPrunableChars: number,
+): { message: PlatypusUIMessage; changed: boolean } {
+  const nextParts: unknown[] = [];
+  let changed = false;
+
+  for (const part of message.parts ?? []) {
+    const candidate = part as { type: string; output?: unknown };
+    const carriesResult =
+      (candidate.type === "dynamic-tool" ||
+        candidate.type.startsWith("tool-")) &&
+      candidate.output !== undefined;
+    if (!carriesResult) {
+      nextParts.push(part);
+      continue;
+    }
+
+    // Non-string outputs are measured (and, if trimmed, stored) as JSON text.
+    const flat =
+      typeof candidate.output === "string"
+        ? candidate.output
+        : JSON.stringify(candidate.output);
+    if (flat.length > minPrunableChars) {
+      nextParts.push({ ...candidate, output: softTrim(flat) });
+      changed = true;
+    } else {
+      nextParts.push(part);
+    }
+  }
+
+  if (!changed) return { message, changed };
+  return {
+    message: { ...message, parts: nextParts } as PlatypusUIMessage,
+    changed,
+  };
+}
+
+/**
+ * Text that stands in for an elided tool result (ADR-0012 §Stage 0 — context
+ * editing). Deliberately LLM-agnostic: Platypus may run small/weak background
+ * models, so the wording is explicit and self-describing rather than a terse
+ * marker a small model might not act on. It names the tool and the elided size
+ * so the model can decide to re-run the tool, and stays short enough that
+ * Stage 1 / the hard window wall never re-trim it.
+ *
+ * The prefix is a separate constant so already-elided outputs can be
+ * recognised by a `startsWith` check.
+ */
+const ELIDED_PLACEHOLDER_PREFIX = '[Tool result for "';
+
+/**
+ * Builds the placeholder string for one elided tool result.
+ *
+ * @param toolName Name of the tool whose output was elided.
+ * @param chars    Length of the elided serialized output, in characters.
+ */
+export function elidedToolPlaceholder(toolName: string, chars: number): string {
+  const subject = ELIDED_PLACEHOLDER_PREFIX + toolName + '"';
+  const size = " omitted to save context (" + chars + " chars).";
+  const hint =
+    " The full result is still available — call the tool again with the same input if you need it.]";
+  return subject + size + hint;
+}
+
+export type EditToolResultsOptions = {
+  /** Exempt the last N tool results (most recent) from elision. */
+  keepRecentToolResults: number;
+  /** Only elide a tool result whose serialized output exceeds this many chars. */
+  minEditableToolChars: number;
+};
+
+/** Result of {@link editToolResults}. */
+export type EditToolResultsResult = {
+  /** The edited view; the SAME array reference as the input when nothing was elided. */
+  messages: PlatypusUIMessage[];
+  /** Number of tool-result parts whose `output` was replaced by a placeholder. */
+  resultsElided: number;
+  /** Net chars removed (original output length − placeholder length), for metrics. */
+  charsReclaimed: number;
+};
+
+/**
+ * Stage 0 (ADR-0012 §Stage 0 — context editing; Anthropic `clear_tool_uses`
+ * equivalent): replaces the `output` of OLD bulky tool-result parts with a short
+ * placeholder, keeping the tool part itself (pairing) and ALL text parts intact.
+ * Pure + deterministic — no model call, recomputed from raw messages each turn by
+ * recency, so it needs no durable state (ADR-0012 §View, not delete: raw `chat.messages` is untouched, the
+ * full result stays for UI/audit).
+ *
+ * Recency is by COUNT of tool results (we have no clean turn id): the last
+ * `keepRecentToolResults` results are exempt, and the newest message is exempt
+ * regardless (same invariant as ADR-0012 §Hard window wall). A result is elided only when
+ * its serialized `output` exceeds `minEditableToolChars` — the size gate ≈
+ * Anthropic's `clear_at_least`, so trivial results never churn the prompt cache.
+ *
+ * Monotonic + deterministic ⇒ cache-friendly: a result is elided the turn it ages
+ * past the keep-window and stays elided. Returns the SAME array reference when
+ * nothing qualified, so callers can skip a re-estimate.
+ *
+ * A negative `keepRecentToolResults` is treated as "keep none" (every tool
+ * result is a candidate) instead of indexing past the end of the result list.
+ *
+ * @param messages Raw UI messages; never mutated.
+ * @param opts     Recency window and size gate.
+ * @returns The edited view plus elision metrics.
+ */
+export function editToolResults(
+  messages: PlatypusUIMessage[],
+  opts: EditToolResultsOptions,
+): EditToolResultsResult {
+  // Enumerate every tool-result-bearing part in order so "keep the last N" is a
+  // simple tail slice. A single message can carry several tool parts.
+  const toolResultLocs: Array<{ mi: number; pi: number }> = [];
+  messages.forEach((m, mi) => {
+    (m.parts ?? []).forEach((part, pi) => {
+      const ap = part as { type: string; output?: unknown };
+      const isTool = ap.type === "dynamic-tool" || ap.type.startsWith("tool-");
+      if (isTool && ap.output !== undefined) toolResultLocs.push({ mi, pi });
+    });
+  });
+
+  // Candidates for elision = all but the last `keepRecentToolResults`; the newest
+  // MESSAGE is exempt regardless (ADR-0012 §Hard window wall invariant). Decide the
+  // FULL elision policy here (recency + size gate + idempotency + grow-guard) and
+  // record the precomputed placeholder, so the rewrite map below fires only when
+  // there is real work — and never allocates a copy for a pure no-op.
+  // Clamp to [0, length]: a negative keep count would otherwise push `keepFrom`
+  // past the end and the loop would dereference an undefined location.
+  const keepFrom = Math.min(
+    toolResultLocs.length,
+    Math.max(0, toolResultLocs.length - opts.keepRecentToolResults),
+  );
+  const newestMessageIndex = messages.length - 1;
+  const elideAt = new Map<string, string>(); // "mi:pi" -> placeholder
+  let charsReclaimed = 0;
+  for (let k = 0; k < keepFrom; k++) {
+    const loc = toolResultLocs[k];
+    if (loc.mi === newestMessageIndex) continue; // newest message exempt
+    const ap = (messages[loc.mi].parts ?? [])[loc.pi] as {
+      type: string;
+      output?: unknown;
+      toolName?: string;
+    };
+    const serialized =
+      typeof ap.output === "string" ? ap.output : JSON.stringify(ap.output);
+    // Size gate (≈ clear_at_least): leave trivial results untouched — no churn.
+    if (serialized.length <= opts.minEditableToolChars) continue;
+    // Idempotency guard: never re-elide our own placeholder. At the default gate
+    // (50k) the ~150-char placeholder is far below it, but a misconfigured tiny
+    // gate would otherwise re-elide it every turn. Keeps this monotonic.
+    if (
+      typeof ap.output === "string" &&
+      ap.output.startsWith(ELIDED_PLACEHOLDER_PREFIX)
+    ) {
+      continue;
+    }
+    // Static tool parts encode the name in the type ("tool-<name>"); dynamic
+    // ones carry it in `toolName`.
+    const toolName =
+      ap.type === "dynamic-tool"
+        ? (ap.toolName ?? "unknown")
+        : ap.type.slice("tool-".length);
+    const placeholder = elidedToolPlaceholder(toolName, serialized.length);
+    // Grow-guard: a tiny gate could pick a result shorter than the placeholder;
+    // eliding would INFLATE the prompt (negative reclaim). Skip — never grow.
+    if (placeholder.length >= serialized.length) continue;
+    elideAt.set(`${loc.mi}:${loc.pi}`, placeholder);
+    charsReclaimed += serialized.length - placeholder.length;
+  }
+
+  // Nothing truly qualified ⇒ return the original reference so callers skip the
+  // re-estimate (cache-friendly no-op) and we allocate no copy.
+  if (elideAt.size === 0) {
+    return { messages, resultsElided: 0, charsReclaimed: 0 };
+  }
+
+  // Copy-on-write: only messages that contain an elided part are re-created.
+  const out = messages.map((m, mi) => {
+    const parts = m.parts ?? [];
+    if (!parts.some((_, pi) => elideAt.has(`${mi}:${pi}`))) return m;
+    const newParts = parts.map((part, pi) => {
+      const placeholder = elideAt.get(`${mi}:${pi}`);
+      if (placeholder === undefined) return part;
+      const ap = part as { output?: unknown };
+      return { ...ap, output: placeholder };
+    });
+    return { ...m, parts: newParts } as PlatypusUIMessage;
+  });
+
+  return { messages: out, resultsElided: elideAt.size, charsReclaimed };
+}
+
+/**
+ * Builds a readable transcript of UIMessages for the summarizer: one string per
+ * message, so the map-reduce summarizer can chunk on message boundaries and
+ * never split a single message mid-content (ADR-0012 §Tier 1 map-reduce).
+ *
+ * Text parts contribute their text; tool parts contribute
+ * `[tool <type>] <output soft-trimmed to 200 chars per side>`; every other part
+ * type is omitted. Each entry is `"<role>: <lines joined by newline>"`.
+ */
+function renderUIMessageList(messages: PlatypusUIMessage[]): string[] {
+  const renderPart = (raw: unknown): string => {
+    const part = raw as { type: string; text?: string; output?: unknown };
+    if (part.type === "text") return part.text ?? "";
+
+    const isTool =
+      part.type === "dynamic-tool" || part.type.startsWith("tool-");
+    if (!isTool) return "";
+
+    // A tool part without output yet still renders its "[tool …]" tag.
+    let body = "";
+    if (typeof part.output === "string") {
+      body = part.output;
+    } else if (part.output !== undefined) {
+      body = JSON.stringify(part.output);
+    }
+    return `[tool ${part.type}] ${softTrim(body, 200)}`;
+  };
+
+  const transcript: string[] = [];
+  for (const m of messages) {
+    const lines = (m.parts ?? [])
+      .map((p) => renderPart(p))
+      .filter((line) => line !== "");
+    transcript.push(`${m.role}: ${lines.join("\n")}`);
+  }
+  return transcript;
+}
+
+/** Options for {@link compactUIMessages}. */
+export type UICompactOptions = {
+  /** Reduce the model view to at most this many tokens (hysteresis target). */
+  targetTokens: number;
+  /** How many trailing messages form the `recent` set (passed to `pickKeepBoundary`). */
+  keepRecentMessages: number;
+  /** Prefix tool outputs whose serialized length exceeds this are soft-trimmed (Stage 1). */
+  minPrunableChars: number;
+  /** Threshold for pruning tool results in kept (recent) messages after Stage 2.
+   * Defaults to minPrunableChars * 5 when omitted. */
+  minRecentPrunableChars?: number;
+  /**
+   * The HARD window wall (ADR-0012 §Hard window wall): the kept view's tokens
+   * above which the call would actually overflow (already net of per-turn
+   * overhead by the caller). Recent (kept) tool results are trimmed ONLY when
+   * the kept view breaches this wall — a mere `targetTokens` (hysteresis) miss
+   * is cheap (it re-compacts next turn) and is not worth gutting active data the
+   * user is asking about. The single newest message is always exempt regardless.
+   * When omitted, recent results are always trimmed once over target (the
+   * behaviour predating ADR-0012 §Hard window wall) — safer than never trimming for callers that cannot
+   * supply the wall.
+   */
+  inputBudget?: number;
+  /** Image-token provider handed to the estimator; `"default"` when omitted. */
+  imageProvider?: ImageProvider;
+  /** Existing durable summary to fold the new prefix into (incremental). */
+  priorSummary?: string | null;
+  /** Model call that turns a transcript into a summary (Stage 2 only). */
+  summarize: Summarize;
+  /** Token budget of one summarize call; larger prefixes are map-reduced (ADR-0012 §Tier 1 (summarizer model & map-reduce)). */
+  summarizerWindow?: number;
+  /**
+   * Bypass the no-op estimate gate and force compaction even when char/4 says
+   * we are within budget. Used for dirty-forced Tier 1 (ADR-0012 §Recovery): recovery sets
+   * the dirty flag AFTER a provider rejection, so the estimator already failed;
+   * re-using it as the no-op gate causes an infinite overflow→dirty→no-op loop.
+   */
+  force?: boolean;
+  /**
+   * Pre-computed estimate of `messages`. The caller's trigger projection
+   * already ran the char/4 pass over this exact set, so reuse it instead of
+   * re-estimating the full history a second time on the hot path.
+   */
+  knownEstimate?: number;
+};
+
+/** Result of {@link compactUIMessages}. */
+export type UICompactionResult = {
+  /** Messages to send to the model (recent verbatim; pruned prefix if no summary). */
+  keptMessages: PlatypusUIMessage[];
+  /** New folded summary, or unchanged prior summary, or null. */
+  summaryText: string | null;
+  /** Id of the last message folded into the summary (the new watermark), or null. */
+  watermarkId: string | null;
+  /** Number of prefix messages replaced by the summary; 0 when Stage 2 did not run. */
+  messagesDropped: number;
+  /** True only when Stage 2 ran, i.e. the summarizer was invoked. */
+  usedModelCall: boolean;
+  /** Post-compaction estimate incl. the summary — should be ≤ targetTokens (ADR-0012 §Tier 1 (hysteresis)). */
+  estimatedTokens: number;
+};
+
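+// NOTE: this paragraph describes `summarizePrefix`, which is defined after
+// `packSegments` below: it summarizes a prefix transcript, map-reducing when the
+// transcript exceeds the summarizer's own window (ADR-0012 §Tier 1 (summarizer
+// model & map-reduce) — a huge cold-start history can't be sent whole).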
+/**
+ * Packs per-message transcript segments into chunks that each fit `windowTokens`,
+ * splitting only on MESSAGE boundaries — never mid-message. A lone segment larger
+ * than the window (a single oversized message) is char-sliced as a last resort,
+ * which is unavoidable for one message that cannot fit whole.
+ *
+ * A non-positive or NaN `windowTokens` means "no usable limit": everything is
+ * returned as one chunk instead of looping forever on a zero/negative slice
+ * step.
+ *
+ * @param segments     One transcript string per message, in order.
+ * @param windowTokens Token budget of a single chunk.
+ * @returns Chunks in original order; empty when there is no content.
+ */
+function packSegments(segments: string[], windowTokens: number): string[] {
+  if (!(windowTokens > 0)) {
+    const whole = segments.join("\n\n");
+    return whole ? [whole] : [];
+  }
+
+  // At least one char per slice so the slicing loop always advances, even for a
+  // fractional window.
+  const charBudget = Math.max(1, Math.floor(windowTokens * CHARS_PER_TOKEN));
+  const chunks: string[] = [];
+  let cur = "";
+  const flush = () => {
+    if (cur) {
+      chunks.push(cur);
+      cur = "";
+    }
+  };
+  for (const seg of segments) {
+    if (textTokens(seg) > windowTokens) {
+      // Oversized single message: emit what is pending, then char-slice it.
+      flush();
+      for (let i = 0; i < seg.length; i += charBudget) {
+        chunks.push(seg.slice(i, i + charBudget));
+      }
+      continue;
+    }
+    const next = cur ? `${cur}\n\n${seg}` : seg;
+    if (textTokens(next) > windowTokens) {
+      // Adding this message would overflow the chunk — start a new one with it.
+      flush();
+      cur = seg;
+    } else {
+      cur = next;
+    }
+  }
+  flush();
+  return chunks;
+}
+
+/**
+ * Summarizes a prefix transcript (one segment per message), folding in the prior
+ * durable summary, and map-reduces when the folded input does not fit
+ * `summarizerWindow` tokens.
+ *
+ * Termination is guaranteed:
+ *  - a prior summary that by itself fills the window is demoted into the
+ *    segment list (once), so it is chunked like any other content instead of
+ *    making every reduce pass overflow;
+ *  - a reduce pass recurses only when the chunk summaries are strictly shorter
+ *    than their input; otherwise one final best-effort call is made.
+ *
+ * @param segments         Per-message transcript strings.
+ * @param priorSummary     Existing summary to fold in, if any.
+ * @param summarize        The summarizer model call.
+ * @param summarizerWindow Token budget of one summarize call; when undefined,
+ *                         zero, negative or NaN everything is sent in one call.
+ * @returns The final summary text.
+ */
+async function summarizePrefix(
+  segments: string[],
+  priorSummary: string | null | undefined,
+  summarize: Summarize,
+  summarizerWindow: number | undefined,
+): Promise<string> {
+  const fold = (prior: string | null | undefined, body: string) =>
+    prior ? `Previous summary:\n${prior}\n\nNewer messages:\n${body}` : body;
+
+  // Single pass when everything — prior summary AND fold framing included —
+  // fits the window. Checking the *folded* size (not the bare body) closes the
+  // gap where a large prior summary overflowed an otherwise-fitting prefix.
+  const joined = segments.join("\n\n");
+  const folded = fold(priorSummary, joined);
+  if (
+    summarizerWindow === undefined ||
+    !(summarizerWindow > 0) ||
+    textTokens(folded) <= summarizerWindow
+  ) {
+    return summarize(folded);
+  }
+
+  // The prior summary alone leaves no room in the window: no amount of reducing
+  // the newer messages can ever make the fold fit. Treat it as ordinary content
+  // so it is chunked and summarized too. Runs at most once (prior becomes null).
+  if (priorSummary && textTokens(fold(priorSummary, "")) >= summarizerWindow) {
+    return summarizePrefix(
+      [`Previous summary:\n${priorSummary}`, ...segments],
+      null,
+      summarize,
+      summarizerWindow,
+    );
+  }
+
+  // Map: summarize each window-sized chunk (message-boundary aligned).
+  // Sequential on purpose — one summarizer call in flight at a time.
+  const chunks = packSegments(segments, summarizerWindow);
+  const chunkSummaries: string[] = [];
+  for (const chunk of chunks) chunkSummaries.push(await summarize(chunk));
+
+  // Progress guard: recursing only helps if the map pass actually shrank the
+  // material. A summarizer that does not shrink its input would otherwise keep
+  // this function issuing model calls forever.
+  const reduced = chunkSummaries.join("\n\n");
+  if (reduced.length >= joined.length) {
+    logger.warn(
+      {
+        summarizerWindow,
+        chunks: chunks.length,
+        inputChars: joined.length,
+        reducedChars: reduced.length,
+      },
+      "summarizer map pass made no progress — falling back to one best-effort reduce call",
+    );
+    return summarize(fold(priorSummary, reduced));
+  }
+
+  // Reduce: the joined chunk summaries (+ prior) can THEMSELVES exceed the window
+  // when there are many chunks, so recurse rather than summarizing them whole —
+  // the reduce step must never re-overflow (ADR-0012 §Tier 1 map-reduce). The
+  // guard above makes every recursion strictly smaller, so this converges.
+  return summarizePrefix(
+    chunkSummaries,
+    priorSummary,
+    summarize,
+    summarizerWindow,
+  );
+}
+
+/**
+ * Tier 1 (durable) compaction over UIMessages. Stage 1 prunes; if that reaches
+ * the target, no model call is made and the prefix stays (lighter). Otherwise
+ * Stage 2 summarizes the prefix into one synthetic summary and drops it from the
+ * model view. Raw messages are never mutated by the caller (ADR-0012 §View, not delete — this returns a
+ * view).
+ *
+ * @param messages Full UI message history for the chat (not mutated).
+ * @param opts     Targets, thresholds, the summarizer and optional hints; see
+ *                 {@link UICompactOptions}.
+ * @returns The view to send to the model, the (possibly new) summary, the new
+ *          watermark id when a summary was produced, and the post-compaction
+ *          token estimate.
+ */
+export async function compactUIMessages(
+  messages: PlatypusUIMessage[],
+  opts: UICompactOptions,
+): Promise<UICompactionResult> {
+  const provider = opts.imageProvider ?? "default";
+  const priorTokens = opts.priorSummary ? textTokens(opts.priorSummary) : 0;
+  const estimate = (msgs: PlatypusUIMessage[]) =>
+    estimateTokens(uiMessagesToCountUnits(msgs, provider));
+
+  // Reuse the caller's already-computed estimate of `messages` rather than
+  // re-running the full char/4 pass on the hot path.
+  const initialEstimate = opts.knownEstimate ?? estimate(messages);
+
+  // No-op when already within target (incl. the existing summary). This is what
+  // makes a follow-up turn after compaction NOT re-fire (hysteresis, ADR-0012 §Tier 1 (hysteresis)).
+  // Bypassed when `force` is set — recovery sets the dirty flag AFTER a provider
+  // rejection, so the estimator already proved wrong; using it as a no-op gate
+  // causes an infinite overflow→dirty→no-op loop (ADR-0012 §Recovery).
+  if (!opts.force && initialEstimate + priorTokens <= opts.targetTokens) {
+    return {
+      keptMessages: messages,
+      summaryText: opts.priorSummary ?? null,
+      watermarkId: null,
+      messagesDropped: 0,
+      usedModelCall: false,
+      estimatedTokens: initialEstimate + priorTokens,
+    };
+  }
+
+  const boundary = pickKeepBoundary(
+    messages.length,
+    opts.keepRecentMessages,
+    () => true, // UIMessage tool-call+result live in one message — any split is safe
+  );
+  const prefix = messages.slice(0, boundary);
+  const recent = messages.slice(boundary);
+
+  // Stage 1 — prune bulky tool results in the prefix (no model call).
+  const prunedPrefix = prefix.map(
+    (m) => pruneUIMessage(m, opts.minPrunableChars).message,
+  );
+  const prunedAll = [...prunedPrefix, ...recent];
+  if (!opts.force) {
+    // Estimate the pruned view ONCE and reuse it for both the gate and the
+    // reported size — this is a full-history pass on the hot path.
+    const prunedEstimate = estimate(prunedAll) + priorTokens;
+    if (prunedEstimate <= opts.targetTokens) {
+      return {
+        keptMessages: prunedAll,
+        summaryText: opts.priorSummary ?? null,
+        watermarkId: null, // pruning advances no watermark (no new summary)
+        messagesDropped: 0,
+        usedModelCall: false,
+        estimatedTokens: prunedEstimate,
+      };
+    }
+  }
+
+  // Past this point we are over target. Recent (kept) messages stay in the model
+  // view, so extreme outliers (e.g. large MCP tool dumps) bloat tokensAfter.
+  // The hard window wall (ADR-0012 §Hard window wall): trim them ONLY when the kept view would breach
+  // the hard window wall (`inputBudget`); a soft `targetTokens` miss is left at
+  // full fidelity and just re-compacts next turn (cheap). The newest message is
+  // always exempt — it is the data the current turn is actively about.
+  const recentThreshold =
+    opts.minRecentPrunableChars ?? opts.minPrunableChars * 5;
+  const pruneRecentExemptNewest = (
+    msgs: PlatypusUIMessage[],
+  ): { messages: PlatypusUIMessage[]; changed: boolean } => {
+    let changed = false;
+    const trimmedMsgs = msgs.map((m, i) => {
+      if (i === msgs.length - 1) return m; // newest always exempt
+      const pruned = pruneUIMessage(m, recentThreshold);
+      if (pruned.changed) changed = true;
+      return pruned.message;
+    });
+    return { messages: trimmedMsgs, changed };
+  };
+  // Decides whether to keep `recent` verbatim or trim it (ADR-0012 §Hard window wall). Returns the
+  // kept messages and their token estimate (reused for `afterEstimate` so the
+  // recent set is never re-estimated). `fixedTokens` is the kept view's NON-recent
+  // part (pruned prefix and/or folded summary). When `inputBudget` is omitted the
+  // wall is unknown → always trim once over target (guard predating ADR-0012 §Hard window wall).
+  const keepRecentWithinWall = (
+    fixedTokens: number,
+    recentMsgs: PlatypusUIMessage[],
+  ): { messages: PlatypusUIMessage[]; recentTokens: number } => {
+    const recentTokens = estimate(recentMsgs);
+    if (
+      opts.inputBudget !== undefined &&
+      fixedTokens + recentTokens <= opts.inputBudget
+    ) {
+      return { messages: recentMsgs, recentTokens }; // within wall — full fidelity
+    }
+    const trimmed = pruneRecentExemptNewest(recentMsgs);
+    // Nothing prunable (no tool outputs over threshold) → reuse the estimate.
+    return {
+      messages: trimmed.messages,
+      recentTokens: trimmed.changed ? estimate(trimmed.messages) : recentTokens,
+    };
+  };
+
+  // Warn only when the kept view still breaches the HARD wall after trimming —
+  // i.e. recent genuinely couldn't be brought under the window (one oversized
+  // result; ingestion-cap territory). Under ADR-0012 §Hard window wall a soft `targetTokens`
+  // miss is by design (recent kept verbatim below the wall), so it is NOT a
+  // warning. Falls back to the old `target * 2` heuristic when no wall is supplied.
+  const warnIfOverWall = (afterEstimate: number) => {
+    const over =
+      opts.inputBudget !== undefined
+        ? afterEstimate > opts.inputBudget
+        : afterEstimate > opts.targetTokens * 2;
+    if (over) {
+      logger.warn(
+        {
+          afterEstimate,
+          targetTokens: opts.targetTokens,
+          inputBudget: opts.inputBudget,
+          keepRecentMessages: opts.keepRecentMessages,
+        },
+        "compaction fired but recent messages exceed the window — a single oversized tool result may be uncompactable (see ingestion cap)",
+      );
+    }
+  };
+
+  // ADR-0012 §Tier 1: nothing to summarize when the prefix is empty (history fits within
+  // keepRecentMessages). Also bail when the boundary message has no id — we
+  // cannot anchor a watermark there, and committing a watermark:null +
+  // non-null summary would orphan the summary (viewAfterWatermark ignores
+  // contextSummary when the watermark is null, so the previously-summarised
+  // prefix reappears every turn).
+  const watermarkId =
+    prefix.length > 0 ? (prefix[prefix.length - 1].id ?? null) : null;
+  if (prefix.length === 0 || watermarkId === null) {
+    const prunedPrefixTokens = estimate(prunedPrefix) + priorTokens;
+    const keptRecent = keepRecentWithinWall(prunedPrefixTokens, recent);
+    const kept = [...prunedPrefix, ...keptRecent.messages];
+    const afterEstimate = prunedPrefixTokens + keptRecent.recentTokens;
+    warnIfOverWall(afterEstimate);
+    return {
+      keptMessages: kept,
+      summaryText: opts.priorSummary ?? null,
+      watermarkId: null,
+      messagesDropped: 0,
+      usedModelCall: false,
+      estimatedTokens: afterEstimate,
+    };
+  }
+
+  // Stage 2 — summarize the pruned prefix into one synthetic summary.
+  const summaryText = await summarizePrefix(
+    renderUIMessageList(prunedPrefix),
+    opts.priorSummary,
+    opts.summarize,
+    opts.summarizerWindow,
+  );
+
+  // The new summary already folds in the prior one, so only its own size counts.
+  const summaryTokens = textTokens(summaryText);
+  const keptRecent = keepRecentWithinWall(summaryTokens, recent);
+  const afterEstimate = keptRecent.recentTokens + summaryTokens;
+  warnIfOverWall(afterEstimate);
+
+  return {
+    keptMessages: keptRecent.messages,
+    summaryText,
+    watermarkId,
+    messagesDropped: prefix.length,
+    usedModelCall: true,
+    estimatedTokens: afterEstimate,
+  };
+}
+
+// --- Tier 2 / recovery: ModelMessage shape -------------------------------
+
+/**
+ * Soft-trims bulky tool-result parts in a ModelMessage (role "tool").
+ *
+ * Non-tool messages and string-content messages are returned as-is. For each
+ * `tool-result` part whose payload is longer than `minPrunableChars`:
+ *  - `text` / `error-text`: the value is soft-trimmed, the type is kept;
+ *  - `json` / `error-json`: the value is serialized and soft-trimmed, becoming
+ *    `text` / `error-text` respectively — the error status is preserved so a
+ *    failed tool call is not presented to the model as a success;
+ *  - `content`: text items are joined and soft-trimmed, and non-text items are
+ *    replaced by a `[N media item(s)]` marker.
+ *
+ * @returns A shallow copy of the message with a new `content` array.
+ */
+function pruneModelMessage(
+  message: ModelMessage,
+  minPrunableChars: number,
+): ModelMessage {
+  if (message.role !== "tool" || typeof message.content === "string") {
+    return message;
+  }
+  const content = message.content.map((part) => {
+    if (part.type !== "tool-result") return part;
+    const output = part.output;
+    if (output.type === "text" || output.type === "error-text") {
+      if (output.value.length > minPrunableChars) {
+        return {
+          ...part,
+          output: { ...output, value: softTrim(output.value) },
+        };
+      }
+      return part;
+    }
+    if (output.type === "json" || output.type === "error-json") {
+      const serialized = JSON.stringify(output.value);
+      if (serialized.length > minPrunableChars) {
+        // Trimmed JSON is no longer valid JSON, so it becomes a text variant —
+        // but an error must stay an error.
+        const trimmedType =
+          output.type === "error-json"
+            ? ("error-text" as const)
+            : ("text" as const);
+        return {
+          ...part,
+          output: { type: trimmedType, value: softTrim(serialized) },
+        };
+      }
+    }
+    // ADR-0012 §Tier 1 (Stage 1 prune): @ai-sdk/mcp emits {type:"content"} for essentially every MCP tool
+    // result. Without this branch Stage 1 reclaims zero tokens from the bulkiest
+    // payloads and their text is invisible to the summarizer.
+    if (output.type === "content" && Array.isArray(output.value)) {
+      type ContentItem = { type: string; text?: string };
+      const items = output.value as ContentItem[];
+      const text = items
+        .filter((i) => i.type === "text")
+        .map((i) => i.text ?? "")
+        .join("\n");
+      const mediaCount = items.filter((i) => i.type !== "text").length;
+      const marker = mediaCount > 0 ? `\n[${mediaCount} media item(s)]` : "";
+      // Trim the text BEFORE appending the media marker so a huge text payload
+      // can never truncate the "[N media item(s)]" signal.
+      if (text.length + marker.length > minPrunableChars) {
+        return {
+          ...part,
+          output: {
+            type: "content" as const,
+            value: [
+              { type: "text" as const, text: `${softTrim(text)}${marker}` },
+            ],
+          },
+        };
+      }
+    }
+    return part;
+  });
+  return { ...message, content };
+}
+
+/**
+ * Per-message transcript strings (one entry per message). See renderUIMessageList.
+ *
+ * String content renders as `role: content`. For part arrays, text parts are
+ * kept verbatim, tool calls collapse to `[tool-call <name>]`, and tool results
+ * render as `[tool-result] ` followed by `softTrim(output, 200)`. Any other
+ * part type, and any part that renders to an empty string, is dropped.
+ *
+ * @param messages Model messages to render.
+ * @returns One `role: text` string per input message, in the same order.
+ */
+function renderModelMessageList(messages: ModelMessage[]): string[] {
+  type ContentItem = { type: string; text?: string };
+  return messages.map((m) => {
+    if (typeof m.content === "string") return `${m.role}: ${m.content}`;
+    const text = m.content
+      .map((p) => {
+        if (p.type === "text") return p.text;
+        if (p.type === "tool-call") return `[tool-call ${p.toolName}]`;
+        if (p.type === "tool-result") {
+          const o = p.output;
+          let v: string;
+          if (o.type === "text" || o.type === "error-text") {
+            v = o.value;
+          } else if (o.type === "json" || o.type === "error-json") {
+            v = JSON.stringify(o.value);
+          } else if (o.type === "content" && Array.isArray(o.value)) {
+            // ADR-0012 §Tier 1 (Stage 1 prune): extract text items from content-type MCP output.
+            // The Array.isArray guard mirrors pruneModelMessage so a malformed
+            // payload renders as empty text instead of throwing mid-summarize.
+            v = (o.value as ContentItem[])
+              .filter((i) => i.type === "text")
+              .map((i) => i.text ?? "")
+              .join("\n");
+          } else {
+            v = "";
+          }
+          return `[tool-result] ${softTrim(v, 200)}`;
+        }
+        return "";
+      })
+      .filter(Boolean)
+      .join("\n");
+    return `${m.role}: ${text}`;
+  });
+}
+
+/**
+ * Wraps summary text in a synthetic model message. The message uses the user
+ * role with an explicit "[Summary of earlier conversation]" header — the most
+ * broadly accepted shape, since it avoids mid-array system-message
+ * restrictions.
+ *
+ * @param text Summary body placed on the line after the header.
+ * @returns A user-role message holding a single text part.
+ */
+export function summaryModelMessage(text: string): ModelMessage {
+  const framed = `[Summary of earlier conversation]\n${text}`;
+  const summary: ModelMessage = {
+    role: "user",
+    content: [{ type: "text", text: framed }],
+  };
+  return summary;
+}
+
+/** Options accepted by compactModelMessages. */
+export type ModelCompactOptions = {
+  /** Estimated-token ceiling: at or below it (and without `force`) no summary is produced. */
+  targetTokens: number;
+  /** Forwarded to pickKeepBoundary to choose where the kept "recent" tail starts. */
+  keepRecentMessages: number;
+  /** Forwarded to pruneModelMessage for every prefix message (Stage 1 threshold). */
+  minPrunableChars: number;
+  /** Provider used for token estimation; falls back to "default" when omitted. */
+  imageProvider?: ImageProvider;
+  /** Summarizer handed to summarizePrefix in Stage 2. */
+  summarize: Summarize;
+  /** Forwarded unchanged to summarizePrefix. */
+  summarizerWindow?: number;
+  /** Bypass the no-op estimate gate (same semantics as UICompactOptions.force). */
+  force?: boolean;
+  /**
+   * Estimate of `messages` the caller already computed (e.g. the Tier 2
+   * prepareStep trigger check). Reuses it for gate 1 instead of re-running a
+   * full estimate pass over the same messages.
+   */
+  knownEstimate?: number;
+};
+
+/** Outcome of compactModelMessages. */
+export type ModelCompactionResult = {
+  /** Messages to use: the input as-is, the pruned list, or summary + recent. */
+  messages: ModelMessage[];
+  /** Number of prefix messages replaced by the summary; 0 when no summary was made. */
+  messagesDropped: number;
+  /** True only when the Stage 2 summarizer was invoked. */
+  usedModelCall: boolean;
+  /** Token estimate for the returned `messages`. */
+  estimatedTokens: number;
+};
+
+/**
+ * Tier 2 (intra-turn) / recovery compaction over ModelMessages. Throwaway — the
+ * SDK keeps its canonical list; this only keeps a heavy response executable.
+ * Pairing rule differs from Tier 1: an assistant tool-call and its following
+ * `role:"tool"` result are separate messages and must not be split.
+ *
+ * Stages, in order:
+ *  - Gate 1: return `messages` untouched when the estimate already fits
+ *    `opts.targetTokens` (skipped when `opts.force` is set).
+ *  - Stage 1: prune tool results in the prefix (everything before the keep
+ *    boundary); return the pruned list when it fits the target (again skipped
+ *    under `force`) or when there is no prefix to summarize.
+ *  - Stage 2: summarize the pruned prefix into one synthetic message placed in
+ *    front of the untouched recent messages.
+ *
+ * @param messages Model messages to compact.
+ * @param opts Thresholds, estimator provider and summarizer; see {@link ModelCompactOptions}.
+ * @returns The messages to send plus bookkeeping; see {@link ModelCompactionResult}.
+ */
+export async function compactModelMessages(
+  messages: ModelMessage[],
+  opts: ModelCompactOptions,
+): Promise<ModelCompactionResult> {
+  const provider = opts.imageProvider ?? "default";
+  const estimate = (msgs: ModelMessage[]) =>
+    estimateTokens(modelMessagesToCountUnits(msgs, provider));
+
+  const initialEstimate = opts.knownEstimate ?? estimate(messages);
+  if (!opts.force && initialEstimate <= opts.targetTokens) {
+    return {
+      messages,
+      messagesDropped: 0,
+      usedModelCall: false,
+      estimatedTokens: initialEstimate,
+    };
+  }
+
+  // A boundary is unsafe if it would start `recent` on a tool result orphaned
+  // from its assistant tool-call (which would sit in the dropped prefix).
+  const boundary = pickKeepBoundary(
+    messages.length,
+    opts.keepRecentMessages,
+    (i) => i >= messages.length || messages[i].role !== "tool",
+  );
+  const prefix = messages.slice(0, boundary);
+  const recent = messages.slice(boundary);
+
+  // Stage 1 — prune.
+  const prunedPrefix = prefix.map((m) =>
+    pruneModelMessage(m, opts.minPrunableChars),
+  );
+  const prunedAll = [...prunedPrefix, ...recent];
+
+  // Two reasons to stop after pruning, sharing ONE estimate pass:
+  //  - Prune sufficed. Force-guarded like gate 1 (ADR-0012 §Recovery): when
+  //    recovery forces a trim the provider already rejected this prompt, so the
+  //    estimator proved wrong — re-trusting it here would return a
+  //    byte-identical prompt and burn the single retry.
+  //  - ADR-0012 §Tier 1 (model-side): nothing to summarize when the prefix is
+  //    empty (recent alone exceeds keepRecentMessages). Summarizing an empty
+  //    prefix would add a synthetic message and GROW the prompt — never
+  //    converges. Surface the overflow instead (recovery retries once, then
+  //    propagates).
+  // The estimate is skipped entirely when forced with a non-empty prefix, as
+  // neither reason can apply.
+  const prefixIsEmpty = prefix.length === 0;
+  if (!opts.force || prefixIsEmpty) {
+    const prunedEstimate = estimate(prunedAll);
+    if (prefixIsEmpty || prunedEstimate <= opts.targetTokens) {
+      return {
+        messages: prunedAll,
+        messagesDropped: 0,
+        usedModelCall: false,
+        estimatedTokens: prunedEstimate,
+      };
+    }
+  }
+
+  // Stage 2 — summarize the pruned prefix into one synthetic message.
+  const summaryText = await summarizePrefix(
+    renderModelMessageList(prunedPrefix),
+    null,
+    opts.summarize,
+    opts.summarizerWindow,
+  );
+  const compacted = [summaryModelMessage(summaryText), ...recent];
+  return {
+    messages: compacted,
+    messagesDropped: prefix.length,
+    usedModelCall: true,
+    estimatedTokens: estimate(compacted),
+  };
+}
+
+// ===========================================================================
+// Slice 2c — Tier 1 orchestration (budget, view reconstruction, persist)
+//
+// `applyTier1Compaction` is the durable, cross-turn entry point invoked from
+// `prepareChatTurn`. It is dependency-injected (store + summarizer) so it is
+// unit-testable without standing up the full turn machinery. It:
+//   1. Reconstructs the compacted VIEW from persisted state every turn (ADR-0012 §View, not delete) —
+//      drop messages up to the watermark, re-inject the stored summary.
+//   2. Triggers a fresh compaction when the projected size crosses the trigger
+//      ratio, OR when `compactionDirty` forces it (recovery hand-off, ADR-0012 §Recovery).
+//   3. Persists any new summary/watermark + clears dirty via the single CAS
+//      writer (ADR-0012 §One durable writer), the loser skipping safely on contention.
+// ===========================================================================
+
+/** Resolved per-turn compaction config (ADR-0012 §Config & kill switch), defaults applied. */
+export type CompactionConfig = {
+  /** Kill switch for proactive compaction; a dirty-forced compaction still runs when false. */
+  compactionEnabled: boolean;
+  /** Fraction of the input budget at which compaction triggers (see computeBudget). */
+  triggerRatio: number;
+  /** Fraction of the input budget that compaction aims for (see computeBudget). */
+  targetRatio: number;
+  /** Fraction of the raw context window held back as safety headroom (see computeBudget). */
+  reserveRatio: number;
+  /** Forwarded to the compactor as `keepRecentMessages`. */
+  keepRecentMessages: number;
+  /** Forwarded to the compactor as `minPrunableChars` (Stage 1 prune threshold). */
+  minPrunableChars: number;
+  /** Threshold for pruning tool results in the kept (recent) messages after
+   * Stage 2 summarization. Higher than minPrunableChars — we trim extreme
+   * outliers (e.g. huge MCP tool dumps) without destroying useful context. */
+  minRecentPrunableChars: number;
+  /** Stage 0 context editing (ADR-0012 §Stage 0 — context editing): elide OLD bulky tool results to a
+   * placeholder before the trigger check, so a leaned view can avoid summarizing
+   * entirely. Gated alongside the COMPACTION_ENABLED kill switch. */
+  contextEditingEnabled: boolean;
+  /** Stage 0: exempt the last N tool results from elision (recency, by count). */
+  keepRecentToolResults: number;
+  /** Stage 0: only elide a tool result whose serialized output exceeds this. */
+  minEditableToolChars: number;
+};
+
+/** Built-in compaction defaults; every CompactionConfig field has a value here. */
+export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
+  compactionEnabled: true,
+  triggerRatio: 0.8,
+  // Kept below triggerRatio so a compacted chat does not immediately re-trigger.
+  targetRatio: 0.5,
+  reserveRatio: 0.05,
+  keepRecentMessages: 10,
+  minPrunableChars: 2000,
+  minRecentPrunableChars: 10000,
+  contextEditingEnabled: true,
+  keepRecentToolResults: 4,
+  // 50k chars ≈ 12.5k tokens — matches LibreChat's minPrunableToolChars, the only
+  // direct per-result char-gate analog. High enough to spare medium results (less
+  // cache churn) while still catching the ~160k-char mempalace dump.
+  minEditableToolChars: 50000,
+};
+
+/** Token budget derived by computeBudget for one model/turn. */
+export type Budget = {
+  /** Context window minus the output reservation and safety headroom (at least 1). */
+  inputBudget: number;
+  /** `triggerRatio * inputBudget` — projected size at which compaction fires. */
+  triggerTokens: number;
+  /** `targetRatio * inputBudget` — size compaction aims to get under. */
+  targetTokens: number;
+};
+
+/**
+ * Budget math (ADR-0012 §Tier 1 (budget math)). Trigger and target are
+ * fractions of the INPUT budget, i.e. the context window with the output
+ * reservation and a safety headroom taken out — never of the raw window.
+ *
+ * @param contextWindow Model context window, in tokens.
+ * @param maxOutputTokens Resolved max output; when undefined a conservative
+ *   slice (the smaller of 4096 and a quarter of the window) is reserved.
+ * @param config Supplies `reserveRatio`, `triggerRatio` and `targetRatio`.
+ * @returns The input budget plus its trigger/target thresholds.
+ */
+export function computeBudget(
+  contextWindow: number,
+  maxOutputTokens: number | undefined,
+  config: CompactionConfig,
+): Budget {
+  const { reserveRatio, triggerRatio, targetRatio } = config;
+
+  const fallbackOutput = Math.min(4096, Math.floor(contextWindow * 0.25));
+  const requestedOutput = maxOutputTokens ?? fallbackOutput;
+  // The output reservation may never exceed half the window (ADR-0012 §Tier 1
+  // (budget math)). For some providers litellm's `max_input_tokens` — the
+  // source of `contextWindow` — is already input-scoped, so taking a large
+  // `max_output_tokens` off it a second time can drive `inputBudget` toward 1,
+  // which puts trigger/target near 0 and causes thrashing. The cap stops that
+  // otherwise-harmless over-reservation from degenerating.
+  const halfWindow = Math.floor(contextWindow * 0.5);
+  const outputReserve = Math.min(requestedOutput, halfWindow);
+
+  const headroom = Math.floor(reserveRatio * contextWindow);
+  const remaining = contextWindow - outputReserve - headroom;
+  const inputBudget = Math.max(1, remaining);
+
+  const budget: Budget = {
+    inputBudget,
+    triggerTokens: triggerRatio * inputBudget,
+    targetTokens: targetRatio * inputBudget,
+  };
+  return budget;
+}
+
+/**
+ * First-turn safety margin on the char/4 projection (ADR-0012 §Token estimation (cold-start margin)): char/4
+ * under-counts CJK, dense JSON, and tool chatter, and on a cold start there is
+ * no provider-reported `usage.inputTokens` to correct it.
+ * Applied as a multiplier by projectTier1Tokens when no positive prior-turn
+ * input count is available.
+ */
+export const COLD_START_MARGIN = 1.15;
+
+/**
+ * The Tier 1 trigger projection (ADR-0012 §Tier 1 (trigger projection)): what THIS turn is about to put on
+ * the wire, not just the stored messages. `overheadTokens` carries the
+ * estimated system prompt + tool schemas + skill payload — invisible to a
+ * message-only estimate but sent to the model on every turn (the observed
+ * live-test gap: provider reported 8888 input tokens vs ~986 message-only).
+ * `lastInputTokens` is the provider-reported count from the prior turn — the
+ * corrective baseline for turns ≥ 2 (threaded in the ADR-0012 §Context-usage ring usage-metadata chunk).
+ * When it is absent the whole char/4 projection is inflated by
+ * {@link COLD_START_MARGIN} (ADR-0012 §Token estimation (cold-start margin)).
+ *
+ * @param args.messageTokens char/4 estimate of the unsummarized messages.
+ * @param args.priorSummaryTokens char/4 estimate of the stored summary.
+ * @param args.overheadTokens Per-turn overhead estimate; treated as 0 when omitted.
+ * @param args.lastInputTokens Provider-reported input tokens from the prior turn, if any.
+ * @returns The projected input-token count for this turn (an integer when the
+ *   inputs are finite).
+ */
+export function projectTier1Tokens(args: {
+  messageTokens: number;
+  priorSummaryTokens: number;
+  overheadTokens?: number;
+  lastInputTokens?: number;
+}): number {
+  const charBased =
+    args.messageTokens + args.priorSummaryTokens + (args.overheadTokens ?? 0);
+  const last = args.lastInputTokens;
+  // Treat a non-positive count as "no baseline" (ADR-0012 §Tier 1 (trigger projection)): some OpenAI-compatible /
+  // vLLM gateways omit `usage.inputTokens`, which we persist as
+  // `contextTokens = 0`. A bare `== null` check would let that 0 slip through —
+  // skipping the cold-start margin AND no-op-ing the `max()` below — leaving the
+  // raw char/4 projection with no safety buffer on EVERY turn for those
+  // providers. Falling back to the margin keeps the conservative over-count.
+  // Non-finite values get the same treatment: a NaN baseline would make
+  // `Math.max` return NaN, and `NaN >= triggerTokens` is always false, which
+  // would silently disable the trigger.
+  if (last == null || !Number.isFinite(last) || last <= 0) {
+    return Math.ceil(charBased * COLD_START_MARGIN);
+  }
+  // Two independent estimates of this turn's payload: `charBased` is a fresh
+  // char/4 pass over the whole unsummarized view (+ summary + overhead);
+  // `lastInputTokens` is the provider's accurate count from the prior turn but
+  // stale (missing messages appended since). Take the larger — char/4 chronically
+  // under-counts, so this is usually `lastInputTokens`; over-counting only
+  // triggers compaction earlier, never an overflow.
+  return Math.max(Math.ceil(charBased), last);
+}
+
+/**
+ * Builds the synthetic UIMessage that carries the persisted summary into the
+ * view. It always uses the fixed id "context-summary" and the user role.
+ *
+ * @param text Summary body placed on the line after the header.
+ * @returns A user-role UI message with a single text part.
+ */
+export function summaryUIMessage(text: string): PlatypusUIMessage {
+  const framed = `[Summary of earlier conversation]\n${text}`;
+  const summary: PlatypusUIMessage = {
+    id: "context-summary",
+    role: "user",
+    parts: [{ type: "text", text: framed }],
+  };
+  return summary;
+}
+
+/** Fail-loud event so the transcript shows compaction happened (ADR-0012 §Tier 1). */
+export type CompactionEvent = {
+  /** Discriminator; the only event type emitted here. */
+  type: "context-compacted";
+  /** Number of messages folded into the new summary. */
+  messagesDropped: number;
+  /** char/4 estimate before compaction: messages + prior summary + overhead. */
+  tokensBefore: number;
+  /** char/4 estimate after compaction: compacted result + overhead. */
+  tokensAfter: number;
+};
+
+/** Everything applyTier1Compaction needs for one turn. */
+export type Tier1Input = {
+  /** Chat being compacted; used for logging and as the key for the CAS write. */
+  chatId: string;
+  /** Full durable history (post-`inlineFileUrls`, ADR-0012 §Token estimation). */
+  messages: PlatypusUIMessage[];
+  /** Persisted compaction state read for this turn (summary, watermark, dirty flag, version). */
+  state: CompactionState;
+  /** Trigger/target/input thresholds, e.g. from computeBudget. */
+  budget: Budget;
+  /** Resolved compaction settings. */
+  config: CompactionConfig;
+  /** Provider used when estimating tokens for the messages. */
+  imageProvider: ImageProvider;
+  /** Summarizer forwarded to compactUIMessages. */
+  summarize: Summarize;
+  /** Store handed to commitWatermark for the durable write. */
+  store: CompactionStore;
+  /** Forwarded unchanged to compactUIMessages. */
+  summarizerWindow?: number;
+  /**
+   * Estimated tokens of the per-turn payload that is NOT in `messages` —
+   * system prompt, tool schemas, skill list (ADR-0012 §Tier 1 (trigger projection)). Counted toward the
+   * trigger and subtracted from the compaction target (compaction cannot
+   * shrink it, so hysteresis must leave room for it — ADR-0012 §Tier 1 (hysteresis)).
+   */
+  overheadTokens?: number;
+  /** Provider-reported `usage.inputTokens` from the prior turn (ADR-0012 §Tier 1 (trigger projection), via ADR-0012 §Context-usage ring). */
+  lastInputTokens?: number;
+  /** Called with a "context-compacted" event after a model summary was produced. */
+  onEvent?: (event: CompactionEvent) => void;
+};
+
+/** User-visible record that a summary-producing compaction happened. */
+export type CompactionTrace = {
+  /** Number of messages that were folded into the summary. */
+  messagesDropped: number;
+  /** Leading excerpt of the LLM-generated summary (applyTier1Compaction keeps
+   * at most the first 120 chars). */
+  summaryExcerpt?: string;
+};
+
+/** Tool name for the synthetic compaction-trace tool-call/result pair (ADR-0012 §Compaction trace in the timeline).
+ * Shared by the stream-trace producer (agent-runner), the strip filter that
+ * keeps it out of the model payload, the ADR-0012 §Force-compact on demand persisted-message builder, and the
+ * frontend display-name mapping.
+ * buildCompactionTraceMessage uses it as the `tool-<name>` part type. */
+export const COMPACT_CONTEXT_TOOL_NAME = "compact_context";
+
+/**
+ * Creates a standalone synthetic assistant message whose single part is a
+ * `compact_context` tool-call/result pair describing the compaction
+ * (ADR-0012 §Force-compact on demand — a forced compaction has no live stream
+ * to inject into, so the trace is persisted as its own message). The message
+ * is always appended ABOVE the watermark, so it is never itself summarized;
+ * the strip filter keeps it out of the model payload on subsequent turns.
+ *
+ * @param trace Dropped-message count and optional summary excerpt.
+ * @param id Message id; the tool call id is derived from it as `<id>-call`.
+ * @returns The assistant message carrying the trace part.
+ */
+export function buildCompactionTraceMessage(
+  trace: CompactionTrace,
+  id: string,
+): PlatypusUIMessage {
+  const { messagesDropped, summaryExcerpt } = trace;
+
+  // The excerpt key is only present when there is a non-empty excerpt.
+  const output: { messagesDropped: number; summaryExcerpt?: string } = {
+    messagesDropped,
+  };
+  if (summaryExcerpt) {
+    output.summaryExcerpt = summaryExcerpt;
+  }
+
+  const tracePart = {
+    type: `tool-${COMPACT_CONTEXT_TOOL_NAME}`,
+    toolCallId: `${id}-call`,
+    state: "output-available",
+    input: { messagesDropped },
+    output,
+  };
+
+  return {
+    id,
+    role: "assistant",
+    parts: [tracePart],
+  } as unknown as PlatypusUIMessage;
+}
+
+/** Result of applyTier1Compaction for one turn. */
+export type Tier1Output = {
+  /** The compacted view to send to the model (summary message + recent). */
+  messages: PlatypusUIMessage[];
+  /** True when a new summary was produced and persisted this turn. */
+  compacted: boolean;
+  /** Result of the CAS write; set only when a write was attempted (new summary
+   * or clearing the dirty flag). */
+  commit?: CommitResult;
+  /**
+   * Present ONLY when a model summary was produced this turn — the user-visible
+   * "compaction happened" signal (ADR-0012 §Compaction trace in the timeline). Deliberately undefined for
+   * prune-only and force-dirty-within-target no-op turns: those drop 0 messages
+   * and have no excerpt, so a trace would render an empty/confusing timeline
+   * entry.
+   */
+  compactionTrace?: CompactionTrace;
+};
+
+/**
+ * Cuts the history at the watermark message id.
+ *
+ * @param messages Full message history.
+ * @param state Persisted compaction state (watermark id + stored summary).
+ * @returns The messages that follow the watermark together with the stored
+ *   summary — or, when there is no watermark or its message can no longer be
+ *   found, the full history with a null summary.
+ */
+function viewAfterWatermark(
+  messages: PlatypusUIMessage[],
+  state: CompactionState,
+): { afterWatermark: PlatypusUIMessage[]; priorSummary: string | null } {
+  const watermarkId = state.summaryWatermark;
+  const position = watermarkId
+    ? messages.findIndex((candidate) => candidate.id === watermarkId)
+    : -1;
+
+  // No watermark, or the watermark message is gone (edited/deleted before
+  // invalidation landed): distrust the summary and fall back to the full
+  // history (defensive ADR-0012 §Summary invalidation).
+  if (position === -1) {
+    return { afterWatermark: messages, priorSummary: null };
+  }
+
+  const afterWatermark = messages.slice(position + 1);
+  return { afterWatermark, priorSummary: state.contextSummary };
+}
+
+/**
+ * Tier 1 (durable, cross-turn) compaction entry point.
+ *
+ * Flow, as implemented below:
+ *  1. Rebuild the view from persisted state: the messages after the watermark
+ *     plus the stored summary (when the watermark message is still present).
+ *  2. Optionally run Stage 0 context editing over that view.
+ *  3. Project this turn's token count and decide whether to compact: always
+ *     when `state.compactionDirty` is set, otherwise only when compaction is
+ *     enabled and the projection reaches `budget.triggerTokens`.
+ *  4. When triggered, run `compactUIMessages` and persist the outcome through a
+ *     version-pinned `commitWatermark` write (skipped on a version mismatch).
+ *
+ * @param input Turn inputs; see {@link Tier1Input}.
+ * @returns The view to send, whether a model summary was produced, the commit
+ *   result when a write was attempted, and a trace when a summary was produced.
+ */
+export async function applyTier1Compaction(
+  input: Tier1Input,
+): Promise<Tier1Output> {
+  const { messages, state, budget, config, imageProvider } = input;
+  const estimate = (msgs: PlatypusUIMessage[]) =>
+    estimateTokens(uiMessagesToCountUnits(msgs, imageProvider));
+
+  const { afterWatermark, priorSummary } = viewAfterWatermark(messages, state);
+  const priorSummaryTokens = priorSummary ? textTokens(priorSummary) : 0;
+
+  // Stage 0 — context editing (ADR-0012 §Stage 0 — context editing): elide OLD bulky tool results to
+  // placeholders BEFORE the trigger projection, so a leaned view can drop under
+  // the trigger and skip summarization entirely. Pure/deterministic, no durable
+  // state (ADR-0012 §View, not delete). Gated by the COMPACTION_ENABLED kill switch (recovery stays the
+  // net, ADR-0012 §Recovery is the net) AND the per-feature `contextEditingEnabled`. Returns the same array
+  // reference when nothing qualified, so the no-op case re-estimates nothing.
+  // NB (ADR-0012 §Stage 0 — context editing): the elided placeholders also flow into the prefix that
+  // Stage 2 would summarize, so a summarized result keeps only its placeholder —
+  // an accepted fidelity trade-off (a 40k dump's head+tail is poor summary fodder
+  // and the raw stays in the DB).
+  const contextEditing =
+    config.compactionEnabled && config.contextEditingEnabled
+      ? editToolResults(afterWatermark, {
+          keepRecentToolResults: config.keepRecentToolResults,
+          minEditableToolChars: config.minEditableToolChars,
+        })
+      : { messages: afterWatermark, resultsElided: 0, charsReclaimed: 0 };
+  const editedView = contextEditing.messages;
+  if (contextEditing.resultsElided > 0) {
+    logger.info(
+      {
+        metric: "context_edited",
+        chatId: input.chatId,
+        resultsElided: contextEditing.resultsElided,
+        charsReclaimed: contextEditing.charsReclaimed,
+      },
+      "context_edited",
+    );
+  }
+
+  const inject = (summary: string | null, msgs: PlatypusUIMessage[]) =>
+    summary ? [summaryUIMessage(summary), ...msgs] : msgs;
+
+  // The view that would be sent if we did nothing more this turn.
+  const baseView = inject(priorSummary, editedView);
+  const overheadTokens = input.overheadTokens ?? 0;
+  // Compute the char/4 pass over the unsummarized view once and reuse it
+  // for both the trigger projection and compactUIMessages' no-op gate.
+  const messageTokens = estimate(editedView);
+  const projected = projectTier1Tokens({
+    messageTokens,
+    priorSummaryTokens,
+    overheadTokens,
+    lastInputTokens: input.lastInputTokens,
+  });
+
+  const forceCompact = state.compactionDirty;
+  const triggered =
+    forceCompact ||
+    (config.compactionEnabled && projected >= budget.triggerTokens);
+
+  logger.info(
+    {
+      metric: "compaction.check",
+      chatId: input.chatId,
+      compactionEnabled: config.compactionEnabled,
+      projected,
+      triggerTokens: budget.triggerTokens,
+      targetTokens: budget.targetTokens,
+      inputBudget: budget.inputBudget,
+      triggered,
+      forceCompact,
+      messageTokens,
+      priorSummaryTokens,
+      overheadTokens: input.overheadTokens ?? 0,
+      lastInputTokens: input.lastInputTokens,
+    },
+    "compaction.check",
+  );
+
+  if (!triggered) {
+    return { messages: baseView, compacted: false };
+  }
+
+  // Compaction can only shrink the messages, never the per-turn overhead, so
+  // the target the messages must fit in is reduced by it (ADR-0012 §Tier 1 (hysteresis)). When the
+  // overhead alone exhausts the target, hysteresis is impossible — warn loudly
+  // (compaction will re-fire every turn) but still compact: recovery is the
+  // only other net.
+  const effectiveTarget = Math.max(0, budget.targetTokens - overheadTokens);
+  if (overheadTokens >= budget.targetTokens) {
+    logger.warn(
+      { chatId: input.chatId, overheadTokens, target: budget.targetTokens },
+      "system/tool overhead alone exceeds the compaction target — compaction will re-fire each turn",
+    );
+  }
+
+  // The hard wall the kept view must fit under (ADR-0012 §Hard window wall), net of the per-turn
+  // overhead compaction cannot shrink — mirrors how effectiveTarget adjusts the
+  // soft target. Recent tool results are trimmed only when this is breached.
+  const effectiveInputBudget = Math.max(0, budget.inputBudget - overheadTokens);
+
+  const result = await compactUIMessages(editedView, {
+    targetTokens: effectiveTarget,
+    inputBudget: effectiveInputBudget,
+    keepRecentMessages: config.keepRecentMessages,
+    minPrunableChars: config.minPrunableChars,
+    minRecentPrunableChars: config.minRecentPrunableChars,
+    imageProvider,
+    priorSummary,
+    summarize: input.summarize,
+    summarizerWindow: input.summarizerWindow,
+    // When dirty-forced the estimator already proved wrong (ADR-0012 §Recovery): bypass the
+    // no-op gate so recovery's dirty flag actually shrinks the history.
+    force: forceCompact,
+    // The no-op gate estimates this exact set; reuse the value above.
+    knownEstimate: messageTokens,
+  });
+
+  const view = inject(result.summaryText ?? priorSummary, result.keptMessages);
+
+  // Persist through the single CAS writer (ADR-0012 §One durable writer). The decision is gated on the
+  // version we read; if a concurrent writer advanced it, we skip rather than
+  // recompute (the wasted summarize is bounded, never corrupting). The
+  // version-pinning gate is shared so both write paths decide identically.
+  const capturedVersion = state.version;
+  // On a version mismatch we skip as "covered" WITHOUT clearing dirty (ADR-0012
+  // §One durable writer). Clearing on skip is only safe when the winner actually
+  // compacted; a concurrent invalidateCompaction also advances the version yet
+  // leaves dirty set on purpose (it resets the summary, it does not shrink
+  // history) — clearing dirty here would then drop the forced compaction the
+  // overflow demanded. Leaving dirty set is strictly safe: worst case is one
+  // extra compaction next turn.
+  const pinnedWrite = (patch: WatermarkPatch) =>
+    commitWatermark(input.store, input.chatId, (latest) =>
+      latest.version === capturedVersion
+        ? { kind: "write", patch }
+        : { kind: "skip", reason: "covered" },
+    );
+  let commit: CommitResult | undefined;
+
+  if (result.usedModelCall) {
+    // Same-basis before/after for the user-visible reduction: both are
+    // char/4 message estimates plus the per-turn overhead. The trigger
+    // `projected` mixes in the provider's `lastInputTokens` floor and is NOT
+    // comparable to the message-only post estimate, so reporting it as "before"
+    // overstated the drop. Computed only on the model-call path (the only place
+    // these are reported).
+    const tokensBefore = messageTokens + priorSummaryTokens + overheadTokens;
+    const tokensAfter = result.estimatedTokens + overheadTokens;
+
+    commit = await pinnedWrite({
+      summary: result.summaryText,
+      watermark: result.watermarkId,
+      dirty: false,
+    });
+    logger.info(
+      {
+        metric: "compaction.fired",
+        tier: 1,
+        chatId: input.chatId,
+        tokensBefore,
+        tokensAfter,
+        // Keep the raw trigger projection for correlation with compaction.check.
+        projected,
+        messagesDropped: result.messagesDropped,
+      },
+      "compaction.fired",
+    );
+    input.onEvent?.({
+      type: "context-compacted",
+      messagesDropped: result.messagesDropped,
+      tokensBefore,
+      tokensAfter,
+    });
+  } else if (state.compactionDirty) {
+    // Forced by recovery but pruning/within-target sufficed: just clear the flag.
+    commit = await pinnedWrite({ dirty: false });
+  }
+
+  // Only surface a trace when an actual model summary was produced. Prune-only
+  // and force-dirty-within-target runs drop 0 messages with no excerpt — a
+  // trace there would be an empty, confusing timeline entry (ADR-0012 §Compaction trace in the timeline).
+  const compactionTrace: CompactionTrace | undefined =
+    result.usedModelCall && result.summaryText
+      ? {
+          messagesDropped: result.messagesDropped,
+          summaryExcerpt: result.summaryText.slice(0, 120),
+        }
+      : undefined;
+
+  return {
+    messages: view,
+    compacted: result.usedModelCall,
+    commit,
+    compactionTrace,
+  };
+}
+
+/**
+ * Finds the summarized messages (those at or below the watermark) that the
+ * freshly submitted history changed or dropped — the ADR-0012 §Summary
+ * invalidation trigger. The client resubmits the full message array each turn
+ * (there is no separate edit/delete endpoint), so divergence is detected by
+ * comparing the persisted canonical history with the incoming one up to the
+ * watermark.
+ *
+ * @param persisted Canonical stored history.
+ * @param incoming History submitted with the current turn.
+ * @param watermarkId Id of the watermark message, or null when none is set.
+ * @returns Ids touched by an edit/delete/regenerate; an empty array means the
+ *   summary is still valid. When the watermark message itself is missing from
+ *   `persisted`, the result is just the watermark id.
+ */
+export function affectedBelowWatermark(
+  persisted: PlatypusUIMessage[],
+  incoming: PlatypusUIMessage[],
+  watermarkId: string | null,
+): string[] {
+  if (!watermarkId) return [];
+
+  const watermarkPos = persisted.findIndex((msg) => msg.id === watermarkId);
+  if (watermarkPos === -1) {
+    // watermark message gone entirely
+    return [watermarkId];
+  }
+
+  const incomingLookup = new Map(incoming.map((msg) => [msg.id, msg]));
+  return persisted
+    .slice(0, watermarkPos + 1)
+    .filter((stored) => {
+      // Messages without an id cannot be matched, so they are never reported.
+      if (!stored.id) return false;
+      const resubmitted = incomingLookup.get(stored.id);
+      if (!resubmitted) return true;
+      return (
+        stableStringify(resubmitted.parts) !== stableStringify(stored.parts)
+      );
+    })
+    .map((stored) => stored.id);
+}
+
+/**
+ * Flags the chat with `compactionDirty = true` after a context-overflow
+ * recovery (ADR-0012 §Recovery). Recovery never writes the summary or the
+ * watermark — it only raises the flag; the next `prepareChatTurn` sees it,
+ * forces Tier 1, and clears it inside the same CAS write that advances the
+ * watermark. The write goes through the single writer (ADR-0012 §One durable
+ * writer).
+ *
+ * @param store Compaction store passed to commitWatermark.
+ * @param chatId Chat to flag.
+ * @returns The commit result; a "no-op" skip when the flag was already set.
+ */
+export async function setCompactionDirty(
+  store: CompactionStore,
+  chatId: string,
+): Promise<CommitResult> {
+  return commitWatermark(store, chatId, (current) => {
+    if (current.compactionDirty) {
+      return { kind: "skip", reason: "no-op" };
+    }
+    return { kind: "write", patch: { dirty: true } };
+  });
+}
+
+/**
+ * Resets the stored summary and watermark when an edit/delete touched a
+ * message the summary covers. Goes through `commitWatermark`, like every other
+ * durable compaction write in this module.
+ *
+ * The write is skipped as a "no-op" when there is neither a watermark nor a
+ * summary to reset, or when none of `affectedIds` is missing from `orderedIds`
+ * or positioned at/below the watermark.
+ *
+ * @param store Compaction store passed to commitWatermark.
+ * @param chatId Chat whose summary may need invalidating.
+ * @param affectedIds Ids of the messages that were changed or dropped.
+ * @param orderedIds Message ids in history order, used to locate the watermark.
+ * @returns The commit result of the reset (or of the skip).
+ */
+export async function invalidateCompaction(
+  store: CompactionStore,
+  chatId: string,
+  affectedIds: string[],
+  orderedIds: string[],
+): Promise<CommitResult> {
+  // Position lookup built lazily (only when a reset is actually considered) and
+  // once, instead of an `indexOf` scan per affected id. First occurrence wins
+  // so results match `Array.prototype.indexOf`.
+  let positionById: Map<string, number> | undefined;
+  const positionOf = (id: string): number => {
+    if (!positionById) {
+      positionById = new Map<string, number>();
+      for (let i = 0; i < orderedIds.length; i++) {
+        if (!positionById.has(orderedIds[i])) positionById.set(orderedIds[i], i);
+      }
+    }
+    return positionById.get(id) ?? -1;
+  };
+
+  return commitWatermark(store, chatId, (state) => {
+    if (!state.summaryWatermark && !state.contextSummary) {
+      return { kind: "skip", reason: "no-op" };
+    }
+    const wmIndex = state.summaryWatermark
+      ? positionOf(state.summaryWatermark)
+      : orderedIds.length; // null watermark ⇒ everything is "summarized-from-start"
+    const affectsSummarized = affectedIds.some((id) => {
+      const i = positionOf(id);
+      // Affected message is missing (deleted) or sits at/below the watermark.
+      return i === -1 || (wmIndex !== -1 && i <= wmIndex);
+    });
+    if (!affectsSummarized) return { kind: "skip", reason: "no-op" };
+    return { kind: "write", patch: { summary: null, watermark: null } };
+  });
+}
+
+// --- Tier 2 in-turn compaction (ADR-0012 §Tier 2) ---
+
+/**
+ * Per-turn Tier 2 compaction context (ADR-0012 §Tier 2). Null when the ADR-0012 §Config & kill switch or
+ * agent config disables proactive compaction. Sub-agents also receive Tier 2
+ * (ADR-0012 §Sub-agents / §Tier 2 — they have no durable history for Tier 1, but their tool loop
+ * can bloat intra-turn).
+ */
+export type Tier2Context = {
+  /** Estimated tokens at or above which the prepareStep callback compacts. */
+  triggerTokens: number;
+  /** Forwarded to compactModelMessages as `targetTokens`. */
+  targetTokens: number;
+  /** Forwarded to compactModelMessages as `keepRecentMessages`. */
+  keepRecentMessages: number;
+  /** Forwarded to compactModelMessages as `minPrunableChars`. */
+  minPrunableChars: number;
+  /** Provider used for the trigger estimate and forwarded to the compactor. */
+  imageProvider: ImageProvider;
+  /** Summarizer forwarded to compactModelMessages. */
+  summarize: Summarize;
+  /** Forwarded unchanged to compactModelMessages. */
+  summarizerWindow?: number;
+};
+
+/**
+ * Builds the Tier 2 in-turn compaction `prepareStep` callback (ADR-0012 §Tier 2). Fires
+ * before each step of a tool loop when the accumulated model messages reach
+ * `triggerTokens` — compacts via `compactModelMessages` and returns the
+ * trimmed messages. Returns `undefined` when below the threshold so the SDK
+ * proceeds unchanged (ADR-0012 §Sub-agents / §Tier 2: no per-step overhead when the loop is small).
+ *
+ * The callback also returns `undefined` when compaction could not shrink the
+ * estimate (nothing dropped and nothing pruned). A prune-only result — zero
+ * messages dropped but a smaller estimate — IS returned, so the trimmed tool
+ * results actually reach the model.
+ *
+ * @param ctx Thresholds, estimator provider and summarizer for this turn.
+ * @returns A `prepareStep` function for the SDK tool loop.
+ */
+export function buildTier2PrepareStep(ctx: Tier2Context): PrepareStepFunction {
+  return async ({ messages }) => {
+    const estimate = estimateTokens(
+      modelMessagesToCountUnits(messages, ctx.imageProvider),
+    );
+    if (estimate < ctx.triggerTokens) return undefined;
+
+    const result = await compactModelMessages(messages, {
+      targetTokens: ctx.targetTokens,
+      keepRecentMessages: ctx.keepRecentMessages,
+      minPrunableChars: ctx.minPrunableChars,
+      imageProvider: ctx.imageProvider,
+      summarize: ctx.summarize,
+      summarizerWindow: ctx.summarizerWindow,
+      // Reuse the trigger-check estimate; skips a redundant full pass.
+      knownEstimate: estimate,
+    });
+
+    // Nothing was summarized AND the estimate did not shrink: the compactor
+    // returned the input effectively unchanged (already within target, or no
+    // prefix to work on), so let the SDK proceed with its own list. Checking
+    // `messagesDropped` alone would also discard a successful prune-only
+    // result and send the original oversized messages.
+    if (result.messagesDropped === 0 && result.estimatedTokens >= estimate) {
+      return undefined;
+    }
+
+    logger.info(
+      {
+        messagesDropped: result.messagesDropped,
+        estimatedTokensBefore: estimate,
+        estimatedTokensAfter: result.estimatedTokens,
+      },
+      "Tier 2 in-turn compaction fired",
+    );
+
+    return { messages: result.messages };
+  };
+}
diff --git a/apps/backend/src/runs/context-window.test.ts b/apps/backend/src/runs/context-window.test.ts
new file mode 100644
index 00000000..27c63a9a
--- /dev/null
+++ b/apps/backend/src/runs/context-window.test.ts
@@ -0,0 +1,350 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+
+vi.mock("../logger.ts", () => ({
+  logger: { warn: vi.fn(), info: vi.fn(), error: vi.fn(), debug: vi.fn() },
+}));
+
+import {
+  ContextWindowResolver,
+  lookupRegistry,
+  DEFAULT_CONTEXT_WINDOW,
+  type Registry,
+  type ProviderWindowInput,
+} from "./context-window.ts";
+
+const REGISTRY: Registry = {
+  "gpt-4o": { max_input_tokens: 128000, max_output_tokens: 16384 },
+  "claude-3-5-sonnet-20240620": {
+    max_input_tokens: 200000,
+    max_output_tokens: 8192,
+  },
+  "anthropic.claude-3-5-sonnet-20240620-v1:0": {
+    max_input_tokens: 200000,
+    max_output_tokens: 4096, // differs from the bare key so tests can tell them apart
+  },
+  "legacy-model": { max_tokens: 4096 }, // no max_input_tokens → must resolve to the default
+};
+
+const loadRegistry = () => Promise.resolve(REGISTRY); // injected in place of the vendored registry
+
+function resolver() {
+  return new ContextWindowResolver({ loadRegistry }); // fresh instance, so an empty cache
+}
+
+const openai: ProviderWindowInput = {
+  id: "prov-openai",
+  providerType: "OpenAI",
+  baseUrl: null, // no custom baseUrl → the OpenAI-compatible probe is skipped
+  apiKey: "sk-x",
+};
+
+describe("lookupRegistry — key normalization (ADR-0012 §Window resolution (key normalization))", () => {
+  it("exact match", () => {
+    expect(lookupRegistry(REGISTRY, "gpt-4o")?.max_input_tokens).toBe(128000);
+  });
+
+  it("strips a provider prefix", () => {
+    expect(lookupRegistry(REGISTRY, "openai/gpt-4o")?.max_input_tokens).toBe(
+      128000,
+    );
+  });
+
+  it("lowercases", () => { // "GPT-4o" differs from the registry key only by case
+    expect(lookupRegistry(REGISTRY, "GPT-4o")?.max_input_tokens).toBe(128000);
+  });
+
+  it("uses the alias map for an Azure deployment name", () => {
+    expect(
+      lookupRegistry(REGISTRY, "my-azure-deploy", {
+        "my-azure-deploy": "gpt-4o",
+      })?.max_input_tokens,
+    ).toBe(128000);
+  });
+
+  it("resolves a Bedrock ARN to its vendor.model id", () => {
+    const arn =
+      "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-5-sonnet-20240620-v1:0";
+    expect(lookupRegistry(REGISTRY, arn)?.max_output_tokens).toBe(4096); // 4096 = the "anthropic.…" entry
+  });
+
+  it("family heuristic: dated suffix matches the base key", () => {
+    // "gpt-4o-2024-11-20" → longest prefix key "gpt-4o"
+    expect(
+      lookupRegistry(REGISTRY, "gpt-4o-2024-11-20")?.max_input_tokens,
+    ).toBe(128000);
+  });
+
+  it("returns undefined on a true MISS", () => {
+    expect(lookupRegistry(REGISTRY, "totally-unknown-xyz")).toBeUndefined();
+  });
+});
+
+describe("resolveContextWindow — resolution order", () => {
+  beforeEach(() => vi.clearAllMocks()); // reset call history on the mocked logger
+
+  it("1. manual override wins over everything", async () => {
+    const r = resolver();
+    const out = await r.resolve(
+      {
+        ...openai,
+        modelMeta: {
+          "gpt-4o": { contextWindow: 64000, maxOutputTokens: 2048 },
+        },
+      },
+      "gpt-4o",
+    );
+    expect(out).toEqual({
+      contextWindow: 64000,
+      maxOutputTokens: 2048,
+      source: "override",
+    });
+  });
+
+  it("3. falls to the litellm registry when no override / API", async () => { // step 2 lives in "API auto-detect parsers"
+    const r = resolver();
+    const out = await r.resolve({ ...openai }, "gpt-4o"); // baseUrl is null → no probe
+    expect(out).toEqual({
+      contextWindow: 128000,
+      maxOutputTokens: 16384,
+      source: "registry",
+    });
+  });
+
+  it("ignores litellm max_tokens (output cap, not window) → default (ADR-0012 §Window resolution)", async () => {
+    // "legacy-model" has only max_tokens; that is the OUTPUT cap, so it must NOT
+    // be read as the context window. Falls through to the conservative default.
+    const r = resolver();
+    const out = await r.resolve({ ...openai }, "legacy-model");
+    expect(out.contextWindow).toBe(DEFAULT_CONTEXT_WINDOW);
+    expect(out.source).toBe("default");
+  });
+
+  it("merges a maxOutputTokens-only override onto a registry window (ADR-0012 §Window resolution)", async () => {
+    const r = resolver();
+    const out = await r.resolve(
+      { ...openai, modelMeta: { "gpt-4o": { maxOutputTokens: 999 } } },
+      "gpt-4o",
+    );
+    // No contextWindow override → window from registry, but output cap overridden.
+    expect(out).toEqual({
+      contextWindow: 128000,
+      maxOutputTokens: 999,
+      source: "registry",
+    });
+  });
+
+  it("4. conservative default + source=default on a MISS (ADR-0012 §Context-usage ring)", async () => {
+    const r = resolver();
+    const out = await r.resolve({ ...openai }, "unknown-model-zzz");
+    expect(out).toEqual({
+      contextWindow: DEFAULT_CONTEXT_WINDOW,
+      maxOutputTokens: undefined, // no override and no registry entry to supply one
+      source: "default",
+    });
+  });
+});
+
+describe("API auto-detect parsers", () => {
+  it("Google: inputTokenLimit / outputTokenLimit", async () => {
+    const httpGetJson = vi.fn().mockResolvedValue({
+      inputTokenLimit: 1048576,
+      outputTokenLimit: 8192,
+    });
+    const r = new ContextWindowResolver({ loadRegistry, httpGetJson });
+    const out = await r.resolve(
+      {
+        id: "g",
+        providerType: "Google",
+        baseUrl: "https://gen.example",
+        apiKey: "k",
+      },
+      "gemini-1.5-pro",
+    );
+    expect(out).toEqual({
+      contextWindow: 1048576,
+      maxOutputTokens: 8192,
+      source: "api",
+    });
+    expect(httpGetJson).toHaveBeenCalledWith(
+      "https://gen.example/v1beta/models/gemini-1.5-pro",
+      { "x-goog-api-key": "k" }, // the apiKey travels as a header
+    );
+  });
+
+  it("OpenRouter: matches id → context_length", async () => {
+    const httpGetJson = vi.fn().mockResolvedValue({
+      data: [
+        { id: "other", context_length: 1 }, // decoy: the id match must skip it
+        {
+          id: "meta-llama/llama-3.1-70b",
+          context_length: 131072,
+          top_provider: { max_completion_tokens: 4096 },
+        },
+      ],
+    });
+    const r = new ContextWindowResolver({ loadRegistry, httpGetJson });
+    const out = await r.resolve(
+      {
+        id: "or",
+        providerType: "OpenRouter",
+        baseUrl: "https://openrouter.ai",
+      },
+      "meta-llama/llama-3.1-70b",
+    );
+    expect(out).toEqual({
+      contextWindow: 131072,
+      maxOutputTokens: 4096,
+      source: "api",
+    });
+  });
+
+  it("vLLM / OpenAI-compatible: max_model_len from a custom baseUrl", async () => {
+    const httpGetJson = vi.fn().mockResolvedValue({
+      data: [{ id: "my-vllm-model", max_model_len: 32768 }],
+    });
+    const r = new ContextWindowResolver({ loadRegistry, httpGetJson });
+    const out = await r.resolve(
+      {
+        id: "v",
+        providerType: "OpenAI",
+        baseUrl: "http://localhost:8000",
+        apiKey: "x",
+      },
+      "my-vllm-model",
+    );
+    expect(out.contextWindow).toBe(32768);
+    expect(out.source).toBe("api");
+  });
+
+  it("vLLM: a baseUrl already ending in /v1 probes /v1/models, not /v1/v1/models", async () => {
+    const httpGetJson = vi.fn().mockResolvedValue({
+      data: [{ id: "qwen36", max_model_len: 262144 }],
+    });
+    const r = new ContextWindowResolver({ loadRegistry, httpGetJson });
+    const out = await r.resolve(
+      {
+        id: "v",
+        providerType: "OpenAI",
+        baseUrl: "http://localhost:8000/v1",
+        apiKey: "x",
+      },
+      "qwen36",
+    );
+    expect(out.contextWindow).toBe(262144);
+    expect(out.source).toBe("api");
+    expect(httpGetJson).toHaveBeenCalledWith(
+      "http://localhost:8000/v1/models",
+      expect.anything(), // the headers argument is not under test here
+    );
+  });
+
+  it("official OpenAI (no baseUrl) skips the probe and falls to registry", async () => {
+    const httpGetJson = vi.fn();
+    const r = new ContextWindowResolver({ loadRegistry, httpGetJson });
+    const out = await r.resolve({ ...openai, baseUrl: null }, "gpt-4o");
+    expect(httpGetJson).not.toHaveBeenCalled();
+    expect(out.source).toBe("registry");
+  });
+
+  it("a failing API probe falls through to the registry", async () => {
+    const httpGetJson = vi.fn().mockRejectedValue(new Error("boom")); // caught inside detectViaApi
+    const r = new ContextWindowResolver({ loadRegistry, httpGetJson });
+    const out = await r.resolve(
+      { id: "g", providerType: "Google", baseUrl: "https://gen.example" },
+      "gpt-4o", // present in REGISTRY, so the fall-through has something to find
+    );
+    expect(out.source).toBe("registry");
+  });
+});
+
+describe("registry load failure (ADR-0012 §Window resolution)", () => {
+  it("a throwing loader degrades to empty registry → default, no reject", async () => {
+    const r = new ContextWindowResolver({
+      loadRegistry: () => Promise.reject(new Error("bad vendored json")), // the resolver must swallow this
+    });
+    const out = await r.resolve({ ...openai }, "gpt-4o"); // would be a registry hit if loading worked
+    expect(out.source).toBe("default");
+    expect(out.contextWindow).toBe(DEFAULT_CONTEXT_WINDOW);
+  });
+});
+
+describe("cache + evict (ADR-0012 §Window resolution (caching & eviction))", () => {
+  it("caches within the TTL (one probe), evict forces a re-probe", async () => {
+    const httpGetJson = vi
+      .fn()
+      .mockResolvedValue({ data: [{ id: "m", max_model_len: 1000 }] });
+    const r = new ContextWindowResolver({ loadRegistry, httpGetJson });
+    const p = {
+      id: "v",
+      providerType: "OpenAI",
+      baseUrl: "http://x",
+      apiKey: "k",
+    };
+
+    await r.resolve(p, "m");
+    await r.resolve(p, "m");
+    expect(httpGetJson).toHaveBeenCalledTimes(1); // second hit served from cache
+
+    r.evict("v"); // drops every cached key that starts with "v:"
+    await r.resolve(p, "m");
+    expect(httpGetJson).toHaveBeenCalledTimes(2); // evict busted the cache
+  });
+
+  it("the cached value expires after the TTL", async () => {
+    let now = 1000; // fake clock, read through the injected `now` dependency
+    const httpGetJson = vi
+      .fn()
+      .mockResolvedValue({ data: [{ id: "m", max_model_len: 1000 }] });
+    const r = new ContextWindowResolver({
+      loadRegistry,
+      httpGetJson,
+      ttlMs: 100,
+      now: () => now,
+    });
+    const p = {
+      id: "v",
+      providerType: "OpenAI",
+      baseUrl: "http://x",
+      apiKey: "k",
+    };
+
+    await r.resolve(p, "m");
+    now += 50;
+    await r.resolve(p, "m");
+    expect(httpGetJson).toHaveBeenCalledTimes(1); // still within TTL
+
+    now += 100; // past TTL
+    await r.resolve(p, "m");
+    expect(httpGetJson).toHaveBeenCalledTimes(2);
+  });
+
+  it("a default-source result is cached briefly, not for the full TTL", async () => {
+    let now = 0;
+    // API probe never yields a window and the model is not in the registry →
+    // every resolve falls to source:"default".
+    const httpGetJson = vi.fn().mockResolvedValue({ data: [] });
+    const r = new ContextWindowResolver({
+      loadRegistry,
+      httpGetJson,
+      ttlMs: 60 * 60 * 1000, // full TTL is an hour
+      now: () => now,
+    });
+    const p = {
+      id: "v",
+      providerType: "OpenAI",
+      baseUrl: "http://x",
+      apiKey: "k",
+    };
+
+    const first = await r.resolve(p, "unknown-model");
+    expect(first.source).toBe("default");
+
+    now += 30 * 1000; // within the 60 s default-source TTL
+    await r.resolve(p, "unknown-model");
+    expect(httpGetJson).toHaveBeenCalledTimes(1); // still cached
+
+    now += 40 * 1000; // 70 s total — past the short TTL, far short of the hour
+    await r.resolve(p, "unknown-model");
+    expect(httpGetJson).toHaveBeenCalledTimes(2); // re-probed, blip not pinned
+  });
+});
diff --git a/apps/backend/src/runs/context-window.ts b/apps/backend/src/runs/context-window.ts
new file mode 100644
index 00000000..4e42f621
--- /dev/null
+++ b/apps/backend/src/runs/context-window.ts
@@ -0,0 +1,474 @@
+/**
+ * Context-window resolution (ADR-0012 §Window resolution).
+ *
+ * Resolves the usable context window (and max output tokens) for a
+ * provider+model, in this order:
+ *
+ *   1. Manual override     — `provider.modelMeta[modelId]`.
+ *   2. API auto-detect     — Google / OpenRouter / vLLM expose the window.
+ *   3. litellm registry    — community model price/context JSON (covers
+ *                            OpenAI / Anthropic / Bedrock, which don't expose it).
+ *   4. Conservative default — {@link DEFAULT_CONTEXT_WINDOW} (8192).
+ *
+ * A fall-through to the default, and every registry key MISS, is `logger.warn`'d:
+ * the window is then unknown and the ring must render neutral
+ * (ADR-0012 §Context-usage ring).
+ *
+ * Results are cached per `providerId:modelId` with a TTL. Editing a `modelMeta`
+ * override must call {@link ContextWindowResolver.evict} immediately so the
+ * override takes effect without waiting for the TTL
+ * (ADR-0012 §Window resolution (caching & eviction)).
+ *
+ * The registry lookup and HTTP probe are injected so this module is unit
+ * testable without network or a vendored multi-MB JSON file
+ * (ADR-0012 §Window resolution (key normalization) cases are
+ * exercised against small fixture registries).
+ */
+
+import { logger } from "../logger.ts";
+
+/** Conservative window, in tokens, returned when no other step resolves one. */
+export const DEFAULT_CONTEXT_WINDOW = 8192;
+
+/** Default cache TTL: API-detected windows can drift, the override path evicts. */
+export const DEFAULT_CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
+
+/**
+ * Short TTL for `source: "default"` resolutions (ADR-0012 §Window resolution (caching & eviction)). A registry
+ * MISS or a transient API failure falls to 8192; caching that for the full hour
+ * pins a wrong window long after the blip clears. A 60 s TTL lets the next turn
+ * re-probe while still collapsing a burst of lookups (capped at the resolver's `ttlMs`).
+ */
+export const DEFAULT_SOURCE_CACHE_TTL_MS = 60 * 1000; // 1 minute
+
+/** Where a resolved window came from — drives ring neutrality (ADR-0012 §Context-usage ring). */
+export type WindowSource = "override" | "api" | "registry" | "default";
+
+export type ResolvedWindow = {
+  contextWindow: number; // always set; DEFAULT_CONTEXT_WINDOW when source is "default"
+  maxOutputTokens?: number; // a modelMeta override wins over the detected value
+  source: WindowSource;
+};
+
+/** The slice of a provider row this module needs. */
+export type ProviderWindowInput = {
+  id: string; // prefix of the resolver's cache key
+  providerType: string; // "Google", "OpenRouter" and "OpenAI" select an API probe
+  baseUrl?: string | null; // probe root; the "OpenAI" probe is skipped without it
+  apiKey?: string | null; // sent as a header by the Google and OpenAI-compatible probes
+  modelMeta?: Record<
+    string, // model id
+    { contextWindow?: number; maxOutputTokens?: number }
+  > | null;
+};
+
+/** A litellm registry entry (subset of the fields we read). */
+export type RegistryEntry = {
+  max_input_tokens?: number; // the only field used as the context window
+  max_output_tokens?: number;
+  max_tokens?: number; // litellm's output cap; never read as the window
+};
+
+export type Registry = Record<string, RegistryEntry>; // keyed by litellm model key
+
+/** Fetches and parses JSON from a URL. Injected so tests avoid network. */
+export type HttpGetJson = (
+  url: string,
+  headers?: Record<string, string>,
+) => Promise<unknown>;
+
+export type ResolverDeps = {
+  /** Provides the litellm registry (lazy; may be empty until vendored). */
+  loadRegistry?: () => Promise<Registry>;
+  /** model id → registry key aliases (Bedrock ARNs, Azure deployments, …). */
+  aliasMap?: Record<string, string>;
+  httpGetJson?: HttpGetJson; // defaults to a fetch-based GET with a timeout
+  ttlMs?: number; // defaults to DEFAULT_CACHE_TTL_MS
+  now?: () => number; // clock for cache expiry; defaults to Date.now
+};
+
+// ---------------------------------------------------------------------------
+// litellm registry key normalization (ADR-0012 §Window resolution (key normalization))
+// ---------------------------------------------------------------------------
+
+/** Returns the `vendor.model` tail of a Bedrock foundation-model ARN, else `undefined`. */
+function bedrockModelFromArn(modelId: string): string | undefined {
+  const [, vendorModel] = /foundation-model\/(.+)$/.exec(modelId) ?? [];
+  return vendorModel;
+}
+
+/**
+ * Resolves a registry entry for a model id via the normalization chain:
+ * exact → strip provider prefix → lowercase → alias map → Bedrock ARN →
+ * family heuristic (longest separator-bounded key prefix) → MISS (undefined).
+ */
+export function lookupRegistry(
+  registry: Registry,
+  modelId: string,
+  aliasMap: Record<string, string> = {},
+): RegistryEntry | undefined {
+  // 1. exact
+  if (registry[modelId]) return registry[modelId];
+
+  // 2. strip provider prefix ("openai/gpt-4o" → "gpt-4o")
+  const slash = modelId.indexOf("/");
+  const stripped = slash >= 0 ? modelId.slice(slash + 1) : modelId;
+  if (stripped !== modelId && registry[stripped]) return registry[stripped];
+
+  // 3. lowercase variants
+  const lowerExact = modelId.toLowerCase();
+  if (registry[lowerExact]) return registry[lowerExact];
+  const lowerStripped = stripped.toLowerCase();
+  if (registry[lowerStripped]) return registry[lowerStripped];
+
+  // 4. alias map (Azure deployment names, custom vLLM names, …)
+  const alias = aliasMap[modelId];
+  if (alias && registry[alias]) return registry[alias];
+
+  // 5. Bedrock ARN → vendor.model, tried bare and under the "bedrock/" prefix,
+  // each also lowercased (registry keys for Bedrock are lowercase; ARNs are not
+  // guaranteed to be).
+  const bedrock = bedrockModelFromArn(modelId);
+  if (bedrock) {
+    const candidates = [
+      bedrock,
+      `bedrock/${bedrock}`,
+      bedrock.toLowerCase(),
+      `bedrock/${bedrock.toLowerCase()}`,
+    ];
+    for (const c of candidates) if (registry[c]) return registry[c];
+  }
+
+  // 6. family heuristic — longest registry key that prefixes the id and is
+  // followed by "-", ":", "/" or a non-decimal ".". A "." between two digits
+  // is a version point, so "gpt-4" must NOT claim "gpt-4.5-preview" (that
+  // silently applied a stale 8192 window). Comparison is case-insensitive.
+  const isFamilyMember = (id: string, key: string): boolean => {
+    if (id === key) return true;
+    if (!id.startsWith(key)) return false;
+    const sep = id.charAt(key.length);
+    if (sep !== ".") return sep === "-" || sep === ":" || sep === "/";
+    return !(/\d$/.test(key) && /^\d/.test(id.slice(key.length + 1)));
+  };
+  const longestFamily = (id: string): RegistryEntry | undefined => {
+    let bestKey: string | undefined;
+    for (const key of Object.keys(registry)) {
+      const longer = bestKey === undefined || key.length > bestKey.length;
+      if (longer && isFamilyMember(id, key.toLowerCase())) bestKey = key;
+    }
+    return bestKey === undefined ? undefined : registry[bestKey];
+  };
+
+  // The prefix-stripped id goes first, so ids that already matched resolve as
+  // before; the full id then lets prefixed keys ("Qwen/…", "meta-llama/…")
+  // match. 7. MISS → undefined.
+  return longestFamily(lowerStripped) ?? longestFamily(lowerExact);
+}
+
+function windowFromRegistryEntry(entry: RegistryEntry): {
+  contextWindow?: number;
+  maxOutputTokens?: number;
+} {
+  // `max_tokens` is deliberately not read: in litellm it is the OUTPUT cap, and
+  // using it as the window would under-size the context and over-compact
+  // (ADR-0012 §Window resolution). Without `max_input_tokens` the window stays
+  // undefined, so the caller falls to the default (warn + neutral ring).
+  const {
+    max_input_tokens: contextWindow,
+    max_output_tokens: maxOutputTokens,
+  } = entry;
+  return { contextWindow, maxOutputTokens };
+}
+
+// ---------------------------------------------------------------------------
+// API auto-detect parsers
+// ---------------------------------------------------------------------------
+
+function trimSlash(url: string): string {
+  return url.replace(/\/+$/, ""); // drops every trailing "/"; the rest is untouched
+}
+
+/** Google probe: reads `inputTokenLimit` / `outputTokenLimit` for one model. */
+async function detectGoogle(
+  provider: ProviderWindowInput,
+  modelId: string,
+  httpGetJson: HttpGetJson,
+): Promise<Partial<ResolvedWindow> | undefined> {
+  // Drop a trailing "/v1beta" (as the OpenAI-compatible probe drops "/v1") so
+  // a baseUrl that already carries it is not probed at "/v1beta/v1beta/…".
+  const root = provider.baseUrl || "https://generativelanguage.googleapis.com";
+  const base = trimSlash(root).replace(/\/v1beta$/, "");
+  const headers = provider.apiKey
+    ? { "x-goog-api-key": provider.apiKey }
+    : undefined;
+  const body = (await httpGetJson(
+    `${base}/v1beta/models/${modelId}`,
+    headers,
+  )) as {
+    inputTokenLimit?: number;
+    outputTokenLimit?: number;
+  };
+  if (typeof body?.inputTokenLimit !== "number") return undefined;
+  return {
+    contextWindow: body.inputTokenLimit,
+    maxOutputTokens: body.outputTokenLimit,
+    source: "api",
+  };
+}
+
+/** OpenRouter probe; a baseUrl ending in "/api/v1" is accepted (suffix stripped). */
+async function detectOpenRouter(
+  provider: ProviderWindowInput,
+  modelId: string,
+  httpGetJson: HttpGetJson,
+): Promise<Partial<ResolvedWindow> | undefined> {
+  const base = trimSlash(provider.baseUrl || "https://openrouter.ai");
+  const url = `${base.replace(/\/api\/v1$/, "")}/api/v1/models`; // never "/api/v1/api/v1/models"
+  const body = (await httpGetJson(url)) as {
+    data?: Array<{
+      id?: string;
+      context_length?: number;
+      top_provider?: { max_completion_tokens?: number };
+    }>;
+  };
+  const entry = body?.data?.find((m) => m.id === modelId);
+  if (typeof entry?.context_length !== "number") return undefined;
+  return {
+    contextWindow: entry.context_length,
+    maxOutputTokens: entry.top_provider?.max_completion_tokens,
+    source: "api",
+  };
+}
+
+async function detectOpenAiCompatible(
+  provider: ProviderWindowInput,
+  modelId: string,
+  httpGetJson: HttpGetJson,
+): Promise<Partial<ResolvedWindow> | undefined> {
+  const { baseUrl, apiKey } = provider;
+  // Without a custom baseUrl this is official OpenAI, which omits the field.
+  if (!baseUrl) return undefined;
+
+  // The OpenAI SDK wants baseUrl to end in "/v1", yet the listing lives at
+  // "{root}/v1/models": drop that suffix first, otherwise the probe would hit
+  // "/v1/v1/models", 404, and silently fall to the default window.
+  const root = trimSlash(baseUrl).replace(/\/v1$/, "");
+  let headers: { authorization: string } | undefined;
+  if (apiKey) headers = { authorization: `Bearer ${apiKey}` };
+  const body = (await httpGetJson(`${root}/v1/models`, headers)) as {
+    data?: Array<{ id?: string; max_model_len?: number }>;
+  };
+
+  // vLLM and most OpenAI-compatible servers expose `max_model_len`.
+  const limit = body?.data?.find((m) => m.id === modelId)?.max_model_len;
+  if (typeof limit !== "number") return undefined;
+  return { contextWindow: limit, source: "api" };
+}
+
+async function detectViaApi(
+  provider: ProviderWindowInput,
+  modelId: string,
+  httpGetJson: HttpGetJson,
+): Promise<Partial<ResolvedWindow> | undefined> {
+  const { id: providerId, providerType } = provider;
+  // Anthropic / Bedrock (and any other type) expose no window over an API, so
+  // no probe is selected for them and the caller goes on to the registry.
+  let probe: typeof detectGoogle | undefined;
+  if (providerType === "Google") probe = detectGoogle;
+  else if (providerType === "OpenRouter") probe = detectOpenRouter;
+  else if (providerType === "OpenAI") probe = detectOpenAiCompatible;
+  if (!probe) return undefined;
+
+  try {
+    return await probe(provider, modelId, httpGetJson);
+  } catch (error) {
+    logger.warn(
+      {
+        error,
+        providerId,
+        modelId,
+        providerType,
+      },
+      "context-window API auto-detect failed; falling through",
+    );
+    return undefined;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Resolver (cache + evict)
+// ---------------------------------------------------------------------------
+
+/** ADR-0012 §Window resolution (caching & eviction): probes abort after 5 s, so a hung provider endpoint never blocks turns for ~300 s. */
+const API_DETECT_TIMEOUT_MS = 5000;
+
+const defaultHttpGetJson: HttpGetJson = async (url, headers) => {
+  // The abort signal enforces the hard cap on the whole request.
+  const signal = AbortSignal.timeout(API_DETECT_TIMEOUT_MS);
+  const response = await fetch(url, { headers, signal });
+  // Non-OK responses surface as errors rather than being parsed as JSON.
+  if (response.ok) return response.json();
+  throw new Error(`GET ${url} → ${response.status}`);
+};
+
+type CacheEntry = { value: ResolvedWindow; expiresAt: number };
+
+/** Caching, single-flight resolver: override → API probe → registry → default. */
+export class ContextWindowResolver {
+  #cache = new Map<string, CacheEntry>();
+  /** ADR-0012 §Window resolution (caching & eviction): single-flight — concurrent callers for the same key share one fetch. */
+  #inflight = new Map<string, Promise<ResolvedWindow>>();
+  #loadRegistry: () => Promise<Registry>;
+  #registry: Registry | undefined;
+  #aliasMap: Record<string, string>;
+  #httpGetJson: HttpGetJson;
+  #ttlMs: number;
+  #now: () => number;
+
+  constructor(deps: ResolverDeps = {}) {
+    this.#loadRegistry =
+      deps.loadRegistry ?? ((): Promise<Registry> => Promise.resolve({}));
+    this.#aliasMap = deps.aliasMap ?? {};
+    this.#httpGetJson = deps.httpGetJson ?? defaultHttpGetJson;
+    this.#ttlMs = deps.ttlMs ?? DEFAULT_CACHE_TTL_MS;
+    this.#now = deps.now ?? (() => Date.now());
+  }
+
+  /** Drops all cached windows for a provider — call on `modelMeta` edit (ADR-0012 §Window resolution (caching & eviction)). */
+  evict(providerId: string): void {
+    for (const key of this.#cache.keys()) {
+      if (key.startsWith(`${providerId}:`)) this.#cache.delete(key);
+    }
+    // Detach in-flight fetches too so their late results are never cached.
+    for (const key of this.#inflight.keys()) {
+      if (key.startsWith(`${providerId}:`)) this.#inflight.delete(key);
+    }
+  }
+
+  async #registryEntry(modelId: string): Promise<RegistryEntry | undefined> {
+    if (this.#registry === undefined) {
+      // A failing loader (bad vendored JSON, fs error) must not reject the whole
+      // resolution — degrade to an empty registry + warn (ADR-0012 §Window resolution).
+      try {
+        this.#registry = await this.#loadRegistry();
+      } catch (error) {
+        logger.warn(
+          { error },
+          "litellm registry load failed; treating as empty",
+        );
+        this.#registry = {};
+      }
+    }
+    return lookupRegistry(this.#registry, modelId, this.#aliasMap);
+  }
+
+  /** Resolves (cached per `providerId:modelId`) the window for a provider+model. */
+  async resolve(
+    provider: ProviderWindowInput,
+    modelId: string,
+  ): Promise<ResolvedWindow> {
+    const cacheKey = `${provider.id}:${modelId}`;
+    const cached = this.#cache.get(cacheKey);
+    if (cached && cached.expiresAt > this.#now()) return cached.value;
+
+    // Single-flight (ADR-0012 §Window resolution): share the in-flight promise.
+    const existing = this.#inflight.get(cacheKey);
+    if (existing) return existing;
+
+    const promise = this.#resolveUncached(provider, modelId).then((value) => {
+      // Cache only while this promise is still the registered in-flight one.
+      // evict() unregisters it mid-fetch; without the check the stale pre-update
+      // value would be cached and defeat the eviction for a full TTL.
+      if (this.#inflight.get(cacheKey) === promise) {
+        // A default-source result gets the short TTL so a blip is not pinned.
+        const ttl =
+          value.source === "default"
+            ? Math.min(DEFAULT_SOURCE_CACHE_TTL_MS, this.#ttlMs)
+            : this.#ttlMs;
+        this.#cache.set(cacheKey, { value, expiresAt: this.#now() + ttl });
+        this.#inflight.delete(cacheKey);
+      }
+      return value;
+    });
+    // Store before awaiting so concurrent callers see the same promise.
+    this.#inflight.set(cacheKey, promise);
+    try {
+      return await promise;
+    } catch (err) {
+      // Only unregister our own promise; a newer fetch may own the key.
+      const stillRegistered = this.#inflight.get(cacheKey) === promise;
+      if (stillRegistered) this.#inflight.delete(cacheKey);
+      throw err;
+    }
+  }
+
+  async #resolveUncached(
+    provider: ProviderWindowInput,
+    modelId: string,
+  ): Promise<ResolvedWindow> {
+    // 1. Manual override
+    const override = provider.modelMeta?.[modelId];
+    if (override?.contextWindow) {
+      return {
+        contextWindow: override.contextWindow,
+        maxOutputTokens: override.maxOutputTokens,
+        source: "override",
+      };
+    }
+
+    // 2. API auto-detect
+    const api = await detectViaApi(provider, modelId, this.#httpGetJson);
+    if (api?.contextWindow) {
+      return {
+        contextWindow: api.contextWindow,
+        maxOutputTokens: override?.maxOutputTokens ?? api.maxOutputTokens,
+        source: "api",
+      };
+    }
+
+    // 3. litellm registry
+    const entry = await this.#registryEntry(modelId);
+    if (entry) {
+      const { contextWindow, maxOutputTokens } = windowFromRegistryEntry(entry);
+      if (contextWindow) {
+        return {
+          contextWindow,
+          maxOutputTokens: override?.maxOutputTokens ?? maxOutputTokens,
+          source: "registry",
+        };
+      }
+    } else {
+      logger.warn(
+        {
+          metric: "litellm.key_miss",
+          providerId: provider.id,
+          modelId,
+          providerType: provider.providerType,
+        },
+        "litellm registry key MISS — falling to default window",
+      );
+    }
+
+    // 4. Conservative default
+    logger.warn(
+      {
+        metric: "context_window.fell_to_default",
+        providerId: provider.id,
+        modelId,
+        default: DEFAULT_CONTEXT_WINDOW,
+      },
+      "context window unresolved — using conservative default (ring neutral)",
+    );
+    return {
+      contextWindow: DEFAULT_CONTEXT_WINDOW,
+      maxOutputTokens: override?.maxOutputTokens,
+      source: "default",
+    };
+  }
+}
+
+import { loadBuiltinRegistry } from "./litellm-registry.ts";
+/** Process-wide resolver, backed by the built-in registry. Routes use this; tests construct their own. */
+export const contextWindowResolver = new ContextWindowResolver({
+  loadRegistry: loadBuiltinRegistry,
+});
diff --git a/apps/backend/src/runs/litellm-registry.ts b/apps/backend/src/runs/litellm-registry.ts
new file mode 100644
index 00000000..8d217a75
--- /dev/null
+++ b/apps/backend/src/runs/litellm-registry.ts
@@ -0,0 +1,346 @@
+/**
+ * Minimal vendored subset of the litellm model_prices_and_context_window.json
+ * (MIT licence — https://github.com/BerriAI/litellm).
+ *
+ * Only includes `max_input_tokens` and `max_output_tokens` — the two fields
+ * {@link ContextWindowResolver} reads. Covers models whose window is not exposed
+ * by a live API call (OpenAI, Anthropic, Bedrock, Mistral, OpenAI-compatible
+ * Llama/Qwen hosts). Google and OpenRouter are auto-detected at runtime instead.
+ *
+ * Keys are bare model ids (litellm naming convention), except `bedrock/`-prefixed
+ * Bedrock entries and `org/model` ids for Llama/Qwen. The lookup in context-window.ts
+ * tries exact → stripped → lowercase → alias → Bedrock ARN → family heuristic before a MISS.
+ *
+ * Keep sorted alphabetically within each vendor section for easier diffing.
+ * Update when models whose windows differ from their family default are released.
+ */
+
+import type { Registry } from "./context-window.ts";
+
+const REGISTRY: Registry = {
+  // ---------------------------------------------------------------------------
+  // OpenAI
+  // ---------------------------------------------------------------------------
+  "chatgpt-4o-latest": { max_input_tokens: 128000, max_output_tokens: 16384 },
+  "gpt-3.5-turbo": { max_input_tokens: 16385, max_output_tokens: 4096 },
+  "gpt-3.5-turbo-0125": { max_input_tokens: 16385, max_output_tokens: 4096 },
+  "gpt-3.5-turbo-16k": { max_input_tokens: 16385, max_output_tokens: 4096 },
+  "gpt-4": { max_input_tokens: 8192, max_output_tokens: 8192 },
+  "gpt-4-0125-preview": { max_input_tokens: 128000, max_output_tokens: 4096 },
+  "gpt-4-1106-preview": { max_input_tokens: 128000, max_output_tokens: 4096 },
+  "gpt-4-turbo": { max_input_tokens: 128000, max_output_tokens: 4096 },
+  "gpt-4-turbo-preview": { max_input_tokens: 128000, max_output_tokens: 4096 },
+  "gpt-4-vision-preview": { max_input_tokens: 128000, max_output_tokens: 4096 },
+  "gpt-4.1": { max_input_tokens: 1047576, max_output_tokens: 32768 },
+  "gpt-4.1-mini": { max_input_tokens: 1047576, max_output_tokens: 32768 },
+  "gpt-4.1-nano": { max_input_tokens: 1047576, max_output_tokens: 32768 },
+  "gpt-4.5-preview": { max_input_tokens: 128000, max_output_tokens: 16384 },
+  "gpt-4o": { max_input_tokens: 128000, max_output_tokens: 16384 },
+  "gpt-4o-2024-05-13": { max_input_tokens: 128000, max_output_tokens: 4096 },
+  "gpt-4o-2024-08-06": { max_input_tokens: 128000, max_output_tokens: 16384 },
+  "gpt-4o-2024-11-20": { max_input_tokens: 128000, max_output_tokens: 16384 },
+  "gpt-4o-audio-preview": {
+    max_input_tokens: 128000,
+    max_output_tokens: 16384,
+  },
+  "gpt-4o-mini": { max_input_tokens: 128000, max_output_tokens: 16384 },
+  "gpt-4o-mini-2024-07-18": {
+    max_input_tokens: 128000,
+    max_output_tokens: 16384,
+  },
+  "gpt-4o-mini-audio-preview": {
+    max_input_tokens: 128000,
+    max_output_tokens: 16384,
+  },
+  o1: { max_input_tokens: 200000, max_output_tokens: 100000 },
+  "o1-mini": { max_input_tokens: 128000, max_output_tokens: 65536 },
+  "o1-preview": { max_input_tokens: 128000, max_output_tokens: 32768 },
+  o3: { max_input_tokens: 200000, max_output_tokens: 100000 },
+  "o3-mini": { max_input_tokens: 200000, max_output_tokens: 100000 },
+  "o4-mini": { max_input_tokens: 200000, max_output_tokens: 100000 },
+
+  // ---------------------------------------------------------------------------
+  // Anthropic (direct API — also covered under bedrock/ below)
+  // ---------------------------------------------------------------------------
+  "claude-2": { max_input_tokens: 100000, max_output_tokens: 4096 },
+  "claude-2.1": { max_input_tokens: 200000, max_output_tokens: 4096 },
+  "claude-3-haiku-20240307": {
+    max_input_tokens: 200000,
+    max_output_tokens: 4096,
+  },
+  "claude-3-opus-20240229": {
+    max_input_tokens: 200000,
+    max_output_tokens: 4096,
+  },
+  "claude-3-sonnet-20240229": {
+    max_input_tokens: 200000,
+    max_output_tokens: 4096,
+  },
+  "claude-3-5-haiku-20241022": {
+    max_input_tokens: 200000,
+    max_output_tokens: 8192,
+  },
+  "claude-3-5-sonnet-20240620": {
+    max_input_tokens: 200000,
+    max_output_tokens: 8192,
+  },
+  "claude-3-5-sonnet-20241022": {
+    max_input_tokens: 200000,
+    max_output_tokens: 8192,
+  },
+  "claude-3-7-sonnet-20250219": {
+    max_input_tokens: 200000,
+    max_output_tokens: 128000,
+  },
+  "claude-haiku-4-5-20251001": {
+    max_input_tokens: 200000,
+    max_output_tokens: 8192,
+  },
+  "claude-opus-4-5": { max_input_tokens: 200000, max_output_tokens: 32000 },
+  "claude-opus-4-8": { max_input_tokens: 200000, max_output_tokens: 32000 },
+  "claude-sonnet-4-5": { max_input_tokens: 200000, max_output_tokens: 64000 },
+  "claude-sonnet-4-6": { max_input_tokens: 200000, max_output_tokens: 64000 },
+  "claude-instant-1": { max_input_tokens: 100000, max_output_tokens: 4096 },
+  "claude-instant-1.2": { max_input_tokens: 100000, max_output_tokens: 4096 },
+
+  // ---------------------------------------------------------------------------
+  // Bedrock — Anthropic models
+  // ---------------------------------------------------------------------------
+  "bedrock/anthropic.claude-instant-v1": {
+    max_input_tokens: 100000,
+    max_output_tokens: 4096,
+  },
+  "bedrock/anthropic.claude-v2": {
+    max_input_tokens: 100000,
+    max_output_tokens: 4096,
+  },
+  "bedrock/anthropic.claude-v2:1": {
+    max_input_tokens: 200000,
+    max_output_tokens: 4096,
+  },
+  "bedrock/anthropic.claude-3-haiku-20240307-v1:0": {
+    max_input_tokens: 200000,
+    max_output_tokens: 4096,
+  },
+  "bedrock/anthropic.claude-3-sonnet-20240229-v1:0": {
+    max_input_tokens: 200000,
+    max_output_tokens: 4096,
+  },
+  "bedrock/anthropic.claude-3-opus-20240229-v1:0": {
+    max_input_tokens: 200000,
+    max_output_tokens: 4096,
+  },
+  "bedrock/anthropic.claude-3-5-haiku-20241022-v1:0": {
+    max_input_tokens: 200000,
+    max_output_tokens: 8192,
+  },
+  "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0": {
+    max_input_tokens: 200000,
+    max_output_tokens: 8192,
+  },
+  "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0": {
+    max_input_tokens: 200000,
+    max_output_tokens: 8192,
+  },
+  "bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0": {
+    max_input_tokens: 200000,
+    max_output_tokens: 128000,
+  },
+
+  // ---------------------------------------------------------------------------
+  // Bedrock — Meta Llama
+  // ---------------------------------------------------------------------------
+  "bedrock/meta.llama3-8b-instruct-v1:0": {
+    max_input_tokens: 8192, // Llama 3 (pre-3.1) is an 8K-context model
+    max_output_tokens: 8192,
+  },
+  "bedrock/meta.llama3-70b-instruct-v1:0": {
+    max_input_tokens: 8192, // Llama 3 (pre-3.1) is an 8K-context model
+    max_output_tokens: 8192,
+  },
+  "bedrock/meta.llama3-1-8b-instruct-v1:0": {
+    max_input_tokens: 128000,
+    max_output_tokens: 8192,
+  },
+  "bedrock/meta.llama3-1-70b-instruct-v1:0": {
+    max_input_tokens: 128000,
+    max_output_tokens: 8192,
+  },
+  "bedrock/meta.llama3-1-405b-instruct-v1:0": {
+    max_input_tokens: 128000,
+    max_output_tokens: 8192,
+  },
+  "bedrock/meta.llama3-2-1b-instruct-v1:0": {
+    max_input_tokens: 128000,
+    max_output_tokens: 8192,
+  },
+  "bedrock/meta.llama3-2-3b-instruct-v1:0": {
+    max_input_tokens: 128000,
+    max_output_tokens: 8192,
+  },
+  "bedrock/meta.llama3-2-11b-instruct-v1:0": {
+    max_input_tokens: 128000,
+    max_output_tokens: 8192,
+  },
+  "bedrock/meta.llama3-2-90b-instruct-v1:0": {
+    max_input_tokens: 128000,
+    max_output_tokens: 8192,
+  },
+
+  // ---------------------------------------------------------------------------
+  // Bedrock — Amazon Titan/Nova
+  // ---------------------------------------------------------------------------
+  "bedrock/amazon.nova-lite-v1:0": {
+    max_input_tokens: 300000,
+    max_output_tokens: 5120,
+  },
+  "bedrock/amazon.nova-micro-v1:0": {
+    max_input_tokens: 128000,
+    max_output_tokens: 5120,
+  },
+  "bedrock/amazon.nova-pro-v1:0": {
+    max_input_tokens: 300000,
+    max_output_tokens: 5120,
+  },
+  "bedrock/amazon.titan-text-express-v1": {
+    max_input_tokens: 8192,
+    max_output_tokens: 8192,
+  },
+  "bedrock/amazon.titan-text-lite-v1": {
+    max_input_tokens: 4096,
+    max_output_tokens: 4096,
+  },
+  "bedrock/amazon.titan-text-premier-v1:0": {
+    max_input_tokens: 32000,
+    max_output_tokens: 3072,
+  },
+
+  // ---------------------------------------------------------------------------
+  // Bedrock — Mistral
+  // ---------------------------------------------------------------------------
+  "bedrock/mistral.mistral-7b-instruct-v0:2": {
+    max_input_tokens: 32768,
+    max_output_tokens: 8192,
+  },
+  "bedrock/mistral.mistral-large-2402-v1:0": {
+    max_input_tokens: 32768,
+    max_output_tokens: 8192,
+  },
+  "bedrock/mistral.mistral-large-2407-v1:0": {
+    max_input_tokens: 131072,
+    max_output_tokens: 8192,
+  },
+  "bedrock/mistral.mixtral-8x7b-instruct-v0:1": {
+    max_input_tokens: 32768,
+    max_output_tokens: 8192,
+  },
+
+  // ---------------------------------------------------------------------------
+  // Mistral (direct API)
+  // ---------------------------------------------------------------------------
+  "mistral-large": { max_input_tokens: 131072, max_output_tokens: 4096 },
+  "mistral-large-latest": { max_input_tokens: 131072, max_output_tokens: 4096 },
+  "mistral-medium": { max_input_tokens: 32768, max_output_tokens: 4096 },
+  "mistral-small": { max_input_tokens: 32768, max_output_tokens: 4096 },
+  "mistral-small-latest": { max_input_tokens: 32768, max_output_tokens: 4096 },
+  "mistral-tiny": { max_input_tokens: 32768, max_output_tokens: 4096 },
+  "mixtral-8x7b": { max_input_tokens: 32768, max_output_tokens: 4096 },
+  "mixtral-8x22b": { max_input_tokens: 65536, max_output_tokens: 4096 },
+
+  // ---------------------------------------------------------------------------
+  // Meta Llama (direct / OpenAI-compat, e.g. Together.ai, Fireworks)
+  // ---------------------------------------------------------------------------
+  "meta-llama/Llama-2-7b-chat-hf": {
+    max_input_tokens: 4096,
+    max_output_tokens: 4096,
+  },
+  "meta-llama/Llama-2-13b-chat-hf": {
+    max_input_tokens: 4096,
+    max_output_tokens: 4096,
+  },
+  "meta-llama/Llama-2-70b-chat-hf": {
+    max_input_tokens: 4096,
+    max_output_tokens: 4096,
+  },
+  "meta-llama/Meta-Llama-3-8B-Instruct": {
+    max_input_tokens: 8192,
+    max_output_tokens: 8192,
+  },
+  "meta-llama/Meta-Llama-3-70B-Instruct": {
+    max_input_tokens: 8192,
+    max_output_tokens: 8192,
+  },
+  "meta-llama/Meta-Llama-3.1-8B-Instruct": {
+    max_input_tokens: 131072,
+    max_output_tokens: 8192,
+  },
+  "meta-llama/Meta-Llama-3.1-70B-Instruct": {
+    max_input_tokens: 131072,
+    max_output_tokens: 8192,
+  },
+  "meta-llama/Meta-Llama-3.1-405B-Instruct": {
+    max_input_tokens: 131072,
+    max_output_tokens: 8192,
+  },
+  "meta-llama/Llama-3.2-1B-Instruct": {
+    max_input_tokens: 131072,
+    max_output_tokens: 8192,
+  },
+  "meta-llama/Llama-3.2-3B-Instruct": {
+    max_input_tokens: 131072,
+    max_output_tokens: 8192,
+  },
+  "meta-llama/Llama-3.2-11B-Vision-Instruct": {
+    max_input_tokens: 131072,
+    max_output_tokens: 8192,
+  },
+  "meta-llama/Llama-3.2-90B-Vision-Instruct": {
+    max_input_tokens: 131072,
+    max_output_tokens: 8192,
+  },
+  "meta-llama/Llama-3.3-70B-Instruct": {
+    max_input_tokens: 131072,
+    max_output_tokens: 8192,
+  },
+  "meta-llama/Llama-4-Scout-17B-16E-Instruct": {
+    max_input_tokens: 10000000,
+    max_output_tokens: 16384,
+  },
+  "meta-llama/Llama-4-Maverick-17B-128E-Instruct": {
+    max_input_tokens: 1000000,
+    max_output_tokens: 16384,
+  },
+
+  // ---------------------------------------------------------------------------
+  // Qwen (via OpenAI-compat, e.g. vLLM / Together)
+  // ---------------------------------------------------------------------------
+  "Qwen/Qwen2-7B-Instruct": {
+    max_input_tokens: 32768,
+    max_output_tokens: 8192,
+  },
+  "Qwen/Qwen2-72B-Instruct": {
+    max_input_tokens: 32768,
+    max_output_tokens: 8192,
+  },
+  "Qwen/Qwen2.5-7B-Instruct": {
+    max_input_tokens: 131072,
+    max_output_tokens: 8192,
+  },
+  "Qwen/Qwen2.5-14B-Instruct": {
+    max_input_tokens: 131072,
+    max_output_tokens: 8192,
+  },
+  "Qwen/Qwen2.5-72B-Instruct": {
+    max_input_tokens: 131072,
+    max_output_tokens: 8192,
+  },
+  "Qwen/Qwen3-8B": { max_input_tokens: 131072, max_output_tokens: 8192 },
+  "Qwen/Qwen3-14B": { max_input_tokens: 131072, max_output_tokens: 8192 },
+  "Qwen/Qwen3-32B": { max_input_tokens: 131072, max_output_tokens: 8192 },
+};
+
+/** Resolves to the shared, vendored REGISTRY object (no copy, no I/O). It returns a
+ *  Promise to fit the injected `loadRegistry` slot and to allow a future async fetch. */
+export function loadBuiltinRegistry(): Promise<Registry> {
+  return Promise.resolve(REGISTRY);
+}
diff --git a/apps/backend/src/runs/recovery.test.ts b/apps/backend/src/runs/recovery.test.ts
new file mode 100644
index 00000000..6959c787
--- /dev/null
+++ b/apps/backend/src/runs/recovery.test.ts
@@ -0,0 +1,344 @@
+import { describe, it, expect, vi } from "vitest";
+
+vi.mock("../index.ts", () => ({ db: {} })); // drizzle store unused in these tests
+vi.mock("../logger.ts", () => ({
+  logger: { warn: vi.fn(), info: vi.fn(), error: vi.fn(), debug: vi.fn() },
+}));
+
+import { APICallError } from "ai";
+import {
+  contextOverflowRecoveryMiddleware,
+  isContextOverflowError,
+  trimOverflowingPrompt,
+  type RecoveryContext,
+} from "./recovery.ts";
+
+const apiError = (args: {
+  message?: string;
+  statusCode: number;
+  responseBody?: string;
+}) =>
+  new APICallError({
+    message: args.message ?? "Bad Request",
+    url: "https://provider.example/v1",
+    requestBodyValues: {},
+    statusCode: args.statusCode,
+    responseBody: args.responseBody,
+  });
+
+// --- isContextOverflowError — per-provider body matrix (ADR-0012 §Recovery) ---------
+
+describe("isContextOverflowError (ADR-0012 §Recovery)", () => {
+  it("matches the OpenAI phrasing + code", () => {
+    const err = apiError({
+      statusCode: 400,
+      responseBody: JSON.stringify({
+        error: {
+          message:
+            "This model's maximum context length is 8192 tokens. However, your messages resulted in 10042 tokens. Please reduce the length of the messages.",
+          type: "invalid_request_error",
+          code: "context_length_exceeded",
+        },
+      }),
+    });
+    expect(isContextOverflowError(err)).toBe(true);
+  });
+
+  it("matches the Anthropic phrasing", () => {
+    const err = apiError({
+      statusCode: 400,
+      message: "prompt is too long: 210042 tokens > 200000 maximum",
+    });
+    expect(isContextOverflowError(err)).toBe(true);
+  });
+
+  it("matches the vLLM / OpenAI-compatible phrasing", () => {
+    const err = apiError({
+      statusCode: 400,
+      responseBody:
+        '{"object":"error","message":"This model\'s maximum context length is 40960 tokens. However, you requested 45123 tokens (40123 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.","code":40303}',
+    });
+    expect(isContextOverflowError(err)).toBe(true);
+  });
+
+  it("matches the Google phrasing", () => {
+    const err = apiError({
+      statusCode: 400,
+      responseBody:
+        '{"error":{"code":400,"message":"The input token count (1200000) exceeds the maximum number of tokens allowed (1048576).","status":"INVALID_ARGUMENT"}}',
+    });
+    expect(isContextOverflowError(err)).toBe(true);
+  });
+
+  it("matches the Bedrock ValidationException phrasing", () => {
+    const err = apiError({
+      statusCode: 400,
+      responseBody: '{"message":"Input is too long for requested model."}',
+    });
+    expect(isContextOverflowError(err)).toBe(true);
+  });
+
+  it("matches a 413 payload-too-large with a token message", () => {
+    const err = apiError({
+      statusCode: 413,
+      responseBody: '{"error":"too many tokens in request"}',
+    });
+    expect(isContextOverflowError(err)).toBe(true);
+  });
+
+  it("rejects a 400 that is not about context (validation error)", () => {
+    const err = apiError({
+      statusCode: 400,
+      responseBody:
+        '{"error":{"message":"Invalid value for temperature: must be between 0 and 2."}}',
+    });
+    expect(isContextOverflowError(err)).toBe(false);
+  });
+
+  it("rejects 429 / 401 / 5xx regardless of body", () => {
+    for (const statusCode of [401, 429, 500, 503]) {
+      const err = apiError({
+        statusCode,
+        responseBody: '{"error":"maximum context length exceeded"}',
+      });
+      expect(isContextOverflowError(err)).toBe(false);
+    }
+  });
+
+  it("rejects non-APICallError values", () => {
+    expect(isContextOverflowError(new Error("prompt is too long"))).toBe(false);
+    expect(isContextOverflowError(undefined)).toBe(false);
+  });
+});
+
+// --- middleware: trim + retry-once (ADR-0012 §Recovery) --------------------------
+
+type PromptMsg = { role: string; content: unknown };
+
+const text = (role: "user" | "assistant", t: string): PromptMsg => ({
+  role,
+  content: [{ type: "text", text: t }],
+});
+
+/** system + 2 big + 2 small messages: prune can't help (no tool results), so
+ * the trim must go through the shared summarize stage (ADR-0012 §Recovery). */
+const overflowPrompt = (): PromptMsg[] => [
+  { role: "system", content: "SYS" },
+  text("user", "X".repeat(4000)),
+  text("assistant", "Y".repeat(4000)),
+  text("user", "recent question"),
+  text("assistant", "recent answer"),
+];
+
+const ctx = (over: Partial<RecoveryContext> = {}): RecoveryContext => ({
+  chatId: "chat-1",
+  imageProvider: "default",
+  targetTokens: 100,
+  keepRecentMessages: 4, // recovery halves this → keep 2
+  minPrunableChars: 2000,
+  summarize: () => Promise.resolve("RSUM"),
+  ...over,
+});
+
+const overflow = () =>
+  apiError({
+    statusCode: 400,
+    responseBody: '{"error":{"code":"context_length_exceeded"}}',
+  });
+
+/** Fake V3 model capturing retry params. */
+const fakeModel = (result: unknown = "RETRIED", fail?: unknown) => {
+  const calls: Array<{ prompt: PromptMsg[] }> = [];
+  const impl = (params: { prompt: PromptMsg[] }) => {
+    calls.push(params);
+    if (fail) return Promise.reject(fail);
+    return Promise.resolve(result);
+  };
+  return { calls, model: { doGenerate: impl, doStream: impl } };
+};
+
+const runWrapGenerate = (
+  mw: ReturnType<typeof contextOverflowRecoveryMiddleware>,
+  args: {
+    doGenerate: () => Promise<unknown>;
+    params: { prompt: PromptMsg[] };
+    model: unknown;
+  },
+) =>
+  (mw.wrapGenerate as (o: unknown) => Promise<unknown>)({
+    doStream: () => Promise.reject(new Error("unused")),
+    ...args,
+  });
+
+describe("contextOverflowRecoveryMiddleware (ADR-0012 §Recovery)", () => {
+  it("trims via the shared compactor and retries exactly once on overflow", async () => {
+    const markDirty = vi.fn(() => Promise.resolve(undefined));
+    const mw = contextOverflowRecoveryMiddleware(ctx({ markDirty }));
+    const { calls, model } = fakeModel();
+    const doGenerate = vi.fn(() => Promise.reject(overflow()));
+
+    const result = await runWrapGenerate(mw, {
+      doGenerate,
+      params: { prompt: overflowPrompt() },
+      model,
+    });
+
+    expect(result).toBe("RETRIED");
+    expect(doGenerate).toHaveBeenCalledTimes(1);
+    expect(calls).toHaveLength(1);
+
+    const retried = calls[0].prompt;
+    // System head pinned verbatim at the front (ADR-0012 §Tier 1).
+    expect(retried[0]).toEqual({ role: "system", content: "SYS" });
+    // The big prefix was replaced by the shared summary message (ADR-0012
+    // §Recovery — compactModelMessages' shape, not a bespoke trim).
+    const summary = retried[1] as { content: Array<{ text: string }> };
+    expect(summary.content[0].text).toContain(
+      "[Summary of earlier conversation]",
+    );
+    expect(summary.content[0].text).toContain("RSUM");
+    // Recent messages kept verbatim.
+    expect(retried.at(-1)).toEqual(text("assistant", "recent answer"));
+    // Dirty flag persisted on DETECTION (before the retry outcome is known).
+    expect(markDirty).toHaveBeenCalledTimes(1);
+  });
+
+  it("propagates the second overflow — no infinite retry", async () => {
+    const markDirty = vi.fn(() => Promise.resolve(undefined));
+    const mw = contextOverflowRecoveryMiddleware(ctx({ markDirty }));
+    const second = overflow();
+    const { model } = fakeModel(undefined, second);
+
+    await expect(
+      runWrapGenerate(mw, {
+        doGenerate: () => Promise.reject(overflow()),
+        params: { prompt: overflowPrompt() },
+        model,
+      }),
+    ).rejects.toBe(second);
+    // Flag persisted anyway: the NEXT turn must compact durably (ADR-0012 §Recovery).
+    expect(markDirty).toHaveBeenCalledTimes(1);
+  });
+
+  it("rethrows non-overflow errors without retrying or flagging", async () => {
+    const markDirty = vi.fn(() => Promise.resolve(undefined));
+    const mw = contextOverflowRecoveryMiddleware(ctx({ markDirty }));
+    const { calls, model } = fakeModel();
+    const authError = apiError({ statusCode: 401, message: "bad key" });
+
+    await expect(
+      runWrapGenerate(mw, {
+        doGenerate: () => Promise.reject(authError),
+        params: { prompt: overflowPrompt() },
+        model,
+      }),
+    ).rejects.toBe(authError);
+    expect(calls).toHaveLength(0);
+    expect(markDirty).not.toHaveBeenCalled();
+  });
+
+  it("still retries when persisting the dirty flag fails (best-effort)", async () => {
+    const markDirty = vi.fn(() => Promise.reject(new Error("db down")));
+    const mw = contextOverflowRecoveryMiddleware(ctx({ markDirty }));
+    const { calls, model } = fakeModel();
+
+    const result = await runWrapGenerate(mw, {
+      doGenerate: () => Promise.reject(overflow()),
+      params: { prompt: overflowPrompt() },
+      model,
+    });
+    expect(result).toBe("RETRIED");
+    expect(calls).toHaveLength(1);
+  });
+
+  it("surfaces the ORIGINAL overflow when the trim itself fails", async () => {
+    const first = overflow();
+    const mw = contextOverflowRecoveryMiddleware(
+      ctx({
+        summarize: () => Promise.reject(new Error("summarizer down")),
+      }),
+    );
+    const { calls, model } = fakeModel();
+
+    await expect(
+      runWrapGenerate(mw, {
+        doGenerate: () => Promise.reject(first),
+        params: { prompt: overflowPrompt() },
+        model,
+      }),
+    ).rejects.toBe(first);
+    expect(calls).toHaveLength(0);
+  });
+
+  it("covers the stream path: doStream rejection is trimmed and retried", async () => {
+    const mw = contextOverflowRecoveryMiddleware(ctx());
+    const { calls, model } = fakeModel("STREAMED");
+
+    const result = await (mw.wrapStream as (o: unknown) => Promise<unknown>)({
+      doGenerate: () => Promise.reject(new Error("unused")),
+      doStream: () => Promise.reject(overflow()),
+      params: { prompt: overflowPrompt() },
+      model,
+    });
+    expect(result).toBe("STREAMED");
+    expect(calls).toHaveLength(1);
+    expect(calls[0].prompt[0]).toEqual({ role: "system", content: "SYS" });
+  });
+});
+
+describe("trimOverflowingPrompt", () => {
+  it("pins multiple leading system messages and halves keep-recent", async () => {
+    const prompt: PromptMsg[] = [
+      { role: "system", content: "S1" },
+      { role: "system", content: "S2" },
+      text("user", "A".repeat(4000)),
+      text("assistant", "B".repeat(4000)),
+      text("user", "u2"),
+      text("assistant", "a2"),
+    ];
+    const { prompt: out, messagesDropped } = await trimOverflowingPrompt(
+      prompt,
+      ctx(), // keepRecentMessages 4 → recovery keeps 2
+    );
+    expect(out[0]).toEqual({ role: "system", content: "S1" });
+    expect(out[1]).toEqual({ role: "system", content: "S2" });
+    expect(messagesDropped).toBe(2); // the two big messages summarized away
+    expect(out.at(-2)).toEqual(text("user", "u2"));
+    expect(out.at(-1)).toEqual(text("assistant", "a2"));
+  });
+
+  it("never orphans a tool result at the keep boundary", async () => {
+    const toolCall: PromptMsg = {
+      role: "assistant",
+      content: [
+        { type: "tool-call", toolCallId: "t1", toolName: "search", input: {} },
+      ],
+    };
+    const toolResult: PromptMsg = {
+      role: "tool",
+      content: [
+        {
+          type: "tool-result",
+          toolCallId: "t1",
+          toolName: "search",
+          output: { type: "text", value: "Z".repeat(4000) },
+        },
+      ],
+    };
+    const prompt: PromptMsg[] = [
+      { role: "system", content: "SYS" },
+      text("user", "Q".repeat(4000)),
+      toolCall,
+      toolResult, // boundary at keep-2 would start recent here — must walk back
+      text("assistant", "done"),
+    ];
+    const { prompt: out } = await trimOverflowingPrompt(prompt, ctx());
+    const firstNonSystem = out.findIndex((m) => m.role !== "system");
+    // Recent must not begin with an orphaned role:"tool" message.
+    expect(out[firstNonSystem].role).not.toBe("tool");
+    const toolIdx = out.findIndex((m) => m.role === "tool");
+    if (toolIdx !== -1) {
+      expect(out[toolIdx - 1].role).toBe("assistant");
+    }
+  });
+});
diff --git a/apps/backend/src/runs/recovery.ts b/apps/backend/src/runs/recovery.ts
new file mode 100644
index 00000000..ad94ad34
--- /dev/null
+++ b/apps/backend/src/runs/recovery.ts
@@ -0,0 +1,230 @@
+/**
+ * Context-overflow recovery (ADR-0012 §Recovery).
+ *
+ * Recovery is the NET, proactive compaction is the plan: even when Tier 1/2 are
+ * disabled (kill switch — ADR-0012 §Config & kill switch) or their estimates were wrong, a provider 400/413
+ * "context too long" must not hard-fail the turn. The middleware here wraps the
+ * language model so EVERY individual model call — the first call of a turn and
+ * every later step of a tool loop, in both the stream and generate paths — gets
+ * one trim-and-retry:
+ *
+ *   1. Detect the overflow ({@link isContextOverflowError}, per-provider body
+ *      matrix — ADR-0012 §Recovery).
+ *   2. Persist `compactionDirty = true` through the single CAS writer so the
+ *      NEXT `prepareChatTurn` forces a durable Tier 1 compaction (ADR-0012
+ *      §Recovery — recovery never writes summary/watermark itself; it only flags).
+ *   3. Trim in-memory via {@link compactModelMessages} — the shared Tier 2
+ *      adapter, NOT a bespoke trim (ADR-0012 §Recovery) — and retry the call once.
+ *   4. A second failure propagates; {@link formatStreamError} in agent-runner
+ *      surfaces the "conversation too large" message. No infinite retry.
+ *
+ * The middleware operates on the `LanguageModelV3Prompt`. Its message shape is
+ * a structural subset of `ModelMessage` for everything compaction touches
+ * (roles, text / tool-call / tool-result / file parts, output wrappers), so the
+ * prompt is passed to `compactModelMessages` directly rather than through a
+ * lossy converter — one estimator, one trimmer (ADR-0012 §One estimator /
+ * §Recovery). The leading system message(s) are split off first and re-attached
+ * verbatim (ADR-0012 §Tier 1: pin the system prompt; the summary must never
+ * swallow it).
+ */
+
+import {
+  APICallError,
+  type LanguageModelMiddleware,
+  type ModelMessage,
+} from "ai";
+import { logger } from "../logger.ts";
+import { compactModelMessages, type Summarize } from "./compaction.ts";
+import type { ImageProvider } from "./token-estimate.ts";
+
+/**
+ * Everything the middleware needs to trim and retry, resolved once per turn by
+ * `prepareChatTurn`. `markDirty` is absent for headless runs (triggers,
+ * sub-agents) — they have no durable chat row to flag.
+ */
+export type RecoveryContext = {
+  /** Chat id, for log correlation only. Absent on headless runs. */
+  chatId?: string;
+  imageProvider: ImageProvider;
+  /** Trim down to this many tokens (the Tier 1 hysteresis target). */
+  targetTokens: number;
+  /** The configured keep-recent; recovery halves it (aggressive trim, ADR-0012 §Recovery). */
+  keepRecentMessages: number;
+  minPrunableChars: number;
+  summarize: Summarize;
+  summarizerWindow?: number;
+  /**
+   * Persists `compactionDirty = true` (via the single CAS writer). Called as
+   * soon as an overflow is DETECTED — before the retry — so the next turn
+   * compacts durably even if this retry fails. Best-effort: a failure here
+   * never blocks the retry.
+   */
+  markDirty?: () => Promise<unknown>;
+};
+
+/**
+ * Per-provider context-overflow phrasings (ADR-0012 §Recovery). Matched against the
+ * error message AND raw response body, case-insensitive:
+ *  - OpenAI / vLLM / OpenAI-compatible: "This model's maximum context length is
+ *    N tokens…" + code "context_length_exceeded"
+ *  - Anthropic: "prompt is too long: N tokens > N maximum"
+ *  - Google: "The input token count (N) exceeds the maximum number of tokens
+ *    allowed (N)"
+ *  - Bedrock: "Input is too long for requested model." (ValidationException)
+ *  - Generic gateways: "too many tokens", "exceed context limit"
+ */
+const CONTEXT_OVERFLOW_PATTERN =
+  /context[ _]length|context_length_exceeded|prompt is too long|too many tokens|maximum context|exceeds the (?:maximum|max)(?: number of)? (?:input )?tokens|input is too long|exceeds? (?:the )?context limit/i;
+
+/**
+ * Classifies `error` as a provider context-overflow rejection. It must be an
+ * `APICallError` with HTTP status 400 or 413 AND its message or raw string body
+ * must match {@link CONTEXT_OVERFLOW_PATTERN}. Everything else — rate limits
+ * (429), auth (401/403), 5xx — returns false: a trim-retry would not help there.
+ */
+export function isContextOverflowError(error: unknown): boolean {
+  if (!APICallError.isInstance(error)) return false;
+  const { statusCode, message, responseBody } = error;
+  if (!(statusCode === 400 || statusCode === 413)) return false;
+  // Only a string body is searchable; anything else contributes nothing.
+  const body = typeof responseBody === "string" ? responseBody : "";
+  return CONTEXT_OVERFLOW_PATTERN.test(`${message ?? ""}\n${body}`);
+}
+
+/** A V3 prompt message — structurally compatible with ModelMessage (see header). */
+type PromptMessage = { role: string; content: unknown };
+
+/**
+ * Shrinks a provider-rejected prompt using the shared Tier 2 compactor. Leading
+ * `role:"system"` messages are split off first and re-attached untouched, so
+ * only the conversation after them is compacted. Exported for unit testing.
+ */
+export async function trimOverflowingPrompt<T extends PromptMessage>(
+  prompt: T[],
+  ctx: RecoveryContext,
+): Promise<{ prompt: T[]; messagesDropped: number }> {
+  // Index of the first non-system message; -1 means the prompt is all system.
+  const firstBody = prompt.findIndex((message) => message.role !== "system");
+  const split = firstBody === -1 ? prompt.length : firstBody;
+  const pinned = prompt.slice(0, split);
+  const body = prompt.slice(split) as unknown as ModelMessage[];
+
+  // Recovery trims harder than the proactive path: half the configured keep-recent
+  // (ADR-0012 §Recovery), never below 2 so a user/assistant pair survives.
+  const keepRecentMessages = Math.max(2, Math.ceil(ctx.keepRecentMessages / 2));
+  const { targetTokens, minPrunableChars, imageProvider } = ctx;
+  const { summarize, summarizerWindow } = ctx;
+  const { messages, messagesDropped } = await compactModelMessages(body, {
+    keepRecentMessages,
+    targetTokens,
+    minPrunableChars,
+    imageProvider,
+    summarize,
+    summarizerWindow,
+    // The provider already rejected this prompt, so the estimator was wrong;
+    // without `force` the no-op gate could hand back a byte-identical retry.
+    force: true,
+  });
+
+  const trimmed = messages as unknown as T[];
+  return { prompt: [...pinned, ...trimmed], messagesDropped };
+}
+
+/**
+ * Builds a middleware that wraps `doGenerate` and `doStream` with the detect →
+ * flag → trim → retry-once sequence; apply it via `wrapLanguageModel({ model,
+ * middleware })` in agent-runner. Only a rejection of the `doStream()` promise
+ * itself is caught here; an error raised after chunks start flowing is not
+ * retried (providers are expected to reject oversized prompts up front).
+ */
+export function contextOverflowRecoveryMiddleware(
+  ctx: RecoveryContext,
+): LanguageModelMiddleware {
+  // Shared by both wrappers: resolves to the trimmed params, or rethrows `error`.
+  const recoverParams = async <P extends { prompt: PromptMessage[] }>(
+    error: unknown,
+    params: P,
+  ): Promise<P> => {
+    if (!isContextOverflowError(error)) throw error;
+
+    logger.warn(
+      {
+        metric: "recovery.overflow_detected",
+        chatId: ctx.chatId,
+        error: String(error),
+      },
+      "context overflow detected; trimming and retrying once",
+    );
+
+    // Flag durable compaction for the NEXT turn first (ADR-0012 §Recovery) — even if the
+    // retry below fails, the next prepareChatTurn must force Tier 1.
+    if (ctx.markDirty) {
+      try {
+        await ctx.markDirty();
+      } catch (err) {
+        logger.error(
+          { err, chatId: ctx.chatId },
+          "failed to persist compactionDirty after overflow",
+        );
+      }
+    }
+
+    try {
+      const { prompt, messagesDropped } = await trimOverflowingPrompt(
+        params.prompt,
+        ctx,
+      );
+      logger.info(
+        { metric: "recovery.retry", chatId: ctx.chatId, messagesDropped },
+        "overflow recovery trim complete; retrying model call",
+      );
+      return { ...params, prompt };
+    } catch (trimError) {
+      // The trim itself failed (e.g. the summarize call errored). Surface the
+      // ORIGINAL overflow so the user sees the actionable message.
+      logger.error(
+        { err: trimError, chatId: ctx.chatId },
+        "overflow recovery trim failed",
+      );
+      throw error;
+    }
+  };
+
+  // Runs the single retry. ANY failure of the retried call (not only a repeat
+  // overflow) is logged as recovery.failed and rethrown; there is no second retry.
+  const retry = async <R>(op: () => PromiseLike<R>): Promise<R> => {
+    try {
+      return await op();
+    } catch (retryError) {
+      logger.error(
+        {
+          metric: "recovery.failed",
+          chatId: ctx.chatId,
+          error: String(retryError),
+        },
+        "overflow recovery retry still rejected by provider",
+      );
+      throw retryError;
+    }
+  };
+
+  return {
+    specificationVersion: "v3",
+    wrapGenerate: async ({ doGenerate, params, model }) => {
+      try {
+        return await doGenerate();
+      } catch (error) {
+        const next = await recoverParams(error, params);
+        return retry(() => model.doGenerate(next));
+      }
+    },
+    wrapStream: async ({ doStream, params, model }) => {
+      try {
+        return await doStream();
+      } catch (error) {
+        const next = await recoverParams(error, params);
+        return retry(() => model.doStream(next));
+      }
+    },
+  };
+}
diff --git a/apps/backend/src/runs/sinks/chat-sink.test.ts b/apps/backend/src/runs/sinks/chat-sink.test.ts
index ec8b403d..5b6c1c35 100644
--- a/apps/backend/src/runs/sinks/chat-sink.test.ts
+++ b/apps/backend/src/runs/sinks/chat-sink.test.ts
@@ -24,6 +24,8 @@ const planWithAgent: ResolvedRunPlan = {
     seed: undefined,
     presencePenalty: undefined,
     frequencyPenalty: undefined,
+    contextWindow: 128000,
+    contextWindowIsDefault: false,
   },
 };
 
@@ -39,6 +41,8 @@ const planAdhoc: ResolvedRunPlan = {
     seed: 42,
     presencePenalty: 0.1,
     frequencyPenalty: 0.2,
+    contextWindow: 128000,
+    contextWindowIsDefault: false,
   },
 };
 
diff --git a/apps/backend/src/runs/sinks/trigger-sink.test.ts b/apps/backend/src/runs/sinks/trigger-sink.test.ts
index 61ef3c5d..e38c046e 100644
--- a/apps/backend/src/runs/sinks/trigger-sink.test.ts
+++ b/apps/backend/src/runs/sinks/trigger-sink.test.ts
@@ -8,6 +8,8 @@ const plan: ResolvedRunPlan = {
     agentId: "a1",
     providerId: "p1",
     modelId: "m1",
+    contextWindow: 128000,
+    contextWindowIsDefault: false,
   },
 };
 
diff --git a/apps/backend/src/runs/token-estimate.test.ts b/apps/backend/src/runs/token-estimate.test.ts
new file mode 100644
index 00000000..73784bd4
--- /dev/null
+++ b/apps/backend/src/runs/token-estimate.test.ts
@@ -0,0 +1,386 @@
+import { describe, it, expect } from "vitest";
+import { convertToModelMessages, type UIMessage } from "ai";
+import {
+  estimateTokens,
+  uiMessagesToCountUnits,
+  modelMessagesToCountUnits,
+  parseImageDimensions,
+  imageProviderFor,
+  CHARS_PER_TOKEN,
+  DEFAULT_NONTEXT_TOKENS,
+  MODEL_BOUND_UI_PART_TYPES,
+  type CountUnit,
+} from "./token-estimate.ts";
+import type { ModelMessage } from "ai";
+import type { PlatypusUIMessage } from "../types.ts";
+
+// Builds a 24-byte PNG prefix: signature, IHDR length/type, width@16, height@20.
+function fakePng(width: number, height: number): Uint8Array {
+  const png = new Uint8Array(24);
+  const signature = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
+  png.set([...signature, 0, 0, 0, 13, 0x49, 0x48, 0x44, 0x52]); // + IHDR length + "IHDR"
+  const view = new DataView(png.buffer);
+  view.setUint32(16, width);
+  view.setUint32(20, height);
+  return png;
+}
+
+// Builds a 12-byte JPEG stub whose only segment is an SOF0 frame header.
+function fakeJpeg(width: number, height: number): Uint8Array {
+  const jpeg = new Uint8Array(12);
+  jpeg.set([0xff, 0xd8, 0xff, 0xc0, 0x00, 0x11, 0x08]); // SOI, SOF0, length, precision
+  const dims = new DataView(jpeg.buffer, 7, 4);
+  dims.setUint16(0, height);
+  dims.setUint16(2, width);
+  return jpeg;
+}
+
+function dataUrl(bytes: Uint8Array, mediaType = "image/png"): string {
+  return `data:${mediaType};base64,` + Buffer.from(bytes).toString("base64");
+}
+
+describe("estimateTokens (the single estimator, ADR-0012 §One estimator)", () => {
+  it("applies char/4 to text only, rounding up", () => {
+    // Eight characters divide evenly into whole tokens.
+    const even: CountUnit = { role: "user", text: "abcdefgh", nonText: [] };
+    expect(estimateTokens([even])).toBe(8 / CHARS_PER_TOKEN);
+
+    // Five characters round up: ceil(5/4) = 2.
+    const partial: CountUnit = { role: "user", text: "abcde", nonText: [] };
+    expect(estimateTokens([partial])).toBe(2);
+  });
+
+  it("sums across multiple units (role-agnostic total)", () => {
+    const system: CountUnit = { role: "system", text: "aaaa", nonText: [] };
+    const user: CountUnit = { role: "user", text: "bbbb", nonText: [] };
+    const reply: CountUnit = { role: "assistant", text: "cccc", nonText: [] };
+    // Three four-character texts, each under a different role, cost one
+    // token apiece.
+    expect(estimateTokens([system, user, reply])).toBe(3);
+  });
+});
+
+describe("modality table (ADR-0012 §Token estimation — never char/4 an image)", () => {
+  it("anthropic: ceil(w*h/750)", () => {
+    // 100×100 px → ceil(10000 / 750) = 14 tokens.
+    const pixels = 100 * 100;
+    const onlyImage: CountUnit = {
+      role: "user",
+      text: "",
+      nonText: [{ provider: "anthropic", width: 100, height: 100 }],
+    };
+    expect(estimateTokens([onlyImage])).toBe(Math.ceil(pixels / 750));
+  });
+
+  it("openai high detail: 85 + 170 per tile", () => {
+    // A 100×100 image occupies a single 512px tile.
+    const tiles = 1;
+    const onlyImage: CountUnit = {
+      role: "user",
+      text: "",
+      nonText: [{ provider: "openai", width: 100, height: 100 }],
+    };
+    expect(estimateTokens([onlyImage])).toBe(85 + 170 * tiles);
+  });
+
+  it("openai low detail is a flat 85, even without dimensions", () => {
+    // A 4000×4000 image would span many tiles at high detail, yet low detail
+    // ignores the size entirely.
+    const sized: CountUnit = {
+      role: "user",
+      text: "",
+      nonText: [
+        { provider: "openai", width: 4000, height: 4000, detail: "low" },
+      ],
+    };
+    expect(estimateTokens([sized])).toBe(85);
+
+    // With no dimensions at all the flat cost still applies, rather than the
+    // missing-dimension ceiling.
+    const unsized: CountUnit = {
+      role: "user",
+      text: "",
+      nonText: [{ provider: "openai", detail: "low" }],
+    };
+    expect(estimateTokens([unsized])).toBe(85);
+  });
+
+  it("missing dimensions use a pessimistic per-provider ceiling (ADR-0012 §Token estimation)", () => {
+    // A part carrying only a provider models an image whose bytes and
+    // dimensions are unavailable (e.g. a hosted URL), so no pixel count can
+    // be plugged into a formula.
+    const unsized = (
+      provider: "anthropic" | "openai" | "default",
+    ): CountUnit => ({
+      role: "user",
+      text: "",
+      nonText: [{ provider }],
+    });
+
+    // Providers that bill per image would be UNDER-counted by the flat 1200
+    // default, so each gets a ceiling near its post-resize maximum.
+    expect(estimateTokens([unsized("anthropic")])).toBe(1600);
+    expect(estimateTokens([unsized("openai")])).toBe(2000);
+
+    // The unknown ("default") provider keeps the conservative flat default.
+    expect(estimateTokens([unsized("default")])).toBe(DEFAULT_NONTEXT_TOKENS);
+  });
+
+  it("unknown provider falls to the conservative default", () => {
+    // Dimensions are supplied but must be ignored: the "default" family has
+    // no formula to plug them into.
+    const onlyImage: CountUnit = {
+      role: "user",
+      text: "",
+      nonText: [{ provider: "default", width: 100, height: 100 }],
+    };
+    expect(estimateTokens([onlyImage])).toBe(DEFAULT_NONTEXT_TOKENS);
+  });
+
+  it("an image is NOT counted as char/4 of its base64 bytes", () => {
+    const url = dataUrl(fakePng(64, 64));
+    const message: PlatypusUIMessage = {
+      id: "m1",
+      role: "user",
+      parts: [{ type: "file", mediaType: "image/png", url }],
+    };
+    const units = uiMessagesToCountUnits([message], "anthropic");
+    const tokens = estimateTokens(units);
+
+    // The table cost comes from the 64×64 header; a naive char/4 over the
+    // base64 data URL would be larger.
+    expect(tokens).toBe(Math.ceil((64 * 64) / 750));
+    expect(tokens).toBeLessThan(Math.ceil(url.length / CHARS_PER_TOKEN));
+  });
+});
+
+describe("parseImageDimensions (cheap header parse)", () => {
+  it("reads PNG IHDR dimensions", () => {
+    const [width, height] = [800, 600];
+    const parsed = parseImageDimensions(fakePng(width, height));
+    // The parsed size must echo the one written into the header.
+    expect(parsed).toEqual({ width, height });
+  });
+
+  it("reads JPEG SOF dimensions", () => {
+    const [width, height] = [320, 240];
+    const parsed = parseImageDimensions(fakeJpeg(width, height));
+    // Height precedes width in the SOF segment; the result must not swap them.
+    expect(parsed).toEqual({ width, height });
+  });
+
+  it("returns undefined for unrecognized bytes", () => {
+    expect(parseImageDimensions(Uint8Array.of(1, 2, 3, 4))).toBeUndefined();
+  });
+});
+
+describe("MODEL_BOUND filter (ADR-0012 §One estimator — UI-only parts excluded)", () => {
+  it("counts text but ignores reasoning / source / step-start / data parts", () => {
+    // Only the "text" part is model-bound; every other part here is UI-only.
+    const parts = [
+      { type: "reasoning", text: "thinking hard about it" },
+      { type: "text", text: "hello" },
+      { type: "step-start" },
+      { type: "source-url", sourceId: "s1", url: "https://example.com" },
+      { type: "data-custom", data: { hidden: "payload" } },
+    ];
+    const message = {
+      id: "m1",
+      role: "assistant",
+      parts,
+    } as unknown as PlatypusUIMessage;
+    const units = uiMessagesToCountUnits([message]);
+    expect(units).toHaveLength(1);
+    expect(units[0].text).toBe("hello");
+    expect(units[0].nonText).toHaveLength(0);
+  });
+
+  it("only text/file UI part types are model-bound (the documented set)", () => {
+    const uiOnlyTypes = [
+      "reasoning",
+      "source-url",
+      "source-document",
+      "step-start",
+      "data-custom",
+    ];
+    expect([...MODEL_BOUND_UI_PART_TYPES]).toEqual(["text", "file"]);
+    // None of the UI-only types the adapter drops may appear in the set.
+    const modelBound: readonly string[] = MODEL_BOUND_UI_PART_TYPES;
+    expect(uiOnlyTypes.filter((t) => modelBound.includes(t))).toEqual([]);
+  });
+});
+
+describe("tool-result output variants (model adapter)", () => {
+  const unit = (output: unknown): CountUnit => {
+    // One tool message holding one tool-result that carries `output`.
+    const part = { type: "tool-result", toolCallId: "c1", toolName: "t" };
+    const message = {
+      role: "tool",
+      content: [{ ...part, output }],
+    } as unknown as ModelMessage;
+    return modelMessagesToCountUnits([message])[0];
+  };
+
+  it("folds text / json / content value into char/4 text", () => {
+    const fromText = unit({ type: "text", value: "hello world" });
+    const fromJson = unit({ type: "json", value: { a: 1 } });
+    const nested = [{ type: "text", text: "deep" }];
+    const fromContent = unit({ type: "content", value: nested });
+    expect(fromText.text).toContain("hello");
+    expect(fromJson.text).toContain('"a"');
+    expect(fromContent.text).toContain("deep");
+  });
+
+  it("uses the reason (not a value) for execution-denied", () => {
+    // execution-denied has no `value`; the text must come from `reason`.
+    const denied = unit({ type: "execution-denied", reason: "blocked" });
+    expect(denied.text).toContain("blocked");
+  });
+});
+
+describe("adapter equality (ADR-0012 §One estimator — one estimate across both shapes)", () => {
+  it("estimate(UI) === estimate(convertToModelMessages(UI)) exactly", async () => {
+    // One conversation covering every model-bound shape: system text, user
+    // text plus an image, a tool round-trip, and a plain assistant reply.
+    const png = fakePng(128, 128);
+    const system: UIMessage = {
+      id: "s",
+      role: "system",
+      parts: [{ type: "text", text: "You are helpful." }],
+    };
+    const user: UIMessage = {
+      id: "u",
+      role: "user",
+      parts: [
+        { type: "text", text: "What is the weather and look at this image?" },
+        { type: "file", mediaType: "image/png", url: dataUrl(png) },
+      ],
+    };
+    // A completed tool round-trip: input and output both reach the model.
+    const weatherCall = {
+      type: "tool-getWeather",
+      toolCallId: "call-1",
+      state: "output-available",
+      input: { city: "San Francisco", units: "metric" },
+      output: { temperatureC: 18, condition: "foggy" },
+    };
+    const toolTurn = {
+      id: "a",
+      role: "assistant",
+      parts: [{ type: "text", text: "Let me check." }, weatherCall],
+    } as unknown as UIMessage;
+    const answer: UIMessage = {
+      id: "a2",
+      role: "assistant",
+      parts: [{ type: "text", text: "It is 18C and foggy." }],
+    };
+    const ui: UIMessage[] = [system, user, toolTurn, answer];
+
+    const model = await convertToModelMessages(ui);
+
+    // Both shapes are priced with the same provider family so the image cost
+    // cannot be the source of a difference.
+    const viaUi = estimateTokens(uiMessagesToCountUnits(ui, "openai"));
+    const viaModel = estimateTokens(
+      modelMessagesToCountUnits(model, "openai"),
+    );
+
+    expect(viaUi).toBe(viaModel);
+    expect(viaUi).toBeGreaterThan(0);
+  });
+});
+
+describe("imageProviderFor", () => {
+  it("maps provider types to cost families", () => {
+    expect(imageProviderFor("OpenAI")).toBe("openai");
+    expect(imageProviderFor("Anthropic")).toBe("anthropic");
+    expect(imageProviderFor("Bedrock")).toBe("anthropic");
+    expect(imageProviderFor("Google")).toBe("default");
+    expect(imageProviderFor("OpenRouter")).toBe("default");
+  });
+});
+
+// --- estimateOverheadTokens (ADR-0012 §Tier 1 (trigger projection)) --------
+
+import { z } from "zod";
+import { tool } from "ai";
+import { estimateOverheadTokens } from "./token-estimate.ts";
+
+describe("estimateOverheadTokens (ADR-0012 §Tier 1 (trigger projection))", () => {
+  it("counts the system prompt at char/4", () => {
+    // 400 characters and no tools: exactly 400 / 4 tokens.
+    expect(estimateOverheadTokens("S".repeat(400), {})).toBe(100);
+  });
+
+  it("handles missing system prompt and tools", () => {
+    expect(estimateOverheadTokens(undefined, undefined)).toBe(0);
+  });
+
+  it("counts tool name, description, and serialized JSON schema", () => {
+    const inputSchema = z.object({
+      query: z.string().describe("Full-text query string"),
+      limit: z.number().optional().describe("Maximum results to return"),
+    });
+    const searchDocuments = tool({
+      description:
+        "Searches the workspace document store and returns ranked matches.",
+      inputSchema,
+    });
+    const promptOnly = estimateOverheadTokens("system", {});
+    const withTool = estimateOverheadTokens("system", { searchDocuments });
+    // The name and description account for roughly 20 tokens, so clearing
+    // +40 requires the serialized schema (property names and descriptions)
+    // to be counted too.
+    expect(withTool - promptOnly).toBeGreaterThan(40);
+  });
+
+  it("falls back to a conservative flat cost for unserializable schemas", () => {
+    const weird = { description: "", inputSchema: 42 } as never;
+    const tokens = estimateOverheadTokens("", { weird });
+    // Whether the fallback constant fires or serialization somehow succeeds,
+    // the call must not throw and must return a finite count covering at
+    // least the tool name ("weird" → ceil(5/4) = 2).
+    expect(Number.isFinite(tokens)).toBe(true);
+    expect(tokens).toBeGreaterThanOrEqual(2);
+  });
+
+  it("scales with a realistic multi-tool agent (the 8888-vs-986 gap)", () => {
+    // Each call builds a fresh tool, so all eight have distinct schema objects.
+    const makeTool = () =>
+      tool({
+        description:
+          "A realistically verbose tool description explaining inputs, outputs, constraints, and error behaviour for the model.",
+        inputSchema: z.object({
+          target: z.string().describe("The resource identifier to act on"),
+          options: z
+            .object({
+              recursive: z.boolean().optional(),
+              depth: z.number().optional(),
+              filter: z.string().optional(),
+            })
+            .optional(),
+        }),
+      });
+    const tools: Record<string, ReturnType<typeof makeTool>> = {};
+    for (let i = 0; i < 8; i++) tools[`tool_${i}`] = makeTool();
+
+    const sys = "You are a helpful agent.\n".repeat(40); // ~1k chars
+    // The point of ADR-0012 §Tier 1 (trigger projection) is that this payload
+    // is large even when the history is short.
+    expect(estimateOverheadTokens(sys, tools)).toBeGreaterThan(500);
+  });
+
+  it("is stable across repeated calls (schema-cache must not change counts)", () => {
+    const lookup = tool({
+      description: "Look something up by id.",
+      inputSchema: z.object({ id: z.string().describe("identifier") }),
+    });
+    const tools = { lookup };
+    // Passing the very same tool objects twice: the second call can be served
+    // from the schema cache, and must report the identical count.
+    const [first, second] = [1, 2].map(() =>
+      estimateOverheadTokens("system prompt", tools),
+    );
+    expect(second).toBe(first);
+  });
+});
diff --git a/apps/backend/src/runs/token-estimate.ts b/apps/backend/src/runs/token-estimate.ts
new file mode 100644
index 00000000..62ea28d3
--- /dev/null
+++ b/apps/backend/src/runs/token-estimate.ts
@@ -0,0 +1,557 @@
+/**
+ * The single token estimator (ADR-0012 §One estimator).
+ *
+ * Token counting lives in **exactly one** function — {@link estimateTokens} —
+ * over **one** neutral structure ({@link CountUnit}). Tier 1 operates on
+ * UIMessages and Tier 2 on ModelMessages; both normalize into `CountUnit[]` via
+ * the adapters here, so the two tiers can never diverge on a count
+ * (ADR-0012 §One estimator).
+ *
+ * Hard rules baked in:
+ *  - **char/4 applies to text only.** Tool-call inputs and tool-result outputs
+ *    are text-like to the model, so they fold into a unit's `text`. Image /
+ *    binary bytes are NEVER char/4'd — they go through the modality table
+ *    ({@link nonTextTokens}, ADR-0012 §Token estimation).
+ *  - **UI-only parts are excluded on both sides.** `reasoning`, `source-url`,
+ *    `source-document`, `step-start`, and `data-*` never reach the model, so
+ *    they are dropped by both adapters (ADR-0012 §One estimator).
+ *  - The estimate is content-only — **no per-message role framing overhead** —
+ *    so the total is invariant to how messages are grouped. That is what lets
+ *    the UIMessage and ModelMessage adapters agree exactly even though
+ *    `convertToModelMessages` splits one UI message into several model messages.
+ *
+ * The char/4 estimate runs every turn. The provider-reported
+ * `usage.inputTokens` from the prior turn acts as a corrective baseline when
+ * available (`Tier1Input.lastInputTokens` — threaded by the ADR-0012
+ * §Context-usage ring); until then the cold-start margin
+ * (ADR-0012 §Token estimation (cold-start margin)) compensates for under-counts.
+ */
+
+import {
+  asSchema,
+  type ModelMessage,
+  type Tool,
+  type ToolResultPart,
+  type DataContent,
+} from "ai";
+import type { PlatypusUIMessage } from "../types.ts";
+
+/** Number of characters approximated as one token (text only). */
+export const CHARS_PER_TOKEN = 4;
+
+/**
+ * Conservative flat cost for a non-text part whose true cost we cannot compute
+ * (unknown provider, missing image dimensions, non-image binary file). Over-
+ * counting beats overflow (ADR-0012 §Token estimation).
+ */
+export const DEFAULT_NONTEXT_TOKENS = 1200;
+
+/** OpenAI's flat cost for a `detail: "low"` image, independent of size. */
+const OPENAI_LOW_DETAIL_TOKENS = 85;
+
+/**
+ * No-dimension fallbacks for providers with a real per-image cost
+ * (ADR-0012 §Token estimation). When the bytes are absent (hosted http(s) URL
+ * — and note `inlineFileUrls` turns every stored attachment into one) or the
+ * header can't be parsed, we have no pixels to plug into the formula. The flat
+ * {@link DEFAULT_NONTEXT_TOKENS} (1200) under-counts a large image on these
+ * providers, defeating "over-count beats overflow" exactly where it matters.
+ * Use a pessimistic value near each provider's post-resize per-image ceiling:
+ *  - Anthropic resizes to ≤1.15 MP ⇒ ~1600 tokens max.
+ *  - OpenAI high-detail tiling tops out a few thousand; 2000 is a safe ceiling
+ *    for the common ≤2048² case.
+ */
+const ANTHROPIC_NO_DIMS_TOKENS = 1600;
+const OPENAI_HIGH_NO_DIMS_TOKENS = 2000;
+
+/**
+ * The provider families with a known image-cost formula. Everything else maps
+ * to `"default"` and pays the conservative flat cost.
+ */
+export type ImageProvider = "anthropic" | "openai" | "default";
+
+/**
+ * A non-text, model-bound part reduced to what the estimator needs: which
+ * provider formula applies, and (when known) the decoded pixel dimensions.
+ * `width`/`height` undefined → the provider's missing-dimension fallback.
+ */
+export type NonTextPart = {
+  provider: ImageProvider;
+  width?: number;
+  height?: number;
+  /** OpenAI image detail hint. Unset is treated as `"high"` (over-count). */
+  detail?: "low" | "high";
+};
+
+/** Message role, neutral across UIMessage and ModelMessage shapes. */
+export type CountRole = "system" | "user" | "assistant" | "tool";
+
+/**
+ * The neutral counting structure. One per source message. `text` is the
+ * char/4'd blob (text parts + serialized tool input/output); `nonText` holds
+ * images/binaries counted via the modality table.
+ */
+export type CountUnit = {
+  role: CountRole;
+  text: string;
+  nonText: NonTextPart[];
+};
+
+/**
+ * UIMessage part `type`s that reach the model and are therefore counted. Kept
+ * as data so the test can assert the UI-only parts are excluded
+ * (ADR-0012 §One estimator).
+ * Tool parts are matched separately by the `tool-`/`dynamic-tool` prefix.
+ */
+export const MODEL_BOUND_UI_PART_TYPES = ["text", "file"] as const;
+
+// ---------------------------------------------------------------------------
+// The estimator (the one function — ADR-0012 §One estimator)
+// ---------------------------------------------------------------------------
+
+function nonTextTokens(part: NonTextPart): number {
+  const { provider, width, height, detail } = part;
+  const lowDetail = detail === "low";
+
+  if (provider === "openai") {
+    // Low detail is a flat cost whether or not the dimensions are known.
+    if (lowDetail) return OPENAI_LOW_DETAIL_TOKENS;
+    // High detail (also assumed when `detail` is unset) tiles by pixel size;
+    // with no dimensions to tile, use the pessimistic ceiling.
+    return width == null || height == null
+      ? OPENAI_HIGH_NO_DIMS_TOKENS
+      : openaiHighDetailTokens(width, height);
+  }
+
+  if (provider === "anthropic") {
+    // Anthropic's documented approximation is tokens ≈ (w × h) / 750; with no
+    // dimensions to plug in, use the pessimistic ceiling.
+    return width == null || height == null
+      ? ANTHROPIC_NO_DIMS_TOKENS
+      : Math.ceil((width * height) / 750);
+  }
+
+  // Every other provider has no formula (ADR-0012 §Token estimation): the
+  // conservative flat default applies with or without dimensions.
+  return DEFAULT_NONTEXT_TOKENS;
+}
+
+/**
+ * High-detail image cost under OpenAI's tiling scheme (gpt-4o family). The
+ * image is first shrunk to fit a 2048×2048 box, then shrunk again until its
+ * shortest side is at most 768; the result is covered with 512px tiles at 170
+ * tokens each on top of an 85-token base.
+ */
+function openaiHighDetailTokens(w: number, h: number): number {
+  type Size = [number, number];
+  // Scales both sides by `limit / side`, but only when `side` exceeds `limit`.
+  const shrink = ([a, b]: Size, side: number, limit: number): Size => {
+    if (side > limit) {
+      const scale = limit / side;
+      return [Math.round(a * scale), Math.round(b * scale)];
+    }
+    return [a, b];
+  };
+
+  const boxed = shrink([w, h], Math.max(w, h), 2048);
+  const [width, height] = shrink(boxed, Math.min(...boxed), 768);
+  const tiles = Math.ceil(width / 512) * Math.ceil(height / 512);
+  return 85 + 170 * tiles;
+}
+
+/**
+ * The single estimator. Text is char/4'd over the COMBINED length of every
+ * unit and rounded up once — rounding per unit would let a regrouping of the
+ * same content (one UI message split into several model messages) change the
+ * total. Each non-text part adds its modality-table cost. Role-agnostic.
+ */
+export const estimateTokens = (units: CountUnit[]): number => {
+  const chars = units.reduce((sum, unit) => sum + unit.text.length, 0);
+  const parts = units.flatMap((unit) => unit.nonText);
+  const nonText = parts.reduce((sum, part) => sum + nonTextTokens(part), 0);
+  return Math.ceil(chars / CHARS_PER_TOKEN) + nonText;
+};
+
+// ---------------------------------------------------------------------------
+// Shared helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Serializes a value as JSON-like text with object keys in sorted order, so a
+ * value yields the same string whichever adapter saw it (the UIMessage and
+ * ModelMessage shapes must agree exactly — ADR-0012 §One estimator). Values
+ * `JSON.stringify` cannot represent (e.g. `undefined`) become the empty string.
+ */
+export function stableStringify(value: unknown): string {
+  const isContainer = value !== null && typeof value === "object";
+  if (!isContainer) return JSON.stringify(value) ?? "";
+  if (Array.isArray(value)) return `[${value.map(stableStringify).join()}]`;
+  const record = value as Record<string, unknown>;
+  const members = Object.keys(record)
+    .sort()
+    .map((key) => `${JSON.stringify(key)}:${stableStringify(record[key])}`);
+  return `{${members.join(",")}}`;
+}
+
+function isImageMediaType(mediaType: string | undefined): boolean {
+  return typeof mediaType === "string" ? mediaType.startsWith("image/") : false;
+}
+
+/**
+ * Describes an image as a {@link NonTextPart}, reading pixel dimensions from
+ * the byte header when bytes are supplied (cheap header read, no full decode).
+ */
+function imagePart(
+  provider: ImageProvider,
+  bytes: Uint8Array | undefined,
+  detail?: "low" | "high",
+): NonTextPart {
+  let size: ReturnType<typeof parseImageDimensions>;
+  if (bytes) size = parseImageDimensions(bytes);
+  return { provider, width: size?.width, height: size?.height, detail };
+}
+
+/** Non-image binary file: no cost formula, so it maps to the "default" family. */
+function binaryPart(): NonTextPart {
+  return { provider: "default" };
+}
+
+// ---------------------------------------------------------------------------
+// Image dimension parsing (cheap header parse — PNG IHDR / JPEG SOF)
+// ---------------------------------------------------------------------------
+
+/**
+ * Reads pixel dimensions from PNG / JPEG headers without decoding the image.
+ * Returns undefined for unrecognized formats, truncated data, or a header that
+ * declares a zero width/height — the caller then falls to the conservative
+ * constant (ADR-0012 §Token estimation) instead of pricing a 0-pixel image.
+ */
+export function parseImageDimensions(
+  bytes: Uint8Array,
+): { width: number; height: number } | undefined {
+  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
+  // A zero dimension would make the per-pixel formulas under-count, so it is
+  // reported as unknown rather than as a real size.
+  const known = (width: number, height: number) =>
+    width > 0 && height > 0 ? { width, height } : undefined;
+
+  // PNG: 8-byte signature, then IHDR chunk with width@16, height@20 (BE).
+  if (
+    bytes.length >= 24 &&
+    bytes[0] === 0x89 &&
+    bytes[1] === 0x50 &&
+    bytes[2] === 0x4e &&
+    bytes[3] === 0x47
+  ) {
+    return known(view.getUint32(16), view.getUint32(20));
+  }
+
+  // JPEG: 0xFFD8 start, then walk segment markers to the SOF that carries dims.
+  if (bytes.length >= 4 && bytes[0] === 0xff && bytes[1] === 0xd8) {
+    let offset = 2;
+    // An SOF header needs 9 readable bytes counting from its 0xFF marker byte.
+    while (offset + 9 <= bytes.length) {
+      if (bytes[offset] !== 0xff) {
+        offset++;
+        continue;
+      }
+      const marker = bytes[offset + 1];
+      // 0xFF fill bytes pad before a real marker; consume one and re-read so a
+      // run of fill bytes doesn't get mistaken for a segment.
+      if (marker === 0xff) {
+        offset++;
+        continue;
+      }
+      // 0xFF00 is a stuffed data byte inside entropy-coded data, not a marker.
+      if (marker === 0x00) {
+        offset += 2;
+        continue;
+      }
+      // SOF0..SOF15 carry frame dimensions, excluding DHT(C4)/JPG(C8)/DAC(CC).
+      const isSof =
+        marker >= 0xc0 &&
+        marker <= 0xcf &&
+        marker !== 0xc4 &&
+        marker !== 0xc8 &&
+        marker !== 0xcc;
+      if (isSof) {
+        return known(view.getUint16(offset + 7), view.getUint16(offset + 5));
+      }
+      // Standalone markers with no length payload: SOI(D8), EOI(D9),
+      // RSTn(D0-D7), TEM(01). Skip the 2-byte marker.
+      if (
+        marker === 0xd8 ||
+        marker === 0xd9 ||
+        marker === 0x01 ||
+        (marker >= 0xd0 && marker <= 0xd7)
+      ) {
+        offset += 2;
+        continue;
+      }
+      const segLength = (bytes[offset + 2] << 8) | bytes[offset + 3];
+      if (segLength < 2) return undefined;
+      offset += 2 + segLength;
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Upper bound on bytes decoded from a data URL for header parsing. PNG
+ * dimensions live in the first 24 bytes; a JPEG SOF marker is almost always
+ * within the first few KB. Decoding only a 64 KB prefix avoids materializing a
+ * multi-MB image on every estimation pass — we never need the pixel data, only
+ * the header. base64 packs 3 bytes per 4 chars, so cap the input accordingly.
+ */
+const HEADER_DECODE_MAX_BYTES = 64 * 1024;
+const HEADER_DECODE_MAX_B64_CHARS = Math.ceil(HEADER_DECODE_MAX_BYTES / 3) * 4;
+
+/**
+ * Decodes a bounded prefix of the bytes behind a base64 data URL; any other
+ * URL (hosted http/https, non-base64 data URL) returns undefined so the caller
+ * falls to the conservative constant. Only the `data:…,` header is inspected —
+ * never the payload — and media-type parameters before `;base64` are accepted.
+ */
+function bytesFromUrl(url: string): Uint8Array | undefined {
+  const comma = url.startsWith("data:") ? url.indexOf(",") : -1;
+  if (comma === -1 || !url.slice(0, comma).endsWith(";base64")) return undefined;
+  try {
+    const b64 = url.slice(comma + 1, comma + 1 + HEADER_DECODE_MAX_B64_CHARS);
+    return new Uint8Array(Buffer.from(b64, "base64"));
+  } catch {
+    return undefined;
+  }
+}
+
+/** Coerces a ModelMessage byte container to a Uint8Array when it holds bytes. */
+function bytesFromDataContent(data: DataContent | URL): Uint8Array | undefined {
+  // Strings go through the data-URL decoder; URL objects carry no bytes.
+  if (typeof data === "string") return bytesFromUrl(data);
+  if (data instanceof URL) return undefined;
+  if (data instanceof Uint8Array) return data;
+  const isBuffer = typeof Buffer !== "undefined" && Buffer.isBuffer(data);
+  return data instanceof ArrayBuffer || isBuffer
+    ? new Uint8Array(data)
+    : undefined;
+}
+
+// ---------------------------------------------------------------------------
+// Tier 1 adapter — UIMessage → CountUnit (one unit per message)
+// ---------------------------------------------------------------------------
+
+/** Folds the model-bound parts of one UIMessage into a CountUnit. */
+function uiMessageToCountUnit(
+  message: PlatypusUIMessage,
+  provider: ImageProvider,
+): CountUnit {
+  let text = "";
+  const nonText: NonTextPart[] = [];
+
+  for (const part of message.parts ?? []) {
+    const type = part.type;
+
+    if (type === "text") {
+      text += (part as { text: string }).text;
+      continue;
+    }
+
+    if (type === "file") {
+      const file = part as { mediaType?: string; url: string };
+      // Decode the URL only for images: a non-image binary pays the flat cost
+      // whatever its bytes are, so decoding them would be wasted work.
+      nonText.push(
+        isImageMediaType(file.mediaType)
+          ? imagePart(provider, bytesFromUrl(file.url))
+          : binaryPart(),
+      );
+      continue;
+    }
+
+    // Tool invocations (`tool-<name>` and `dynamic-tool`) are model-bound and
+    // text-like: fold their input + output into the char/4 blob.
+    if (type === "dynamic-tool" || type.startsWith("tool-")) {
+      const tool = part as {
+        input?: unknown;
+        output?: unknown;
+        errorText?: string;
+      };
+      if (tool.input !== undefined) text += stableStringify(tool.input);
+      // Count the output OR the error text. `convertToModelMessages` maps an
+      // `output-error` UI part to a `tool-result` with an `error-text` output
+      // that the model adapter counts, so skipping errorText here would break
+      // the §One estimator equality for failed tool calls.
+      if (tool.output !== undefined) {
+        text += stableStringify(tool.output);
+      } else if (tool.errorText !== undefined) {
+        text += stableStringify(tool.errorText);
+      }
+      continue;
+    }
+
+    // Everything else (reasoning, source-url, source-document, step-start,
+    // data-*) is UI-only and excluded on both sides (ADR-0012 §One estimator).
+  }
+
+  return { role: message.role, text, nonText };
+}
+
+/** Tier 1 adapter: turns each UIMessage into one neutral count unit. */
+export function uiMessagesToCountUnits(
+  messages: PlatypusUIMessage[],
+  provider: ImageProvider = "default",
+): CountUnit[] {
+  return messages.map((message) => uiMessageToCountUnit(message, provider));
+}
+
+// ---------------------------------------------------------------------------
+// Tier 2 adapter — ModelMessage → CountUnit (one unit per message)
+// ---------------------------------------------------------------------------
+
+/**
+ * Extracts the model-visible string from a tool-result output wrapper. The
+ * result always goes through `stableStringify`, mirroring how the UI adapter
+ * folds the raw output. `execution-denied` carries a `reason` (empty string
+ * when absent); every other variant (`text` / `error-text` / `json` /
+ * `error-json` / `content`) carries a `value`.
+ */
+function toolResultOutputText(output: ToolResultPart["output"]): string {
+  if (output.type === "execution-denied") {
+    return stableStringify(output.reason ?? "");
+  }
+  return stableStringify(output.value);
+}
+
+/**
+ * Reduces one ModelMessage to a {@link CountUnit}:
+ *  - text parts, tool-call inputs and tool-result outputs fold into `text`;
+ *  - image parts and image-typed file parts become non-text parts whose
+ *    dimensions are parsed from the bytes when available;
+ *  - any other file part pays the flat binary cost;
+ *  - every remaining part type contributes nothing.
+ */
+function modelMessageToCountUnit(
+  message: ModelMessage,
+  provider: ImageProvider,
+): CountUnit {
+  const { role, content } = message;
+  const nonText: NonTextPart[] = [];
+
+  // Plain-string content is all text: there are no parts to classify.
+  if (typeof content === "string") {
+    return { role, text: content, nonText };
+  }
+
+  // Array content: classify each part as text-like, non-text, or ignored.
+  let text = "";
+  for (const part of content) {
+    if (part.type === "text") {
+      text += part.text;
+    } else if (part.type === "tool-call") {
+      text += stableStringify(part.input);
+    } else if (part.type === "tool-result") {
+      text += toolResultOutputText(part.output);
+    } else if (part.type === "image") {
+      const bytes = bytesFromDataContent(part.image);
+      nonText.push(imagePart(provider, bytes));
+    } else if (part.type === "file") {
+      if (isImageMediaType(part.mediaType)) {
+        const bytes = bytesFromDataContent(part.data);
+        nonText.push(imagePart(provider, bytes));
+      } else {
+        nonText.push(binaryPart());
+      }
+    }
+    // Any other part type (reasoning / tool-approval-*) is UI-only or a
+    // control part, and is excluded from the count.
+  }
+
+  return { role, text, nonText };
+}
+
+/** Tier 2 adapter: ModelMessages → neutral count units. */
+export function modelMessagesToCountUnits(
+  messages: ModelMessage[],
+  provider: ImageProvider = "default",
+): CountUnit[] {
+  return messages.map((m) => modelMessageToCountUnit(m, provider));
+}
+
+// ---------------------------------------------------------------------------
+// Per-turn overhead — system prompt + tool schemas
+// (ADR-0012 §Tier 1 (trigger projection))
+// ---------------------------------------------------------------------------
+
+/**
+ * Flat fallback for a tool whose input schema cannot be serialized (e.g. a
+ * provider-defined tool with no JSON-schema representation). Conservative —
+ * over-counting beats overflow.
+ */
+export const TOOL_SCHEMA_FALLBACK_TOKENS = 200;
+
+/**
+ * Serialized-schema char length cached per input-schema object. The
+ * `asSchema(...) → stableStringify` conversion is the expensive part of overhead
+ * estimation and a tool's schema object is stable across turns, so memoize it.
+ * A WeakMap keyed by the schema object never pins a tool that goes out of scope.
+ */
+const schemaLenCache = new WeakMap<object, number>();
+
+/**
+ * Estimates the tokens of the per-turn payload that is NOT in the message
+ * history: the rendered system prompt plus every tool's name, description, and
+ * JSON input schema — all sent to the model on every turn, and the dominant
+ * cause of the trigger under-count on tool-bearing agents
+ * (ADR-0012 §Tier 1 (trigger projection)) (observed 8888
+ * provider-reported vs ~986 message-only). Same char/4 rule as the single
+ * estimator; the result feeds `Tier1Input.overheadTokens`.
+ */
+export function estimateOverheadTokens(
+  systemPrompt: string | undefined,
+  tools: Record<string, Tool> | undefined,
+): number {
+  let tokens = Math.ceil((systemPrompt ?? "").length / CHARS_PER_TOKEN);
+  for (const [name, tool] of Object.entries(tools ?? {})) {
+    const t = tool as { description?: string; inputSchema?: unknown };
+    let schemaLen = 0;
+    if (t.inputSchema != null) {
+      // Functions are valid WeakMap keys too, so function-typed schemas cache.
+      const keyable = ["object", "function"].includes(typeof t.inputSchema);
+      const key = keyable ? (t.inputSchema as object) : undefined;
+      const cached = key ? schemaLenCache.get(key) : undefined;
+      if (cached !== undefined) {
+        schemaLen = cached;
+      } else {
+        try {
+          // asSchema is the SDK's own conversion to the wire-format JSON schema.
+          const wire = asSchema(t.inputSchema as never).jsonSchema;
+          schemaLen = stableStringify(wire).length;
+          if (key) schemaLenCache.set(key, schemaLen);
+        } catch {
+          tokens += TOOL_SCHEMA_FALLBACK_TOKENS;
+        }
+      }
+    }
+    // Sum of lengths == length of the concatenation, so one char/4 divide holds.
+    const baseLen = (name + (t.description ?? "")).length + schemaLen;
+    tokens += Math.ceil(baseLen / CHARS_PER_TOKEN);
+  }
+  return tokens;
+}
+
+/**
+ * Maps a provider `providerType` (as stored on the provider row) to the image
+ * cost family. Bedrock most commonly serves Anthropic models, so it maps to
+ * `anthropic`; OpenRouter is heterogeneous and maps to `default`.
+ */
+export function imageProviderFor(providerType: string): ImageProvider {
+  // Anthropic and Bedrock share one image cost family.
+  if (providerType === "Anthropic" || providerType === "Bedrock") {
+    return "anthropic";
+  }
+  if (providerType === "OpenAI") {
+    return "openai";
+  }
+  // Every other provider type uses the generic family.
+  return "default";
+}
diff --git a/apps/backend/src/services/chat-execution.test.ts b/apps/backend/src/services/chat-execution.test.ts
index fbdb7645..7291ff93 100644
--- a/apps/backend/src/services/chat-execution.test.ts
+++ b/apps/backend/src/services/chat-execution.test.ts
@@ -65,14 +65,27 @@ vi.mock("@ai-sdk/mcp", () => ({
   auth: vi.fn(),
 }));
 
+// Partial mock of "ai": only `generateText` is replaced (used by the compaction
+// summarizer). `createIdGenerator` and the rest stay real via importActual.
+const { mockGenerateText } = vi.hoisted(() => ({
+  mockGenerateText: vi.fn(),
+}));
+vi.mock("ai", async (importActual) => {
+  const actual = await importActual<typeof import("ai")>();
+  return { ...actual, generateText: mockGenerateText };
+});
+
 import {
   prepareChatTurn,
+  buildCompactionRuntime,
   NotFoundError,
   ValidationError,
   createToolHeartbeat,
   shouldInjectNativeSearch,
 } from "./chat-execution.ts";
 import { createInMemoryChatTurnQueries } from "./chat-execution.test-fixtures.ts";
+import { logger } from "../logger.ts";
+import { contextWindowResolver } from "../runs/context-window.ts";
 
 const baseProvider = {
   id: "p1",
@@ -650,4 +663,110 @@ describe("chat-execution", () => {
       ).toBe(true);
     });
   });
+
+  describe("buildCompactionRuntime summarize (ADR-0012 §Summarizer hardening / review Fix B)", () => {
+    const buildRuntime = (signal?: AbortSignal, onActivity?: () => void) =>
+      buildCompactionRuntime({
+        chatId: "chat-1",
+        provider: baseProvider,
+        resolvedModelId: "gpt-4",
+        opened: {
+          languageModel: vi.fn(() => ({ modelId: "task-model" })),
+        } as never,
+        onActivity,
+        signal,
+      });
+
+    beforeEach(() => {
+      mockGenerateText.mockReset();
+      vi.spyOn(contextWindowResolver, "resolve").mockResolvedValue({
+        contextWindow: 128_000,
+        maxOutputTokens: 4096,
+        source: "registry",
+      } as never);
+    });
+
+    afterEach(() => {
+      vi.restoreAllMocks();
+      vi.useRealTimers();
+    });
+
+    it("threads the abort signal, output ceiling, and ordered prompt into generateText", async () => {
+      mockGenerateText.mockResolvedValue({
+        text: "SUMMARY",
+        usage: {},
+        finishReason: "stop",
+      });
+      const controller = new AbortController();
+      const runtime = await buildRuntime(controller.signal);
+
+      const out = await runtime.summarize("history text");
+
+      expect(out).toBe("SUMMARY");
+      expect(mockGenerateText).toHaveBeenCalledTimes(1);
+      const arg = mockGenerateText.mock.calls[0][0] as {
+        maxOutputTokens?: number;
+        abortSignal?: AbortSignal;
+        prompt?: string;
+        system: string;
+      };
+      expect(arg.maxOutputTokens).toBe(4000);
+      expect(arg.abortSignal).toBe(controller.signal);
+      expect(arg.prompt).toBe("history text");
+      expect(arg.system).toContain("context checkpoint compaction");
+      // Sections ordered most-critical-first so truncation drops the tail
+      // (file/tool detail), not intent or next step.
+      const intentIdx = arg.system.indexOf("Intent & open requests");
+      const nextStepIdx = arg.system.indexOf("Current state & next step");
+      const filesIdx = arg.system.indexOf("Files & tools touched");
+      expect(intentIdx).toBeGreaterThanOrEqual(0);
+      expect(nextStepIdx).toBeGreaterThan(intentIdx);
+      expect(filesIdx).toBeGreaterThan(nextStepIdx);
+    });
+
+    it("warns but still returns the summary when the output ceiling is hit", async () => {
+      mockGenerateText.mockResolvedValue({
+        text: "TRUNCATED",
+        usage: {},
+        finishReason: "length",
+      });
+      const warn = vi.spyOn(logger, "warn").mockImplementation(() => {});
+      const runtime = await buildRuntime();
+
+      const out = await runtime.summarize("x");
+
+      expect(out).toBe("TRUNCATED");
+      expect(warn).toHaveBeenCalledWith(
+        expect.objectContaining({ maxTokens: 4000 }),
+        expect.stringContaining("maxOutputTokens ceiling"),
+      );
+    });
+
+    it("bumps onActivity on each heartbeat tick while summarize runs, then stops", async () => {
+      let resolveGen: (v: unknown) => void = () => {};
+      mockGenerateText.mockImplementation(
+        () =>
+          new Promise((resolve) => {
+            resolveGen = resolve;
+          }),
+      );
+      const onActivity = vi.fn();
+      const runtime = await buildRuntime(undefined, onActivity);
+
+      vi.useFakeTimers();
+      const pending = runtime.summarize("x");
+
+      // Two heartbeat intervals (10 s each) elapse mid-call.
+      await vi.advanceTimersByTimeAsync(25_000);
+      expect(onActivity).toHaveBeenCalledTimes(2);
+
+      resolveGen({ text: "S", usage: {}, finishReason: "stop" });
+      await pending;
+
+      // Interval cleared in the finally block — no further bumps.
+      onActivity.mockClear();
+      await vi.advanceTimersByTimeAsync(30_000);
+      expect(onActivity).not.toHaveBeenCalled();
+    });
+  });
 });
diff --git a/apps/backend/src/services/chat-execution.ts b/apps/backend/src/services/chat-execution.ts
index 6d5ee3d6..20dea580 100644
--- a/apps/backend/src/services/chat-execution.ts
+++ b/apps/backend/src/services/chat-execution.ts
@@ -3,7 +3,7 @@ import {
   type MCPClient,
 } from "@ai-sdk/mcp";
 import { openProvider } from "./provider.ts";
-import { and, eq, or, inArray } from "drizzle-orm";
+import { and, eq, or, inArray, sql } from "drizzle-orm";
 import { db } from "../index.ts";
 import {
   agent as agentTable,
@@ -27,11 +27,46 @@ import {
   type MemorySummary,
 } from "./memory-retrieval.ts";
 import type { Provider, Skill } from "@platypus/schemas";
-import type { LanguageModel, Tool } from "ai";
+import {
+  createIdGenerator,
+  generateText,
+  type LanguageModel,
+  type Tool,
+} from "ai";
 import { logger } from "../logger.ts";
 import { buildMcpTransportConfig } from "./mcp-oauth-provider.ts";
 import { inlineFileUrls } from "../storage/utils.ts";
 import type { PlatypusUIMessage } from "../types.ts";
+import { chat as chatTable } from "../db/schema.ts";
+import {
+  contextWindowResolver,
+  DEFAULT_CONTEXT_WINDOW,
+} from "../runs/context-window.ts";
+import {
+  estimateTokens,
+  estimateOverheadTokens,
+  imageProviderFor,
+  uiMessagesToCountUnits,
+  type ImageProvider,
+} from "../runs/token-estimate.ts";
+import {
+  applyTier1Compaction,
+  affectedBelowWatermark,
+  buildCompactionTraceMessage,
+  buildTier2PrepareStep,
+  computeBudget,
+  drizzleCompactionStore,
+  invalidateCompaction,
+  DEFAULT_COMPACTION_CONFIG,
+  setCompactionDirty,
+  type Budget,
+  type CompactionConfig,
+  type CompactionState,
+  type CompactionTrace,
+  type Summarize,
+  type Tier2Context,
+} from "../runs/compaction.ts";
+import type { RecoveryContext } from "../runs/recovery.ts";
 
 // --- Errors ---
 
@@ -91,6 +126,13 @@ type GenerationConfig = {
  * messages) — those arrive as separate `PrepareChatTurnInput` fields.
  */
 export type ChatTurnRequest = {
+  /**
+   * Chat id. Present for interactive chat turns (the chatSubmit payload);
+   * absent for headless callers (triggers, sub-agents) whose `request` carries
+   * no chat. Tier 1 compaction keys on it — see the skip guard in
+   * `prepareChatTurn` (ADR-0012 §Sub-agents: headless runs are Tier 2 only).
+   */
+  id?: string;
   agentId?: string;
   providerId?: string;
   modelId?: string;
@@ -118,6 +160,12 @@ export type ChatTurn = {
     presencePenalty?: number;
     seed?: number;
   };
+  /**
+   * Set when Tier 1 compaction fired this turn (ADR-0012 §Compaction trace in the timeline). agent-runner emits
+   * a synthetic compact_context tool-call + tool-result pair into the stream so
+   * the compaction is visible in the chat timeline.
+   */
+  compactionTrace?: CompactionTrace;
   resolved: {
     agentId?: string;
     providerId: string;
@@ -129,7 +177,23 @@ export type ChatTurn = {
     frequencyPenalty?: number;
     presencePenalty?: number;
     seed?: number;
+    /** Resolved context window for the main model (ADR-0012 §Context-usage ring, ADR-0012 §Per-message stats). */
+    contextWindow: number;
+    /** True when contextWindow fell to the conservative default (ADR-0012 §Context-usage ring: ring → neutral). */
+    contextWindowIsDefault: boolean;
   };
+  /**
+   * Context-overflow recovery wiring (ADR-0012 §Recovery, ADR-0012 §Recovery is the net). Always present — recovery is
+   * the safety net and stays on even when proactive compaction is disabled.
+   * agent-runner wraps the model with the recovery middleware using this.
+   */
+  recovery: RecoveryContext;
+  /**
+   * Tier 2 in-turn compaction config (ADR-0012 §Tier 2). Null when proactive compaction is
+   * disabled (ADR-0012 §Config & kill switch or agent override). agent-runner builds the
+   * prepareStep callback from this and wires it into streamText/generateText.
+   */
+  tier2: Tier2Context | null;
   dispose: () => Promise<void>;
 };
 
@@ -161,6 +225,20 @@ export type PrepareChatTurnInput = {
    * yield bumps invoke with no event (timer-only).
    */
   onActivity?: (event?: ToolActivityEvent) => void;
+  /**
+   * Messages as they were in the DB BEFORE this submission's `ChatSink.onStart`
+   * overwrote them — the ADR-0012 §Summary invalidation baseline for detecting edits below the watermark
+   * (ADR-0012 §Summary invalidation). Loaded by agent-runner before calling onStart. When absent the ADR-0012 §Summary invalidation
+   * check falls back to a DB read that now returns the post-overwrite state.
+   */
+  priorMessages?: PlatypusUIMessage[];
+  /**
+   * Run abort signal from the run registry. Threaded into the compaction
+   * summarizer so a cancelled or timed-out run aborts the in-flight
+   * `generateText` (review Fix B). Optional: callers without a registry-backed
+   * run (tests, ad-hoc) omit it and the summarize call simply runs uncancelled.
+   */
+  signal?: AbortSignal;
 };
 
 /**
@@ -418,6 +496,418 @@ export const drizzleChatTurnQueries: ChatTurnQueries = {
   },
 };
 
+// --- Tier 1 context compaction (ADR-0012) ---
+
+const EMPTY_COMPACTION_STATE: CompactionState = {
+  version: 0,
+  summaryWatermark: null,
+  contextSummary: null,
+  compactionDirty: false,
+};
+
+/**
+ * Resolves the effective global compaction config from DEFAULT_COMPACTION_CONFIG
+ * + env overrides (ADR-0012 §Config & kill switch). Extracted so both
+ * buildCompactionRuntime and the context-window endpoint (which surfaces
+ * keepRecentMessages to the force-compact confirm gate) share one source of
+ * truth. Pure — depends only on process.env.
+ */
+export function resolveCompactionConfig(): CompactionConfig {
+  const config = { ...DEFAULT_COMPACTION_CONFIG };
+  // Global kill switch (ADR-0012 §Config & kill switch) gates proactive compaction; recovery is unaffected.
+  if (process.env.COMPACTION_ENABLED === "false") {
+    config.compactionEnabled = false;
+  }
+  // Optional env overrides for the global ceiling (ADR-0012 §Config & kill switch). Unset/blank/invalid →
+  // the DEFAULT_COMPACTION_CONFIG value stands, so production behavior is
+  // unchanged. Intended for tuning the trigger on test deployments without a
+  // code change. Keep targetRatio < triggerRatio or compaction re-fires every
+  // turn (the thrash trap).
+  // Reads + RANGE-VALIDATES a numeric env override (ADR-0012 §Config & kill switch). An out-of-range or
+  // non-finite value is rejected (warn + fall back to the default) rather than
+  // silently applied: the old `Number.isFinite`-only check let `0` and negatives
+  // through, so `COMPACTION_KEEP_RECENT=0` summarized the current message away
+  // and `COMPACTION_TRIGGER_RATIO=0` fired on empty chats.
+  const numEnv = (
+    name: string,
+    raw: string | undefined,
+    opts: { min?: number; max?: number; integer?: boolean } = {},
+  ): number | undefined => {
+    // Whitespace-only counts as blank: Number("  ") is 0, which would otherwise
+    // be applied silently wherever the allowed minimum is 0.
+    if (raw == null || raw.trim() === "") return undefined;
+    const parsed = Number(raw);
+    const n = opts.integer ? Math.floor(parsed) : parsed;
+    const { min = -Infinity, max = Infinity } = opts;
+    if (!Number.isFinite(parsed) || n < min || n > max) {
+      logger.warn(
+        { env: name, raw, ...opts },
+        "ignoring out-of-range compaction env override; using default",
+      );
+      return undefined;
+    }
+    return n;
+  };
+  const RATIO = { min: 0.01, max: 1 };
+  config.triggerRatio =
+    numEnv(
+      "COMPACTION_TRIGGER_RATIO",
+      process.env.COMPACTION_TRIGGER_RATIO,
+      RATIO,
+    ) ?? config.triggerRatio;
+  config.targetRatio =
+    numEnv(
+      "COMPACTION_TARGET_RATIO",
+      process.env.COMPACTION_TARGET_RATIO,
+      RATIO,
+    ) ?? config.targetRatio;
+  config.reserveRatio =
+    numEnv("COMPACTION_RESERVE_RATIO", process.env.COMPACTION_RESERVE_RATIO, {
+      min: 0,
+      max: 0.9,
+    }) ?? config.reserveRatio;
+  config.keepRecentMessages =
+    numEnv("COMPACTION_KEEP_RECENT", process.env.COMPACTION_KEEP_RECENT, {
+      min: 1,
+      integer: true,
+    }) ?? config.keepRecentMessages;
+  config.minPrunableChars =
+    numEnv(
+      "COMPACTION_MIN_PRUNABLE_CHARS",
+      process.env.COMPACTION_MIN_PRUNABLE_CHARS,
+      {
+        min: 1,
+        integer: true,
+      },
+    ) ?? config.minPrunableChars;
+  config.minRecentPrunableChars =
+    numEnv(
+      "COMPACTION_MIN_RECENT_PRUNABLE_CHARS",
+      process.env.COMPACTION_MIN_RECENT_PRUNABLE_CHARS,
+      { min: 1, integer: true },
+    ) ?? config.minRecentPrunableChars;
+  // ADR-0012 §Stage 0 — context editing. Disabled via
+  // COMPACTION_CONTEXT_EDITING_ENABLED=false; recency/size gates tunable.
+  if (process.env.COMPACTION_CONTEXT_EDITING_ENABLED === "false") {
+    config.contextEditingEnabled = false;
+  }
+  config.keepRecentToolResults =
+    numEnv(
+      "COMPACTION_KEEP_RECENT_TOOL_RESULTS",
+      process.env.COMPACTION_KEEP_RECENT_TOOL_RESULTS,
+      { min: 0, integer: true },
+    ) ?? config.keepRecentToolResults;
+  config.minEditableToolChars =
+    numEnv(
+      "COMPACTION_MIN_EDITABLE_TOOL_CHARS",
+      process.env.COMPACTION_MIN_EDITABLE_TOOL_CHARS,
+      { min: 1, integer: true },
+    ) ?? config.minEditableToolChars;
+
+  // Hysteresis backstop (ADR-0012 §Tier 1 (hysteresis)): target must stay below trigger or
+  // compaction re-fires every turn (ADR-0012 §Tier 1 hysteresis). The earlier runtime clamp was
+  // dropped when per-agent config was removed (ADR-0012 §Config & kill switch); restore it here so an operator who
+  // sets COMPACTION_TARGET_RATIO >= COMPACTION_TRIGGER_RATIO still runs safely.
+  if (config.targetRatio >= config.triggerRatio) {
+    const clamped = config.triggerRatio * 0.9;
+    logger.warn(
+      {
+        targetRatio: config.targetRatio,
+        triggerRatio: config.triggerRatio,
+        clamped,
+      },
+      "COMPACTION_TARGET_RATIO >= COMPACTION_TRIGGER_RATIO; clamping target to triggerRatio*0.9 (hysteresis)",
+    );
+    config.targetRatio = clamped;
+  }
+  return config;
+}
+
+/**
+ * Loads the canonical (raw) persisted history for a chat. Exported so
+ * agent-runner can snapshot it BEFORE `ChatSink.onStart` overwrites the row —
+ * that snapshot is the ADR-0012 §Summary invalidation baseline (ADR-0012 §Summary invalidation: onStart runs before prepareChatTurn,
+ * so a read inside applyTier1IfNeeded would see the just-submitted messages).
+ */
+export async function loadChatMessages(
+  chatId: string,
+): Promise<PlatypusUIMessage[]> {
+  const [row] = await db
+    .select({ messages: chatTable.messages })
+    .from(chatTable)
+    .where(eq(chatTable.id, chatId))
+    .limit(1);
+  return (row?.messages as PlatypusUIMessage[] | null) ?? [];
+}
+
+/**
+ * Newest-first scan for the last assistant message carrying a POSITIVE
+ * provider-reported `contextTokens` (the ADR-0012 §Context-usage ring stat). Skips two messages that would
+ * otherwise shadow the real baseline:
+ *  - the ADR-0012 §Force-compact on demand standalone trace message (assistant role, no `metadata.stats`) — ADR-0012 §Tier 1 (hysteresis);
+ *  - a turn from a usage-less provider stamped `contextTokens = 0` — ADR-0012 §Tier 1 (trigger projection).
+ * Either would make the Tier 1 projection drop the corrective baseline (and, for
+ * the 0 case, the cold-start margin too).
+ */
+function findLastInputTokens(
+  messages: PlatypusUIMessage[],
+): number | undefined {
+  type WithStats = { stats?: { contextTokens?: number } } | undefined;
+  for (let idx = messages.length - 1; idx >= 0; idx -= 1) {
+    const candidate = messages[idx];
+    if (candidate.role !== "assistant") continue;
+    const reported = (candidate.metadata as WithStats)?.stats?.contextTokens;
+    if (typeof reported === "number" && reported > 0) return reported;
+  }
+  return undefined;
+}
+
+/**
+ * Everything the compaction machinery needs that is resolved once per turn:
+ * the budget (from the resolved context window), the effective config, the
+ * summarizer, and the summarizer's own window (ADR-0012 §Tier 1 (summarizer model & map-reduce)). Shared by Tier 1
+ * and the recovery middleware (ADR-0012 §Recovery) so the two never disagree.
+ */
+type CompactionRuntime = {
+  budget: Budget;
+  config: CompactionConfig;
+  imageProvider: ImageProvider;
+  summarize: Summarize;
+  summarizerWindow?: number;
+  /** Resolved context window for the main model (ADR-0012 §Context-usage ring). */
+  contextWindow: number;
+  /** True when the window fell to the conservative default (ADR-0012 §Context-usage ring: ring → neutral). */
+  contextWindowIsDefault: boolean;
+};
+
+/** Safety ceiling on summarizer output (ADR-0012 §Summarizer hardening). Prevents a runaway
+ * model from producing a summary longer than its input. The system prompt
+ * hard-limits to 1500 tokens; this 4000 backstop catches models that ignore
+ * the instruction (e.g. qwen36 on large tool-heavy inputs). */
+const SUMMARIZE_MAX_OUTPUT_TOKENS = 4000;
+
+/** Heartbeat interval while the summarizer runs (ADR-0012 §Summarizer hardening). Resets the
+ * per-step stall watchdog so a slow summarize call is not misidentified as a
+ * frozen run and killed before it returns. */
+const SUMMARIZE_HEARTBEAT_INTERVAL_MS = 10_000;
+
+/**
+ * Builds the per-turn compaction runtime. Never throws: a failed window
+ * resolution falls back to the conservative default so recovery (ADR-0012 §Recovery is the net) always
+ * has a working configuration.
+ */
+export async function buildCompactionRuntime(args: {
+  chatId?: string;
+  provider: Provider;
+  resolvedModelId: string;
+  opened: ReturnType<typeof openProvider>;
+  /** When present, called every ~10 s during `summarize` to keep the per-step
+   *  stall watchdog alive (ADR-0012 §Summarizer hardening). */
+  onActivity?: () => void;
+  /** Run abort signal, threaded into the summarizer `generateText` so a
+   *  cancelled / per-run-timed-out run aborts the call instead of leaking it
+   *  past the heartbeat-suppressed per-step watchdog (review Fix B). */
+  signal?: AbortSignal;
+}): Promise<CompactionRuntime> {
+  const { chatId, provider, resolvedModelId, opened, onActivity, signal } = args;
+
+  const config = resolveCompactionConfig();
+
+  // ADR-0012 §Window resolution: resolve both windows concurrently (they are independent).
+  const taskModelId = provider.taskModelId || resolvedModelId;
+  const [mainWindow, summarizerWindowResult] = await Promise.all([
+    contextWindowResolver.resolve(provider, resolvedModelId).catch((error) => {
+      logger.error(
+        { error, chatId, resolvedModelId },
+        "context window resolution failed; using conservative default",
+      );
+      return null;
+    }),
+    contextWindowResolver.resolve(provider, taskModelId).catch((error) => {
+      // Non-fatal (summarizerWindow stays undefined), but never silent.
+      logger.warn(
+        { error, chatId, taskModelId },
+        "summarizer context window resolution failed; summarizer window unknown",
+      );
+      return null;
+    }),
+  ]);
+
+  const contextWindow = mainWindow?.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
+  const maxOutputTokens = mainWindow?.maxOutputTokens;
+  const budget = computeBudget(contextWindow, maxOutputTokens, config);
+
+  const summarizerWindow = summarizerWindowResult
+    ? computeBudget(
+        summarizerWindowResult.contextWindow,
+        summarizerWindowResult.maxOutputTokens,
+        config,
+      ).inputBudget
+    : undefined;
+
+  // Summarizer uses the provider's task model, falling back to the main model
+  // when unset (ADR-0012 §Tier 1 (summarizer model)). generateText is one-shot, no tools.
+  const summarize = async (text: string): Promise<string> => {
+    const startedAt = Date.now();
+    // ADR-0012 §Summarizer hardening: Tier-1 compaction is legitimate long work, not a
+    // stall — ping the per-step watchdog so the 120 s limit does not kill the run.
+    const heartbeat = onActivity
+      ? setInterval(onActivity, SUMMARIZE_HEARTBEAT_INTERVAL_MS)
+      : null;
+    try {
+      const result = await generateText({
+        model: opened.languageModel(taskModelId),
+        // ADR-0012 §Summarizer hardening: structured handoff prompt — sections reduce loss
+        // across repeated re-compactions (Codex CLI pattern); the concise
+        // "hard limit" instruction pairs with the output ceiling. Sections are
+        // ordered most-critical-first: if the output is truncated at the ceiling
+        // (finishReason === "length"), the tail lost is file/tool detail, not intent.
+        system: `You are performing a context checkpoint compaction. Another instance of this assistant will resume using ONLY your summary plus the most recent messages — earlier history will be gone. Write a dense markdown handoff under these headings, in this order (omit one only if truly empty). Front-load the most important facts within each section — if you run long, later detail may be cut:
+
+- **Intent & open requests** — what the user wants, the latest explicit request, pending tasks.
+- **Current state & next step** — where things stand and the immediate next action.
+- **Decisions & facts** — conclusions, confirmed values/IDs/paths, constraints and user preferences (preserve any security-relevant instruction verbatim).
+- **Files & tools touched** — what was read/changed and why.
+
+If a prior summary appears in the history, integrate it — don't drop facts it captured. Be concise: hard limit 1500 tokens maximum. Output only the summary.`,
+        prompt: text,
+        // ADR-0012 §Summarizer hardening: backstop for models that ignore the prompt's limit.
+        maxOutputTokens: SUMMARIZE_MAX_OUTPUT_TOKENS,
+        // Fix B (review): the heartbeat above mutes the per-step watchdog, so the
+        // run's abort signal is what actually cancels a hung summarize call.
+        abortSignal: signal,
+      });
+      const { text: summary, usage, finishReason } = result;
+      logger.info(
+        {
+          metric: "summarize.latency_ms",
+          latencyMs: Date.now() - startedAt,
+          chatId,
+          taskModelId,
+          usage,
+          finishReason,
+          hitOutputCeiling: finishReason === "length",
+        },
+        "context compaction summarize",
+      );
+      if (finishReason === "length") {
+        logger.warn(
+          { chatId, taskModelId, maxTokens: SUMMARIZE_MAX_OUTPUT_TOKENS },
+          "summarize hit maxOutputTokens ceiling — summary may be truncated",
+        );
+      }
+      return summary;
+    } finally {
+      if (heartbeat !== null) clearInterval(heartbeat);
+    }
+  };
+
+  return {
+    budget,
+    config,
+    imageProvider: imageProviderFor(provider.providerType),
+    summarize,
+    summarizerWindow,
+    contextWindow,
+    contextWindowIsDefault: !mainWindow || mainWindow.source === "default",
+  };
+}
+
+type ApplyTier1Args = {
+  chatId: string;
+  runtime: CompactionRuntime;
+  /** Post-inlineFileUrls messages — used for the compaction itself (ADR-0012 §Token estimation). */
+  messages: PlatypusUIMessage[];
+  /**
+   * Pre-inlineFileUrls messages from this submission — used as the incoming
+   * side of the ADR-0012 §Summary invalidation divergence check (ADR-0012 §Summary invalidation). Must NOT be inlined: the persisted
+   * side also uses storage:// / http:// URLs, so both sides are comparable.
+   */
+  rawMessages: PlatypusUIMessage[];
+  /**
+   * Messages as they were in the DB BEFORE this submission's onStart overwrote
+   * them (ADR-0012 §Summary invalidation). When absent, the ADR-0012 §Summary invalidation check falls back to a fresh DB read, which
+   * returns the post-overwrite state and therefore never detects edits.
+   */
+  priorMessages?: PlatypusUIMessage[];
+  /** Estimated system-prompt + tool-schema payload for this turn (ADR-0012 §Tier 1 (trigger projection)). */
+  overheadTokens: number;
+  /** Provider-reported `usage.inputTokens` from the prior turn (ADR-0012 §Tier 1 (trigger projection), ADR-0012 §Context-usage ring). */
+  lastInputTokens?: number;
+};
+
+type Tier1IfNeededResult = {
+  messages: PlatypusUIMessage[];
+  compactionTrace?: CompactionTrace;
+};
+
+/**
+ * Reconstructs/advances the compacted view and persists any new summary — all
+ * best-effort. Any throw degrades to the uncompacted messages (recovery ADR-0012 §Recovery
+ * remains the safety net). Returns the messages to send to the model plus an
+ * optional compactionTrace for the stream trace (ADR-0012 §Compaction trace in the timeline).
+ */
+async function applyTier1IfNeeded(
+  args: ApplyTier1Args,
+): Promise<Tier1IfNeededResult> {
+  const { chatId, runtime, messages, rawMessages } = args;
+  try {
+    const store = drizzleCompactionStore;
+    let state = (await store.readState(chatId)) ?? EMPTY_COMPACTION_STATE;
+
+    // ADR-0012 §Summary invalidation: if the submitted history changed at/below the watermark
+    // (edit/delete/regenerate), reset the stale summary before compacting. The
+    // single submit endpoint is the only "edit handler" in this architecture.
+    //
+    // ADR-0012 §Summary invalidation fix: the baseline must be the DB state BEFORE this submission's
+    // onStart overwrote the row. agent-runner reads it before calling onStart
+    // and threads it here as `priorMessages`. We also compare the pre-inline
+    // (`rawMessages`) side so file-URL inlining doesn't trigger false positives.
+    if (state.summaryWatermark || state.contextSummary) {
+      const persisted = args.priorMessages ?? (await loadChatMessages(chatId));
+      const affected = affectedBelowWatermark(
+        persisted,
+        rawMessages,
+        state.summaryWatermark,
+      );
+      if (affected.length > 0) {
+        const orderedIds = rawMessages
+          .map((m) => m.id)
+          .filter((id): id is string => Boolean(id));
+        await invalidateCompaction(store, chatId, affected, orderedIds);
+        state = (await store.readState(chatId)) ?? state;
+      }
+    }
+
+    const result = await applyTier1Compaction({
+      chatId,
+      messages,
+      state,
+      budget: runtime.budget,
+      config: runtime.config,
+      imageProvider: runtime.imageProvider,
+      summarize: runtime.summarize,
+      summarizerWindow: runtime.summarizerWindow,
+      overheadTokens: args.overheadTokens,
+      lastInputTokens: args.lastInputTokens,
+      store,
+      onEvent: (event) =>
+        logger.info({ chatId, ...event }, "context-compacted"),
+    });
+
+    return {
+      messages: result.messages,
+      compactionTrace: result.compactionTrace,
+    };
+  } catch (error) {
+    logger.error(
+      { error, chatId },
+      "Tier 1 compaction failed; sending uncompacted history",
+    );
+    return { messages };
+  }
+}
+
 /**
  * Whether the provider's native web_search tool should be injected for this
  * turn. True only when the request opted into search AND the provider hasn't
@@ -461,6 +951,7 @@ export const prepareChatTurn = async (
     frontendUrl,
     runMode = "interactive",
     onActivity,
+    signal,
   } = input;
 
   const workspace = await queries.getWorkspace(workspaceId);
@@ -557,12 +1048,80 @@ export const prepareChatTurn = async (
 
   const systemPrompt = generation.systemPrompt!;
 
+  // --- Context compaction & recovery (ADR-0012) ---
+  // The runtime (window budget, config, summarizer) is resolved once and shared
+  // by Tier 1 and the recovery middleware so they never disagree. Never throws.
+  const compactionRuntime = await buildCompactionRuntime({
+    chatId: request.id,
+    provider,
+    resolvedModelId,
+    opened,
+    // Thread the activity callback so the summarizer heartbeat can bump the
+    // per-step stall watchdog (ADR-0012 §Summarizer hardening). `onActivity` accepts an
+    // optional event, so it satisfies the `() => void` heartbeat signature
+    // directly — the interval invokes it with no event (timer-only bump).
+    onActivity,
+    // Thread the abort signal so a cancelled/timed-out run aborts summarize
+    // instead of leaking past the heartbeat-suppressed watchdog (review Fix B).
+    signal,
+  });
+
+  // Per-turn overhead: system prompt + tool schemas, sent on every turn but
+  // invisible to a message-only estimate (ADR-0012 §Tier 1 (trigger projection)).
+  const overheadTokens = estimateOverheadTokens(systemPrompt, wrappedTools);
+
+  // Tier 1 is best-effort: a failure here must never break the turn — recovery
+  // (ADR-0012 §Recovery) is the net. Runs AFTER inlineFileUrls so the estimate sees the real
+  // payload (ADR-0012 §Token estimation). Cross-turn durable compaction is keyed by chat id; headless
+  // runs (triggers, sub-agents) carry no chat id and have no durable history to
+  // compact (ADR-0012 §Sub-agents — they are Tier 2 only), so send messages uncompacted.
+  const chatId = request.id;
+  const tier1Result = chatId
+    ? await applyTier1IfNeeded({
+        chatId,
+        runtime: compactionRuntime,
+        messages: inlinedMessages,
+        // Pre-inline messages for the summary-invalidation comparison (ADR-0012 §Summary invalidation):
+        // both sides must use the same URL format (storage:// / http://) to avoid false positives.
+        rawMessages: messages,
+        // Pre-overwrite baseline threaded from agent-runner (ADR-0012 §Summary invalidation).
+        priorMessages: input.priorMessages,
+        overheadTokens,
+        // Prior turn's provider-reported input token count (ADR-0012 §Tier 1 (trigger projection) / ADR-0012 §Context-usage ring): the
+        // corrective baseline for the Tier 1 trigger projection on turns ≥ 2.
+        // Absent on turn 1 → cold-start margin applies.
+        lastInputTokens: findLastInputTokens(messages),
+      })
+    : { messages: inlinedMessages };
+  const compactedMessages = tier1Result.messages;
+
+  // Recovery is the net (ADR-0012 §Recovery): always wired, even when proactive compaction is off.
+  // Headless runs get trim+retry but no dirty flag (no durable chat row).
+  const recovery: RecoveryContext = {
+    chatId,
+    imageProvider: compactionRuntime.imageProvider,
+    // ADR-0012 §Tier 1 (budget math): subtract the per-turn overhead so recovery uses the same effective
+    // target as Tier 1. Without this, a large overhead (e.g. 65%+ of the window)
+    // means the recovery retry still overflows even after trimming.
+    targetTokens: Math.max(
+      0,
+      compactionRuntime.budget.targetTokens - overheadTokens,
+    ),
+    keepRecentMessages: compactionRuntime.config.keepRecentMessages,
+    minPrunableChars: compactionRuntime.config.minPrunableChars,
+    summarize: compactionRuntime.summarize,
+    summarizerWindow: compactionRuntime.summarizerWindow,
+    markDirty: chatId
+      ? () => setCompactionDirty(drizzleCompactionStore, chatId)
+      : undefined,
+  };
+
   return {
     stream: {
       model,
       tools: wrappedTools,
       system: systemPrompt,
-      messages: inlinedMessages,
+      messages: compactedMessages,
       maxSteps: resolvedMaxSteps,
       temperature: generation.temperature,
       topP: generation.topP,
@@ -584,7 +1143,35 @@ export const prepareChatTurn = async (
       frequencyPenalty: agent ? undefined : generation.frequencyPenalty,
       presencePenalty: agent ? undefined : generation.presencePenalty,
       seed: agent ? undefined : request.seed,
+      contextWindow: compactionRuntime.contextWindow,
+      contextWindowIsDefault: compactionRuntime.contextWindowIsDefault,
     },
+    compactionTrace: tier1Result.compactionTrace,
+    recovery,
+    tier2: compactionRuntime.config.compactionEnabled
+      ? {
+          // ADR-0012 §Tier 1 (budget math) (Tier 2): the prepareStep estimate counts ModelMessages only —
+          // system prompt + tool schemas go as separate streamText params and
+          // are invisible to it, yet they consume the same window. Subtract the
+          // per-turn overhead so the trigger/target reflect the real wire
+          // payload (mirrors the Tier 1 and recovery targets above). Without
+          // this, a large overhead lets the payload blow past the budget before
+          // Tier 2 ever fires — exactly the tool-heavy case it exists for.
+          triggerTokens: Math.max(
+            0,
+            compactionRuntime.budget.triggerTokens - overheadTokens,
+          ),
+          targetTokens: Math.max(
+            0,
+            compactionRuntime.budget.targetTokens - overheadTokens,
+          ),
+          keepRecentMessages: compactionRuntime.config.keepRecentMessages,
+          minPrunableChars: compactionRuntime.config.minPrunableChars,
+          imageProvider: compactionRuntime.imageProvider,
+          summarize: compactionRuntime.summarize,
+          summarizerWindow: compactionRuntime.summarizerWindow,
+        }
+      : null,
     dispose,
   };
 };
@@ -913,20 +1500,107 @@ const loadSubAgents = async (
     description: sa.description,
   }));
 
+  // Memoize provider lookups as in-flight promises (F1): the concurrent fan-out
+  // below would otherwise miss a settled-value cache and re-fetch per sub-agent.
+  type SubProvider = { provider: Provider; opened: ReturnType<typeof openProvider> };
+  const providerCache = new Map<string, Promise<SubProvider | null>>();
+  const resolveSubProvider = (providerId: string) => {
+    let pending = providerCache.get(providerId);
+    if (!pending) {
+      pending = (async () => {
+        const p = await queries.getProvider(providerId, orgId, workspaceId);
+        return p ? { provider: p, opened: openProvider(p) } : null;
+      })();
+      // Evict failed lookups so a later call retries instead of replaying the error.
+      void pending.catch(() => providerCache.delete(providerId));
+      providerCache.set(providerId, pending);
+    }
+    return pending;
+  };
+
+  // Tier 2 only for sub-agents (ADR-0012 §Sub-agents: no durable history for Tier 1).
+  // Resolve per-sub-agent compaction runtime so each sub-agent's tool loop
+  // gets a prepareStep calibrated to its own model's context window.
+  const subAgentPrepareSteps = new Map<
+    string,
+    import("ai").PrepareStepFunction
+  >();
+  // Per-sub-agent overflow recovery (ADR-0012 §Sub-agents). Always built: recovery is the
+  // net (ADR-0012 §Recovery) even when the kill switch (ADR-0012 §Config & kill switch)
+  // disables proactive compaction, as on the main path. Only Tier 2 (below) is gated by it.
+  const subAgentRecoveries = new Map<string, RecoveryContext>();
+  await Promise.all(
+    subAgentRecords.map(async (sa) => {
+      try {
+        const resolved = await resolveSubProvider(sa.providerId);
+        if (!resolved) return;
+        const runtime = await buildCompactionRuntime({
+          // Sub-agents have no chat row; tag logs with the sub-agent id (F3).
+          chatId: sa.id,
+          provider: resolved.provider,
+          resolvedModelId: sa.modelId,
+          opened: resolved.opened,
+        });
+        // ADR-0012 §Tier 1 (budget math): subtract the sub-agent's per-turn
+        // overhead so its recovery/Tier 2 targets match the main path. The
+        // sub-agent's tool schemas resolve lazily at invocation and aren't
+        // available here, so the system prompt — the dominant, predictable
+        // component — is the floor; under-counting overhead only trims slightly
+        // less aggressively, and recovery's force-halving still backstops it.
+        const subOverheadTokens = estimateOverheadTokens(
+          sa.systemPrompt ?? undefined,
+          undefined,
+        );
+        // Recovery net first (not gated by compactionEnabled). No markDirty —
+        // sub-agents have no durable chat row to flag.
+        subAgentRecoveries.set(sa.id, {
+          chatId: sa.id,
+          imageProvider: runtime.imageProvider,
+          targetTokens: Math.max(
+            0,
+            runtime.budget.targetTokens - subOverheadTokens,
+          ),
+          keepRecentMessages: runtime.config.keepRecentMessages,
+          minPrunableChars: runtime.config.minPrunableChars,
+          summarize: runtime.summarize,
+          summarizerWindow: runtime.summarizerWindow,
+        });
+        if (!runtime.config.compactionEnabled) return;
+        const tier2: Tier2Context = {
+          triggerTokens: Math.max(
+            0,
+            runtime.budget.triggerTokens - subOverheadTokens,
+          ),
+          targetTokens: Math.max(
+            0,
+            runtime.budget.targetTokens - subOverheadTokens,
+          ),
+          keepRecentMessages: runtime.config.keepRecentMessages,
+          minPrunableChars: runtime.config.minPrunableChars,
+          imageProvider: runtime.imageProvider,
+          summarize: runtime.summarize,
+          summarizerWindow: runtime.summarizerWindow,
+        };
+        subAgentPrepareSteps.set(sa.id, buildTier2PrepareStep(tier2));
+      } catch (error) {
+        logger.warn(
+          { error, subAgentId: sa.id },
+          "Failed to build Tier 2 for sub-agent; skipping",
+        );
+      }
+    }),
+  );
+
   const subAgentMcpClients: MCPClient[] = [];
 
   const subAgentTools = await createSubAgentTools(
     subAgentRecords,
     async (providerId: string, modelId: string) => {
-      const subProvider = await queries.getProvider(
-        providerId,
-        orgId,
-        workspaceId,
-      );
-      if (!subProvider) {
+      const resolved = await resolveSubProvider(providerId);
+      if (!resolved) {
         throw new Error(`Provider '${providerId}' not found for sub-agent`);
       }
-      return openProvider(subProvider).languageModel(modelId);
+      return resolved.opened.languageModel(modelId);
     },
     async (subAgentId: string, toolSetIds: string[]) => {
       const subAgentRecord = subAgentRecords.find((sa) => sa.id === subAgentId);
@@ -941,7 +1615,164 @@ const loadSubAgents = async (
       return subTools;
     },
     onProgress,
+    (id) => subAgentPrepareSteps.get(id),
+    (id) => subAgentRecoveries.get(id),
   );
 
   return { subAgents, subAgentTools, subAgentMcpClients };
 };
+
+// --- Force-compact endpoint (ADR-0012 §Force-compact on demand) ---
+
+/**
+ * Force-compacts a chat's history on demand (ADR-0012 §Force-compact on demand:
+ * clickable ring). Tier 1 is handed an in-memory state copy with
+ * compactionDirty=true so the ADR-0012 §Recovery force path skips the estimate
+ * gate and compaction runs regardless of the token threshold.
+ * Invoked from `POST /chats/:id/compact`; the route guards against concurrent
+ * runs before calling here.
+ */
+export async function forceCompactChat(
+  chatId: string,
+  workspaceId: string,
+  orgId: string,
+): Promise<{
+  estimatedTokens: number;
+  /** Message-only estimate of the history BEFORE compaction (same basis as estimatedTokens). */
+  tokensBefore: number;
+  /** Number of prefix messages folded into the summary this run (0 if no summary). */
+  messagesDropped: number;
+  /** The config keep-recent count — the client compares messagesDropped against it. */
+  keepRecentMessages: number;
+  contextWindow: number;
+  contextWindowIsDefault: boolean;
+  /** ADR-0012 §Compaction trace in the timeline — the persisted synthetic trace message, when a summary was produced. */
+  traceMessage?: PlatypusUIMessage;
+}> {
+  // Workspace-scoped lookup of the chat's agent / provider / model selection.
+  const [chatRow] = await db
+    .select({
+      agentId: chatTable.agentId,
+      providerId: chatTable.providerId,
+      modelId: chatTable.modelId,
+    })
+    .from(chatTable)
+    .where(
+      and(eq(chatTable.id, chatId), eq(chatTable.workspaceId, workspaceId)),
+    )
+    .limit(1);
+  if (!chatRow) throw new NotFoundError("Chat not found");
+
+  // Provider rows come from the shared query layer (respects org-scoped
+  // Shared resources and the ADR-0007 attachment gate). A missing row raises
+  // NotFoundError for both the agent and the direct-selection path.
+  const requireProvider = async (
+    providerId: Parameters<typeof drizzleChatTurnQueries.getProvider>[0],
+  ): Promise<Provider> => {
+    const row = await drizzleChatTurnQueries.getProvider(
+      providerId,
+      orgId,
+      workspaceId,
+    );
+    if (!row) throw new NotFoundError("Provider not found");
+    return row;
+  };
+
+  let provider: Provider;
+  let resolvedModelId: string;
+  if (chatRow.agentId) {
+    // Agent chats take both provider and model from the agent record.
+    const agentRow = await drizzleChatTurnQueries.getAgent(
+      chatRow.agentId,
+      orgId,
+      workspaceId,
+    );
+    if (!agentRow) throw new NotFoundError("Agent not found");
+    resolvedModelId = agentRow.modelId;
+    provider = await requireProvider(agentRow.providerId);
+  } else if (chatRow.providerId && chatRow.modelId) {
+    // Agent-less chats use the provider + model stored on the chat row.
+    provider = await requireProvider(chatRow.providerId);
+    resolvedModelId = chatRow.modelId;
+  } else {
+    throw new ValidationError("Chat has no provider/model configured");
+  }
+
+  const runtime = await buildCompactionRuntime({
+    chatId,
+    provider,
+    resolvedModelId,
+    opened: openProvider(provider),
+  });
+  const { config, imageProvider } = runtime;
+
+  const messages = await loadChatMessages(chatId);
+  const storedState =
+    (await drizzleCompactionStore.readState(chatId)) ?? EMPTY_COMPACTION_STATE;
+
+  // Dirty flag on an in-memory copy only: forces the run (ADR-0012 §Recovery:
+  // bypass the estimate gate) so the compaction actually shrinks the history.
+  const forcedState: CompactionState = { ...storedState, compactionDirty: true };
+
+  const result = await applyTier1Compaction({
+    chatId,
+    messages,
+    state: forcedState,
+    budget: runtime.budget,
+    config,
+    imageProvider,
+    summarize: runtime.summarize,
+    store: drizzleCompactionStore,
+    summarizerWindow: runtime.summarizerWindow,
+  });
+  const { compactionTrace } = result;
+
+  // Message-only estimates (no per-turn system/tool overhead). The ring uses the
+  // post-compact figure as a transient value that the next response's provider
+  // count supersedes, so it reads slightly low vs the live ring numerator (which
+  // includes overhead). The pre-compaction figure (same basis) lets the client
+  // decide whether the drop is significant — ADR-0012 §Force-compact on demand.
+  const estimate = (history: Parameters<typeof uiMessagesToCountUnits>[0]) =>
+    estimateTokens(uiMessagesToCountUnits(history, imageProvider));
+  const estimatedTokens = estimate(result.messages);
+  const tokensBefore = estimate(messages);
+
+  // ADR-0012 §Compaction trace in the timeline: a forced compaction has no live
+  // stream to inject the trace into, so it is persisted as a standalone
+  // synthetic assistant message. Appended after the last real message — above
+  // the watermark (which already advanced inside applyTier1Compaction), so it
+  // is never itself summarized. The strip filter keeps it out of the model
+  // payload on subsequent turns. Only written when a model summary was
+  // actually produced (compactionTrace is undefined otherwise — see Tier1Output).
+  let traceMessage: PlatypusUIMessage | undefined;
+  if (compactionTrace) {
+    traceMessage = buildCompactionTraceMessage(
+      compactionTrace,
+      createIdGenerator({ prefix: "msg", size: 16 })(),
+    );
+    const appendedJson = JSON.stringify([traceMessage]);
+    // Atomic jsonb append: `||` concatenates onto whatever is stored now
+    // instead of overwriting the column from the `messages` snapshot loaded
+    // earlier. The route guards with runRegistry.has(chatId), but a run that
+    // registers in the has()→write window — or a second concurrent
+    // POST /compact — would otherwise be clobbered by that stale array.
+    await db
+      .update(chatTable)
+      .set({
+        messages: sql`coalesce(${chatTable.messages}, '[]'::jsonb) || ${appendedJson}::jsonb`,
+      })
+      .where(
+        and(eq(chatTable.id, chatId), eq(chatTable.workspaceId, workspaceId)),
+      );
+  }
+
+  return {
+    estimatedTokens,
+    tokensBefore,
+    messagesDropped: compactionTrace?.messagesDropped ?? 0,
+    keepRecentMessages: config.keepRecentMessages,
+    contextWindow: runtime.contextWindow,
+    contextWindowIsDefault: runtime.contextWindowIsDefault,
+    traceMessage,
+  };
+}
diff --git a/apps/backend/src/tools/sub-agent.test.ts b/apps/backend/src/tools/sub-agent.test.ts
index 5af36d14..3ab227c9 100644
--- a/apps/backend/src/tools/sub-agent.test.ts
+++ b/apps/backend/src/tools/sub-agent.test.ts
@@ -43,13 +43,16 @@ function createMockFullStream(
   };
 }
 
-const { mockStream, MockToolLoopAgent } = vi.hoisted(() => {
+const { mockStream, MockToolLoopAgent, capturedSettings } = vi.hoisted(() => {
   const mockStream = vi.fn();
+  const capturedSettings: Record<string, unknown>[] = [];
   class MockToolLoopAgent {
-    constructor() {}
+    constructor(settings: Record<string, unknown>) {
+      capturedSettings.push(settings);
+    }
     stream = mockStream;
   }
-  return { mockStream, MockToolLoopAgent };
+  return { mockStream, MockToolLoopAgent, capturedSettings };
 });
 
 vi.mock("ai", async () => {
@@ -74,6 +77,25 @@ describe("createSubAgentTool", () => {
     tools: {},
   };
 
+  beforeEach(() => {
+    capturedSettings.length = 0;
+  });
+
+  describe("Tier 2 prepareStep (ADR-0012 §Sub-agents)", () => {
+    it("passes prepareStep to ToolLoopAgent when provided", () => {
+      const mockPrepareStep = vi.fn();
+      createSubAgentTool({ ...baseOptions, prepareStep: mockPrepareStep });
+      expect(capturedSettings[0]).toMatchObject({
+        prepareStep: mockPrepareStep,
+      });
+    });
+
+    it("passes undefined prepareStep when not provided", () => {
+      createSubAgentTool(baseOptions);
+      expect(capturedSettings[0].prepareStep).toBeUndefined();
+    });
+  });
+
   describe("toolName generation", () => {
     it("generates PascalCase delegateTo prefix", () => {
       const { toolName } = createSubAgentTool(baseOptions);
@@ -398,4 +420,34 @@ describe("createSubAgentTools", () => {
 
     expect(Object.keys(result)).toHaveLength(1);
   });
+
+  it("threads prepareStepFn to ToolLoopAgent for each sub-agent (ADR-0012 §Sub-agents)", async () => {
+    capturedSettings.length = 0;
+    const subAgents = [
+      { id: "sa-1", name: "Alpha", providerId: "p1", modelId: "m1" },
+      { id: "sa-2", name: "Beta", providerId: "p1", modelId: "m1" },
+    ];
+    const mockStep1 = vi.fn();
+    const mockStep2 = vi.fn();
+    const prepareStepFn = vi
+      .fn()
+      .mockImplementation((id: string) =>
+        id === "sa-1" ? mockStep1 : mockStep2,
+      );
+
+    const createModelFn = vi.fn().mockResolvedValue({});
+    const loadToolsFn = vi.fn().mockResolvedValue({});
+
+    await createSubAgentTools(
+      subAgents,
+      createModelFn,
+      loadToolsFn,
+      undefined,
+      prepareStepFn,
+    );
+
+    expect(capturedSettings).toHaveLength(2);
+    expect(capturedSettings[0].prepareStep).toBe(mockStep1);
+    expect(capturedSettings[1].prepareStep).toBe(mockStep2);
+  });
 });
diff --git a/apps/backend/src/tools/sub-agent.ts b/apps/backend/src/tools/sub-agent.ts
index 0a83e86b..52f13f55 100644
--- a/apps/backend/src/tools/sub-agent.ts
+++ b/apps/backend/src/tools/sub-agent.ts
@@ -2,11 +2,17 @@ import {
   stepCountIs,
   tool,
   ToolLoopAgent,
+  wrapLanguageModel,
   type LanguageModel,
+  type PrepareStepFunction,
   type Tool,
 } from "ai";
 import { z } from "zod";
 import { logger } from "../logger.ts";
+import {
+  contextOverflowRecoveryMiddleware,
+  type RecoveryContext,
+} from "../runs/recovery.ts";
 
 /**
  * Single source of truth for the sub-agent delegation tool name.
@@ -49,6 +55,17 @@ interface SubAgentToolOptions {
   maxSteps?: number;
   /** Called on each activity update from the sub-agent. Used to reset the parent run's per-step timeout. */
   onProgress?: () => void;
+  /** Tier 2 in-turn compaction callback (ADR-0012 §Tier 2 / §Sub-agents). Undefined when compaction is disabled. */
+  prepareStep?: PrepareStepFunction;
+  /**
+   * Context-overflow recovery (ADR-0012 §Recovery) for the sub-agent's own model calls.
+   * Sub-agents run a ToolLoopAgent OUTSIDE the parent run's recovery-wrapped
+   * model, so without this their only overflow protection is Tier 2 — which
+   * fires late (its trigger omits the sub-agent's tool-schema overhead) and has
+   * no net behind it. Wrapping here gives every sub-agent step one trim+retry,
+   * matching the main path (ADR-0012 §Sub-agents). `markDirty` is omitted (no chat row).
+   */
+  recovery?: RecoveryContext;
 }
 
 /**
@@ -68,17 +85,36 @@ export const createSubAgentTool = (options: SubAgentToolOptions) => {
     tools,
     maxSteps = 50,
     onProgress,
+    prepareStep,
+    recovery,
   } = options;
 
   const toolName = subAgentToolName({ name });
 
+  // Wrap the sub-agent model with the overflow-recovery middleware (ADR-0012 §Sub-agents) so
+  // a step that overflows gets one trim+retry instead of hard-failing the task.
+  // Guard on `typeof model !== "string"`: `wrapLanguageModel` needs a model
+  // INSTANCE, and `LanguageModel` permits a bare string id. The factory returns
+  // an instance today, but a string would otherwise throw here and the catch in
+  // `createSubAgentTools` would silently drop the whole sub-agent — so degrade to
+  // the unwrapped model instead. The remaining cast only reconciles the
+  // V2/V3 instance union (wrapLanguageModel accepts both at runtime).
+  const recoveredModel: LanguageModel =
+    recovery && typeof model !== "string"
+      ? wrapLanguageModel({
+          model: model as Parameters<typeof wrapLanguageModel>[0]["model"],
+          middleware: contextOverflowRecoveryMiddleware(recovery),
+        })
+      : model;
+
   const agent = new ToolLoopAgent({
-    model,
+    model: recoveredModel,
     instructions:
       systemPrompt ||
       `You are a specialized sub-agent named "${name}". Complete the task you are given thoroughly and accurately.`,
     tools,
     stopWhen: [stepCountIs(maxSteps)],
+    prepareStep,
   });
 
   return {
@@ -192,6 +228,8 @@ export const createSubAgentTools = async (
     toolSetIds: string[],
   ) => Promise<Record<string, Tool>>,
   onProgress?: () => void,
+  prepareStepFn?: (id: string) => PrepareStepFunction | undefined,
+  recoveryFn?: (id: string) => RecoveryContext | undefined,
 ): Promise<Record<string, Tool>> => {
   const tools: Record<string, Tool> = {};
 
@@ -216,6 +254,8 @@ export const createSubAgentTools = async (
         tools: subAgentTools,
         maxSteps: subAgent.maxSteps || 50,
         onProgress,
+        prepareStep: prepareStepFn?.(subAgent.id),
+        recovery: recoveryFn?.(subAgent.id),
       });
 
       tools[toolName] = tool;
diff --git a/apps/frontend/components/ai-elements/tool.tsx b/apps/frontend/components/ai-elements/tool.tsx
index fee20b81..aebb14b3 100644
--- a/apps/frontend/components/ai-elements/tool.tsx
+++ b/apps/frontend/components/ai-elements/tool.tsx
@@ -7,6 +7,7 @@ import {
   CollapsibleTrigger,
 } from "@/components/ui/collapsible";
 import { cn } from "@/lib/utils";
+import { useToolDuration } from "@/hooks/use-tool-completed-at";
 import type { ToolUIPart } from "ai";
 import {
   ArrowRightLeftIcon,
@@ -44,6 +45,9 @@ import { CodeBlock } from "./code-block";
 export function humanizeToolType(type: string): string {
   // Strip the "tool-" prefix
   const name = type.startsWith("tool-") ? type.slice(5) : type;
+  // Synthetic compaction trace (§K/11c) — render a human label instead of the
+  // raw, underscore-laden function name.
+  if (name === "compact_context") return "Context compaction";
   // Split on camelCase boundaries
   const words = name.replace(/([a-z])([A-Z])/g, "$1 $2").split(" ");
   // Capitalise the first word, lowercase the rest
@@ -172,6 +176,10 @@ export type ToolHeaderProps = {
   label?: string;
   type: ToolUIPart["type"];
   state: ToolUIPart["state"];
+  /** ISO timestamp of when this tool call began, if known. */
+  startedAt?: string;
+  /** ISO timestamp of when this tool call completed, if known. */
+  completedAt?: string;
   className?: string;
 };
 
@@ -210,8 +218,11 @@ export const ToolHeader = ({
   label,
   type,
   state,
+  startedAt,
+  completedAt,
   ...props
 }: ToolHeaderProps) => {
+  const duration = useToolDuration(state, startedAt, completedAt);
   // getToolIcon returns a stable module-level Lucide icon; render via
   // createElement so the dynamic selection isn't flagged as a component
   // created during render.
@@ -237,6 +248,11 @@ export const ToolHeader = ({
           )}
         </span>
         {getStatusBadge(state)}
+        {duration && (
+          <span className="text-xs text-muted-foreground shrink-0">
+            {duration}
+          </span>
+        )}
       </div>
       <ChevronDownIcon className="size-4 shrink-0 text-muted-foreground transition-transform group-data-[state=open]:rotate-180" />
     </CollapsibleTrigger>
diff --git a/apps/frontend/components/chat-message.tsx b/apps/frontend/components/chat-message.tsx
index 102e8bb1..b709af0a 100644
--- a/apps/frontend/components/chat-message.tsx
+++ b/apps/frontend/components/chat-message.tsx
@@ -35,7 +35,7 @@ import {
   TextUIPart,
   type ChatStatus,
 } from "ai";
-import { Agent } from "@platypus/schemas";
+import { Agent, type MessageStats } from "@platypus/schemas";
 import {
   BotIcon,
   CheckIcon,
@@ -44,10 +44,87 @@ import {
   TrashIcon,
   RefreshCwIcon,
   XIcon,
+  InfoIcon,
 } from "lucide-react";
 import { Textarea } from "./ui/textarea";
 import { LoadSkillTool } from "./load-skill-tool";
 import { SubAgentTool } from "./sub-agent-tool";
+import { Button } from "./ui/button";
+import { Popover, PopoverContent, PopoverTrigger } from "./ui/popover";
+import { Tooltip, TooltipContent, TooltipTrigger } from "./ui/tooltip";
+import { formatDurationMs } from "@/lib/utils";
+
+/** Start timestamp recorded on a tool part's `toolMetadata`; undefined unless it is a string. */
+const getToolStartedAt = (part: unknown): string | undefined => {
+  const startedAt = (part as { toolMetadata?: { startedAt?: unknown } })?.toolMetadata?.startedAt;
+  return typeof startedAt !== "string" ? undefined : startedAt;
+};
+
+/** Completion timestamp recorded on a tool part's `toolMetadata`; undefined unless it is a string. */
+const getToolCompletedAt = (part: unknown): string | undefined => {
+  const completedAt = (part as { toolMetadata?: { completedAt?: unknown } })?.toolMetadata?.completedAt;
+  return typeof completedAt !== "string" ? undefined : completedAt;
+};
+
+/**
+ * Info button for an assistant message, wrapped in both a tooltip and a
+ * popover; each lists the token counts and the response timings from `stats`.
+ */
+function MessageStatsPopover({ stats }: { stats: MessageStats }) {
+  const startedMs = new Date(stats.startedAt).getTime();
+  const ttft = stats.firstTokenAt
+    ? formatDurationMs(new Date(stats.firstTokenAt).getTime() - startedMs)
+    : undefined;
+  const total = formatDurationMs(new Date(stats.finishedAt).getTime() - startedMs);
+  const tokensIn = stats.inputTokens.toLocaleString();
+  const tokensOut = stats.outputTokens.toLocaleString();
+  return (
+    <Tooltip delayDuration={500}>
+      <Popover>
+        <TooltipTrigger asChild>
+          <PopoverTrigger asChild>
+            <Button
+              size="icon"
+              variant="ghost"
+              type="button"
+              className="cursor-pointer text-muted-foreground"
+            >
+              <InfoIcon className="size-4" />
+              <span className="sr-only">Response stats</span>
+            </Button>
+          </PopoverTrigger>
+        </TooltipTrigger>
+        <PopoverContent className="w-auto min-w-44 p-3" align="start">
+          <div className="flex flex-col gap-1 text-sm">
+            <p className="text-xs font-medium text-muted-foreground mb-1">
+              Response stats
+            </p>
+            <p>
+              <span className="text-muted-foreground">In:</span> {tokensIn}{" "}
+              <span className="text-muted-foreground">Out:</span> {tokensOut}
+            </p>
+            {ttft && (
+              <p>
+                <span className="text-muted-foreground">TTFT:</span> {ttft}
+              </p>
+            )}
+            {total && (
+              <p>
+                <span className="text-muted-foreground">Total:</span> {total}
+              </p>
+            )}
+          </div>
+        </PopoverContent>
+      </Popover>
+      <TooltipContent side="top">
+        In: {tokensIn} · Out:{" "}
+        {tokensOut}
+        {ttft ? ` · TTFT: ${ttft}` : ""}
+        {total ? ` · Total: ${total}` : ""}
+      </TooltipContent>
+    </Tooltip>
+  );
+}
 
 interface ChatMessageProps {
   /** The message object to render */
@@ -120,6 +197,7 @@ export const ChatMessage = memo(function ChatMessage({
         <BotIcon className="size-3.5 text-muted-foreground" />
       </div>
     ));
+
   const fileParts = message.parts?.filter(
     (part): part is FileUIPart =>
       part.type === "file" && !part.mediaType?.startsWith("image/"),
@@ -134,6 +212,11 @@ export const ChatMessage = memo(function ChatMessage({
       .map((part) => part.text)
       .join("") || "";
 
+  const assistantStats =
+    message.role === "assistant"
+      ? (message.metadata as { stats?: MessageStats } | undefined)?.stats
+      : undefined;
+
   return (
     <Fragment key={message.id}>
       {fileParts && fileParts.length > 0 && (
@@ -154,7 +237,17 @@ export const ChatMessage = memo(function ChatMessage({
         </Sources>
       )}
       {message.parts?.map((part, i) => {
-        if (part.type === "text") {
+        if (part.type === "step-start") {
+          // The SDK emits step-start at every round boundary. We don't render
+          // it — tool-call timestamps appear inside the tool header below.
+          return null;
+        } else if (part.type === "text") {
+          const partText = (part as TextUIPart).text;
+
+          // Skip empty text parts on assistant messages — the SDK emits them
+          // between steps; rendering would leave a bare avatar bubble.
+          if (message.role === "assistant" && !partText.trim()) return null;
+
           if (isEditing) {
             const isFirstTextPart =
               i === message.parts.findIndex((p) => p.type === "text");
@@ -186,7 +279,7 @@ export const ChatMessage = memo(function ChatMessage({
               avatar={assistantAvatar}
             >
               <MessageContent className="max-w-full">
-                <MessageResponse>{(part as TextUIPart).text}</MessageResponse>
+                <MessageResponse>{partText}</MessageResponse>
               </MessageContent>
             </Message>
           );
@@ -212,6 +305,8 @@ export const ChatMessage = memo(function ChatMessage({
               <DynamicToolHeader
                 state={toolPart.state}
                 title={toolPart.toolName}
+                startedAt={getToolStartedAt(toolPart)}
+                completedAt={getToolCompletedAt(toolPart)}
               />
               <ToolContent>
                 <ToolInput input={toolPart.input} />
@@ -251,6 +346,8 @@ export const ChatMessage = memo(function ChatMessage({
                 state={toolPart.state}
                 type={toolPart.type}
                 label={toolLabel}
+                startedAt={getToolStartedAt(toolPart)}
+                completedAt={getToolCompletedAt(toolPart)}
               />
               <ToolContent>
                 <ToolInput input={toolPart.input} />
@@ -354,6 +451,7 @@ export const ChatMessage = memo(function ChatMessage({
                 <RefreshCwIcon className="size-4" />
               </MessageAction>
             )}
+            {assistantStats && <MessageStatsPopover stats={assistantStats} />}
           </MessageActions>
         ))}
     </Fragment>
diff --git a/apps/frontend/components/chat.tsx b/apps/frontend/components/chat.tsx
index cfc38d1a..d9fe1a19 100644
--- a/apps/frontend/components/chat.tsx
+++ b/apps/frontend/components/chat.tsx
@@ -32,6 +32,7 @@ import {
   Agent,
   ToolSet,
   Skill,
+  type MessageStats,
 } from "@platypus/schemas";
 import { type PlatypusUIMessage } from "@platypus/backend/src/types";
 import useSWR from "swr";
@@ -55,6 +56,7 @@ import {
   TooltipTrigger,
 } from "@/components/ui/tooltip";
 import { ChatMessage } from "./chat-message";
+import { ContextUsageRing } from "./context-usage-ring";
 import { ModelSelectorDialog } from "./model-selector-dialog";
 import { toast } from "sonner";
 
@@ -378,19 +380,46 @@ export const Chat = ({
     [messages, setMessages],
   );
 
-  // TODO: Ideally show a loading indicator here
-  if (isLoading || !providersData) return null;
+  // Resolve the effective provider+model for the ring (ADR-0012 §Context-usage ring: use selected
+  // model's window, not last message's window). When an agent is selected we
+  // look up its provider/model; otherwise use the directly selected values.
+  const effectiveRingProviderId = agentId
+    ? (agents.find((a) => a.id === agentId)?.providerId ?? "")
+    : providerId;
+  const effectiveRingModelId = agentId
+    ? (agents.find((a) => a.id === agentId)?.modelId ?? "")
+    : modelId;
+
+  // Fetch resolved context window for the currently-selected model (cached on
+  // the backend). Returns null contextWindow when source = "default" so the ring
+  // renders neutral (ADR-0012 §Context-usage ring). Re-fetches automatically on model/agent change.
+  const { data: contextWindowData } = useSWR<{
+    contextWindow: number | null;
+    source: string;
+    keepRecentMessages?: number;
+  }>(
+    backendUrl && user && effectiveRingProviderId && effectiveRingModelId
+      ? joinUrl(
+          backendUrl,
+          `/organizations/${orgId}/workspaces/${workspaceId}/providers/${effectiveRingProviderId}/context-window?modelId=${encodeURIComponent(effectiveRingModelId)}`,
+        )
+      : null,
+    fetcher,
+  );
 
-  // Show alert if no providers are configured
-  if (providers.length === 0) {
-    return (
-      <div className="flex items-center justify-center h-full p-8">
-        <div className="w-full xl:w-4/5 max-w-4xl">
-          <NoProvidersEmptyState orgId={orgId} workspaceId={workspaceId} />
-        </div>
-      </div>
-    );
-  }
+  // Stats from the last completed assistant message for the ring (ADR-0012 §Context-usage ring) and
+  // per-message stats popover (ADR-0012 §Per-message stats).
+  const lastAssistantStats = useMemo<MessageStats | null>(() => {
+    for (let i = messages.length - 1; i >= 0; i--) {
+      const msg = messages[i];
+      const stats = (msg.metadata as { stats?: MessageStats } | undefined)
+        ?.stats;
+      if (msg.role === "assistant" && stats) {
+        return stats;
+      }
+    }
+    return null;
+  }, [messages]);
 
   const selectedAgent = agentId ? agents.find((a) => a.id === agentId) : null;
   // Resolve the provider backing the current selection, whether that's a raw
@@ -416,6 +445,145 @@ export const Chat = ({
     chatData?.status === "running" && status === "ready";
   const effectiveStatus = isReconnectedToRunningRun ? "streaming" : status;
 
+  // ADR-0012 §Force-compact on demand — state for pending (deferred while streaming),
+  // in-flight compaction spinner, and the post-compact token estimate that
+  // refreshes the ring immediately (before the next completed message).
+  const [compactPending, setCompactPending] = useState(false);
+  const [isCompacting, setIsCompacting] = useState(false);
+  // Stable count of assistant messages — unaffected by optimistic user-message
+  // pushes (ADR-0012 §Context-usage ring). Used to tag post-compact estimates so the ring doesn't
+  // snap back to the old value when the user hits Send.
+  const assistantMessageCount = useMemo(
+    () => messages.filter((m) => m.role === "assistant").length,
+    [messages],
+  );
+
+  // Post-compact estimate, tagged with the assistant message count at
+  // compaction time so it auto-expires once a new assistant message arrives
+  // (the next provider count is authoritative). Using assistantMessageCount
+  // instead of messages.length fixes the ring-jump bug (ADR-0012 §Context-usage ring): an optimistic user
+  // message increments messages.length but not assistantMessageCount, so the
+  // compacted estimate stays valid until the real response lands.
+  const [compacted, setCompacted] = useState<{
+    atAssistantMessageCount: number;
+    tokens: number;
+  } | null>(null);
+
+  const runCompact = useCallback(async () => {
+    if (!backendUrl) return;
+    setIsCompacting(true);
+    try {
+      const res = await fetch(
+        joinUrl(
+          backendUrl,
+          `/organizations/${orgId}/workspaces/${workspaceId}/chat/${chatId}/compact`,
+        ),
+        { method: "POST", credentials: "include" },
+      );
+      if (!res.ok) {
+        const body = await res.json().catch(() => ({}));
+        toast.error((body as { error?: string }).error ?? "Compact failed");
+        return;
+      }
+      // Refresh the ring immediately from the post-compact estimate (ADR-0012 §Force-compact on demand). This
+      // is a message-only char/4 estimate (no per-turn system/tool overhead),
+      // so it reads slightly low until the next real response replaces it with
+      // the provider's authoritative count.
+      const body = (await res.json().catch(() => ({}))) as {
+        inputTokens?: number;
+        traceMessage?: PlatypusUIMessage;
+      };
+      if (typeof body.inputTokens === "number") {
+        setCompacted({
+          atAssistantMessageCount: assistantMessageCount,
+          tokens: body.inputTokens,
+        });
+      }
+      // ADR-0012 §Compaction trace in the timeline: append the persisted compaction-trace message so it shows in the
+      // timeline immediately. It carries the id the backend persisted, so a
+      // later SWR revalidation reconciles rather than duplicating it.
+      if (body.traceMessage) {
+        const traceMessage = body.traceMessage;
+        setMessages((prev) =>
+          prev.some((m) => m.id === traceMessage.id)
+            ? prev
+            : [...prev, traceMessage],
+        );
+      }
+      toast.success("Context compacted");
+    } catch {
+      toast.error("Compact request failed");
+    } finally {
+      setIsCompacting(false);
+    }
+  }, [
+    backendUrl,
+    orgId,
+    workspaceId,
+    chatId,
+    assistantMessageCount,
+    setMessages,
+  ]);
+
+  const handleCompact = useCallback(() => {
+    // ADR-0012 §Force-compact on demand: confirm ONLY when the drop is significant;
+    // below that, run immediately. The summarized prefix is everything before the
+    // keep-recent boundary, so messagesDropped ≈ messages.length − keepRecent, and
+    // the ADR's "messagesDropped > keepRecentMessages" criterion reduces to the
+    // pre-run-computable "messages.length > 2 × keepRecent". (The >30%-reduction
+    // criterion needs the post-run summary size; we don't gate on it here — the op
+    // is non-destructive either way per ADR-0012 §View, not delete.)
+    // Confirm at click time (not after the deferred run fires) so the prompt never
+    // surprises the user mid-stream.
+    const keepRecent = contextWindowData?.keepRecentMessages ?? 10;
+    const significant = messages.length > keepRecent * 2;
+    if (
+      significant &&
+      !window.confirm(
+        "This will summarize older messages to reduce context usage. The full conversation history is preserved. Continue?",
+      )
+    ) {
+      return;
+    }
+    if (effectiveStatus === "streaming" || effectiveStatus === "submitted") {
+      setCompactPending(true);
+    } else {
+      void runCompact();
+    }
+  }, [contextWindowData, messages.length, effectiveStatus, runCompact]);
+
+  // Fire deferred compact once streaming finishes (ADR-0012 §Force-compact on demand). Already confirmed
+  // at click time, so this just runs.
+  useEffect(() => {
+    if (
+      compactPending &&
+      effectiveStatus !== "streaming" &&
+      effectiveStatus !== "submitted"
+    ) {
+      // Reacting to a streaming→idle transition to fire a queued action is the
+      // intended use of an effect here; clearing the flag prevents a re-fire.
+      // eslint-disable-next-line react-hooks/set-state-in-effect
+      setCompactPending(false);
+      void runCompact();
+    }
+  }, [compactPending, effectiveStatus, runCompact]);
+
+  // Early returns live below ALL hooks so hook order stays unconditional
+  // (react-hooks/rules-of-hooks). The ADR-0012 §Context-usage ring / §Force-compact ring hooks above must always run.
+  // TODO: Ideally show a loading indicator here
+  if (isLoading || !providersData) return null;
+
+  // Show alert if no providers are configured
+  if (providers.length === 0) {
+    return (
+      <div className="flex items-center justify-center h-full p-8">
+        <div className="w-full xl:w-4/5 max-w-4xl">
+          <NoProvidersEmptyState orgId={orgId} workspaceId={workspaceId} />
+        </div>
+      </div>
+    );
+  }
+
   const handleSubmit = async (message: PromptInputMessage) => {
     // Stop the stream if currently streaming or submitted
     if (effectiveStatus === "streaming" || effectiveStatus === "submitted") {
@@ -576,6 +744,22 @@ export const Chat = ({
                         <TooltipContent>Search</TooltipContent>
                       </Tooltip>
                     )}
+                    <ContextUsageRing
+                      usedTokens={
+                        compacted?.atAssistantMessageCount ===
+                        assistantMessageCount
+                          ? compacted.tokens
+                          : lastAssistantStats?.contextTokens
+                      }
+                      contextWindow={contextWindowData?.contextWindow}
+                      onClick={chatId ? handleCompact : undefined}
+                      isStreaming={
+                        effectiveStatus === "streaming" ||
+                        effectiveStatus === "submitted"
+                      }
+                      isPending={compactPending}
+                      isCompacting={isCompacting}
+                    />
                     <ModelSelectorDialog
                       agents={agents}
                       providers={providers}
@@ -595,11 +779,18 @@ export const Chat = ({
                         open={isAgentInfoDialogOpen}
                         onOpenChange={setIsAgentInfoDialogOpen}
                       >
-                        <DialogTrigger asChild>
-                          <PromptInputButton>
-                            <Info />
-                          </PromptInputButton>
-                        </DialogTrigger>
+                        <Tooltip delayDuration={500}>
+                          <TooltipTrigger asChild>
+                            <DialogTrigger asChild>
+                              <PromptInputButton aria-label="Agent info">
+                                <Info />
+                              </PromptInputButton>
+                            </DialogTrigger>
+                          </TooltipTrigger>
+                          <TooltipContent side="top">
+                            {selectedAgent.description?.trim() || "Agent info"}
+                          </TooltipContent>
+                        </Tooltip>
                         <AgentInfoDialog
                           agent={selectedAgent}
                           agents={agents}
diff --git a/apps/frontend/components/context-usage-ring.tsx b/apps/frontend/components/context-usage-ring.tsx
new file mode 100644
index 00000000..15994829
--- /dev/null
+++ b/apps/frontend/components/context-usage-ring.tsx
@@ -0,0 +1,139 @@
+"use client";
+
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipTrigger,
+} from "@/components/ui/tooltip";
+import { Loader2 } from "lucide-react";
+
+/**
+ * Context-usage ring (ADR-0012 §Context-usage ring + §Force-compact on demand).
+ *
+ * Draws a 20px SVG donut whose arc is `usedTokens / contextWindow`, clamped to
+ * [0, 1]. Stroke: primary below 0.7, amber from 0.7, destructive from 0.9.
+ * When `contextWindow` is falsy or `usedTokens` is undefined the arc is empty
+ * (track only) and the tooltip says which of the two is missing.
+ *
+ * With `onClick` set the ring is a button (click, Enter, Space) unless:
+ * - `isPending`: arc turns muted, an amber dot is drawn, clicks are ignored.
+ * - `isCompacting`: the SVG is replaced by a spinner, clicks are ignored.
+ * - `isStreaming`: clicks are ignored. NOTE(review): a caller that queues a
+ *   compact while streaming cannot be reached through this ring — confirm.
+ */
+export function ContextUsageRing({
+  usedTokens,
+  contextWindow,
+  onClick,
+  isStreaming,
+  isPending,
+  isCompacting,
+}: {
+  usedTokens?: number;
+  contextWindow?: number | null;
+  onClick?: () => void;
+  isStreaming?: boolean;
+  isPending?: boolean;
+  isCompacting?: boolean;
+}) {
+  const r = 7;
+  const circumference = 2 * Math.PI * r;
+  // Amber has no semantic token (unlike primary/destructive); use Tailwind v4's
+  // default-palette CSS var so the threshold colour isn't a bare hex literal.
+  const amber = "var(--color-amber-500, #f59e0b)";
+
+  const isNeutral = !contextWindow || usedTokens === undefined;
+  const fill = isNeutral
+    ? 0
+    : Math.min(1, Math.max(0, usedTokens / contextWindow));
+
+  const color = isNeutral
+    ? "var(--color-muted-foreground)"
+    : fill >= 0.9
+      ? "var(--color-destructive)"
+      : fill >= 0.7
+        ? amber
+        : "var(--color-primary)";
+
+  const isDisabled = isPending || isCompacting || isStreaming || !onClick;
+  const isClickable = !!onClick && !isDisabled;
+
+  // Append the compact affordance whenever the ring is actually clickable —
+  // including the neutral (unknown-window) state, where the user can still
+  // force a compaction even though no fill is shown.
+  const clickHint = isClickable ? " · click to compact" : "";
+  let tooltipLabel: string;
+  if (isPending) {
+    tooltipLabel = "Will compact when response finishes";
+  } else if (isCompacting) {
+    tooltipLabel = "Compacting…";
+  } else if (isNeutral) {
+    if (!contextWindow) {
+      tooltipLabel = `Context window unknown · model not found in provider registry${clickHint}`;
+    } else {
+      tooltipLabel = `No messages yet · ${contextWindow.toLocaleString()} token window${clickHint}`;
+    }
+  } else {
+    tooltipLabel = `Last response: ${usedTokens!.toLocaleString()} / ${contextWindow!.toLocaleString()} (${Math.round(fill * 100)}%) · current input not yet counted${clickHint}`;
+  }
+
+  return (
+    <Tooltip delayDuration={500}>
+      <TooltipTrigger asChild>
+        <div
+          className={`flex items-center justify-center w-8 h-8 relative ${isClickable ? "cursor-pointer hover:opacity-70 transition-opacity" : "cursor-default"}`}
+          onClick={isClickable ? onClick : undefined}
+          onKeyDown={
+            isClickable
+              ? (e) => {
+                  if (e.key === "Enter" || e.key === " ") {
+                    e.preventDefault();
+                    onClick!();
+                  }
+                }
+              : undefined
+          }
+          tabIndex={isClickable ? 0 : undefined}
+          role={isClickable ? "button" : undefined}
+          aria-label={tooltipLabel}
+          aria-disabled={isDisabled || undefined}
+        >
+          {isCompacting ? (
+            <Loader2 size={16} className="animate-spin text-muted-foreground" />
+          ) : (
+            <svg width="20" height="20" viewBox="0 0 20 20" aria-hidden>
+              {/* Track */}
+              <circle
+                cx="10"
+                cy="10"
+                r={r}
+                fill="none"
+                stroke="var(--color-border)"
+                strokeWidth="3"
+              />
+              {/* Fill */}
+              <circle
+                cx="10"
+                cy="10"
+                r={r}
+                fill="none"
+                stroke={isPending ? "var(--color-muted-foreground)" : color}
+                strokeWidth="3"
+                strokeDasharray={`${circumference}`}
+                strokeDashoffset={`${circumference * (1 - fill)}`}
+                strokeLinecap="butt"
+                transform="rotate(-90 10 10)"
+                style={{ transition: "stroke-dashoffset 0.3s ease" }}
+              />
+              {/* Pending dot */}
+              {isPending && <circle cx="10" cy="3.5" r="2" fill={amber} />}
+            </svg>
+          )}
+        </div>
+      </TooltipTrigger>
+      <TooltipContent side="top" className="max-w-xs text-center text-xs">
+        {tooltipLabel}
+      </TooltipContent>
+    </Tooltip>
+  );
+}
diff --git a/apps/frontend/components/dynamic-tool-header.tsx b/apps/frontend/components/dynamic-tool-header.tsx
index db72bdbf..766402fb 100644
--- a/apps/frontend/components/dynamic-tool-header.tsx
+++ b/apps/frontend/components/dynamic-tool-header.tsx
@@ -3,6 +3,7 @@
 import { Badge } from "@/components/ui/badge";
 import { CollapsibleTrigger } from "@/components/ui/collapsible";
 import { cn } from "@/lib/utils";
+import { useToolDuration } from "@/hooks/use-tool-completed-at";
 import type { DynamicToolUIPart } from "ai";
 import {
   CheckCircleIcon,
@@ -17,6 +18,10 @@ import type { ReactNode } from "react";
 export type DynamicToolHeaderProps = {
   title: string;
   state: DynamicToolUIPart["state"];
+  /** ISO timestamp of when this tool call began, if known. */
+  startedAt?: string;
+  /** ISO timestamp of when this tool call completed, if known. */
+  completedAt?: string;
   className?: string;
 };
 
@@ -53,20 +58,30 @@ export const DynamicToolHeader = ({
   className,
   title,
   state,
+  startedAt,
+  completedAt,
   ...props
-}: DynamicToolHeaderProps) => (
-  <CollapsibleTrigger
-    className={cn(
-      "flex w-full items-center justify-between gap-4 p-3",
-      className,
-    )}
-    {...props}
-  >
-    <div className="flex items-center gap-2">
-      <WrenchIcon className="size-4 text-muted-foreground" />
-      <span className="font-medium text-sm">{title}</span>
-      {getStatusBadge(state)}
-    </div>
-    <ChevronDownIcon className="size-4 text-muted-foreground transition-transform group-data-[state=open]:rotate-180" />
-  </CollapsibleTrigger>
-);
+}: DynamicToolHeaderProps) => {
+  const duration = useToolDuration(state, startedAt, completedAt);
+  return (
+    <CollapsibleTrigger
+      className={cn(
+        "flex w-full items-center justify-between gap-4 p-3",
+        className,
+      )}
+      {...props}
+    >
+      <div className="flex items-center gap-2">
+        <WrenchIcon className="size-4 text-muted-foreground" />
+        <span className="font-medium text-sm">{title}</span>
+        {getStatusBadge(state)}
+        {duration && (
+          <span className="text-xs text-muted-foreground shrink-0">
+            {duration}
+          </span>
+        )}
+      </div>
+      <ChevronDownIcon className="size-4 text-muted-foreground transition-transform group-data-[state=open]:rotate-180" />
+    </CollapsibleTrigger>
+  );
+};
diff --git a/apps/frontend/hooks/use-tool-completed-at.ts b/apps/frontend/hooks/use-tool-completed-at.ts
new file mode 100644
index 00000000..c7c59d65
--- /dev/null
+++ b/apps/frontend/hooks/use-tool-completed-at.ts
@@ -0,0 +1,90 @@
+import { useEffect, useState } from "react";
+import { formatDurationMs, formatToolDuration } from "@/lib/utils";
+
+// Treats any state carrying the `output-` prefix as terminal (not running).
+const isTerminalState = (state: string): boolean => state.startsWith("output-");
+// Parses an ISO timestamp to epoch ms; undefined when absent or unparseable.
+const toMs = (iso?: string): number | undefined => {
+  const epochMs = iso ? Date.parse(iso) : Number.NaN;
+  return Number.isNaN(epochMs) ? undefined : epochMs;
+};
+
+/**
+ * Resolves a tool call's run-duration string for the tool header.
+ *
+ * - Running (state lacks the `output-` prefix): a live elapsed timer measured
+ *   on the client from when this hook first saw the tool running, refreshed
+ *   once a second. `startedAt` is not consulted while running, and nothing is
+ *   returned until the client-observed start has been recorded.
+ * - Terminal: the span between `startedAt` and `completedAt` when both parse
+ *   and are ordered; otherwise a missing end falls back to the client-observed
+ *   start/end, which exists only if this hook recorded it while the tool was
+ *   running (so a reloaded chat shows the server span or nothing).
+ *
+ * @param state tool part state; anything starting with `output-` is terminal.
+ * @param startedAt ISO timestamp of the call's start, if known.
+ * @param completedAt ISO timestamp of the call's completion, if known.
+ * @returns the formatted duration, or undefined when there is nothing to show.
+ */
+export function useToolDuration(
+  state: string,
+  startedAt?: string,
+  completedAt?: string,
+): string | undefined {
+  const running = !isTerminalState(state);
+  // All render-visible values are state, never refs or live Date.now() reads
+  // (upstream's react-hooks rules forbid both during render). Every write is
+  // deferred into a timer callback — setState synchronously inside an effect
+  // body is also disallowed, but a timer/interval callback is a permitted site.
+  const [clientStart, setClientStart] = useState<number>();
+  const [clientEnd, setClientEnd] = useState<number>();
+  const [elapsedMs, setElapsedMs] = useState(0);
+
+  // While running: record the client-observed start once and tick the elapsed
+  // time every second. `start` is captured in the effect body (reading
+  // Date.now() there is fine); the setState calls run in deferred callbacks.
+  useEffect(() => {
+    if (!running) return;
+    const start = Date.now();
+    const startTimer = setTimeout(
+      () => setClientStart((prev) => prev ?? start),
+      0,
+    );
+    const id = setInterval(() => setElapsedMs(Date.now() - start), 1000);
+    return () => {
+      clearTimeout(startTimer);
+      clearInterval(id);
+    };
+  }, [running]);
+
+  // First terminal transition after we saw it running: freeze the end span.
+  // Deferred to a timer callback so it is not a synchronous effect-body write.
+  useEffect(() => {
+    if (running || clientStart === undefined) return;
+    const endTimer = setTimeout(
+      () => setClientEnd((prev) => prev ?? Date.now()),
+      0,
+    );
+    return () => clearTimeout(endTimer);
+  }, [running, clientStart]);
+
+  // Live elapsed timer while running. `clientStart` is set by a zero-delay
+  // timer, so the first render returns undefined; after that `elapsedMs` stays
+  // 0 (rendered as `0ms`) until the interval's first tick one second later.
+  if (running) {
+    if (clientStart === undefined) return undefined;
+    return formatDurationMs(elapsedMs);
+  }
+
+  // Terminal: exact server span if available, else the client-observed span.
+  const serverDuration = formatToolDuration(startedAt, completedAt);
+  if (serverDuration) return serverDuration;
+
+  const startMs = toMs(startedAt) ?? clientStart;
+  const endMs =
+    toMs(completedAt) ?? (clientStart !== undefined ? clientEnd : undefined);
+  if (startMs !== undefined && endMs !== undefined && endMs >= startMs) {
+    return formatDurationMs(endMs - startMs);
+  }
+  return undefined;
+}
diff --git a/apps/frontend/lib/utils.test.ts b/apps/frontend/lib/utils.test.ts
index 53217425..ac8a6b25 100644
--- a/apps/frontend/lib/utils.test.ts
+++ b/apps/frontend/lib/utils.test.ts
@@ -1,5 +1,5 @@
 import { describe, it, expect } from "vitest";
-import { joinUrl, parseValidationErrors } from "./utils";
+import { formatToolDuration, joinUrl, parseValidationErrors } from "./utils";
 
 describe("joinUrl", () => {
   it("should join base URL and path", () => {
@@ -25,6 +25,34 @@ describe("joinUrl", () => {
   });
 });
 
+describe("formatToolDuration", () => {
+  const start = "2026-05-30T12:00:00.000Z";
+  const plus = (ms: number) =>
+    new Date(new Date(start).getTime() + ms).toISOString();
+
+  it("returns undefined when a timestamp is missing", () => {
+    expect(formatToolDuration(undefined, start)).toBeUndefined();
+    expect(formatToolDuration(start, undefined)).toBeUndefined();
+  });
+
+  it("returns undefined for invalid or negative durations", () => {
+    expect(formatToolDuration("not-a-date", start)).toBeUndefined();
+    expect(formatToolDuration(plus(1000), start)).toBeUndefined();
+  });
+
+  it("formats sub-second durations in milliseconds", () => {
+    expect(formatToolDuration(start, plus(950))).toBe("950ms");
+  });
+
+  it("formats sub-minute durations in seconds with one decimal", () => {
+    expect(formatToolDuration(start, plus(1200))).toBe("1.2s");
+  });
+
+  it("formats durations over a minute as minutes and seconds", () => {
+    expect(formatToolDuration(start, plus(63000))).toBe("1m 3s");
+  });
+});
+
 describe("parseValidationErrors", () => {
   it("should parse validation errors correctly", () => {
     const errorData = {
diff --git a/apps/frontend/lib/utils.ts b/apps/frontend/lib/utils.ts
index d7b014f6..b5673274 100644
--- a/apps/frontend/lib/utils.ts
+++ b/apps/frontend/lib/utils.ts
@@ -18,6 +18,36 @@ export function joinUrl(base: string, path: string): string {
   return `${normalizedBase}${normalizedPath}`;
 }
 
+/**
+ * Builds a tool call's run-duration label from its start/end ISO timestamps.
+ * Yields undefined when a timestamp is missing or unparseable, or when the end
+ * precedes the start (e.g. an in-progress tool, or a historical message
+ * persisted before durations were tracked), so the UI can render nothing.
+ * Output scales with magnitude: `950ms`, `1.2s`, `1m 3s`.
+ */
+export function formatToolDuration(
+  startedAt?: string,
+  completedAt?: string,
+): string | undefined {
+  if (!startedAt || !completedAt) return undefined;
+  const beganMs = Date.parse(startedAt);
+  const finishedMs = Date.parse(completedAt);
+  const spanMs = finishedMs - beganMs;
+  // NaN (an unparseable timestamp) and negative spans both fail this check.
+  return spanMs >= 0 ? formatDurationMs(spanMs) : undefined;
+}
+
+/** Formats elapsed ms as `950ms`, `1.2s` or `1m 3s`; undefined if invalid. */
+export function formatDurationMs(ms: number): string | undefined {
+  if (!Number.isFinite(ms) || ms < 0) return undefined;
+  // Round BEFORE choosing the unit so a carry never shows `60.0s` or `1m 60s`.
+  if (Math.round(ms) < 1000) return `${Math.round(ms)}ms`;
+  const tenths = Math.round(ms / 100);
+  if (tenths < 600) return `${(tenths / 10).toFixed(1)}s`;
+  const totalSeconds = Math.round(ms / 1000);
+  return `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
+}
+
 export const fetcher = async (input: RequestInfo | URL, init?: RequestInit) => {
   const res = await fetch(input, { ...init, credentials: "include" });
   if (!res.ok) {
diff --git a/docs/adr/0012-context-compaction.md b/docs/adr/0012-context-compaction.md
new file mode 100644
index 00000000..d06cb26e
--- /dev/null
+++ b/docs/adr/0012-context-compaction.md
@@ -0,0 +1,385 @@
+---
+status: accepted
+---
+
+# Chat Context Compaction
+
+Chats hard-fail when message history exceeds a model's context window. This ADR
+records the design we shipped to keep them alive, **why** the obvious simpler
+options were rejected, and the named parts the implementation refers back to.
+
+It is self-contained: every decision, mechanism, and trade-off the code cites
+lives in a section below. Code comments reference this ADR by section name (e.g.
+_"ADR-0012 §Tier 1"_, _"ADR-0012 §Summary invalidation"_) rather than by any
+external plan or chunk number.
+
+If a future change forces a different choice, supersede with a new ADR rather
+than editing this one.
+
+## Context
+
+The AI SDK (`ai@6`) reports real token usage **after** each call
+(`usage.inputTokens`/`outputTokens`/`totalTokens`) but exposes **no**
+context-window metadata on the model interface and **no** pre-call tokenizer.
+Providers diverge on whether the window is discoverable: Google
+(`inputTokenLimit`), OpenRouter (`context_length`), and vLLM/OpenAI-compatible
+(`max_model_len`) expose it via API; OpenAI, Anthropic, and Bedrock do not.
+Error handling previously covered only auth/rate-limit/5xx — a context-overflow
+rejection killed the turn. Top-level chats and sub-agents both run through the
+shared `agent-runner`/`ToolLoopAgent`, so one implementation covers both.
+
+## Decision
+
+A **two-tier, view-not-delete** compaction model, fed by a **single token
+estimator**, with all durable state mutated through a **single versioned CAS
+writer**, an always-on **recovery net** for overflow errors the proactive path
+misses, and a deterministic **context-editing** pass that prunes stale bulky
+tool results without a model call.
+
+## Principles (load-bearing)
+
+### View, not delete
+
+The watermark + summary change _what is sent to the model_, never _what is
+stored_. Raw messages persist in the DB untouched. Forced/automatic compaction
+is therefore non-destructive in the data sense — a user can still read full
+history; a future "expand summary" UI is free — which reduces "irreversible data
+loss" objections to a UX-courtesy confirmation rather than a correctness concern.
+Never hard-delete a summarized message.
+
+### One estimator
+
+Token counting lives in exactly one function over one neutral structure
+(`CountUnit[]`). Tier 1 (UIMessages) and Tier 2 (ModelMessages) both normalize
+into it, counting only **model-bound** parts (`text`, `tool-call`,
+`tool-result`, `file`, `image`). UI-only parts (`reasoning`, `source`,
+`step-start`, `data-*`) are excluded on both sides. Divergence between the tiers
+is impossible by construction rather than monitored — a tier cannot fire on a
+number the other never sees.
+
+### One durable writer
+
+All mutations of compaction state (`summaryWatermark`, `contextSummary`,
+`compactionDirty`) go through a single compare-and-swap function keyed on a
+`version` column. Concurrent runs on one chat (e.g. a trigger run and a user
+run), and the interaction between compaction and history-edit invalidation, are
+resolved by **version**, not by comparing watermark values — so a watermark that
+moves _backward_ on an edit cannot be misread as "not yet advanced" and produce a
+stale summary over mutated history. On a CAS conflict the loser re-reads the row:
+if the winner already covered its prefix it **skips** (safe no-op); otherwise it
+retries **once**, then skips with a contended warning. No recompute-loop, no
+livelock. A covered-skip deliberately does **not** clear `compactionDirty`: a
+concurrent invalidation also bumps the version but intentionally leaves dirty set
+(it resets the summary without shrinking history), so clearing dirty on a skip
+could drop a forced compaction the overflow demanded. Leaving it set is strictly
+safe — worst case is one extra compaction next turn.
+
+### Recovery is the net
+
+A `400/413` context-overflow error is caught, the messages aggressively trimmed
+in-memory (via the same Tier 2 adapter — no bespoke trimmer), and the call
+retried **once**. Recovery never writes durable summary/watermark state directly —
+it flags `compactionDirty` on detection (before the retry outcome), and the next
+turn's Tier 1 does the durable compaction. Recovery stays on even when proactive
+compaction is globally disabled; it is the last line of defense, not a risk
+surface.
+
+## Mechanisms
+
+### Window resolution
+
+`resolveContextWindow(provider, modelId)` resolves per-model in order: manual
+override (`provider.modelMeta`) → provider API auto-detect (Google / OpenRouter /
+vLLM) → the community-maintained **litellm registry** JSON (covers
+OpenAI/Anthropic/Bedrock, which don't expose it) → a conservative `8192` default.
+We do **not** maintain our own context-window table.
+
+- **Key normalization.** Registry keys don't match `resolvedModelId` 1:1. Lookup
+  order: `exact → strip provider prefix → lowercase → alias map → family
+heuristic → MISS`. The family heuristic uses boundary-safe separators
+  (`"-"`, `"."`, `":"`, `"/"`) so `gpt-4.5-preview` never resolves via a stale
+  `gpt-4` entry. Every MISS warns (it falls to default — must be visible).
+- **Caching & eviction.** Results cache in-memory per provider+model with a 1 h TTL.
+  Editing a `modelMeta` override **immediately** evicts (`evict(providerId)`) in
+  the provider PUT handler — TTL is only a backstop. `source:"default"` results
+  use a short TTL (60 s) so a registry miss or transient API blip doesn't pin
+  8192 for the full hour. API fetches use a 5 s timeout and single-flight
+  (`#inflight`) to avoid a cold-cache stampede.
+- When the window is default/unknown the ring renders **neutral**, never a
+  guessed ramp. `maxOutputTokens` is resolved the same way (needed for the budget
+  math).
+
+### Token estimation
+
+Char/4 over **text parts only** (never char/4 a base64 image); a modality table
+sizes non-text parts (`anthropic`/`openai`/`default` constants, dimensions from a
+cheap PNG/JPEG header parse when bytes are in hand). Used **only on the first
+turn** before any provider `usage` exists; every later turn uses the
+provider-reported real `usage.inputTokens`. We accept first-turn imprecision
+(guarded by a 1.15 cold-start margin and the recovery net) rather than ship a
+per-provider tokenizer. The Tier 1 estimate runs **after** file inlining so the
+payload counted is the real one. Where image `detail` is unset (the common case)
+we assume `high` — **over-counting beats overflow**. A turn-2 divergence check is
+a designed-in feedback hook: compare the cold-start estimate against the real
+`usage.inputTokens` and warn when they diverge by >50%, to tune the image
+constants over time (currently log-only).
+
+### Tier 1 — cross-turn compaction (durable)
+
+Runs in `prepareChatTurn` before a response, over durable history (UIMessages).
+
+- **Budget math.** Trigger and target are fractions of the **input** budget, not
+  the raw window: `inputBudget = window − maxOutputReserve − safetyReserve`
+  (safety = `reserveRatio × window`, default 0.05). `triggerTokens = 0.8 ×
+inputBudget`, `targetTokens = 0.5 × inputBudget`. Per-turn **overhead** (system
+  prompt + tool schemas + skill list) is counted toward the trigger and
+  subtracted from the effective target, since it consumes the same window but is
+  invisible to a message-only estimate. When `maxOutputTokens` is unknown the
+  output reserve falls back to `min(4096, 0.25 × window)`.
+- **Trigger projection.** `projected = max(charBasedEstimate, lastInputTokens)`
+  where `lastInputTokens` is the prior turn's provider-reported
+  `usage.inputTokens` (threaded from the last assistant message's
+  `metadata.stats.contextTokens`). The cold-start ×1.15 margin applies only on
+  turn 1 when no provider baseline exists. Compact when `projected ≥ trigger`.
+- **Hysteresis.** Compaction must reduce the conversation to `≤ targetTokens`,
+  well below the trigger, so it does **not** re-fire next turn. The trigger (0.8)
+  and target (0.5) ratios are deliberately distinct. Config is global/env-only
+  (no submitted schema to validate), so the runtime clamps `target → trigger ×
+0.9` when an operator sets `COMPACTION_TARGET_RATIO ≥ COMPACTION_TRIGGER_RATIO`.
+- **Staged, cheap-first.** Stage 1 **prunes** the older prefix without a model
+  call (soft-trim bulky tool/RAG results to head+tail, then placeholder over
+  `minPrunableChars`); only if still above target does Stage 2 **summarize** the
+  prefix into one synthetic summary message. Tool-call/result pairs are atomic and
+  never split across the keep boundary. Output: `[system, summary, …kept recent]`.
+  A visible `context-compacted` event makes it fail-loud.
+- **Summarizer model & map-reduce.** Summarize uses the task model
+  (`taskModelId`), falling back to the main model; same-provider only. When the
+  prefix exceeds the summarizer's own window it is chunked and map-reduced (a
+  large cold-start/imported history can't be sent whole). Summarization is
+  **incremental**: each turn only the messages _after_ the watermark are
+  summarized and folded into the existing summary, then the watermark advances.
+- **Summary invalidation.** If a message at/below `summaryWatermark` is
+  edited/deleted/regenerated the summary is stale. The handler bumps version +
+  clears `contextSummary` + resets the watermark in one CAS write. Because the CAS
+  loser compares **version** (not watermark value), a compaction racing an
+  invalidation sees a conflict and re-reads the reset state — it can never write a
+  stale summary over mutated history. The invalidation compares the **un-inlined**
+  submission (file URLs match on both sides) with stable key ordering (jsonb is
+  re-ordered by Postgres), against the pre-overwrite DB snapshot loaded before the
+  sink overwrites the row.
+
+### Tier 2 — intra-turn compaction (in-memory)
+
+For a single heavy response (many tool/sub-agent calls) that bloats the window
+mid-loop. Runs in the AI SDK `prepareStep` hook on both `streamText` and
+`generateText`, over ModelMessages, summarizing old completed tool results while
+keeping recent steps verbatim and preserving call/result pairing. Fires **only
+when genuinely near the limit** (no per-step overhead on a small loop). **Not
+persisted** — the SDK's canonical message list commits to history as normal, and
+next turn Tier 1 folds it into the durable summary. One tier cannot cover both
+cases: a single response can blow the window without any cross-turn growth (Tier
+2's job), and durable history must be compacted before a turn starts (Tier 1's
+job).
+
+### Recovery
+
+`isContextOverflowError` matches `APICallError` with status `400/413` and a
+per-provider body regex (OpenAI/vLLM, Anthropic, Google, Bedrock — fixture-tested
+matrix). The recovery middleware wraps the model in **both** `streamText` and
+`generateText`, so every step of a tool loop gets detect → flag `compactionDirty`
+(persisted on detection, via the durable writer) → trim via the **same Tier 2
+adapter** (system head pinned, keep-recent halved with a floor of 2, forced past
+the estimate gate since the provider already rejected the prompt) → retry once. A
+second failure surfaces "Conversation too large — start a new chat". Durable
+compaction happens on the **next** `prepareChatTurn`, which sees the dirty flag.
+Headless runs (no chat row) still get the in-memory trim + retry, but cannot flag
+`compactionDirty` — there is nothing to persist to.
+
+### Sub-agents
+
+Sub-agents start fresh each invocation (only a `task` string, no cross-turn
+history), so they have nothing for Tier 1 to compact — they use **Tier 2 only**.
+Each resolves its own model's window/output and passes it through; recovery covers
+them too because `agent-runner` is shared.
+
+### Config & kill switch
+
+Compaction behavior is **global** (`DEFAULT_COMPACTION_CONFIG`); only window/output
+**size** is per-model (via §Window resolution / `provider.modelMeta`). Per-agent
+tuning was shipped and then removed — no surveyed tool (Hermes/Codex/Claude
+Code/Cline) exposes per-agent knobs, and the ratios self-normalize to the model
+window, so per-agent variance bought nothing measurable. The env
+`COMPACTION_ENABLED` (default true) disables **all proactive** compaction
+(Tier 1 and Tier 2) in prod without a deploy; **recovery ignores it**. A
+single-agent opt-out, if ever needed, would be per-model or per-workspace —
+not per-agent.
+
+### Context-usage ring
+
+The frontend shows a small SVG ring next to the model selector, fill =
+`usedTokens / contextWindow`, ramping green → amber (≥0.7) → red (≥0.9), and
+**neutral grey when the window is unknown/default**. The window comes from the
+**currently selected model** (not the last assistant message's metadata, else it
+shows the previous model's window after a switch); the numerator is the last
+response's `contextTokens` (the **last step's** `usage.inputTokens` — peak context
+fullness, not the run-wide sum, which would over-count multi-step loops). A
+required tooltip states the ring reflects the last response, not the unsent
+composer input.
+
+### Per-message stats
+
+An `(i)` action under each assistant response shows input/output tokens, TTFT, and
+total generation time, reusing the existing tool-call timing mechanism. Stats are
+stamped on `message.metadata.stats` at the `applyToolCompletions` point (the
+`messageMetadata` callback fires at message start, before timing/usage exist).
+TTFT/total are server-measured; cost figures use the run-wide token sums.
+
+### Force-compact on demand
+
+The ring is clickable: `POST /chats/:id/compact` runs Tier 1 once **regardless of
+threshold** (force), persists via the durable writer, and returns the post-compact
+usage so the ring refreshes immediately. If a response is streaming the click is
+**deferred** (pending badge, disabled, fires on finish); a confirm dialog appears
+only when the drop is significant — `messagesDropped > keepRecentMessages` **or**
+an estimated reduction `> 30%` of history — below that it runs immediately. Per
+§View-not-delete this is not destructive regardless.
+
+### Compaction trace in the timeline
+
+Compaction is surfaced as a synthetic `compact_context` tool-call + tool-result
+pair, reusing the existing tool-call UI (active spinner → collapsed expander with
+a summary excerpt). The Tier 1 path injects the pair into the response stream; the
+force-compact path (no live stream) persists a standalone synthetic assistant
+message **above** the watermark. The trace part is **stripped before
+`convertToModelMessages`** at both call sites so it never replays to the provider
+as a phantom tool call; a trace-only message is dropped entirely. The trace is
+emitted only when an actual model summary ran (not for prune-only or
+dirty-within-target no-ops).
+
+### Stage 0 — context editing (prune, don't summarize)
+
+A deterministic, no-model-call pass that runs as **Stage 0 inside
+`applyTier1Compaction`, before the trigger decision**, replacing the `output` of
+**old bulky** tool results with a self-describing placeholder (names the tool +
+elided size; tells the model to re-call). This mirrors Anthropic's
+`clear_tool_uses` context editing. It keeps the tool-call block (pairing stays
+valid), prunes by **recency count** of tool results (`keepRecentToolResults`,
+default 4) above a **size gate** (`minEditableToolChars`, default 50 000), exempts
+the newest message, and is idempotent + grow-guarded (never re-elides a
+placeholder, never inflates a result smaller than the placeholder). Running it
+before the trigger lets a lean view **avoid** summarization entirely (cheaper).
+It needs **no durable state, no CAS, no version bump** — it is recomputed from raw
+messages each turn, a sibling of the trace-stripping transform. Accepted fidelity
+loss: an elided placeholder also flows into any prefix Stage 2 later summarizes
+(a huge dump's head+tail is poor summary fodder anyway; raw stays in the DB).
+
+### Hard window wall (recent-trim gate)
+
+Missing the soft `targetTokens` is cheap (a hysteresis goal). The hard wall is
+`inputBudget` (window − output reserve − safety). Recent (kept) messages are
+trimmed **only** when `estimate(recent) + summary > inputBudget` (the call would
+actually overflow), not on the soft-target miss — below the wall `recent` stays
+full-fidelity and simply re-compacts next turn. The single newest message is
+always exempt. A single result too large to fit _even as the newest_ is the
+unsolved ingestion-cap case (an over-large dump as the last message will
+hard-error) — out of scope here, would need an ingestion cap at storage time.
+
+### Summarizer hardening
+
+Tier-1 `summarize()` is a long blocking call inside `prepareChatTurn`, before the
+response stream opens, and does not bump the per-step stall timer — the 120 s
+watchdog once killed a slow summarize mid-call. Hardening: a **heartbeat** pings
+`onActivity` (~every 10 s) so the watchdog keeps resetting; a `maxOutputTokens`
+**ceiling** (≈4 000) backstops a degenerate runaway expansion (`finishReason ===
+"length"` is logged); the summarize call is **cancellable** (`abortSignal`); and a
+**structured handoff prompt** (intent · decisions/facts · files/tools · current
+state) with an explicit length target reduces loss across repeated
+re-compactions.
+
+## Considered Options
+
+- **Single-tier compaction (cross-turn only)** — rejected. Cannot rescue a single
+  response whose own tool loop overflows the window.
+- **One estimator per tier** — rejected. Two estimators over two message shapes
+  drift; collapsed to one estimator + two adapters.
+- **Hard-delete / truncate old messages** — rejected. Irreversible, and prone
+  to the "drops the middle silently" failure mode seen in gateway truncation.
+  View-not-delete keeps the data and makes the action auditable.
+- **Homegrown context-window lookup table** — rejected. Unmaintainable across
+  providers; the litellm registry is the industry "don't maintain your own table"
+  answer.
+- **A real pre-call tokenizer** — rejected for v1. A heavy per-provider dependency
+  for a number the provider returns accurately after the first call.
+- **Optimistic concurrency by comparing watermark values** — rejected. Breaks when
+  invalidation moves the watermark backward; versioned CAS removes the
+  monotonicity assumption.
+- **Compacting to the trigger threshold** — rejected; it re-fires every turn (the
+  thrash failure). Trigger and target ratios are distinct for hysteresis.
+- **Per-agent compaction tuning** — shipped then removed; no real tool exposes it
+  and the ratios self-normalize to the model window.
+- **Summarize-only (no context editing)** — insufficient. Bulky kept tool results
+  dominate `tokensAfter`; deterministic prune-not-summarize is cheaper and lossless
+  to the DB.
+- **A token FLOOR on the trigger** (`max(window × pct, 64000)`) — rejected as an
+  anti-pattern. It overflows sub-64k models (the trigger never fires). A floor
+  belongs only on the _window fallback_ (`detected ?? DEFAULT`), never as
+  `max(detected, FLOOR)`.
+- **Sizing the window from litellm `max_tokens`** — rejected. Only
+  `max_input_tokens` is trusted; `max_tokens` is the output cap, not the context
+  window, and conflating them mis-sizes the budget.
+- **A selectable compaction model in the provider UI** — rejected. Compaction
+  already runs through `taskModelId` on the chat provider's own client
+  (same-provider only); on a single-model provider a dropdown is a no-op, and
+  `workspace.taskModelProviderId` (which routes _other_ task work) deliberately
+  does not apply to compaction. Not worth the multi-provider wiring now.
+
+## Open / deferred decisions
+
+Consciously deferred, with rationale — recorded so the _why-not-yet_ isn't lost:
+
+- **CAS-contention optimization (the per-turn full-history read + stringify)** is
+  left unoptimized; the full-prefix compare is already correct, so this is gated on
+  the `cas.conflict` metric actually showing waste before it's touched.
+- **The estimate-vs-real divergence metric** (see §Token estimation) stays log-only
+  until the image-constant tuning work is picked up.
+- **Live `Pending → Running` compaction trace** is deferred; the trace renders
+  post-hoc "Completed" only. Run/connection decoupling already neutralizes the
+  data-loss vector, so this is a liveness/UX gap, not correctness.
+- **Also deferred:** a message-count force-compact valve (a count-based backstop
+  independent of the token estimate), a projected-input arc on the ring, persisting
+  Tier 2 output, model-aware trim aggressiveness, and Anthropic's `count_tokens` for
+  exact Claude counts — none are needed for the "no more hard fails" goal.
+- **Latent invariant — content-type tool-result media.** The `content`-variant
+  tool result currently serializes media bytes into the char/4 text blob. Fixing it
+  must be **symmetric** across both adapters (extract media into `nonText` on the UI
+  _and_ Model side) or it breaks the §One estimator equality — the load-bearing
+  invariant. No current tool emits content-type media; fix it **before** the first
+  one does, not after.
+
+## Consequences
+
+- **Schema additions.** `provider.modelMeta` (JSONB, per-model window/output
+  overrides); chat/run gain `contextSummary`, `summaryWatermark`,
+  `compactionDirty`, `version`. All additive, nullable/defaulted.
+- **Lazy rollout, no backfill.** Existing chats compact only on their next turn; no
+  eager backfill job (it would create a thundering herd of summarize calls).
+- **A summarize call costs money and latency.** Stage 0 (context editing) and Stage
+  1 (prune) run first without a model call; Stage 2 summarizes only when needed.
+- **First-turn token estimates are imprecise** (image-heavy/CJK/JSON); the recovery
+  net absorbs the misses.
+- **Cross-tenant safety.** The submit route verifies the body `id` belongs to the
+  caller's workspace before a run starts — the compaction store is keyed by chat id
+  only, so an unvalidated id would otherwise let one workspace mutate another's
+  summary/watermark.
+- **A global `COMPACTION_ENABLED` kill switch** disables proactive compaction in
+  prod without a deploy; recovery is unaffected.
+- **Observability is part of the contract** — emitted as structured `metric:`-tagged
+  log lines: `compaction.fired`, `summarize.latency_ms`, `recovery.*`,
+  `context_window.fell_to_default`, `litellm.key_miss`, `cas.conflict`,
+  `context_edited`.
+- **Frontend gains a context-usage ring** (window from the selected model, neutral
+  when unknown), a per-message stats popover, and a `compact_context` timeline
+  trace, all reusing the existing tool-call timing/rendering mechanism.
+- **Unsolved: the single oversized newest result.** A tool result too large to fit
+  even as the newest message hard-errors; the fix is an ingestion cap at storage
+  time, out of scope here.
diff --git a/packages/schemas/index.test.ts b/packages/schemas/index.test.ts
index c8a3b881..c4915856 100644
--- a/packages/schemas/index.test.ts
+++ b/packages/schemas/index.test.ts
@@ -13,7 +13,10 @@ import {
   sandboxEnvSchema,
   SANDBOX_ENV_MAX_ENTRIES,
   SANDBOX_ENV_MAX_VALUE_BYTES,
+  providerSchema,
+  providerUpdateSchema,
   providerCreateSchema,
+  chatSchema,
 } from "./index";
 
 describe("Organization Schema", () => {
@@ -286,6 +289,106 @@ describe("Agent Schema", () => {
   });
 });
 
+describe("Provider modelMeta (ADR-0012 §Window resolution)", () => {
+  const base = {
+    id: "prov-1",
+    workspaceId: "ws-1",
+    name: "My Provider",
+    providerType: "OpenAI" as const,
+    apiKey: "sk-x",
+    modelIds: ["gpt-4o"],
+    taskModelId: "gpt-4o",
+    memoryExtractionModelId: "gpt-4o",
+    createdAt: new Date(),
+    updatedAt: new Date(),
+  };
+
+  it("is valid with modelMeta omitted (additive, optional)", () => {
+    expect(providerSchema.safeParse(base).success).toBe(true);
+  });
+
+  it("accepts per-model contextWindow / maxOutputTokens overrides", () => {
+    const result = providerSchema.safeParse({
+      ...base,
+      modelMeta: {
+        "gpt-4o": { contextWindow: 128000, maxOutputTokens: 16384 },
+        "o1-mini": { contextWindow: 200000 },
+      },
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it("rejects a non-positive contextWindow", () => {
+    const result = providerSchema.safeParse({
+      ...base,
+      modelMeta: { "gpt-4o": { contextWindow: 0 } },
+    });
+    expect(result.success).toBe(false);
+  });
+
+  it("rejects a non-integer window", () => {
+    const result = providerSchema.safeParse({
+      ...base,
+      modelMeta: { "gpt-4o": { contextWindow: 1.5 } },
+    });
+    expect(result.success).toBe(false);
+  });
+
+  it("carries modelMeta through the update schema", () => {
+    const result = providerUpdateSchema.safeParse({
+      name: "My Provider",
+      providerType: "OpenAI",
+      apiKey: "sk-x",
+      modelIds: ["gpt-4o"],
+      taskModelId: "gpt-4o",
+      memoryExtractionModelId: "gpt-4o",
+      modelMeta: { "gpt-4o": { contextWindow: 128000 } },
+    });
+    expect(result.success).toBe(true);
+  });
+});
+
+describe("Chat compaction state (ADR-0012 §One durable writer)", () => {
+  const base = {
+    id: "chat-1",
+    workspaceId: "ws-1",
+    title: "My Chat Title",
+    status: "succeeded" as const,
+    isPinned: false,
+    createdAt: new Date(),
+    updatedAt: new Date(),
+  };
+
+  it("is valid with compaction fields omitted (existing rows)", () => {
+    expect(chatSchema.safeParse(base).success).toBe(true);
+  });
+
+  it("accepts a populated summary + watermark + version", () => {
+    const result = chatSchema.safeParse({
+      ...base,
+      contextSummary: "Summary of earlier turns.",
+      summaryWatermark: "msg-42",
+      compactionDirty: true,
+      version: 3,
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it("accepts an explicitly null summary / watermark", () => {
+    const result = chatSchema.safeParse({
+      ...base,
+      contextSummary: null,
+      summaryWatermark: null,
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it("rejects a non-integer version", () => {
+    const result = chatSchema.safeParse({ ...base, version: 1.5 });
+    expect(result.success).toBe(false);
+  });
+});
+
 describe("Provider Create Schema", () => {
   const baseProvider = {
     organizationId: "org-123",
diff --git a/packages/schemas/index.ts b/packages/schemas/index.ts
index 2086beec..ccfe643b 100644
--- a/packages/schemas/index.ts
+++ b/packages/schemas/index.ts
@@ -101,6 +101,13 @@ export const chatSchema = z.object({
   seed: z.number().optional(),
   presencePenalty: z.number().optional(),
   frequencyPenalty: z.number().optional(),
+  // Context-compaction state (docs/adr/0012). Server-managed; intentionally NOT
+  // part of chatSubmit/chatUpdate. summaryWatermark is the message id of the
+  // last summarized message (§View, not delete: a view over history).
+  contextSummary: z.string().nullable().optional(),
+  summaryWatermark: z.string().nullable().optional(),
+  compactionDirty: z.boolean().optional(),
+  version: z.number().int().optional(),
   createdAt: z.date(),
   updatedAt: z.date(),
 });
@@ -553,6 +560,19 @@ export const providerApiModeSchema = z.enum(["chat", "responses"]);
 
 export type ProviderApiMode = z.infer<typeof providerApiModeSchema>;
 
+// One model's window/output override (ADR-0012 §Window resolution): each field
+// is an optional positive int. modelMetaSchema maps model id → entry.
+export const modelMetaEntrySchema = z.object({
+  contextWindow: z.number().int().positive().optional(),
+  maxOutputTokens: z.number().int().positive().optional(),
+});
+
+export type ModelMetaEntry = z.infer<typeof modelMetaEntrySchema>;
+
+export const modelMetaSchema = z.record(z.string(), modelMetaEntrySchema);
+
+export type ModelMeta = z.infer<typeof modelMetaSchema>;
+
 const providerBaseSchema = z.object({
   id: z.string(),
   organizationId: z.string().optional(),
@@ -592,6 +612,7 @@ const providerBaseSchema = z.object({
     .max(4096)
     .nullable()
     .optional(),
+  modelMeta: modelMetaSchema.optional(),
   createdAt: z.date(),
   updatedAt: z.date(),
 });
@@ -643,6 +664,7 @@ export const providerCreateSchema = providerBaseSchema.pick({
   memoryExtractionModelId: true,
   embeddingModelId: true,
   embeddingDimensions: true,
+  modelMeta: true,
 });
 
 // Sandbox
@@ -778,6 +800,7 @@ export const providerUpdateSchema = providerBaseSchema.pick({
   memoryExtractionModelId: true,
   embeddingModelId: true,
   embeddingDimensions: true,
+  modelMeta: true,
 });
 
 export type ProviderUpdateData = z.infer<typeof providerUpdateSchema>;
@@ -1527,3 +1550,23 @@ export const dashboardUpdateSchema = z.object({
   desktopLayout: z.array(rglLayoutItemSchema).optional(),
   mobileLayout: z.array(rglLayoutItemSchema).optional(),
 });
+
+// Message stats (ADR-0012 §Context-usage ring / §Per-message stats).
+// Stamped on the last assistant message's metadata.stats after each stream run;
+// read by the frontend context-usage ring and the per-message stats popover.
+// Timestamps are plain strings here (presumably ISO-8601 — TODO confirm).
+export const messageStatsSchema = z.object({
+  // Run-wide totals across every step (sum) — for the cost popover (§Per-message stats).
+  inputTokens: z.number().nonnegative(),
+  outputTokens: z.number().nonnegative(),
+  // Input tokens of the LAST model call = peak context fullness — for the
+  // §Context-usage ring. NOT the run-wide sum (which over-counts on multi-step tool loops).
+  contextTokens: z.number().nonnegative(),
+  startedAt: z.string(),
+  firstTokenAt: z.string().optional(),
+  finishedAt: z.string(),
+  contextWindow: z.number().positive(),
+  contextWindowIsDefault: z.boolean(),
+});
+
+export type MessageStats = z.infer<typeof messageStatsSchema>;