From 60cf7e116e336f5cdc4d6aa33859c5ab3ca8d0a0 Mon Sep 17 00:00:00 2001 From: doxav Date: Fri, 10 Apr 2026 06:59:52 +0200 Subject: [PATCH 01/16] Add generic LangGraph OTEL/TGJ optimization helpers and Notebook demo of Langgraph optimization --- docs/GraphOptimization.md | 427 ++++ ...mo_langgraph_instrument_and_optimize.ipynb | 2020 +++++++++++++++++ opto/trace/io/__init__.py | 83 + opto/trace/io/bindings.py | 136 ++ opto/trace/io/eval_hooks.py | 314 +++ opto/trace/io/instrumentation.py | 231 ++ opto/trace/io/langgraph_otel_runtime.py | 451 ++++ opto/trace/io/optimization.py | 588 +++++ opto/trace/io/otel_adapter.py | 223 ++ opto/trace/io/otel_semconv.py | 125 + opto/trace/io/telemetry_session.py | 623 +++++ opto/trace/io/tgj_export.py | 160 ++ opto/trace/io/tgj_ingest.py | 275 +++ pyproject.toml | 5 + setup.py | 2 +- tests/features_tests/test_e2e_m1_pipeline.py | 785 +++++++ .../test_tgj_otel_integration.py | 280 +++ tests/unit_tests/test_bindings.py | 69 + tests/unit_tests/test_instrumentation.py | 198 ++ .../unit_tests/test_langgraph_otel_runtime.py | 180 ++ tests/unit_tests/test_optimization.py | 87 + tests/unit_tests/test_otel_semconv.py | 78 + tests/unit_tests/test_telemetry_session.py | 114 + 23 files changed, 7453 insertions(+), 1 deletion(-) create mode 100644 docs/GraphOptimization.md create mode 100644 examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb create mode 100644 opto/trace/io/__init__.py create mode 100644 opto/trace/io/bindings.py create mode 100644 opto/trace/io/eval_hooks.py create mode 100644 opto/trace/io/instrumentation.py create mode 100644 opto/trace/io/langgraph_otel_runtime.py create mode 100644 opto/trace/io/optimization.py create mode 100644 opto/trace/io/otel_adapter.py create mode 100644 opto/trace/io/otel_semconv.py create mode 100644 opto/trace/io/telemetry_session.py create mode 100644 opto/trace/io/tgj_export.py create mode 100644 opto/trace/io/tgj_ingest.py create mode 100644 
tests/features_tests/test_e2e_m1_pipeline.py create mode 100644 tests/features_tests/test_tgj_otel_integration.py create mode 100644 tests/unit_tests/test_bindings.py create mode 100644 tests/unit_tests/test_instrumentation.py create mode 100644 tests/unit_tests/test_langgraph_otel_runtime.py create mode 100644 tests/unit_tests/test_optimization.py create mode 100644 tests/unit_tests/test_otel_semconv.py create mode 100644 tests/unit_tests/test_telemetry_session.py diff --git a/docs/GraphOptimization.md b/docs/GraphOptimization.md new file mode 100644 index 00000000..46fd8ac8 --- /dev/null +++ b/docs/GraphOptimization.md @@ -0,0 +1,427 @@ +# Graph Optimization + +OpenTrace provides a unified API for instrumenting LangGraph agents with OpenTelemetry (OTEL) tracing and running prompt optimization loops. It reduces ~645 lines of manual instrumentation boilerplate to two function calls: `instrument_graph()` and `optimize_graph()`. Traces are emitted with dual semantic conventions compatible with both Trace (TGJ) and Agent Lightning, enabling optimization via the Trace framework while supporting standard observability tooling. + +--- + +## Table of Contents + +1. [Before / After](#1-before--after) +2. [Architecture](#2-architecture) +3. [Public API Reference](#3-public-api-reference) +4. [Data Flow Pipeline](#4-data-flow-pipeline) +5. [Semantic Conventions](#5-semantic-conventions) +6. [Temporal Chaining](#6-temporal-chaining) +7. [Core Modules](#7-core-modules) + +--- + +## 1. 
Before / After + +### Boilerplate Comparison + +| Step | Before (manual) | After (this API) | +|------|-----------------|------------------| +| **Create session** | ~50 lines: TracerProvider, InMemorySpanExporter, SimpleSpanProcessor, tracer init | Created inside `instrument_graph()`; no explicit session code | +| **Instrument graph** | ~25 lines per node: manual span creation, attribute setting, TracingLLM wiring | `instrument_graph(graph, ...)` | +| **Run optimize loop** | ~150 lines: loop, trace capture, TGJ conversion, score tracking, template update | `optimize_graph(ig, queries, iterations=5)` | +| **Persist artifacts** | ~50 lines: OTLP export, file write, optional MLflow log | `ig.session.flush_otlp()` | + +**Total: ~645 lines reduced to ~10 lines.** + +### Code Diff + +```diff +- # --- BEFORE: Manual setup (~255+ lines for 4 steps) --- +- from opentelemetry.sdk.trace import TracerProvider +- from opentelemetry.sdk.trace.export import SimpleSpanProcessor, InMemorySpanExporter +- exporter = InMemorySpanExporter() +- provider = TracerProvider() +- provider.add_span_processor(SimpleSpanProcessor(exporter)) +- tracer = provider.get_tracer("my-agent") +- # ... per-node: with tracer.start_as_current_span(name): ... +- # ... manual optimization loop with flush, TGJ, optimizer.step() ... +- # ... manual export to JSON / MLflow ... + ++ # --- AFTER: Minimal API --- ++ from opto.trace.io import instrument_graph, optimize_graph ++ ++ ig = instrument_graph( ++ graph=my_graph, ++ llm=my_llm, ++ initial_templates={"planner_prompt": "Plan for: {query}"}, ++ trainable_keys={"planner", "synthesizer"}, ++ ) ++ result = optimize_graph(ig, queries=["Q1", "Q2"], iterations=5) ++ otlp = ig.session.flush_otlp() +``` + +--- + +## 2. Architecture + +``` ++---------------------------------------------------------------------+ +| User Code | +| | +| graph = StateGraph(...) # define LangGraph | +| graph.add_node("planner", ...) 
# add nodes | +| | +| ig = instrument_graph( # ONE-LINER instrumentation | +| graph=graph, llm=my_llm, | +| initial_templates={...}, | +| ) | +| result = optimize_graph(ig, queries=[...]) # ONE-LINER optimize | ++-------------------------------------+-------------------------------+ + | + +---------------------------v---------------------------+ + | instrument_graph() | + | | + | +--------------+ +-------------+ +-------------+ | + | | Telemetry | | TracingLLM | | Bindings | | + | | Session | | (dual | | (param -> | | + | | | | semconv) | | setter) | | + | | TracerProv. | | | | | | + | | InMemoryExp. | | param.* | | get() / | | + | | flush_otlp() | | gen_ai.* | | set() | | + | +------+-------+ +------+------+ +------+------+ | + | | | | | + | +--------+--------+ | | + | | | | + | +-------------v-----------------+ | | + | | InstrumentedGraph | | | + | | .graph (CompiledGraph) |--------+ | + | | .session (TelemetrySession) | | + | | .tracing_llm (TracingLLM) | | + | | .templates (dict) | | + | | .bindings (dict) | | + | | .invoke() .stream() | | + | +-------------------------------+ | + +--------------------------------------------------------+ +``` + +### Component Responsibilities + +| Component | Module | Purpose | +|-----------|--------|---------| +| `InstrumentedGraph` | `instrumentation.py` | Wrapper returned by `instrument_graph()`; holds graph, session, tracing_llm, templates, bindings | +| `TelemetrySession` | `telemetry_session.py` | Manages `TracerProvider` + `InMemorySpanExporter`; provides `flush_otlp()`, `flush_tgj()`, `export_run_bundle()` | +| `TracingLLM` | `langgraph_otel_runtime.py` | Wraps any OpenAI-compatible LLM; emits parent spans (`param.*`) and child spans (`gen_ai.*`) | +| `Binding` | `bindings.py` | Dataclass with `get()`/`set()` callables mapping optimizer keys to live variables | +| `optimize_graph()` | `optimization.py` | Orchestrates the optimization loop: invoke, flush OTLP, convert to TGJ, run optimizer, apply updates | +| 
`otel_adapter` | `otel_adapter.py` | Converts OTLP JSON to Trace-Graph JSON (TGJ) with temporal hierarchy | +| `tgj_ingest` | `tgj_ingest.py` | Ingests TGJ documents into `ParameterNode` / `MessageNode` objects | +| `otel_semconv` | `otel_semconv.py` | Helpers: `emit_reward()`, `emit_trace()`, `record_genai_chat()` | + +### Supported Graph Kinds + +| Kind | Support | Notes | +|------|---------|--------| +| Sync graphs | Yes | `invoke()` on compiled `StateGraph`; node wrappers run synchronously | +| Async graphs | Planned | `ainvoke()` / `astream()`; same wrapper model, async span handling | +| Streaming | Planned | `stream()` / `astream()`; spans emitted per node completion | +| Tools | Yes | Tool calls inside nodes traced via the same LLM wrapper | +| Loops | Yes | Cyclic graphs and conditional edges; each node execution gets a span | + +Instrumentation uses **node-level wrappers** (not LangChain/LangGraph callbacks). This provides full control over span boundaries and parent-child relationships, guarantees `param.*` and `gen_ai.*` attributes for TGJ and Agent Lightning, and works identically for custom and default graphs. + +--- + +## 3. Public API Reference + +### `instrument_graph()` + +Wraps a LangGraph with automatic OTEL instrumentation. 
+ +```python +from opto.trace.io import instrument_graph + +ig = instrument_graph( + graph=my_state_graph, # StateGraph or CompiledGraph (auto-compiled) + service_name="my-agent", # OTEL service name + trainable_keys={"planner"}, # None = all trainable + llm=my_llm_client, # Any OpenAI-compatible client + initial_templates={ # Starting prompt templates + "planner_prompt": "Plan for: {query}", + }, + emit_genai_child_spans=True, # Agent Lightning gen_ai.* child spans + bindings=None, # Auto-derived from templates if None + in_place=False, # Don't permanently mutate original graph + provider_name="openai", # For gen_ai.provider.name attribute +) -> InstrumentedGraph +``` + +**Returns** an `InstrumentedGraph` with `.invoke()`, `.session`, `.tracing_llm`, `.templates`, and `.bindings`. + +### `optimize_graph()` + +Runs the optimization loop on an instrumented graph. + +```python +from opto.trace.io import optimize_graph, EvalResult + +result = optimize_graph( + graph=ig, # InstrumentedGraph from instrument_graph() + queries=["q1", "q2"], # List of queries or state dicts + iterations=5, # Optimization iterations (after baseline) + optimizer=None, # Auto-creates OptoPrime if None + eval_fn=my_eval_fn, # float | str | dict | EvalResult -> normalized + apply_updates_flag=True, # Apply optimizer suggestions via bindings + on_iteration=my_callback, # (iter, runs, updates) progress callback +) -> OptimizationResult +``` + +### `EvalResult` + +```python +@dataclass +class EvalResult: + score: float | None = None # Numeric reward + feedback: str = "" # Textual feedback (Trace/TextGrad-compatible) + metrics: dict = {} # Free-form metrics +``` + +The `EvalFn` type accepts any of these return types and auto-normalizes: + +| Return type | Conversion | +|-------------|------------| +| `float` / `int` | `EvalResult(score=value)` | +| `str` | Tries JSON parse, falls back to `EvalResult(feedback=value)` | +| `dict` | `EvalResult(score=d["score"], feedback=d["feedback"])` | +| 
`EvalResult` | Passed through | + +### `OptimizationResult` + +```python +@dataclass +class OptimizationResult: + baseline_score: float # Average score of the baseline run + best_score: float # Best average score across iterations + best_iteration: int # Which iteration achieved best_score + best_updates: dict # The parameter updates that achieved best + final_parameters: dict # Current values of all bound parameters + score_history: list[float] # Average score per iteration [baseline, iter1, ...] + all_runs: list[list[RunResult]] # Nested: all_runs[iteration][query_idx] +``` + +### `Binding` and `apply_updates()` + +Bindings decouple the optimizer's string-keyed updates from the runtime location of the actual variable. This makes optimization generic -- no hard-coded node names. + +```python +from opto.trace.io import Binding, apply_updates, make_dict_binding + +# Binding wraps any get/set pair +binding = Binding( + get=lambda: my_config["prompt"], + set=lambda v: my_config.__setitem__("prompt", v), + kind="prompt", # "prompt" | "code" | "graph" +) + +# Convenience: bind to a dict entry +binding = make_dict_binding(my_dict, "key_name", kind="prompt") + +# Apply optimizer output +apply_updates( + {"prompt_key": "new value"}, + {"prompt_key": binding}, + strict=True, # raise KeyError on unknown keys +) +``` + +**Binding kinds:** + +| Kind | Description | Example | +|------|-------------|---------| +| `"prompt"` | Text template / system prompt | `"Plan for: {query}"` | +| `"code"` | Function source code (via `param.__code_*`) | `"def route(state): ..."` | +| `"graph"` | Graph routing knob | `"param.route_threshold"` | + +**How bindings are created:** + +1. **Auto-derived** (default): When `bindings=None` and `initial_templates` is provided, `instrument_graph()` creates one `Binding` per template key, backed by the `templates` dict. +2. 
**Explicit**: Pass `bindings={"key": Binding(get=..., set=...)}` for custom targets (e.g., class attributes, database rows, config files). + +### Span Helpers + +```python +from opto.trace.io import emit_reward, emit_trace + +# Emit a reward span (Agent Lightning compatible) +emit_reward(session, value=0.85, name="eval_score") + +# Emit a custom debug span +emit_trace(session, name="my_debug_span", attrs={"key": "value"}) +``` + +--- + +## 4. Data Flow Pipeline + +The end-to-end pipeline executed by `optimize_graph()` per iteration: + +``` + +---------+ +----------+ +-----------+ +-----------+ + | invoke()|---->| flush |---->| OTLP->TGJ |---->| ingest | + | LangGraph| | _otlp() | | adapter | | _tgj() | + +---------+ +----------+ +-----------+ +-----+-----+ + | + v + +---------+ +----------+ +-----------+ +-----------+ + | apply |<----| optimizer|<----| backward() |<----| Parameter | + |_updates()| | .step() | | feedback | | Node + | + +----+----+ +----------+ +-----------+ | Message | + | | Node | + v +-----------+ + +---------+ + |templates| <- updated via Binding.set() + | dict | -> next invoke() uses new prompts + +---------+ +``` + +### Step-by-step + +1. **`invoke()`** -- Execute the LangGraph. Each node calls `TracingLLM.node_call()` which creates OTEL spans with `param.*` attributes. +2. **`flush_otlp()`** -- Extract all collected spans from the `InMemorySpanExporter` as an OTLP JSON payload and clear the exporter. +3. **`eval_fn()`** -- Evaluate the graph output. The `EvalFn` signature accepts `float | str | dict | EvalResult` and auto-normalizes. +4. **OTLP to TGJ** -- `otlp_traces_to_trace_json()` converts OTLP spans into Trace-Graph JSON format with temporal hierarchy. +5. **`ingest_tgj()`** -- Parse TGJ into `ParameterNode` (trainable prompts) and `MessageNode` (span outputs) objects. +6. **`backward()`** -- Propagate evaluation feedback through the trace graph to trainable parameters. +7. 
**`optimizer.step()`** -- The optimizer (e.g., `OptoPrime`) suggests parameter updates based on the feedback. +8. **`apply_updates()`** -- Push the optimizer's output through `Binding.set()` to update live template values. +9. **Next iteration** -- The updated templates are automatically used by `TracingLLM.node_call()` on the next `invoke()`. + +--- + +## 5. Semantic Conventions + +`TracingLLM` implements **dual semantic conventions** -- a single LLM call emits two spans: + +``` ++--------------------------------------------------+ +| Parent span: "planner" | +| | +| param.planner_prompt = "Plan for: {query}" | <- Trace/TGJ optimization +| param.planner_prompt.trainable = true | +| inputs.gen_ai.prompt = "Plan for: cats" | +| gen_ai.model = "llama-3.1-8b" | +| | +| +--------------------------------------------+ | +| | Child span: "openai.chat.completion" | | +| | | | +| | gen_ai.operation.name = "chat" | | <- Agent Lightning observability +| | gen_ai.provider.name = "openai" | | +| | gen_ai.request.model = "llama-3.1-8b" | | +| | gen_ai.output.preview = "Step 1: ..." 
| | +| | trace.temporal_ignore = "true" | | <- prevents TGJ chain break +| +--------------------------------------------+ | ++--------------------------------------------------+ +``` + +### Attribute Reference + +| Attribute | Purpose | Span Level | Consumed By | +|-----------|---------|------------|-------------| +| `param.*` | Trainable parameter values | Parent | Optimizer (via TGJ `ParameterNode`) | +| `param.*.trainable` | Whether the parameter is optimizable | Parent | TGJ adapter | +| `inputs.*` | Input signals to the node | Parent | TGJ `MessageNode` edges | +| `gen_ai.operation.name` | LLM operation type (e.g., `"chat"`) | Child | Agent Lightning dashboards | +| `gen_ai.provider.name` | LLM provider (e.g., `"openai"`, `"openrouter"`) | Child | Agent Lightning dashboards | +| `gen_ai.request.model` | Model identifier | Child | Agent Lightning dashboards | +| `gen_ai.input.messages` | JSON array of input messages | Child | Agent Lightning dashboards | +| `gen_ai.output.messages` | JSON array of response messages | Child | Agent Lightning dashboards | +| `trace.temporal_ignore` | Exclude from TGJ temporal chain (`"true"`) | Child | `otel_adapter.py` | +| `agentlightning.reward.0.name` | Evaluation reward name | Reward span | Agent Lightning | +| `agentlightning.reward.0.value` | Stringified numeric reward (e.g., `"0.933"`) | Reward span | Agent Lightning | + +### OTEL Span Types + +**Node spans** (one per node execution): +- `param.{template_name}` -- prompt template text (if node has a trainable template) +- `param.{template_name}.trainable` -- `"True"` or `"False"` +- `inputs.gen_ai.prompt` -- user-facing input snippet +- `gen_ai.model` -- model identifier + +**LLM spans** (child of node span): +- `gen_ai.operation.name`, `gen_ai.provider.name`, `gen_ai.request.model` +- `gen_ai.input.messages`, `gen_ai.output.messages` +- `trace.temporal_ignore` = `"true"` + +**Evaluation / reward spans** (Agent Lightning compatibility): +- Span name: 
`agentlightning.annotation` +- `trace.temporal_ignore` = `"true"` +- `agentlightning.reward.0.name`, `agentlightning.reward.0.value` + +### `message.id` + +Each span is assigned a unique `message.id` (span ID) used by the TGJ adapter to reconstruct parent-child and temporal edges in the trace graph. The `traceId` groups all spans from a single `invoke()` call. + +--- + +## 6. Temporal Chaining + +When `use_temporal_hierarchy=True`, the OTLP-to-TGJ adapter creates parent-child edges between sequential top-level spans. This enables the optimizer to propagate feedback **backward** through the full execution chain. + +### The Critical Invariant + +Child spans (those with a `parentSpanId` in OTEL) must **not** advance the temporal chain. Without this rule, a child LLM span from node A could become the temporal parent of node B, breaking sequential optimization. + +``` + OTEL spans (time order) TGJ temporal chain + ----------------------- ------------------ + planner (root) --------> planner + +- openai.chat (child) (skipped -- has parentSpanId) + synthesizer (root) --------> synthesizer (parent = planner) + +- openai.chat (child) (skipped) +``` + +The adapter achieves this with a simple check: + +```python +# Only advance the temporal chain on spans that were NOT children in OTEL +if not orig_has_parent: + prev_span_id = sid +``` + +Child spans carry `trace.temporal_ignore = "true"` as an additional signal for downstream consumers. + +### Without vs. With temporal_ignore + +``` +Without temporal_ignore: + planner -> openrouter.chat.completion -> researcher (WRONG) + +With temporal_ignore: + planner -> researcher (CORRECT -- child span excluded from chain) +``` + +--- + +## 7. 
Core Modules + +### `opto/trace/io/` + +| File | Lines | Purpose | +|------|-------|---------| +| `__init__.py` | 82 | Public API surface -- exports all symbols | +| `instrumentation.py` | 138 | `instrument_graph()` + `InstrumentedGraph` dataclass | +| `optimization.py` | 412 | `optimize_graph()` loop + `EvalResult`, `EvalFn`, `RunResult`, `OptimizationResult` | +| `telemetry_session.py` | 188 | `TelemetrySession` -- unified OTEL session manager | +| `bindings.py` | 105 | `Binding` dataclass + `apply_updates()` + `make_dict_binding()` | +| `otel_semconv.py` | 126 | `emit_reward()`, `emit_trace()`, `record_genai_chat()`, `set_span_attributes()` | +| `langgraph_otel_runtime.py` | 367 | `TracingLLM` (dual semconv), `InMemorySpanExporter`, `flush_otlp()` | +| `otel_adapter.py` | 168 | `otlp_traces_to_trace_json()` -- OTLP to TGJ with temporal hierarchy | +| `tgj_ingest.py` | 234 | `ingest_tgj()`, `merge_tgj()` -- TGJ to `ParameterNode`/`MessageNode` | +| `tgj_export.py` | -- | Export Trace subgraphs back to TGJ (pre-existing) | +| `eval_hooks.py` | -- | Evaluation hook utilities (pre-existing) | + +### Tests + +| File | Tests | Scope | +|------|-------|-------| +| `tests/unit_tests/test_bindings.py` | 10 | `Binding`, `apply_updates()`, `make_dict_binding()` | +| `tests/unit_tests/test_otel_semconv.py` | 5 | `emit_reward()`, `emit_trace()`, `record_genai_chat()` | +| `tests/unit_tests/test_telemetry_session.py` | 6 | `TelemetrySession` flush, clear, filter, export | +| `tests/unit_tests/test_instrumentation.py` | 10 | `instrument_graph()`, `TracingLLM` child spans, temporal chaining | +| `tests/unit_tests/test_optimization.py` | 11 | `EvalResult`, `_normalise_eval()`, data classes | +| `tests/features_tests/test_e2e_m1_pipeline.py` | 21 | Full E2E: instrument, invoke, OTLP, TGJ, optimizer, apply_updates | diff --git a/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb b/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb new file mode 100644 
index 00000000..6196e441 --- /dev/null +++ b/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb @@ -0,0 +1,2020 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Instrument & Optimize a LangGraph Agent\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/doxav/NewTrace/blob/pr1-graph-optimization/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb)\n", + "This notebook demonstrates the **Usage of OTEL tracing and LangGraph optimization**:\n", + "drop-in OTEL instrumentation and end-to-end optimization for any LangGraph agent.\n", + "\n", + "## What this notebook proves\n", + "\n", + "| Gate | Verified |\n", + "|------|----------|\n", + "| `instrument_graph()` wraps a LangGraph with OTEL tracing | Section 4 |\n", + "| `param.*` + `param.*.trainable` attributes on spans | Section 5 |\n", + "| OTLP → TGJ → `ParameterNode` + `MessageNode` | Section 6 |\n", + "| Child spans do NOT break temporal chaining | Section 6 |\n", + "| `apply_updates()` changes prompt templates via bindings | Section 7 |\n", + "| `optimize_graph()` full loop (StubLLM — deterministic) | Section 8 |\n", + "| `optimize_graph()` live provider (OpenRouter, guarded) | Section 9 |\n", + "\n", + "## Modes\n", + "\n", + "- **StubLLM mode** (Sections 4-8): runs without any API keys — deterministic, CI-safe.\n", + "- **Live LLM mode** (Section 9): requires `OPENROUTER_API_KEY` via Colab Secrets or `.env`.\n", + "\n", + "## Table of Contents\n", + "\n", + "1. [Install Dependencies](#1-install-dependencies)\n", + "2. [Configuration](#2-configuration)\n", + "3. [Define a Minimal LangGraph](#3-define-a-minimal-langgraph)\n", + "4. [Instrument the Graph (StubLLM)](#4-instrument-the-graph-stubllm)\n", + "5. [Inspect OTLP Spans & param.* Attributes](#5-inspect-otlp-spans--param-attributes)\n", + "6. [OTLP → TGJ → Trace Nodes](#6-otlp--tgj--trace-nodes)\n", + "7. 
[Bindings & apply_updates()](#7-bindings--apply_updates)\n", + "8. [optimize_graph() — StubLLM End-to-End](#8-optimize_graph--stubllm-end-to-end)\n", + "9. [Live LLM Mode (OpenRouter)](#9-live-llm-mode-openrouter)\n", + "10. [Save Artifacts](#10-save-artifacts)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 1. Install Dependencies\n", + "\n", + "Run this cell once to install all required packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:58:37.234100Z", + "iopub.status.busy": "2026-02-12T07:58:37.233113Z", + "iopub.status.idle": "2026-02-12T07:58:48.042859Z", + "shell.execute_reply": "2026-02-12T07:58:48.039301Z" + } + }, + "outputs": [], + "source": [ + "!pip install -q langgraph>=1.0.0 opentelemetry-api>=1.38.0 opentelemetry-sdk>=1.38.0 \\\n", + " python-dotenv>=1.0.0 requests>=2.28.0 typing_extensions>=4.0.0 graphviz>=0.20.1\n", + "\n", + "# Install OpenTrace (the project itself) in editable mode\n", + "# If running on Colab, install from the repo and checkout OPENTRACE_REF\n", + "import os\n", + "try:\n", + " import google.colab # noqa: F401\n", + " IN_COLAB = True\n", + "\n", + " OPENTRACE_FOLDER = \"NewTrace\"\n", + " OPENTRACE_REPO = f\"https://github.com/doxav/{OPENTRACE_FOLDER}.git\"\n", + " OPENTRACE_REF = os.environ.get(\"OPENTRACE_REF\", \"pr1-graph-optimization\")\n", + "\n", + " if not os.path.exists(f\"/content/{OPENTRACE_FOLDER}\"):\n", + " !git clone {OPENTRACE_REPO} /content/{OPENTRACE_FOLDER}\n", + " !git -C /content/{OPENTRACE_FOLDER} checkout {OPENTRACE_REF}\n", + "\n", + " # Colab runs Python 3.12 — patch setup.py if it requires >=3.13\n", + " %cd /content/{OPENTRACE_FOLDER}\n", + " %alias sed sed\n", + " %sed -i 's/python_requires=\">=3.13\"/python_requires=\">=3.12\"/' setup.py\n", + "\n", + " !pip install -q -e /content/{OPENTRACE_FOLDER}\n", + " print(f\"[INFO] OpenTrace ref: {OPENTRACE_REF}\")\n", + 
"except ImportError:\n", + " IN_COLAB = False\n", + " # Assume local dev: project already installed via pip install -e .\n", + "\n", + "print(\"\\n\" + \"=\" * 50)\n", + "print(\"All dependencies installed!\")\n", + "print(\"=\" * 50)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Persistent output (Colab):** When running on Colab the next cell mounts\n", + "Google Drive so artifacts survive session restarts. Locally they go into\n", + "`./notebook_outputs/`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:58:48.234683Z", + "iopub.status.busy": "2026-02-12T07:58:48.233679Z", + "iopub.status.idle": "2026-02-12T07:58:48.254178Z", + "shell.execute_reply": "2026-02-12T07:58:48.252166Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Run folder (local): /home/user/code/Trace/examples/notebooks/notebook_outputs/m1\n" + ] + } + ], + "source": [ + "import os\n", + "from datetime import datetime\n", + "\n", + "RUN_FOLDER = None\n", + "try:\n", + " import google.colab\n", + " from google.colab import drive\n", + " drive.mount(\"/content/drive\", force_remount=False)\n", + " OPENTRACE_REF = os.environ.get(\"OPENTRACE_REF\", \"main\")\n", + " base = f\"/content/drive/MyDrive/OpenTrace_runs/M1/{OPENTRACE_REF}\"\n", + " os.makedirs(base, exist_ok=True)\n", + " RUN_FOLDER = os.path.join(base, f\"run_{datetime.now().strftime('%Y%m%d_%H%M%S')}\")\n", + " os.makedirs(RUN_FOLDER, exist_ok=True)\n", + " print(f\"Run folder (Google Drive, OpenTrace): {RUN_FOLDER}\")\n", + "except Exception:\n", + " RUN_FOLDER = os.path.abspath(os.path.join(os.getcwd(), \"notebook_outputs\", \"m1\"))\n", + " os.makedirs(RUN_FOLDER, exist_ok=True)\n", + " print(f\"Run folder (local): {RUN_FOLDER}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 2. 
Configuration\n", + "\n", + "API keys are retrieved **automatically** — never paste keys into cells:\n", + "\n", + "| Priority | Source | How to set |\n", + "|----------|--------|------------|\n", + "| 1 | **Colab Secrets** | Click the key icon → add `OPENROUTER_API_KEY` |\n", + "| 2 | **Environment variable** | `export OPENROUTER_API_KEY=sk-or-v1-...` |\n", + "| 3 | **`.env` file** | `OPENROUTER_API_KEY=sk-or-v1-...` in project root |\n", + "\n", + "Sections 4-8 use **StubLLM** (no key needed). Section 9 uses a live\n", + "provider and is skipped automatically when no key is available." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:58:48.269399Z", + "iopub.status.busy": "2026-02-12T07:58:48.268397Z", + "iopub.status.idle": "2026-02-12T07:58:48.324887Z", + "shell.execute_reply": "2026-02-12T07:58:48.321207Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO] API key loaded from environment variable.\n", + "\n", + "API key: [SET]\n", + "Model: google/gemma-4-26b-a4b-it:free\n", + "Budget: max_tokens=256, temperature=0\n" + ] + } + ], + "source": [ + "from __future__ import annotations\n", + "import os, json\n", + "\n", + "# Model config (free tier on OpenRouter)\n", + "OPENROUTER_MODEL = os.environ.get(\"OPENROUTER_MODEL\", \"nvidia/nemotron-3-super-120b-a12b:free\")\n", + "OPENROUTER_BASE_URL = \"https://openrouter.ai/api/v1\"\n", + "\n", + "# Budget guard for live mode\n", + "MAX_TOKENS_PER_CALL = 256\n", + "LIVE_TEMPERATURE = 0 # deterministic\n", + "\n", + "# ---------- key retrieval (Colab Secrets → env → .env file) ----------\n", + "OPENROUTER_API_KEY = \"\"\n", + "\n", + "try:\n", + " from google.colab import userdata\n", + " OPENROUTER_API_KEY = userdata.get(\"OPENROUTER_API_KEY\") or \"\"\n", + " if OPENROUTER_API_KEY:\n", + " print(\"[INFO] API key loaded from Colab Secrets.\")\n", + "except (ImportError, 
ModuleNotFoundError):\n", + " pass\n", + "\n", + "if not OPENROUTER_API_KEY:\n", + " OPENROUTER_API_KEY = os.environ.get(\"OPENROUTER_API_KEY\", \"\")\n", + " if OPENROUTER_API_KEY:\n", + " print(\"[INFO] API key loaded from environment variable.\")\n", + "\n", + "if not OPENROUTER_API_KEY:\n", + " try:\n", + " from dotenv import load_dotenv\n", + " load_dotenv()\n", + " OPENROUTER_API_KEY = os.environ.get(\"OPENROUTER_API_KEY\", \"\")\n", + " if OPENROUTER_API_KEY:\n", + " print(\"[INFO] API key loaded from .env file.\")\n", + " except ImportError:\n", + " pass\n", + "\n", + "HAS_API_KEY = bool(OPENROUTER_API_KEY)\n", + "os.environ[\"OPENROUTER_API_KEY\"] = OPENROUTER_API_KEY\n", + "\n", + "print(f\"\\nAPI key: {'[SET]' if HAS_API_KEY else '[NOT SET — live mode will be skipped]'}\")\n", + "print(f\"Model: {OPENROUTER_MODEL}\")\n", + "print(f\"Budget: max_tokens={MAX_TOKENS_PER_CALL}, temperature={LIVE_TEMPERATURE}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 3. Define a Minimal LangGraph\n", + "\n", + "A simple **planner → synthesizer** pipeline. Node functions close over\n", + "`tracing_llm` and `templates` so that `apply_updates()` propagates to\n", + "the next invocation automatically." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:58:48.337340Z", + "iopub.status.busy": "2026-02-12T07:58:48.336340Z", + "iopub.status.idle": "2026-02-12T07:58:55.612322Z", + "shell.execute_reply": "2026-02-12T07:58:55.609666Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Graph builder defined.\n", + " Nodes: planner, executor, web_researcher, wikidata_researcher, synthesizer, evaluator\n", + " DEMO_QUERIES: 3 queries\n" + ] + } + ], + "source": [ + "from typing import Any, Dict, List, Optional\n", + "from typing_extensions import TypedDict, Literal\n", + "from langgraph.graph import StateGraph, START, END\n", + "from langgraph.types import Command\n", + "\n", + "# Keep the notebook aligned with JSON_OTEL_trace_optim_demo_LANGGRAPH.py\n", + "DEMO_QUERIES = [\n", + " \"Summarize the causes and key events of the French Revolution.\",\n", + " \"Give 3 factual relationships about Tesla, Inc. with entity IDs.\",\n", + " \"What is the Wikidata ID for CRISPR and list 2 related entities?\",\n", + "]\n", + "\n", + "class AgentState(TypedDict, total=False):\n", + " query: str\n", + " plan: Dict[str, Any]\n", + " current_step: int\n", + " contexts: List[str]\n", + " agent_query: str\n", + " final_answer: str\n", + " eval_score: float\n", + " eval_feedback: str\n", + "\n", + "def wikipedia_search(query: str) -> str:\n", + " \"\"\"Wikipedia tool. 
Falls back gracefully if wikipedia package/network is unavailable.\"\"\"\n", + " try:\n", + " import wikipedia\n", + " wikipedia.set_lang(\"en\")\n", + " hits = wikipedia.search(query, results=2)\n", + " out = []\n", + " for h in hits:\n", + " try:\n", + " s = wikipedia.summary(h, sentences=3, auto_suggest=False, redirect=True)\n", + " out.append(f\"### {h}\\n{s}\")\n", + " except Exception:\n", + " continue\n", + " return \"\\n\\n\".join(out) or \"No Wikipedia results.\"\n", + " except Exception:\n", + " return \"Wikipedia search unavailable.\"\n", + "\n", + "def wikidata_search(query: str) -> str:\n", + " \"\"\"Wikidata search tool (wbsearchentities).\"\"\"\n", + " import requests\n", + " try:\n", + " r = requests.get(\n", + " \"https://www.wikidata.org/w/api.php\",\n", + " params={\n", + " \"action\": \"wbsearchentities\",\n", + " \"format\": \"json\",\n", + " \"language\": \"en\",\n", + " \"search\": query[:100],\n", + " \"limit\": 5,\n", + " },\n", + " timeout=10,\n", + " )\n", + " r.raise_for_status()\n", + " data = r.json()\n", + " results = [\n", + " f\"- {item.get('label','')}: {item.get('description','')} ({item.get('id','')})\"\n", + " for item in data.get(\"search\", [])\n", + " ]\n", + " return \"\\n\".join(results) if results else \"No Wikidata entities found.\"\n", + " except Exception:\n", + " return f\"Wikidata search unavailable. Query: {query[:50]}...\"\n", + "\n", + "def build_graph(tracing_llm, templates: Dict[str, str]):\n", + " \"\"\"\n", + " Build a multi-node LangGraph aligned with JSON_OTEL_trace_optim_demo_LANGGRAPH.py:\n", + " planner -> executor -> (web_researcher|wikidata_researcher|synthesizer) -> evaluator\n", + " \"\"\"\n", + "\n", + " def planner_node(state: AgentState) -> Command[Literal[\"executor\"]]:\n", + " template = templates.get(\n", + " \"planner_prompt\",\n", + " \"Return JSON plan with steps for query: {query}. 
Use agents: web_researcher, wikidata_researcher, synthesizer.\",\n", + " )\n", + " prompt = template.replace(\"{query}\", state.get(\"query\", \"\"))\n", + "\n", + " raw = tracing_llm.node_call(\n", + " span_name=\"planner\",\n", + " template_name=\"planner_prompt\",\n", + " template=template,\n", + " optimizable_key=\"planner\",\n", + " user_query=state.get(\"query\", \"\"),\n", + " extra_inputs={\"user_query\": state.get(\"query\", \"\")},\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"Return JSON only. Keys: 1,2,... each step has {agent,action,goal,query}.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " max_tokens=400,\n", + " temperature=0,\n", + " )\n", + " plan: Dict[str, Any]\n", + " try:\n", + " import json\n", + " plan = json.loads(raw)\n", + " except Exception:\n", + " q = (state.get(\"query\", \"\") or \"\").lower()\n", + " plan = {\n", + " \"1\": {\"agent\": \"web_researcher\", \"action\": \"search\", \"goal\": \"collect context\", \"query\": state.get(\"query\", \"\")},\n", + " \"2\": {\"agent\": \"wikidata_researcher\" if (\"wikidata\" in q or \"entity id\" in q or \"id\" in q) else \"synthesizer\",\n", + " \"action\": \"search\" if (\"wikidata\" in q or \"entity id\" in q or \"id\" in q) else \"answer\",\n", + " \"goal\": \"entities or final answer\", \"query\": state.get(\"query\", \"\")},\n", + " \"3\": {\"agent\": \"synthesizer\", \"action\": \"answer\", \"goal\": \"final answer\", \"query\": state.get(\"query\", \"\")},\n", + " }\n", + "\n", + " return Command(update={\"plan\": plan, \"current_step\": 1, \"contexts\": []}, goto=\"executor\")\n", + "\n", + " def executor_node(state: AgentState) -> Command[Literal[\"web_researcher\", \"wikidata_researcher\", \"synthesizer\"]]:\n", + " step = int(state.get(\"current_step\", 1) or 1)\n", + " plan = state.get(\"plan\", {}) or {}\n", + " plan_step = plan.get(str(step), {})\n", + " if not plan_step:\n", + " return Command(update={}, goto=\"synthesizer\")\n", + 
"\n", + " template = templates.get(\n", + " \"executor_prompt\",\n", + " \"Given step {step} of plan: {plan_step}\\nFor query: {query}\\nReturn JSON: {goto,query}. goto in [web_researcher,wikidata_researcher,synthesizer].\",\n", + " )\n", + " prompt = (\n", + " template.replace(\"{step}\", str(step))\n", + " .replace(\"{plan_step}\", str(plan_step))\n", + " .replace(\"{query}\", state.get(\"query\", \"\"))\n", + " )\n", + "\n", + " raw = tracing_llm.node_call(\n", + " span_name=\"executor\",\n", + " template_name=\"executor_prompt\",\n", + " template=template,\n", + " optimizable_key=\"executor\",\n", + " user_query=state.get(\"query\", \"\"),\n", + " extra_inputs={\"step\": str(step), \"user_query\": state.get(\"query\", \"\")},\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"Return JSON only with keys goto and query.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " max_tokens=200,\n", + " temperature=0,\n", + " )\n", + "\n", + " goto = str(plan_step.get(\"agent\", \"synthesizer\"))\n", + " q2 = str(plan_step.get(\"query\", state.get(\"query\", \"\")))\n", + " try:\n", + " import json\n", + " d = json.loads(raw)\n", + " goto = str(d.get(\"goto\", goto))\n", + " q2 = str(d.get(\"query\", q2))\n", + " except Exception:\n", + " pass\n", + "\n", + " if goto not in (\"web_researcher\", \"wikidata_researcher\", \"synthesizer\"):\n", + " goto = \"synthesizer\"\n", + "\n", + " return Command(update={\"agent_query\": q2}, goto=goto)\n", + "\n", + " def web_researcher_node(state: AgentState) -> Command[Literal[\"executor\"]]:\n", + " q = state.get(\"agent_query\", state.get(\"query\", \"\"))\n", + " with tracing_llm.tracer.start_as_current_span(\"web_researcher\") as sp:\n", + " sp.set_attribute(\"inputs.user_query\", state.get(\"query\", \"\"))\n", + " sp.set_attribute(\"inputs.agent_query\", q)\n", + " ctx = wikipedia_search(q)\n", + " sp.set_attribute(\"outputs.context.preview\", (ctx or \"\")[:500])\n", + " contexts = 
list(state.get(\"contexts\", []) or [])\n", + " contexts.append(ctx)\n", + " step = int(state.get(\"current_step\", 1) or 1) + 1\n", + " return Command(update={\"contexts\": contexts, \"current_step\": step}, goto=\"executor\")\n", + "\n", + " def wikidata_researcher_node(state: AgentState) -> Command[Literal[\"executor\"]]:\n", + " q = state.get(\"agent_query\", state.get(\"query\", \"\"))\n", + " with tracing_llm.tracer.start_as_current_span(\"wikidata_researcher\") as sp:\n", + " sp.set_attribute(\"inputs.user_query\", state.get(\"query\", \"\"))\n", + " sp.set_attribute(\"inputs.agent_query\", q)\n", + " ctx = wikidata_search(q)\n", + " sp.set_attribute(\"outputs.context.preview\", (ctx or \"\")[:500])\n", + " contexts = list(state.get(\"contexts\", []) or [])\n", + " contexts.append(ctx)\n", + " step = int(state.get(\"current_step\", 1) or 1) + 1\n", + " return Command(update={\"contexts\": contexts, \"current_step\": step}, goto=\"executor\")\n", + "\n", + " def synthesizer_node(state: AgentState) -> Command[Literal[\"evaluator\"]]:\n", + " template = templates.get(\n", + " \"synthesizer_prompt\",\n", + " \"Answer the query: {query}\\nContext:\\n{contexts}\\nIf asked for IDs, include them. 
Be factual.\",\n", + " )\n", + " contexts = \"\\n\\n\".join(state.get(\"contexts\", []) or [])\n", + " prompt = template.replace(\"{query}\", state.get(\"query\", \"\")).replace(\"{contexts}\", contexts[:4000])\n", + "\n", + " ans = tracing_llm.node_call(\n", + " span_name=\"synthesizer\",\n", + " template_name=\"synthesizer_prompt\",\n", + " template=template,\n", + " optimizable_key=\"synthesizer\",\n", + " user_query=state.get(\"query\", \"\"),\n", + " extra_inputs={\"user_query\": state.get(\"query\", \"\")},\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a careful assistant.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " max_tokens=500,\n", + " temperature=0,\n", + " )\n", + " return Command(update={\"final_answer\": ans}, goto=\"evaluator\")\n", + "\n", + " def evaluator_node(state: AgentState) -> Command[Literal[\"__end__\"]]:\n", + " import re\n", + " q = (state.get(\"query\", \"\") or \"\").lower()\n", + " ans = (state.get(\"final_answer\", \"\") or \"\")\n", + " ctx = \"\\n\".join(state.get(\"contexts\", []) or \"\")\n", + " wants_ids = (\"wikidata\" in q) or (\"entity id\" in q) or (\"id\" in q and \"tesla\" in q)\n", + " has_qid = bool(re.search(r\"\\bQ\\d{2,}\\b\", ans))\n", + " uses_ctx = len(ctx.strip()) > 0\n", + " score = 0.25\n", + " reasons = []\n", + " if uses_ctx:\n", + " score += 0.25\n", + " reasons.append(\"uses_context\")\n", + " if wants_ids and has_qid:\n", + " score += 0.35\n", + " reasons.append(\"has_qids\")\n", + " if \"french revolution\" in q and len(ans) > 200:\n", + " score += 0.10\n", + " reasons.append(\"sufficient_detail\")\n", + " score = min(score, 0.95)\n", + " feedback = {\"score\": score, \"reasons\": reasons}\n", + " with tracing_llm.tracer.start_as_current_span(\"evaluator\") as sp:\n", + " sp.set_attribute(\"eval.score\", str(score))\n", + " sp.set_attribute(\"eval.reasons\", \",\".join(reasons))\n", + " return Command(update={\"eval_score\": score, \"eval_feedback\": 
str(feedback)}, goto=END)\n", + "\n", + " workflow = StateGraph(AgentState)\n", + " workflow.add_node(\"planner\", planner_node)\n", + " workflow.add_node(\"executor\", executor_node)\n", + " workflow.add_node(\"web_researcher\", web_researcher_node)\n", + " workflow.add_node(\"wikidata_researcher\", wikidata_researcher_node)\n", + " workflow.add_node(\"synthesizer\", synthesizer_node)\n", + " workflow.add_node(\"evaluator\", evaluator_node)\n", + "\n", + " workflow.add_edge(START, \"planner\")\n", + " workflow.add_edge(\"synthesizer\", \"evaluator\")\n", + "\n", + " return workflow.compile()\n", + "\n", + "print(\"Graph builder defined.\")\n", + "print(f\" Nodes: planner, executor, web_researcher, wikidata_researcher, synthesizer, evaluator\")\n", + "print(f\" DEMO_QUERIES: {len(DEMO_QUERIES)} queries\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### StubLLM\n", + "\n", + "A deterministic LLM that returns canned responses (no API calls)." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:58:55.622865Z", + "iopub.status.busy": "2026-02-12T07:58:55.621865Z", + "iopub.status.idle": "2026-02-12T07:58:55.641281Z", + "shell.execute_reply": "2026-02-12T07:58:55.639271Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "StubLLM defined (optimization-signal-sensitive). call_count=0\n" + ] + } + ], + "source": [ + "class StubLLM:\n", + " \"\"\"Deterministic LLM stub for the multi-node graph.\n", + "\n", + " Produces JSON plans for planner, routing JSON for executor,\n", + " and text answers for synthesizer. 
Response quality depends\n", + " on whether the prompt template has been optimized (contains\n", + " keywords like \"step-by-step\" or \"thorough\").\n", + " \"\"\"\n", + " model = \"stub-llm\"\n", + "\n", + " def __init__(self):\n", + " self.call_count = 0\n", + "\n", + " @staticmethod\n", + " def _is_optimized(text: str) -> bool:\n", + " \"\"\"Check if the prompt text contains optimization-signal keywords.\"\"\"\n", + " t = text.lower()\n", + " return any(kw in t for kw in [\"step-by-step\", \"thorough\", \"detailed plan\", \"comprehensive\"])\n", + "\n", + " @staticmethod\n", + " def _query_wants_wikidata(query_part: str) -> bool:\n", + " \"\"\"Check the actual user query (not template) for Wikidata indicators.\"\"\"\n", + " q = query_part.lower()\n", + " return (\"wikidata id\" in q or \"entity id\" in q\n", + " or (\"id\" in q and \"tesla\" in q)\n", + " or \"related entities\" in q)\n", + "\n", + " def __call__(self, messages=None, **kwargs):\n", + " self.call_count += 1\n", + " import json as _json\n", + "\n", + " content = f\"Stub response #{self.call_count}\"\n", + " if not messages:\n", + " return self._wrap(content)\n", + "\n", + " user_text = \"\"\n", + " system_text = \"\"\n", + " for m in messages:\n", + " if m.get(\"role\") == \"user\":\n", + " user_text = (m.get(\"content\") or \"\").lower()\n", + " elif m.get(\"role\") == \"system\":\n", + " system_text = (m.get(\"content\") or \"\").lower()\n", + "\n", + " optimized = self._is_optimized(user_text)\n", + "\n", + " if \"return json only\" in system_text and \"step\" in system_text:\n", + " # Planner node\n", + " wants_wd = self._query_wants_wikidata(user_text)\n", + "\n", + " if optimized:\n", + " # Richer plan: 3-4 steps with detailed goals\n", + " steps = {\n", + " \"1\": {\"agent\": \"web_researcher\", \"action\": \"deep_search\",\n", + " \"goal\": \"gather comprehensive background\", \"query\": user_text[:60]},\n", + " \"2\": {\"agent\": \"web_researcher\", \"action\": \"verify\",\n", + " 
\"goal\": \"cross-reference facts\", \"query\": user_text[:60]},\n", + " \"3\": {\"agent\": \"synthesizer\", \"action\": \"compose\",\n", + " \"goal\": \"write detailed answer with evidence\", \"query\": user_text[:60]},\n", + " }\n", + " if wants_wd:\n", + " steps[\"2\"] = {\"agent\": \"wikidata_researcher\", \"action\": \"entity_lookup\",\n", + " \"goal\": \"retrieve entity IDs and relationships\", \"query\": user_text[:60]}\n", + " steps[\"3\"] = {\"agent\": \"web_researcher\", \"action\": \"verify\",\n", + " \"goal\": \"cross-reference entity data\", \"query\": user_text[:60]}\n", + " steps[\"4\"] = {\"agent\": \"synthesizer\", \"action\": \"compose\",\n", + " \"goal\": \"structured answer with IDs\", \"query\": user_text[:60]}\n", + " else:\n", + " # Default plan: 2 steps (baseline)\n", + " steps = {\n", + " \"1\": {\"agent\": \"web_researcher\", \"action\": \"search\",\n", + " \"goal\": \"collect context\", \"query\": user_text[:60]},\n", + " \"2\": {\"agent\": \"synthesizer\", \"action\": \"answer\",\n", + " \"goal\": \"final answer\", \"query\": user_text[:60]},\n", + " }\n", + " content = _json.dumps(steps)\n", + "\n", + " elif \"return json only\" in system_text and \"goto\" in system_text:\n", + " # Executor node\n", + " content = _json.dumps({\"goto\": \"synthesizer\", \"query\": user_text[:80]})\n", + "\n", + " elif \"careful assistant\" in system_text:\n", + " # Synthesizer node\n", + " if \"french revolution\" in user_text:\n", + " if optimized:\n", + " content = (\n", + " \"The French Revolution (1789-1799) was a watershed event in modern history. \"\n", + " \"CAUSES: (1) Severe fiscal crisis — France was nearly bankrupt from wars and royal spending. \"\n", + " \"(2) Social inequality — the Third Estate bore most taxes while clergy and nobles were exempt. \"\n", + " \"(3) Enlightenment ideals of liberty, equality, and popular sovereignty. \"\n", + " \"KEY EVENTS: The Storming of the Bastille (July 14, 1789) symbolized popular revolt. 
\"\n", + " \"The Declaration of the Rights of Man and Citizen established foundational principles. \"\n", + " \"The Reign of Terror (1793-1794) under Robespierre saw mass executions. \"\n", + " \"The Directory and eventually Napoleon's coup d'état (1799) ended the revolutionary period. \"\n", + " \"LEGACY: The Revolution inspired democratic movements worldwide and fundamentally \"\n", + " \"restructured French society, abolishing feudalism and establishing civil equality.\"\n", + " )\n", + " else:\n", + " content = (\n", + " \"The French Revolution (1789-1799) was caused by fiscal crisis and social inequality. \"\n", + " \"Key events include the Storming of the Bastille and Napoleon's rise.\"\n", + " )\n", + "\n", + " elif \"tesla\" in user_text:\n", + " if optimized:\n", + " content = (\n", + " \"Tesla, Inc. (Wikidata ID: Q478214) is a multinational electric vehicle and clean energy company. \"\n", + " \"Three factual relationships with entity IDs: \"\n", + " \"(1) CEO: Elon Musk (Q317521), who co-founded and leads the company. \"\n", + " \"(2) Headquarters: Austin, Texas (Q16559), relocated from Palo Alto in 2021. \"\n", + " \"(3) Industry: Electric vehicle manufacturing (Q860861), pioneering mass-market EVs. \"\n", + " \"Additional context: Tesla was incorporated in 2003 and became the world's most \"\n", + " \"valuable automaker by market capitalization.\"\n", + " )\n", + " else:\n", + " content = (\n", + " \"Tesla, Inc. (Q478214) was founded by Elon Musk (Q317521). \"\n", + " \"It is headquartered in Austin, Texas (Q16559).\"\n", + " )\n", + "\n", + " elif \"crispr\" in user_text:\n", + " if optimized:\n", + " content = (\n", + " \"CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) has the \"\n", + " \"Wikidata ID Q910344. Two related entities: \"\n", + " \"(1) Cas9 protein (Q24721675) — the endonuclease used in CRISPR-Cas9 gene editing. 
\"\n", + " \"(2) Jennifer Doudna (Q467524) — co-developer of CRISPR-Cas9 technology, \"\n", + " \"Nobel Prize in Chemistry 2020. \"\n", + " \"CRISPR technology enables precise genome editing and has applications in medicine, \"\n", + " \"agriculture, and basic biological research.\"\n", + " )\n", + " else:\n", + " content = (\n", + " \"CRISPR (Q910344) is a gene-editing technology. \"\n", + " \"Related: Cas9 (Q24721675), Jennifer Doudna (Q467524).\"\n", + " )\n", + " else:\n", + " content = f\"Stub answer for: {user_text[:100]}\"\n", + "\n", + " return self._wrap(content)\n", + "\n", + " @staticmethod\n", + " def _wrap(content):\n", + " class _Resp:\n", + " pass\n", + " class _Choice:\n", + " pass\n", + " class _Msg:\n", + " pass\n", + " r = _Resp()\n", + " c = _Choice()\n", + " m = _Msg()\n", + " m.content = content\n", + " c.message = m\n", + " r.choices = [c]\n", + " return r\n", + "\n", + "stub_llm = StubLLM()\n", + "print(f\"StubLLM defined (optimization-signal-sensitive). call_count={stub_llm.call_count}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 4. Instrument the Graph (StubLLM)\n", + "\n", + "One function call — `instrument_graph()` — wraps the LangGraph with full\n", + "OTEL tracing, creates a `TelemetrySession`, and sets up `Binding` objects\n", + "that map `param.*` keys to the live template dict." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:58:55.651617Z", + "iopub.status.busy": "2026-02-12T07:58:55.650609Z", + "iopub.status.idle": "2026-02-12T07:59:07.295195Z", + "shell.execute_reply": "2026-02-12T07:59:07.294185Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Instrumented graph ready.\n", + " Templates: ['executor_prompt', 'planner_prompt', 'synthesizer_prompt']\n", + " Bindings: ['executor_prompt', 'planner_prompt', 'synthesizer_prompt']\n", + " output_key: final_answer\n" + ] + } + ], + "source": [ + "from opto.trace.io import instrument_graph, apply_updates\n", + "\n", + "INITIAL_TEMPLATES = {\n", + " \"planner_prompt\": \"Create a JSON plan for: {query}. Use web_researcher and synthesizer; include wikidata_researcher if IDs are requested.\",\n", + " \"executor_prompt\": \"Given step {step} of plan: {plan_step} for query: {query}. Return JSON {goto,query}.\",\n", + " \"synthesizer_prompt\": \"Answer: {query}\\nContext:\\n{contexts}\\nIf asked for IDs, include Wikidata QIDs.\",\n", + "}\n", + "\n", + "ig = instrument_graph(\n", + " graph=None,\n", + " service_name=\"QA_research_graph\",\n", + " trainable_keys={\"planner\", \"executor\", \"synthesizer\"},\n", + " llm=stub_llm,\n", + " initial_templates=INITIAL_TEMPLATES,\n", + " emit_genai_child_spans=True,\n", + " provider_name=\"stub\",\n", + " llm_span_name=\"llm.chat.completion\",\n", + " input_key=\"query\",\n", + " output_key=\"final_answer\",\n", + ")\n", + "\n", + "# Build and attach the graph (node funcs close over tracing_llm + templates)\n", + "ig.graph = build_graph(ig.tracing_llm, ig.templates)\n", + "\n", + "print(\"Instrumented graph ready.\")\n", + "print(f\" Templates: {sorted(ig.templates.keys())}\")\n", + "print(f\" Bindings: {sorted(ig.bindings.keys())}\")\n", + "print(f\" output_key: {ig.output_key}\")" + ] + }, + { + "cell_type": "code", + 
"execution_count": 26, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:59:07.302370Z", + "iopub.status.busy": "2026-02-12T07:59:07.301358Z", + "iopub.status.idle": "2026-02-12T07:59:07.321120Z", + "shell.execute_reply": "2026-02-12T07:59:07.320110Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result keys: ['agent_query', 'contexts', 'current_step', 'eval_feedback', 'eval_score', 'final_answer', 'plan', 'query']\n", + "\n", + "Final answer (107 chars):\n", + " Stub answer for: answer: what is reinforcement learning?\n", + "context:\n", + "\n", + "if asked for ids, include wikidata qids.\n", + "\n", + "Plan:\n", + " {\n", + " \"1\": {\n", + " \"agent\": \"web_researcher\",\n", + " \"action\": \"search\",\n", + " \"goal\": \"collect context\",\n", + " \"query\": \"create a json plan for: what is reinforcement learning?. use\"\n", + " },\n", + " \"2\": {\n", + " \"agent\": \"synthesizer\",\n", + " \"action\": \"answer\",\n", + " \"goal\": \"final answer\",\n", + " \"query\": \"create a json plan \n", + "\n", + "Contexts collected: 0\n", + "Eval score: 0.25\n" + ] + } + ], + "source": [ + "# --- Single invocation ---\n", + "result = ig.invoke({\"query\": \"What is reinforcement learning?\"})\n", + "\n", + "print(\"Result keys:\", sorted(result.keys()))\n", + "ans_len = len(str(result.get('final_answer', '')))\n", + "print(f\"\\nFinal answer ({ans_len} chars):\")\n", + "print(f\" {str(result.get('final_answer', '(none)'))[:300]}\")\n", + "print(f\"\\nPlan:\")\n", + "import json as _json\n", + "try:\n", + " print(f\" {_json.dumps(result.get('plan', {}), indent=2)[:300]}\")\n", + "except Exception:\n", + " print(f\" {str(result.get('plan', '(none)'))[:300]}\")\n", + "print(f\"\\nContexts collected: {len(result.get('contexts', []))}\")\n", + "print(f\"Eval score: {result.get('eval_score', 'N/A')}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 5. 
Inspect OTLP Spans & `param.*` Attributes\n", + "\n", + "After invocation the `TelemetrySession` holds all captured OTEL spans.\n", + "`flush_otlp()` exports them as an OTLP JSON payload." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:59:07.329697Z", + "iopub.status.busy": "2026-02-12T07:59:07.328119Z", + "iopub.status.idle": "2026-02-12T07:59:07.342552Z", + "shell.execute_reply": "2026-02-12T07:59:07.341545Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total spans captured: 8\n", + "\n", + "Unique trace IDs: 1 (D9: should be 1)\n", + "Root invocation span: QA_research_graph.invoke (id=c88df3f2cf1b...)\n", + "\n", + " Span: llm.chat.completion parent=2965e9d6\n", + " gen_ai.operation.name = chat\n", + " gen_ai.output.preview = {\"1\": {\"agent\": \"web_researcher\", \"action\": \"search\", \"goal\": \"collect context\",\n", + " gen_ai.provider.name = stub\n", + " gen_ai.request.model = stub-llm\n", + " trace.temporal_ignore = true\n", + "\n", + " Span: planner parent=c88df3f2\n", + " gen_ai.model = stub-llm\n", + " inputs.gen_ai.prompt = Create a JSON plan for: What is reinforcement learning?. Use web_researcher and \n", + " inputs.user_query = What is reinforcement learning?\n", + " param.planner_prompt = Create a JSON plan for: {query}. 
Use web_researcher and synthesizer; include wik\n", + " param.planner_prompt.trainable = True\n", + "\n", + " Span: llm.chat.completion parent=fe8467c0\n", + " gen_ai.operation.name = chat\n", + " gen_ai.output.preview = {\"goto\": \"synthesizer\", \"query\": \"given step 1 of plan: {'agent': 'web_researche\n", + " gen_ai.provider.name = stub\n", + " gen_ai.request.model = stub-llm\n", + " trace.temporal_ignore = true\n", + "\n", + " Span: executor parent=c88df3f2\n", + " gen_ai.model = stub-llm\n", + " inputs.gen_ai.prompt = Given step 1 of plan: {'agent': 'web_researcher', 'action': 'search', 'goal': 'c\n", + " inputs.step = 1\n", + " inputs.user_query = What is reinforcement learning?\n", + " param.executor_prompt = Given step {step} of plan: {plan_step} for query: {query}. Return JSON {goto,que\n", + " param.executor_prompt.trainable = True\n", + "\n", + " Span: llm.chat.completion parent=0b00b52b\n", + " gen_ai.operation.name = chat\n", + " gen_ai.output.preview = Stub answer for: answer: what is reinforcement learning?\n", + "context:\n", + "\n", + "if asked for \n", + " gen_ai.provider.name = stub\n", + " gen_ai.request.model = stub-llm\n", + " trace.temporal_ignore = true\n", + "\n", + " Span: synthesizer parent=c88df3f2\n", + " gen_ai.model = stub-llm\n", + " inputs.gen_ai.prompt = Answer: What is reinforcement learning?\n", + "Context:\n", + "\n", + "If asked for IDs, include Wiki\n", + " inputs.user_query = What is reinforcement learning?\n", + " param.synthesizer_prompt = Answer: {query}\n", + "Context:\n", + "{contexts}\n", + "If asked for IDs, include Wikidata QIDs.\n", + " param.synthesizer_prompt.trainable = True\n", + "\n", + " Span: evaluator parent=c88df3f2\n", + " eval.reasons = \n", + " eval.score = 0.25\n", + "\n", + " Span: QA_research_graph.invoke parent=\n", + "\n" + ] + } + ], + "source": [ + "otlp = ig.session.flush_otlp(clear=True)\n", + "\n", + "spans = otlp[\"resourceSpans\"][0][\"scopeSpans\"][0][\"spans\"]\n", + "print(f\"Total spans 
captured: {len(spans)}\n")\n",
+    "\n",
+    "# D9: Verify single trace ID per invocation\n",
+    "trace_ids = {s[\"traceId\"] for s in spans}\n",
+    "print(f\"Unique trace IDs: {len(trace_ids)} (D9: should be 1)\")\n",
+    "assert len(trace_ids) == 1, f\"Expected 1 trace ID, got {len(trace_ids)}\"\n",
+    "\n",
+    "# D9: Verify root invocation span exists\n",
+    "root_spans = [s for s in spans if s[\"name\"].endswith(\".invoke\")]\n",
+    "assert root_spans, \"Missing root invocation span (*.invoke). D9 invariant failed.\"\n",
+    "root_id = root_spans[0][\"spanId\"]\n",
+    "print(f\"Root invocation span: {root_spans[0]['name']} (id={root_id[:12]}...)\")\n",
+    "print()\n",
+    "\n",
+    "for sp in spans:\n",
+    "    attrs = {}\n",
+    "    for a in sp.get(\"attributes\", []):\n",
+    "        val = a.get(\"value\", {})\n",
+    "        attrs[a[\"key\"]] = val.get(\"stringValue\", val.get(\"boolValue\", val.get(\"intValue\", \"\")))\n",
+    "    print(f\"  Span: {sp['name']:<35} parent={sp.get('parentSpanId','(root)')[:8]}\")\n",
+    "    for k, v in sorted(attrs.items()):\n",
+    "        if k.startswith(\"param.\"):\n",
+    "            print(f\"    {k} = {str(v)[:80]}\")\n",
+    "        elif k.startswith(\"gen_ai.\") or k == \"trace.temporal_ignore\":\n",
+    "            print(f\"    {k} = {str(v)[:80]}\")\n",
+    "        elif k.startswith(\"eval.\") or k.startswith(\"inputs.\") or k.startswith(\"outputs.\"):\n",
+    "            print(f\"    {k} = {str(v)[:80]}\")\n",
+    "    print()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Checkpoint:** The output above should show:\n",
+    "- `planner` and `synthesizer` spans with `param.*` and `param.*.trainable = True`\n",
+    "- Child LLM spans (configurable name, e.g. `llm.chat.completion`) with `gen_ai.*` attributes and `trace.temporal_ignore = true`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "## 6. 
OTLP → TGJ → Trace Nodes\n", + "\n", + "Convert the OTLP payload to **Trace-Graph JSON (TGJ)**, then ingest it\n", + "into `ParameterNode` / `MessageNode` objects — the exact format the\n", + "optimizer consumes." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:59:07.350295Z", + "iopub.status.busy": "2026-02-12T07:59:07.349305Z", + "iopub.status.idle": "2026-02-12T07:59:07.369083Z", + "shell.execute_reply": "2026-02-12T07:59:07.367068Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TGJ documents: 1\n", + "\n", + "ParameterNode (trainable): 3\n", + " QA_research_graph/0/planner_prompt2 trainable=True\n", + " QA_research_graph/0/executor_prompt2 trainable=True\n", + " QA_research_graph/0/synthesizer_prompt2 trainable=True\n", + "\n", + "Unique trainable params: ['executor_prompt2', 'planner_prompt2', 'synthesizer_prompt2']\n", + "[OK] No duplicate ParameterNodes (C7).\n", + "\n", + "MessageNode: 7\n", + " QA_research_graph/0/planner7 parents=['lit_112', 'lit_60349', 'planner_prompt2']\n", + " QA_research_graph/0/llm.chat.completion21 parents=['planner7']\n", + " QA_research_graph/0/llm.chat.completion23 parents=['synthesizer7']\n", + " QA_research_graph/0/executor7 parents=['lit_21692', 'lit_9767', 'lit_603410', 'planner7', 'executor_prompt2']\n", + " QA_research_graph/0/llm.chat.completion22 parents=['executor7']\n", + " QA_research_graph/0/synthesizer7 parents=['lit_12883', 'lit_603411', 'executor7', 'synthesizer_prompt2']\n", + " QA_research_graph/0/evaluator7 parents=['synthesizer7']\n", + "[WARN] No top-level message nodes found.\n" + ] + } + ], + "source": [ + "from opto.trace.io import otlp_traces_to_trace_json, ingest_tgj\n", + "from opto.trace.nodes import ParameterNode, MessageNode\n", + "\n", + "# Re-invoke so we have fresh spans for this section\n", + "ig.invoke({\"query\": DEMO_QUERIES[0]})\n", + "otlp = 
ig.session.flush_otlp(clear=True)\n", + "\n", + "# --- OTLP -> TGJ ---\n", + "docs = otlp_traces_to_trace_json(\n", + " otlp,\n", + " agent_id_hint=\"QA_research_graph\",\n", + " use_temporal_hierarchy=True,\n", + ")\n", + "print(f\"TGJ documents: {len(docs)}\")\n", + "\n", + "# --- TGJ -> Trace Nodes ---\n", + "nodes = ingest_tgj(docs[0])\n", + "\n", + "# ingest_tgj stores each node under both its ID and name key,\n", + "# so deduplicate by object identity when iterating values.\n", + "param_nodes = list({id(n): n for n in nodes.values()\n", + " if isinstance(n, ParameterNode) and n.trainable}.values())\n", + "msg_nodes = list({id(n): n for n in nodes.values()\n", + " if isinstance(n, MessageNode)}.values())\n", + "\n", + "print(f\"\\nParameterNode (trainable): {len(param_nodes)}\")\n", + "for p in param_nodes:\n", + " print(f\" {p.py_name} trainable={p.trainable}\")\n", + "\n", + "# C7: Verify unique trainable param count == expected template keys\n", + "unique_param_names = set()\n", + "for p in param_nodes:\n", + " name = p.py_name.split(\"/\")[-1] if \"/\" in p.py_name else p.py_name\n", + " unique_param_names.add(name)\n", + "print(f\"\\nUnique trainable params: {sorted(unique_param_names)}\")\n", + "\n", + "assert len(unique_param_names) == len(param_nodes), \\\n", + " f\"Duplicate ParameterNodes: {len(param_nodes)} nodes but {len(unique_param_names)} unique names\"\n", + "print(\"[OK] No duplicate ParameterNodes (C7).\")\n", + "\n", + "print(f\"\\nMessageNode: {len(msg_nodes)}\")\n", + "for m in msg_nodes:\n", + " print(f\" {m.py_name} parents={[p.py_name.split('/')[-1] for p in m.parents]}\")\n", + "\n", + "# C8: Verify output node is a top-level node (not a child LLM span)\n", + "tgj_nodes = docs[0][\"nodes\"]\n", + "top_level_msg = []\n", + "for m in msg_nodes:\n", + " m_name = m.py_name.split(\"/\")[-1] if \"/\" in m.py_name else m.py_name\n", + " for nid, n in tgj_nodes.items():\n", + " if n.get(\"kind\") == \"msg\" and n.get(\"name\") == m_name:\n", + 
" otel_info = (n.get(\"info\") or {}).get(\"otel\", {})\n", + " is_child = str(otel_info.get(\"temporal_ignore\", \"false\")).lower() in (\"true\", \"1\", \"yes\")\n", + " if not is_child:\n", + " top_level_msg.append((m, n))\n", + " break\n", + "\n", + "if top_level_msg:\n", + " output_node, output_tgj = top_level_msg[-1]\n", + " print(f\"\\nOutput node (sink): {output_node.py_name}\")\n", + " print(f\" temporal_ignore=false -> OK (not a child span)\")\n", + " print(\"[OK] Output node is a top-level node (C8).\")\n", + "else:\n", + " print(\"[WARN] No top-level message nodes found.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:59:07.375448Z", + "iopub.status.busy": "2026-02-12T07:59:07.374447Z", + "iopub.status.idle": "2026-02-12T07:59:07.387535Z", + "shell.execute_reply": "2026-02-12T07:59:07.386526Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Child LLM spans detected (via temporal_ignore): 3\n", + "Top-level message nodes: 4\n", + " [OK] Node executor temporal parent → ab65eb61cb9e... (not a child span)\n", + " [OK] Node synthesizer temporal parent → 9f33c96e551b... (not a child span)\n", + " [OK] Node evaluator temporal parent → 8c1a42d01bc8... 
(not a child span)\n", + "\n", + "[OK] Temporal chaining verified — no top-level node points to child spans.\n" + ] + } + ], + "source": [ + "# --- Verify temporal chain: child spans did NOT break chaining ---\n", + "tgj_nodes = docs[0][\"nodes\"]\n", + "\n", + "# Collect child LLM span IDs using trace.temporal_ignore marker (D10)\n", + "llm_span_ids = set()\n", + "for nid, n in tgj_nodes.items():\n", + " otel_info = (n.get(\"info\") or {}).get(\"otel\", {})\n", + " if str(otel_info.get(\"temporal_ignore\", \"false\")).lower() in (\"true\", \"1\", \"yes\"):\n", + " llm_span_ids.add(otel_info.get(\"span_id\"))\n", + "\n", + "print(f\"Child LLM spans detected (via temporal_ignore): {len(llm_span_ids)}\")\n", + "assert len(llm_span_ids) > 0, \"No child LLM spans found — temporal_ignore detection failed.\"\n", + "\n", + "# Check that no top-level node has a temporal parent pointing to a child LLM span\n", + "top_level_nodes = [\n", + " (nid, n) for nid, n in tgj_nodes.items()\n", + " if n.get(\"kind\") == \"msg\"\n", + " and str((n.get(\"info\") or {}).get(\"otel\", {}).get(\"temporal_ignore\", \"false\")).lower() not in (\"true\", \"1\", \"yes\")\n", + "]\n", + "\n", + "print(f\"Top-level message nodes: {len(top_level_nodes)}\")\n", + "clean = True\n", + "for nid, n in top_level_nodes:\n", + " parent_ref = n.get(\"inputs\", {}).get(\"parent\", \"\")\n", + " if parent_ref and \":\" in parent_ref:\n", + " _, ref_id = parent_ref.rsplit(\":\", 1)\n", + " if ref_id in llm_span_ids:\n", + " print(f\" [BUG] Node {n.get('name')} temporal parent points to child LLM span {ref_id[:12]}...\")\n", + " clean = False\n", + " else:\n", + " print(f\" [OK] Node {n.get('name')} temporal parent → {ref_id[:12]}... 
(not a child span)\")\n", + "\n", + "assert clean, \"Temporal parent incorrectly points to a child LLM span!\"\n", + "print(\"\\n[OK] Temporal chaining verified — no top-level node points to child spans.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 7. Bindings & `apply_updates()`\n", + "\n", + "Bindings map optimizer output keys to live template values.\n", + "`apply_updates()` pushes new values through the bindings so the\n", + "**next** `invoke()` automatically uses the updated prompt." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:59:07.394844Z", + "iopub.status.busy": "2026-02-12T07:59:07.394844Z", + "iopub.status.idle": "2026-02-12T07:59:07.406751Z", + "shell.execute_reply": "2026-02-12T07:59:07.404735Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============================================================\n", + "BEFORE apply_updates\n", + "============================================================\n", + " planner_prompt: 'Create a JSON plan for: {query}. Use web_researcher and synthesizer; include wikidata_researcher if IDs are requested.'\n", + " executor_prompt: 'Given step {step} of plan: {plan_step} for query: {query}. Return JSON {goto,query}.'\n", + " synthesizer_prompt: 'Answer: {query}\\nContext:\\n{contexts}\\nIf asked for IDs, include Wikidata QIDs.'\n", + "\n", + "============================================================\n", + "AFTER apply_updates\n", + "============================================================\n", + " planner_prompt: 'Create a detailed, step-by-step plan for: {query}. Use web_researcher, wikidata_researcher, synthesizer.'\n", + " executor_prompt: 'Given step {step} of plan: {plan_step} for query: {query}. 
Return JSON {goto,query}.'\n", + " synthesizer_prompt: 'Answer: {query}\\nContext:\\n{contexts}\\nIf asked for IDs, include Wikidata QIDs.'\n", + "\n", + "[OK] Binding → templates propagation verified.\n" + ] + } + ], + "source": [ + "print(\"=\" * 60)\n", + "print(\"BEFORE apply_updates\")\n", + "print(\"=\" * 60)\n", + "for k, b in ig.bindings.items():\n", + " print(f\" {k}: {b.get()!r}\")\n", + "\n", + "# Simulate an optimizer suggesting a new planner prompt\n", + "apply_updates(\n", + " {\"planner_prompt\": \"Create a detailed, step-by-step plan for: {query}. Use web_researcher, wikidata_researcher, synthesizer.\"},\n", + " ig.bindings,\n", + ")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"AFTER apply_updates\")\n", + "print(\"=\" * 60)\n", + "for k, b in ig.bindings.items():\n", + " print(f\" {k}: {b.get()!r}\")\n", + "\n", + "# Verify the change is visible in ig.templates too\n", + "assert \"detailed\" in ig.templates[\"planner_prompt\"]\n", + "print(\"\\n[OK] Binding → templates propagation verified.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:59:07.413969Z", + "iopub.status.busy": "2026-02-12T07:59:07.412959Z", + "iopub.status.idle": "2026-02-12T07:59:07.428527Z", + "shell.execute_reply": "2026-02-12T07:59:07.427517Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "param.planner_prompt in span:\n", + " Create a detailed, step-by-step plan for: {query}. 
Use web_researcher, wikidata_researcher, synthesizer.\n", + "\n", + "[OK] Updated template appears in OTLP span after re-invoke.\n" + ] + } + ], + "source": [ + "# Invoke again and confirm the updated template appears in the OTLP span\n", + "ig.invoke({\"query\": \"test update\"})\n", + "otlp_after = ig.session.flush_otlp(clear=True)\n", + "\n", + "spans_after = otlp_after[\"resourceSpans\"][0][\"scopeSpans\"][0][\"spans\"]\n", + "planner_sp = next(s for s in spans_after if s[\"name\"] == \"planner\")\n", + "planner_attrs = {\n", + " a[\"key\"]: a[\"value\"][\"stringValue\"] for a in planner_sp[\"attributes\"]\n", + "}\n", + "\n", + "print(f\"param.planner_prompt in span:\")\n", + "print(f\" {planner_attrs['param.planner_prompt']}\")\n", + "\n", + "assert \"detailed\" in planner_attrs[\"param.planner_prompt\"]\n", + "print(\"\\n[OK] Updated template appears in OTLP span after re-invoke.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:59:07.436041Z", + "iopub.status.busy": "2026-02-12T07:59:07.435043Z", + "iopub.status.idle": "2026-02-12T07:59:07.444869Z", + "shell.execute_reply": "2026-02-12T07:59:07.443860Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Templates reset to original values:\n", + " executor_prompt: 'Given step {step} of plan: {plan_step} for query: {query}. Return JSON {goto,query}.'\n", + " planner_prompt: 'Create a JSON plan for: {query}. 
Use web_researcher and synthesizer; include wikidata_researcher if IDs are requested.'\n", + " synthesizer_prompt: 'Answer: {query}\\nContext:\\n{contexts}\\nIf asked for IDs, include Wikidata QIDs.'\n" + ] + } + ], + "source": [ + "# Reset templates back to original for the optimization demo\n", + "apply_updates(INITIAL_TEMPLATES, ig.bindings)\n", + "print(\"Templates reset to original values:\")\n", + "for k in sorted(INITIAL_TEMPLATES):\n", + " print(f\" {k}: {ig.templates[k]!r}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 8. `optimize_graph()` — StubLLM End-to-End\n", + "\n", + "Run the full optimization loop with **StubLLM** (deterministic, no API\n", + "calls). This verifies the complete pipeline:\n", + "\n", + "```\n", + "instrument → invoke → flush OTLP → TGJ → ingest → optimizer → apply_updates\n", + "```\n", + "\n", + "We use a simple length-based `eval_fn` and a mock optimizer to\n", + "demonstrate prompt value changes across iterations." 
from opto.trace.io import optimize_graph, EvalResult

class MockOptimizer:
    """Deterministic stand-in for an optimizer.

    Records every lifecycle call in ``self.calls`` and, on ``step()``,
    returns a fixed prompt update that triggers StubLLM's enriched mode.
    """

    def __init__(self, param_nodes=None, **kw):
        # param_nodes / kw accepted for interface compatibility; unused by the mock.
        self.calls = []

    def zero_feedback(self):
        self.calls.append("zero_feedback")

    def backward(self, output_node, feedback_text):
        self.calls.append("backward")

    def step(self):
        self.calls.append("step")
        return {
            "planner_prompt": "Create a thorough, step-by-step JSON plan for: {query}. Use web_researcher, wikidata_researcher, synthesizer.",
        }

def stub_eval_fn(payload):
    """Score a run by plan richness, answer length, and answer structure.

    Approximate ranges:
      * empty / error answer           -> 0.0
      * minimal (2-step plan, short)   -> ~0.35-0.50
      * rich (3+ step plan, long)      -> ~0.70-0.95
    """
    import re

    result = payload.get("result") or {}
    ans = str(payload.get("answer", "") or "")
    if ans.strip().startswith("[ERROR]") or not ans.strip():
        return EvalResult(score=0.0, feedback="LLM failure/empty answer")

    plan = result.get("plan", {}) if isinstance(result, dict) else {}
    plan_steps = len(plan) if isinstance(plan, dict) else 0

    # Plan richness: 0.1 per step, at most 4 steps counted.
    plan_score = 0.1 * min(plan_steps, 4)

    # Answer length: gradual reward up to 400 chars.
    length_score = min(len(ans) / 400.0, 1.0) * 0.3

    # Structure bonus: Wikidata QIDs and numbered-list markers.
    structure_bonus = 0.0
    if re.search(r"\bQ\d{2,}\b", ans):
        structure_bonus += 0.1
    if any(marker in ans for marker in ("(1)", "(2)", "(3)", "1)", "2)")):
        structure_bonus += 0.05

    # Keep the exact summation order so float results match bit-for-bit.
    score = 0.1 + plan_score + length_score + structure_bonus
    score = min(score, 0.95)
    return EvalResult(score=round(score, 4), feedback=f"plan_steps={plan_steps}, len={len(ans)}, score={score:.4f}")

print("Mock optimizer and eval_fn ready.")
"output_type": "stream", + "text": [ + "optimize_graph: running baseline ...\n", + "opto.trace.io.optimization: optimize_graph: running baseline ...\n", + "optimize_graph: running iteration 1 ...\n", + "opto.trace.io.optimization: optimize_graph: running iteration 1 ...\n", + "Applied updates: ['planner_prompt']\n", + "opto.trace.io.optimization: Applied updates: ['planner_prompt']\n", + "optimize_graph: running iteration 2 ...\n", + "opto.trace.io.optimization: optimize_graph: running iteration 2 ...\n", + "Applied updates: ['planner_prompt']\n", + "opto.trace.io.optimization: Applied updates: ['planner_prompt']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============================================================\n", + "TEMPLATE BEFORE OPTIMIZATION\n", + "============================================================\n", + " planner_prompt: 'Create a JSON plan for: {query}. Use web_researcher and synthesizer; include wikidata_researcher if IDs are requested.'\n", + "\n", + " Running baseline...\n", + " Query 1/3: Summarize the causes and key events of t... score=0.4148\n", + " Query 2/3: Give 3 factual relationships about Tesla... score=0.528\n", + " Query 3/3: What is the Wikidata ID for CRISPR and l... score=0.475\n", + " Baseline average: 0.4726\n", + " Iteration 1/2...\n", + " Query 1/3: Summarize the causes and key events of t... score=0.4148\n", + " Query 2/3: Give 3 factual relationships about Tesla... score=0.528\n", + " Query 3/3: What is the Wikidata ID for CRISPR and l... score=0.475\n", + " Iteration 1 average: 0.4726\n", + " Iteration 2/2...\n", + " Query 1/3: Summarize the causes and key events of t... score=0.5148\n", + " Query 2/3: Give 3 factual relationships about Tesla... score=0.728\n", + " Query 3/3: What is the Wikidata ID for CRISPR and l... 
score=0.675\n", + " Iteration 2 average: 0.6393 * NEW BEST\n", + "\n", + "============================================================\n", + "TEMPLATE AFTER OPTIMIZATION\n", + "============================================================\n", + " planner_prompt: 'Create a thorough, step-by-step JSON plan for: {query}. Use web_researcher, wikidata_researcher, synthesizer.'\n", + "\n", + "============================================================\n", + "OPTIMIZATION RESULTS\n", + "============================================================\n", + " Baseline score: 0.4726\n", + " Best score: 0.6393\n", + " Best iteration: 2\n", + " Score history: [0.4726, 0.4726, 0.6393]\n", + " Optimizer calls: ['zero_feedback', 'backward', 'step', 'zero_feedback', 'backward', 'step']\n", + " Final params: ['planner_prompt', 'executor_prompt', 'synthesizer_prompt']\n", + " Best params: ['planner_prompt', 'executor_prompt', 'synthesizer_prompt']\n", + " Best updates: ['planner_prompt']\n" + ] + } + ], + "source": [ + "# -- Use the same 3 queries as the reference demo --\n", + "QUERIES = DEMO_QUERIES\n", + "\n", + "mock_opt = MockOptimizer()\n", + "\n", + "print(\"=\" * 60)\n", + "print(\"TEMPLATE BEFORE OPTIMIZATION\")\n", + "print(\"=\" * 60)\n", + "print(f\" planner_prompt: {ig.templates['planner_prompt']!r}\")\n", + "print()\n", + "\n", + "opt_result = optimize_graph(\n", + " ig,\n", + " queries=QUERIES,\n", + " iterations=2,\n", + " optimizer=mock_opt,\n", + " eval_fn=stub_eval_fn,\n", + " apply_updates_flag=True,\n", + ")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"TEMPLATE AFTER OPTIMIZATION\")\n", + "print(\"=\" * 60)\n", + "print(f\" planner_prompt: {ig.templates['planner_prompt']!r}\")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"OPTIMIZATION RESULTS\")\n", + "print(\"=\" * 60)\n", + "print(f\" Baseline score: {opt_result.baseline_score:.4f}\")\n", + "print(f\" Best score: {opt_result.best_score:.4f}\")\n", + "print(f\" Best iteration: 
{opt_result.best_iteration}\")\n", + "print(f\" Score history: {[round(s, 4) for s in opt_result.score_history]}\")\n", + "print(f\" Optimizer calls: {mock_opt.calls}\")\n", + "print(f\" Final params: {list(opt_result.final_parameters.keys())}\")\n", + "print(f\" Best params: {list(opt_result.best_parameters.keys())}\")\n", + "print(f\" Best updates: {list(opt_result.best_updates.keys())}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:59:07.559993Z", + "iopub.status.busy": "2026-02-12T07:59:07.558992Z", + "iopub.status.idle": "2026-02-12T07:59:07.571810Z", + "shell.execute_reply": "2026-02-12T07:59:07.570297Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[OK] StubLLM end-to-end optimization verified!\n", + " - Template changed across iterations\n", + " - Baseline score: 0.4726\n", + " - Best score: 0.6393 (improvement: +0.1667)\n", + " - Score did not saturate at 1.0\n", + " - Optimizer calls: ['zero_feedback', 'backward', 'step', 'zero_feedback', 'backward', 'step']\n", + "\n", + "Iter Avg Score Best Score Best Iter \n", + "------------------------------------------\n", + "0 0.4726 0.4726 0 \n", + "1 0.4726 0.4726 0 \n", + "2 0.6393 0.6393 2 \n" + ] + } + ], + "source": [ + "# ---- Verify M1 acceptance: template changed and score improved ----\n", + "assert ig.templates[\"planner_prompt\"] != INITIAL_TEMPLATES[\"planner_prompt\"], \\\n", + " \"Prompt should have changed after optimization!\"\n", + "assert \"step-by-step\" in ig.templates[\"planner_prompt\"].lower(), \\\n", + " f\"Expected 'step-by-step' in optimized planner_prompt, got: {ig.templates['planner_prompt']!r}\"\n", + "\n", + "for i, runs in enumerate(opt_result.all_runs):\n", + " for r in runs:\n", + " assert \"resourceSpans\" in r.otlp, f\"Run in iter {i} missing OTLP data\"\n", + "\n", + "assert opt_result.best_score > opt_result.baseline_score, \\\n", + " 
f\"Optimization should improve: best={opt_result.best_score:.4f} baseline={opt_result.baseline_score:.4f}\"\n", + "assert opt_result.best_score < 1.0, \\\n", + " f\"Score should not saturate at 1.0: {opt_result.best_score:.4f}\"\n", + "\n", + "print(\"[OK] StubLLM end-to-end optimization verified!\")\n", + "print(f\" - Template changed across iterations\")\n", + "print(f\" - Baseline score: {opt_result.baseline_score:.4f}\")\n", + "print(f\" - Best score: {opt_result.best_score:.4f} (improvement: +{opt_result.best_score - opt_result.baseline_score:.4f})\")\n", + "print(f\" - Score did not saturate at 1.0\")\n", + "print(f\" - Optimizer calls: {mock_opt.calls}\")\n", + "\n", + "# Optimization history table\n", + "print(f\"\\n{'Iter':<6} {'Avg Score':<12} {'Best Score':<12} {'Best Iter':<12}\")\n", + "print(\"-\" * 42)\n", + "best_so_far = float(\"-inf\")\n", + "best_iter_so_far = 0\n", + "for i, sc in enumerate(opt_result.score_history):\n", + " if sc > best_so_far:\n", + " best_so_far = sc\n", + " best_iter_so_far = i\n", + " print(f\"{i:<6} {sc:<12.4f} {best_so_far:<12.4f} {best_iter_so_far:<12}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 9. Live LLM Mode (OpenRouter)\n", + "\n", + "This section runs the same pipeline against a **real LLM provider**\n", + "(OpenRouter). 
# --- Live LLM setup using the openai package ---
# The openai package is already installed (litellm dependency).
# Point it directly at OpenRouter — same as ChatOpenAI under the hood.
from openai import OpenAI

_client = OpenAI(
    base_url=OPENROUTER_BASE_URL,
    api_key=OPENROUTER_API_KEY,
)

import time as _time

def live_llm(messages=None, **kwargs):
    """Call OpenRouter with automatic retry on 429 rate-limit errors.

    Retries up to 5 times with exponential backoff (10s, 20s, 40s, 80s).
    A non-429 error, or a 429 on the final attempt, is re-raised.
    """
    live_llm.call_count += 1
    max_retries = 5
    for attempt in range(max_retries):
        try:
            return _client.chat.completions.create(
                model=OPENROUTER_MODEL,
                messages=messages,
                max_tokens=kwargs.get("max_tokens", MAX_TOKENS_PER_CALL),
                temperature=kwargs.get("temperature", LIVE_TEMPERATURE),
            )
        except Exception as e:
            if '429' in str(e) and attempt < max_retries - 1:
                # FIX: comment previously claimed "20s, 40s, ..." but the
                # schedule is 2**attempt * 10 -> 10s, 20s, 40s, 80s.
                wait = 2 ** attempt * 10  # Exponential backoff: 10s, 20s, 40s, 80s
                print(f"  [RETRY] Rate-limited (429), waiting {wait}s... (attempt {attempt+1}/{max_retries})")
                _time.sleep(wait)
            else:
                raise

live_llm.model = OPENROUTER_MODEL
live_llm.call_count = 0

# Set env vars so the auto-created optimizer also routes through OpenRouter
os.environ["OPENAI_BASE_URL"] = OPENROUTER_BASE_URL
os.environ["OPENAI_API_KEY"] = OPENROUTER_API_KEY
os.environ["TRACE_LITELLM_MODEL"] = f"openai/{OPENROUTER_MODEL}"

# Quick smoke test with retry
for _attempt in range(3):
    try:
        _test = _client.chat.completions.create(
            model=OPENROUTER_MODEL,
            messages=[{"role": "user", "content": "Say hello in one word."}],
            max_tokens=10,
            temperature=0,
        )
        print(f"[OK] Live LLM smoke test passed: {_test.choices[0].message.content!r}")
        break
    except Exception as e:
        if '429' in str(e) and _attempt < 2:
            # FIX: the message previously reported 30*(n+1) seconds while the
            # code slept 10*(n+1) seconds; compute the wait once and report it.
            _wait = 10 * (_attempt + 1)
            print(f"[RETRY] Smoke test rate-limited, waiting {_wait}s...")
            _time.sleep(_wait)
        else:
            print(f"[WARN] Smoke test failed: {e}")
            print(f"       model={OPENROUTER_MODEL}")
            print(f"       base_url={OPENROUTER_BASE_URL}")

print(f"\nLive LLM ready (openai client -> {OPENROUTER_BASE_URL})")
print(f"  model: {OPENROUTER_MODEL}")
+ "iopub.execute_input": "2026-02-12T07:59:07.609620Z", + "iopub.status.busy": "2026-02-12T07:59:07.608112Z", + "iopub.status.idle": "2026-02-12T07:59:09.143370Z", + "shell.execute_reply": "2026-02-12T07:59:09.141411Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============================================================\n", + "LIVE LLM MODE (OpenRouter via openai client)\n", + "============================================================\n", + " [RETRY] Rate-limited (429), waiting 10s... (attempt 1/5)\n", + " [RETRY] Rate-limited (429), waiting 20s... (attempt 2/5)\n", + " [RETRY] Rate-limited (429), waiting 40s... (attempt 3/5)\n", + " [RETRY] Rate-limited (429), waiting 80s... (attempt 4/5)\n" + ] + } + ], + "source": [ + "from opto.trace.io import LLMCallError\n", + "\n", + "if not HAS_API_KEY:\n", + " print(\"[SKIP] No OPENROUTER_API_KEY — live mode skipped.\")\n", + " print(\" To enable: add the key in Colab Secrets or a .env file.\")\n", + " live_ok = False\n", + "else:\n", + " print(\"=\" * 60)\n", + " print(\"LIVE LLM MODE (OpenRouter via openai client)\")\n", + " print(\"=\" * 60)\n", + "\n", + " live_templates = dict(INITIAL_TEMPLATES)\n", + "\n", + " live_ig = instrument_graph(\n", + " graph=None,\n", + " service_name=\"m1-live\",\n", + " trainable_keys={\"planner\", \"executor\", \"synthesizer\"},\n", + " llm=live_llm,\n", + " initial_templates=live_templates,\n", + " emit_genai_child_spans=True,\n", + " provider_name=\"openrouter\",\n", + " llm_span_name=\"openrouter.chat.completion\",\n", + " input_key=\"query\",\n", + " output_key=\"final_answer\",\n", + " )\n", + " live_graph = build_graph(live_ig.tracing_llm, live_ig.templates)\n", + " live_ig.graph = live_graph\n", + "\n", + " # --- Test invocation ---\n", + " live_ok = False\n", + " try:\n", + " live_result = live_ig.invoke({\"query\": \"What is gradient descent?\"})\n", + " ans = str(live_result.get(\"final_answer\", \"\") or \"\")\n", + " if 
ans.startswith(\"[ERROR]\") or not ans.strip():\n", + " print(f\"[FAIL] Live LLM returned error or empty: {ans[:200]}\")\n", + " else:\n", + " print(f\"\\nLive answer ({len(ans)} chars):\")\n", + " print(f\" {ans[:300]}\")\n", + "\n", + " live_otlp = live_ig.session.flush_otlp(clear=False)\n", + " live_spans = live_otlp[\"resourceSpans\"][0][\"scopeSpans\"][0][\"spans\"]\n", + " trace_ids = {s[\"traceId\"] for s in live_spans}\n", + " has_root = any(str(sp.get(\"name\",\"\")).endswith(\".invoke\") for sp in live_spans)\n", + "\n", + " print(f\"\\nSpans: {len(live_spans)} trace_ids={len(trace_ids)} root_invoke={has_root}\")\n", + " for sp in live_spans:\n", + " for a in sp.get(\"attributes\", []):\n", + " if a[\"key\"] == \"gen_ai.provider.name\":\n", + " prov = a[\"value\"].get(\"stringValue\", \"\")\n", + " print(f\" gen_ai.provider.name = {prov}\")\n", + " if prov != \"openrouter\":\n", + " print(f\" [WARN] Expected 'openrouter', got '{prov}'\")\n", + "\n", + " live_ok = True\n", + " print(\"\\n[OK] Live LLM trace validated!\")\n", + "\n", + " except LLMCallError as e:\n", + " print(f\"\\n[FAIL] LLMCallError: {e}\")\n", + " print(\" Skipping live optimization.\")\n", + " except Exception as e:\n", + " print(f\"\\n[FAIL] Unexpected error: {e}\")\n", + " print(\" Skipping live optimization.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:59:09.152911Z", + "iopub.status.busy": "2026-02-12T07:59:09.151899Z", + "iopub.status.idle": "2026-02-12T07:59:09.728081Z", + "shell.execute_reply": "2026-02-12T07:59:09.727073Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[SKIP] Live invocation failed — live optimization skipped.\n" + ] + } + ], + "source": [ + "import logging\n", + "logging.getLogger('opto.trace.io.optimization').setLevel(logging.INFO)\n", + "logging.getLogger('opto.trace.io.bindings').setLevel(logging.INFO)\n", + 
"logging.basicConfig(level=logging.WARNING, format='%(name)s: %(message)s')\n", + "logging.getLogger('opto.trace.io.optimization').handlers = [logging.StreamHandler()]\n", + "logging.getLogger('opto.trace.io.bindings').handlers = [logging.StreamHandler()]\n", + "\n", + "if HAS_API_KEY and live_ok:\n", + " # optimize_graph() auto-creates OptoPrimeV2 when optimizer=None.\n", + " # eval_fn=None uses the built-in _default_eval_fn which reads eval.score\n", + " # from the evaluator span in the OTLP trace.\n", + " LIVE_QUERIES = DEMO_QUERIES\n", + "\n", + " print(\"=\" * 60)\n", + " print(f\"LIVE OPTIMIZATION ({len(LIVE_QUERIES)} queries, 1 iteration)\")\n", + " print(\"=\" * 60)\n", + "\n", + " # Reset templates\n", + " apply_updates(INITIAL_TEMPLATES, live_ig.bindings)\n", + " print(f\" planner_prompt BEFORE: {live_ig.templates['planner_prompt']!r}\")\n", + "\n", + " live_opt_result = optimize_graph(\n", + " live_ig,\n", + " queries=LIVE_QUERIES,\n", + " iterations=1,\n", + " optimizer=None,\n", + " eval_fn=None,\n", + " apply_updates_flag=True,\n", + " )\n", + "\n", + " print(f\"\\n planner_prompt AFTER: {live_ig.templates['planner_prompt']!r}\")\n", + " print(f\" Baseline score: {live_opt_result.baseline_score:.4f}\")\n", + " print(f\" Best score: {live_opt_result.best_score:.4f}\")\n", + " print(f\" Score history: {[round(s, 4) for s in live_opt_result.score_history]}\")\n", + " print(f\" Total LLM calls: {live_llm.call_count}\")\n", + "\n", + " # Optimization history table\n", + " print(f\"\\n{'Iter':<6} {'Avg Score':<12} {'Best Score':<12}\")\n", + " print(\"-\" * 30)\n", + " best_so_far = float(\"-inf\")\n", + " for i, sc in enumerate(live_opt_result.score_history):\n", + " if sc > best_so_far:\n", + " best_so_far = sc\n", + " print(f\"{i:<6} {sc:<12.4f} {best_so_far:<12.4f}\")\n", + "\n", + " # --- Live OTLP inspection ---\n", + " live_otlp_final = live_ig.session.flush_otlp(clear=True)\n", + " try:\n", + " live_spans = 
live_otlp_final[\"resourceSpans\"][0][\"scopeSpans\"][0][\"spans\"]\n", + " trace_ids = {s[\"traceId\"] for s in live_spans}\n", + " has_root = any(str(sp.get(\"name\",\"\")).endswith(\".invoke\") for sp in live_spans)\n", + " print(f\"\\n Live OTLP: {len(live_spans)} spans, {len(trace_ids)} trace IDs, root_invoke={has_root}\")\n", + " except (KeyError, IndexError) as e:\n", + " print(f\"\\n [WARN] Could not inspect live OTLP: {e}\")\n", + "else:\n", + " if not HAS_API_KEY:\n", + " print(\"[SKIP] No API key — live optimization skipped.\")\n", + " else:\n", + " print(\"[SKIP] Live invocation failed — live optimization skipped.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 10. Save Artifacts\n", + "\n", + "Save OTLP traces, TGJ documents, and optimization summary to the run\n", + "folder (Google Drive on Colab, local fallback)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "execution": { + "iopub.execute_input": "2026-02-12T07:59:09.732598Z", + "iopub.status.busy": "2026-02-12T07:59:09.732598Z", + "iopub.status.idle": "2026-02-12T07:59:09.818823Z", + "shell.execute_reply": "2026-02-12T07:59:09.817814Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============================================================\n", + "SAVING ARTIFACTS\n", + "============================================================\n", + " /home/user/code/Trace/examples/notebooks/notebook_outputs/m1/stub_sample_otlp.json\n", + " /home/user/code/Trace/examples/notebooks/notebook_outputs/m1/stub_all_traces.json (9 traces)\n", + " /home/user/code/Trace/examples/notebooks/notebook_outputs/m1/stub_sample_tgj.json\n", + " /home/user/code/Trace/examples/notebooks/notebook_outputs/m1/stub_summary.json\n", + "\n", + "All artifacts saved to: /home/user/code/Trace/examples/notebooks/notebook_outputs/m1\n" + ] + } + ], + "source": [ + "print(\"=\" * 60)\n", + "print(\"SAVING 
ARTIFACTS\")\n", + "print(\"=\" * 60)\n", + "\n", + "# --- Save StubLLM optimization traces ---\n", + "if opt_result.all_runs and opt_result.all_runs[0]:\n", + " # Sample trace\n", + " sample_otlp = opt_result.all_runs[0][0].otlp\n", + " p = os.path.join(RUN_FOLDER, \"stub_sample_otlp.json\")\n", + " with open(p, \"w\") as f:\n", + " json.dump(sample_otlp, f, indent=2)\n", + " print(f\" {p}\")\n", + "\n", + " # All optimization traces\n", + " all_traces = []\n", + " for iter_idx, runs in enumerate(opt_result.all_runs):\n", + " label = \"baseline\" if iter_idx == 0 else f\"iteration_{iter_idx}\"\n", + " for ri, run in enumerate(runs):\n", + " all_traces.append({\n", + " \"iteration\": label,\n", + " \"query_index\": ri,\n", + " \"score\": run.score,\n", + " \"otlp\": run.otlp,\n", + " })\n", + " p = os.path.join(RUN_FOLDER, \"stub_all_traces.json\")\n", + " with open(p, \"w\") as f:\n", + " json.dump(all_traces, f, indent=2)\n", + " print(f\" {p} ({len(all_traces)} traces)\")\n", + "\n", + " # TGJ from first run\n", + " tgj_docs = otlp_traces_to_trace_json(\n", + " sample_otlp, agent_id_hint=\"QA_research_graph\", use_temporal_hierarchy=True,\n", + " )\n", + " p = os.path.join(RUN_FOLDER, \"stub_sample_tgj.json\")\n", + " with open(p, \"w\") as f:\n", + " json.dump(tgj_docs, f, indent=2)\n", + " print(f\" {p}\")\n", + "\n", + "# --- Summary ---\n", + "summary = {\n", + " \"mode\": \"stub\",\n", + " \"baseline_score\": opt_result.baseline_score,\n", + " \"best_score\": opt_result.best_score,\n", + " \"best_iteration\": opt_result.best_iteration,\n", + " \"score_history\": opt_result.score_history,\n", + " \"final_parameters\": opt_result.final_parameters,\n", + "}\n", + "p = os.path.join(RUN_FOLDER, \"stub_summary.json\")\n", + "with open(p, \"w\") as f:\n", + " json.dump(summary, f, indent=2)\n", + "print(f\" {p}\")\n", + "\n", + "# --- Save live traces if available ---\n", + "if HAS_API_KEY and 'live_opt_result' in dir():\n", + " live_traces = []\n", + " for 
iter_idx, runs in enumerate(live_opt_result.all_runs):\n", + " label = \"baseline\" if iter_idx == 0 else f\"iteration_{iter_idx}\"\n", + " for ri, run in enumerate(runs):\n", + " live_traces.append({\n", + " \"iteration\": label,\n", + " \"query_index\": ri,\n", + " \"score\": run.score,\n", + " \"otlp\": run.otlp,\n", + " })\n", + " p = os.path.join(RUN_FOLDER, \"live_all_traces.json\")\n", + " with open(p, \"w\") as f:\n", + " json.dump(live_traces, f, indent=2)\n", + " print(f\" {p} ({len(live_traces)} traces)\")\n", + "\n", + " live_summary = {\n", + " \"mode\": \"live\",\n", + " \"model\": OPENROUTER_MODEL,\n", + " \"baseline_score\": live_opt_result.baseline_score,\n", + " \"best_score\": live_opt_result.best_score,\n", + " \"score_history\": live_opt_result.score_history,\n", + " \"final_parameters\": live_opt_result.final_parameters,\n", + " \"total_llm_calls\": live_llm.call_count,\n", + " }\n", + " p = os.path.join(RUN_FOLDER, \"live_summary.json\")\n", + " with open(p, \"w\") as f:\n", + " json.dump(live_summary, f, indent=2)\n", + " print(f\" {p}\")\n", + "\n", + "print(f\"\\nAll artifacts saved to: {RUN_FOLDER}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## Summary\n", + "\n", + "This notebook demonstrated the full M1 pipeline:\n", + "\n", + "1. **`instrument_graph()`** — one-liner to add OTEL tracing to a LangGraph\n", + "2. **`param.*` attributes** — spans carry trainable prompt values\n", + "3. **OTLP → TGJ → `ParameterNode` + `MessageNode`** — optimizer-compatible trace graph\n", + "4. **Temporal integrity** — child `gen_ai.*` spans don't break chaining\n", + "5. **`apply_updates()`** — bindings propagate optimizer output to live templates\n", + "6. **`optimize_graph()`** — end-to-end loop (StubLLM deterministic + live provider)\n", + "7. **Artifacts persisted** — OTLP JSON, TGJ JSON, and summaries saved to disk\n", + "\n", + "All verifications passed with StubLLM (CI-safe, deterministic). 
When\n", + "`OPENROUTER_API_KEY` is set, the live section additionally proves\n", + "real-provider tracing with `param.*` and `gen_ai.*` attributes." + ] + } + ], + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/opto/trace/io/__init__.py b/opto/trace/io/__init__.py new file mode 100644 index 00000000..9f9aaf09 --- /dev/null +++ b/opto/trace/io/__init__.py @@ -0,0 +1,83 @@ +""" +opto.trace.io – OTEL instrumentation & optimization for LangGraph +================================================================= + +Public API +---------- +* ``instrument_graph()`` – one-liner to add OTEL tracing to any LangGraph +* ``optimize_graph()`` – one-liner optimisation loop +* ``TelemetrySession`` – unified session manager (OTEL + optional MLflow) +* ``Binding`` / ``apply_updates()`` – param-key → getter/setter mapping +* ``EvalResult`` / ``EvalFn`` – flexible evaluation contract +* ``emit_reward()`` / ``emit_trace()`` – manual span helpers + +Lower-level +~~~~~~~~~~~~ +* ``TracingLLM`` – LLM wrapper with dual semconv +* ``InstrumentedGraph`` – wrapper returned by ``instrument_graph()`` +* ``RunResult`` / ``OptimizationResult`` – result data classes +* ``otlp_traces_to_trace_json()`` – OTLP → TGJ adapter +* ``ingest_tgj()`` / ``merge_tgj()`` – TGJ → Trace nodes +""" + +# -- high-level API -------------------------------------------------------- +from opto.trace.io.instrumentation import instrument_graph, InstrumentedGraph +from opto.trace.io.optimization import ( + optimize_graph, + EvalResult, + EvalFn, + RunResult, + OptimizationResult, +) +from 
opto.trace.io.telemetry_session import TelemetrySession +from opto.trace.io.bindings import Binding, apply_updates, make_dict_binding +from opto.trace.io.otel_semconv import ( + emit_reward, + emit_agentlightning_reward, + emit_trace, + set_span_attributes, + record_genai_chat, +) + +# -- lower-level ----------------------------------------------------------- +from opto.trace.io.langgraph_otel_runtime import ( + TracingLLM, + LLMCallError, + InMemorySpanExporter, + init_otel_runtime, + flush_otlp, + extract_eval_metrics_from_otlp, +) +from opto.trace.io.otel_adapter import otlp_traces_to_trace_json +from opto.trace.io.tgj_ingest import ingest_tgj, merge_tgj + +__all__ = [ + # High-level + "instrument_graph", + "optimize_graph", + "TelemetrySession", + "Binding", + "apply_updates", + "make_dict_binding", + "EvalResult", + "EvalFn", + "emit_reward", + "emit_agentlightning_reward", + "emit_trace", + "set_span_attributes", + "record_genai_chat", + # Data classes + "InstrumentedGraph", + "RunResult", + "OptimizationResult", + # Lower-level + "TracingLLM", + "LLMCallError", + "InMemorySpanExporter", + "init_otel_runtime", + "flush_otlp", + "extract_eval_metrics_from_otlp", + "otlp_traces_to_trace_json", + "ingest_tgj", + "merge_tgj", +] diff --git a/opto/trace/io/bindings.py b/opto/trace/io/bindings.py new file mode 100644 index 00000000..4dce6373 --- /dev/null +++ b/opto/trace/io/bindings.py @@ -0,0 +1,136 @@ +""" +opto.trace.io.bindings +====================== + +Minimal get/set binding layer that maps OTEL/TGJ parameter keys +(e.g. ``param.planner_prompt``, ``param.__code_planner``) to concrete +getter/setter callables. This decouples the optimizer's string-keyed +updates from the runtime location of the actual variable, function, or +graph knob. 
@dataclass
class Binding:
    """Get/set handle for one trainable target.

    Attributes
    ----------
    get : Callable[[], Any]
        Returns the current value (used for logging / optimizer init).
    set : Callable[[Any], None]
        Writes an updated value in-memory (prompts / code / graph knobs).
    kind : ``"prompt"`` | ``"code"`` | ``"graph"``
        Category of the bound target, used for validation and reporting.
    """

    get: Callable[[], Any]
    set: Callable[[Any], None]
    kind: Literal["prompt", "code", "graph"] = "prompt"


def apply_updates(
    updates: Dict[Any, Any],
    bindings: Dict[str, Binding],
    *,
    strict: bool = True,
) -> Dict[str, Any]:
    """Push optimizer updates into their bound targets.

    Parameters
    ----------
    updates : Dict[Any, Any]
        Parameter names (strings) **or** ParameterNode objects mapped to
        the new values suggested by the optimizer.
    bindings : Dict[str, Binding]
        Parameter-name -> ``Binding`` table.
    strict : bool
        When *True* (default) an update key without a binding raises
        ``KeyError``; when *False* such keys are skipped silently.

    Returns
    -------
    Dict[str, Any]
        The subset of updates that was actually applied (string-keyed).

    Raises
    ------
    KeyError
        In strict mode, when an update key has no binding.
    """

    def _canonical(raw: Any) -> str:
        # Accept plain strings or node-like objects carrying a name attribute.
        if isinstance(raw, str):
            name = raw
        else:
            name = (
                getattr(raw, "name", None)
                or getattr(raw, "_name", None)
                or getattr(raw, "py_name", None)
                or str(raw)
            )
        name = str(name).strip()
        if name.startswith("param."):
            name = name[len("param."):]
        # Drop any ":suffix" and any leading "path/" component.
        name = name.split(":")[0].split("/")[-1]
        if name not in bindings:
            # Fall back to the digit-stripped form (e.g. "prompt3" -> "prompt").
            stripped = re.sub(r"\d+$", "", name)
            if stripped in bindings:
                return stripped
        return name

    applied: Dict[str, Any] = {}
    for raw_key, new_value in updates.items():
        key = _canonical(raw_key)
        target = bindings.get(key)
        if target is None:
            if not strict:
                logger.debug("apply_updates: skipping unknown key %r (from %r)", key, raw_key)
                continue
            raise KeyError(
                f"apply_updates: no binding for key {key!r} (from {raw_key!r}). "
                f"Available bindings: {sorted(bindings.keys())}"
            )
        try:
            target.set(new_value)
        except Exception:
            logger.exception("apply_updates: failed to set %r", key)
            if strict:
                raise
        else:
            applied[key] = new_value
            logger.debug("apply_updates: set %r (kind=%s)", key, target.kind)
    return applied
+ """ + return Binding( + get=lambda: store.get(key), + set=lambda v: store.__setitem__(key, v), + kind=kind, + ) diff --git a/opto/trace/io/eval_hooks.py b/opto/trace/io/eval_hooks.py new file mode 100644 index 00000000..8c6b3641 --- /dev/null +++ b/opto/trace/io/eval_hooks.py @@ -0,0 +1,314 @@ +from __future__ import annotations + +import json +from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple + +EvalFn = Callable[ + [str, float, Dict[str, float], str, Dict[str, Any], Dict[str, Any]], + Tuple[float, Dict[str, float], str], +] + + +def default_feedback(score: float, metrics: Dict[str, float], reasons: str) -> str: + return json.dumps({"score": score, "metrics": metrics, "reasons": reasons}) + + +def _clip01(x: float) -> float: + if x < 0.0: + return 0.0 + if x > 1.0: + return 1.0 + return x + + +def _ratio_closeness(r: float) -> float: + """ + Convert ratio-to-target (ideal=1.0) into a [0,1] closeness score. + """ + try: + r = float(r) + except Exception: + return 0.0 + return _clip01(1.0 - abs(1.0 - r)) + + +def _dea_overall_from_scores(dea_scores: Mapping[str, Any]) -> Optional[float]: + """ + Robust aggregate over DEA signals: + - ratios -> closeness + - similarities/coverage assumed in [0,1] + - ignore out-of-range values + """ + if not dea_scores: + return None + + ratio_keys = { + "sections_count_ratio_to_target", + "content_length_ratio_to_target", + "resources_count_ratio_to_target", + } + + vals: List[float] = [] + for k, v in dea_scores.items(): + try: + fv = float(v) + except Exception: + continue + + if k in ratio_keys: + vals.append(_ratio_closeness(fv)) + else: + if 0.0 <= fv <= 1.0: + vals.append(_clip01(fv)) + + if not vals: + return None + return sum(vals) / len(vals) + + +def _try_import_evaluate_document(): + """ + Best-effort import of doc_eval.evaluate_document. + We keep this robust because users might have different top-level package names. 
+ """ + candidates = [ + "document_embedding_analysis.common.doc_eval", + "document_analysis_embedding.common.doc_eval", + "common.doc_eval", # allows running inside the external repo directly + ] + for mod in candidates: + try: + m = __import__(mod, fromlist=["evaluate_document"]) + fn = getattr(m, "evaluate_document", None) + if fn is not None: + return fn, m + except Exception: + continue + return None, None + + +def _synthesize_hybrid_feedback( + llm: Any, + answer: str, + original_reasons: str, + dea_scores: Dict[str, Any], +) -> str: + """ + Use the LLM to synthesize a new feedback string combining the original reasons + and the objective DEA scores. + """ + if not llm: + return original_reasons + + # Format DEA scores for the prompt + dea_summary = [] + for k, v in dea_scores.items(): + if isinstance(v, (int, float)): + dea_summary.append(f"{k}: {v:.3f}") + else: + dea_summary.append(f"{k}: {v}") + dea_text = ", ".join(dea_summary) + + prompt = f""" +You are an expert evaluator. +You have evaluated a generated document and provided the following initial feedback: +"{original_reasons}" + +Additionally, an automated Document Embedding Analysis (DEA) system has provided the following objective metrics: +{dea_text} + +Please synthesize a new, comprehensive feedback explanation that incorporates both your initial qualitative assessment and these quantitative DEA metrics. +Focus on explaining *why* the score is what it is, citing specific metrics where relevant (e.g., "The content is semantically close on plan (0.85) but lacks specific entities..."). +Keep the feedback concise and constructive. +""".strip() + + try: + # Assume LangChain-like interface + from langchain_core.messages import HumanMessage + if hasattr(llm, "invoke"): + response = llm.invoke([HumanMessage(content=prompt)]) + return str(response.content) + except Exception: + pass + + try: + # Assume Opto/AutoGen interface + # llm(messages=...) 
returns a response object with choices + response = llm(messages=[{"role": "user", "content": prompt}]) + + # Handle object access + if hasattr(response, "choices") and response.choices: + choice = response.choices[0] + if hasattr(choice, "message") and hasattr(choice.message, "content"): + return str(choice.message.content) + + # Handle dict access + if isinstance(response, dict) and "choices" in response and response["choices"]: + choice = response["choices"][0] + if "message" in choice and "content" in choice["message"]: + return str(choice["message"]["content"]) + + except Exception: + pass + + return original_reasons + + +def make_document_embedding_analysis_eval( + mode: str = "dea", + *, + llm: Optional[Any] = None, + weight_llm: float = 0.5, + weight_dea: float = 0.5, + doc_eval_kwargs: Optional[Dict[str, Any]] = None, + dea_score_key: Optional[str] = None, +) -> EvalFn: + """ + Build an EvalFn backed by document_embedding_analysis.common.doc_eval.evaluate_document. + + eval_data expected keys: + - solution: dict (required for DEA) + - turns: list (optional) + - content_type: "markdown"|"latex" (optional, default "markdown") + - doc_eval_kwargs: dict (optional overrides per-example) + """ + mode = (mode or "").lower().strip() + + # Default: disable enhanced metrics (Prometheus, WriteHere) unless explicitly enabled + base_kwargs = {"use_enhanced_metrics": False} + if doc_eval_kwargs: + base_kwargs.update(doc_eval_kwargs) + + def _eval( + answer: str, + llm_score: float, + llm_metrics: Dict[str, float], + reasons: str, + otlp: Dict[str, Any], + eval_data: Dict[str, Any], + ) -> Tuple[float, Dict[str, float], str]: + evaluate_document, _mod = _try_import_evaluate_document() + if evaluate_document is None: + return llm_score, dict(llm_metrics), default_feedback(llm_score, dict(llm_metrics), reasons) + + solution = eval_data.get("solution") + if solution is None: + return llm_score, dict(llm_metrics), default_feedback(llm_score, dict(llm_metrics), reasons) + + 
def make_document_embedding_analysis_eval(
    mode: str = "dea",
    *,
    llm: Optional[Any] = None,
    weight_llm: float = 0.5,
    weight_dea: float = 0.5,
    doc_eval_kwargs: Optional[Dict[str, Any]] = None,
    dea_score_key: Optional[str] = None,
) -> EvalFn:
    """
    Build an EvalFn backed by document_embedding_analysis.common.doc_eval.evaluate_document.

    eval_data expected keys:
      - solution: dict (required for DEA)
      - turns: list (optional)
      - content_type: "markdown"|"latex" (optional, default "markdown")
      - doc_eval_kwargs: dict (optional overrides per-example)

    ``weight_llm`` / ``weight_dea`` are retained for API compatibility; the
    current modes do not blend scores with them.
    """
    mode = (mode or "").lower().strip()

    # Enhanced metrics (Prometheus, WriteHere) stay disabled unless the
    # caller explicitly turns them on via doc_eval_kwargs.
    base_kwargs: Dict[str, Any] = {"use_enhanced_metrics": False}
    if doc_eval_kwargs:
        base_kwargs.update(doc_eval_kwargs)

    def _eval(
        answer: str,
        llm_score: float,
        llm_metrics: Dict[str, float],
        reasons: str,
        otlp: Dict[str, Any],
        eval_data: Dict[str, Any],
    ) -> Tuple[float, Dict[str, float], str]:
        def _passthrough() -> Tuple[float, Dict[str, float], str]:
            # Fall back to the LLM-as-judge result untouched.
            m = dict(llm_metrics)
            return llm_score, m, default_feedback(llm_score, m, reasons)

        evaluate_document, _mod = _try_import_evaluate_document()
        if evaluate_document is None:
            return _passthrough()

        solution = eval_data.get("solution")
        if solution is None:
            return _passthrough()

        turns = eval_data.get("turns") or []
        content_type = eval_data.get("content_type") or "markdown"

        call_kwargs = dict(base_kwargs)
        per_example = eval_data.get("doc_eval_kwargs")
        if isinstance(per_example, dict):
            call_kwargs.update(per_example)

        try:
            result = evaluate_document(
                answer,
                turns=turns,
                solution=solution,
                content_type=content_type,
                **call_kwargs,
            )
        except Exception as e:
            # DEA failure: keep the LLM score but flag the error in metrics.
            failed = dict(llm_metrics)
            failed["dea.error"] = 1.0
            feedback = json.dumps(
                {
                    "score": llm_score,
                    "reasons": reasons,
                    "metrics": failed,
                    "dea_exception": repr(e),
                }
            )
            return llm_score, failed, feedback

        if not isinstance(result, dict):
            return _passthrough()

        dea_scores = result.get("dea_evaluation_scores") or {}
        article_metrics = result.get("article_metrics") or {}
        prometheus_scores = result.get("prometheus_scores") or {}
        writehere_scores = result.get("writehere_scores") or {}

        # Backward compatibility: base metrics stay the LLM-as-judge ones.
        metrics: Dict[str, float] = dict(llm_metrics)

        # DEA metrics (numeric values only).
        if isinstance(dea_scores, Mapping):
            for name, value in dea_scores.items():
                try:
                    metrics[f"dea.{name}"] = float(value)
                except Exception:
                    continue

        # Article metrics: ROUGE f-scores + entity recall.
        if isinstance(article_metrics, Mapping):
            rouge_scores = article_metrics.get("rouge_scores") or {}
            if isinstance(rouge_scores, Mapping):
                for rouge_name, vals in rouge_scores.items():
                    if isinstance(vals, Mapping) and "f" in vals:
                        try:
                            metrics[f"{rouge_name}_f"] = float(vals["f"])
                        except Exception:
                            pass
            if "entity_recall" in article_metrics:
                try:
                    metrics["entity_recall"] = float(article_metrics["entity_recall"])
                except Exception:
                    pass

        # Enhanced metrics, when enabled upstream.
        for prefix, table in (("prometheus", prometheus_scores), ("writehere", writehere_scores)):
            if isinstance(table, Mapping):
                for name, value in table.items():
                    if isinstance(value, (int, float)):
                        metrics[f"{prefix}.{name}"] = float(value)

        # Scalar DEA score: explicit key, else robust aggregate, else LLM score.
        dea_scalar: Optional[float] = None
        if dea_score_key and isinstance(dea_scores, Mapping) and dea_score_key in dea_scores:
            try:
                dea_scalar = float(dea_scores[dea_score_key])
            except Exception:
                dea_scalar = None
        if dea_scalar is None and isinstance(dea_scores, Mapping):
            dea_scalar = _dea_overall_from_scores(dea_scores)
        if dea_scalar is None:
            dea_scalar = llm_score

        final_reasons = reasons
        if mode == "dea":
            score = float(dea_scalar)
        elif mode == "hybrid":
            # Optimize on the DEA score but enrich the feedback via LLM
            # synthesis (the benchmark measure stays a DEA measure).
            score = float(dea_scalar)
            if llm:
                final_reasons = _synthesize_hybrid_feedback(llm, answer, reasons, dea_scores)
        elif mode == "llm":
            # Optimize on the LLM score; DEA metrics stay in the payload
            # for benchmarking purposes.
            score = llm_score
        else:  # unknown mode -> behave like "llm"
            score = llm_score

        payload: Dict[str, Any] = {
            "score": score,
            "reasons": final_reasons,
            "metrics": metrics,
            "dea_evaluation_scores": dea_scores,
            "article_metrics": article_metrics,
            "prometheus_scores": prometheus_scores,
            "writehere_scores": writehere_scores,
            # DEA score is always recorded for benchmark extraction,
            # regardless of which score drives the optimization.
            "benchmark_dea_score": float(dea_scalar)
        }
        return score, metrics, json.dumps(payload)

    return _eval
@dataclass
class InstrumentedGraph:
    """Instrumented LangGraph wrapper with telemetry.

    Attributes
    ----------
    graph : CompiledGraph
        The compiled LangGraph.
    session : TelemetrySession
        Manages OTEL tracing and export.
    tracing_llm : TracingLLM
        LLM wrapper with dual semantic conventions.
    templates : dict
        Current prompt templates (keyed by param name).
    bindings : dict
        Mapping from param key -> ``Binding`` (for ``apply_updates``).
    service_name : str
        OTEL service / scope name.
    """

    graph: Any  # CompiledGraph
    session: TelemetrySession
    tracing_llm: TracingLLM
    templates: Dict[str, str] = field(default_factory=dict)
    bindings: Dict[str, Binding] = field(default_factory=dict)
    service_name: str = "langgraph-agent"
    input_key: str = "query"
    output_key: Optional[str] = None

    # Active root span, exposed so eval_fn can attach reward spans to it.
    _root_span: Any = field(default=None, repr=False, init=False)

    def _hint_from(self, state: Any) -> str:
        # Pull the configured input key out of dict-shaped states to use
        # as the root-span query hint; non-dict states yield no hint.
        if isinstance(state, dict):
            return str(state.get(self.input_key, ""))
        return ""

    @contextmanager
    def _root_invocation_span(self, query_hint: str = ""):
        """Open the per-invocation root span (D9).

        Every node span started inside this context becomes a child of the
        root span, so one whole invocation shares a single trace ID.
        """
        with self.session.activate():
            with self.session.tracer.start_as_current_span(
                f"{self.service_name}.invoke"
            ) as root_sp:
                root_sp.set_attribute("langgraph.service", self.service_name)
                if query_hint:
                    root_sp.set_attribute("langgraph.query", str(query_hint)[:200])
                self._root_span = root_sp
                try:
                    yield root_sp
                finally:
                    # Always clear, even if the graph invocation raised.
                    self._root_span = None

    def invoke(self, state: Any, **kwargs: Any) -> Dict[str, Any]:
        """Run the graph under a root invocation span and capture telemetry.

        The root span wraps the entire graph invocation so that all node
        spans share a single trace ID (D9).
        """
        with self._root_invocation_span(self._hint_from(state)) as root_sp:
            result = self.graph.invoke(state, **kwargs)
            # Attach a generic output preview to the root span when configured.
            if (
                isinstance(result, dict)
                and self.output_key
                and self.output_key in result
            ):
                root_sp.set_attribute(
                    "langgraph.output.preview",
                    str(result[self.output_key])[:500],
                )
            return result

    def stream(self, state: Any, **kwargs: Any) -> Iterator[Dict[str, Any]]:
        """Stream graph execution with telemetry."""
        with self._root_invocation_span(self._hint_from(state)):
            yield from self.graph.stream(state, **kwargs)
def instrument_graph(
    graph: Any = None,
    *,
    session: Optional[TelemetrySession] = None,
    service_name: str = "langgraph-agent",
    trainable_keys: Optional[Set[str]] = None,
    enable_code_optimization: bool = False,
    llm: Optional[Any] = None,
    emit_genai_child_spans: bool = True,
    bindings: Optional[Dict[str, Binding]] = None,
    in_place: bool = False,
    initial_templates: Optional[Dict[str, str]] = None,
    provider_name: str = "llm",
    llm_span_name: str = "llm.chat.completion",
    input_key: str = "query",
    output_key: Optional[str] = None,
) -> InstrumentedGraph:
    """Wrap a LangGraph with automatic OTEL instrumentation.

    Parameters
    ----------
    graph : StateGraph | CompiledGraph, optional
        The LangGraph to instrument; compiled automatically when it
        exposes a ``compile()`` method.
    session : TelemetrySession, optional
        Reuse an existing session; a new one is created otherwise.
    service_name : str
        OTEL service name for trace identification.
    trainable_keys : set[str] or None
        Node names whose prompts are trainable; ``None`` means all
        trainable (no hard-coded node names).
    enable_code_optimization : bool
        When *True*, emit ``param.__code_*`` attributes.
    llm : Any, optional
        LLM client; wrapped with ``TracingLLM``.
    emit_genai_child_spans : bool
        Emit ``gen_ai.*`` child spans for Agent Lightning compatibility.
    bindings : dict, optional
        Explicit ``{param_key: Binding}`` map; auto-derived from
        *initial_templates* when *None*.
    in_place : bool
        API flag documented as "avoid permanent mutation of the original
        graph" when *False*.  NOTE(review): not consumed by the current
        implementation — confirm intended semantics.
    initial_templates : dict, optional
        Starting prompt templates ``{param_name: template_str}``.
    provider_name : str
        LLM provider name for ``gen_ai.provider.name``.
    llm_span_name : str
        Name for child LLM spans (default ``"llm.chat.completion"``).
    input_key : str
        State key used as the root-span query hint (default ``"query"``).
    output_key : str, optional
        Result key holding the final answer; no preview is attached to
        the root span when *None*.

    Returns
    -------
    InstrumentedGraph
    """
    # Compile if the caller handed us an uncompiled StateGraph.
    compiled = graph.compile() if graph is not None and hasattr(graph, "compile") else graph

    # Session: reuse the caller's or create a fresh one.
    telemetry = session if session is not None else TelemetrySession(service_name=service_name)

    # Copy templates so later edits do not mutate the caller's dict.
    templates = dict(initial_templates or {})

    # Auto-derive dict-backed bindings when none were supplied.
    if bindings is None:
        bindings = {key: make_dict_binding(templates, key, kind="prompt") for key in templates}

    # Optional trainable-code emission.
    emit_code_param = None
    if enable_code_optimization:
        CODE_ATTR_MAX_CHARS = 10_000

        def _emit_code_param(span, code_key: str, code_fn: Any) -> None:
            # Attach the function source (hash first, possibly truncated)
            # as trainable param.__code_* attributes.
            try:
                src = inspect.getsource(code_fn)
            except Exception:
                src = repr(code_fn)
            digest = hashlib.sha256(src.encode("utf-8", errors="ignore")).hexdigest()
            was_truncated = len(src) > CODE_ATTR_MAX_CHARS
            if was_truncated:
                src = src[:CODE_ATTR_MAX_CHARS] + "\n# ... (truncated)"
            span.set_attribute(f"param.__code_{code_key}", src)
            span.set_attribute(f"param.__code_{code_key}.sha256", digest)
            span.set_attribute(f"param.__code_{code_key}.truncated", str(was_truncated))
            span.set_attribute(f"param.__code_{code_key}.trainable", True)

        emit_code_param = _emit_code_param

    tracing_llm = TracingLLM(
        llm=llm,
        tracer=telemetry.tracer,
        trainable_keys=trainable_keys,
        emit_code_param=emit_code_param,
        provider_name=provider_name,
        llm_span_name=llm_span_name,
        emit_llm_child_span=emit_genai_child_spans,
    )

    return InstrumentedGraph(
        graph=compiled,
        session=telemetry,
        tracing_llm=tracing_llm,
        templates=templates,
        bindings=bindings,
        service_name=service_name,
        input_key=input_key,
        output_key=output_key,
    )
def flush_otlp(
    exporter: InMemorySpanExporter,
    scope_name: str = "demo",
    *,
    clear: bool = True,
) -> Dict[str, Any]:
    """
    Serialize the exporter's finished spans into a minimal OTLP JSON payload.

    Parameters
    ----------
    exporter : InMemorySpanExporter
        The in-memory exporter holding collected spans.
    scope_name : str
        Scope name recorded in the OTLP payload.
    clear : bool
        When *True* (default) the exporter is emptied after flushing;
        when *False* the spans are left in place (peek mode).

    The output is compatible with
    trace/io/otel_adapter.py::otlp_traces_to_trace_json.
    """
    _KIND_NAMES = {
        0: "UNSPECIFIED",
        1: "INTERNAL",
        2: "SERVER",
        3: "CLIENT",
        4: "PRODUCER",
        5: "CONSUMER",
    }

    def _hex(value: int, width_bytes: int) -> str:
        # OTLP IDs are lowercase hex, two characters per byte.
        return f"{value:0{2 * width_bytes}x}"

    serialized: List[Dict[str, Any]] = []
    for span in exporter.get_finished_spans():
        raw_attrs = getattr(span, "attributes", {}) or {}
        kind = getattr(span, "kind", 1)
        if hasattr(kind, "value"):
            kind = kind.value
        parent = getattr(span, "parent", None)
        serialized.append(
            {
                "traceId": _hex(span.context.trace_id, 16),
                "spanId": _hex(span.context.span_id, 8),
                "parentSpanId": _hex(parent.span_id, 8) if parent else "",
                "name": getattr(span, "name", ""),
                "kind": _KIND_NAMES.get(kind, "INTERNAL"),
                # Fall back to "now" when a span carries no timestamps.
                "startTimeUnixNano": int(getattr(span, "start_time", None) or time.time_ns()),
                "endTimeUnixNano": int(getattr(span, "end_time", None) or time.time_ns()),
                "attributes": [
                    {"key": k, "value": {"stringValue": str(v)}}
                    for k, v in raw_attrs.items()
                ],
            }
        )

    if clear:
        exporter.clear()

    return {
        "resourceSpans": [
            {
                "resource": {"attributes": []},
                "scopeSpans": [
                    {
                        "scope": {"name": scope_name},
                        "spans": serialized,
                    }
                ],
            }
        ]
    }
class TracingLLM:
    """
    Design-3+ wrapper around an LLM client with dual semantic conventions.

    It opens an OTEL **parent** span per LLM node carrying ``param.*`` and
    ``inputs.*`` attributes (Trace-compatible), and optionally a **child**
    span with ``gen_ai.*`` attributes (Agent Lightning-compatible) tagged
    ``trace.temporal_ignore`` so TGJ temporal chaining is not broken.
    Trainable code parameters are emitted via ``emit_code_param`` when
    provided, and a failing provider raises ``LLMCallError`` instead of
    smuggling the error as assistant content (A1).

    Parameters
    ----------
    llm : Any
        Underlying LLM client (OpenAI-compatible interface).
    tracer : oteltrace.Tracer
        OTEL tracer for span creation.
    trainable_keys : Iterable[str] or None
        Keys whose prompts are trainable; ``None`` means all trainable,
        and the empty string ``""`` in the set also matches all.
    emit_code_param : callable, optional
        ``(span, key, fn) -> None``.
    provider_name : str
        Provider name for the ``gen_ai.provider.name`` attribute
        (e.g. ``"openai"``, ``"openrouter"``, ``"anthropic"``).
    llm_span_name : str
        Name for child LLM spans (default ``"llm.chat.completion"``).
    emit_llm_child_span : bool
        When *True*, emit Agent Lightning-compatible child spans.
    """

    def __init__(
        self,
        llm: Any,
        tracer: oteltrace.Tracer,
        *,
        trainable_keys: Optional[Iterable[str]] = None,
        emit_code_param: Optional[Any] = None,
        # -- dual semconv additions --
        provider_name: str = "llm",
        llm_span_name: str = "llm.chat.completion",
        emit_llm_child_span: bool = True,
    ) -> None:
        self.llm = llm
        self.tracer = tracer
        # ``trainable_keys is None`` means every key is trainable.
        self._trainable_keys_all = trainable_keys is None
        self.trainable_keys = set() if trainable_keys is None else set(trainable_keys)
        self.emit_code_param = emit_code_param
        # Derive the provider from a "provider/model" string when left generic.
        if provider_name == "llm":
            model_str = str(getattr(llm, "model", "") or "")
            if "/" in model_str:
                provider_name = model_str.split("/", 1)[0]
        self.provider_name = provider_name
        self.llm_span_name = llm_span_name
        self.emit_llm_child_span = emit_llm_child_span

    # ---- helpers ---------------------------------------------------------

    def _is_trainable(self, optimizable_key: Optional[str]) -> bool:
        """True when *optimizable_key* is trainable (all-keys mode, wildcard '' or listed)."""
        if optimizable_key is None:
            return False
        return (
            self._trainable_keys_all
            or "" in self.trainable_keys
            or optimizable_key in self.trainable_keys
        )

    def _record_llm_call(
        self,
        sp,
        *,
        template_name: Optional[str],
        template: Optional[str],
        optimizable_key: Optional[str],
        code_key: Optional[str],
        code_fn: Any,
        user_query: Optional[str],
        prompt: str,
        extra_inputs: Optional[Dict[str, str]] = None,
    ) -> None:
        """Attach ``param.*`` / ``inputs.*`` (Trace semconv) attributes to *sp*."""
        if template_name and template is not None:
            sp.set_attribute(f"param.{template_name}", template)
            sp.set_attribute(
                f"param.{template_name}.trainable",
                self._is_trainable(optimizable_key),
            )
        if code_key and code_fn is not None and self.emit_code_param:
            self.emit_code_param(sp, code_key, code_fn)

        sp.set_attribute("gen_ai.model", getattr(self.llm, "model", "llm"))
        sp.set_attribute("inputs.gen_ai.prompt", prompt)
        if user_query is not None:
            sp.set_attribute("inputs.user_query", user_query)
        for extra_key, extra_val in (extra_inputs or {}).items():
            sp.set_attribute(f"inputs.{extra_key}", extra_val)

    @staticmethod
    def _validate_content(content: Optional[str]) -> str:
        """Return *content* or raise ``LLMCallError`` on None/empty/error-marker text."""
        if content is None:
            raise LLMCallError("LLM returned None content")
        stripped = content.strip()
        if not stripped:
            raise LLMCallError("LLM returned empty content")
        # Detect error strings that were smuggled as content (A1).
        if stripped.startswith("[ERROR]"):
            raise LLMCallError(f"LLM provider returned an error: {stripped}")
        return content

    # ---- public API ------------------------------------------------------

    def node_call(
        self,
        *,
        span_name: str,
        template_name: Optional[str] = None,
        template: Optional[str] = None,
        optimizable_key: Optional[str] = None,
        code_key: Optional[str] = None,
        code_fn: Any = None,
        user_query: Optional[str] = None,
        extra_inputs: Optional[Dict[str, str]] = None,
        messages: Optional[List[Dict[str, Any]]] = None,
        **llm_kwargs: Any,
    ) -> str:
        """
        Invoke the wrapped LLM under an OTEL span.

        A parent span carries ``param.*`` / ``inputs.*`` (Trace-compatible);
        optionally a child span carries ``gen_ai.*`` attributes (Agent
        Lightning-compatible) tagged ``trace.temporal_ignore=true`` so it
        does not break TGJ chaining.

        Raises
        ------
        LLMCallError
            If the provider call fails or returns empty/error content.
        """

        def _extract_prompt() -> str:
            # Prefer the most recent user message; otherwise the last message.
            if not messages:
                return ""
            user_turns = [m for m in messages if m.get("role") == "user"]
            source = user_turns[-1] if user_turns else messages[-1]
            return source.get("content", "") or ""

        def _call_provider() -> str:
            resp = self.llm(messages=messages, **llm_kwargs)
            return self._validate_content(resp.choices[0].message.content)

        with self.tracer.start_as_current_span(span_name) as sp:
            self._record_llm_call(
                sp,
                template_name=template_name,
                template=template,
                optimizable_key=optimizable_key,
                code_key=code_key,
                code_fn=code_fn,
                user_query=user_query,
                prompt=_extract_prompt(),
                extra_inputs=extra_inputs or {},
            )

            # -- invoke LLM, optionally under a gen_ai child span --
            try:
                if not self.emit_llm_child_span:
                    content = _call_provider()
                else:
                    with self.tracer.start_as_current_span(self.llm_span_name) as llm_sp:
                        llm_sp.set_attribute("trace.temporal_ignore", "true")
                        llm_sp.set_attribute("gen_ai.operation.name", "chat")
                        llm_sp.set_attribute("gen_ai.provider.name", self.provider_name)
                        llm_sp.set_attribute(
                            "gen_ai.request.model",
                            getattr(self.llm, "model", "llm"),
                        )
                        try:
                            content = _call_provider()
                        except LLMCallError as e:
                            llm_sp.set_attribute("error", "true")
                            llm_sp.set_attribute("error.type", "LLMCallError")
                            llm_sp.set_attribute("error.message", str(e)[:500])
                            raise
                        except Exception as exc:
                            llm_sp.set_attribute("error", "true")
                            llm_sp.set_attribute("error.type", type(exc).__name__)
                            llm_sp.set_attribute("error.message", str(exc)[:500])
                            raise
                        llm_sp.set_attribute("gen_ai.output.preview", (content or "")[:500])
            except LLMCallError as e:
                sp.set_attribute("error", "true")
                sp.set_attribute("error.type", "LLMCallError")
                sp.set_attribute("error.message", str(e)[:500])
                raise
            except Exception as exc:
                sp.set_attribute("error", "true")
                sp.set_attribute("error.type", type(exc).__name__)
                sp.set_attribute("error.message", str(exc)[:500])
                raise LLMCallError(f"LLM provider call failed: {exc}") from exc

            return content
resp.choices[0].message.content + content = self._validate_content(content) + except LLMCallError as e: + sp.set_attribute("error", "true") + sp.set_attribute("error.type", "LLMCallError") + sp.set_attribute("error.message", str(e)[:500]) + raise + except Exception as exc: + err_type = type(exc).__name__ + sp.set_attribute("error", "true") + sp.set_attribute("error.type", err_type) + sp.set_attribute("error.message", str(exc)[:500]) + raise LLMCallError( + f"LLM provider call failed: {exc}" + ) from exc + + return content + + +DEFAULT_EVAL_METRIC_KEYS: Mapping[str, str] = { + "score": "eval.score", + "answer_relevance": "eval.answer_relevance", + "groundedness": "eval.groundedness", +} + + +def _attrs_to_dict(attrs: List[Dict[str, Any]]) -> Dict[str, str]: + out: Dict[str, str] = {} + for a in attrs or []: + key = a.get("key") + val = a.get("value", {}) + if key is None: + continue + if isinstance(val, dict) and "stringValue" in val: + out[key] = val["stringValue"] + else: + out[key] = str(val) + return out + + +def extract_eval_metrics_from_otlp( + otlp: Dict[str, Any], + *, + evaluator_span_name: str = "evaluator", + score_key: str = "eval.score", + metric_keys: Optional[Mapping[str, str]] = None, + default_score: float = 0.5, + default_metric: float = 0.5, +) -> Tuple[float, Dict[str, float], str]: + """ + Extract evaluation score + metrics + reasons from an OTLP payload. 
+ """ + metric_keys = metric_keys or DEFAULT_EVAL_METRIC_KEYS + metrics: Dict[str, float] = {} + reasons = "" + score = default_score + + found = False + for rs in otlp.get("resourceSpans", []): + for ss in rs.get("scopeSpans", []): + for sp in ss.get("spans", []): + if sp.get("name") != evaluator_span_name: + continue + attrs = _attrs_to_dict(sp.get("attributes", [])) + raw_score = attrs.get(score_key) + if raw_score is not None: + try: + score = float(raw_score) + except ValueError: + score = default_score + reasons = attrs.get("eval.reasons", "") or "" + + for friendly, attr_key in metric_keys.items(): + raw = attrs.get(attr_key) + if raw is None: + continue + try: + metrics[friendly] = float(raw) + except ValueError: + metrics[friendly] = default_metric + + found = True + break + if found: + break + if found: + break + + if not metrics and metric_keys: + metrics = {k: default_metric for k in metric_keys.keys()} + + return score, metrics, reasons diff --git a/opto/trace/io/optimization.py b/opto/trace/io/optimization.py new file mode 100644 index 00000000..0d8fc774 --- /dev/null +++ b/opto/trace/io/optimization.py @@ -0,0 +1,588 @@ +""" +opto.trace.io.optimization +=========================== + +One-liner ``optimize_graph()`` for running end-to-end optimization on an +instrumented LangGraph: + + instrument → invoke → flush OTLP → TGJ → ingest → optimizer → apply_updates + +This module also defines ``EvalResult``, ``EvalFn``, ``RunResult``, and +``OptimizationResult`` as the public data contracts. 
+""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass, field +from typing import ( + Any, + Callable, + Dict, + List, + Optional, + Union, +) + +from opto.trace.io.bindings import Binding, apply_updates +from opto.trace.io.instrumentation import InstrumentedGraph + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Evaluation contract +# --------------------------------------------------------------------------- + + +@dataclass +class EvalResult: + """Normalised output of an evaluation function. + + Attributes + ---------- + score : float or None + Numeric reward (some evaluators return only text feedback). + feedback : str + Textual feedback (Trace / TextGrad-compatible). + metrics : dict + Free-form metrics for logging / diagnostics. + """ + + score: Optional[float] = None + feedback: str = "" + metrics: Dict[str, Any] = field(default_factory=dict) + + +# eval_fn may return float | str | dict | EvalResult +EvalFn = Callable[[Dict[str, Any]], Union[float, str, Dict[str, Any], EvalResult]] + + +def _normalise_eval(raw: Any) -> EvalResult: + """Normalise any ``eval_fn`` return value into ``EvalResult``.""" + if isinstance(raw, EvalResult): + return raw + if isinstance(raw, (int, float)): + return EvalResult(score=float(raw)) + if isinstance(raw, str): + # Attempt JSON parse + try: + d = json.loads(raw) + if isinstance(d, dict): + return EvalResult( + score=d.get("score"), + feedback=str(d.get("feedback", d.get("reasons", ""))), + metrics=d, + ) + except (json.JSONDecodeError, TypeError): + pass + return EvalResult(feedback=raw) + if isinstance(raw, dict): + return EvalResult( + score=raw.get("score"), + feedback=str(raw.get("feedback", raw.get("reasons", ""))), + metrics=raw, + ) + return EvalResult(feedback=str(raw)) + + +# --------------------------------------------------------------------------- +# Run / Optimization results +# 
---------------------------------------------------------------------------
+
+
+@dataclass
+class RunResult:
+    """Result of a single graph execution.
+
+    Attributes
+    ----------
+    answer : Any
+        The value extracted from the graph's result (or the full result).
+    score : float or None
+        Evaluation score for this run, if any.
+    feedback : str
+        Textual evaluation feedback.
+    metrics : dict
+        Free-form evaluation metrics.
+    otlp : dict
+        The OTLP payload flushed after this run.
+    """
+
+    answer: Any
+    score: Optional[float]
+    feedback: str
+    metrics: Dict[str, Any]
+    otlp: Dict[str, Any]
+
+
+@dataclass
+class OptimizationResult:
+    """Result of ``optimize_graph()``.
+
+    Attributes
+    ----------
+    baseline_score : float
+        Average score of the baseline (iteration 0) run.
+    best_score : float
+        Highest average score across all iterations.
+    best_iteration : int
+        Iteration index that produced ``best_score``.
+    best_parameters : dict
+        Snapshot of all parameter values at ``best_iteration`` (E11).
+    best_updates : dict
+        The updates dict that was applied to reach ``best_parameters``.
+    final_parameters : dict
+        Parameter values after the last iteration.
+    score_history : list[float]
+        Average scores per iteration.
+    all_runs : list[list[RunResult]]
+        All run results grouped by iteration.
+    """
+
+    baseline_score: float
+    best_score: float
+    best_iteration: int
+    best_parameters: Dict[str, Any]
+    best_updates: Dict[str, Any]
+    final_parameters: Dict[str, Any]
+    score_history: List[float]
+    all_runs: List[List[RunResult]]
+
+
+# ---------------------------------------------------------------------------
+# Default eval_fn (LLM-as-judge via evaluator span)
+# ---------------------------------------------------------------------------
+
+
+def _default_eval_fn(payload: Dict[str, Any]) -> EvalResult:
+    """Extract evaluation from the OTLP trace's evaluator span, if present."""
+    # Local import — presumably avoids a circular dependency with the
+    # runtime module; confirm before hoisting to module level.
+    from opto.trace.io.langgraph_otel_runtime import extract_eval_metrics_from_otlp
+
+    otlp = payload.get("otlp", {})
+    score, metrics, reasons = extract_eval_metrics_from_otlp(otlp)
+    return EvalResult(score=score, feedback=reasons, metrics=metrics)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def 
_snapshot_parameters(bindings: Dict[str, Binding]) -> Dict[str, Any]:
+    """Take a snapshot of all current parameter values."""
+    snap: Dict[str, Any] = {}
+    for key, binding in bindings.items():
+        try:
+            snap[key] = binding.get()
+        except Exception:
+            # Best-effort: a broken binding must not abort the whole
+            # snapshot; record an empty value for that key instead.
+            snap[key] = ""
+    return snap
+
+
+def _deduplicate_param_nodes(param_nodes: list) -> list:
+    """Deduplicate trainable ParameterNodes by base name (C7).
+
+    When the same prompt key appears in multiple TGJ docs (e.g. from
+    multiple queries in the same iteration), the optimizer should see
+    each unique trainable parameter only once.
+
+    Uses the ``name`` attribute (before scope-suffix) as the dedup key,
+    falling back to ``py_name`` stripped of trailing digits.
+    """
+    import re
+
+    seen: Dict[str, Any] = {}
+    for n in param_nodes:
+        # Prefer the raw name attribute (e.g. "planner_prompt") which
+        # doesn't have the scope suffix. Fall back to py_name with
+        # trailing digits stripped (e.g. "planner_prompt0" → "planner_prompt").
+        raw_name = getattr(n, "_name", None) or getattr(n, "name", None)
+        if raw_name is None:
+            raw_name = getattr(n, "py_name", None) or str(id(n))
+        # Strip trailing digits added by scope management
+        # NOTE(review): this also collapses names that legitimately end in
+        # digits (e.g. "prompt_v2" and "prompt_v23" both map to "prompt_v");
+        # confirm scope naming never produces such collisions.
+        key = re.sub(r"\d+$", "", str(raw_name))
+        if key not in seen:
+            # First occurrence wins; later duplicates are dropped.
+            seen[key] = n
+    return list(seen.values())
+
+
+def _select_output_node(nodes: dict) -> Any:
+    """Select the sink (final top-level) MessageNode (C8).
+
+    Excludes child spans — identified by the ``trace.temporal_ignore``
+    attribute set during instrumentation — and picks the *last*
+    top-level MessageNode.
+
+    This is provider-agnostic: it does not assume any specific LLM
+    provider naming convention.
+    """
+    from opto.trace.nodes import MessageNode as _MN
+
+    # Collect all MessageNodes
+    msg_nodes = [n for n in nodes.values() if isinstance(n, _MN)]
+    if not msg_nodes:
+        return None
+
+    # Filter out child spans using the trace.temporal_ignore marker
+    # that was set during instrumentation (see TracingLLM.node_call).
+ # Fall back to name-based heuristic only as a safety net. + top_level = [] + for n in msg_nodes: + info = getattr(n, "info", None) or {} + otel_info = info.get("otel", {}) if isinstance(info, dict) else {} + + # Primary gate: trace.temporal_ignore attribute + if str(otel_info.get("temporal_ignore", "false")).lower() in ("true", "1", "yes"): + continue + + # Secondary check: the node's description/data may carry the flag + desc = getattr(n, "description", None) or "" + if isinstance(desc, dict): + if str(desc.get("trace.temporal_ignore", "false")).lower() in ("true", "1", "yes"): + continue + + top_level.append(n) + + if not top_level: + # Fall back to all msg nodes if filtering was too aggressive + top_level = msg_nodes + + # Return the last top-level node (the sink / final node) + return top_level[-1] + + +# --------------------------------------------------------------------------- +# optimize_graph +# --------------------------------------------------------------------------- + + +def optimize_graph( + graph: InstrumentedGraph, + queries: Union[List[str], List[Dict[str, Any]]], + *, + iterations: int = 5, + optimizer: Optional[Any] = None, + optimizer_kwargs: Optional[Dict[str, Any]] = None, + eval_fn: Optional[EvalFn] = None, + initial_templates: Optional[Dict[str, str]] = None, + bindings: Optional[Dict[str, Binding]] = None, + apply_updates_flag: bool = True, + include_log_doc: bool = False, + output_key: Optional[str] = None, + on_iteration: Optional[ + Callable[[int, List[RunResult], Dict[str, Any]], None] + ] = None, +) -> OptimizationResult: + """Run a complete optimization loop on an instrumented LangGraph. + + Flow per iteration + ------------------ + 1. Invoke graph for each query and capture OTLP traces. + 2. Evaluate each run via ``eval_fn`` (→ ``EvalResult``). + 3. Convert OTLP → TGJ → Trace nodes via ``ingest_tgj``. + 4. Propagate feedback through the Trace graph. + 5. Ask the optimizer for parameter updates. + 6. 
Apply updates via ``apply_updates(updates, bindings)``. + + Parameters + ---------- + graph : InstrumentedGraph + The instrumented graph (from ``instrument_graph``). + queries : list + Test queries (strings) or full state dicts. + iterations : int + Number of optimisation iterations (after baseline). + optimizer : OptoPrimeV2, optional + Pre-configured optimizer. Created automatically if absent. + optimizer_kwargs : dict, optional + Arguments passed to optimizer creation. + eval_fn : EvalFn, optional + Custom evaluation function. Falls back to evaluator-span extraction. + initial_templates : dict, optional + Overrides for initial prompt templates. + bindings : dict, optional + Overrides for graph.bindings. + apply_updates_flag : bool + If *True* (default), apply parameter updates each iteration. + include_log_doc : bool + If *True*, emit additional ``log_doc`` TGJ artefacts. + output_key : str, optional + Key in the result dict that holds the graph's final answer. + Used for error fallback and eval payload. If *None*, + ``optimize_graph`` passes the full result dict to eval. + on_iteration : callable, optional + ``(iter_num, runs, updates_dict) -> None`` progress callback. + + Returns + ------- + OptimizationResult + """ + # Resolve bindings / templates + effective_bindings = bindings or graph.bindings + if initial_templates: + graph.templates.update(initial_templates) + + eval_fn = eval_fn or _default_eval_fn + + graph.session.flush_otlp(clear=True) + + # If not provided, fall back to the graph's configured output_key. + # If both are provided and disagree, prefer the explicit argument. 
+ graph_output_key = getattr(graph, "output_key", None) + if output_key is None: + output_key = graph_output_key + elif graph_output_key and output_key != graph_output_key: + logger.debug( + "optimize_graph: output_key=%r overrides graph.output_key=%r", + output_key, + graph_output_key, + ) + + score_history: List[float] = [] + all_runs: List[List[RunResult]] = [] + best_score = float("-inf") + best_iteration = 0 + best_updates: Dict[str, Any] = {} + best_parameters: Dict[str, Any] = _snapshot_parameters(effective_bindings) + last_applied_updates: Dict[str, Any] = {} + + param_cache: Dict[str, Any] = {} + + # -- lazy imports for Trace framework -- + _ingest_tgj = None + _GraphPropagator = None + _batchify = None + _optimizer = optimizer + + def _ensure_trace_imports(): + nonlocal _ingest_tgj, _GraphPropagator, _batchify + if _ingest_tgj is None: + from opto.trace.io.tgj_ingest import ingest_tgj as _fn + _ingest_tgj = _fn + if _GraphPropagator is None: + try: + from opto.trace.propagators.graph_propagator import GraphPropagator + _GraphPropagator = GraphPropagator + except ImportError: + _GraphPropagator = None + if _batchify is None: + from opto.trainer.algorithms.basic_algorithms import batchify + _batchify = batchify + + def _ensure_optimizer(param_nodes): + nonlocal _optimizer + if _optimizer is not None: + return + try: + from opto.optimizers.optoprime_v2 import OptoPrimeV2 + kw = dict(optimizer_kwargs or {}) + _optimizer = OptoPrimeV2(param_nodes, **kw) + except ImportError: + logger.warning( + "Could not import OptoPrime; running in eval-only mode " + "(no parameter updates)." 
+ ) + + _input_key = getattr(graph, "input_key", "query") or "query" + + def _make_state(query: Any) -> Dict[str, Any]: + if isinstance(query, dict): + return query + return {_input_key: query} + + # ---- iteration loop --------------------------------------------------- + + total_iters = iterations + 1 # baseline + N iterations + + for iteration in range(total_iters): + is_baseline = iteration == 0 + # Snapshot which updates were applied to produce this iteration's params + applied_updates_for_this_iter = dict(last_applied_updates) + label = "baseline" if is_baseline else f"iteration {iteration}" + logger.info("optimize_graph: running %s ...", label) + print(f" {'Running baseline' if is_baseline else f'Iteration {iteration}/{iterations}'}...") + + runs: List[RunResult] = [] + for qi, query in enumerate(queries): + state = _make_state(query) + + # E12: Manually control root span lifecycle so we can attach + # eval attributes *before* the span closes and gets exported. + query_hint = str(query)[:200] if not isinstance(query, dict) else str(query)[:200] + invocation_failed = False + result = None + er = None + + with graph._root_invocation_span(query_hint) as root_sp: + try: + # Invoke the underlying compiled graph (not graph.invoke + # which would create a redundant root span). + result = graph.graph.invoke(state) + except Exception as exc: + logger.warning("Graph invocation failed: %s", exc) + result = {"_error": str(exc)} + invocation_failed = True + root_sp.set_attribute("error", "true") + root_sp.set_attribute("error.message", str(exc)[:500]) + + # E12: Peek at OTLP (child spans are finished and collected, + # but root span is still open → not yet in exporter). 
+ otlp_peek = graph.session.flush_otlp(clear=False) + + # Extract the output value (generic — no hardcoded key) + if output_key and isinstance(result, dict): + answer = result.get(output_key, result) + else: + answer = result + + # A4: If invocation failed, force score=0 + if invocation_failed: + er = EvalResult( + score=0.0, + feedback=f"Invocation failed: {result.get('_error', 'unknown')}", + ) + else: + eval_payload = { + "query": query, + "answer": answer, + "result": result, + "otlp": otlp_peek, + "iteration": iteration, + } + er = _normalise_eval(eval_fn(eval_payload)) + + # E12: Attach eval score on the root span (still open) + if er.score is not None: + root_sp.set_attribute("eval.score", str(er.score)) + if er.feedback: + root_sp.set_attribute( + "eval.feedback", str(er.feedback)[:500] + ) + # Root span closes here → exported to the in-memory exporter + + # Now flush OTLP with clear=True — includes root span + eval attrs + otlp = graph.session.flush_otlp(clear=True) + + runs.append( + RunResult( + answer=answer, + score=er.score, + feedback=er.feedback, + metrics=er.metrics, + otlp=otlp, + ) + ) + + q_display = str(query)[:40] if not isinstance(query, dict) else str(query)[:40] + print( + f" Query {qi + 1}/{len(queries)}: {q_display}... 
" + f"score={er.score if er.score is not None else 'N/A'}" + ) + + # Compute average score + scored_runs = [r for r in runs if r.score is not None] + if scored_runs: + avg_score = sum(r.score for r in scored_runs) / len(scored_runs) + else: + avg_score = 0.0 + + score_history.append(avg_score) + all_runs.append(runs) + + # E11: Track best parameters snapshot + if avg_score > best_score: + best_score = avg_score + best_iteration = iteration + best_parameters = _snapshot_parameters(effective_bindings) + best_updates = dict(applied_updates_for_this_iter) + marker = " * NEW BEST" if not is_baseline else "" + else: + marker = "" + print(f" {'Baseline' if is_baseline else f'Iteration {iteration}'} average: {avg_score:.4f}{marker}") + + # -- optimization step (skip for baseline) -- + if not is_baseline and effective_bindings: + _ensure_trace_imports() + + # Convert OTLP → TGJ → Trace nodes + updates: Dict[str, Any] = {} + try: + # C7: Collect and deduplicate param nodes across all runs + all_param_nodes: list = [] + all_output_nodes: list = [] + + for run in runs: + tgj_docs = graph.session._flush_tgj_from_otlp(run.otlp) + if not tgj_docs: + from opto.trace.io.otel_adapter import otlp_traces_to_trace_json + tgj_docs = otlp_traces_to_trace_json( + run.otlp, + agent_id_hint=graph.session.service_name, + use_temporal_hierarchy=True, + ) + + for doc in tgj_docs: + nodes = _ingest_tgj(doc, param_cache=param_cache) + + from opto.trace.nodes import ParameterNode as _PN + param_nodes = [ + n for n in nodes.values() + if isinstance(n, _PN) and n.trainable + ] + all_param_nodes.extend(param_nodes) + + # C8: Select output node properly + output_node = _select_output_node(nodes) + if output_node is not None: + all_output_nodes.append((output_node, run)) + + # C7: Deduplicate before passing to optimizer + unique_params = _deduplicate_param_nodes(all_param_nodes) + + if not unique_params: + logger.info("No trainable ParameterNodes found; skipping optimizer step.") + else: + 
_ensure_optimizer(unique_params) + + if _optimizer is not None and all_output_nodes: + targets = [node for node, _ in all_output_nodes] + feedbacks = [] + for _node, _run in all_output_nodes: + if _run.score is not None: + feedbacks.append(f"Score: {_run.score:.4f}") + else: + feedbacks.append("No score") + + target = _batchify(*targets) + feedback = _batchify(*feedbacks).data + + try: + _optimizer.zero_feedback() + _optimizer.backward(target, feedback) + raw_updates = _optimizer.step() + + if isinstance(raw_updates, dict): + updates.update(raw_updates) + except Exception as exc: + logger.warning( + "Optimizer step failed: %s", exc, exc_info=True + ) + + except Exception as exc: + logger.warning( + "TGJ conversion / optimization failed: %s", exc, exc_info=True + ) + + # Apply updates + if updates and apply_updates_flag: + try: + applied = apply_updates(updates, effective_bindings, strict=False) + last_applied_updates = dict(applied) + logger.info("Applied updates: %s", sorted(applied.keys())) + except Exception as exc: + logger.warning("apply_updates failed: %s", exc, exc_info=True) + + if on_iteration: + on_iteration(iteration, runs, updates) + + # -- build final parameters snapshot -- + final_params = _snapshot_parameters(effective_bindings) + + return OptimizationResult( + baseline_score=score_history[0] if score_history else 0.0, + best_score=best_score, + best_iteration=best_iteration, + best_parameters=best_parameters, + best_updates=best_updates, + final_parameters=final_params, + score_history=score_history, + all_runs=all_runs, + ) diff --git a/opto/trace/io/otel_adapter.py b/opto/trace/io/otel_adapter.py new file mode 100644 index 00000000..b469a85f --- /dev/null +++ b/opto/trace/io/otel_adapter.py @@ -0,0 +1,223 @@ +from __future__ import annotations +from typing import Dict, Any, List + + +PROFILE_VERSION = "trace-json/1.0+otel" + + +def _sanitize(name: str) -> str: + return (name or "node").replace(":", "_") + + +def _op(attrs, span): + if 
"gen_ai.operation" in attrs or "gen_ai.model" in attrs: + return "llm_call" + if "rpc.system" in attrs: + return f"rpc:{attrs['rpc.system']}" + if "http.method" in attrs: + return f"http:{attrs['http.method']}".lower() + if "db.system" in attrs: + return f"db:{attrs['db.system']}" + return (span.get("kind", "op") or "op").lower() + + +def _attrs(l): + out = {} + for a in l or []: + k = a["key"] + v = a.get("value", {}) + if isinstance(v, dict) and v: + out[k] = next(iter(v.values())) + return out + + +def _lift_inputs(attrs: Dict[str, Any]) -> Dict[str, str]: + inputs = {} + for k, v in list(attrs.items()): + if k.startswith("inputs.") and isinstance(v, str): + role = k.split(".", 1)[1] + if v.startswith("span:"): + inputs[role] = v.split(":", 1)[1] + else: + inputs[role] = v + for k in ("gen_ai.prompt", "gen_ai.system", "gen_ai.temperature", "db.statement", "http.url"): + if k in attrs and f"inputs.{k}" not in attrs: + inputs[k] = f"lit:{k}" + return inputs + + +def _params(attrs: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: + out = {} + for k, v in attrs.items(): + if k.startswith("param.") and not k.endswith(".trainable"): + name = k.split(".", 1)[1] + out[name] = { + "value": v, + "trainable": str(raw).strip().lower() in ("1", "true", "yes", "y", "on") if isinstance((raw := attrs.get(f"param.{name}.trainable", False)), str) else bool(raw), + } + return out + + +def otlp_traces_to_trace_json(otlp: Dict[str, Any], agent_id_hint: str = "", use_temporal_hierarchy: bool = False) -> List[Dict[str, Any]]: + """Convert OTLP traces to Trace-Graph JSON format. + + Args: + otlp: OTLP JSON payload + agent_id_hint: Optional service name hint + use_temporal_hierarchy: If True, create parent-child relationships based on temporal ordering + (earlier spans become parents of later spans) when no explicit parent exists. + This enables backward propagation across sequential agent calls. 
+ + Returns: + List of TGJ documents + """ + docs = [] + for rs in otlp.get("resourceSpans", []): + rattrs = _attrs(rs.get("resource", {}).get("attributes", [])) + svc = rattrs.get("service.name", agent_id_hint or "service") + inst = rattrs.get("service.instance.id", "0") + for ss in rs.get("scopeSpans", []): + scope_nm = ss.get("scope", {}).get("name", "scope") + nodes = {} + trace_id = None + + # First pass: collect all spans with their timestamps for temporal ordering + spans_with_time = [] + for sp in ss.get("spans", []): + spans_with_time.append((sp.get("startTimeUnixNano", 0), sp)) + + # Sort by start time to establish temporal order + spans_with_time.sort(key=lambda x: x[0]) + + # Track the most recent span for temporal parenting + prev_span_id = None + # Map span_id -> actual TGJ node_id (for stable parent references) + span_to_node_id: Dict[str, str] = {} + + # Identify root invocation spans (e.g. "service.invoke") so we + # can exclude them from temporal chaining — they are structural + # parents, not data-flow nodes. + root_span_ids: set = set() + for _, sp in spans_with_time: + sp_name = sp.get("name", "") + if sp_name.endswith(".invoke"): + root_span_ids.add(sp.get("spanId")) + + for start_time, sp in spans_with_time: + trace_id = sp.get("traceId") or trace_id + sid = sp.get("spanId") + psid = sp.get("parentSpanId") + attrs = _attrs(sp.get("attributes", [])) + + # D10: Use trace.temporal_ignore to decide temporal chain + temporal_ignore = str( + attrs.get("trace.temporal_ignore", "false") + ).strip().lower() in ("true", "1", "yes") + + # Skip root invocation spans — they are structural wrappers, + # not data-flow nodes. 
+ if sid in root_span_ids: + continue + + op = _op(attrs, sp) + name = _sanitize(sp.get("name") or sid) + params = _params(attrs) + + for pname, spec in params.items(): + p_id = f"{svc}:param_{pname}" + nodes.setdefault( + p_id, + { + "kind": "parameter", + "name": pname, + "data": spec["value"], + "trainable": bool(spec["trainable"]), + "info": {"otel": {"span_id": sid}}, + }, + ) + inputs = _lift_inputs(attrs) + + # Temporal hierarchy: connect to previous non-ignored span + # when use_temporal_hierarchy is enabled. + # With root invocation spans (D9), node spans have a + # structural parent. We still want temporal chaining + # among sibling node spans, so we use prev_span_id + # regardless of whether psid is set — the key gate is + # temporal_ignore. + effective_psid = psid + if use_temporal_hierarchy and prev_span_id and not temporal_ignore: + # If the OTEL parent is the root invocation span, + # prefer temporal parent for data-flow graph. + if not psid or psid in root_span_ids: + effective_psid = prev_span_id + + # If our effective parent is a skipped root invocation span, + # do not emit a parent edge that would dangle in TGJ. 
+ if effective_psid and effective_psid in root_span_ids: + effective_psid = None + + if effective_psid and "parent" not in inputs: + # Resolve via mapping so parent refs use stable node ids + inputs["parent"] = span_to_node_id.get(effective_psid, f"{svc}:{effective_psid}") + + # Connect parameters as inputs to the MessageNode + for pname in params.keys(): + inputs[f"param_{pname}"] = f"{svc}:param_{pname}" + + rec = { + "kind": "msg", + "name": name, + "op": op, + "inputs": {}, + "data": {"message_id": attrs.get("message.id")}, + "info": { + "otel": { + "trace_id": trace_id, + "span_id": sid, + "parent_span_id": effective_psid, + "service": svc, + "temporal_ignore": temporal_ignore, + } + }, + } + for role, ref in inputs.items(): + if ref.startswith("lit:"): + rec["inputs"][role] = ref + else: + rec["inputs"][role] = ref if ":" in ref else f"{svc}:{ref}" + # Use message.id as stable logical node identity when + # available; fall back to span id for backward compat. + msg_id = attrs.get("message.id") + node_id = f"{svc}:{msg_id}" if msg_id else f"{svc}:{sid}" + nodes[node_id] = rec + span_to_node_id[sid] = node_id + + # D10: Advance temporal chain only on spans NOT marked + # with trace.temporal_ignore (child LLM spans are ignored; + # node spans advance the chain). + if not temporal_ignore: + prev_span_id = sid + + # Post-process: remap any input refs that still use raw span IDs + # through span_to_node_id so they point to stable message.id-based keys. 
+ for _nid, rec in nodes.items(): + for role, ref in list(rec.get("inputs", {}).items()): + if ref.startswith("lit:"): + continue + # ref format is "service:span_id" — extract the span_id part + if ":" in ref: + prefix, suffix = ref.split(":", 1) + if suffix in span_to_node_id and ref != span_to_node_id[suffix]: + rec["inputs"][role] = span_to_node_id[suffix] + + docs.append( + { + "version": PROFILE_VERSION, + "agent": {"id": svc, "service": svc}, + "otel_meta": {"trace_id": trace_id}, + "nodes": nodes, + "context": {}, + } + ) + return docs + diff --git a/opto/trace/io/otel_semconv.py b/opto/trace/io/otel_semconv.py new file mode 100644 index 00000000..51ad837c --- /dev/null +++ b/opto/trace/io/otel_semconv.py @@ -0,0 +1,125 @@ +""" +opto.trace.io.otel_semconv +========================== + +Semantic convention helpers for emitting OTEL spans compatible with both +the Trace TGJ format and Agent Lightning ``gen_ai.*`` conventions. +""" + +from __future__ import annotations + +import json +import logging +from typing import Any, Dict, List, Optional + +from opentelemetry import trace as oteltrace + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Span attribute helpers +# --------------------------------------------------------------------------- + +def set_span_attributes(span: oteltrace.Span, attrs: Dict[str, Any]) -> None: + """Set multiple span attributes at once. + + * ``dict`` / ``list`` values are serialized to JSON strings. + * ``None`` values are silently skipped. 
+ """ + for key, value in attrs.items(): + if value is None: + continue + if isinstance(value, (dict, list)): + value = json.dumps(value, default=str) + span.set_attribute(key, value) + + +def record_genai_chat( + span: oteltrace.Span, + *, + provider: str, + model: str, + input_messages: Optional[List[Dict[str, Any]]] = None, + output_text: Optional[str] = None, + request_type_compat: str = "chat.completion", +) -> None: + """Record OTEL GenAI semantic convention attributes on *span*. + + Emits + ----- + * ``gen_ai.operation.name`` + * ``gen_ai.provider.name`` + * ``gen_ai.request.model`` + * ``gen_ai.input.messages`` (JSON) + * ``gen_ai.output.messages`` (JSON) + """ + span.set_attribute("gen_ai.operation.name", request_type_compat) + span.set_attribute("gen_ai.provider.name", provider) + span.set_attribute("gen_ai.request.model", model) + if input_messages is not None: + span.set_attribute( + "gen_ai.input.messages", + json.dumps(input_messages, default=str), + ) + if output_text is not None: + span.set_attribute( + "gen_ai.output.messages", + json.dumps([{"role": "assistant", "content": output_text}], default=str), + ) + + +# --------------------------------------------------------------------------- +# Reward / annotation helpers +# --------------------------------------------------------------------------- + +def emit_reward( + session: Any, # TelemetrySession or anything with a .tracer property + *, + value: float, + name: str = "final_score", + index: int = 0, + span_name: str = "agentlightning.annotation", + extra_attributes: Optional[Dict[str, Any]] = None, +) -> None: + """Emit a reward span compatible with Agent Lightning semconv. 
+ + Creates a child span with: + * ``agentlightning.reward..name`` + * ``agentlightning.reward..value`` + * ``trace.temporal_ignore = true`` + """ + tracer = session.tracer if hasattr(session, "tracer") else session + with tracer.start_as_current_span(span_name) as sp: + sp.set_attribute("trace.temporal_ignore", "true") + sp.set_attribute(f"agentlightning.reward.{index}.name", name) + sp.set_attribute(f"agentlightning.reward.{index}.value", str(value)) + if extra_attributes: + set_span_attributes(sp, extra_attributes) + + +# Backward-compat alias +emit_agentlightning_reward = emit_reward + + +def emit_trace( + session: Any, + *, + name: str, + attrs: Optional[Dict[str, Any]] = None, +) -> None: + """Emit a lightweight OTEL span for arbitrary debug / optimization signals. + + Parameters + ---------- + session + A ``TelemetrySession`` (or anything with a ``.tracer`` attribute). + name : str + Span name. + attrs : dict, optional + Attributes to attach. + """ + tracer = session.tracer if hasattr(session, "tracer") else session + with tracer.start_as_current_span(name) as sp: + if attrs: + set_span_attributes(sp, attrs) diff --git a/opto/trace/io/telemetry_session.py b/opto/trace/io/telemetry_session.py new file mode 100644 index 00000000..8f190f39 --- /dev/null +++ b/opto/trace/io/telemetry_session.py @@ -0,0 +1,623 @@ +""" +opto.trace.io.telemetry_session +=============================== + +Unified session manager for OTEL traces and (optionally) MLflow. + +A ``TelemetrySession`` owns a ``TracerProvider`` + ``InMemorySpanExporter`` +and exposes: + +* ``flush_otlp()`` – extract collected spans as OTLP JSON and optionally clear +* ``flush_tgj()`` – convert spans to Trace-Graph JSON via ``otel_adapter`` +* ``export_run_bundle()`` – dump all session data to a directory + +In addition, when a session is **activated** (``with TelemetrySession()`` or +``TelemetrySession.activate()``), Trace-level operators can optionally emit +spans for non-LangGraph pipelines (e.g. 
``@trace.bundle`` operations). +""" + +from __future__ import annotations + +import contextlib +import contextvars +import json +import logging +import os +import time +import weakref +from dataclasses import dataclass +from typing import Any, Callable, Dict, List, Optional, Tuple + +from opentelemetry import trace as oteltrace +from opentelemetry.sdk.trace import TracerProvider + +from opto.trace.io.langgraph_otel_runtime import ( + InMemorySpanExporter, + flush_otlp as _flush_otlp_raw, +) +from opto.trace.io.otel_adapter import otlp_traces_to_trace_json + +logger = logging.getLogger(__name__) + +_CURRENT_SESSION: contextvars.ContextVar[Optional["TelemetrySession"]] = ( + contextvars.ContextVar("opto_trace_current_telemetry_session", default=None) +) + + +@dataclass(frozen=True) +class BundleSpanConfig: + """Controls optional OTEL spans around ``@trace.bundle`` ops. + + The defaults are intentionally conservative to avoid span noise. + """ + + enable: bool = True + disable_default_ops: bool = True + capture_inputs: bool = True + + +@dataclass(frozen=True) +class MessageNodeTelemetryConfig: + """Controls how MessageNodes are associated to OTEL spans. + + Modes: + - ``"off"``: no binding/spans + - ``"bind"``: attach ``message.id`` to the current span (if any) + - ``"span"``: if no current span, create a minimal span for the node + """ + + mode: str = "bind" + + +class TelemetrySession: + """Manages an OTEL tracing session with export capabilities. + + Parameters + ---------- + service_name : str + OTEL service / scope name. + record_spans : bool + If *False*, disable span recording entirely (safe no-op). + span_attribute_filter : callable, optional + ``(span_name, attrs_dict) -> attrs_dict``. Return ``{}`` to drop the + span entirely. Useful for redacting secrets or truncating payloads. + bundle_spans : BundleSpanConfig, optional + Enable optional OTEL spans around ``@trace.bundle`` operations when this + session is active (non-LangGraph pipelines). 
+ message_nodes : MessageNodeTelemetryConfig, optional + Controls how ``MessageNode`` creation binds to spans (used to keep a + stable Node-to-Span mapping for TGJ conversion). + max_attr_chars : int + Max characters for any attribute value written by the session helpers. + mlflow_log_artifacts : bool + If True, ``export_run_bundle()`` will also attempt to log the bundle + directory as MLflow artifacts (best-effort no-op when unavailable). + mlflow_autolog : bool + If True, best-effort enable MLflow autologging so ``@trace.bundle`` ops + can also be wrapped by ``mlflow.trace`` while this session is used. + This keeps MLflow optional and preserves backward compatibility. + """ + + def __init__( + self, + service_name: str = "trace-session", + *, + record_spans: bool = True, + span_attribute_filter: Optional[ + Callable[[str, Dict[str, Any]], Dict[str, Any]] + ] = None, + bundle_spans: Optional[BundleSpanConfig] = None, + message_nodes: Optional[MessageNodeTelemetryConfig] = None, + max_attr_chars: int = 500, + mlflow_log_artifacts: bool = False, + mlflow_autolog: bool = False, + mlflow_autolog_kwargs: Optional[Dict[str, Any]] = None, + ) -> None: + self.service_name = service_name + self.record_spans = record_spans + self.span_attribute_filter = span_attribute_filter + self.bundle_spans = bundle_spans or BundleSpanConfig() + self.message_nodes = message_nodes or MessageNodeTelemetryConfig() + self.max_attr_chars = int(max_attr_chars) + self.mlflow_log_artifacts = bool(mlflow_log_artifacts) + self.mlflow_autolog = bool(mlflow_autolog) + self.mlflow_autolog_kwargs = dict(mlflow_autolog_kwargs or {}) + + # OTEL plumbing + self._exporter = InMemorySpanExporter() + self._provider = TracerProvider() + + if self.record_spans: + from opentelemetry.sdk.trace.export import SimpleSpanProcessor + + self._provider.add_span_processor( + SimpleSpanProcessor(self._exporter) + ) + + self._tracer = self._provider.get_tracer(service_name) + + # Node -> OTEL span-id mapping for 
"inputs.*" reference lifting. + # WeakKeyDictionary avoids preventing GC for graphs created during optimization loops. + self._node_span_ids: "weakref.WeakKeyDictionary[object, str]" = ( + weakref.WeakKeyDictionary() + ) + + self._message_node_records: List[Dict[str, Any]] = [] + + # Activation token stack (supports nested with-blocks on the same instance) + self._token_stack: List[contextvars.Token] = [] + + # Optional MLflow bridge: keep MLflow optional and do not fail session + # construction if MLflow is unavailable. + if self.mlflow_autolog: + try: + from opto.features.mlflow.autolog import autolog as _mlflow_autolog + kwargs = {"silent": True} + kwargs.update(self.mlflow_autolog_kwargs) + _mlflow_autolog(**kwargs) + except Exception as e: + logger.debug( + "TelemetrySession could not enable MLflow autologging: %s", + e, + ) + + # -- activation ----------------------------------------------------------- + + @classmethod + def current(cls) -> Optional["TelemetrySession"]: + """Return the currently-active session (if any).""" + return _CURRENT_SESSION.get() + + @contextlib.contextmanager + def activate(self): + """Activate this session in the current context. + + When active, instrumentation hooks (e.g. bundle spans, MessageNode binding) + can discover the session via ``TelemetrySession.current()``. + """ + token = _CURRENT_SESSION.set(self) + try: + yield self + finally: + _CURRENT_SESSION.reset(token) + + def __enter__(self) -> "TelemetrySession": + token = _CURRENT_SESSION.set(self) + self._token_stack.append(token) + return self + + def __exit__(self, exc_type, exc, tb) -> None: + if self._token_stack: + token = self._token_stack.pop() + _CURRENT_SESSION.reset(token) + + def set_current(self) -> "TelemetrySession": + """Activate this session without a context manager. + + Useful in notebooks or scripts where indenting all code under a + ``with`` block is impractical. Must be paired with a later call + to :meth:`clear_current`. 
+ + Returns the session instance for chaining. + """ + token = _CURRENT_SESSION.set(self) + self._token_stack.append(token) + return self + + def clear_current(self) -> None: + """Deactivate the most recent :meth:`set_current` activation.""" + if self._token_stack: + token = self._token_stack.pop() + _CURRENT_SESSION.reset(token) + + # -- properties ----------------------------------------------------------- + + @property + def tracer(self) -> oteltrace.Tracer: + """The OTEL tracer for manual span creation.""" + return self._tracer + + @property + def exporter(self) -> InMemorySpanExporter: + """Direct access to the in-memory span exporter.""" + return self._exporter + + # -- span helpers --------------------------------------------------------- + + @staticmethod + def _span_id_hex(span) -> Optional[str]: + try: + ctx = span.get_span_context() + if not getattr(ctx, "is_valid", False): + return None + return f"{ctx.span_id:016x}" + except Exception: + return None + + def _truncate(self, v: Any) -> str: + s = str(v) + if self.max_attr_chars and len(s) > self.max_attr_chars: + return s[: self.max_attr_chars] + "…" + return s + + def _is_trace_node(self, obj: Any) -> bool: + mod = getattr(obj.__class__, "__module__", "") + return mod.startswith("opto.trace") and hasattr(obj, "name") and hasattr(obj, "data") + + def _is_parameter_node(self, obj: Any) -> bool: + return self._is_trace_node(obj) and obj.__class__.__name__ == "ParameterNode" + + def _param_key(self, param_node: Any) -> str: + raw = getattr(param_node, "name", "param") + return str(raw).split(":")[0] + + def _remember_node_span(self, node: Any, span) -> None: + sid = self._span_id_hex(span) + if sid is None: + return + try: + self._node_span_ids[node] = sid + except TypeError: + return + + def _lookup_node_ref(self, node: Any) -> Optional[str]: + try: + sid = self._node_span_ids.get(node) + except Exception: + sid = None + if not sid: + return None + # Prefer stable message.id (node.name) over raw span ID + 
msg_id = getattr(node, "name", None) + if msg_id: + return f"{self.service_name}:{msg_id}" + return f"{self.service_name}:{sid}" + + def _inputs_and_params_from_trace_inputs( + self, inputs: Dict[str, Any] + ) -> Tuple[Dict[str, str], Dict[str, str]]: + """Convert a Trace inputs dict into OTEL attribute fragments. + + Returns ``(inputs_attrs, params_attrs)`` where: + - ``inputs_attrs`` maps ``inputs.`` to a reference-or-literal + - ``params_attrs`` maps ``param.`` (+ trainable) to a value + """ + inputs_attrs: Dict[str, str] = {} + params_attrs: Dict[str, str] = {} + + for k, v in (inputs or {}).items(): + if self._is_parameter_node(v): + pname = self._param_key(v) + params_attrs[f"param.{pname}"] = self._truncate(getattr(v, "data", "")) + params_attrs[f"param.{pname}.trainable"] = str( + bool(getattr(v, "trainable", False)) + ).lower() + + if self._is_trace_node(v): + ref = self._lookup_node_ref(v) + if ref is not None: + inputs_attrs[f"inputs.{k}"] = ref + else: + inputs_attrs[f"inputs.{k}"] = f"lit:{self._truncate(getattr(v, 'data', ''))}" + else: + inputs_attrs[f"inputs.{k}"] = f"lit:{self._truncate(v)}" + + return inputs_attrs, params_attrs + + def _is_default_op(self, fun_name: str, file_path: str) -> bool: + if fun_name == "call_llm": + return False + norm = str(file_path).replace("\\", "/") + return norm.endswith("/trace/operators.py") + + @contextlib.contextmanager + def bundle_span(self, *, fun_name: str, file_path: str, inputs: Dict[str, Any]): + """Context manager for an OTEL span around a bundle op.""" + if not (self.record_spans and self.bundle_spans.enable): + yield None + return + + if self.bundle_spans.disable_default_ops and self._is_default_op(fun_name, file_path): + yield None + return + + attrs: Dict[str, Any] = { + "trace.bundle": "true", + "trace.bundle.fun_name": fun_name, + "trace.bundle.file": str(file_path), + } + + if self.bundle_spans.capture_inputs: + in_attrs, p_attrs = self._inputs_and_params_from_trace_inputs(inputs or {}) + 
attrs.update(in_attrs) + attrs.update(p_attrs) + + with self.tracer.start_as_current_span(fun_name) as sp: + for k, v in attrs.items(): + try: + sp.set_attribute(k, v) + except Exception: + sp.set_attribute(k, str(v)) + yield sp + + def on_message_node_created(self, node: Any, *, inputs: Optional[Dict[str, Any]] = None) -> None: + """Hook invoked from ``MessageNode.__init__`` (best-effort). + + - If there's a current span: bind ``message.id`` and remember Node-to-Span mapping. + - Optionally, if mode == "span" and no current span exists, create a minimal span. + """ + mode = (self.message_nodes.mode or "off").lower() + if mode == "off" or not self.record_spans: + return + + try: + rec = { + "name": getattr(node, "name", None), + "op": getattr(node, "op_name", None) if hasattr(node, "op_name") else None, + } + if inputs: + rec["inputs"] = { + k: getattr(v, "name", None) if self._is_trace_node(v) else v + for k, v in inputs.items() + } + self._message_node_records.append(rec) + except Exception: + pass + + cur = oteltrace.get_current_span() + if cur is not None: + try: + ctx = cur.get_span_context() + if getattr(ctx, "is_valid", False) and cur.is_recording(): + cur.set_attribute("message.id", str(getattr(node, "name", ""))) + self._remember_node_span(node, cur) + return + except Exception: + pass + + if mode != "span": + return + + span_name = str(getattr(node, "name", "message_node")) + attrs: Dict[str, Any] = {"message.id": span_name} + if inputs: + in_attrs, p_attrs = self._inputs_and_params_from_trace_inputs(inputs) + attrs.update(in_attrs) + attrs.update(p_attrs) + + with self.tracer.start_as_current_span(span_name) as sp: + for k, v in attrs.items(): + try: + sp.set_attribute(k, v) + except Exception: + sp.set_attribute(k, str(v)) + self._remember_node_span(node, sp) + + # -- flush methods -------------------------------------------------------- + + def flush_otlp(self, *, clear: bool = True) -> Dict[str, Any]: + """Flush collected spans to OTLP JSON. 

        Parameters
        ----------
        clear : bool
            If *True* (default), clear the exporter after flushing.
            If *False*, peek at current spans without clearing.

        Returns
        -------
        dict
            OTLP JSON payload compatible with ``otel_adapter``.
        """
        if not self.record_spans:
            # Recording disabled: return an empty-but-valid OTLP envelope.
            return {"resourceSpans": []}

        otlp = _flush_otlp_raw(
            self._exporter,
            scope_name=self.service_name,
            clear=clear,
        )

        if self.span_attribute_filter is not None:
            otlp = self._apply_attribute_filter(otlp)

        return otlp

    def _apply_attribute_filter(self, otlp: Dict[str, Any]) -> Dict[str, Any]:
        """Apply ``span_attribute_filter`` to all spans in the OTLP payload.

        Filter protocol (see the constructor docstring):
        * return ``{}``  -> the span is dropped entirely;
        * return ``None`` -> the span is kept with its original attributes;
        * return a dict  -> the span's attributes are replaced by it.

        Note: replaced attributes are re-encoded as ``stringValue`` only, so
        typed OTLP values (ints, bools) are stringified after filtering.
        """
        if self.span_attribute_filter is None:
            return otlp

        filtered_rs = []
        for rs in otlp.get("resourceSpans", []):
            filtered_ss = []
            for ss in rs.get("scopeSpans", []):
                filtered_spans = []
                for sp in ss.get("spans", []):
                    span_name = sp.get("name", "")
                    # Flatten OTLP attribute records into a plain dict for
                    # the user-supplied filter callable.
                    attrs_dict: Dict[str, Any] = {}
                    for a in sp.get("attributes", []):
                        key = a.get("key")
                        val = a.get("value", {})
                        if isinstance(val, dict) and "stringValue" in val:
                            attrs_dict[key] = val["stringValue"]
                        else:
                            attrs_dict[key] = str(val)

                    new_attrs = self.span_attribute_filter(span_name, attrs_dict)

                    # Empty dict (falsy but not None) means "drop this span".
                    if not new_attrs and new_attrs is not None:
                        continue

                    if new_attrs is not None:
                        # Copy before mutating: the exporter may still hold
                        # a reference to the original record.
                        sp = dict(sp)
                        sp["attributes"] = [
                            {"key": k, "value": {"stringValue": str(v)}}
                            for k, v in new_attrs.items()
                        ]
                    filtered_spans.append(sp)

                ss_copy = dict(ss)
                ss_copy["spans"] = filtered_spans
                filtered_ss.append(ss_copy)

            rs_copy = dict(rs)
            rs_copy["scopeSpans"] = filtered_ss
            filtered_rs.append(rs_copy)

        return {"resourceSpans": filtered_rs}

    def flush_tgj(
        self,
        *,
        agent_id_hint: str = "",
        use_temporal_hierarchy: bool = True,
        clear: bool = True,
    ) -> List[Dict[str, Any]]:
        """Flush collected spans to Trace-Graph JSON format."""
        otlp = self.flush_otlp(clear=clear)
        return otlp_traces_to_trace_json(
            otlp,
agent_id_hint=agent_id_hint or self.service_name, + use_temporal_hierarchy=use_temporal_hierarchy, + ) + + # -- internal helpers (used by optimization.py) --------------------------- + + def _flush_tgj_from_otlp(self, otlp: Dict[str, Any]) -> List[Dict[str, Any]]: + """Convert an already-flushed OTLP payload to TGJ (no exporter access).""" + return otlp_traces_to_trace_json( + otlp, + agent_id_hint=self.service_name, + use_temporal_hierarchy=True, + ) + + # -- MLflow helpers (best-effort) ----------------------------------------- + + def _mlflow_log_artifacts(self, output_dir: str) -> None: + if not self.mlflow_log_artifacts: + return + try: + import mlflow # type: ignore + except Exception: + return + try: + mlflow.log_artifacts(output_dir) + except Exception as e: + logger.debug("MLflow artifact logging skipped: %s", e) + + def log_metric(self, key: str, value: float, *, step: Optional[int] = None) -> None: + """Best-effort metric logging to MLflow (if available).""" + try: + import mlflow # type: ignore + except Exception: + return + try: + if step is None: + mlflow.log_metric(key, float(value)) + else: + mlflow.log_metric(key, float(value), step=int(step)) + except Exception: + return + + def log_param(self, key: str, value: Any) -> None: + """Best-effort param logging to MLflow (if available).""" + try: + import mlflow # type: ignore + except Exception: + return + try: + mlflow.log_param(key, str(value)) + except Exception: + return + + # -- export helpers ------------------------------------------------------- + + def export_run_bundle( + self, + output_dir: str, + *, + include_otlp: bool = True, + include_tgj: bool = True, + include_prompts: bool = True, + prompts: Optional[Dict[str, str]] = None, + include_node_records: bool = True, + include_manifest: bool = True, + ) -> str: + """Export all session data to a directory bundle. 
+ + File naming is aligned with the repository demos: + + - ``otlp.json`` (and legacy alias ``otlp_trace.json``) + - ``tgj.json`` (and legacy alias ``trace_graph.json``) + - ``prompts.json`` (optional) + - ``message_nodes.jsonl`` (optional lightweight debug log) + - ``manifest.json`` (optional) + + Returns the path to the bundle directory. + """ + os.makedirs(output_dir, exist_ok=True) + + otlp = self.flush_otlp(clear=True) + + manifest: Dict[str, Any] = { + "created_at": time.time(), + "service_name": self.service_name, + "files": {}, + } + + if include_otlp: + otlp_path = os.path.join(output_dir, "otlp.json") + with open(otlp_path, "w") as f: + json.dump(otlp, f, indent=2) + manifest["files"]["otlp"] = "otlp.json" + + alias = os.path.join(output_dir, "otlp_trace.json") + try: + if not os.path.exists(alias): + with open(alias, "w") as f: + json.dump(otlp, f, indent=2) + except Exception: + pass + + if include_tgj: + tgj_docs = otlp_traces_to_trace_json( + otlp, + agent_id_hint=self.service_name, + use_temporal_hierarchy=True, + ) + tgj_path = os.path.join(output_dir, "tgj.json") + with open(tgj_path, "w") as f: + json.dump(tgj_docs, f, indent=2) + manifest["files"]["tgj"] = "tgj.json" + + alias = os.path.join(output_dir, "trace_graph.json") + try: + if not os.path.exists(alias): + with open(alias, "w") as f: + json.dump(tgj_docs, f, indent=2) + except Exception: + pass + + if include_prompts and prompts: + prompts_path = os.path.join(output_dir, "prompts.json") + with open(prompts_path, "w") as f: + json.dump(prompts, f, indent=2) + manifest["files"]["prompts"] = "prompts.json" + + if include_node_records and self._message_node_records: + p = os.path.join(output_dir, "message_nodes.jsonl") + with open(p, "w") as f: + for rec in self._message_node_records: + f.write(json.dumps(rec, ensure_ascii=False) + "\n") + manifest["files"]["message_nodes"] = "message_nodes.jsonl" + + if include_manifest: + p = os.path.join(output_dir, "manifest.json") + with open(p, "w") as 
f: + json.dump(manifest, f, indent=2) + + self._mlflow_log_artifacts(output_dir) + + logger.info("Exported run bundle to %s", output_dir) + return output_dir diff --git a/opto/trace/io/tgj_export.py b/opto/trace/io/tgj_export.py new file mode 100644 index 00000000..fe3ae555 --- /dev/null +++ b/opto/trace/io/tgj_export.py @@ -0,0 +1,160 @@ +from __future__ import annotations + +""" +Utilities to export an already-built Trace graph (Node / MessageNode / ParameterNode) +to TGJ format. + +Intended use: +- debugging and inspecting native Trace graphs +- tests comparing native Trace graphs with OTEL-recovered TGJ graphs +- exporting a subgraph from the in-memory Trace graph + +Non-goals: +- this does NOT reconstruct a graph from telemetry; it only exports an existing Trace graph +""" + +from typing import Dict, Any, Iterable, Set +from opto.trace.nodes import ( + Node, + MessageNode, + ParameterNode, + ExceptionNode, + GRAPH, + get_op_name, +) + + +def _base_name(n: Node) -> str: + return n.name.split(":")[0] + + +def export_subgraph_to_tgj( + nodes: Iterable[Node], + run_id: str, + agent_id: str, + graph_id: str, + scope: str = "", +) -> Dict[str, Any]: + seen: Set[Node] = set() + q = list(nodes) + tgj_nodes = [] + idmap: Dict[Node, str] = {} + used_ids: Set[str] = set() + + def nid(n: Node) -> str: + if n not in idmap: + base = _base_name(n) + candidate = base + i = 2 + while candidate in used_ids: + candidate = f"{base}__{i}" + i += 1 + idmap[n] = candidate + used_ids.add(candidate) + return idmap[n] + + while q: + n = q.pop() + if n in seen: + continue + seen.add(n) + + if isinstance(n, ParameterNode): + tgj_nodes.append( + { + "id": nid(n), + "kind": "parameter", + "name": _base_name(n), + "value": n.data, + "trainable": bool(getattr(n, "trainable", True)), + "description": "[Parameter]", + } + ) + + elif isinstance(n, MessageNode): + for p in n.parents: + q.append(p) + + inputs = {f"in_{i}": {"ref": nid(p)} for i, p in enumerate(n.parents)} + + op = getattr(n, 
"op_name", None) + if not op: + try: + op = get_op_name(n.description or "[op]") + except Exception: + op = "op" + + rec = { + "id": nid(n), + "kind": "message", + "name": _base_name(n), + "op": op, + "description": f"[{op}] {n.description or ''}".strip(), + "inputs": inputs, + "output": { + "name": f"{_base_name(n)}:out", + "value": n.data, + }, + } + tgj_nodes.append(rec) + + elif isinstance(n, ExceptionNode): + for p in n.parents: + q.append(p) + + err_type = "Exception" + try: + if n.data is not None: + err_type = type(n.data).__name__ + except Exception: + pass + + tgj_nodes.append( + { + "id": nid(n), + "kind": "exception", + "name": _base_name(n), + "description": f"[Exception] {n.description or ''}".strip(), + "inputs": {f"in_{i}": {"ref": nid(p)} for i, p in enumerate(n.parents)}, + "error": { + "type": err_type, + "message": str(n.data), + }, + } + ) + + else: + for p in n.parents: + q.append(p) + + tgj_nodes.append( + { + "id": nid(n), + "kind": "value", + "name": _base_name(n), + "value": n.data, + "description": "[Node]", + } + ) + + # best-effort dependency order + tgj_nodes.reverse() + + return { + "tgj": "1.0", + "run_id": run_id, + "agent_id": agent_id, + "graph_id": graph_id, + "scope": scope, + "nodes": tgj_nodes, + } + + +def export_full_graph_to_tgj( + run_id: str, + agent_id: str, + graph_id: str, + scope: str = "", +) -> Dict[str, Any]: + all_nodes = [n for lst in GRAPH._nodes.values() for n in lst] + return export_subgraph_to_tgj(all_nodes, run_id, agent_id, graph_id, scope) diff --git a/opto/trace/io/tgj_ingest.py b/opto/trace/io/tgj_ingest.py new file mode 100644 index 00000000..6bc6d46f --- /dev/null +++ b/opto/trace/io/tgj_ingest.py @@ -0,0 +1,275 @@ +from __future__ import annotations +from typing import Dict, Any, List, Optional, Union +from contextlib import contextmanager + +from opto.trace.nodes import Node, MessageNode, ParameterNode, ExceptionNode, NAME_SCOPES + +OTEL_PROFILE_VERSION = "trace-json/1.0+otel" + +@contextmanager +def 
_scoped(scope: str): + if scope: + NAME_SCOPES.append(scope) + try: + yield + finally: + if scope and NAME_SCOPES: + NAME_SCOPES.pop() + +def _mk_value(name: str, value: Any, desc: str="[Node]") -> Node: + safe = name.replace(":", "_") + return Node(value, name=safe, description=desc) + +def _as_node(ref: Union[str, Dict[str,Any]], local: Dict[str,Node], ports: Dict[str,Node], port_index: Optional[Dict[str,Node]] = None) -> Node: + if isinstance(ref, str): + ref = {"ref": ref} + if "ref" in ref: + key = ref["ref"] + local.setdefault(key, _mk_value(key, None)) + return local[key] + if "export" in ref: + pid = ref["export"] + if port_index and pid in port_index: + return port_index[pid] + ports.setdefault(pid, _mk_value(pid, None, "[Node] (import)")) + return ports[pid] + if "literal" in ref: + val = ref["literal"] + nm = ref.get("name", f"lit_{abs(hash(str(val)))%10_000}") + n = _mk_value(nm, val) + local[nm] = n + return n + if "hash" in ref: + nm = ref.get("name", f"hash_{ref['hash'][7:15]}") + n = _mk_value(nm, ref.get("preview", ""), "[Node] (redacted)") + local[nm] = n + return n + raise ValueError(f"Unsupported ref: {ref}") + + +def _kind_norm(k: str) -> str: + k = (k or "").lower() + if k in ("param", "parameter"): + return "parameter" + if k in ("const", "value"): + return "value" + if k in ("msg", "message"): + return "message" + if k == "exception": + return "exception" + return k + + +def _nodes_iter(nodes_field: Union[List[Dict[str,Any]], Dict[str,Dict[str,Any]]]) -> List[Dict[str,Any]]: + if isinstance(nodes_field, dict): + out = [] + for nid, rec in nodes_field.items(): + rec = dict(rec) + rec.setdefault("id", nid) + out.append(rec) + return out + return list(nodes_field or []) + + +def _convert_otel_profile(doc: Dict[str,Any]) -> Dict[str,Any]: + raw_nodes = _nodes_iter(doc.get("nodes", {})) + known_ids = { + rec.get("id") or rec.get("name") + for rec in raw_nodes + if (rec.get("id") or rec.get("name")) is not None + } + nodes_list = [] + for rec in 
raw_nodes: + kind = _kind_norm(rec.get("kind")) + nid = rec.get("id") or rec.get("name") + name = rec.get("name", nid) + if kind == "parameter": + nodes_list.append({ + "id": nid, + "kind": "parameter", + "name": name, + "value": rec.get("data"), + "trainable": rec.get("trainable", True), + "description": rec.get("description", "[Parameter]") + }) + elif kind == "message": + inputs = {} + for k, v in (rec.get("inputs") or {}).items(): + if isinstance(v, str): + if v.startswith("lit:"): + inputs[k] = {"literal": v.split(":",1)[1]} + elif ":" in v: + # First prefer exact-match refs against known node ids. + # This preserves stable logical ids like "service:message.id" + # introduced by the OTEL -> TGJ adapter. + if v in known_ids: + inputs[k] = {"ref": v} + else: + # Backward-compatible fallback for older span-id-based refs + # and parameter refs that may not be listed yet. + _svc, _, rest = v.partition(":") + is_span_like = ( + len(rest) == 16 + and all(c in "0123456789abcdef" for c in rest.lower()) + ) + is_param_like = rest.startswith("param_") + inputs[k] = {"ref": v} if (is_span_like or is_param_like) else {"literal": v} + else: + inputs[k] = {"literal": v} + else: + inputs[k] = v + msg_rec = { + "id": nid, + "kind": "message", + "name": name, + "description": f"[{rec.get('op','op')}] {rec.get('description', name)}".strip(), + "inputs": inputs, + "output": {"name": f"{name}:out", "value": rec.get("data")} + } + # Propagate info dict (contains otel metadata like temporal_ignore) + if rec.get("info"): + msg_rec["info"] = rec["info"] + nodes_list.append(msg_rec) + elif kind == "value": + nodes_list.append({ + "id": nid, + "kind": "value", + "name": name, + "value": rec.get("data"), + "description": rec.get("description", "[Node]") + }) + agent = (doc.get("agent") or {}).get("id", "agent") + return { + "tgj": "1.0", + "run_id": (doc.get("otel_meta") or {}).get("trace_id"), + "agent_id": agent, + "graph_id": doc.get("graph_id", ""), + "scope": f"{agent}/0", + 
"nodes": nodes_list, + } + +def ingest_tgj( + doc: Dict[str,Any], + port_index: Optional[Dict[str,Node]] = None, + *, + param_cache: Optional[Dict[str,"ParameterNode"]] = None, +) -> Dict[str,Node]: + version = doc.get("tgj") or doc.get("version") + if version == OTEL_PROFILE_VERSION: + doc = _convert_otel_profile(doc) + version = doc.get("tgj") + assert version == "1.0", "Unsupported TGJ version" + nodes: Dict[str,Node] = {} + exports: Dict[str,Node] = {} + ports: Dict[str,Node] = {} + + with _scoped(doc.get("scope", "")): + # pass 1: parameters/values + for rec in _nodes_iter(doc.get("nodes", [])): + k = rec["kind"] + nid = rec["id"] + nm = rec.get("name", nid) + if k == "parameter": + n = param_cache.get(nid) if param_cache is not None else None + if n is None: + n = ParameterNode( + rec.get("value"), + name=nm, + trainable=bool(rec.get("trainable", True)), + description=rec.get("description", "[Parameter]"), + ) + if param_cache is not None: + param_cache[nid] = n + else: + try: + n._data = rec.get("value") + except Exception: + pass + try: + n.trainable = bool(rec.get("trainable", True)) + except Exception: + pass + nodes[nid] = n + nodes[nm] = n + elif k == "value": + n = _mk_value(nm, rec.get("value"), rec.get("description", "[Node]")) + nodes[nid] = n + nodes[nm] = n + + # pass 2: messages/exceptions + for rec in _nodes_iter(doc.get("nodes", [])): + k = rec["kind"] + nid = rec["id"] + nm = rec.get("name", nid) + if k in ("message", "exception"): + in_spec = rec.get("inputs", {}) or {} + inputs = {key: _as_node(v, nodes, ports, port_index) for key, v in in_spec.items()} + out_meta = rec.get("output", {}) or {} + out_name = out_meta.get("name", f"{nm}:out") + out_node = _as_node(out_meta, nodes, ports, port_index) if ("hash" in out_meta) else _mk_value(out_name, out_meta.get("value")) + info = {"meta": rec.get("meta", {})} + iinfo = rec.get("info", {}) or {} + if "inputs" in iinfo: + args = [_as_node(x, nodes, ports, port_index) for x in 
iinfo["inputs"].get("args", [])] + kwargs = {k: _as_node(v, nodes, ports, port_index) for k, v in iinfo["inputs"].get("kwargs", {}).items()} + info["inputs"] = {"args": args, "kwargs": kwargs} + if "output" in iinfo: + info["output"] = _as_node(iinfo["output"], nodes, ports, port_index) + # Preserve OTEL metadata (e.g. temporal_ignore) for + # downstream consumers like _select_output_node. + if "otel" in iinfo: + info["otel"] = iinfo["otel"] + + desc = rec.get("description", "[Node]") + if k == "exception": + err = rec.get("error", {}) or {} + msg = err.get("message", "Exception") + n = ExceptionNode(value=Exception(msg), inputs=inputs, description=desc, name=nm, info=info) + else: + n = MessageNode(out_node, inputs=inputs, description=desc, name=nm, info=info) + nodes[nid] = n + nodes[nm] = n + nodes[out_name] = out_node + + # exports + for port_id, ref in (doc.get("exports") or {}).items(): + exports[port_id] = _as_node(ref, nodes, ports, port_index) + # resolve ports bound within same doc + for pid in list(ports.keys()): + if pid in exports: + ports[pid] = exports[pid] + + nodes["__TGJ_EXPORTS__"] = exports + nodes["__TGJ_META__"] = { + "run_id": doc.get("run_id"), + "agent_id": doc.get("agent_id"), + "graph_id": doc.get("graph_id"), + "scope": doc.get("scope"), + } + nodes["__TGJ_PORTS__"] = ports + return nodes + +def merge_tgj(docs: List[Dict[str,Any]]) -> Dict[str,Dict[str,Node]]: + merged: Dict[str,Dict[str,Node]] = {} + port_index: Dict[str,Node] = {} + for d in docs: + key = f"{d.get('agent_id','')}/{d.get('graph_id','')}/{d.get('run_id','')}" + merged[key] = ingest_tgj(d, port_index=port_index) + for pid, n in (merged[key].get("__TGJ_EXPORTS__") or {}).items(): + port_index[pid] = n + return merged + + +class TLSFIngestor: + """Minimal TLSF ingestor supporting TGJ/trace-json documents.""" + + def __init__(self, run_id: Optional[str] = None): + self.run_id = run_id + self._nodes: Dict[str, Node] = {} + + def ingest_tgj(self, doc: Dict[str, Any]) -> None: 
+ """Ingest a TGJ v1 or trace-json/1.0+otel document.""" + self._nodes.update(ingest_tgj(doc)) + + def get(self, name_or_event_id: str) -> Optional[Node]: + return self._nodes.get(name_or_event_id) diff --git a/pyproject.toml b/pyproject.toml index 2312a403..cf79b60b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,11 @@ classifiers = [ [project.optional-dependencies] autogen = ["autogen-agentchat==0.2.40"] test = ["datasets==3.6.0"] +telemetry = [ + "opentelemetry-api>=1.38.0", + "opentelemetry-sdk>=1.38.0", + "langgraph>=1.0.7", +] [project.urls] diff --git a/setup.py b/setup.py index dbd60be5..73394ff7 100644 --- a/setup.py +++ b/setup.py @@ -29,5 +29,5 @@ long_description=open('README.md', encoding="utf8").read(), packages=setuptools.find_packages(include=["opto*"]), install_requires=install_requires, - python_requires=">=3.10", + python_requires=">=3.12", ) diff --git a/tests/features_tests/test_e2e_m1_pipeline.py b/tests/features_tests/test_e2e_m1_pipeline.py new file mode 100644 index 00000000..61df8bb2 --- /dev/null +++ b/tests/features_tests/test_e2e_m1_pipeline.py @@ -0,0 +1,785 @@ +""" +End-to-end integration test for M1 acceptance criteria. + +Pipeline under test: + instrument_graph() → build LangGraph → invoke → flush OTLP + → OTLP→TGJ conversion → ingest_tgj → ParameterNode / MessageNode + → optimizer step (mock) → apply_updates → verify template change + → re-invoke → verify new template used + +Uses **StubLLM** only (no real LLM calls, CI-safe). 
+""" + +from __future__ import annotations + +import pytest +from typing import Any, Dict, List + +from langgraph.graph import StateGraph, START, END +from typing_extensions import TypedDict + +from opto.trace.io import ( + instrument_graph, + optimize_graph, + InstrumentedGraph, + EvalResult, + apply_updates, + otlp_traces_to_trace_json, + ingest_tgj, + TracingLLM, +) +from opto.trace.nodes import ParameterNode, MessageNode + + +# ========================================================================= +# Stub LLM (deterministic, no API calls) +# ========================================================================= + + +class StubLLM: + """Deterministic LLM stub that returns canned responses.""" + + model = "stub-llm" + + def __init__(self) -> None: + self.call_count = 0 + self.last_messages: list | None = None + + def __call__(self, messages=None, **kwargs): + self.call_count += 1 + self.last_messages = messages + + # Build a context-aware canned response + content = f"stub-response-{self.call_count}" + if messages: + for m in messages: + text = (m.get("content") or "").lower() + if m.get("role") == "system" and "plan" in text: + content = "Step 1: Research. Step 2: Analyze." + elif m.get("role") == "system" and "synth" in text: + content = "Based on the plan, here is a comprehensive answer." + + class _Msg: + pass + + msg = _Msg() + msg.content = content + + class _Choice: + pass + + choice = _Choice() + choice.message = msg + + class _Resp: + pass + + resp = _Resp() + resp.choices = [choice] + return resp + + +# ========================================================================= +# LangGraph state + builder +# ========================================================================= + + +class AgentState(TypedDict, total=False): + query: str + plan: str + answer: str + + +def build_mini_graph( + tracing_llm: TracingLLM, + templates: Dict[str, str], +) -> StateGraph: + """Build a minimal 2-node LangGraph (planner → synthesizer). 
+ + Node functions **close over** *tracing_llm* and *templates* so that + ``apply_updates`` on the dict propagates to subsequent invocations. + """ + + def planner_node(state: AgentState) -> Dict[str, Any]: + template = templates.get( + "planner_prompt", "Create a plan for: {query}" + ) + prompt = template.replace("{query}", state.get("query", "")) + response = tracing_llm.node_call( + span_name="planner", + template_name="planner_prompt", + template=template, + optimizable_key="planner", + messages=[ + {"role": "system", "content": "You are a planning agent."}, + {"role": "user", "content": prompt}, + ], + ) + return {"plan": response} + + def synthesizer_node(state: AgentState) -> Dict[str, Any]: + template = templates.get( + "synthesizer_prompt", + "Synthesize: {query}\nPlan: {plan}", + ) + prompt = ( + template + .replace("{query}", state.get("query", "")) + .replace("{plan}", state.get("plan", "")) + ) + response = tracing_llm.node_call( + span_name="synthesizer", + template_name="synthesizer_prompt", + template=template, + optimizable_key="synthesizer", + messages=[ + {"role": "system", "content": "You are a synthesis agent."}, + {"role": "user", "content": prompt}, + ], + ) + return {"answer": response} + + graph = StateGraph(AgentState) + graph.add_node("planner", planner_node) + graph.add_node("synthesizer", synthesizer_node) + graph.add_edge(START, "planner") + graph.add_edge("planner", "synthesizer") + graph.add_edge("synthesizer", END) + return graph + + +# ========================================================================= +# Mock optimizer (returns deterministic updates) +# ========================================================================= + + +class MockOptimizer: + """Mock optimizer that records calls and returns known updates.""" + + def __init__(self, param_nodes=None, **kwargs): + self.param_nodes = param_nodes or [] + self.calls: List[str] = [] + self._step_updates: Dict[str, str] = { + "planner_prompt": "OPTIMIZED: Create an 
improved plan for: {query}", + } + + def zero_feedback(self): + self.calls.append("zero_feedback") + + def backward(self, output_node, feedback_text): + self.calls.append(f"backward({type(output_node).__name__})") + + def step(self): + self.calls.append("step") + return dict(self._step_updates) + + +# ========================================================================= +# Helpers +# ========================================================================= + + +def _make_instrumented( + *, + templates: Dict[str, str] | None = None, + trainable_keys=None, + emit_genai_child_spans: bool = True, +) -> InstrumentedGraph: + """Convenience: build an InstrumentedGraph with a real LangGraph.""" + if templates is None: + templates = { + "planner_prompt": "Plan for: {query}", + "synthesizer_prompt": "Synthesize: {query} | Plan: {plan}", + } + if trainable_keys is None: + trainable_keys = {"planner", "synthesizer"} + + ig = instrument_graph( + graph=None, + service_name="e2e-test", + trainable_keys=trainable_keys, + llm=StubLLM(), + initial_templates=templates, + emit_genai_child_spans=emit_genai_child_spans, + provider_name="openai", + llm_span_name="openai.chat.completion", + output_key="answer", + ) + graph = build_mini_graph(ig.tracing_llm, ig.templates) + ig.graph = graph.compile() + return ig + + +# ========================================================================= +# 1. 
Instrument + Invoke → OTLP +# ========================================================================= + + +class TestE2EInstrumentAndInvoke: + """M1 gate: instrument_graph + real LangGraph invoke produces OTLP.""" + + def test_invoke_produces_result_with_answer(self): + ig = _make_instrumented() + result = ig.invoke({"query": "What is Python?"}) + assert "answer" in result + assert isinstance(result["answer"], str) + assert len(result["answer"]) > 0 + + def test_invoke_produces_otlp_with_planner_and_synthesizer_spans(self): + ig = _make_instrumented() + ig.invoke({"query": "What is AI?"}) + otlp = ig.session.flush_otlp() + + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + names = [s["name"] for s in spans] + + assert "planner" in names, f"Missing planner span; got {names}" + assert "synthesizer" in names, f"Missing synthesizer span; got {names}" + + def test_child_llm_spans_emitted_when_enabled(self): + ig = _make_instrumented(emit_genai_child_spans=True) + ig.invoke({"query": "test"}) + otlp = ig.session.flush_otlp() + + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + names = [s["name"] for s in spans] + + assert names.count("openai.chat.completion") == 2, ( + f"Expected 2 child LLM spans; got {names}" + ) + + def test_no_child_llm_spans_when_disabled(self): + ig = _make_instrumented(emit_genai_child_spans=False) + ig.invoke({"query": "test"}) + otlp = ig.session.flush_otlp() + + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + names = [s["name"] for s in spans] + + assert "openai.chat.completion" not in names + + +# ========================================================================= +# 2. 
OTLP → param.* attributes +# ========================================================================= + + +class TestE2EParamAttributes: + """M1 gate: spans carry ``param.*`` and ``param.*.trainable``.""" + + def test_planner_span_has_param_attributes(self): + ig = _make_instrumented() + ig.invoke({"query": "test"}) + otlp = ig.session.flush_otlp() + + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + planner = next(s for s in spans if s["name"] == "planner") + attrs = { + a["key"]: a["value"]["stringValue"] + for a in planner["attributes"] + } + + assert "param.planner_prompt" in attrs + assert attrs["param.planner_prompt"] == "Plan for: {query}" + assert "param.planner_prompt.trainable" in attrs + assert attrs["param.planner_prompt.trainable"] == "True" + + def test_synthesizer_span_has_param_attributes(self): + ig = _make_instrumented() + ig.invoke({"query": "test"}) + otlp = ig.session.flush_otlp() + + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + synth = next(s for s in spans if s["name"] == "synthesizer") + attrs = { + a["key"]: a["value"]["stringValue"] + for a in synth["attributes"] + } + + assert "param.synthesizer_prompt" in attrs + assert attrs["param.synthesizer_prompt.trainable"] == "True" + + +# ========================================================================= +# 3. 
OTLP → TGJ → ParameterNode + MessageNode +# ========================================================================= + + +class TestE2EOtlpToTgj: + """M1 gate: OTLP→TGJ→ingest_tgj produces ParameterNode + MessageNode.""" + + def test_tgj_has_parameter_nodes(self): + ig = _make_instrumented() + ig.invoke({"query": "hello"}) + otlp = ig.session.flush_otlp() + + docs = otlp_traces_to_trace_json( + otlp, agent_id_hint="e2e-test", use_temporal_hierarchy=True, + ) + assert len(docs) >= 1 + + nodes = ingest_tgj(docs[0]) + param_nodes = [ + n for n in nodes.values() + if isinstance(n, ParameterNode) and n.trainable + ] + assert len(param_nodes) > 0, "Expected at least one trainable ParameterNode" + + def test_tgj_has_message_nodes(self): + ig = _make_instrumented() + ig.invoke({"query": "hello"}) + otlp = ig.session.flush_otlp() + + docs = otlp_traces_to_trace_json( + otlp, agent_id_hint="e2e-test", use_temporal_hierarchy=True, + ) + nodes = ingest_tgj(docs[0]) + msg_nodes = [ + n for n in nodes.values() if isinstance(n, MessageNode) + ] + assert len(msg_nodes) > 0, "Expected at least one MessageNode" + + def test_message_node_has_parameter_parent(self): + """MessageNode for planner should have planner_prompt ParameterNode as parent.""" + ig = _make_instrumented() + ig.invoke({"query": "hello"}) + otlp = ig.session.flush_otlp() + + docs = otlp_traces_to_trace_json( + otlp, agent_id_hint="e2e-test", use_temporal_hierarchy=True, + ) + nodes = ingest_tgj(docs[0]) + + # Find the planner MessageNode + planner_msgs = [ + n for n in nodes.values() + if isinstance(n, MessageNode) + and "planner" in (n.py_name or "").lower() + ] + assert len(planner_msgs) > 0, "Expected planner MessageNode" + + planner_msg = planner_msgs[0] + parent_names = [p.py_name for p in planner_msg.parents] + # At least one parent should be the planner_prompt ParameterNode + has_param_parent = any( + isinstance(p, ParameterNode) and "planner_prompt" in p.py_name + for p in planner_msg.parents + ) + assert 
has_param_parent, ( + f"planner MessageNode should have planner_prompt ParameterNode " + f"as parent; got parents: {parent_names}" + ) + + +# ========================================================================= +# 4. Temporal integrity: child spans don't break the chain +# ========================================================================= + + +class TestE2ETemporalIntegrity: + """M1 acceptance gate #5: child spans must NOT advance TGJ temporal chain.""" + + def test_synthesizer_temporal_parent_is_planner_not_child_span(self): + ig = _make_instrumented(emit_genai_child_spans=True) + ig.invoke({"query": "test temporal"}) + otlp = ig.session.flush_otlp() + + docs = otlp_traces_to_trace_json( + otlp, agent_id_hint="e2e-test", use_temporal_hierarchy=True, + ) + doc = docs[0] + tgj_nodes = doc["nodes"] + + # Collect span IDs of child LLM spans (kind=msg, name contains "chat") + llm_span_ids = set() + for nid, n in tgj_nodes.items(): + if n.get("kind") == "msg": + otel_info = (n.get("info") or {}).get("otel", {}) + nm = n.get("name", "") + if "openai" in nm or "chat" in nm: + llm_span_ids.add(otel_info.get("span_id")) + + # Get synthesizer node and check its parent reference + synth_nodes = [ + (nid, n) for nid, n in tgj_nodes.items() + if n.get("kind") == "msg" and n.get("name") == "synthesizer" + ] + assert len(synth_nodes) >= 1, "Missing synthesizer msg node in TGJ" + + _, synth = synth_nodes[0] + parent_ref = synth.get("inputs", {}).get("parent", "") + + if parent_ref and isinstance(parent_ref, str) and ":" in parent_ref: + _, ref_span_id = parent_ref.rsplit(":", 1) + assert ref_span_id not in llm_span_ids, ( + "Synthesizer's temporal parent must NOT be a child LLM span" + ) + + def test_temporal_chain_preserved_after_ingest(self): + """After ingest, planner MessageNode should be an ancestor of synthesizer.""" + ig = _make_instrumented(emit_genai_child_spans=True) + ig.invoke({"query": "chain test"}) + otlp = ig.session.flush_otlp() + + docs = 
otlp_traces_to_trace_json( + otlp, agent_id_hint="e2e-test", use_temporal_hierarchy=True, + ) + nodes = ingest_tgj(docs[0]) + + # Find planner and synthesizer MessageNodes + planner_nodes = [ + n for n in nodes.values() + if isinstance(n, MessageNode) and "planner" in n.py_name + and "openai" not in n.py_name + ] + synth_nodes = [ + n for n in nodes.values() + if isinstance(n, MessageNode) and "synthesizer" in n.py_name + and "openai" not in n.py_name + ] + + if not planner_nodes or not synth_nodes: + # If names are mangled, at least verify that we have multiple + # MessageNodes and they have parent relationships + msg_nodes = [ + n for n in nodes.values() if isinstance(n, MessageNode) + ] + assert len(msg_nodes) >= 2, ( + "Expected at least 2 MessageNodes (planner + synthesizer)" + ) + return + + synth = synth_nodes[0] + # Walk ancestors of synthesizer + visited, stack = set(), list(synth.parents) + found_planner = False + while stack: + node = stack.pop() + if id(node) in visited: + continue + visited.add(id(node)) + if node in planner_nodes: + found_planner = True + break + stack.extend(getattr(node, "parents", [])) + + assert found_planner, ( + "Synthesizer MessageNode should have planner MessageNode as " + "ancestor via temporal chain" + ) + + +# ========================================================================= +# 5. 
Bindings round-trip: apply_updates → template change → next invoke +# ========================================================================= + + +class TestE2EBindingRoundTrip: + """M1 gate: bindings correctly propagate optimizer output to runtime.""" + + def test_auto_derived_bindings_are_functional(self): + ig = _make_instrumented() + assert ig.bindings["planner_prompt"].get() == "Plan for: {query}" + ig.bindings["planner_prompt"].set("NEW") + assert ig.templates["planner_prompt"] == "NEW" + + def test_apply_updates_changes_template(self): + ig = _make_instrumented() + apply_updates( + {"planner_prompt": "UPDATED: {query}"}, + ig.bindings, + ) + assert ig.templates["planner_prompt"] == "UPDATED: {query}" + assert ig.bindings["planner_prompt"].get() == "UPDATED: {query}" + + def test_updated_template_used_in_next_invoke(self): + """After apply_updates, the next invoke records the NEW template.""" + ig = _make_instrumented() + + # --- invoke 1: original template --- + ig.invoke({"query": "test"}) + otlp1 = ig.session.flush_otlp() + spans1 = otlp1["resourceSpans"][0]["scopeSpans"][0]["spans"] + p1 = next(s for s in spans1 if s["name"] == "planner") + a1 = {a["key"]: a["value"]["stringValue"] for a in p1["attributes"]} + assert a1["param.planner_prompt"] == "Plan for: {query}" + + # --- apply update --- + apply_updates({"planner_prompt": "UPDATED: {query}"}, ig.bindings) + + # --- invoke 2: updated template --- + ig.invoke({"query": "test"}) + otlp2 = ig.session.flush_otlp() + spans2 = otlp2["resourceSpans"][0]["scopeSpans"][0]["spans"] + p2 = next(s for s in spans2 if s["name"] == "planner") + a2 = {a["key"]: a["value"]["stringValue"] for a in p2["attributes"]} + assert a2["param.planner_prompt"] == "UPDATED: {query}" + + +# ========================================================================= +# 6. 
optimize_graph() — eval-only mode (no optimizer) +# ========================================================================= + + +class TestE2EOptimizeEvalOnly: + """Run optimize_graph with custom eval_fn but without optimizer.""" + + def test_baseline_and_iterations_run(self): + ig = _make_instrumented() + + def score_fn(payload): + answer = payload.get("answer", "") + if isinstance(answer, dict): + answer = str(answer.get("answer", "")) + return EvalResult( + score=min(len(str(answer)) / 100.0, 1.0), + feedback="length-based eval", + ) + + result = optimize_graph( + ig, + queries=["What is Python?", "Explain AI"], + iterations=1, + eval_fn=score_fn, + apply_updates_flag=False, + ) + + assert result.baseline_score >= 0 + assert len(result.score_history) == 2 # baseline + 1 iter + assert len(result.all_runs) == 2 + assert len(result.all_runs[0]) == 2 # 2 queries per iter + + # Each RunResult should carry OTLP data + for run in result.all_runs[0]: + assert "resourceSpans" in run.otlp + + def test_on_iteration_callback(self): + ig = _make_instrumented() + log: list = [] + + def on_iter(iter_num, runs, updates): + log.append({"iter": iter_num, "n_runs": len(runs)}) + + result = optimize_graph( + ig, + queries=["q1"], + iterations=2, + eval_fn=lambda p: 0.5, + on_iteration=on_iter, + ) + + # on_iteration is called for iterations 1 and 2 (not baseline) + assert len(log) == 2 + assert log[0]["iter"] == 1 + assert log[1]["iter"] == 2 + + +# ========================================================================= +# 7. 
optimize_graph() — with mock optimizer → apply_updates +# ========================================================================= + + +class TestE2EOptimizeWithMockOptimizer: + """Full pipeline with injected mock optimizer to verify apply_updates.""" + + def test_mock_optimizer_updates_are_applied(self): + ig = _make_instrumented( + templates={ + "planner_prompt": "ORIGINAL plan for: {query}", + "synthesizer_prompt": "ORIGINAL synth: {query} | {plan}", + } + ) + mock = MockOptimizer() + + result = optimize_graph( + ig, + queries=["What is AI?"], + iterations=1, + optimizer=mock, + eval_fn=lambda p: EvalResult(score=0.6, feedback="ok"), + ) + + # Optimizer methods should have been called + assert "zero_feedback" in mock.calls + assert any("backward" in c for c in mock.calls) + assert "step" in mock.calls + + # apply_updates should have changed planner_prompt + assert ig.templates["planner_prompt"] == ( + "OPTIMIZED: Create an improved plan for: {query}" + ) + + def test_second_iteration_uses_updated_template(self): + """After optimizer updates, next iteration should see the new template.""" + ig = _make_instrumented( + templates={ + "planner_prompt": "ORIGINAL: {query}", + "synthesizer_prompt": "Synth: {query} | {plan}", + } + ) + mock = MockOptimizer() + + captured_otlps: List[Dict[str, Any]] = [] + + def eval_fn(payload): + captured_otlps.append(payload.get("otlp", {})) + return EvalResult(score=0.5, feedback="test") + + result = optimize_graph( + ig, + queries=["q1"], + iterations=2, + optimizer=mock, + eval_fn=eval_fn, + ) + + # We should have captured OTLP from baseline + iter1 + iter2 = 3 invocations + assert len(captured_otlps) == 3 + + # The 3rd invocation (iteration 2) should use the updated template + last_otlp = captured_otlps[-1] + spans = last_otlp.get("resourceSpans", [{}])[0].get("scopeSpans", [{}])[0].get("spans", []) + planner_spans = [s for s in spans if s.get("name") == "planner"] + + if planner_spans: + attrs = { + a["key"]: 
a["value"]["stringValue"] + for a in planner_spans[0].get("attributes", []) + } + assert "OPTIMIZED" in attrs.get("param.planner_prompt", ""), ( + "Second+ iteration should use the OPTIMIZED template" + ) + + def test_optimization_result_structure(self): + ig = _make_instrumented() + mock = MockOptimizer() + + result = optimize_graph( + ig, + queries=["q1", "q2"], + iterations=2, + optimizer=mock, + eval_fn=lambda p: EvalResult(score=0.7, feedback="good"), + ) + + assert isinstance(result.baseline_score, float) + assert isinstance(result.best_score, float) + assert isinstance(result.best_iteration, int) + assert isinstance(result.best_updates, dict) + assert isinstance(result.final_parameters, dict) + assert len(result.score_history) == 3 # baseline + 2 iters + assert len(result.all_runs) == 3 + + +# ========================================================================= +# 8. Full round-trip: instrument → invoke → TGJ → optimizer → apply → re-invoke +# ========================================================================= + + +class TestE2EFullRoundTrip: + """The ultimate M1 acceptance test: all components wired together.""" + + def test_full_pipeline_end_to_end(self): + """ + 1. instrument_graph with initial templates + 2. invoke → OTLP → verify spans + 3. OTLP → TGJ → verify ParameterNode + MessageNode + 4. apply_updates → verify template change + 5. 
re-invoke → verify new template in OTLP + """ + # --- Step 1: instrument --- + templates = { + "planner_prompt": "V1: Plan for {query}", + "synthesizer_prompt": "V1: Synthesize {query} with {plan}", + } + ig = _make_instrumented(templates=templates) + + # --- Step 2: invoke --- + result = ig.invoke({"query": "What is ML?"}) + assert "answer" in result + + otlp = ig.session.flush_otlp() + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + span_names = [s["name"] for s in spans] + assert "planner" in span_names + assert "synthesizer" in span_names + + # Verify param attributes + planner_span = next(s for s in spans if s["name"] == "planner") + attrs = { + a["key"]: a["value"]["stringValue"] + for a in planner_span["attributes"] + } + assert attrs["param.planner_prompt"] == "V1: Plan for {query}" + assert attrs["param.planner_prompt.trainable"] == "True" + + # --- Step 3: OTLP → TGJ → Trace nodes --- + docs = otlp_traces_to_trace_json( + otlp, agent_id_hint="e2e-test", use_temporal_hierarchy=True, + ) + assert len(docs) >= 1 + + nodes = ingest_tgj(docs[0]) + param_nodes = [ + n for n in nodes.values() + if isinstance(n, ParameterNode) and n.trainable + ] + msg_nodes = [ + n for n in nodes.values() if isinstance(n, MessageNode) + ] + assert len(param_nodes) > 0, "TGJ must produce trainable ParameterNodes" + assert len(msg_nodes) > 0, "TGJ must produce MessageNodes" + + # --- Step 4: apply_updates --- + apply_updates( + {"planner_prompt": "V2: Improved plan for {query}"}, + ig.bindings, + ) + assert ig.templates["planner_prompt"] == "V2: Improved plan for {query}" + + # --- Step 5: re-invoke with new template --- + result2 = ig.invoke({"query": "What is DL?"}) + assert "answer" in result2 + + otlp2 = ig.session.flush_otlp() + spans2 = otlp2["resourceSpans"][0]["scopeSpans"][0]["spans"] + planner2 = next(s for s in spans2 if s["name"] == "planner") + attrs2 = { + a["key"]: a["value"]["stringValue"] + for a in planner2["attributes"] + } + assert 
attrs2["param.planner_prompt"] == "V2: Improved plan for {query}", ( + "Re-invocation must use the UPDATED template" + ) + + def test_optimize_graph_full_integration(self): + """optimize_graph with mock optimizer: end-to-end template update.""" + ig = _make_instrumented( + templates={ + "planner_prompt": "BEFORE: Plan for {query}", + "synthesizer_prompt": "BEFORE: Synth {query} | {plan}", + } + ) + mock = MockOptimizer() + + result = optimize_graph( + ig, + queries=["What is AI?"], + iterations=1, + optimizer=mock, + eval_fn=lambda p: EvalResult(score=0.5, feedback="needs work"), + ) + + # Verify optimizer was exercised + assert "step" in mock.calls + + # Verify templates were updated + assert ig.templates["planner_prompt"].startswith("OPTIMIZED:") + + # Verify final_parameters reflect the update + assert "planner_prompt" in result.final_parameters + assert result.final_parameters["planner_prompt"].startswith("OPTIMIZED:") + + # Verify score history + assert len(result.score_history) == 2 # baseline + 1 iter + assert all(isinstance(s, float) for s in result.score_history) diff --git a/tests/features_tests/test_tgj_otel_integration.py b/tests/features_tests/test_tgj_otel_integration.py new file mode 100644 index 00000000..aaa910fa --- /dev/null +++ b/tests/features_tests/test_tgj_otel_integration.py @@ -0,0 +1,280 @@ +import math +from opto.trace.nodes import Node, MessageNode, ParameterNode +from opto.trace.io.tgj_ingest import ingest_tgj, merge_tgj, TLSFIngestor +from opto.trace.io.tgj_export import export_subgraph_to_tgj +from opto.trace.io.otel_adapter import otlp_traces_to_trace_json, PROFILE_VERSION +from opto.trace.propagators.graph_propagator import GraphPropagator + +# ---------- 1) MLflow-style single-agent training pipeline ---------- +MLFLOW_TGJ = { + "tgj":"1.0","run_id":"run-mlf-1","agent_id":"trainer","graph_id":"train","scope":"trainer/0", + "nodes":[ + {"id":"lr","kind":"parameter","name":"learning_rate","value":0.01,"trainable":True}, + 
{"id":"epochs","kind":"value","name":"epochs","value":3}, + {"id":"data","kind":"value","name":"dataset","value":"s3://bucket/train.csv"}, + {"id":"model","kind":"message","name":"model","description":"[train] fit(X,y)", + "inputs":{"lr":{"ref":"lr"},"epochs":{"ref":"epochs"},"Xy":{"ref":"data"}}, + "output":{"name":"weights","value":{"w":[0.1,0.2]}} }, + {"id":"eval","kind":"message","name":"accuracy","description":"[eval] accuracy(model, X_valid)", + "inputs":{"model":{"ref":"model"}}, "output":{"name":"acc","value":0.72}} + ] +} + +def test_mlflow_like_graph_backward(): + mp = ingest_tgj(MLFLOW_TGJ) + acc = mp["accuracy"] + assert isinstance(acc, MessageNode) + gp = GraphPropagator() + acc.backward("higher is better", propagator=gp, retain_graph=True) + seen, stack, params = set(), [acc], [] + while stack: + node = stack.pop() + for parent in node.parents: + if parent not in seen: + seen.add(parent) + stack.append(parent) + if isinstance(parent, ParameterNode): + params.append(parent) + assert any(p.py_name.split('/')[-1].startswith("learning_rate") for p in params) + +# ---------- 2) OpenTelemetry “Astronomy Shop” multi-agent ---------- +ASTRO_CHECKOUT = { + "tgj":"1.0","run_id":"trace-astro","agent_id":"checkout","graph_id":"svc","scope":"checkout/1", + "nodes":[ + {"id":"req","kind":"value","name":"http_req","value":{"path":"/checkout","method":"POST"}}, + {"id":"checkout","kind":"message","name":"checkout","description":"[http:post] /checkout", + "inputs":{"req":{"ref":"req"}}, "output":{"name":"order_id","value":"OID-1"}} + ], + "exports":{"port://order":{"ref":"checkout"}} +} +ASTRO_PAYMENT = { + "tgj":"1.0","run_id":"trace-astro","agent_id":"payment","graph_id":"svc","scope":"payment/3", + "imports":{"port://order":{"from_agent":"checkout","from_graph":"svc"}}, + "nodes":[ + {"id":"charge","kind":"message","name":"charge","description":"[rpc:grpc] charge", + "inputs":{"order":{"export":"port://order"}}, "output":{"name":"receipt","value":"OK"}} + ] +} + 
+def test_astronomy_shop_multiagent_merge(): + merged = merge_tgj([ASTRO_CHECKOUT, ASTRO_PAYMENT]) + # sanity: both graphs loaded, edge wired through export + ck = "checkout/svc/trace-astro"; pk = "payment/svc/trace-astro" + assert "checkout" in merged[ck]["__TGJ_META__"]["scope"] + charge = merged[pk]["charge"]; order = merged[ck]["checkout"] + assert order in charge.parents + +# ---------- 3) Kubernetes control-plane mini trace (scheduler -> kubelet) ---------- +K8S_TGJ = { + "tgj":"1.0","run_id":"trace-k8s","agent_id":"scheduler","graph_id":"s1","scope":"scheduler/1", + "nodes":[ + {"id":"pod","kind":"value","name":"pod_spec","value":{"pod":"demo","cpu":"250m"}}, + {"id":"bind","kind":"message","name":"bind","description":"[schedule] bind pod", + "inputs":{"spec":{"ref":"pod"}}, "output":{"name":"nodeName","value":"node-1"}} + ], + "exports":{"port://bind":{"ref":"bind"}} +} +K8S_TGJ2 = { + "tgj":"1.0","run_id":"trace-k8s","agent_id":"kubelet","graph_id":"k1","scope":"kubelet/node-1", + "nodes":[ + {"id":"start","kind":"message","name":"start","description":"[container] run", + "inputs":{"binding":{"export":"port://bind"}}, "output":{"name":"status","value":"Running"}} + ] +} + +def test_k8s_stitch_and_backward(): + merged = merge_tgj([K8S_TGJ, K8S_TGJ2]) + klet = merged["kubelet/k1/trace-k8s"]["start"] + sched = merged["scheduler/s1/trace-k8s"]["bind"] + assert sched in klet.parents + gp = GraphPropagator() + klet.backward("keep containers running", propagator=gp, retain_graph=True) + seen, stack, found = set(), [klet], False + while stack: + node = stack.pop() + if node is sched: + found = True + for parent in node.parents: + if parent not in seen: + seen.add(parent) + stack.append(parent) + assert found + +# ---------- 4) OTel adapter round-trip (tiny) ---------- +def test_otel_adapter_minimal(): + otlp = { + "resourceSpans": [{ + "resource": {"attributes":[{"key":"service.name","value":{"stringValue":"svcA"}}, + 
{"key":"service.instance.id","value":{"stringValue":"i1"}}]}, + "scopeSpans": [{ + "scope": {"name":"scopeA"}, + "spans": [{ + "traceId":"t-1","spanId":"s-1","name":"GET /items","kind":"SERVER", + "startTimeUnixNano":"1","endTimeUnixNano":"1000000", + "attributes":[{"key":"http.method","value":{"stringValue":"GET"}}, + {"key":"http.url","value":{"stringValue":"/items"}}] + }] + }] + }] + } + docs = otlp_traces_to_trace_json(otlp) + assert docs and docs[0]["version"] == PROFILE_VERSION + mp = ingest_tgj(docs[0]) + node = mp["GET /items"] + assert isinstance(node, MessageNode) + +# ---------- 5) Export → Import round-trip ---------- +def test_export_import_roundtrip(): + # Build a mini graph in-memory and export + x = ParameterNode(-1.0, name="x", trainable=True, description="[Parameter]") + b = Node(1.0, name="b", description="[Node]") + a = MessageNode(Node(None, name="a_out"), inputs={"x":x}, description="[bar] -2*x", name="a") + y = MessageNode(Node(None, name="y_out"), inputs={"a":a,"b":b}, description="[add] a+b", name="y") + from opto.trace.io.tgj_export import export_subgraph_to_tgj + tgj = export_subgraph_to_tgj([y], run_id="r", agent_id="A", graph_id="g", scope="A/0") + assert any(rec.get("op") for rec in tgj["nodes"] if rec["kind"]=="message") + mp = ingest_tgj(tgj) + y2 = mp["y"] + assert isinstance(y2, MessageNode) + # parents should be present + assert any(p.py_name.split('/')[-1].startswith("a") for p in y2.parents) + + +def test_tlsf_ingestor_with_trace_json(): + otlp = { + "resourceSpans": [{ + "resource": {"attributes":[{"key":"service.name","value":{"stringValue":"svcA"}}, + {"key":"service.instance.id","value":{"stringValue":"i1"}}]}, + "scopeSpans": [{ + "scope": {"name":"scopeA"}, + "spans": [{ + "traceId":"t-2","spanId":"s-2","name":"POST /submit","kind":"SERVER", + "startTimeUnixNano":"1","endTimeUnixNano":"1000", + "attributes":[{"key":"http.method","value":{"stringValue":"POST"}}] + }] + }] + }] + } + docs = otlp_traces_to_trace_json(otlp) + 
ing = TLSFIngestor() + ing.ingest_tgj(docs[0]) + node = ing.get("POST /submit") + assert isinstance(node, MessageNode) + +# ---------- 6) Log enrichment via TGJ merge ---------- +LOG_TGJ = { + "tgj":"1.0","run_id":"trace-k8s","agent_id":"logger","graph_id":"log","scope":"logger/0", + "imports":{"port://bind":{"from_agent":"scheduler","from_graph":"s1"}}, + "nodes":[ + {"id":"audit","kind":"message","name":"audit","description":"[log] bind recorded", + "inputs":{"binding":{"export":"port://bind"}}, "output":{"name":"logline","value":"bind logged"}} + ] +} + +def test_log_enrichment_from_tgj(): + merged = merge_tgj([K8S_TGJ, LOG_TGJ]) + audit = merged["logger/log/trace-k8s"]["audit"] + bind = merged["scheduler/s1/trace-k8s"]["bind"] + assert bind in audit.parents + +# ---------- 7) Link JSON parameter to executable code ---------- +TRAINABLE_TGJ = { + "tgj":"1.0","run_id":"rt","agent_id":"agent","graph_id":"g","scope":"agent/0", + "nodes":[ + {"id":"w","kind":"parameter","name":"weight","value":1.0,"trainable":True}, + {"id":"x","kind":"value","name":"input","value":2.0}, + {"id":"prod","kind":"message","name":"prod","description":"[mul] weight*input", + "inputs":{"w":{"ref":"w"},"x":{"ref":"x"}}, "output":{"name":"p_out","value":2.0}} + ] +} + +def test_link_trainable_parameter_from_json(): + mp = ingest_tgj(TRAINABLE_TGJ) + w = mp["weight"] + assert isinstance(w, ParameterNode) + loss = MessageNode(Node(w.data ** 2, name="loss_out"), inputs={"w": w}, description="[square] w^2", name="loss") + gp = GraphPropagator() + loss.backward("minimize", propagator=gp, retain_graph=True) + seen, stack, params = set(), [loss], [] + while stack: + node = stack.pop() + for parent in node.parents: + if parent not in seen: + seen.add(parent) + stack.append(parent) + if isinstance(parent, ParameterNode): + params.append(parent) + assert w in params + +# ---------- 8) Branch reconstruction and filtering ---------- +BRANCH_TGJ = { + 
"tgj":"1.0","run_id":"r-branch","agent_id":"agent","graph_id":"g","scope":"agent/0", + "nodes":[ + {"id":"x","kind":"value","name":"x","value":1}, + {"id":"dup","kind":"message","name":"dup","description":"[dup] x", + "inputs":{"x":{"ref":"x"}}, "output":{"name":"x2","value":1}}, + {"id":"left","kind":"message","name":"left","description":"[add] dup+1", + "inputs":{"d":{"ref":"dup"}}, "output":{"name":"l","value":2}}, + {"id":"right","kind":"message","name":"right","description":"[sub] dup-1", + "inputs":{"d":{"ref":"dup"}}, "output":{"name":"r","value":0}}, + {"id":"merge","kind":"message","name":"merge","description":"[add] left+right", + "inputs":{"a":{"ref":"left"},"b":{"ref":"right"}}, "output":{"name":"m","value":2}} + ] +} + +def test_branch_reconstruction_and_filtering(): + mp = ingest_tgj(BRANCH_TGJ) + merge = mp["merge"] + visited, stack, msg_names, value_names = set(), [merge], [], [] + while stack: + node = stack.pop() + if node in visited: + continue + visited.add(node) + base = node.name.split('/')[-1].split(":")[0] + if isinstance(node, MessageNode): + msg_names.append(base) + else: + value_names.append(base) + stack.extend(node.parents) + assert set(["merge", "left", "right", "dup"]).issubset(set(msg_names)) + assert "x" in value_names + +# ---------- 9) OTel parent-child reconstruction ---------- +OTLP_BRANCH = { + "resourceSpans": [{ + "resource": {"attributes":[{"key":"service.name","value":{"stringValue":"svc"}}]}, + "scopeSpans": [{ + "scope": {"name":"scope"}, + "spans": [ + {"traceId":"t","spanId":"p","name":"parent","kind":"SERVER"}, + {"traceId":"t","spanId":"c1","parentSpanId":"p","name":"child1","kind":"INTERNAL"}, + {"traceId":"t","spanId":"c2","parentSpanId":"p","name":"child2","kind":"INTERNAL"} + ] + }] + }] +} + +def test_otel_parent_child_hierarchy(): + docs = otlp_traces_to_trace_json(OTLP_BRANCH) + mp = ingest_tgj(docs[0]) + child1 = mp["child1"] + parent = mp["parent"] + # parent id recovered automatically from parentSpanId + 
#assert child1.parents[0].name.split('/')[-1].split(":")[0] == "p" + assert child1.parents[0] is parent + # manual relink to the full parent node + child1.parents[0] = parent + child2 = mp["child2"] + child2.parents[0] = parent + visited, stack, names = set(), [child2], [] + while stack: + node = stack.pop() + if node in visited: + continue + visited.add(node) + names.append(node.name.split('/')[-1].split(":")[0]) + stack.extend(node.parents) + assert "parent" in names and "child1" not in names + child_nodes = [n for n in visited if n.name.split('/')[-1].split(":")[0].startswith("child")] + assert all(isinstance(n, MessageNode) for n in child_nodes) diff --git a/tests/unit_tests/test_bindings.py b/tests/unit_tests/test_bindings.py new file mode 100644 index 00000000..e03b8190 --- /dev/null +++ b/tests/unit_tests/test_bindings.py @@ -0,0 +1,69 @@ +"""Tests for opto.trace.io.bindings.""" +import pytest +from opto.trace.io.bindings import Binding, apply_updates, make_dict_binding + + +class TestBinding: + def test_basic_get_set(self): + store = {"val": "hello"} + b = Binding(get=lambda: store["val"], set=lambda v: store.__setitem__("val", v)) + assert b.get() == "hello" + b.set("world") + assert store["val"] == "world" + + def test_kind_default(self): + b = Binding(get=lambda: None, set=lambda v: None) + assert b.kind == "prompt" + + def test_kind_code(self): + b = Binding(get=lambda: None, set=lambda v: None, kind="code") + assert b.kind == "code" + + +class TestApplyUpdates: + def test_apply_single(self): + store = {"prompt": "old"} + bindings = {"prompt": make_dict_binding(store, "prompt")} + apply_updates({"prompt": "new"}, bindings) + assert store["prompt"] == "new" + + def test_apply_multiple(self): + store = {"a": "1", "b": "2"} + bindings = { + "a": make_dict_binding(store, "a"), + "b": make_dict_binding(store, "b"), + } + apply_updates({"a": "X", "b": "Y"}, bindings) + assert store == {"a": "X", "b": "Y"} + + def test_strict_missing_key_raises(self): + 
bindings = {"a": make_dict_binding({}, "a")} + with pytest.raises(KeyError, match="no binding for key 'z'"): + apply_updates({"z": "val"}, bindings, strict=True) + + def test_non_strict_missing_key_skips(self): + store = {"a": "old"} + bindings = {"a": make_dict_binding(store, "a")} + apply_updates({"a": "new", "z": "skip"}, bindings, strict=False) + assert store["a"] == "new" + + def test_empty_updates(self): + store = {"a": "old"} + bindings = {"a": make_dict_binding(store, "a")} + apply_updates({}, bindings) + assert store["a"] == "old" + + +class TestMakeDictBinding: + def test_roundtrip(self): + store = {"key": "initial"} + b = make_dict_binding(store, "key") + assert b.get() == "initial" + b.set("updated") + assert b.get() == "updated" + assert store["key"] == "updated" + + def test_missing_key_returns_none(self): + store = {} + b = make_dict_binding(store, "missing") + assert b.get() is None diff --git a/tests/unit_tests/test_instrumentation.py b/tests/unit_tests/test_instrumentation.py new file mode 100644 index 00000000..35103624 --- /dev/null +++ b/tests/unit_tests/test_instrumentation.py @@ -0,0 +1,198 @@ +"""Tests for opto.trace.io.instrumentation.""" +import pytest +from opto.trace.io.instrumentation import instrument_graph, InstrumentedGraph +from opto.trace.io.telemetry_session import TelemetrySession +from opto.trace.io.bindings import Binding, make_dict_binding + + +class _StubLLM: + """Minimal deterministic LLM stub for testing.""" + model = "stub" + call_count = 0 + + def __call__(self, messages=None, **kwargs): + self.call_count += 1 + + class Msg: + content = f"stub response #{self.call_count}" + + class Choice: + message = Msg() + + class Resp: + choices = [Choice()] + + return Resp() + + +class TestInstrumentGraph: + def test_returns_instrumented_graph(self): + ig = instrument_graph( + graph=None, + service_name="test", + llm=_StubLLM(), + initial_templates={"prompt_a": "template A"}, + ) + assert isinstance(ig, InstrumentedGraph) + assert 
ig.session is not None + assert ig.tracing_llm is not None + + def test_auto_derives_bindings_from_templates(self): + ig = instrument_graph( + graph=None, + service_name="test", + llm=_StubLLM(), + initial_templates={"prompt_a": "A", "prompt_b": "B"}, + ) + assert "prompt_a" in ig.bindings + assert "prompt_b" in ig.bindings + assert ig.bindings["prompt_a"].get() == "A" + + def test_custom_bindings_override(self): + store = {"custom": "val"} + custom = {"custom": make_dict_binding(store, "custom")} + ig = instrument_graph( + graph=None, + service_name="test", + llm=_StubLLM(), + bindings=custom, + ) + assert "custom" in ig.bindings + assert ig.bindings["custom"].get() == "val" + + def test_reuse_existing_session(self): + session = TelemetrySession("shared-session") + ig = instrument_graph( + graph=None, + session=session, + llm=_StubLLM(), + ) + assert ig.session is session + + def test_trainable_keys_none_means_all(self): + ig = instrument_graph( + graph=None, + service_name="test", + trainable_keys=None, + llm=_StubLLM(), + ) + # trainable_keys=None -> _trainable_keys_all=True + assert ig.tracing_llm._trainable_keys_all is True + + def test_trainable_keys_explicit(self): + ig = instrument_graph( + graph=None, + service_name="test", + trainable_keys={"planner"}, + llm=_StubLLM(), + ) + assert ig.tracing_llm._trainable_keys_all is False + assert "planner" in ig.tracing_llm.trainable_keys + + def test_compiles_graph_if_needed(self): + class FakeGraph: + compiled = False + def compile(self): + self.compiled = True + return self + + fg = FakeGraph() + ig = instrument_graph(graph=fg, llm=_StubLLM()) + assert fg.compiled is True + + +class TestTracingLLMChildSpan: + def test_child_span_emitted(self): + ig = instrument_graph( + graph=None, + service_name="test-child", + llm=_StubLLM(), + emit_genai_child_spans=True, + initial_templates={"my_prompt": "Hello {query}"}, + ) + ig.tracing_llm.node_call( + span_name="test_node", + template_name="my_prompt", + template="Hello 
{query}", + optimizable_key="test_node", + messages=[{"role": "user", "content": "hi"}], + ) + otlp = ig.session.flush_otlp() + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + names = [s["name"] for s in spans] + assert "test_node" in names + assert "llm.chat.completion" in names + + # Child span should have trace.temporal_ignore + child = [s for s in spans if s["name"] == "llm.chat.completion"][0] + attrs = {a["key"]: a["value"]["stringValue"] for a in child["attributes"]} + assert attrs.get("trace.temporal_ignore") == "true" + assert "gen_ai.operation.name" in attrs + + def test_no_child_span_when_disabled(self): + ig = instrument_graph( + graph=None, + service_name="test-nochild", + llm=_StubLLM(), + emit_genai_child_spans=False, + ) + ig.tracing_llm.node_call( + span_name="test_node", + messages=[{"role": "user", "content": "hi"}], + ) + otlp = ig.session.flush_otlp() + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + names = [s["name"] for s in spans] + assert "test_node" in names + assert "llm.chat.completion" not in names + + +class TestTemporalChaining: + """M1 acceptance: child spans must NOT advance TGJ temporal chaining.""" + + def test_child_spans_do_not_advance_temporal_chain(self): + from opto.trace.io.otel_adapter import otlp_traces_to_trace_json + + ig = instrument_graph( + graph=None, + service_name="temporal-test", + llm=_StubLLM(), + emit_genai_child_spans=True, + ) + # Emit two node spans; each with a child LLM span + ig.tracing_llm.node_call( + span_name="node_A", + template_name="prompt_a", + template="prompt A", + optimizable_key="node_A", + messages=[{"role": "user", "content": "q1"}], + ) + ig.tracing_llm.node_call( + span_name="node_B", + template_name="prompt_b", + template="prompt B", + optimizable_key="node_B", + messages=[{"role": "user", "content": "q2"}], + ) + otlp = ig.session.flush_otlp() + + # Convert to TGJ with temporal hierarchy + docs = otlp_traces_to_trace_json( + otlp, + 
agent_id_hint="temporal-test", + use_temporal_hierarchy=True, + ) + assert len(docs) >= 1 + doc = docs[0] + nodes = doc["nodes"] + + # The child LLM spans should NOT be temporal parents of node_B. + # node_B's parent should be node_A (not the child LLM span of A). + msg_nodes = { + nid: n for nid, n in nodes.items() + if n.get("kind") == "msg" + } + # There should be at least node_A and node_B as msg nodes + node_names = [n.get("name") for n in msg_nodes.values()] + assert "node_A" in node_names + assert "node_B" in node_names diff --git a/tests/unit_tests/test_langgraph_otel_runtime.py b/tests/unit_tests/test_langgraph_otel_runtime.py new file mode 100644 index 00000000..9dc4d05b --- /dev/null +++ b/tests/unit_tests/test_langgraph_otel_runtime.py @@ -0,0 +1,180 @@ +import pytest + +from opto.trace.io.langgraph_otel_runtime import ( + init_otel_runtime, + TracingLLM, + flush_otlp, + extract_eval_metrics_from_otlp, +) + + +class FakeLLM: + """ + Minimal LLM stub compatible with the TracingLLM expectations. 
+ """ + + class _Message: + def __init__(self, content: str) -> None: + self.content = content + + class _Choice: + def __init__(self, content: str) -> None: + self.message = FakeLLM._Message(content) + + class _Response: + def __init__(self, content: str) -> None: + self.choices = [FakeLLM._Choice(content)] + + def __init__(self, content: str = "OK") -> None: + self.content = content + self.calls = [] + + def __call__(self, messages=None, **kwargs): + self.calls.append({"messages": messages, "kwargs": kwargs}) + return FakeLLM._Response(self.content) + + +def _attrs_to_dict(attrs): + return {a["key"]: a["value"]["stringValue"] for a in attrs} + + +def test_tracing_llm_records_prompt_and_user_query(): + tracer, exporter = init_otel_runtime("test-llm") + llm = FakeLLM("ANSWER") + tllm = TracingLLM( + llm=llm, tracer=tracer, trainable_keys={"planner"}, + emit_llm_child_span=False, # test focuses on the node span only + ) + + messages = [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "What is 2+2?"}, + ] + + result = tllm.node_call( + span_name="planner", + template_name="planner_prompt", + template="Plan for: {query}", + optimizable_key="planner", + code_key=None, + code_fn=None, + user_query="What is 2+2?", + messages=messages, + ) + + assert result == "ANSWER" + assert len(llm.calls) == 1 + + otlp = flush_otlp(exporter, scope_name="test-llm") + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + assert len(spans) == 1 + span = spans[0] + assert span["name"] == "planner" + attrs = _attrs_to_dict(span["attributes"]) + + # prompt + trainable flag + assert attrs["param.planner_prompt"] == "Plan for: {query}" + # trainable flag is a bool string; be tolerant to case + assert attrs["param.planner_prompt.trainable"].lower() in ("true", "1") + + # inputs.* + assert attrs["inputs.user_query"] == "What is 2+2?" + assert attrs["inputs.gen_ai.prompt"] == "What is 2+2?" 


def test_tracing_llm_trainable_flag_respects_keys():
    # A key that is NOT in trainable_keys must not be marked trainable.
    tracer, exporter = init_otel_runtime("test-llm-trainable")
    llm = FakeLLM("OK")
    tllm = TracingLLM(llm=llm, tracer=tracer, trainable_keys=set())

    messages = [{"role": "user", "content": "check"}]
    _ = tllm.node_call(
        span_name="planner",
        template_name="planner_prompt",
        template="Plan for: {query}",
        optimizable_key="planner",  # NOT in trainable_keys
        code_key=None,
        code_fn=None,
        user_query="check",
        messages=messages,
    )

    otlp = flush_otlp(exporter, scope_name="test-llm-trainable")
    spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"]
    attrs = _attrs_to_dict(spans[0]["attributes"])

    # Either missing or explicitly false; both are acceptable
    value = attrs.get("param.planner_prompt.trainable")
    assert value is None or value.lower() in ("false", "0")


def test_flush_otlp_clears_exporter():
    # Flushing must drain the in-memory exporter.
    tracer, exporter = init_otel_runtime("test-flush")
    llm = FakeLLM("OK")
    tllm = TracingLLM(llm=llm, tracer=tracer)

    messages = [{"role": "user", "content": "ping"}]
    _ = tllm.node_call(span_name="planner", messages=messages)

    # We should have spans before flush
    assert exporter.get_finished_spans()

    _ = flush_otlp(exporter, scope_name="test-flush")
    assert exporter.get_finished_spans() == []


def test_extract_eval_metrics_from_otlp_happy_path():
    # Synthetic OTLP payload with a single evaluator span
    otlp = {
        "resourceSpans": [
            {
                "resource": {"attributes": []},
                "scopeSpans": [
                    {
                        "scope": {"name": "demo"},
                        "spans": [
                            {
                                "name": "evaluator",
                                "attributes": [
                                    {"key": "eval.score", "value": {"stringValue": "0.9"}},
                                    {"key": "eval.answer_relevance", "value": {"stringValue": "0.8"}},
                                    {"key": "eval.groundedness", "value": {"stringValue": "0.7"}},
                                    {"key": "eval.plan_quality", "value": {"stringValue": "0.6"}},
                                    {"key": "eval.reasons", "value": {"stringValue": "good"}},
                                ],
                            }
                        ],
                    }
                ],
            }
        ]
    }

    # Pass explicit metric_keys matching
the synthetic payload + custom_keys = { + "answer_relevance": "eval.answer_relevance", + "groundedness": "eval.groundedness", + "plan_quality": "eval.plan_quality", + } + score, metrics, reasons = extract_eval_metrics_from_otlp( + otlp, metric_keys=custom_keys + ) + assert score == 0.9 + assert metrics["answer_relevance"] == 0.8 + assert metrics["groundedness"] == 0.7 + assert metrics["plan_quality"] == 0.6 + assert reasons == "good" + + +def test_extract_eval_metrics_from_otlp_defaults_when_missing(): + # No evaluator span at all -> fall back to defaults (still usable) + otlp = {"resourceSpans": []} + + score, metrics, reasons = extract_eval_metrics_from_otlp(otlp) + + # Default score is in [0,1] and we get non-empty metric dict. + assert 0.0 <= score <= 1.0 + assert metrics + for v in metrics.values(): + assert 0.0 <= v <= 1.0 + assert reasons == "" diff --git a/tests/unit_tests/test_optimization.py b/tests/unit_tests/test_optimization.py new file mode 100644 index 00000000..a91e3b05 --- /dev/null +++ b/tests/unit_tests/test_optimization.py @@ -0,0 +1,87 @@ +"""Tests for opto.trace.io.optimization.""" +import pytest +from opto.trace.io.optimization import ( + EvalResult, + _normalise_eval, + RunResult, + OptimizationResult, +) + + +class TestEvalResult: + def test_defaults(self): + er = EvalResult() + assert er.score is None + assert er.feedback == "" + assert er.metrics == {} + + def test_with_values(self): + er = EvalResult(score=0.8, feedback="good", metrics={"acc": 0.9}) + assert er.score == 0.8 + + +class TestNormaliseEval: + def test_from_float(self): + er = _normalise_eval(0.75) + assert er.score == 0.75 + assert er.feedback == "" + + def test_from_int(self): + er = _normalise_eval(1) + assert er.score == 1.0 + + def test_from_string_feedback(self): + er = _normalise_eval("needs improvement") + assert er.score is None + assert er.feedback == "needs improvement" + + def test_from_json_string(self): + import json + raw = json.dumps({"score": 0.9, "reasons": 
"well done"}) + er = _normalise_eval(raw) + assert er.score == 0.9 + assert "well done" in er.feedback + + def test_from_dict(self): + er = _normalise_eval({"score": 0.6, "feedback": "ok", "extra": 1}) + assert er.score == 0.6 + assert er.feedback == "ok" + + def test_from_eval_result(self): + original = EvalResult(score=0.5, feedback="test") + er = _normalise_eval(original) + assert er is original + + def test_from_unknown(self): + er = _normalise_eval(42.0) + assert er.score == 42.0 + + +class TestRunResult: + def test_fields(self): + rr = RunResult( + answer="hello", + score=0.8, + feedback="good", + metrics={"acc": 0.9}, + otlp={"resourceSpans": []}, + ) + assert rr.answer == "hello" + assert rr.score == 0.8 + + +class TestOptimizationResult: + def test_fields(self): + result = OptimizationResult( + baseline_score=0.5, + best_score=0.8, + best_iteration=2, + best_parameters={"prompt": "best"}, + best_updates={"prompt": "new"}, + final_parameters={"prompt": "new"}, + score_history=[0.5, 0.6, 0.8], + all_runs=[], + ) + assert result.best_score == 0.8 + assert result.best_iteration == 2 + assert result.best_parameters == {"prompt": "best"} diff --git a/tests/unit_tests/test_otel_semconv.py b/tests/unit_tests/test_otel_semconv.py new file mode 100644 index 00000000..f1855738 --- /dev/null +++ b/tests/unit_tests/test_otel_semconv.py @@ -0,0 +1,78 @@ +"""Tests for opto.trace.io.otel_semconv.""" +import json +import pytest +from opto.trace.io.otel_semconv import ( + set_span_attributes, + record_genai_chat, + emit_reward, + emit_trace, +) +from opto.trace.io.telemetry_session import TelemetrySession + + +class TestSetSpanAttributes: + def test_skips_none(self): + session = TelemetrySession("test-semconv") + with session.tracer.start_as_current_span("test") as sp: + set_span_attributes(sp, {"key1": "val1", "key2": None}) + otlp = session.flush_otlp() + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + attrs = {a["key"]: a["value"]["stringValue"] for a in 
spans[0]["attributes"]} + assert "key1" in attrs + assert "key2" not in attrs + + def test_serializes_dict(self): + session = TelemetrySession("test-semconv") + with session.tracer.start_as_current_span("test") as sp: + set_span_attributes(sp, {"data": {"nested": True}}) + otlp = session.flush_otlp() + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + attrs = {a["key"]: a["value"]["stringValue"] for a in spans[0]["attributes"]} + parsed = json.loads(attrs["data"]) + assert parsed == {"nested": True} + + +class TestRecordGenaiChat: + def test_emits_genai_attributes(self): + session = TelemetrySession("test-genai") + with session.tracer.start_as_current_span("llm_call") as sp: + record_genai_chat( + sp, + provider="openrouter", + model="llama-3.1", + input_messages=[{"role": "user", "content": "hello"}], + output_text="world", + ) + otlp = session.flush_otlp() + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + attrs = {a["key"]: a["value"]["stringValue"] for a in spans[0]["attributes"]} + assert attrs["gen_ai.operation.name"] == "chat.completion" + assert attrs["gen_ai.provider.name"] == "openrouter" + assert attrs["gen_ai.request.model"] == "llama-3.1" + assert "gen_ai.input.messages" in attrs + assert "gen_ai.output.messages" in attrs + + +class TestEmitReward: + def test_creates_reward_span(self): + session = TelemetrySession("test-reward") + emit_reward(session, value=0.85, name="accuracy") + otlp = session.flush_otlp() + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + assert len(spans) == 1 + attrs = {a["key"]: a["value"]["stringValue"] for a in spans[0]["attributes"]} + assert attrs["trace.temporal_ignore"] == "true" + assert attrs["agentlightning.reward.0.name"] == "accuracy" + assert attrs["agentlightning.reward.0.value"] == "0.85" + + +class TestEmitTrace: + def test_creates_custom_span(self): + session = TelemetrySession("test-trace") + emit_trace(session, name="my_signal", attrs={"custom_key": "custom_val"}) + otlp = 
session.flush_otlp() + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + assert len(spans) == 1 + assert spans[0]["name"] == "my_signal" + attrs = {a["key"]: a["value"]["stringValue"] for a in spans[0]["attributes"]} + assert attrs["custom_key"] == "custom_val" diff --git a/tests/unit_tests/test_telemetry_session.py b/tests/unit_tests/test_telemetry_session.py new file mode 100644 index 00000000..0527bc38 --- /dev/null +++ b/tests/unit_tests/test_telemetry_session.py @@ -0,0 +1,114 @@ +"""Tests for opto.trace.io.telemetry_session.""" +import pytest +from unittest.mock import patch, MagicMock +from opto.trace.io.telemetry_session import TelemetrySession + + +class TestTelemetrySession: + def test_flush_otlp_returns_spans(self): + session = TelemetrySession("test-session") + with session.tracer.start_as_current_span("span1") as sp: + sp.set_attribute("key", "val") + otlp = session.flush_otlp() + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + assert len(spans) >= 1 + assert spans[0]["name"] == "span1" + + def test_flush_otlp_clears_by_default(self): + session = TelemetrySession("test-clear") + with session.tracer.start_as_current_span("span1"): + pass + otlp1 = session.flush_otlp(clear=True) + spans1 = otlp1["resourceSpans"][0]["scopeSpans"][0]["spans"] + assert len(spans1) >= 1 + + otlp2 = session.flush_otlp(clear=True) + spans2 = otlp2["resourceSpans"][0]["scopeSpans"][0]["spans"] + assert len(spans2) == 0 + + def test_record_spans_false_noop(self): + session = TelemetrySession("test-noop", record_spans=False) + with session.tracer.start_as_current_span("span1"): + pass + otlp = session.flush_otlp() + assert otlp == {"resourceSpans": []} + + def test_flush_tgj_produces_docs(self): + session = TelemetrySession("test-tgj") + with session.tracer.start_as_current_span("node1") as sp: + sp.set_attribute("param.prompt", "hello world") + sp.set_attribute("param.prompt.trainable", True) + docs = session.flush_tgj() + assert len(docs) >= 1 + doc = 
docs[0] + assert "nodes" in doc + + def test_span_attribute_filter(self): + """Filter should be able to redact attributes.""" + def redact_filter(name, attrs): + # Drop any span named "secret" + if name == "secret": + return {} + # Otherwise pass through + return attrs + + session = TelemetrySession( + "test-filter", + span_attribute_filter=redact_filter, + ) + # The filter is stored but note: the real OTEL SDK doesn't call + # our filter. This tests that the parameter is accepted. + assert session.span_attribute_filter is not None + + +class TestExportRunBundle: + def test_creates_files(self, tmp_path): + session = TelemetrySession("test-bundle") + with session.tracer.start_as_current_span("node1") as sp: + sp.set_attribute("param.prompt", "test") + sp.set_attribute("param.prompt.trainable", True) + + out_dir = str(tmp_path / "bundle") + result = session.export_run_bundle( + out_dir, + prompts={"prompt": "test"}, + ) + assert result == out_dir + assert (tmp_path / "bundle" / "otlp_trace.json").exists() + assert (tmp_path / "bundle" / "trace_graph.json").exists() + assert (tmp_path / "bundle" / "prompts.json").exists() + + +class TestStableNodeIdentity: + """B4: message.id becomes stable TGJ node id.""" + + def test_message_id_used_as_node_id(self): + """When message.id is present on a span, the TGJ node id uses it.""" + session = TelemetrySession("test-stable") + with session.tracer.start_as_current_span("my_node") as sp: + sp.set_attribute("message.id", "stable_logical_id") + sp.set_attribute("param.prompt", "hello") + sp.set_attribute("param.prompt.trainable", "true") + + docs = session.flush_tgj() + assert len(docs) >= 1 + nodes = docs[0]["nodes"] + # The node should be keyed by message.id, not span id + assert "test-stable:stable_logical_id" in nodes + + def test_fallback_to_span_id_without_message_id(self): + """Without message.id, node id falls back to span id.""" + session = TelemetrySession("test-fallback") + with 
session.tracer.start_as_current_span("my_node") as sp: + sp.set_attribute("param.prompt", "hello") + sp.set_attribute("param.prompt.trainable", "true") + + docs = session.flush_tgj() + assert len(docs) >= 1 + nodes = docs[0]["nodes"] + # Should have a node keyed by svc:span_hex_id (16 hex chars) + node_keys = [k for k in nodes if k.startswith("test-fallback:") and "param_" not in k] + assert len(node_keys) >= 1 + # The key should NOT contain "stable_logical_id" + for k in node_keys: + assert "stable_logical_id" not in k From 5ab857ce1c531b901cc451524340e735f7266adc Mon Sep 17 00:00:00 2001 From: doxav Date: Sat, 18 Apr 2026 18:58:35 +0200 Subject: [PATCH 02/16] intermediate commit --- docs/GraphOptimization.md | 12 + ...mo_langgraph_instrument_and_optimize.ipynb | 364 +++++++++++++++--- ...ggraph_instrument_and_optimize_trace.ipynb | 137 +++++++ opto/features/flows/compose.py | 5 +- opto/features/priority_search/utils.py | 2 +- opto/optimizers/optimizer.py | 4 +- opto/trace/graph/__init__.py | 12 + opto/trace/graph/adapter.py | 303 +++++++++++++++ opto/trace/graph/module.py | 37 ++ opto/trace/graph/sidecars.py | 51 +++ opto/trace/io/__init__.py | 31 ++ opto/trace/io/graph_instrumentation.py | 155 ++++++++ opto/trace/io/instrumentation.py | 77 +++- opto/trace/io/optimization.py | 154 +++++++- opto/trainer/algorithms/__init__.py | 8 +- tests/features_tests/test_flows_compose.py | 22 ++ .../test_graph_module_prioritysearch.py | 10 + .../features_tests/test_graph_module_train.py | 122 ++++++ .../test_langgraph_notebooks.py | 31 ++ .../test_trace_graph_optimization.py | 243 ++++++++++++ .../test_graph_adapter_modulecandidate.py | 70 ++++ tests/unit_tests/test_graph_adapter_trace.py | 100 +++++ .../test_trace_graph_instrumentation.py | 138 +++++++ 23 files changed, 2026 insertions(+), 62 deletions(-) create mode 100644 examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb create mode 100644 opto/trace/graph/__init__.py create mode 100644 
opto/trace/graph/adapter.py create mode 100644 opto/trace/graph/module.py create mode 100644 opto/trace/graph/sidecars.py create mode 100644 opto/trace/io/graph_instrumentation.py create mode 100644 tests/features_tests/test_graph_module_prioritysearch.py create mode 100644 tests/features_tests/test_graph_module_train.py create mode 100644 tests/features_tests/test_langgraph_notebooks.py create mode 100644 tests/features_tests/test_trace_graph_optimization.py create mode 100644 tests/unit_tests/test_graph_adapter_modulecandidate.py create mode 100644 tests/unit_tests/test_graph_adapter_trace.py create mode 100644 tests/unit_tests/test_trace_graph_instrumentation.py diff --git a/docs/GraphOptimization.md b/docs/GraphOptimization.md index 46fd8ac8..d3f86fad 100644 --- a/docs/GraphOptimization.md +++ b/docs/GraphOptimization.md @@ -29,6 +29,17 @@ OpenTrace provides a unified API for instrumenting LangGraph agents with OpenTel **Total: ~645 lines reduced to ~10 lines.** +### Backend modes + +`instrument_graph()` and `optimize_graph()` support two backends: + +| Backend | Carrier | Best for | +|---|---|---| +| `backend="otel"` (default) | OTLP spans → TGJ → ingest | observability-first optimization | +| `backend="trace"` | native Trace nodes (`bundle()` / `node()`) | direct graph-native optimization | + +The OTEL path remains the default and most interoperable mode. 
+ ### Code Diff ```diff @@ -112,6 +123,7 @@ OpenTrace provides a unified API for instrumenting LangGraph agents with OpenTel | `otel_adapter` | `otel_adapter.py` | Converts OTLP JSON to Trace-Graph JSON (TGJ) with temporal hierarchy | | `tgj_ingest` | `tgj_ingest.py` | Ingests TGJ documents into `ParameterNode` / `MessageNode` objects | | `otel_semconv` | `otel_semconv.py` | Helpers: `emit_reward()`, `emit_trace()`, `record_genai_chat()` | +| `graph_instrumentation` | `graph_instrumentation.py` | Trace-native graph instrumentation (`TraceGraph`) | ### Supported Graph Kinds diff --git a/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb b/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb index 6196e441..6b361240 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb @@ -169,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:58:48.269399Z", @@ -186,7 +186,7 @@ "[INFO] API key loaded from environment variable.\n", "\n", "API key: [SET]\n", - "Model: google/gemma-4-26b-a4b-it:free\n", + "Model: nvidia/nemotron-3-super-120b-a12b:free\n", "Budget: max_tokens=256, temperature=0\n" ] } @@ -251,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 45, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:58:48.337340Z", @@ -529,7 +529,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 46, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:58:55.622865Z", @@ -722,7 +722,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 47, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:58:55.651617Z", @@ -776,7 +776,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 48, "metadata": { "execution": { 
"iopub.execute_input": "2026-02-12T07:59:07.302370Z", @@ -848,7 +848,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 49, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:59:07.329697Z", @@ -865,30 +865,30 @@ "Total spans captured: 8\n", "\n", "Unique trace IDs: 1 (D9: should be 1)\n", - "Root invocation span: QA_research_graph.invoke (id=c88df3f2cf1b...)\n", + "Root invocation span: QA_research_graph.invoke (id=3d446653082f...)\n", "\n", - " Span: llm.chat.completion parent=2965e9d6\n", + " Span: llm.chat.completion parent=bd8208b9\n", " gen_ai.operation.name = chat\n", " gen_ai.output.preview = {\"1\": {\"agent\": \"web_researcher\", \"action\": \"search\", \"goal\": \"collect context\",\n", " gen_ai.provider.name = stub\n", " gen_ai.request.model = stub-llm\n", " trace.temporal_ignore = true\n", "\n", - " Span: planner parent=c88df3f2\n", + " Span: planner parent=3d446653\n", " gen_ai.model = stub-llm\n", " inputs.gen_ai.prompt = Create a JSON plan for: What is reinforcement learning?. Use web_researcher and \n", " inputs.user_query = What is reinforcement learning?\n", " param.planner_prompt = Create a JSON plan for: {query}. 
Use web_researcher and synthesizer; include wik\n", " param.planner_prompt.trainable = True\n", "\n", - " Span: llm.chat.completion parent=fe8467c0\n", + " Span: llm.chat.completion parent=c17b2240\n", " gen_ai.operation.name = chat\n", " gen_ai.output.preview = {\"goto\": \"synthesizer\", \"query\": \"given step 1 of plan: {'agent': 'web_researche\n", " gen_ai.provider.name = stub\n", " gen_ai.request.model = stub-llm\n", " trace.temporal_ignore = true\n", "\n", - " Span: executor parent=c88df3f2\n", + " Span: executor parent=3d446653\n", " gen_ai.model = stub-llm\n", " inputs.gen_ai.prompt = Given step 1 of plan: {'agent': 'web_researcher', 'action': 'search', 'goal': 'c\n", " inputs.step = 1\n", @@ -896,7 +896,7 @@ " param.executor_prompt = Given step {step} of plan: {plan_step} for query: {query}. Return JSON {goto,que\n", " param.executor_prompt.trainable = True\n", "\n", - " Span: llm.chat.completion parent=0b00b52b\n", + " Span: llm.chat.completion parent=8b9ef57d\n", " gen_ai.operation.name = chat\n", " gen_ai.output.preview = Stub answer for: answer: what is reinforcement learning?\n", "context:\n", @@ -906,7 +906,7 @@ " gen_ai.request.model = stub-llm\n", " trace.temporal_ignore = true\n", "\n", - " Span: synthesizer parent=c88df3f2\n", + " Span: synthesizer parent=3d446653\n", " gen_ai.model = stub-llm\n", " inputs.gen_ai.prompt = Answer: What is reinforcement learning?\n", "Context:\n", @@ -919,7 +919,7 @@ "If asked for IDs, include Wikidata QIDs.\n", " param.synthesizer_prompt.trainable = True\n", "\n", - " Span: evaluator parent=c88df3f2\n", + " Span: evaluator parent=3d446653\n", " eval.reasons = \n", " eval.score = 0.25\n", "\n", @@ -985,7 +985,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 50, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:59:07.350295Z", @@ -1002,21 +1002,21 @@ "TGJ documents: 1\n", "\n", "ParameterNode (trainable): 3\n", - " QA_research_graph/0/planner_prompt2 trainable=True\n", - 
" QA_research_graph/0/executor_prompt2 trainable=True\n", - " QA_research_graph/0/synthesizer_prompt2 trainable=True\n", + " QA_research_graph/0/planner_prompt4 trainable=True\n", + " QA_research_graph/0/executor_prompt4 trainable=True\n", + " QA_research_graph/0/synthesizer_prompt4 trainable=True\n", "\n", - "Unique trainable params: ['executor_prompt2', 'planner_prompt2', 'synthesizer_prompt2']\n", + "Unique trainable params: ['executor_prompt4', 'planner_prompt4', 'synthesizer_prompt4']\n", "[OK] No duplicate ParameterNodes (C7).\n", "\n", "MessageNode: 7\n", - " QA_research_graph/0/planner7 parents=['lit_112', 'lit_60349', 'planner_prompt2']\n", - " QA_research_graph/0/llm.chat.completion21 parents=['planner7']\n", - " QA_research_graph/0/llm.chat.completion23 parents=['synthesizer7']\n", - " QA_research_graph/0/executor7 parents=['lit_21692', 'lit_9767', 'lit_603410', 'planner7', 'executor_prompt2']\n", - " QA_research_graph/0/llm.chat.completion22 parents=['executor7']\n", - " QA_research_graph/0/synthesizer7 parents=['lit_12883', 'lit_603411', 'executor7', 'synthesizer_prompt2']\n", - " QA_research_graph/0/evaluator7 parents=['synthesizer7']\n", + " QA_research_graph/0/planner14 parents=['lit_114', 'lit_603418', 'planner_prompt4']\n", + " QA_research_graph/0/llm.chat.completion42 parents=['planner14']\n", + " QA_research_graph/0/llm.chat.completion44 parents=['synthesizer14']\n", + " QA_research_graph/0/executor14 parents=['lit_21694', 'lit_97614', 'lit_603419', 'planner14', 'executor_prompt4']\n", + " QA_research_graph/0/llm.chat.completion43 parents=['executor14']\n", + " QA_research_graph/0/synthesizer14 parents=['lit_12886', 'lit_603420', 'executor14', 'synthesizer_prompt4']\n", + " QA_research_graph/0/evaluator14 parents=['synthesizer14']\n", "[WARN] No top-level message nodes found.\n" ] } @@ -1090,7 +1090,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 51, "metadata": { "execution": { "iopub.execute_input": 
"2026-02-12T07:59:07.375448Z", @@ -1106,9 +1106,9 @@ "text": [ "Child LLM spans detected (via temporal_ignore): 3\n", "Top-level message nodes: 4\n", - " [OK] Node executor temporal parent → ab65eb61cb9e... (not a child span)\n", - " [OK] Node synthesizer temporal parent → 9f33c96e551b... (not a child span)\n", - " [OK] Node evaluator temporal parent → 8c1a42d01bc8... (not a child span)\n", + " [OK] Node executor temporal parent → a409e8991e44... (not a child span)\n", + " [OK] Node synthesizer temporal parent → 7241c782d5e7... (not a child span)\n", + " [OK] Node evaluator temporal parent → de7e4a824c31... (not a child span)\n", "\n", "[OK] Temporal chaining verified — no top-level node points to child spans.\n" ] @@ -1165,7 +1165,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 52, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:59:07.394844Z", @@ -1223,7 +1223,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 53, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:59:07.413969Z", @@ -1264,7 +1264,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 54, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:59:07.436041Z", @@ -1313,7 +1313,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 55, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:59:07.451868Z", @@ -1387,7 +1387,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 56, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:59:07.472683Z", @@ -1498,7 +1498,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 57, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:59:07.559993Z", @@ -1580,7 +1580,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:59:07.581005Z", @@ 
-1594,14 +1594,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "[RETRY] Smoke test rate-limited, waiting 10s...\n", - "[RETRY] Smoke test rate-limited, waiting 20s...\n", - "[WARN] Smoke test failed: Error code: 429 - {'error': {'message': 'Provider returned error', 'code': 429, 'metadata': {'raw': 'google/gemma-4-26b-a4b-it:free is temporarily rate-limited upstream. Please retry shortly, or add your own key to accumulate your rate limits: https://openrouter.ai/settings/integrations', 'provider_name': 'Google AI Studio', 'is_byok': False}}, 'user_id': 'user_2dVDFlopMDQSXGcvQ5rlYEaITIy'}\n", - " model=google/gemma-4-26b-a4b-it:free\n", - " base_url=https://openrouter.ai/api/v1\n", + "[OK] Live LLM smoke test passed: 'User says: \"Say hello in one word.\"'\n", "\n", "Live LLM ready (openai client -> https://openrouter.ai/api/v1)\n", - " model: google/gemma-4-26b-a4b-it:free\n" + " model: nvidia/nemotron-3-super-120b-a12b:free\n" ] } ], @@ -1672,7 +1668,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:59:07.609620Z", @@ -1689,10 +1685,18 @@ "============================================================\n", "LIVE LLM MODE (OpenRouter via openai client)\n", "============================================================\n", - " [RETRY] Rate-limited (429), waiting 10s... (attempt 1/5)\n", - " [RETRY] Rate-limited (429), waiting 20s... (attempt 2/5)\n", - " [RETRY] Rate-limited (429), waiting 40s... (attempt 3/5)\n", - " [RETRY] Rate-limited (429), waiting 80s... (attempt 4/5)\n" + "\n", + "Live answer (574 chars):\n", + " Gradient descent is a first‑order iterative optimization algorithm used to find a (local) minimum of a differentiable multivariate function. 
At each step it computes the gradient of the function at the current point and moves in the opposite direction—that is, along the direction of steepest descent\n", + "\n", + "Spans: 14 trace_ids=1 root_invoke=True\n", + " gen_ai.provider.name = openrouter\n", + " gen_ai.provider.name = openrouter\n", + " gen_ai.provider.name = openrouter\n", + " gen_ai.provider.name = openrouter\n", + " gen_ai.provider.name = openrouter\n", + "\n", + "[OK] Live LLM trace validated!\n" ] } ], @@ -1763,7 +1767,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:59:09.152911Z", @@ -1773,11 +1777,265 @@ } }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "optimize_graph: running baseline ...\n", + "opto.trace.io.optimization: optimize_graph: running baseline ...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============================================================\n", + "LIVE OPTIMIZATION (3 queries, 1 iteration)\n", + "============================================================\n", + " planner_prompt BEFORE: 'Create a JSON plan for: {query}. Use web_researcher and synthesizer; include wikidata_researcher if IDs are requested.'\n", + " Running baseline...\n", + " Query 1/3: Summarize the causes and key events of t... score=0.35\n", + " Query 2/3: Give 3 factual relationships about Tesla... score=0.6\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "optimize_graph: running iteration 1 ...\n", + "opto.trace.io.optimization: optimize_graph: running iteration 1 ...\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "[SKIP] Live invocation failed — live optimization skipped.\n" + " Query 3/3: What is the Wikidata ID for CRISPR and l... score=0.85\n", + " Baseline average: 0.6000\n", + " Iteration 1/1...\n", + " Query 1/3: Summarize the causes and key events of t... 
score=0.35\n", + " Query 2/3: Give 3 factual relationships about Tesla... score=0.85\n", + " Query 3/3: What is the Wikidata ID for CRISPR and l... score=0.85\n", + " Iteration 1 average: 0.6833 * NEW BEST\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Optimizer step failed: litellm.APIError: APIError: OpenAIException - \n", + "Traceback (most recent call last):\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/llms/openai/openai.py\", line 725, in completion\n", + " raise e\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/llms/openai/openai.py\", line 669, in completion\n", + " final_response_obj = convert_to_model_response_object(\n", + " response_object=stringified_response,\n", + " model_response_object=model_response,\n", + " _response_headers=headers,\n", + " )\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py\", line 488, in convert_to_model_response_object\n", + " raise raised_exception\n", + "Exception\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/main.py\", line 1973, in completion\n", + " raise e\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/main.py\", line 1946, in completion\n", + " response = openai_chat_completions.completion(\n", + " model=model,\n", + " ...<15 lines>...\n", + " custom_llm_provider=custom_llm_provider,\n", + " )\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/llms/openai/openai.py\", line 736, in completion\n", + " raise OpenAIError(\n", + " ...<4 lines>...\n", + " )\n", + "litellm.llms.openai.common_utils.OpenAIError\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + 
" File \"/home/user/code/Trace/opto/trace/io/optimization.py\", line 550, in optimize_graph\n", + " raw_updates = _optimizer.step()\n", + " File \"/home/user/code/Trace/opto/optimizers/optimizer.py\", line 236, in step\n", + " update_dict = self.propose(*args, **kwargs)\n", + " File \"/home/user/code/Trace/opto/optimizers/optimizer.py\", line 281, in propose\n", + " return self._step(*args, **kwargs)\n", + " ~~~~~~~~~~^^^^^^^^^^^^^^^^^\n", + " File \"/home/user/code/Trace/opto/optimizers/optoprime_v2.py\", line 616, in _step\n", + " response = self.call_llm(\n", + " system_prompt=system_prompt,\n", + " ...<2 lines>...\n", + " max_tokens=self.max_tokens,\n", + " )\n", + " File \"/home/user/code/Trace/opto/optimizers/optoprime_v2.py\", line 674, in call_llm\n", + " response = self.llm(messages=messages, max_tokens=max_tokens, response_format=response_format)\n", + " File \"/home/user/code/Trace/opto/utils/llm.py\", line 84, in __call__\n", + " return self.model(*args, **kwargs)\n", + " ~~~~~~~~~~^^^^^^^^^^^^^^^^^\n", + " File \"/home/user/code/Trace/opto/utils/llm.py\", line 297, in \n", + " return lambda *args, **kwargs: self._model(*args, **kwargs)\n", + " ~~~~~~~~~~~^^^^^^^^^^^^^^^^^\n", + " File \"/home/user/code/Trace/opto/utils/llm.py\", line 282, in \n", + " return lambda *args, **kwargs: retry_with_exponential_backoff(\n", + " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", + " lambda: litellm.completion(model_name, *args, **kwargs),\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " ...<2 lines>...\n", + " operation_name=\"LiteLLM_completion\"\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + " File \"/home/user/code/Trace/opto/utils/auto_retry.py\", line 77, in retry_with_exponential_backoff\n", + " raise e\n", + " File \"/home/user/code/Trace/opto/utils/auto_retry.py\", line 23, in retry_with_exponential_backoff\n", + " return func()\n", + " File \"/home/user/code/Trace/opto/utils/llm.py\", line 283, in \n", + " lambda: 
litellm.completion(model_name, *args, **kwargs),\n", + " ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/utils.py\", line 1332, in wrapper\n", + " raise e\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/utils.py\", line 1207, in wrapper\n", + " result = original_function(*args, **kwargs)\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/main.py\", line 3452, in completion\n", + " raise exception_type(\n", + " ~~~~~~~~~~~~~~^\n", + " model=model,\n", + " ^^^^^^^^^^^^\n", + " ...<3 lines>...\n", + " extra_kwargs=kwargs,\n", + " ^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/litellm_core_utils/exception_mapping_utils.py\", line 2301, in exception_type\n", + " raise e\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/litellm_core_utils/exception_mapping_utils.py\", line 528, in exception_type\n", + " raise APIError(\n", + " ...<6 lines>...\n", + " )\n", + "litellm.exceptions.APIError: litellm.APIError: APIError: OpenAIException - \n", + "opto.trace.io.optimization: Optimizer step failed: litellm.APIError: APIError: OpenAIException - \n", + "Traceback (most recent call last):\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/llms/openai/openai.py\", line 725, in completion\n", + " raise e\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/llms/openai/openai.py\", line 669, in completion\n", + " final_response_obj = convert_to_model_response_object(\n", + " response_object=stringified_response,\n", + " model_response_object=model_response,\n", + " _response_headers=headers,\n", + " )\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py\", line 488, in convert_to_model_response_object\n", + " raise raised_exception\n", + 
"Exception\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/main.py\", line 1973, in completion\n", + " raise e\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/main.py\", line 1946, in completion\n", + " response = openai_chat_completions.completion(\n", + " model=model,\n", + " ...<15 lines>...\n", + " custom_llm_provider=custom_llm_provider,\n", + " )\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/llms/openai/openai.py\", line 736, in completion\n", + " raise OpenAIError(\n", + " ...<4 lines>...\n", + " )\n", + "litellm.llms.openai.common_utils.OpenAIError\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/user/code/Trace/opto/trace/io/optimization.py\", line 550, in optimize_graph\n", + " raw_updates = _optimizer.step()\n", + " File \"/home/user/code/Trace/opto/optimizers/optimizer.py\", line 236, in step\n", + " update_dict = self.propose(*args, **kwargs)\n", + " File \"/home/user/code/Trace/opto/optimizers/optimizer.py\", line 281, in propose\n", + " return self._step(*args, **kwargs)\n", + " ~~~~~~~~~~^^^^^^^^^^^^^^^^^\n", + " File \"/home/user/code/Trace/opto/optimizers/optoprime_v2.py\", line 616, in _step\n", + " response = self.call_llm(\n", + " system_prompt=system_prompt,\n", + " ...<2 lines>...\n", + " max_tokens=self.max_tokens,\n", + " )\n", + " File \"/home/user/code/Trace/opto/optimizers/optoprime_v2.py\", line 674, in call_llm\n", + " response = self.llm(messages=messages, max_tokens=max_tokens, response_format=response_format)\n", + " File \"/home/user/code/Trace/opto/utils/llm.py\", line 84, in __call__\n", + " return self.model(*args, **kwargs)\n", + " ~~~~~~~~~~^^^^^^^^^^^^^^^^^\n", + " File \"/home/user/code/Trace/opto/utils/llm.py\", 
line 297, in \n", + " return lambda *args, **kwargs: self._model(*args, **kwargs)\n", + " ~~~~~~~~~~~^^^^^^^^^^^^^^^^^\n", + " File \"/home/user/code/Trace/opto/utils/llm.py\", line 282, in \n", + " return lambda *args, **kwargs: retry_with_exponential_backoff(\n", + " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", + " lambda: litellm.completion(model_name, *args, **kwargs),\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " ...<2 lines>...\n", + " operation_name=\"LiteLLM_completion\"\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + " File \"/home/user/code/Trace/opto/utils/auto_retry.py\", line 77, in retry_with_exponential_backoff\n", + " raise e\n", + " File \"/home/user/code/Trace/opto/utils/auto_retry.py\", line 23, in retry_with_exponential_backoff\n", + " return func()\n", + " File \"/home/user/code/Trace/opto/utils/llm.py\", line 283, in \n", + " lambda: litellm.completion(model_name, *args, **kwargs),\n", + " ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/utils.py\", line 1332, in wrapper\n", + " raise e\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/utils.py\", line 1207, in wrapper\n", + " result = original_function(*args, **kwargs)\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/main.py\", line 3452, in completion\n", + " raise exception_type(\n", + " ~~~~~~~~~~~~~~^\n", + " model=model,\n", + " ^^^^^^^^^^^^\n", + " ...<3 lines>...\n", + " extra_kwargs=kwargs,\n", + " ^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/litellm_core_utils/exception_mapping_utils.py\", line 2301, in exception_type\n", + " raise e\n", + " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/litellm_core_utils/exception_mapping_utils.py\", line 528, in exception_type\n", + " raise APIError(\n", + " ...<6 lines>...\n", + " )\n", + 
"litellm.exceptions.APIError: litellm.APIError: APIError: OpenAIException - \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\u001b[0m\n", + "LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.\n", + "\n", + "\n", + "\u001b[1;31mProvider List: https://docs.litellm.ai/docs/providers\u001b[0m\n", + "\n", + "LiteLLM_completion: Non-retryable error: litellm.APIError: APIError: OpenAIException - \n", + "\n", + " planner_prompt AFTER: 'Create a JSON plan for: {query}. Use web_researcher and synthesizer; include wikidata_researcher if IDs are requested.'\n", + " Baseline score: 0.6000\n", + " Best score: 0.6833\n", + " Score history: [0.6, 0.6833]\n", + " Total LLM calls: 29\n", + "\n", + "Iter Avg Score Best Score \n", + "------------------------------\n", + "0 0.6000 0.6000 \n", + "1 0.6833 0.6833 \n", + "\n", + " Live OTLP: 0 spans, 0 trace IDs, root_invoke=False\n" ] } ], @@ -1856,7 +2114,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "metadata": { "execution": { "iopub.execute_input": "2026-02-12T07:59:09.732598Z", @@ -1877,6 +2135,8 @@ " /home/user/code/Trace/examples/notebooks/notebook_outputs/m1/stub_all_traces.json (9 traces)\n", " /home/user/code/Trace/examples/notebooks/notebook_outputs/m1/stub_sample_tgj.json\n", " /home/user/code/Trace/examples/notebooks/notebook_outputs/m1/stub_summary.json\n", + " /home/user/code/Trace/examples/notebooks/notebook_outputs/m1/live_all_traces.json (6 traces)\n", + " /home/user/code/Trace/examples/notebooks/notebook_outputs/m1/live_summary.json\n", "\n", "All artifacts saved to: /home/user/code/Trace/examples/notebooks/notebook_outputs/m1\n" ] diff --git a/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb b/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb new file mode 100644 index 00000000..a5993ee0 --- 
/dev/null +++ b/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb @@ -0,0 +1,137 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LangGraph trace-native optimization demo\n", + "\n", + "Compact, deterministic demo for `backend=\"trace\"` without OTEL ingestion.\n" + ], + "id": "ec84920b" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langgraph.graph import StateGraph, START, END\n", + "from opto.trace import node\n", + "from opto.trace.io import instrument_graph, optimize_graph\n", + "\n", + "def _raw(value):\n", + " return getattr(value, 'data', value)\n" + ], + "id": "a6bb3b02" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "planner_prompt = node('Create a plan for: {query}', trainable=True, name='planner_prompt')\n", + "synth_prompt = node('Answer: {query}\\nPlan: {plan}', trainable=True, name='synth_prompt')\n", + "\n", + "def planner_node(state):\n", + " query = _raw(state['query'])\n", + " return {'query': str(query), 'plan': planner_prompt.data.replace('{query}', str(query))}\n", + "\n", + "def synth_node(state):\n", + " query = _raw(state['query'])\n", + " plan = _raw(state['plan'])\n", + " answer = synth_prompt.data.replace('{query}', str(query)).replace('{plan}', str(plan))\n", + " return {'final_answer': node(answer, name='final_answer_node')}\n", + "\n", + "def build_graph():\n", + " g = StateGraph(dict)\n", + " g.add_node('planner', planner_node)\n", + " g.add_node('synth', synth_node)\n", + " g.add_edge(START, 'planner')\n", + " g.add_edge('planner', 'synth')\n", + " g.add_edge('synth', END)\n", + " return g\n" + ], + "id": "68f6f76b" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ig = instrument_graph(\n", + " backend='trace',\n", + " graph_factory=build_graph,\n", + " scope=globals(),\n", + " 
graph_agents_functions=['planner_node', 'synth_node'],\n", + " graph_prompts_list=[planner_prompt, synth_prompt],\n", + " output_key='final_answer',\n", + ")\n", + "result = ig.invoke({'query': 'What is CRISPR?'})\n", + "assert 'CRISPR' in result['final_answer'].data\n", + "result['final_answer'].data\n" + ], + "id": "9cb6347f" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class NotebookOptimizer:\n", + " def __init__(self, prompt):\n", + " self.prompt = prompt\n", + " self.calls = 0\n", + "\n", + " def zero_feedback(self):\n", + " return None\n", + "\n", + " def backward(self, *_args, **_kwargs):\n", + " return None\n", + "\n", + " def step(self):\n", + " self.calls += 1\n", + " if self.calls == 1:\n", + " self.prompt._data = 'CRISPR optimized :: {query} :: {plan}'\n", + " return {'synth_prompt': self.prompt._data}\n", + " return {}\n", + "\n", + "def eval_fn(payload):\n", + " answer = str(payload['answer'])\n", + " return {\n", + " 'score': 1.0 if 'CRISPR optimized' in answer else 0.0,\n", + " 'feedback': 'Prefer mentioning CRISPR optimized explicitly.',\n", + " }\n", + "\n", + "opt = optimize_graph(\n", + " ig,\n", + " queries=['What is gene editing?'],\n", + " iterations=2,\n", + " optimizer=NotebookOptimizer(synth_prompt),\n", + " eval_fn=eval_fn,\n", + ")\n", + "assert opt.best_iteration == 2\n", + "assert opt.best_score == 1.0\n", + "assert opt.best_updates['synth_prompt'].startswith('CRISPR optimized')\n", + "opt.best_score, opt.best_updates\n" + ], + "id": "6f15abf5" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/opto/features/flows/compose.py b/opto/features/flows/compose.py index dff95bbc..f54b1881 100644 --- a/opto/features/flows/compose.py +++ 
b/opto/features/flows/compose.py @@ -169,7 +169,10 @@ def __init__(self, if llm is None: llm = LLM() - assert isinstance(llm, AbstractModel), f"{llm} must be an instance of AbstractModel" + if not isinstance(llm, AbstractModel) and not callable(llm): + raise TypeError( + f"{llm} must be an instance of AbstractModel or a callable" + ) self.llm = llm self.chat_history = ChatHistory() self.chat_history_on = chat_history_on diff --git a/opto/features/priority_search/utils.py b/opto/features/priority_search/utils.py index c61c81c2..ec86e8e2 100644 --- a/opto/features/priority_search/utils.py +++ b/opto/features/priority_search/utils.py @@ -71,7 +71,7 @@ def set_module_parameters(agent, update_dict): """ remapped_update_dict = remap_update_dict(agent, update_dict) # remap the update dict to the agent's parameters for k, v in remapped_update_dict.items(): - k._data = v # set the parameter's data to the value in the update_dict + k._set(v) # preserve Node-unwrapping semantics consistently def create_module_from_update_dict(agent, update_dict): """ Create a new agent from the update_dict. diff --git a/opto/optimizers/optimizer.py b/opto/optimizers/optimizer.py index 79b37370..eefa827f 100644 --- a/opto/optimizers/optimizer.py +++ b/opto/optimizers/optimizer.py @@ -295,7 +295,7 @@ def update(self, update_dict: Dict[ParameterNode, Any]): """ for p, d in update_dict.items(): if p.trainable: - p._data = d + p._set(d) def zero_feedback(self): """Clear accumulated feedback from all parameters. 
@@ -403,4 +403,4 @@ def __deepcopy__(self, memo): setattr(result, k, copy.deepcopy(v, memo)) else: setattr(result, k, v) # parameters is not copied, it is the original parameters - return result \ No newline at end of file + return result diff --git a/opto/trace/graph/__init__.py b/opto/trace/graph/__init__.py new file mode 100644 index 00000000..1e60ae64 --- /dev/null +++ b/opto/trace/graph/__init__.py @@ -0,0 +1,12 @@ +from opto.trace.graph.sidecars import GraphRunSidecar, OTELRunSidecar, GraphCandidateSnapshot +from opto.trace.graph.adapter import GraphAdapter, LangGraphAdapter +from opto.trace.graph.module import GraphModule + +__all__ = [ + "GraphRunSidecar", + "OTELRunSidecar", + "GraphCandidateSnapshot", + "GraphAdapter", + "LangGraphAdapter", + "GraphModule", +] diff --git a/opto/trace/graph/adapter.py b/opto/trace/graph/adapter.py new file mode 100644 index 00000000..05309996 --- /dev/null +++ b/opto/trace/graph/adapter.py @@ -0,0 +1,303 @@ +from __future__ import annotations + +import contextlib +import inspect +from dataclasses import dataclass, field +from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union + +from opto.trace import bundle, node +from opto.trace.bundle import FunModule, to_data +from opto.trace.graph.module import GraphModule +from opto.trace.graph.sidecars import GraphRunSidecar, OTELRunSidecar +from opto.trace.io.bindings import Binding +from opto.trace.io.graph_instrumentation import TraceGraph +from opto.trace.nodes import Node, ParameterNode + + +def _raw(value: Any) -> Any: + return getattr(value, "data", value) + + +def _normalize_named_callables( + targets: Union[None, List[str], List[Callable[..., Any]], Mapping[str, Callable[..., Any]]], + scope: Optional[Dict[str, Any]] = None, +) -> Dict[str, Callable[..., Any]]: + if targets is None: + return {} + if isinstance(targets, Mapping): + return dict(targets) + out: Dict[str, Callable[..., Any]] = {} + for item in targets: + if isinstance(item, str): + if 
scope is None or item not in scope: + raise KeyError(f"Function {item!r} not found in adapter scope") + out[item] = scope[item] + else: + out[getattr(item, "__name__", f"fn_{len(out)}")] = item + return out + + +def _as_parameter(name: str, value: Any) -> ParameterNode: + if isinstance(value, ParameterNode): + return value + if isinstance(value, Node): + return node(value, name=name, trainable=True) + return node(value, name=name, trainable=True) + + +@dataclass +class GraphAdapter: + graph_factory: Callable[..., Any] + backend: str = "trace" + bindings: Dict[str, Binding] = field(default_factory=dict) + service_name: str = "graph-adapter" + input_key: str = "query" + output_key: Optional[str] = None + + def build_graph(self, backend: Optional[str] = None): + raise NotImplementedError + + def invoke_runtime(self, state: Dict[str, Any], **kwargs: Any): + raise NotImplementedError + + def invoke_trace(self, state: Dict[str, Any], **kwargs: Any): + raise NotImplementedError + + def new_run_sidecar(self): + return GraphRunSidecar() + + def bindings_dict(self) -> Dict[str, Binding]: + return dict(self.bindings) + + def parameters(self) -> List[ParameterNode]: + raise NotImplementedError + + def as_module(self) -> GraphModule: + return GraphModule(self) + + def instrument(self, backend: Optional[str] = None, **kwargs: Any): + effective_backend = backend or self.backend + if effective_backend == "trace": + return TraceGraph( + graph=self, + parameters=self.parameters(), + bindings=self.bindings_dict(), + service_name=self.service_name, + input_key=self.input_key, + output_key=self.output_key, + ) + if effective_backend == "otel": + from opto.trace.io.instrumentation import instrument_graph + + merged = self.bindings_dict() + merged.update(kwargs.pop("bindings", {}) or {}) + graph = self.build_graph(backend="otel") + return instrument_graph( + graph=graph, + backend="otel", + bindings=merged, + service_name=self.service_name, + input_key=self.input_key, + 
output_key=self.output_key, + **kwargs, + ) + raise ValueError(f"Unsupported backend: {effective_backend!r}") + + +@dataclass +class LangGraphAdapter(GraphAdapter): + function_targets: Union[None, List[str], List[Callable[..., Any]], Mapping[str, Callable[..., Any]]] = None + prompt_targets: Optional[Mapping[str, Any]] = None + graph_knobs: Optional[Mapping[str, Any]] = None + scope: Optional[Dict[str, Any]] = None + train_graph_agents_functions: bool = True + + def __post_init__(self) -> None: + self.function_targets = _normalize_named_callables(self.function_targets, self.scope) + self.prompt_targets = {k: _as_parameter(k, v) for k, v in dict(self.prompt_targets or {}).items()} + self.graph_knobs = {k: _as_parameter(k, v) for k, v in dict(self.graph_knobs or {}).items()} + self._active_sidecar: Optional[GraphRunSidecar] = None + self._compiled_cache: Dict[Tuple[str, Tuple[Tuple[str, str], ...]], Any] = {} + self._original_functions = dict(self.function_targets) + self._traced_functions = { + name: (fn if isinstance(fn, FunModule) or hasattr(fn, "_fun") else bundle( + trainable=self.train_graph_agents_functions, + traceable_code=True, + allow_external_dependencies=True, + )(fn)) + for name, fn in self.function_targets.items() + } + for fn in self._original_functions.values(): + fn_globals = getattr(fn, "__globals__", {}) + for name, prompt in self.prompt_targets.items(): + fn_globals[name] = prompt + for name, knob in self.graph_knobs.items(): + fn_globals[name] = knob + self._build_bindings() + + def __getstate__(self): + state = self.__dict__.copy() + state["_active_sidecar"] = None + state["_compiled_cache"] = {} + return state + + def _build_bindings(self) -> None: + auto: Dict[str, Binding] = {} + for name, prompt in self.prompt_targets.items(): + auto[name] = Binding( + get=lambda p=prompt: p.data, + set=lambda v, p=prompt: p._set(v), + kind="prompt", + ) + for name, knob in self.graph_knobs.items(): + auto[name] = Binding( + get=lambda p=knob: p.data, + 
set=lambda v, p=knob: p._set(v), + kind="graph", + ) + for name, fn in self._traced_functions.items(): + if getattr(fn, "parameter", None) is not None: + code_param = fn.parameter + auto[f"__code_{name}"] = Binding( + get=lambda p=code_param: p.data, + set=lambda v, p=code_param: p._set(v), + kind="code", + ) + user = dict(self.bindings) + auto.update(user) + self.bindings = auto + + def parameters(self) -> List[ParameterNode]: + params: List[ParameterNode] = [] + params.extend(self.prompt_targets.values()) + params.extend(self.graph_knobs.values()) + for fn in self._traced_functions.values(): + if getattr(fn, "parameter", None) is not None: + params.append(fn.parameter) + try: + params.extend(list(fn.parameters())) + except Exception: + pass + out = [] + seen = set() + for p in params: + if id(p) in seen: + continue + seen.add(id(p)) + out.append(p) + return out + + def _knob_values(self) -> Dict[str, Any]: + return {k: _raw(v) for k, v in self.graph_knobs.items()} + + def _cache_key(self, backend: str): + return backend, tuple(sorted((k, repr(v)) for k, v in self._knob_values().items())) + + @contextlib.contextmanager + def _scope_override(self, overrides: Dict[str, Any]): + if not self.scope: + yield + return + backup = {k: self.scope[k] for k in overrides if k in self.scope} + self.scope.update(overrides) + try: + yield + finally: + for key in overrides: + if key in backup: + self.scope[key] = backup[key] + + def _merge_shadow(self, sidecar: GraphRunSidecar, runtime_out: Any, traced_out: Any) -> None: + if not isinstance(runtime_out, dict): + return + if isinstance(traced_out, Node) and isinstance(getattr(traced_out, "data", None), dict): + for key in runtime_out: + try: + sidecar.shadow_state[key] = traced_out[key] + except Exception: + sidecar.shadow_state[key] = node(runtime_out[key], name=key) + else: + for key, value in runtime_out.items(): + sidecar.shadow_state[key] = value if isinstance(value, Node) else node(value, name=key) + + def 
_trace_runtime_wrapper(self, name: str, traced_fn: FunModule): + def _wrapped(state: Dict[str, Any], *args: Any, **kwargs: Any): + if self._active_sidecar is None: + raise RuntimeError("Trace runtime wrapper called without active sidecar") + sidecar = self._active_sidecar + trace_state = dict(state) + for key, traced_value in sidecar.shadow_state.items(): + trace_state[key] = traced_value + traced_out = traced_fn(trace_state, *args, **kwargs) + runtime_out = to_data(traced_out) + sidecar.record_node_output(name, traced_out, runtime_out) + self._merge_shadow(sidecar, runtime_out, traced_out) + return runtime_out + + _wrapped.__name__ = name + return _wrapped + + def build_graph(self, backend: Optional[str] = None): + effective_backend = backend or self.backend + key = self._cache_key(effective_backend) + if key in self._compiled_cache: + return self._compiled_cache[key] + + if effective_backend == "trace": + fn_overrides = { + name: self._trace_runtime_wrapper(name, fn) + for name, fn in self._traced_functions.items() + } + elif effective_backend == "otel": + fn_overrides = dict(self._original_functions) + else: + raise ValueError(f"Unsupported backend: {effective_backend!r}") + + call_kwargs = dict(self._knob_values()) + sig = inspect.signature(self.graph_factory) + for name, fn in fn_overrides.items(): + if name in sig.parameters: + call_kwargs[name] = fn + + with self._scope_override({**fn_overrides, **call_kwargs}): + graph = self.graph_factory(**{k: v for k, v in call_kwargs.items() if k in sig.parameters}) + + compiled = graph.compile() if hasattr(graph, "compile") else graph + self._compiled_cache[key] = compiled + return compiled + + def invoke_runtime(self, state: Dict[str, Any], backend: Optional[str] = None, **kwargs: Any): + effective_backend = backend or self.backend + if effective_backend == "otel": + graph = self.build_graph(backend="otel") + result = graph.invoke(state, **kwargs) + sidecar = OTELRunSidecar() + sidecar.otlp = None + sidecar.tgj_docs = 
None + return result, sidecar + return self.invoke_trace(state, **kwargs) + + def invoke_trace(self, state: Dict[str, Any], **kwargs: Any): + sidecar = self.new_run_sidecar() + for key, value in state.items(): + sidecar.shadow_state[key] = value if isinstance(value, Node) else node(value, name=key) + + self._active_sidecar = sidecar + try: + graph = self.build_graph(backend="trace") + result = graph.invoke(state, **kwargs) + finally: + self._active_sidecar = None + + output_node = None + if self.output_key and self.output_key in sidecar.shadow_state: + output_node = sidecar.shadow_state[self.output_key] + elif isinstance(result, Node): + output_node = result + + if output_node is None and self.output_key and isinstance(result, dict): + output_value = result.get(self.output_key) + output_node = output_value if isinstance(output_value, Node) else node(output_value, name=self.output_key) + + sidecar.set_output(output_node, result) + return result, sidecar diff --git a/opto/trace/graph/module.py b/opto/trace/graph/module.py new file mode 100644 index 00000000..a3ac8646 --- /dev/null +++ b/opto/trace/graph/module.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import Any, TYPE_CHECKING + +from opto.trace.modules import Module + +if TYPE_CHECKING: + from opto.trace.graph.adapter import GraphAdapter + + +class GraphModule(Module): + """Module view over a graph adapter.""" + + def __init__(self, adapter: "GraphAdapter"): + self.adapter = adapter + self._last_sidecar = None + + def forward(self, x: Any): + state = x if isinstance(x, dict) else {self.adapter.input_key: x} + _runtime, sidecar = self.adapter.invoke_trace(state) + self._last_sidecar = sidecar + if sidecar.output_node is None: + raise TypeError("GraphModule.forward expected sidecar.output_node to be set") + return sidecar.output_node + + def invoke(self, state: Any, **kwargs: Any) -> Any: + result, sidecar = self.adapter.invoke_runtime(state, **kwargs) + self._last_sidecar = sidecar + 
return result + + def parameters(self): + return self.adapter.parameters() + + def __getstate__(self): + state = self.__dict__.copy() + state["_last_sidecar"] = None + return state diff --git a/opto/trace/graph/sidecars.py b/opto/trace/graph/sidecars.py new file mode 100644 index 00000000..1ad3cda1 --- /dev/null +++ b/opto/trace/graph/sidecars.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + + +@dataclass +class GraphRunSidecar: + """Per-run sidecar preserving optimization state alongside runtime outputs.""" + + node_outputs: Dict[str, Any] = field(default_factory=dict) + shadow_state: Dict[str, Any] = field(default_factory=dict) + output_node: Any | None = None + runtime_result: Any | None = None + + def record_node_output( + self, + node_name: str, + traced_output: Any, + runtime_value: Any = None, + ) -> None: + self.node_outputs[node_name] = traced_output + if runtime_value is not None and isinstance(runtime_value, dict): + self.shadow_state.update(runtime_value) + + def set_output(self, output_node: Any, runtime_result: Any) -> None: + self.output_node = output_node + self.runtime_result = runtime_result + + def clear(self) -> None: + self.node_outputs.clear() + self.shadow_state.clear() + self.output_node = None + self.runtime_result = None + + +@dataclass +class OTELRunSidecar: + """OTEL artefacts sidecar for a single graph run.""" + + otlp: Dict[str, Any] | None = None + tgj_docs: List[Dict[str, Any]] | None = None + + +@dataclass +class GraphCandidateSnapshot: + """Debug/introspection snapshot for graph candidate state.""" + + graph_knobs: Dict[str, Any] = field(default_factory=dict) + parameter_snapshot: Dict[str, Any] = field(default_factory=dict) + metadata: Dict[str, Any] = field(default_factory=dict) diff --git a/opto/trace/io/__init__.py b/opto/trace/io/__init__.py index 9f9aaf09..5fce8d8a 100644 --- a/opto/trace/io/__init__.py +++ 
b/opto/trace/io/__init__.py @@ -22,6 +22,7 @@ # -- high-level API -------------------------------------------------------- from opto.trace.io.instrumentation import instrument_graph, InstrumentedGraph +from opto.trace.io.graph_instrumentation import instrument_trace_graph, TraceGraph from opto.trace.io.optimization import ( optimize_graph, EvalResult, @@ -50,6 +51,22 @@ ) from opto.trace.io.otel_adapter import otlp_traces_to_trace_json from opto.trace.io.tgj_ingest import ingest_tgj, merge_tgj +try: + from opto.trace.graph import ( + GraphAdapter, + LangGraphAdapter, + GraphModule, + GraphRunSidecar, + OTELRunSidecar, + GraphCandidateSnapshot, + ) +except Exception: # pragma: no cover - optional/lazy import safety + GraphAdapter = None + LangGraphAdapter = None + GraphModule = None + GraphRunSidecar = None + OTELRunSidecar = None + GraphCandidateSnapshot = None __all__ = [ # High-level @@ -68,6 +85,8 @@ "record_genai_chat", # Data classes "InstrumentedGraph", + "instrument_trace_graph", + "TraceGraph", "RunResult", "OptimizationResult", # Lower-level @@ -81,3 +100,15 @@ "ingest_tgj", "merge_tgj", ] + +if GraphAdapter is not None: + __all__.extend( + [ + "GraphAdapter", + "LangGraphAdapter", + "GraphModule", + "GraphRunSidecar", + "OTELRunSidecar", + "GraphCandidateSnapshot", + ] + ) diff --git a/opto/trace/io/graph_instrumentation.py b/opto/trace/io/graph_instrumentation.py new file mode 100644 index 00000000..430c107d --- /dev/null +++ b/opto/trace/io/graph_instrumentation.py @@ -0,0 +1,155 @@ +"""Graph instrumentation helpers shared by multiple IO backends.""" + +from __future__ import annotations + +import inspect +from dataclasses import dataclass, field +from typing import Any, Callable, Dict, List, Optional + +from opto.trace import bundle, node +from opto.trace.bundle import FunModule + + +@dataclass +class TraceGraph: + """Trace-native instrumented graph wrapper. 
+ + This backend rebuilds a graph after rebinding selected node functions with + ``trace.bundle()``. Graph execution then emits native Trace nodes directly, + without relying on OTEL as the primary optimization carrier. + """ + + graph: Any + parameters: List[Any] = field(default_factory=list) + bindings: Dict[str, Any] = field(default_factory=dict) + service_name: str = "langgraph-trace" + input_key: str = "query" + output_key: Optional[str] = None + backend: str = "trace" + _last_sidecar: Any = field(default=None, repr=False, init=False) + + def invoke(self, state: Any, **kwargs: Any) -> Any: + if hasattr(self.graph, "invoke_runtime"): + result, sidecar = self.graph.invoke_runtime(state, backend="trace", **kwargs) + self._last_sidecar = sidecar + return result + return self.graph.invoke(state, **kwargs) + + def stream(self, state: Any, **kwargs: Any): + yield from self.graph.stream(state, **kwargs) + + +def _dedupe_identity(values: List[Any]) -> List[Any]: + seen = set() + out = [] + for value in values: + obj_id = id(value) + if obj_id in seen: + continue + seen.add(obj_id) + out.append(value) + return out + + +def _to_funmodule( + func: Any, + *, + trainable: bool = True, + traceable_code: bool = True, + allow_external_dependencies: bool = True, +) -> Any: + if isinstance(func, FunModule) or hasattr(func, "_fun"): + return func + + wrapped = bundle( + trainable=trainable, + traceable_code=traceable_code, + allow_external_dependencies=allow_external_dependencies, + )(func) + + try: + wrapped.__signature__ = inspect.signature(wrapped._fun) + except Exception: + # Signature metadata is nice-to-have only. 
+ pass + + return wrapped + + +def _replace_scope_object(scope: Dict[str, Any], old_obj: Any, new_obj: Any) -> bool: + replaced = False + for key, value in list(scope.items()): + if value is old_obj: + scope[key] = new_obj + replaced = True + return replaced + + +def instrument_trace_graph( + graph_factory: Callable[[], Any], + *, + scope: Dict[str, Any], + graph_agents_functions: List[str], + graph_prompts_list: Optional[List[Any]] = None, + train_graph_agents_functions: bool = True, + service_name: str = "langgraph-trace", + input_key: str = "query", + output_key: Optional[str] = None, +) -> TraceGraph: + """Instrument a graph in trace-native mode. + + The graph factory is rebuilt *after* rebinding selected functions in scope + with ``trace.bundle()``. Prompt objects can be passed as already-trace nodes + or raw objects that are replaced in the given scope by identity. + """ + if scope is None: + raise ValueError("backend='trace' requires scope=globals() or equivalent") + if not callable(graph_factory): + raise ValueError("backend='trace' requires a callable graph_factory") + + parameters: List[Any] = [] + + for name in graph_agents_functions: + if name not in scope: + raise KeyError(f"Function '{name}' not found in scope") + fn = scope[name] + if fn is None or not callable(fn): + raise ValueError(f"Function '{name}' is not callable: {fn!r}") + + wrapped = _to_funmodule( + fn, + trainable=train_graph_agents_functions, + traceable_code=True, + allow_external_dependencies=True, + ) + scope[name] = wrapped + if hasattr(wrapped, "parameters"): + parameters.extend(list(wrapped.parameters())) + + if graph_prompts_list: + for idx, prompt in enumerate(list(graph_prompts_list)): + if hasattr(prompt, "data") and hasattr(prompt, "name"): + parameters.append(prompt) + continue + + new_prompt = node(str(getattr(prompt, "data", prompt)), trainable=True) + if not _replace_scope_object(scope, prompt, new_prompt): + raise ValueError( + "Prompt object was not found in scope by 
identity. Pass a trace " + "node prompt or provide scope that contains the prompt object." + ) + + graph_prompts_list[idx] = new_prompt + parameters.append(new_prompt) + + graph = graph_factory() + compiled = graph.compile() if hasattr(graph, "compile") else graph + + return TraceGraph( + graph=compiled, + parameters=_dedupe_identity(parameters), + bindings={}, + service_name=service_name, + input_key=input_key, + output_key=output_key, + ) diff --git a/opto/trace/io/instrumentation.py b/opto/trace/io/instrumentation.py index 80a049cd..b533ebba 100644 --- a/opto/trace/io/instrumentation.py +++ b/opto/trace/io/instrumentation.py @@ -13,9 +13,10 @@ import logging from contextlib import contextmanager from dataclasses import dataclass, field -from typing import Any, Callable, Dict, Iterator, Optional, Set +from typing import Any, Callable, Dict, Iterator, List, Optional, Set from opto.trace.io.bindings import Binding, make_dict_binding +from opto.trace.io.graph_instrumentation import instrument_trace_graph from opto.trace.io.langgraph_otel_runtime import TracingLLM from opto.trace.io.telemetry_session import TelemetrySession @@ -106,6 +107,13 @@ def stream(self, state: Any, **kwargs: Any) -> Iterator[Dict[str, Any]]: def instrument_graph( graph: Any = None, *, + adapter: Optional[Any] = None, + backend: str = "otel", + graph_factory: Optional[Callable[[], Any]] = None, + scope: Optional[Dict[str, Any]] = None, + graph_agents_functions: Optional[List[str]] = None, + graph_prompts_list: Optional[List[Any]] = None, + train_graph_agents_functions: bool = True, session: Optional[TelemetrySession] = None, service_name: str = "langgraph-agent", trainable_keys: Optional[Set[str]] = None, @@ -119,7 +127,7 @@ def instrument_graph( llm_span_name: str = "llm.chat.completion", input_key: str = "query", output_key: Optional[str] = None, -) -> InstrumentedGraph: +) -> Any: """Wrap a LangGraph with automatic OTEL instrumentation. 
Parameters @@ -164,6 +172,71 @@ def instrument_graph( ------- InstrumentedGraph """ + try: + from opto.trace.graph.adapter import GraphAdapter + except Exception: + GraphAdapter = None + + if adapter is not None: + if GraphAdapter is not None and not isinstance(adapter, GraphAdapter): + raise TypeError("adapter must be an instance of GraphAdapter") + return adapter.instrument( + backend=backend, + service_name=service_name, + input_key=input_key, + output_key=output_key, + session=session, + trainable_keys=trainable_keys, + enable_code_optimization=enable_code_optimization, + llm=llm, + emit_genai_child_spans=emit_genai_child_spans, + bindings=bindings, + in_place=in_place, + initial_templates=initial_templates, + provider_name=provider_name, + llm_span_name=llm_span_name, + ) + + if GraphAdapter is not None and isinstance(graph, GraphAdapter): + return graph.instrument( + backend=backend, + service_name=service_name, + input_key=input_key, + output_key=output_key, + session=session, + trainable_keys=trainable_keys, + enable_code_optimization=enable_code_optimization, + llm=llm, + emit_genai_child_spans=emit_genai_child_spans, + bindings=bindings, + in_place=in_place, + initial_templates=initial_templates, + provider_name=provider_name, + llm_span_name=llm_span_name, + ) + + if backend == "trace": + if graph_factory is None: + if callable(graph): + graph_factory = graph + else: + raise ValueError( + "backend='trace' requires graph_factory or a callable graph" + ) + return instrument_trace_graph( + graph_factory, + scope=scope, + graph_agents_functions=list(graph_agents_functions or []), + graph_prompts_list=graph_prompts_list, + train_graph_agents_functions=train_graph_agents_functions, + service_name=service_name, + input_key=input_key, + output_key=output_key, + ) + + if backend != "otel": + raise ValueError("Unsupported backend. 
Expected 'otel' or 'trace'.") + # -- compile graph if needed -- compiled = graph if graph is not None and hasattr(graph, "compile"): diff --git a/opto/trace/io/optimization.py b/opto/trace/io/optimization.py index 0d8fc774..db97ddd6 100644 --- a/opto/trace/io/optimization.py +++ b/opto/trace/io/optimization.py @@ -21,11 +21,13 @@ Callable, Dict, List, + Tuple, Optional, Union, ) from opto.trace.io.bindings import Binding, apply_updates +from opto.trace.io.graph_instrumentation import TraceGraph from opto.trace.io.instrumentation import InstrumentedGraph logger = logging.getLogger(__name__) @@ -239,13 +241,23 @@ def _select_output_node(nodes: dict) -> Any: return top_level[-1] +def _batchify_items(*items: Any) -> Any: + """Build a batched Trace node payload without importing trainer packages.""" + from opto.trace import node + + output = "" + for i, item in enumerate(items): + output += f"ID {[i]}: {item}\n" + return node(output, name="batch_output") + + # --------------------------------------------------------------------------- # optimize_graph # --------------------------------------------------------------------------- def optimize_graph( - graph: InstrumentedGraph, + graph: Any, queries: Union[List[str], List[Dict[str, Any]]], *, iterations: int = 5, @@ -305,6 +317,18 @@ def optimize_graph( ------- OptimizationResult """ + if getattr(graph, "backend", None) == "trace": + return _optimize_trace_graph( + graph, + queries=queries, + iterations=iterations, + optimizer=optimizer, + optimizer_kwargs=optimizer_kwargs, + eval_fn=eval_fn, + output_key=output_key, + on_iteration=on_iteration, + ) + # Resolve bindings / templates effective_bindings = bindings or graph.bindings if initial_templates: @@ -354,8 +378,7 @@ def _ensure_trace_imports(): except ImportError: _GraphPropagator = None if _batchify is None: - from opto.trainer.algorithms.basic_algorithms import batchify - _batchify = batchify + _batchify = _batchify_items def _ensure_optimizer(param_nodes): nonlocal 
_optimizer @@ -586,3 +609,128 @@ def _make_state(query: Any) -> Dict[str, Any]: score_history=score_history, all_runs=all_runs, ) + + +def _optimize_trace_graph( + graph: TraceGraph, + *, + queries: Union[List[str], List[Dict[str, Any]]], + iterations: int = 5, + optimizer: Optional[Any] = None, + optimizer_kwargs: Optional[Dict[str, Any]] = None, + eval_fn: Optional[EvalFn] = None, + output_key: Optional[str] = None, + on_iteration: Optional[Callable[[int, List[RunResult], Dict[str, Any]], None]] = None, +) -> OptimizationResult: + from opto.optimizers.optoprime_v2 import OptoPrimeV2 + from opto.trace.nodes import Node + + if eval_fn is None: + raise ValueError("backend='trace' requires an explicit eval_fn") + + key = output_key or getattr(graph, "output_key", None) + score_history: List[float] = [] + all_runs: List[List[RunResult]] = [] + best_score = float("-inf") + best_iteration = 0 + best_updates: Dict[str, Any] = {} + last_applied_updates: Dict[str, Any] = {} + + def _snapshot(parameters: List[Any]) -> Dict[str, Any]: + snapshot: Dict[str, Any] = {} + for p in parameters: + snapshot[getattr(p, "name", repr(p))] = getattr(p, "data", None) + return snapshot + + best_parameters = _snapshot(graph.parameters) + opt = optimizer or OptoPrimeV2(parameters=list(graph.parameters), **dict(optimizer_kwargs or {})) + + def _extract_output(result: Any, sidecar: Any = None) -> Tuple[Any, Any]: + if sidecar is not None and getattr(sidecar, "output_node", None) is not None: + output_node = sidecar.output_node + return output_node, getattr(output_node, "data", output_node) + answer = result.get(key, result) if (key and isinstance(result, dict)) else result + if key and isinstance(answer, Node) and isinstance(getattr(answer, "data", None), dict): + answer = answer.data.get(key, answer) + if isinstance(answer, Node): + return answer, getattr(answer, "data", answer) + raise TypeError( + "trace backend requires the graph result (or result[output_key]) to be a Trace Node" + ) + + 
for iteration in range(iterations + 1): + state_parameters = _snapshot(graph.parameters) + applied_updates_for_iteration = dict(last_applied_updates) + runs: List[RunResult] = [] + output_nodes: List[Any] = [] + updates: Dict[str, Any] = {} + + for query in queries: + state = query if isinstance(query, dict) else {graph.input_key: query} + result = graph.invoke(state) + sidecar = getattr(graph, "_last_sidecar", None) + output_node, answer = _extract_output(result, sidecar=sidecar) + er = _normalise_eval( + eval_fn( + { + "query": query, + "answer": answer, + "result": result, + "iteration": iteration, + } + ) + ) + runs.append( + RunResult( + answer=answer, + score=er.score, + feedback=er.feedback, + metrics=er.metrics, + otlp={}, + ) + ) + output_nodes.append(output_node) + + avg_score = sum((run.score or 0.0) for run in runs) / max(1, len(runs)) + score_history.append(avg_score) + all_runs.append(runs) + + if avg_score > best_score: + best_score = avg_score + best_iteration = iteration + best_parameters = state_parameters + best_updates = dict(applied_updates_for_iteration) if iteration > 0 else {} + + if iteration > 0 and output_nodes: + opt.zero_feedback() + if len(output_nodes) == 1: + opt.backward(output_nodes[0], runs[0].feedback or f"Score: {runs[0].score}") + else: + target = _batchify_items(*output_nodes) + feedback = _batchify_items( + *[(r.feedback or f"Score: {r.score}") for r in runs] + ).data + opt.backward(target, feedback) + raw_updates = opt.step() + if isinstance(raw_updates, dict): + updates = raw_updates + if getattr(graph, "bindings", None) and all(isinstance(k, str) for k in raw_updates): + last_applied_updates = apply_updates(raw_updates, graph.bindings, strict=False) + else: + last_applied_updates = dict(raw_updates) + else: + last_applied_updates = {} + + if on_iteration: + on_iteration(iteration, runs, updates) + + return OptimizationResult( + baseline_score=score_history[0], + best_score=best_score, + best_iteration=best_iteration, + 
best_parameters=best_parameters, + best_updates=best_updates, + final_parameters=_snapshot(graph.parameters), + score_history=score_history, + all_runs=all_runs, + ) diff --git a/opto/trainer/algorithms/__init__.py b/opto/trainer/algorithms/__init__.py index 09333a7f..4d2b11fc 100644 --- a/opto/trainer/algorithms/__init__.py +++ b/opto/trainer/algorithms/__init__.py @@ -1,4 +1,10 @@ from opto.trainer.algorithms.algorithm import Trainer -from opto.trainer.algorithms.basic_algorithms import Minibatch, MinibatchAlgorithm, BasicSearchAlgorithm +from opto.trainer.algorithms.basic_algorithms import ( + Minibatch, + MinibatchAlgorithm, + BasicSearchAlgorithm, + MinibatchCurriculumAccumulationCommonFeedbackAlgorithm, + BasicSearchCurriculumAccumulationCommonFeedbackAlgorithm, +) from opto.trainer.algorithms.beamsearch_algorithm import BeamsearchAlgorithm, BeamsearchHistoryAlgorithm from opto.trainer.algorithms.UCBsearch import UCBSearchAlgorithm diff --git a/tests/features_tests/test_flows_compose.py b/tests/features_tests/test_flows_compose.py index 65e51cba..fab168ba 100644 --- a/tests/features_tests/test_flows_compose.py +++ b/tests/features_tests/test_flows_compose.py @@ -76,3 +76,25 @@ def test_tracedllm_and_optoprimev2_prompt_with_mock_llm(mock_llm_globally): assert "Your response:" in part2 print(part2) + +def test_tracedllm_accepts_callable_llm(): + from opto.features.flows.compose import TracedLLM + + class _Choice: + def __init__(self, content): + self.message = type("m", (), {"content": content}) + + def llm_callable(*args, **kwargs): + return type("r", (), {"choices": [_Choice("callable response")]}) + + traced_llm = TracedLLM(system_prompt="test", llm=llm_callable) + output = traced_llm("hello") + assert output.data == "callable response" + + +def test_tracedllm_rejects_non_callable_non_abstract_model(): + from opto.features.flows.compose import TracedLLM + + with pytest.raises(TypeError, match="AbstractModel or a callable"): + TracedLLM(system_prompt="test", 
llm=object()) + diff --git a/tests/features_tests/test_graph_module_prioritysearch.py b/tests/features_tests/test_graph_module_prioritysearch.py new file mode 100644 index 00000000..481959de --- /dev/null +++ b/tests/features_tests/test_graph_module_prioritysearch.py @@ -0,0 +1,10 @@ +import pytest + +# PrioritySearch integration is covered by graph_module_train smoke. +# This file exists to keep a dedicated feature test entry point. + +pytest.importorskip("langgraph.graph") + + +def test_prioritysearch_entrypoint_smoke(): + assert True diff --git a/tests/features_tests/test_graph_module_train.py b/tests/features_tests/test_graph_module_train.py new file mode 100644 index 00000000..73cb3b66 --- /dev/null +++ b/tests/features_tests/test_graph_module_train.py @@ -0,0 +1,122 @@ +import pytest + +langgraph = pytest.importorskip("langgraph.graph") +StateGraph = langgraph.StateGraph +START = langgraph.START +END = langgraph.END + +from opto.features.priority_search import PrioritySearch +from opto.optimizers.optimizer import Optimizer +from opto.trace import node +from opto.trace.graph import LangGraphAdapter +from opto.trainer import train +from opto.trainer.guide import Guide + + +def _raw(x): + return getattr(x, "data", x) + + +class KeywordGuide(Guide): + def get_feedback(self, query, response, reference=None, **kwargs): + score = 1.0 if str(reference) in str(response) else 0.0 + return score, f"Expected keyword: {reference}" + + +class MutatingOptimizer(Optimizer): + def __init__(self, parameters): + super().__init__(parameters) + self.calls = 0 + + def _step(self, *args, **kwargs): + self.calls += 1 + updates = {p: p.data for p in self.parameters} + for p in self.parameters: + if p.name == "synth_prompt" and self.calls == 1: + updates[p] = "CRISPR optimized :: {query} :: {plan}" + return updates + + +class RouteOptimizer(Optimizer): + def _step(self, *args, **kwargs): + updates = {p: p.data for p in self.parameters} + for p in self.parameters: + if p.name == 
"route_policy": + updates[p] = "review" + return updates + + +def build_adapter(): + planner_prompt = node("Plan: {query}", trainable=True, name="planner_prompt") + synth_prompt = node("Answer: {query} :: {plan}", trainable=True, name="synth_prompt") + + def planner_node(state): + query = _raw(state["query"]) + return {"plan": planner_prompt.data.replace("{query}", str(query))} + + def synth_node(state): + query = _raw(state["query"]) + plan = _raw(state["plan"]) + route = _raw(state.get("route_policy", "direct")) + if route == "review": + return {"final_answer": f"Reviewed CRISPR :: {query}"} + return {"final_answer": synth_prompt.data.replace("{query}", str(query)).replace("{plan}", str(plan))} + + def build_graph(planner_node=planner_node, synth_node=synth_node, route_policy="direct"): + graph = StateGraph(dict) + graph.add_node("planner", planner_node) + graph.add_node("synth", synth_node) + graph.add_edge(START, "planner") + graph.add_edge("planner", "synth") + graph.add_edge("synth", END) + return graph + + return LangGraphAdapter( + backend="trace", + graph_factory=build_graph, + function_targets={"planner_node": planner_node, "synth_node": synth_node}, + prompt_targets={"planner_prompt": planner_prompt, "synth_prompt": synth_prompt}, + graph_knobs={"route_policy": "direct"}, + input_key="query", + output_key="final_answer", + ) + + +def test_graphmodule_is_train_compatible(): + adapter = build_adapter() + model = adapter.as_module() + guide = KeywordGuide() + optimizer = MutatingOptimizer(model.parameters()) + algo = train( + model=model, + train_dataset={"inputs": ["gene editing"], "infos": ["CRISPR"]}, + algorithm="MinibatchAlgorithm", + optimizer=optimizer, + guide=guide, + num_epochs=1, + batch_size=1, + ) + assert algo is not None + out = model("gene editing") + assert isinstance(out.data, str) + + +def test_graphmodule_prioritysearch_smoke_for_graph_knob(): + adapter = build_adapter() + model = adapter.as_module() + guide = KeywordGuide() + optimizer 
= RouteOptimizer(model.parameters()) + algo = PrioritySearch(model, optimizer, num_threads=1) + result = algo.train( + guide=guide, + train_dataset={"inputs": ["gene editing"], "infos": ["Reviewed"]}, + num_epochs=1, + batch_size=1, + num_batches=1, + num_candidates=1, + num_proposals=1, + validate_exploration_candidates=True, + ) + assert result is None or isinstance(result, tuple) + out = model("gene editing") + assert isinstance(out.data, str) diff --git a/tests/features_tests/test_langgraph_notebooks.py b/tests/features_tests/test_langgraph_notebooks.py new file mode 100644 index 00000000..aafa5daf --- /dev/null +++ b/tests/features_tests/test_langgraph_notebooks.py @@ -0,0 +1,31 @@ +from pathlib import Path +from unittest.mock import patch + +import nbformat +import pytest + +nbclient = pytest.importorskip("nbclient") +pytest.importorskip("langgraph.graph") + + +@pytest.mark.parametrize( + "notebook_path", + [ + "examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb", + "examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb", + ], +) +def test_notebook_executes(notebook_path): + root = Path(__file__).resolve().parents[2] + with (root / notebook_path).open("r", encoding="utf-8") as f: + nb = nbformat.read(f, as_version=4) + + client = nbclient.NotebookClient( + nb, + timeout=180, + kernel_name="python3", + resources={"metadata": {"path": str(root)}}, + ) + # Force notebook live-provider sections to skip for deterministic CI runs. 
+ with patch.dict("os.environ", {"OPENROUTER_API_KEY": ""}, clear=False): + client.execute() diff --git a/tests/features_tests/test_trace_graph_optimization.py b/tests/features_tests/test_trace_graph_optimization.py new file mode 100644 index 00000000..ed3df753 --- /dev/null +++ b/tests/features_tests/test_trace_graph_optimization.py @@ -0,0 +1,243 @@ +import pytest + +langgraph = pytest.importorskip("langgraph.graph") +StateGraph = langgraph.StateGraph +START = langgraph.START +END = langgraph.END + +from opto.trace import node +from opto.trace.io import instrument_graph, optimize_graph + +_TRACE_SCOPE = {} + + +def _raw(value): + return getattr(value, "data", value) + + +def planner_node(state): + query = _raw(state["query"]) + template = _raw(_TRACE_SCOPE["planner_prompt"]) + return { + "query": str(query), + "plan": str(template).replace("{query}", str(query)), + } + + +def synth_node(state): + query = _raw(state["query"]) + plan = _raw(state["plan"]) + template = _raw(_TRACE_SCOPE["synth_prompt"]) + answer = str(template).replace("{query}", str(query)).replace("{plan}", str(plan)) + return {"final_answer": node(answer, name="final_answer_node")} + + +def bad_planner_node(state): + query = _raw(state["query"]) + return {"query": str(query), "plan": "bad-plan"} + + +def bad_synth_node(state): + query = _raw(state["query"]) + return {"final_answer": f"plain-text answer for {query}"} + + +def _make_trace_graph(): + planner_prompt = node( + "Create a plan for: {query}", + trainable=True, + name="planner_prompt", + ) + synth_prompt = node( + "Answer: {query}\nPlan: {plan}", + trainable=True, + name="synth_prompt", + ) + + scope = { + "planner_prompt": planner_prompt, + "synth_prompt": synth_prompt, + "planner_node": planner_node, + "synth_node": synth_node, + } + _TRACE_SCOPE.clear() + _TRACE_SCOPE.update(scope) + + def build_graph(): + graph = StateGraph(dict) + graph.add_node("planner", scope["planner_node"]) + graph.add_node("synth", scope["synth_node"]) + 
graph.add_edge(START, "planner") + graph.add_edge("planner", "synth") + graph.add_edge("synth", END) + return graph + + return build_graph, scope + + +def _make_bad_trace_graph(): + scope = { + "planner_node": bad_planner_node, + "synth_node": bad_synth_node, + } + + def build_graph(): + graph = StateGraph(dict) + graph.add_node("planner", scope["planner_node"]) + graph.add_node("synth", scope["synth_node"]) + graph.add_edge(START, "planner") + graph.add_edge("planner", "synth") + graph.add_edge("synth", END) + return graph + + return build_graph, scope + + +class MutatingOptimizer: + def __init__(self, prompt_node): + self.prompt_node = prompt_node + self.zero_calls = 0 + self.backward_calls = 0 + self.step_calls = 0 + + def zero_feedback(self): + self.zero_calls += 1 + + def backward(self, *_args, **_kwargs): + self.backward_calls += 1 + + def step(self): + self.step_calls += 1 + if self.step_calls == 1: + self.prompt_node._data = "CRISPR optimized :: {query} :: {plan}" + return {"synth_prompt": self.prompt_node._data} + return {} + + +class BatchSpyOptimizer: + def __init__(self): + self.saw_batched_output = False + self.feedback_len = None + + def zero_feedback(self): + return None + + def backward(self, output, feedback): + output_data = getattr(output, "data", None) + if ( + isinstance(output_data, str) + and "ID [0]:" in output_data + and "ID [1]:" in output_data + ): + self.saw_batched_output = True + if isinstance(feedback, str): + self.feedback_len = feedback.count("ID [") + + def step(self): + return {} + + +def test_optimize_graph_trace_backend_reports_progress_and_best_updates(): + build_graph, scope = _make_trace_graph() + graph = instrument_graph( + backend="trace", + graph_factory=build_graph, + scope=scope, + graph_agents_functions=["planner_node", "synth_node"], + graph_prompts_list=[scope["planner_prompt"], scope["synth_prompt"]], + output_key="final_answer", + ) + + callbacks = [] + optimizer = MutatingOptimizer(scope["synth_prompt"]) + + result 
= optimize_graph( + graph, + queries=["What is gene editing?"], + iterations=2, + optimizer=optimizer, + eval_fn=lambda payload: { + "score": 1.0 if "CRISPR optimized" in str(payload["answer"]) else 0.0, + "feedback": "Prefer mentioning CRISPR optimized explicitly.", + }, + on_iteration=lambda i, runs, updates: callbacks.append((i, len(runs), dict(updates))), + ) + + assert result.baseline_score == 0.0 + assert result.best_score == 1.0 + assert result.best_iteration == 2 + assert result.best_updates == {"synth_prompt": "CRISPR optimized :: {query} :: {plan}"} + assert optimizer.zero_calls == 2 + assert optimizer.backward_calls == 2 + assert optimizer.step_calls == 2 + assert callbacks == [ + (0, 1, {}), + (1, 1, {"synth_prompt": "CRISPR optimized :: {query} :: {plan}"}), + (2, 1, {}), + ] + + +def test_optimize_graph_trace_backend_batches_multiple_queries(): + build_graph, scope = _make_trace_graph() + graph = instrument_graph( + backend="trace", + graph_factory=build_graph, + scope=scope, + graph_agents_functions=["planner_node", "synth_node"], + graph_prompts_list=[scope["planner_prompt"], scope["synth_prompt"]], + output_key="final_answer", + ) + + optimizer = BatchSpyOptimizer() + result = optimize_graph( + graph, + queries=["Q1", "Q2"], + iterations=1, + optimizer=optimizer, + eval_fn=lambda payload: { + "score": 0.5, + "feedback": "Keep answers short.", + }, + ) + + assert len(result.all_runs[0]) == 2 + assert optimizer.saw_batched_output is True + assert optimizer.feedback_len == 2 + + +def test_optimize_graph_trace_requires_eval_fn(): + build_graph, scope = _make_trace_graph() + graph = instrument_graph( + backend="trace", + graph_factory=build_graph, + scope=scope, + graph_agents_functions=["planner_node", "synth_node"], + graph_prompts_list=[scope["planner_prompt"], scope["synth_prompt"]], + output_key="final_answer", + ) + + with pytest.raises(ValueError, match="eval_fn"): + optimize_graph(graph, queries=["hi"], iterations=0) + + +def 
test_optimize_graph_trace_requires_node_output(): + build_graph, scope = _make_bad_trace_graph() + graph = instrument_graph( + backend="trace", + graph_factory=build_graph, + scope=scope, + graph_agents_functions=["planner_node", "synth_node"], + output_key="final_answer", + ) + + with pytest.raises(TypeError, match="Trace Node"): + optimize_graph( + graph, + queries=["What is CRISPR?"], + iterations=1, + optimizer=BatchSpyOptimizer(), + eval_fn=lambda payload: { + "score": 0.0, + "feedback": "This should not be reached.", + }, + ) diff --git a/tests/unit_tests/test_graph_adapter_modulecandidate.py b/tests/unit_tests/test_graph_adapter_modulecandidate.py new file mode 100644 index 00000000..6f0c7771 --- /dev/null +++ b/tests/unit_tests/test_graph_adapter_modulecandidate.py @@ -0,0 +1,70 @@ +import pytest + +langgraph = pytest.importorskip("langgraph.graph") +StateGraph = langgraph.StateGraph +START = langgraph.START +END = langgraph.END + +from opto.features.priority_search.priority_search import ModuleCandidate +from opto.optimizers.optimizer import Optimizer +from opto.trace import node +from opto.trace.graph import LangGraphAdapter + + +def _raw(x): + return getattr(x, "data", x) + + +class DummyOptimizer(Optimizer): + def _step(self, *args, **kwargs): + return {p: p.data for p in self.parameters} + + +def make_searchable_model(): + answer_prompt = node("Base: {query}", trainable=True, name="answer_prompt") + + def planner_node(state): + return {"plan": "draft"} + + def synth_node(state): + query = _raw(state["query"]) + route = _raw(state.get("route_policy", "direct")) + if route == "review": + return {"final_answer": f"Reviewed :: {query}"} + return {"final_answer": answer_prompt.data.replace("{query}", str(query))} + + def build_graph(planner_node=planner_node, synth_node=synth_node, route_policy="direct"): + graph = StateGraph(dict) + graph.add_node("planner", planner_node) + graph.add_node("synth", synth_node) + graph.add_edge(START, "planner") + 
graph.add_edge("planner", "synth") + graph.add_edge("synth", END) + return graph + + adapter = LangGraphAdapter( + backend="trace", + graph_factory=build_graph, + function_targets={"planner_node": planner_node, "synth_node": synth_node}, + prompt_targets={"answer_prompt": answer_prompt}, + graph_knobs={"route_policy": "direct"}, + input_key="query", + output_key="final_answer", + ) + return adapter.as_module() + + +def test_modulecandidate_get_module_works_with_graphmodule(): + model = make_searchable_model() + optimizer = DummyOptimizer(model.parameters()) + route_param = next(p for p in model.parameters() if "route_policy" in p.name) + candidate = ModuleCandidate( + model, + update_dict={route_param: "review"}, + optimizer=optimizer, + ) + new_model = candidate.get_module() + assert getattr(new_model.adapter, "_active_sidecar", None) is None + assert getattr(new_model.adapter, "_compiled_cache", {}) == {} + out = new_model("What is CRISPR?") + assert isinstance(out.data, str) diff --git a/tests/unit_tests/test_graph_adapter_trace.py b/tests/unit_tests/test_graph_adapter_trace.py new file mode 100644 index 00000000..1a07fece --- /dev/null +++ b/tests/unit_tests/test_graph_adapter_trace.py @@ -0,0 +1,100 @@ +import pytest + +langgraph = pytest.importorskip("langgraph.graph") +StateGraph = langgraph.StateGraph +START = langgraph.START +END = langgraph.END + +from opto.trace import node +from opto.trace.graph import GraphModule, GraphRunSidecar, LangGraphAdapter + + +def _raw(x): + return getattr(x, "data", x) + + +def _collect_ancestors(n): + seen = set() + stack = [n] + out = [] + while stack: + cur = stack.pop() + if id(cur) in seen: + continue + seen.add(id(cur)) + out.append(cur) + for parent in getattr(cur, "parents", []): + stack.append(parent) + return out + + +def make_adapter(): + planner_prompt = node("Plan: {query}", trainable=True, name="planner_prompt") + synth_prompt = node("Answer: {query} :: {plan}", trainable=True, name="synth_prompt") + + def 
planner_node(state): + query = _raw(state["query"]) + return {"plan": planner_prompt.data.replace("{query}", str(query))} + + def synth_node(state): + query = _raw(state["query"]) + plan = _raw(state["plan"]) + answer = synth_prompt.data.replace("{query}", str(query)).replace("{plan}", str(plan)) + return {"final_answer": answer} + + def build_graph(planner_node=planner_node, synth_node=synth_node, route_policy="direct"): + graph = StateGraph(dict) + graph.add_node("planner", planner_node) + graph.add_node("synth", synth_node) + graph.add_edge(START, "planner") + graph.add_edge("planner", "synth") + graph.add_edge("synth", END) + return graph + + return LangGraphAdapter( + backend="trace", + graph_factory=build_graph, + function_targets={"planner_node": planner_node, "synth_node": synth_node}, + prompt_targets={"planner_prompt": planner_prompt, "synth_prompt": synth_prompt}, + graph_knobs={"route_policy": "direct"}, + input_key="query", + output_key="final_answer", + ) + + +def test_invoke_runtime_trace_returns_plain_dict_and_sidecar_node(): + adapter = make_adapter() + result, sidecar = adapter.invoke_runtime({"query": "What is CRISPR?"}, backend="trace") + assert isinstance(result, dict) + assert isinstance(sidecar, GraphRunSidecar) + assert "final_answer" in result + assert sidecar.output_node is not None + assert sidecar.output_node.data == result["final_answer"] + + +def test_shadow_state_preserves_cross_node_dependencies(): + adapter = make_adapter() + model = adapter.as_module() + out = model("What is CRISPR?") + sidecar = model._last_sidecar + assert out is sidecar.output_node + assert "planner_node" in sidecar.node_outputs + assert "synth_node" in sidecar.node_outputs + + +def test_graph_module_parameters_include_prompts_and_graph_knobs(): + adapter = make_adapter() + model = adapter.as_module() + assert isinstance(model, GraphModule) + names = {getattr(p, "name", "") for p in model.parameters()} + assert any("planner_prompt" in n for n in names) + assert 
any("synth_prompt" in n for n in names) + assert any("route_policy" in n for n in names) + + +def test_bindings_are_auto_generated_and_transparent(): + adapter = make_adapter() + assert adapter.bindings["planner_prompt"].kind == "prompt" + assert adapter.bindings["route_policy"].kind == "graph" + adapter.bindings["route_policy"].set("alternate") + assert adapter.graph_knobs["route_policy"].data == "alternate" diff --git a/tests/unit_tests/test_trace_graph_instrumentation.py b/tests/unit_tests/test_trace_graph_instrumentation.py new file mode 100644 index 00000000..bf3d36ab --- /dev/null +++ b/tests/unit_tests/test_trace_graph_instrumentation.py @@ -0,0 +1,138 @@ +import pytest + +langgraph = pytest.importorskip("langgraph.graph") +StateGraph = langgraph.StateGraph +START = langgraph.START +END = langgraph.END + +from opto.trace import node +from opto.trace.io import TraceGraph, instrument_graph + +_TRACE_SCOPE = {} + + +def _raw(value): + return getattr(value, "data", value) + + +class RawPrompt: + def __init__(self, text: str): + self.text = text + + def __str__(self) -> str: + return self.text + + +def planner_node(state): + query = _raw(state["query"]) + template = _raw(_TRACE_SCOPE["planner_prompt"]) + return { + "query": str(query), + "plan": str(template).replace("{query}", str(query)), + } + + +def synth_node(state): + query = _raw(state["query"]) + plan = _raw(state["plan"]) + template = _raw(_TRACE_SCOPE["synth_prompt"]) + answer = str(template).replace("{query}", str(query)).replace("{plan}", str(plan)) + return {"final_answer": node(answer, name="final_answer_node")} + + +def _make_trace_graph(planner_prompt=None, synth_prompt=None): + planner_prompt = planner_prompt or node( + "Create a plan for: {query}", + trainable=True, + name="planner_prompt", + ) + synth_prompt = synth_prompt or node( + "Answer: {query}\nPlan: {plan}", + trainable=True, + name="synth_prompt", + ) + + scope = { + "planner_prompt": planner_prompt, + "synth_prompt": synth_prompt, + 
"planner_node": planner_node, + "synth_node": synth_node, + } + _TRACE_SCOPE.clear() + _TRACE_SCOPE.update(scope) + + def build_graph(): + graph = StateGraph(dict) + graph.add_node("planner", scope["planner_node"]) + graph.add_node("synth", scope["synth_node"]) + graph.add_edge(START, "planner") + graph.add_edge("planner", "synth") + graph.add_edge("synth", END) + return graph + + return build_graph, scope + + +def test_trace_backend_requires_callable_graph_factory(): + build_graph, _scope = _make_trace_graph() + with pytest.raises(ValueError): + instrument_graph(backend="trace", graph=build_graph()) + + +def test_trace_backend_requires_scope_when_factory_is_provided(): + build_graph, _scope = _make_trace_graph() + with pytest.raises(ValueError, match="scope"): + instrument_graph( + backend="trace", + graph_factory=build_graph, + scope=None, + graph_agents_functions=["planner_node", "synth_node"], + output_key="final_answer", + ) + + +def test_trace_backend_returns_trace_graph(): + build_graph, scope = _make_trace_graph() + instrumented = instrument_graph( + backend="trace", + graph_factory=build_graph, + scope=scope, + graph_agents_functions=["planner_node", "synth_node"], + graph_prompts_list=[scope["planner_prompt"], scope["synth_prompt"]], + output_key="final_answer", + ) + assert isinstance(instrumented, TraceGraph) + assert instrumented.backend == "trace" + assert len(instrumented.parameters) >= 2 + + +def test_trace_backend_rejects_unknown_function_name(): + build_graph, scope = _make_trace_graph() + with pytest.raises(KeyError): + instrument_graph( + backend="trace", + graph_factory=build_graph, + scope=scope, + graph_agents_functions=["missing_node"], + output_key="final_answer", + ) + + +def test_trace_backend_replaces_raw_prompt_in_scope_by_identity(): + raw_prompt = RawPrompt("Create a plan for: {query}") + build_graph, scope = _make_trace_graph(planner_prompt=raw_prompt) + + instrumented = instrument_graph( + backend="trace", + 
graph_factory=build_graph, + scope=scope, + graph_agents_functions=["planner_node", "synth_node"], + graph_prompts_list=[raw_prompt], + output_key="final_answer", + ) + + _TRACE_SCOPE.update(scope) + assert scope["planner_prompt"] is not raw_prompt + result = instrumented.invoke({"query": "What is CRISPR?"}) + assert "final_answer" in result + assert "CRISPR" in result["final_answer"].data From 338d6754ef3e00417239b1210816bb8b2d7a2dec Mon Sep 17 00:00:00 2001 From: doxav Date: Sat, 18 Apr 2026 20:38:56 +0200 Subject: [PATCH 03/16] minimal fixes into otel trace adapters --- opto/trace/graph/adapter.py | 6 +- opto/trainer/__init__.py | 12 ++- opto/trainer/algorithms/__init__.py | 49 +++++++--- .../test_graph_module_prioritysearch.py | 89 +++++++++++++++++-- .../features_tests/test_graph_module_train.py | 24 +---- tests/unit_tests/test_graph_adapter_trace.py | 27 ++++++ 6 files changed, 167 insertions(+), 40 deletions(-) diff --git a/opto/trace/graph/adapter.py b/opto/trace/graph/adapter.py index 05309996..6c40e359 100644 --- a/opto/trace/graph/adapter.py +++ b/opto/trace/graph/adapter.py @@ -281,11 +281,15 @@ def invoke_trace(self, state: Dict[str, Any], **kwargs: Any): sidecar = self.new_run_sidecar() for key, value in state.items(): sidecar.shadow_state[key] = value if isinstance(value, Node) else node(value, name=key) + for key, value in self.graph_knobs.items(): + sidecar.shadow_state[key] = value self._active_sidecar = sidecar + runtime_state = dict(state) + runtime_state.update(self._knob_values()) try: graph = self.build_graph(backend="trace") - result = graph.invoke(state, **kwargs) + result = graph.invoke(runtime_state, **kwargs) finally: self._active_sidecar = None diff --git a/opto/trainer/__init__.py b/opto/trainer/__init__.py index fdb4b478..2a3efa7e 100644 --- a/opto/trainer/__init__.py +++ b/opto/trainer/__init__.py @@ -1 +1,11 @@ -from opto.trainer.train import train #, resume \ No newline at end of file +"""Trainer package public facade.""" + +from 
importlib import import_module + +__all__ = ["train"] + + +def __getattr__(name): + if name == "train": + return import_module("opto.trainer.train").train + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") \ No newline at end of file diff --git a/opto/trainer/algorithms/__init__.py b/opto/trainer/algorithms/__init__.py index 4d2b11fc..8485b46e 100644 --- a/opto/trainer/algorithms/__init__.py +++ b/opto/trainer/algorithms/__init__.py @@ -1,10 +1,39 @@ -from opto.trainer.algorithms.algorithm import Trainer -from opto.trainer.algorithms.basic_algorithms import ( - Minibatch, - MinibatchAlgorithm, - BasicSearchAlgorithm, - MinibatchCurriculumAccumulationCommonFeedbackAlgorithm, - BasicSearchCurriculumAccumulationCommonFeedbackAlgorithm, -) -from opto.trainer.algorithms.beamsearch_algorithm import BeamsearchAlgorithm, BeamsearchHistoryAlgorithm -from opto.trainer.algorithms.UCBsearch import UCBSearchAlgorithm +"""Lazy public facade for trainer algorithms.""" + +__all__ = [ + "Trainer", + "Minibatch", + "MinibatchAlgorithm", + "BasicSearchAlgorithm", + "MinibatchCurriculumAccumulationCommonFeedbackAlgorithm", + "BasicSearchCurriculumAccumulationCommonFeedbackAlgorithm", + "BeamsearchAlgorithm", + "BeamsearchHistoryAlgorithm", + "UCBSearchAlgorithm", +] + + +def __getattr__(name): + if name == "Trainer": + from opto.trainer.algorithms.algorithm import Trainer + + return Trainer + if name in { + "Minibatch", + "MinibatchAlgorithm", + "BasicSearchAlgorithm", + "MinibatchCurriculumAccumulationCommonFeedbackAlgorithm", + "BasicSearchCurriculumAccumulationCommonFeedbackAlgorithm", + }: + from opto.trainer.algorithms import basic_algorithms as module + + return getattr(module, name) + if name in {"BeamsearchAlgorithm", "BeamsearchHistoryAlgorithm"}: + from opto.trainer.algorithms import beamsearch_algorithm as module + + return getattr(module, name) + if name == "UCBSearchAlgorithm": + from opto.trainer.algorithms.UCBsearch import UCBSearchAlgorithm + + 
return UCBSearchAlgorithm + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/tests/features_tests/test_graph_module_prioritysearch.py b/tests/features_tests/test_graph_module_prioritysearch.py index 481959de..1839d28a 100644 --- a/tests/features_tests/test_graph_module_prioritysearch.py +++ b/tests/features_tests/test_graph_module_prioritysearch.py @@ -1,10 +1,89 @@ import pytest -# PrioritySearch integration is covered by graph_module_train smoke. -# This file exists to keep a dedicated feature test entry point. +langgraph = pytest.importorskip("langgraph.graph") +StateGraph = langgraph.StateGraph +START = langgraph.START +END = langgraph.END -pytest.importorskip("langgraph.graph") +from opto.features.priority_search import PrioritySearch +from opto.optimizers.optimizer import Optimizer +from opto.trace import node +from opto.trace.graph import LangGraphAdapter +from opto.trainer.guide import Guide -def test_prioritysearch_entrypoint_smoke(): - assert True +def _raw(x): + return getattr(x, "data", x) + + +class KeywordGuide(Guide): + def get_feedback(self, query, response, reference=None, **kwargs): + score = 1.0 if str(reference) in str(response) else 0.0 + return score, f"Expected keyword: {reference}" + + +class RouteOptimizer(Optimizer): + def _step(self, *args, **kwargs): + updates = {p: p.data for p in self.parameters} + for p in self.parameters: + if "route_policy" in getattr(p, "name", ""): + updates[p] = "review" + return updates + + +def build_adapter(): + planner_prompt = node("Plan: {query}", trainable=True, name="planner_prompt") + synth_prompt = node("Answer: {query} :: {plan}", trainable=True, name="synth_prompt") + + def planner_node(state): + query = _raw(state["query"]) + return {"plan": planner_prompt.data.replace("{query}", str(query))} + + def synth_node(state): + query = _raw(state["query"]) + plan = _raw(state["plan"]) + route = _raw(state.get("route_policy", "direct")) + if route == "review": + return 
{"final_answer": f"Reviewed CRISPR :: {query}"} + return { + "final_answer": synth_prompt.data.replace("{query}", str(query)).replace("{plan}", str(plan)) + } + + def build_graph(planner_node=planner_node, synth_node=synth_node, route_policy="direct"): + graph = StateGraph(dict) + graph.add_node("planner", planner_node) + graph.add_node("synth", synth_node) + graph.add_edge(START, "planner") + graph.add_edge("planner", "synth") + graph.add_edge("synth", END) + return graph + + return LangGraphAdapter( + backend="trace", + graph_factory=build_graph, + function_targets={"planner_node": planner_node, "synth_node": synth_node}, + prompt_targets={"planner_prompt": planner_prompt, "synth_prompt": synth_prompt}, + graph_knobs={"route_policy": "direct"}, + input_key="query", + output_key="final_answer", + ) + + +def test_graphmodule_prioritysearch_smoke_for_graph_knob(): + adapter = build_adapter() + model = adapter.as_module() + guide = KeywordGuide() + optimizer = RouteOptimizer(model.parameters()) + algo = PrioritySearch(model, optimizer, num_threads=1) + algo.train( + guide=guide, + train_dataset={"inputs": ["gene editing"], "infos": ["Reviewed"]}, + num_epochs=1, + batch_size=1, + num_batches=1, + num_candidates=1, + num_proposals=1, + validate_exploration_candidates=True, + ) + out = model("gene editing") + assert "Reviewed" in out.data diff --git a/tests/features_tests/test_graph_module_train.py b/tests/features_tests/test_graph_module_train.py index 73cb3b66..e691e85f 100644 --- a/tests/features_tests/test_graph_module_train.py +++ b/tests/features_tests/test_graph_module_train.py @@ -5,11 +5,10 @@ START = langgraph.START END = langgraph.END -from opto.features.priority_search import PrioritySearch from opto.optimizers.optimizer import Optimizer from opto.trace import node from opto.trace.graph import LangGraphAdapter -from opto.trainer import train +from opto.trainer.train import train from opto.trainer.guide import Guide @@ -99,24 +98,3 @@ def 
test_graphmodule_is_train_compatible(): assert algo is not None out = model("gene editing") assert isinstance(out.data, str) - - -def test_graphmodule_prioritysearch_smoke_for_graph_knob(): - adapter = build_adapter() - model = adapter.as_module() - guide = KeywordGuide() - optimizer = RouteOptimizer(model.parameters()) - algo = PrioritySearch(model, optimizer, num_threads=1) - result = algo.train( - guide=guide, - train_dataset={"inputs": ["gene editing"], "infos": ["Reviewed"]}, - num_epochs=1, - batch_size=1, - num_batches=1, - num_candidates=1, - num_proposals=1, - validate_exploration_candidates=True, - ) - assert result is None or isinstance(result, tuple) - out = model("gene editing") - assert isinstance(out.data, str) diff --git a/tests/unit_tests/test_graph_adapter_trace.py b/tests/unit_tests/test_graph_adapter_trace.py index 1a07fece..03ba97a3 100644 --- a/tests/unit_tests/test_graph_adapter_trace.py +++ b/tests/unit_tests/test_graph_adapter_trace.py @@ -7,6 +7,7 @@ from opto.trace import node from opto.trace.graph import GraphModule, GraphRunSidecar, LangGraphAdapter +from opto.trace.io import TraceGraph, instrument_graph, optimize_graph def _raw(x): @@ -98,3 +99,29 @@ def test_bindings_are_auto_generated_and_transparent(): assert adapter.bindings["route_policy"].kind == "graph" adapter.bindings["route_policy"].set("alternate") assert adapter.graph_knobs["route_policy"].data == "alternate" + + +def test_instrument_graph_accepts_adapter_in_trace_mode_and_optimize_graph_uses_sidecar(): + adapter = make_adapter() + graph = instrument_graph(adapter=adapter, backend="trace", output_key="final_answer") + assert isinstance(graph, TraceGraph) + result = optimize_graph( + graph, + queries=["What is CRISPR?"], + iterations=0, + eval_fn=lambda payload: { + "score": 1.0 if "CRISPR" in str(payload["answer"]) else 0.0, + "feedback": "Keep CRISPR in the final answer.", + }, + ) + assert result.best_iteration == 0 + assert result.best_score == 1.0 + + +def 
test_instrument_graph_accepts_graph_argument_when_it_is_a_graph_adapter(): + adapter = make_adapter() + graph = instrument_graph(graph=adapter, backend="trace", output_key="final_answer") + assert isinstance(graph, TraceGraph) + out = graph.invoke({"query": "What is CRISPR?"}) + assert isinstance(out, dict) + assert "final_answer" in out From a638d2e6762340c84baf8e0ffc1d17fc805ddc3c Mon Sep 17 00:00:00 2001 From: doxav Date: Sun, 19 Apr 2026 09:44:07 +0200 Subject: [PATCH 04/16] added support of different trace observers --- ...aph_instrument_and_compare_observers.ipynb | 573 ++++++++++++++++++ ...ggraph_instrument_and_compare_observers.py | 172 ++++++ opto/trace/io/__init__.py | 11 +- opto/trace/io/eval_hooks.py | 314 ---------- opto/trace/io/graph_instrumentation.py | 25 +- opto/trace/io/instrumentation.py | 120 +++- opto/trace/io/observers.py | 71 +++ opto/trace/io/optimization.py | 133 +++- opto/trace/io/sysmonitoring.py | 217 +++++++ .../test_langgraph_notebooks.py | 1 + tests/features_tests/test_sysmon_backend.py | 62 ++ tests/unit_tests/test_graph_observers.py | 139 +++++ 12 files changed, 1505 insertions(+), 333 deletions(-) create mode 100644 examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb create mode 100644 examples/notebooks/demo_langgraph_instrument_and_compare_observers.py delete mode 100644 opto/trace/io/eval_hooks.py create mode 100644 opto/trace/io/observers.py create mode 100644 opto/trace/io/sysmonitoring.py create mode 100644 tests/features_tests/test_sysmon_backend.py create mode 100644 tests/unit_tests/test_graph_observers.py diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb new file mode 100644 index 00000000..5401b7ed --- /dev/null +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb @@ -0,0 +1,573 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9ce01f9c", + "metadata": 
{}, + "source": [ + "# LangGraph Instrumentation Backends Comparison\n", + "\n", + "This notebook demonstrates using `instrument_graph()` with different observation backends:\n", + "\n", + "- **trace** - Uses the opto trace system\n", + "- **trace + otel** - Trace system with OpenTelemetry observation\n", + "- **trace + sysmon** - Trace system with Python 3.12+ sys.monitoring\n", + "- **trace + otel + sysmon** - Trace with both observers\n", + "- **otel** - Pure OpenTelemetry instrumentation \n", + "- **otel + sysmon** - OpenTelemetry with sys.monitoring\n", + "- **sysmon** - Pure sys.monitoring (Python 3.12+)\n", + "\n", + "Each backend provides different tracing and profiling capabilities." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e603173", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import time\n", + "from langgraph.graph import StateGraph, START, END\n", + "from opto.trace.io import instrument_graph\n", + "\n", + "HAS_SYSMON = hasattr(sys, \"monitoring\")\n", + "\n", + "print(f\"Python {sys.version_info.major}.{sys.version_info.minor}\")\n", + "print(f\"sys.monitoring available: {HAS_SYSMON}\")" + ] + }, + { + "cell_type": "markdown", + "id": "ce9569a4", + "metadata": {}, + "source": [ + "## Define the Graph\n", + "\n", + "Create a simple planner -> synthesizer graph for demonstration." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "236820ff", + "metadata": {}, + "outputs": [], + "source": [ + "def build_graph():\n", + " \"\"\"Build a simple planner->synth graph.\"\"\"\n", + " def planner(state):\n", + " return {\"plan\": f\"plan::{state['query']}\"}\n", + "\n", + " def synth(state):\n", + " query = state.get(\"query\", \"\")\n", + " plan = state.get(\"plan\", \"\")\n", + " return {\"final_answer\": f\"answer::{query}::{plan}\"}\n", + "\n", + " g = StateGraph(dict)\n", + " g.add_node(\"planner\", planner)\n", + " g.add_node(\"synth\", synth)\n", + " g.add_edge(START, \"planner\")\n", + " g.add_edge(\"planner\", \"synth\")\n", + " g.add_edge(\"synth\", END)\n", + " return g\n", + "\n", + "# Test the base graph\n", + "test_graph = build_graph()\n", + "test_result = test_graph.compile().invoke({\"query\": \"What is CRISPR?\"})\n", + "print(f\"✓ Graph works: {test_result}\")" + ] + }, + { + "cell_type": "markdown", + "id": "907f7aa8", + "metadata": {}, + "source": [ + "## Test Different Backends\n", + "\n", + "Run the same graph with different instrumentation backends." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17a2a026", + "metadata": {}, + "outputs": [], + "source": [ + "def run_test(name, instrument_kwargs):\n", + " \"\"\"Run a single instrumentation test.\"\"\"\n", + " print(f\"\\n{'='*60}\")\n", + " print(f\"Test: {name}\")\n", + " print(f\"{'='*60}\")\n", + " try:\n", + " t0 = time.perf_counter()\n", + " \n", + " # Build and instrument graph \n", + " graph = build_graph()\n", + " if \"backend\" in instrument_kwargs and instrument_kwargs[\"backend\"] == \"trace\":\n", + " # For trace backend, pass graph_factory and scope\n", + " instrumented = instrument_graph(\n", + " graph_factory=build_graph,\n", + " scope=globals(),\n", + " **instrument_kwargs\n", + " )\n", + " else:\n", + " # For otel/sysmon, pass compiled graph\n", + " instrumented = instrument_graph(\n", + " graph=graph.compile(),\n", + " **instrument_kwargs\n", + " )\n", + " \n", + " # Invoke\n", + " result = instrumented.invoke({\"query\": \"What is CRISPR?\"})\n", + " dt_ms = (time.perf_counter() - t0) * 1000.0\n", + " \n", + " # Extract answer\n", + " answer = result.get(\"final_answer\", result)\n", + " \n", + " print(f\"✓ SUCCESS in {dt_ms:.1f}ms\")\n", + " print(f\"Answer (preview): {str(answer)[:80]}\")\n", + " return True\n", + " except Exception as e:\n", + " print(f\"✗ FAIL: {e}\")\n", + " import traceback\n", + " traceback.print_exc()\n", + " return False\n", + "\n", + "results = {}" + ] + }, + { + "cell_type": "markdown", + "id": "59c8ed5b", + "metadata": {}, + "source": [ + "### Test 1: Trace Backend Only" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "189fa028", + "metadata": {}, + "outputs": [], + "source": [ + "results[\"trace\"] = run_test(\n", + " \"backend='trace'\",\n", + " {\"backend\": \"trace\", \"output_key\": \"final_answer\"}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d98cf881", + "metadata": {}, + "source": [ + "### Test 2: Trace + OpenTelemetry" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "afec093b", + "metadata": {}, + "outputs": [], + "source": [ + "results[\"trace+otel\"] = run_test(\n", + " \"backend='trace', observe_with=('otel',)\",\n", + " {\n", + " \"backend\": \"trace\",\n", + " \"observe_with\": (\"otel\",),\n", + " \"output_key\": \"final_answer\"\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d9abee4b", + "metadata": {}, + "source": [ + "### Test 3: OpenTelemetry Backend Only" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3994b2a5", + "metadata": {}, + "outputs": [], + "source": [ + "results[\"otel\"] = run_test(\n", + " \"backend='otel'\",\n", + " {\"backend\": \"otel\", \"output_key\": \"final_answer\"}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "859a2dd9", + "metadata": {}, + "source": [ + "### Test 4: sys.monitoring Tests (Python 3.12+)\n", + "\n", + "These tests only run on Python 3.12+ where `sys.monitoring` is available." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b8e27f3", + "metadata": {}, + "outputs": [], + "source": [ + "if HAS_SYSMON:\n", + " results[\"trace+sysmon\"] = run_test(\n", + " \"backend='trace', observe_with=('sysmon',)\",\n", + " {\n", + " \"backend\": \"trace\",\n", + " \"observe_with\": (\"sysmon\",),\n", + " \"output_key\": \"final_answer\"\n", + " }\n", + " )\n", + " \n", + " results[\"trace+otel+sysmon\"] = run_test(\n", + " \"backend='trace', observe_with=('otel', 'sysmon')\",\n", + " {\n", + " \"backend\": \"trace\",\n", + " \"observe_with\": (\"otel\", \"sysmon\"),\n", + " \"output_key\": \"final_answer\"\n", + " }\n", + " )\n", + " \n", + " results[\"otel+sysmon\"] = run_test(\n", + " \"backend='otel', observe_with=('sysmon',)\",\n", + " {\n", + " \"backend\": \"otel\",\n", + " \"observe_with\": (\"sysmon\",),\n", + " \"output_key\": \"final_answer\"\n", + " }\n", + " )\n", + " \n", + " results[\"sysmon\"] = run_test(\n", + " \"backend='sysmon'\",\n", + " 
{\n", + " \"backend\": \"sysmon\",\n", + " \"output_key\": \"final_answer\"\n", + " }\n", + " )\n", + "else:\n", + " print(\"\\n⚠️ sys.monitoring tests skipped (requires Python 3.12+)\")" + ] + }, + { + "cell_type": "markdown", + "id": "94d26802", + "metadata": {}, + "source": [ + "## Results Summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b30074f3", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\"*80)\n", + "print(\"Test Results Summary\")\n", + "print(\"=\"*80)\n", + "\n", + "passed = sum(1 for v in results.values() if v)\n", + "total = len(results)\n", + "\n", + "for name, success in results.items():\n", + " status = \"✓ PASS\" if success else \"✗ FAIL\"\n", + " print(f\" {name:30s} {status}\")\n", + "\n", + "print(f\"\\nTotal: {passed}/{total} passed\")\n", + "\n", + "# Verify critical backends\n", + "assert results.get(\"trace\", False), \"trace backend should pass\"\n", + "assert results.get(\"otel\", False), \"otel backend should pass\"\n", + "\n", + "print(\"\\n✓ All critical tests passed!\")\n", + "print(\"=\"*80)" + ] + }, + { + "cell_type": "markdown", + "id": "3f7e2859", + "metadata": {}, + "source": [ + "# LangGraph trace / OTEL / sys.monitoring comparison demo\n", + "\n", + "Compact notebook comparing the new supported configurations:\n", + "\n", + "- `backend=\"trace\"`\n", + "- `backend=\"trace\", observe_with=(\"otel\",)`\n", + "- `backend=\"trace\", observe_with=(\"sysmon\",)`\n", + "- `backend=\"trace\", observe_with=(\"otel\", \"sysmon\")`\n", + "- `backend=\"otel\", observe_with=(\"sysmon\",)`\n", + "- `backend=\"sysmon\"`\n", + "\n", + "It prints result previews, observer artifacts, simple structure summaries, and a small timing comparison." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "445e1ff9", + "metadata": {}, + "outputs": [], + "source": [ + "import sys, time\n", + "from langgraph.graph import StateGraph, START, END\n", + "from opto.trace import node\n", + "from opto.trace.io import instrument_graph, optimize_graph\n", + "\n", + "HAS_SYSMON = hasattr(sys, 'monitoring')\n", + "\n", + "class StubLLM:\n", + " model = 'stub'\n", + " def __call__(self, messages=None, **kwargs):\n", + " class _Msg:\n", + " content = 'stub-response'\n", + " class _Choice:\n", + " message = _Msg()\n", + " class _Resp:\n", + " choices = [_Choice()]\n", + " return _Resp()\n", + "\n", + "def _raw(x):\n", + " return getattr(x, 'data', x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3404aa4b", + "metadata": {}, + "outputs": [], + "source": [ + "planner_prompt = node('Plan: {query}', trainable=True, name='planner_prompt')\n", + "synth_prompt = node('Answer: {query} :: {plan}', trainable=True, name='synth_prompt')\n", + "\n", + "def planner_node(state):\n", + " query = _raw(state['query'])\n", + " return {'plan': planner_prompt.data.replace('{query}', str(query))}\n", + "\n", + "def synth_node(state):\n", + " query = _raw(state['query'])\n", + " plan = _raw(state['plan'])\n", + " answer = synth_prompt.data.replace('{query}', str(query)).replace('{plan}', str(plan))\n", + " return {'final_answer': node(answer, name='final_answer_node')}\n", + "\n", + "def build_trace_graph():\n", + " g = StateGraph(dict)\n", + " g.add_node('planner', planner_node)\n", + " g.add_node('synth', synth_node)\n", + " g.add_edge(START, 'planner')\n", + " g.add_edge('planner', 'synth')\n", + " g.add_edge('synth', END)\n", + " return g\n", + "\n", + "def build_plain_graph():\n", + " def planner(state):\n", + " return {'plan': f\"plan::{state['query']}\"}\n", + " def synth(state):\n", + " return {'final_answer': f\"answer::{state['query']}::{state['plan']}\"}\n", + " g = StateGraph(dict)\n", + " 
g.add_node('planner', planner)\n", + " g.add_node('synth', synth)\n", + " g.add_edge(START, 'planner')\n", + " g.add_edge('planner', 'synth')\n", + " g.add_edge('synth', END)\n", + " return g" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d656d4a6", + "metadata": {}, + "outputs": [], + "source": [ + "def run_case(name, factory):\n", + " t0 = time.perf_counter()\n", + " graph = factory()\n", + " result = graph.invoke({'query': 'What is CRISPR?'})\n", + " dt_ms = (time.perf_counter() - t0) * 1000.0\n", + " answer = result.get('final_answer', result)\n", + " observer_summary = []\n", + " for art in getattr(graph, '_last_observer_artifacts', []):\n", + " if art.carrier == 'sysmon':\n", + " observer_summary.append({'carrier': 'sysmon', 'events': len(art.profile_doc.get('events', []))})\n", + " elif art.carrier == 'otel':\n", + " otlp = art.raw or {}\n", + " spans = otlp.get('resourceSpans', [{}])[0].get('scopeSpans', [{}])[0].get('spans', []) if otlp.get('resourceSpans') else []\n", + " observer_summary.append({'carrier': 'otel', 'spans': len(spans)})\n", + " sysmon_events = len(getattr(graph, '_last_profile_doc', {}).get('events', [])) if getattr(graph, '_last_profile_doc', None) else None\n", + " row = {\n", + " 'name': name,\n", + " 'answer_preview': str(getattr(answer, 'data', answer))[:80],\n", + " 'time_ms': round(dt_ms, 3),\n", + " 'observer_summary': observer_summary,\n", + " 'sysmon_events': sysmon_events,\n", + " }\n", + " print(row)\n", + " return row\n", + "\n", + "rows = []\n", + "\n", + "rows.append(run_case(\n", + " 'trace',\n", + " lambda: instrument_graph(\n", + " backend='trace',\n", + " graph_factory=build_trace_graph,\n", + " scope=globals(),\n", + " graph_agents_functions=['planner_node', 'synth_node'],\n", + " graph_prompts_list=[planner_prompt, synth_prompt],\n", + " output_key='final_answer',\n", + " ),\n", + "))\n", + "\n", + "rows.append(run_case(\n", + " 'trace+otel',\n", + " lambda: instrument_graph(\n", + " 
backend='trace',\n", + " observe_with=('otel',),\n", + " graph_factory=build_trace_graph,\n", + " scope=globals(),\n", + " graph_agents_functions=['planner_node', 'synth_node'],\n", + " graph_prompts_list=[planner_prompt, synth_prompt],\n", + " output_key='final_answer',\n", + " ),\n", + "))\n", + "\n", + "if HAS_SYSMON:\n", + " rows.append(run_case(\n", + " 'trace+sysmon',\n", + " lambda: instrument_graph(\n", + " backend='trace',\n", + " observe_with=('sysmon',),\n", + " graph_factory=build_trace_graph,\n", + " scope=globals(),\n", + " graph_agents_functions=['planner_node', 'synth_node'],\n", + " graph_prompts_list=[planner_prompt, synth_prompt],\n", + " output_key='final_answer',\n", + " ),\n", + " ))\n", + " rows.append(run_case(\n", + " 'trace+otel+sysmon',\n", + " lambda: instrument_graph(\n", + " backend='trace',\n", + " observe_with=('otel', 'sysmon'),\n", + " graph_factory=build_trace_graph,\n", + " scope=globals(),\n", + " graph_agents_functions=['planner_node', 'synth_node'],\n", + " graph_prompts_list=[planner_prompt, synth_prompt],\n", + " output_key='final_answer',\n", + " ),\n", + " ))\n", + "\n", + "rows.append(run_case(\n", + " 'otel',\n", + " lambda: instrument_graph(\n", + " graph=build_plain_graph(),\n", + " backend='otel',\n", + " llm=StubLLM(),\n", + " initial_templates={'planner_prompt': 'Plan {query}'},\n", + " output_key='final_answer',\n", + " ),\n", + "))\n", + "\n", + "if HAS_SYSMON:\n", + " rows.append(run_case(\n", + " 'otel+sysmon',\n", + " lambda: instrument_graph(\n", + " graph=build_plain_graph(),\n", + " backend='otel',\n", + " observe_with=('sysmon',),\n", + " llm=StubLLM(),\n", + " initial_templates={'planner_prompt': 'Plan {query}'},\n", + " output_key='final_answer',\n", + " ),\n", + " ))\n", + " rows.append(run_case(\n", + " 'sysmon',\n", + " lambda: instrument_graph(\n", + " graph=build_plain_graph(),\n", + " backend='sysmon',\n", + " initial_templates={'planner_prompt': 'Plan {query}'},\n", + " 
output_key='final_answer',\n", + " ),\n", + " ))\n", + "\n", + "assert any(r['name'] == 'trace' for r in rows)\n", + "assert any(r['name'] == 'otel' for r in rows)\n", + "if HAS_SYSMON:\n", + " assert any(r['name'] == 'sysmon' for r in rows)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8254702", + "metadata": {}, + "outputs": [], + "source": [ + "# baseline-only optimization sanity checks\n", + "trace_graph = instrument_graph(\n", + " backend='trace',\n", + " observe_with=('otel',) if not HAS_SYSMON else ('otel', 'sysmon'),\n", + " graph_factory=build_trace_graph,\n", + " scope=globals(),\n", + " graph_agents_functions=['planner_node', 'synth_node'],\n", + " graph_prompts_list=[planner_prompt, synth_prompt],\n", + " output_key='final_answer',\n", + ")\n", + "trace_opt = optimize_graph(\n", + " trace_graph,\n", + " queries=['What is CRISPR?'],\n", + " iterations=0,\n", + " eval_fn=lambda payload: {\n", + " 'score': 1.0 if 'CRISPR' in str(payload['answer']) else 0.0,\n", + " 'feedback': 'Keep CRISPR in the final answer.',\n", + " },\n", + ")\n", + "assert trace_opt.best_iteration == 0\n", + "assert trace_opt.best_score == 1.0\n", + "\n", + "if HAS_SYSMON:\n", + " sysmon_graph = instrument_graph(\n", + " graph=build_plain_graph(),\n", + " backend='sysmon',\n", + " initial_templates={'planner_prompt': 'Plan {query}'},\n", + " output_key='final_answer',\n", + " )\n", + " sysmon_opt = optimize_graph(\n", + " sysmon_graph,\n", + " queries=['What is CRISPR?'],\n", + " iterations=0,\n", + " eval_fn=lambda payload: {\n", + " 'score': 1.0 if 'CRISPR' in str(payload['answer']) else 0.0,\n", + " 'feedback': 'Keep CRISPR in the answer.',\n", + " },\n", + " )\n", + " assert sysmon_opt.best_iteration == 0\n", + " assert sysmon_opt.best_score == 1.0" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py 
b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py new file mode 100644 index 00000000..6f3a95f3 --- /dev/null +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +""" +LangGraph / OTEL / sys.monitoring comparison demo. + +Demonstrates using instrument_graph with different backends: +- trace, trace+otel, trace+sysmon, trace+otel+sysmon +- otel, otel+sysmon +- sysmon +""" + +import sys +import time +from langgraph.graph import StateGraph, START, END +from opto.trace.io import instrument_graph + + +HAS_SYSMON = hasattr(sys, "monitoring") + + +def build_graph(): + """Build a simple planner->synth graph.""" + def planner(state): + return {"plan": f"plan::{state['query']}"} + + def synth(state): + query = state.get("query", "") + plan = state.get("plan", "") + return {"final_answer": f"answer::{query}::{plan}"} + + g = StateGraph(dict) + g.add_node("planner", planner) + g.add_node("synth", synth) + g.add_edge(START, "planner") + g.add_edge("planner", "synth") + g.add_edge("synth", END) + return g + + +def run_test(name, instrument_kwargs): + """Run a single instrumentation test.""" + print(f"\nTest: {name}") + try: + t0 = time.perf_counter() + + # Build and instrument graph + graph = build_graph() + if "backend" in instrument_kwargs and instrument_kwargs["backend"] == "trace": + # For trace backend, pass graph_factory and scope + instrumented = instrument_graph( + graph_factory=build_graph, + scope=globals(), + **instrument_kwargs + ) + else: + # For otel/sysmon, pass compiled graph + instrumented = instrument_graph( + graph=graph.compile(), + **instrument_kwargs + ) + + # Invoke + result = instrumented.invoke({"query": "What is CRISPR?"}) + dt_ms = (time.perf_counter() - t0) * 1000.0 + + # Extract answer + answer = result.get("final_answer", result) + + print(f" ✓ SUCCESS ({dt_ms:.1f}ms)") + print(f" Answer: {str(answer)[:80]}") + return True + except Exception as e: + print(f" ✗ 
FAIL: {e}") + import traceback + traceback.print_exc() + return False + + +def main(): + print("\n" + "=" * 80) + print("LangGraph Instrumentation Backends Comparison") + print("=" * 80) + print(f"Python {sys.version_info.major}.{sys.version_info.minor}") + print(f"sys.monitoring available: {HAS_SYSMON}\n") + + results = {} + + # Test 1: trace backend + results["trace"] = run_test( + "backend='trace'", + {"backend": "trace", "output_key": "final_answer"} + ) + + # Test 2: trace + otel + results["trace+otel"] = run_test( + "backend='trace', observe_with=('otel',)", + { + "backend": "trace", + "observe_with": ("otel",), + "output_key": "final_answer" + } + ) + + # Test 3-4: trace + sysmon variants (if available) + if HAS_SYSMON: + results["trace+sysmon"] = run_test( + "backend='trace', observe_with=('sysmon',)", + { + "backend": "trace", + "observe_with": ("sysmon",), + "output_key": "final_answer" + } + ) + + results["trace+otel+sysmon"] = run_test( + "backend='trace', observe_with=('otel', 'sysmon')", + { + "backend": "trace", + "observe_with": ("otel", "sysmon"), + "output_key": "final_answer" + } + ) + + # Test 5: otel backend + results["otel"] = run_test( + "backend='otel'", + {"backend": "otel", "output_key": "final_answer"} + ) + + # Test 6: otel + sysmon (if available) + if HAS_SYSMON: + results["otel+sysmon"] = run_test( + "backend='otel', observe_with=('sysmon',)", + { + "backend": "otel", + "observe_with": ("sysmon",), + "output_key": "final_answer" + } + ) + + # Test 7: sysmon backend + results["sysmon"] = run_test( + "backend='sysmon'", + { + "backend": "sysmon", + "output_key": "final_answer" + } + ) + + # Summary + print("\n" + "=" * 80) + print("Test Results Summary") + print("=" * 80) + + passed = sum(1 for v in results.values() if v) + total = len(results) + + for name, success in results.items(): + status = "✓ PASS" if success else "✗ FAIL" + print(f" {name:30s} {status}") + + print(f"\nTotal: {passed}/{total} passed") + + # Final assertions + 
assert results.get("trace", False), "trace backend must pass" + assert results.get("otel", False), "otel backend must pass" + + print("\n✓ All critical tests passed!") + print("=" * 80) + + +if __name__ == "__main__": + main() diff --git a/opto/trace/io/__init__.py b/opto/trace/io/__init__.py index 5fce8d8a..90960aa7 100644 --- a/opto/trace/io/__init__.py +++ b/opto/trace/io/__init__.py @@ -21,7 +21,7 @@ """ # -- high-level API -------------------------------------------------------- -from opto.trace.io.instrumentation import instrument_graph, InstrumentedGraph +from opto.trace.io.instrumentation import instrument_graph, InstrumentedGraph, SysMonInstrumentedGraph from opto.trace.io.graph_instrumentation import instrument_trace_graph, TraceGraph from opto.trace.io.optimization import ( optimize_graph, @@ -51,6 +51,8 @@ ) from opto.trace.io.otel_adapter import otlp_traces_to_trace_json from opto.trace.io.tgj_ingest import ingest_tgj, merge_tgj +from opto.trace.io.observers import ObserverArtifact, GraphObserver, OTelObserver +from opto.trace.io.sysmonitoring import SysMonitoringSession, SysMonObserver, sysmon_profile_to_tgj try: from opto.trace.graph import ( GraphAdapter, @@ -85,6 +87,7 @@ "record_genai_chat", # Data classes "InstrumentedGraph", + "SysMonInstrumentedGraph", "instrument_trace_graph", "TraceGraph", "RunResult", @@ -99,6 +102,12 @@ "otlp_traces_to_trace_json", "ingest_tgj", "merge_tgj", + "ObserverArtifact", + "GraphObserver", + "OTelObserver", + "SysMonitoringSession", + "SysMonObserver", + "sysmon_profile_to_tgj", ] if GraphAdapter is not None: diff --git a/opto/trace/io/eval_hooks.py b/opto/trace/io/eval_hooks.py deleted file mode 100644 index 8c6b3641..00000000 --- a/opto/trace/io/eval_hooks.py +++ /dev/null @@ -1,314 +0,0 @@ -from __future__ import annotations - -import json -from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple - -EvalFn = Callable[ - [str, float, Dict[str, float], str, Dict[str, Any], Dict[str, Any]], - 
Tuple[float, Dict[str, float], str], -] - - -def default_feedback(score: float, metrics: Dict[str, float], reasons: str) -> str: - return json.dumps({"score": score, "metrics": metrics, "reasons": reasons}) - - -def _clip01(x: float) -> float: - if x < 0.0: - return 0.0 - if x > 1.0: - return 1.0 - return x - - -def _ratio_closeness(r: float) -> float: - """ - Convert ratio-to-target (ideal=1.0) into a [0,1] closeness score. - """ - try: - r = float(r) - except Exception: - return 0.0 - return _clip01(1.0 - abs(1.0 - r)) - - -def _dea_overall_from_scores(dea_scores: Mapping[str, Any]) -> Optional[float]: - """ - Robust aggregate over DEA signals: - - ratios -> closeness - - similarities/coverage assumed in [0,1] - - ignore out-of-range values - """ - if not dea_scores: - return None - - ratio_keys = { - "sections_count_ratio_to_target", - "content_length_ratio_to_target", - "resources_count_ratio_to_target", - } - - vals: List[float] = [] - for k, v in dea_scores.items(): - try: - fv = float(v) - except Exception: - continue - - if k in ratio_keys: - vals.append(_ratio_closeness(fv)) - else: - if 0.0 <= fv <= 1.0: - vals.append(_clip01(fv)) - - if not vals: - return None - return sum(vals) / len(vals) - - -def _try_import_evaluate_document(): - """ - Best-effort import of doc_eval.evaluate_document. - We keep this robust because users might have different top-level package names. 
- """ - candidates = [ - "document_embedding_analysis.common.doc_eval", - "document_analysis_embedding.common.doc_eval", - "common.doc_eval", # allows running inside the external repo directly - ] - for mod in candidates: - try: - m = __import__(mod, fromlist=["evaluate_document"]) - fn = getattr(m, "evaluate_document", None) - if fn is not None: - return fn, m - except Exception: - continue - return None, None - - -def _synthesize_hybrid_feedback( - llm: Any, - answer: str, - original_reasons: str, - dea_scores: Dict[str, Any], -) -> str: - """ - Use the LLM to synthesize a new feedback string combining the original reasons - and the objective DEA scores. - """ - if not llm: - return original_reasons - - # Format DEA scores for the prompt - dea_summary = [] - for k, v in dea_scores.items(): - if isinstance(v, (int, float)): - dea_summary.append(f"{k}: {v:.3f}") - else: - dea_summary.append(f"{k}: {v}") - dea_text = ", ".join(dea_summary) - - prompt = f""" -You are an expert evaluator. -You have evaluated a generated document and provided the following initial feedback: -"{original_reasons}" - -Additionally, an automated Document Embedding Analysis (DEA) system has provided the following objective metrics: -{dea_text} - -Please synthesize a new, comprehensive feedback explanation that incorporates both your initial qualitative assessment and these quantitative DEA metrics. -Focus on explaining *why* the score is what it is, citing specific metrics where relevant (e.g., "The content is semantically close on plan (0.85) but lacks specific entities..."). -Keep the feedback concise and constructive. -""".strip() - - try: - # Assume LangChain-like interface - from langchain_core.messages import HumanMessage - if hasattr(llm, "invoke"): - response = llm.invoke([HumanMessage(content=prompt)]) - return str(response.content) - except Exception: - pass - - try: - # Assume Opto/AutoGen interface - # llm(messages=...) 
returns a response object with choices - response = llm(messages=[{"role": "user", "content": prompt}]) - - # Handle object access - if hasattr(response, "choices") and response.choices: - choice = response.choices[0] - if hasattr(choice, "message") and hasattr(choice.message, "content"): - return str(choice.message.content) - - # Handle dict access - if isinstance(response, dict) and "choices" in response and response["choices"]: - choice = response["choices"][0] - if "message" in choice and "content" in choice["message"]: - return str(choice["message"]["content"]) - - except Exception: - pass - - return original_reasons - - -def make_document_embedding_analysis_eval( - mode: str = "dea", - *, - llm: Optional[Any] = None, - weight_llm: float = 0.5, - weight_dea: float = 0.5, - doc_eval_kwargs: Optional[Dict[str, Any]] = None, - dea_score_key: Optional[str] = None, -) -> EvalFn: - """ - Build an EvalFn backed by document_embedding_analysis.common.doc_eval.evaluate_document. - - eval_data expected keys: - - solution: dict (required for DEA) - - turns: list (optional) - - content_type: "markdown"|"latex" (optional, default "markdown") - - doc_eval_kwargs: dict (optional overrides per-example) - """ - mode = (mode or "").lower().strip() - - # Default: disable enhanced metrics (Prometheus, WriteHere) unless explicitly enabled - base_kwargs = {"use_enhanced_metrics": False} - if doc_eval_kwargs: - base_kwargs.update(doc_eval_kwargs) - - def _eval( - answer: str, - llm_score: float, - llm_metrics: Dict[str, float], - reasons: str, - otlp: Dict[str, Any], - eval_data: Dict[str, Any], - ) -> Tuple[float, Dict[str, float], str]: - evaluate_document, _mod = _try_import_evaluate_document() - if evaluate_document is None: - return llm_score, dict(llm_metrics), default_feedback(llm_score, dict(llm_metrics), reasons) - - solution = eval_data.get("solution") - if solution is None: - return llm_score, dict(llm_metrics), default_feedback(llm_score, dict(llm_metrics), reasons) - - 
turns = eval_data.get("turns") or [] - content_type = eval_data.get("content_type") or "markdown" - - kwargs = dict(base_kwargs) - if isinstance(eval_data.get("doc_eval_kwargs"), dict): - kwargs.update(eval_data["doc_eval_kwargs"]) - - try: - result = evaluate_document( - answer, - turns=turns, - solution=solution, - content_type=content_type, - **kwargs, - ) - except Exception as e: - metrics = dict(llm_metrics) - metrics["dea.error"] = 1.0 - feedback = json.dumps( - { - "score": llm_score, - "reasons": reasons, - "metrics": metrics, - "dea_exception": repr(e), - } - ) - return llm_score, metrics, feedback - - if not isinstance(result, dict): - return llm_score, dict(llm_metrics), default_feedback(llm_score, dict(llm_metrics), reasons) - - dea_scores = result.get("dea_evaluation_scores") or {} - article_metrics = result.get("article_metrics") or {} - prometheus_scores = result.get("prometheus_scores") or {} - writehere_scores = result.get("writehere_scores") or {} - - # Keep backward compatibility: base metrics are the LLM-as-judge ones. 
- metrics: Dict[str, float] = dict(llm_metrics) - - # DEA metrics - if isinstance(dea_scores, Mapping): - for k, v in dea_scores.items(): - try: - metrics[f"dea.{k}"] = float(v) - except Exception: - continue - - # Article metrics (ROUGE f scores + entity recall) - if isinstance(article_metrics, Mapping): - rouge_scores = article_metrics.get("rouge_scores") or {} - if isinstance(rouge_scores, Mapping): - for name, vals in rouge_scores.items(): - if not isinstance(vals, Mapping): - continue - if "f" in vals: - try: - metrics[f"{name}_f"] = float(vals["f"]) - except Exception: - pass - if "entity_recall" in article_metrics: - try: - metrics["entity_recall"] = float(article_metrics["entity_recall"]) - except Exception: - pass - - # Enhanced metrics if enabled - if isinstance(prometheus_scores, Mapping): - for k, v in prometheus_scores.items(): - if isinstance(v, (int, float)): - metrics[f"prometheus.{k}"] = float(v) - if isinstance(writehere_scores, Mapping): - for k, v in writehere_scores.items(): - if isinstance(v, (int, float)): - metrics[f"writehere.{k}"] = float(v) - - dea_scalar: Optional[float] = None - if dea_score_key and isinstance(dea_scores, Mapping) and dea_score_key in dea_scores: - try: - dea_scalar = float(dea_scores[dea_score_key]) - except Exception: - dea_scalar = None - if dea_scalar is None and isinstance(dea_scores, Mapping): - dea_scalar = _dea_overall_from_scores(dea_scores) - if dea_scalar is None: - dea_scalar = llm_score - - final_reasons = reasons - if mode == "dea": - score = float(dea_scalar) - elif mode == "hybrid": - # Hybrid mode: Use DEA score for optimization, but enrich feedback with LLM synthesis - # The user requested "measure should be all a DEA measure" for the benchmark. - # So we return DEA score as the primary score. 
- score = float(dea_scalar) - if llm: - final_reasons = _synthesize_hybrid_feedback(llm, answer, reasons, dea_scores) - elif mode == "llm": - # LLM mode: Use LLM score for optimization, but include DEA metrics in the payload - # for benchmarking purposes. - score = llm_score - else: # unknown - score = llm_score - - feedback_payload: Dict[str, Any] = { - "score": score, - "reasons": final_reasons, - "metrics": metrics, - "dea_evaluation_scores": dea_scores, - "article_metrics": article_metrics, - "prometheus_scores": prometheus_scores, - "writehere_scores": writehere_scores, - # Explicitly store DEA score for benchmark extraction regardless of optimization score - "benchmark_dea_score": float(dea_scalar) - } - return score, metrics, json.dumps(feedback_payload) - - return _eval diff --git a/opto/trace/io/graph_instrumentation.py b/opto/trace/io/graph_instrumentation.py index 430c107d..812c5480 100644 --- a/opto/trace/io/graph_instrumentation.py +++ b/opto/trace/io/graph_instrumentation.py @@ -8,6 +8,7 @@ from opto.trace import bundle, node from opto.trace.bundle import FunModule +from opto.trace.io.observers import GraphObserver @dataclass @@ -27,13 +28,29 @@ class TraceGraph: output_key: Optional[str] = None backend: str = "trace" _last_sidecar: Any = field(default=None, repr=False, init=False) + observers: List[GraphObserver] = field(default_factory=list) + _last_observer_artifacts: List[Any] = field(default_factory=list, init=False, repr=False) def invoke(self, state: Any, **kwargs: Any) -> Any: - if hasattr(self.graph, "invoke_runtime"): - result, sidecar = self.graph.invoke_runtime(state, backend="trace", **kwargs) - self._last_sidecar = sidecar + for obs in self.observers: + obs.start(bindings=self.bindings, meta={"service_name": self.service_name}) + + result = None + error = None + try: + if hasattr(self.graph, "invoke_runtime"): + result, sidecar = self.graph.invoke_runtime(state, backend="trace", **kwargs) + self._last_sidecar = sidecar + return result + 
result = self.graph.invoke(state, **kwargs) return result - return self.graph.invoke(state, **kwargs) + except BaseException as exc: + error = exc + raise + finally: + self._last_observer_artifacts = [] + for obs in reversed(self.observers): + self._last_observer_artifacts.append(obs.stop(result=result, error=error)) def stream(self, state: Any, **kwargs: Any): yield from self.graph.stream(state, **kwargs) diff --git a/opto/trace/io/instrumentation.py b/opto/trace/io/instrumentation.py index b533ebba..944ed41b 100644 --- a/opto/trace/io/instrumentation.py +++ b/opto/trace/io/instrumentation.py @@ -18,6 +18,8 @@ from opto.trace.io.bindings import Binding, make_dict_binding from opto.trace.io.graph_instrumentation import instrument_trace_graph from opto.trace.io.langgraph_otel_runtime import TracingLLM +from opto.trace.io.observers import GraphObserver, OTelObserver +from opto.trace.io.sysmonitoring import SysMonObserver, SysMonitoringSession from opto.trace.io.telemetry_session import TelemetrySession logger = logging.getLogger(__name__) @@ -51,6 +53,8 @@ class InstrumentedGraph: service_name: str = "langgraph-agent" input_key: str = "query" output_key: Optional[str] = None + observers: List[GraphObserver] = field(default_factory=list) + _last_observer_artifacts: List[Any] = field(default_factory=list, init=False, repr=False) # Holds the active root span context for eval_fn to attach reward spans _root_span: Any = field(default=None, repr=False, init=False) @@ -84,15 +88,28 @@ def invoke(self, state: Any, **kwargs: Any) -> Dict[str, Any]: if isinstance(state, dict): query_hint = str(state.get(self.input_key, "")) - with self._root_invocation_span(query_hint) as root_sp: - result = self.graph.invoke(state, **kwargs) - # Attach a summary attribute to the root span (generic) - if isinstance(result, dict) and self.output_key and self.output_key in result: - root_sp.set_attribute( - "langgraph.output.preview", - str(result[self.output_key])[:500], - ) - return result + 
for obs in self.observers: + obs.start(bindings=self.bindings, meta={"service_name": self.service_name}) + + result = None + error = None + try: + with self._root_invocation_span(query_hint) as root_sp: + result = self.graph.invoke(state, **kwargs) + # Attach a summary attribute to the root span (generic) + if isinstance(result, dict) and self.output_key and self.output_key in result: + root_sp.set_attribute( + "langgraph.output.preview", + str(result[self.output_key])[:500], + ) + return result + except BaseException as exc: + error = exc + raise + finally: + self._last_observer_artifacts = [] + for obs in reversed(self.observers): + self._last_observer_artifacts.append(obs.stop(result=result, error=error)) def stream(self, state: Any, **kwargs: Any) -> Iterator[Dict[str, Any]]: """Stream graph execution with telemetry.""" @@ -104,11 +121,56 @@ def stream(self, state: Any, **kwargs: Any) -> Iterator[Dict[str, Any]]: yield from self.graph.stream(state, **kwargs) +@dataclass +class SysMonInstrumentedGraph: + graph: Any + session: SysMonitoringSession + bindings: Dict[str, Binding] = field(default_factory=dict) + service_name: str = "langgraph-sysmon" + input_key: str = "query" + output_key: Optional[str] = None + backend: str = "sysmon" + _last_profile_doc: Optional[dict] = field(default=None, init=False, repr=False) + + def invoke(self, state: Any, **kwargs: Any): + self.session.start(bindings=self.bindings) + result = None + error = None + try: + result = self.graph.invoke(state, **kwargs) + return result + except BaseException as exc: + error = exc + raise + finally: + self._last_profile_doc = self.session.stop(result=result, error=error) + + def stream(self, state: Any, **kwargs: Any): + raise NotImplementedError("SysMonInstrumentedGraph.stream is not implemented") + + +def _make_observers( + observe_with: tuple[str, ...], + *, + service_name: str, +) -> List[GraphObserver]: + observers: List[GraphObserver] = [] + for name in observe_with: + if name == "otel": + 
observers.append(OTelObserver(service_name=f"{service_name}-otel-observer")) + elif name == "sysmon": + observers.append(SysMonObserver(SysMonitoringSession(service_name=f"{service_name}-sysmon-observer"))) + else: + raise ValueError(f"Unsupported observer: {name!r}") + return observers + + def instrument_graph( graph: Any = None, *, adapter: Optional[Any] = None, backend: str = "otel", + observe_with: tuple[str, ...] = (), graph_factory: Optional[Callable[[], Any]] = None, scope: Optional[Dict[str, Any]] = None, graph_agents_functions: Optional[List[str]] = None, @@ -180,7 +242,7 @@ def instrument_graph( if adapter is not None: if GraphAdapter is not None and not isinstance(adapter, GraphAdapter): raise TypeError("adapter must be an instance of GraphAdapter") - return adapter.instrument( + out = adapter.instrument( backend=backend, service_name=service_name, input_key=input_key, @@ -196,9 +258,12 @@ def instrument_graph( provider_name=provider_name, llm_span_name=llm_span_name, ) + if hasattr(out, "observers"): + out.observers = _make_observers(observe_with, service_name=service_name) + return out if GraphAdapter is not None and isinstance(graph, GraphAdapter): - return graph.instrument( + out = graph.instrument( backend=backend, service_name=service_name, input_key=input_key, @@ -214,6 +279,9 @@ def instrument_graph( provider_name=provider_name, llm_span_name=llm_span_name, ) + if hasattr(out, "observers"): + out.observers = _make_observers(observe_with, service_name=service_name) + return out if backend == "trace": if graph_factory is None: @@ -223,7 +291,7 @@ def instrument_graph( raise ValueError( "backend='trace' requires graph_factory or a callable graph" ) - return instrument_trace_graph( + out = instrument_trace_graph( graph_factory, scope=scope, graph_agents_functions=list(graph_agents_functions or []), @@ -233,9 +301,34 @@ def instrument_graph( input_key=input_key, output_key=output_key, ) + out.observers = _make_observers(observe_with, 
service_name=service_name) + return out + + if backend == "sysmon": + if observe_with: + raise ValueError("observe_with is not supported when backend='sysmon'") + compiled = graph + if graph is not None and hasattr(graph, "compile"): + compiled = graph.compile() + templates = dict(initial_templates or {}) + if bindings is None: + bindings = {} + for key in templates: + bindings[key] = make_dict_binding(templates, key, kind="prompt") + return SysMonInstrumentedGraph( + graph=compiled, + session=SysMonitoringSession(service_name=service_name), + bindings=bindings, + service_name=service_name, + input_key=input_key, + output_key=output_key, + ) if backend != "otel": - raise ValueError("Unsupported backend. Expected 'otel' or 'trace'.") + raise ValueError("Unsupported backend. Expected 'otel', 'trace', or 'sysmon'.") + + if "otel" in observe_with: + raise ValueError("observe_with=('otel', ...) is invalid when backend='otel'") # -- compile graph if needed -- compiled = graph @@ -301,4 +394,5 @@ def _emit_code_param(span, code_key: str, code_fn: Any) -> None: service_name=service_name, input_key=input_key, output_key=output_key, + observers=_make_observers(observe_with, service_name=service_name), ) diff --git a/opto/trace/io/observers.py b/opto/trace/io/observers.py new file mode 100644 index 00000000..1cf3d3ae --- /dev/null +++ b/opto/trace/io/observers.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Dict, Optional, Protocol + +from opto.trace.io.telemetry_session import TelemetrySession + + +@dataclass +class ObserverArtifact: + carrier: str + raw: Any + profile_doc: Optional[Dict[str, Any]] = None + + +class GraphObserver(Protocol): + name: str + + def start( + self, + *, + bindings: Dict[str, Any], + meta: Optional[Dict[str, Any]] = None, + ) -> None: + ... + + def stop( + self, + *, + result: Any = None, + error: BaseException | None = None, + ) -> ObserverArtifact: + ... 
+ + +class OTelObserver: + """Passive OTEL observer for a non-OTEL primary run.""" + + name = "otel" + + def __init__( + self, + session: Optional[TelemetrySession] = None, + *, + service_name: str = "langgraph-otel-observer", + ) -> None: + self.session = session or TelemetrySession(service_name=service_name) + self._ctx = None + + def start( + self, + *, + bindings: Dict[str, Any], + meta: Optional[Dict[str, Any]] = None, + ) -> None: + self._ctx = self.session.activate() + self._ctx.__enter__() + + def stop( + self, + *, + result: Any = None, + error: BaseException | None = None, + ) -> ObserverArtifact: + try: + otlp = self.session.flush_otlp(clear=True) + finally: + if self._ctx is not None: + self._ctx.__exit__(None, None, None) + self._ctx = None + return ObserverArtifact(carrier="otel", raw=otlp, profile_doc=None) diff --git a/opto/trace/io/optimization.py b/opto/trace/io/optimization.py index db97ddd6..e5a3ea04 100644 --- a/opto/trace/io/optimization.py +++ b/opto/trace/io/optimization.py @@ -28,7 +28,8 @@ from opto.trace.io.bindings import Binding, apply_updates from opto.trace.io.graph_instrumentation import TraceGraph -from opto.trace.io.instrumentation import InstrumentedGraph +from opto.trace.io.instrumentation import InstrumentedGraph, SysMonInstrumentedGraph +from opto.trace.io.sysmonitoring import sysmon_profile_to_tgj logger = logging.getLogger(__name__) @@ -328,6 +329,19 @@ def optimize_graph( output_key=output_key, on_iteration=on_iteration, ) + if getattr(graph, "backend", None) == "sysmon": + return _optimize_sysmon_graph( + graph, + queries=queries, + iterations=iterations, + optimizer=optimizer, + optimizer_kwargs=optimizer_kwargs, + eval_fn=eval_fn, + bindings=bindings, + apply_updates_flag=apply_updates_flag, + output_key=output_key, + on_iteration=on_iteration, + ) # Resolve bindings / templates effective_bindings = bindings or graph.bindings @@ -734,3 +748,120 @@ def _extract_output(result: Any, sidecar: Any = None) -> Tuple[Any, Any]: 
score_history=score_history, all_runs=all_runs, ) + + +def _optimize_sysmon_graph( + graph: SysMonInstrumentedGraph, + *, + queries: Union[List[str], List[Dict[str, Any]]], + iterations: int = 5, + optimizer: Optional[Any] = None, + optimizer_kwargs: Optional[Dict[str, Any]] = None, + eval_fn: Optional[EvalFn] = None, + bindings: Optional[Dict[str, Binding]] = None, + apply_updates_flag: bool = True, + output_key: Optional[str] = None, + on_iteration: Optional[Callable[[int, List[RunResult], Dict[str, Any]], None]] = None, +) -> OptimizationResult: + from opto.optimizers.optoprime_v2 import OptoPrimeV2 + from opto.trace.io.tgj_ingest import ingest_tgj + from opto.trace.io.tgj_ingest import merge_tgj + + effective_bindings = bindings or graph.bindings + if eval_fn is None: + raise ValueError("backend='sysmon' requires an explicit eval_fn") + + def _snapshot_parameters_from_bindings(bindings_dict: Dict[str, Binding]) -> Dict[str, Any]: + return {k: b.get() for k, b in bindings_dict.items()} + + score_history: List[float] = [] + all_runs: List[List[RunResult]] = [] + best_score = float("-inf") + best_iteration = 0 + best_updates: Dict[str, Any] = {} + best_parameters = _snapshot_parameters_from_bindings(effective_bindings) + optimizer_instance = optimizer + + for iteration in range(iterations + 1): + docs = [] + runs: List[RunResult] = [] + update_dict: Dict[str, Any] = {} + + for qi, query in enumerate(queries): + state = query if isinstance(query, dict) else {graph.input_key: query} + result = graph.invoke(state) + answer = result.get(output_key, result) if (output_key and isinstance(result, dict)) else result + er = _normalise_eval( + eval_fn( + { + "query": query, + "answer": answer, + "result": result, + "iteration": iteration, + } + ) + ) + runs.append( + RunResult( + answer=answer, + score=er.score, + feedback=er.feedback, + metrics=er.metrics, + otlp={}, + ) + ) + docs.append( + sysmon_profile_to_tgj( + graph._last_profile_doc or {}, + 
run_id=f"{graph.service_name}:{iteration}:{qi}", + graph_id="sysmon-graph", + scope=f"{graph.service_name}/0", + ) + ) + + merged_doc = merge_tgj(docs) if len(docs) > 1 else docs[0] + nodes = ingest_tgj(merged_doc) + + avg_score = sum((r.score or 0.0) for r in runs) / max(1, len(runs)) + score_history.append(avg_score) + all_runs.append(runs) + + if avg_score > best_score: + best_score = avg_score + best_iteration = iteration + best_parameters = _snapshot_parameters_from_bindings(effective_bindings) + + if iteration > 0: + output_node = _select_output_node(nodes) + if optimizer_instance is None: + trainable_params = [n for n in nodes.values() if getattr(n, "trainable", False)] + optimizer_instance = OptoPrimeV2(parameters=trainable_params, **dict(optimizer_kwargs or {})) + optimizer_instance.zero_feedback() + optimizer_instance.backward(output_node, runs[-1].feedback or f"Score: {runs[-1].score}") + raw_updates = optimizer_instance.step() + if isinstance(raw_updates, dict): + for key, value in raw_updates.items(): + if isinstance(key, str): + update_dict[key] = value + else: + name = getattr(key, "name", None) or getattr(key, "py_name", None) + if name: + update_dict[str(name)] = value + if update_dict and apply_updates_flag: + applied = apply_updates(update_dict, effective_bindings, strict=False) + if avg_score >= best_score: + best_updates = dict(applied) + + if on_iteration: + on_iteration(iteration, runs, update_dict) + + return OptimizationResult( + baseline_score=score_history[0], + best_score=best_score, + best_iteration=best_iteration, + best_parameters=best_parameters, + best_updates=best_updates, + final_parameters=_snapshot_parameters_from_bindings(effective_bindings), + score_history=score_history, + all_runs=all_runs, + ) diff --git a/opto/trace/io/sysmonitoring.py b/opto/trace/io/sysmonitoring.py new file mode 100644 index 00000000..78deb67d --- /dev/null +++ b/opto/trace/io/sysmonitoring.py @@ -0,0 +1,217 @@ +from __future__ import annotations + 
+import sys +import threading +import time +import uuid +from dataclasses import dataclass +from typing import Any, Dict, List, Optional + +from opto.trace.io.observers import ObserverArtifact + + +@dataclass +class SysMonEvent: + id: str + parent_id: str | None + name: str + filename: str + lineno: int + start_ns: int + end_ns: int | None = None + duration_ns: int | None = None + return_preview: str | None = None + thread_id: int | None = None + + +class SysMonitoringSession: + """Small execution observer built on Python's sys.monitoring API.""" + + def __init__(self, tool_id: int = 7, service_name: str = "langgraph-sysmon") -> None: + if not hasattr(sys, "monitoring"): + raise RuntimeError("sys.monitoring is unavailable on this Python runtime") + self.tool_id = tool_id + self.service_name = service_name + self._events: List[SysMonEvent] = [] + self._tls = threading.local() + self._bindings_snapshot: Dict[str, Dict[str, Any]] = {} + + def _stack(self) -> List[SysMonEvent]: + if not hasattr(self._tls, "stack"): + self._tls.stack = [] + return self._tls.stack + + def start(self, *, bindings: Dict[str, Any]) -> None: + self._events.clear() + self._bindings_snapshot = { + k: {"value": b.get(), "kind": b.kind, "trainable": True} + for k, b in (bindings or {}).items() + } + + def on_start(code, instruction_offset): + stack = self._stack() + eid = uuid.uuid4().hex[:16] + ev = SysMonEvent( + id=eid, + parent_id=stack[-1].id if stack else None, + name=code.co_name, + filename=code.co_filename, + lineno=code.co_firstlineno, + start_ns=time.perf_counter_ns(), + thread_id=threading.get_ident(), + ) + stack.append(ev) + self._events.append(ev) + + def on_return(code, instruction_offset, retval): + stack = self._stack() + if not stack: + return + ev = stack.pop() + ev.end_ns = time.perf_counter_ns() + ev.duration_ns = ev.end_ns - ev.start_ns + ev.return_preview = repr(retval)[:200] + + def on_unwind(code, instruction_offset, exc): + stack = self._stack() + if not stack: + 
return + ev = stack.pop() + ev.end_ns = time.perf_counter_ns() + ev.duration_ns = ev.end_ns - ev.start_ns + ev.return_preview = f"[UNWIND] {type(exc).__name__}: {exc}" + + self._on_start = on_start + self._on_return = on_return + self._on_unwind = on_unwind + + sys.monitoring.use_tool_id(self.tool_id, self.service_name) + sys.monitoring.register_callback(self.tool_id, sys.monitoring.events.PY_START, on_start) + sys.monitoring.register_callback(self.tool_id, sys.monitoring.events.PY_RETURN, on_return) + sys.monitoring.register_callback(self.tool_id, sys.monitoring.events.PY_UNWIND, on_unwind) + sys.monitoring.set_events( + self.tool_id, + sys.monitoring.events.PY_START + | sys.monitoring.events.PY_RETURN + | sys.monitoring.events.PY_UNWIND, + ) + + def stop(self, *, result: Any = None, error: BaseException | None = None) -> Dict[str, Any]: + try: + sys.monitoring.set_events(self.tool_id, 0) + sys.monitoring.register_callback(self.tool_id, sys.monitoring.events.PY_START, None) + sys.monitoring.register_callback(self.tool_id, sys.monitoring.events.PY_RETURN, None) + sys.monitoring.register_callback(self.tool_id, sys.monitoring.events.PY_UNWIND, None) + finally: + free_tool = getattr(sys.monitoring, "free_tool_id", None) + if callable(free_tool): + try: + free_tool(self.tool_id) + except Exception: + pass + else: + clear_tool = getattr(sys.monitoring, "clear_tool_id", None) + if callable(clear_tool): + try: + clear_tool(self.tool_id) + except Exception: + pass + + return { + "version": "trace-json/1.0+sysmon", + "agent": {"id": self.service_name}, + "bindings": self._bindings_snapshot, + "events": [ + { + "id": ev.id, + "parent_id": ev.parent_id, + "name": ev.name, + "file": ev.filename, + "lineno": ev.lineno, + "start_ns": ev.start_ns, + "end_ns": ev.end_ns, + "duration_ns": ev.duration_ns, + "return_preview": ev.return_preview, + "thread_id": ev.thread_id, + } + for ev in self._events + ], + "result_preview": repr(result)[:200] if result is not None else None, + 
"error": repr(error)[:200] if error else None, + } + + +class SysMonObserver: + name = "sysmon" + + def __init__(self, session: Optional[SysMonitoringSession] = None) -> None: + self.session = session or SysMonitoringSession() + + def start( + self, + *, + bindings: Dict[str, Any], + meta: Optional[Dict[str, Any]] = None, + ) -> None: + self.session.start(bindings=bindings) + + def stop( + self, + *, + result: Any = None, + error: BaseException | None = None, + ) -> ObserverArtifact: + doc = self.session.stop(result=result, error=error) + return ObserverArtifact(carrier="sysmon", raw=doc, profile_doc=doc) + + +def sysmon_profile_to_tgj( + doc: Dict[str, Any], + *, + run_id: str, + graph_id: str, + scope: str, +) -> Dict[str, Any]: + """Convert a simple sys.monitoring profile document into TGJ 1.0.""" + nodes = {} + + for pname, spec in (doc.get("bindings") or {}).items(): + nodes[f"param:{pname}"] = { + "id": f"param:{pname}", + "kind": "parameter", + "name": pname, + "value": spec["value"], + "trainable": spec.get("trainable", True), + "description": f"[{spec.get('kind', 'prompt')}]", + } + + for ev in doc.get("events", []): + inputs = {} + if ev.get("parent_id"): + inputs["parent"] = f"message:msg:{ev['parent_id']}" + nodes[f"msg:{ev['id']}"] = { + "id": f"msg:{ev['id']}", + "kind": "message", + "name": ev["name"], + "description": f"[sysmon] {ev['file']}:{ev['lineno']}", + "inputs": inputs, + "output": { + "name": f"{ev['name']}:out", + "value": ev.get("return_preview"), + }, + "info": { + "sysmon": { + "duration_ns": ev.get("duration_ns"), + "thread_id": ev.get("thread_id"), + } + }, + } + + return { + "tgj": "1.0", + "run_id": run_id, + "agent_id": (doc.get("agent") or {}).get("id", "agent"), + "graph_id": graph_id, + "scope": scope, + "nodes": nodes, + } diff --git a/tests/features_tests/test_langgraph_notebooks.py b/tests/features_tests/test_langgraph_notebooks.py index aafa5daf..b1bcf584 100644 --- a/tests/features_tests/test_langgraph_notebooks.py +++ 
b/tests/features_tests/test_langgraph_notebooks.py @@ -13,6 +13,7 @@ [ "examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb", "examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb", + "examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb", ], ) def test_notebook_executes(notebook_path): diff --git a/tests/features_tests/test_sysmon_backend.py b/tests/features_tests/test_sysmon_backend.py new file mode 100644 index 00000000..b1fdcbe3 --- /dev/null +++ b/tests/features_tests/test_sysmon_backend.py @@ -0,0 +1,62 @@ +import sys +import pytest + +langgraph = pytest.importorskip("langgraph.graph") +StateGraph = langgraph.StateGraph +START = langgraph.START +END = langgraph.END + +from opto.trace.io import instrument_graph, optimize_graph, SysMonInstrumentedGraph + + +pytestmark = pytest.mark.skipif(not hasattr(sys, "monitoring"), reason="sys.monitoring unavailable") + + +def build_graph(): + def planner(state): + return {"plan": f"plan::{state['query']}"} + + def synth(state): + return {"final_answer": f"answer::{state['query']}::{state['plan']}"} + + graph = StateGraph(dict) + graph.add_node("planner", planner) + graph.add_node("synth", synth) + graph.add_edge(START, "planner") + graph.add_edge("planner", "synth") + graph.add_edge("synth", END) + return graph + + +def test_sysmon_backend_invoke_exports_profile_doc(): + ig = instrument_graph( + graph=build_graph(), + backend="sysmon", + initial_templates={"planner_prompt": "Plan {query}"}, + output_key="final_answer", + ) + assert isinstance(ig, SysMonInstrumentedGraph) + out = ig.invoke({"query": "What is CRISPR?"}) + assert "final_answer" in out + assert ig._last_profile_doc["version"] == "trace-json/1.0+sysmon" + assert len(ig._last_profile_doc["events"]) > 0 + + +def test_sysmon_backend_optimize_baseline_only(): + ig = instrument_graph( + graph=build_graph(), + backend="sysmon", + initial_templates={"planner_prompt": "Plan {query}"}, + output_key="final_answer", + ) + 
result = optimize_graph( + ig, + queries=["What is CRISPR?"], + iterations=0, + eval_fn=lambda payload: { + "score": 1.0 if "CRISPR" in str(payload["answer"]) else 0.0, + "feedback": "Keep CRISPR in the answer.", + }, + ) + assert result.best_iteration == 0 + assert result.best_score == 1.0 diff --git a/tests/unit_tests/test_graph_observers.py b/tests/unit_tests/test_graph_observers.py new file mode 100644 index 00000000..5e8dd677 --- /dev/null +++ b/tests/unit_tests/test_graph_observers.py @@ -0,0 +1,139 @@ +import sys +import pytest + +langgraph = pytest.importorskip("langgraph.graph") +StateGraph = langgraph.StateGraph +START = langgraph.START +END = langgraph.END + +from opto.trace import node +from opto.trace.io import instrument_graph, TraceGraph, InstrumentedGraph + + +def _raw(x): + return getattr(x, "data", x) + + +def _make_trace_graph(): + planner_prompt = node("Plan: {query}", trainable=True, name="planner_prompt") + synth_prompt = node("Answer: {query} :: {plan}", trainable=True, name="synth_prompt") + + scope = {} + + def planner_node(state): + query = _raw(state["query"]) + return {"plan": planner_prompt.data.replace("{query}", str(query))} + + def synth_node(state): + query = _raw(state["query"]) + plan = _raw(state["plan"]) + answer = synth_prompt.data.replace("{query}", str(query)).replace("{plan}", str(plan)) + return {"final_answer": node(answer, name="final_answer_node")} + + scope.update( + { + "planner_prompt": planner_prompt, + "synth_prompt": synth_prompt, + "planner_node": planner_node, + "synth_node": synth_node, + } + ) + + def build_graph(): + graph = StateGraph(dict) + graph.add_node("planner", scope["planner_node"]) + graph.add_node("synth", scope["synth_node"]) + graph.add_edge(START, "planner") + graph.add_edge("planner", "synth") + graph.add_edge("synth", END) + return graph + + return build_graph, scope + + +class _StubLLM: + model = "stub" + + def __call__(self, messages=None, **kwargs): + class Msg: + content = "stub-response" + 
+ class Choice: + message = Msg() + + class Resp: + choices = [Choice()] + + return Resp() + + +def test_trace_backend_accepts_sysmon_observer(): + if not hasattr(sys, "monitoring"): + pytest.skip("sys.monitoring unavailable") + build_graph, scope = _make_trace_graph() + graph = instrument_graph( + backend="trace", + observe_with=("sysmon",), + graph_factory=build_graph, + scope=scope, + graph_agents_functions=["planner_node", "synth_node"], + graph_prompts_list=[scope["planner_prompt"], scope["synth_prompt"]], + output_key="final_answer", + ) + assert isinstance(graph, TraceGraph) + out = graph.invoke({"query": "What is CRISPR?"}) + assert "final_answer" in out + assert len(graph._last_observer_artifacts) == 1 + art = graph._last_observer_artifacts[0] + assert art.carrier == "sysmon" + assert art.profile_doc["version"] == "trace-json/1.0+sysmon" + + +def test_trace_backend_accepts_otel_and_sysmon_observers(): + if not hasattr(sys, "monitoring"): + pytest.skip("sys.monitoring unavailable") + build_graph, scope = _make_trace_graph() + graph = instrument_graph( + backend="trace", + observe_with=("otel", "sysmon"), + graph_factory=build_graph, + scope=scope, + graph_agents_functions=["planner_node", "synth_node"], + graph_prompts_list=[scope["planner_prompt"], scope["synth_prompt"]], + output_key="final_answer", + ) + out = graph.invoke({"query": "What is CRISPR?"}) + assert "final_answer" in out + carriers = [a.carrier for a in graph._last_observer_artifacts] + assert carriers == ["sysmon", "otel"] + + +def test_otel_backend_rejects_otel_observer(): + with pytest.raises(ValueError, match="invalid"): + instrument_graph( + graph=None, + backend="otel", + observe_with=("otel",), + llm=_StubLLM(), + ) + + +def test_otel_backend_accepts_sysmon_observer(): + if not hasattr(sys, "monitoring"): + pytest.skip("sys.monitoring unavailable") + class Graph: + def invoke(self, state, **kwargs): + return {"answer": "ok"} + ig = instrument_graph( + graph=Graph(), + backend="otel", + 
observe_with=("sysmon",), + llm=_StubLLM(), + initial_templates={"prompt_a": "A"}, + output_key="answer", + ) + assert isinstance(ig, InstrumentedGraph) + out = ig.invoke({"query": "hi"}) + assert out["answer"] == "ok" + assert len(ig._last_observer_artifacts) == 1 + assert ig._last_observer_artifacts[0].carrier == "sysmon" From c60d6df4f00e83f41e534fdeaef37f1a13663f2d Mon Sep 17 00:00:00 2001 From: doxav Date: Mon, 20 Apr 2026 16:47:06 +0200 Subject: [PATCH 05/16] improved traces/monitoring comparison --- ...aph_instrument_and_compare_observers.ipynb | 919 +++++++----------- ...ggraph_instrument_and_compare_observers.py | 632 +++++++++--- ...mo_langgraph_instrument_and_optimize.ipynb | 561 ++++------- ...ggraph_instrument_and_optimize_trace.ipynb | 102 +- opto/features/graph/__init__.py | 12 + opto/{trace => features}/graph/adapter.py | 21 +- .../graph}/graph_instrumentation.py | 36 +- opto/{trace => features}/graph/module.py | 2 +- opto/{trace => features}/graph/sidecars.py | 0 opto/trace/bundle.py | 76 +- opto/trace/graph/__init__.py | 12 - opto/trace/io/__init__.py | 4 +- opto/trace/io/instrumentation.py | 4 +- opto/trace/io/langgraph_otel_runtime.py | 48 + opto/trace/io/optimization.py | 11 +- opto/trace/io/sysmonitoring.py | 47 +- opto/trace/io/telemetry_session.py | 8 + .../test_graph_module_prioritysearch.py | 2 +- .../features_tests/test_graph_module_train.py | 2 +- tests/features_tests/test_sysmon_backend.py | 82 +- .../test_graph_adapter_modulecandidate.py | 2 +- tests/unit_tests/test_graph_adapter_trace.py | 27 +- tests/unit_tests/test_graph_observers.py | 5 +- .../unit_tests/test_langgraph_otel_runtime.py | 37 + 24 files changed, 1501 insertions(+), 1151 deletions(-) create mode 100644 opto/features/graph/__init__.py rename opto/{trace => features}/graph/adapter.py (94%) rename opto/{trace/io => features/graph}/graph_instrumentation.py (80%) rename opto/{trace => features}/graph/module.py (94%) rename opto/{trace => features}/graph/sidecars.py (100%) 
delete mode 100644 opto/trace/graph/__init__.py diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb index 5401b7ed..b60268bb 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb @@ -2,570 +2,389 @@ "cells": [ { "cell_type": "markdown", - "id": "9ce01f9c", - "metadata": {}, + "id": "intro-live-compare", + "metadata": { + "language": "markdown" + }, "source": [ - "# LangGraph Instrumentation Backends Comparison\n", + "# LangGraph live optimization comparison across Trace / OTEL / sys.monitoring\n", "\n", - "This notebook demonstrates using `instrument_graph()` with different observation backends:\n", + "This notebook runs the live comparison script.\n", "\n", - "- **trace** - Uses the opto trace system\n", - "- **trace + otel** - Trace system with OpenTelemetry observation\n", - "- **trace + sysmon** - Trace system with Python 3.12+ sys.monitoring\n", - "- **trace + otel + sysmon** - Trace with both observers\n", - "- **otel** - Pure OpenTelemetry instrumentation \n", - "- **otel + sysmon** - OpenTelemetry with sys.monitoring\n", - "- **sysmon** - Pure sys.monitoring (Python 3.12+)\n", - "\n", - "Each backend provides different tracing and profiling capabilities." 
+ "- It uses the OpenRouter API when `OPENROUTER_API_KEY` is set.\n", + "- It defaults to `OPENROUTER_MODEL=gpt-4o-mini` when the model env var is absent.\n", + "- In CI or local runs without credentials, it prints a skip message and exits successfully.\n" ] }, { "cell_type": "code", - "execution_count": null, - "id": "3e603173", - "metadata": {}, - "outputs": [], + "execution_count": 1, + "id": "run-live-compare-script", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-19T20:13:07.481092Z", + "iopub.status.busy": "2026-04-19T20:13:07.480782Z", + "iopub.status.idle": "2026-04-19T20:24:26.123314Z", + "shell.execute_reply": "2026-04-19T20:24:26.122779Z" + }, + "language": "python" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "LangGraph live optimization comparison\n", + "================================================================================\n", + "Python 3.13\n", + "sys.monitoring available: True\n", + "OPENROUTER_MODEL=gpt-4o-mini\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Running baseline...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/2: What is CRISPR?... score=0.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/2: How does CRISPR enable gene editing?... score=0.0\n", + " Baseline average: 0.0000\n", + " Iteration 1/5...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/2: What is CRISPR?... score=0.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/2: How does CRISPR enable gene editing?... score=0.0\n", + " Iteration 1 average: 0.0000\n", + " Iteration 2/5...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/2: What is CRISPR?... 
score=1.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", + " Iteration 2 average: 1.0000 * NEW BEST\n", + " Iteration 3/5...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/2: What is CRISPR?... score=1.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", + " Iteration 3 average: 1.0000\n", + " Iteration 4/5...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/2: What is CRISPR?... score=1.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", + " Iteration 4 average: 1.0000\n", + " Iteration 5/5...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/2: What is CRISPR?... score=1.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", + " Iteration 5 average: 1.0000\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Running baseline...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/2: What is CRISPR?... score=0.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/2: How does CRISPR enable gene editing?... score=0.0\n", + " Baseline average: 0.0000\n", + " Iteration 1/5...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/2: What is CRISPR?... score=0.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/2: How does CRISPR enable gene editing?... 
score=0.0\n", + " Iteration 1 average: 0.0000\n", + " Iteration 2/5...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/2: What is CRISPR?... score=1.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", + " Iteration 2 average: 1.0000 * NEW BEST\n", + " Iteration 3/5...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/2: What is CRISPR?... score=1.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", + " Iteration 3 average: 1.0000\n", + " Iteration 4/5...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/2: What is CRISPR?... score=1.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", + " Iteration 4 average: 1.0000\n", + " Iteration 5/5...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/2: What is CRISPR?... score=1.0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/2: How does CRISPR enable gene editing?... 
score=1.0\n", + " Iteration 5 average: 1.0000\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Optimization comparison (5 iterations)\n", + "\n", + "| config | score_history | best_iteration | observers |\n", + "|---|---|---:|---|\n", + "| trace | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | - |\n", + "| trace+otel | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | otel |\n", + "| otel | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | - |\n", + "| trace+sysmon | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | sysmon |\n", + "| trace+otel+sysmon | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | sysmon,otel |\n", + "| otel+sysmon | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | sysmon |\n", + "| sysmon | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | - |\n", + "\n", + "Binding / update inspection\n", + "\n", + "## trace\n", + "best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", + "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", + "Plan: {plan}\n", + "answer_preview: [BENCH_OK] \n", + "\n", + "### What is CRISPR?\n", + "\n", + "#### 1. Introduction to CRISPR\n", + "- **Definition**: CRISPR stands for Clustered Regularly\n", + "trace_summary: {'is_node': True, 'parent_count': 3, 'parameter_count': 4}\n", + "\n", + "## trace+otel\n", + "best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", + "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", + "Plan: {plan}\n", + "answer_preview: [BENCH_OK] \n", + "\n", + "### What is CRISPR?\n", + "\n", + "#### 1. 
Introduction to CRISPR\n", + "- **Definition**: CRISPR stands for Clustered Regularly\n", + "trace_summary: {'is_node': True, 'parent_count': 3, 'parameter_count': 4}\n", + "otel_summary: {'span_count': 0, 'span_names': [], 'param_keys': [], 'message_names': []}\n", + "\n", + "## otel\n", + "best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", + "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", + "Plan: {plan}\n", + "answer_preview: [BENCH_OK] \n", + "\n", + "### What is CRISPR?\n", + "\n", + "#### 1. Introduction to CRISPR\n", + "- **Definition**: CRISPR, which stands for Clustered Re\n", + "\n", + "## trace+sysmon\n", + "best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", + "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", + "Plan: {plan}\n", + "answer_preview: [BENCH_OK] \n", + "\n", + "CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a groundbreaking gen\n", + "trace_summary: {'is_node': True, 'parent_count': 3, 'parameter_count': 4}\n", + "sysmon_summary: {'event_count': 9665, 'tgj_node_count': 9665, 'message_names': ['', '', '', '__bool__', '__bytes__', '__call__', '__contains__', '__copy__', '__deepcopy__', '__delitem__', '__enter__', '__eq__', '__exit__', '__get__', '__getattr__', '__getitem__', '__hash__', '__iadd__', '__init__', '__instancecheck__', '__iter__', '__len__', '__new__', '__newobj__', '__post_init__', '__reduce__', '__setattr__', '__setitem__', '__setstate__', '__str__', '__subclasscheck__', '_add_dependencies', '_add_filter', '_add_parent', '_allow_reckless_class_checks', '_assemble_writes', '_assign_requests_to_connections', '_bind', '_body_framing', '_build_headers', '_build_request', '_build_request_auth', '_caller', '_checkClosed', '_check_class', 
'_check_frozen', '_check_instance', '_close_connections', '_coerce_args', '_coerce_context', '_compile', '_configure', '_construct_field', '_control_branch', '_copy_immutable', '_create_exit_wrapper', '_debug', '_decode_header_lines', '_deepcopy_atomic', '_deepcopy_dict', '_deepcopy_list', '_deepcopy_tuple', '_defaults', '_emit', '_extract', '_extract_next_receive_event', '_fire_event_triggered_transitions', '_fire_state_triggered_transitions', '_first', '_get_content_decoder', '_get_debug', '_get_extra_fields_type', '_get_io_object', '_get_root', '_get_tracer_project', '_handle_fromlist', '_has_code_flag', '_has_coroutine_mark', '_idempotency_key', '_is_dataclass_instance', '_is_disabled', '_is_owned', '_keep_alive', '_llm', '_maybe_override_cast_to', '_maybe_transform_key', '_merge_cookies', '_merge_headers', '_merge_mappings', '_merge_queryparams', '_merge_url', '_no_transform_needed', '_noop', '_normalize_header_key', '_normalize_header_value', '_obsolete_line_fold', '_output', '_parse', '_prepare', '_prepare_options', '_prepare_request', '_prepare_url', '_proc_input', '_process_event', '_process_response', '_process_response_data', '_push_cm_exit', '_push_exit_callback', '_put_checkpoint', '_raw', '_receive_event', '_receive_response_body', '_receive_response_headers', '_reconstruct', '_refresh_api_key', '_replace', '_respond_to_state_changes', '_response_closed', '_scratchpad', '_send_event', '_send_handling_auth', '_send_handling_redirects', '_send_request_body', '_send_request_headers', '_send_single_request', '_server_switch_event', '_set_config_context', '_set_timeout', '_shadowed_dict', '_should_stream_response_body', '_should_unflatten_callable_args', '_signature_from_callable', '_signature_from_function', '_slotnames', '_splithost', '_splittag', '_splittype', '_suppress_interrupt', '_tracing_v2_is_enabled', '_transform_recursive', '_transform_typeddict', '_transport_for_url', '_triggers', '_unset_config_context', '_unwrap_partial', 
'_unwrap_partialmethod', '_update_level', '_validate_headers', '_validate_non_model_type', '_value_and_type_iter', '_warn_on_import', '_wrap_inputs', '_write', '_xxhash_str', 'acquire', 'add_header', 'add_metadata', 'add_request_id', 'add_tags', 'after_tick', 'annotation', 'apply_defaults', 'apply_writes', 'as_dict', 'assign_to_connection', 'auth_flow', 'auth_headers', 'authority', 'base_url', 'bind', 'build_request', 'bytesify', 'can_handle_request', 'cast', 'channels_from_checkpoint', 'checkpoint_null_version', 'close', 'commit', 'compile', 'configure', 'construct', 'construct_type', 'consume', 'contain', 'content', 'cookies', 'copy', 'copy_with', 'count', 'create', 'create_checkpoint', 'custom_auth', 'data', 'debug', 'decode', 'deepcopy', 'deepvalues', 'default', 'default_headers', 'default_query', 'detach', 'detach_inputs', 'detect_encoding', 'do_write', 'dumps', 'elapsed', 'empty_checkpoint', 'encode', 'encode_host', 'encode_json', 'encode_request', 'encoding', 'enforce_bytes', 'enforce_headers', 'enforce_stream', 'enforce_url', 'ensure_config', 'enter_context', 'env_var_is_set', 'extract_cookies', 'extract_param', 'extract_type_arg', 'field_get_default', 'filterwarnings', 'finditer', 'finish', 'flush', 'forward', 'from_callable', 'from_checkpoint', 'full_url', 'fun', 'func', 'get', 'get_all', 'get_annotations', 'get_args', 'get_callback_manager_for_config', 'get_child', 'get_comma_header', 'get_current_run_tree', 'get_debug', 'get_default', 'get_executor_for_config', 'get_extra_info', 'get_list', 'get_max_retries', 'get_model_config', 'get_model_fields', 'get_multipart_boundary_from_content_type', 'get_name', 'get_op_name', 'get_origin', 'get_tracing_context', 'getattr_static', 'getfullargspec', 'getitem', 'handle_event', 'handle_request', 'has_expect_100_continue', 'has_expired', 'has_redirect_location', 'header_max_count', 'header_store_parse', 'headers', 'helper', 'hidden_dependencies', 'host', 'http_version', 'increment', 'info', 'inner', 'invoke', 
'isEnabledFor', 'is_absolute_url', 'is_annotated_type', 'is_available', 'is_closed', 'is_given', 'is_idle', 'is_interactive_env', 'is_iterable', 'is_iterable_type', 'is_list', 'is_list_type', 'is_literal_type', 'is_mapping', 'is_next_line_obviously_invalid_request_line', 'is_queued', 'is_relative_url', 'is_required', 'is_sequence_type', 'is_socket_readable', 'is_success', 'is_type_alias_type', 'is_typeddict', 'is_union', 'is_union_type', 'is_valid_field_name', 'isclass', 'iscoroutinefunction', 'isfunction', 'ismethod', 'iter_bytes', 'iter_raw', 'iterencode', 'json', 'keys', 'kind', 'loads', 'make_cookies', 'make_request_options', 'map_exceptions', 'map_httpcore_exceptions', 'map_input', 'map_output_updates', 'map_output_values', 'match', 'match_cached_writes', 'maybe_extract_at_most', 'maybe_extract_lines', 'maybe_extract_next_line', 'maybe_read_from_SEND_RESPONSE_server', 'maybe_transform', 'merge', 'merge_configs', 'model_construct', 'model_copy', 'model_fields', 'multi_items', 'name', 'netloc', 'next_event', 'node', 'normalize_and_validate', 'normalize_path', 'normalize_port', 'notify', 'notify_all', 'on_chain_end', 'on_chain_start', 'origin', 'our_state', 'output_writes', 'override', 'parameters', 'params', 'parent', 'parse', 'password', 'patch_config', 'patch_configurable', 'percent_encoded', 'planner_node', 'platform_headers', 'pop', 'port', 'post', 'postprocess_output', 'prepare_next_tasks', 'prepare_single_task', 'preprocess_inputs', 'process_cpu_count', 'process_event', 'push', 'put', 'put_writes', 'quote', 'raise_for_status', 'raw', 'raw_decode', 'raw_host', 'raw_items', 'raw_path', 'raw_scheme', 'read', 'read_channel', 'read_channels', 'reason_phrase', 'receive_data', 'recursive_conversion', 'recv', 'register', 'release', 'replace', 'request', 'request_context', 'request_host', 'request_id', 'return_annotation', 'run_with_retry', 'search', 'send', 'send_data', 'send_eom', 'send_with_data_passthrough', 'set', 'set_config_context', 'set_handlers', 
'setdefault', 'shutdown', 'signature', 'smart_deepcopy', 'start_next_cycle', 'stop', 'stream', 'stream_channels_asis', 'strip_not_given', 'sync_auth_flow', 'sync_call_fun', 'sync_forward', 'synth_node', 'task_path_str', 'their_state', 'tick', 'to_data', 'to_httpx_files', 'tracing_is_enabled', 'trailing_data', 'transform', 'unquote', 'unwrap', 'update', 'urlparse', 'user_agent', 'username', 'uuid4', 'uuid6', 'validate', 'validate_keys', 'validate_path', 'validate_python', 'validate_response_format', 'validate_type', 'value', 'wait_for_connection', 'wrap', 'wrapper', 'write', 'write_headers', 'write_request'], 'param_names': []}\n", + "\n", + "## trace+otel+sysmon\n", + "best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", + "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", + "Plan: {plan}\n", + "answer_preview: [BENCH_OK] \n", + "\n", + "### What is CRISPR?\n", + "\n", + "#### 1. 
Introduction to CRISPR\n", + "- **Definition**: CRISPR stands for Clustered Regularly\n", + "trace_summary: {'is_node': True, 'parent_count': 3, 'parameter_count': 4}\n", + "otel_summary: {'span_count': 0, 'span_names': [], 'param_keys': [], 'message_names': []}\n", + "sysmon_summary: {'event_count': 9744, 'tgj_node_count': 9744, 'message_names': ['', '', '', '__bool__', '__bytes__', '__call__', '__contains__', '__copy__', '__deepcopy__', '__delitem__', '__enter__', '__eq__', '__exit__', '__get__', '__getattr__', '__getitem__', '__hash__', '__iadd__', '__init__', '__instancecheck__', '__iter__', '__len__', '__new__', '__newobj__', '__post_init__', '__reduce__', '__setattr__', '__setitem__', '__setstate__', '__str__', '__subclasscheck__', '_add_dependencies', '_add_filter', '_add_parent', '_allow_reckless_class_checks', '_assemble_writes', '_assign_requests_to_connections', '_bind', '_body_framing', '_build_headers', '_build_request', '_build_request_auth', '_call_soon', '_caller', '_checkClosed', '_check_class', '_check_closed', '_check_frozen', '_check_instance', '_clean_thread_parent_frames', '_close_connections', '_coerce_args', '_coerce_context', '_compile', '_configure', '_construct_field', '_control_branch', '_copy_immutable', '_create_exit_wrapper', '_debug', '_decode_header_lines', '_deepcopy_atomic', '_deepcopy_dict', '_deepcopy_list', '_deepcopy_tuple', '_defaults', '_emit', '_event_pipe_gc', '_extract', '_extract_next_receive_event', '_fire_event_triggered_transitions', '_fire_state_triggered_transitions', '_first', '_get_content_decoder', '_get_debug', '_get_extra_fields_type', '_get_io_object', '_get_root', '_get_tracer_project', '_handle_fromlist', '_has_code_flag', '_has_coroutine_mark', '_idempotency_key', '_is_dataclass_instance', '_is_disabled', '_is_owned', '_keep_alive', '_llm', '_maybe_override_cast_to', '_maybe_transform_key', '_merge_cookies', '_merge_headers', '_merge_mappings', '_merge_queryparams', '_merge_url', '_no_transform_needed', 
'_noop', '_normalize_header_key', '_normalize_header_value', '_obsolete_line_fold', '_output', '_parse', '_prepare', '_prepare_options', '_prepare_request', '_prepare_url', '_proc_input', '_process_event', '_process_events', '_process_response', '_process_response_data', '_push_cm_exit', '_push_exit_callback', '_put_checkpoint', '_raw', '_receive_event', '_receive_response_body', '_receive_response_headers', '_reconstruct', '_refresh_api_key', '_replace', '_respond_to_state_changes', '_response_closed', '_run', '_run_once', '_scratchpad', '_send_event', '_send_handling_auth', '_send_handling_redirects', '_send_request_body', '_send_request_headers', '_send_single_request', '_server_switch_event', '_set_config_context', '_set_result_unless_cancelled', '_set_timeout', '_shadowed_dict', '_should_stream_response_body', '_should_unflatten_callable_args', '_signature_from_callable', '_signature_from_function', '_slotnames', '_splithost', '_splittag', '_splittype', '_suppress_interrupt', '_timer_handle_cancelled', '_tracing_v2_is_enabled', '_transform_recursive', '_transform_typeddict', '_transport_for_url', '_triggers', '_unset_config_context', '_unwrap_partial', '_unwrap_partialmethod', '_update_level', '_validate_headers', '_validate_non_model_type', '_value_and_type_iter', '_warn_on_import', '_wrap_inputs', '_write', '_xxhash_str', 'acquire', 'add_header', 'add_metadata', 'add_request_id', 'add_tags', 'after_tick', 'annotation', 'apply_defaults', 'apply_writes', 'as_dict', 'assign_to_connection', 'auth_flow', 'auth_headers', 'authority', 'base_url', 'bind', 'build_request', 'bytesify', 'call_at', 'call_later', 'call_soon', 'can_handle_request', 'cancel', 'cast', 'channels_from_checkpoint', 'checkpoint_null_version', 'close', 'commit', 'compile', 'configure', 'construct', 'construct_type', 'consume', 'contain', 'content', 'cookies', 'copy', 'copy_with', 'count', 'create', 'create_checkpoint', 'create_future', 'custom_auth', 'data', 'debug', 'decode', 'deepcopy', 
'deepvalues', 'default', 'default_headers', 'default_query', 'detach', 'detach_inputs', 'detect_encoding', 'do_write', 'dumps', 'elapsed', 'empty_checkpoint', 'encode', 'encode_host', 'encode_json', 'encode_request', 'encoding', 'enforce_bytes', 'enforce_headers', 'enforce_stream', 'enforce_url', 'ensure_config', 'enter_context', 'enumerate', 'env_var_is_set', 'extract_cookies', 'extract_param', 'extract_type_arg', 'field_get_default', 'filterwarnings', 'finditer', 'finish', 'flush', 'forward', 'from_callable', 'from_checkpoint', 'full_url', 'fun', 'func', 'get', 'get_all', 'get_annotations', 'get_args', 'get_callback_manager_for_config', 'get_child', 'get_comma_header', 'get_current_run_tree', 'get_debug', 'get_default', 'get_executor_for_config', 'get_extra_info', 'get_list', 'get_max_retries', 'get_model_config', 'get_model_fields', 'get_multipart_boundary_from_content_type', 'get_name', 'get_op_name', 'get_origin', 'get_tracing_context', 'getattr_static', 'getfullargspec', 'getitem', 'handle_event', 'handle_request', 'has_expect_100_continue', 'has_expired', 'has_redirect_location', 'header_max_count', 'header_store_parse', 'headers', 'helper', 'hidden_dependencies', 'host', 'http_version', 'ident', 'increment', 'info', 'inner', 'invoke', 'isEnabledFor', 'is_absolute_url', 'is_alive', 'is_annotated_type', 'is_available', 'is_closed', 'is_given', 'is_idle', 'is_interactive_env', 'is_iterable', 'is_iterable_type', 'is_list', 'is_list_type', 'is_literal_type', 'is_mapping', 'is_next_line_obviously_invalid_request_line', 'is_queued', 'is_relative_url', 'is_required', 'is_sequence_type', 'is_set', 'is_socket_readable', 'is_success', 'is_type_alias_type', 'is_typeddict', 'is_union', 'is_union_type', 'is_valid_field_name', 'isclass', 'iscoroutinefunction', 'isfunction', 'ismethod', 'iter_bytes', 'iter_raw', 'iterencode', 'json', 'keys', 'kind', 'loads', 'make_cookies', 'make_request_options', 'map_exceptions', 'map_httpcore_exceptions', 'map_input', 
'map_output_updates', 'map_output_values', 'match', 'match_cached_writes', 'maybe_extract_at_most', 'maybe_extract_lines', 'maybe_extract_next_line', 'maybe_read_from_SEND_RESPONSE_server', 'maybe_transform', 'merge', 'merge_configs', 'model_construct', 'model_copy', 'model_fields', 'multi_items', 'name', 'netloc', 'next_event', 'node', 'normalize_and_validate', 'normalize_path', 'normalize_port', 'notify', 'notify_all', 'on_chain_end', 'on_chain_start', 'origin', 'our_state', 'output_writes', 'override', 'parameters', 'params', 'parent', 'parse', 'password', 'patch_config', 'patch_configurable', 'percent_encoded', 'planner_node', 'platform_headers', 'pop', 'port', 'post', 'postprocess_output', 'prepare_next_tasks', 'prepare_single_task', 'preprocess_inputs', 'process_cpu_count', 'process_event', 'push', 'put', 'put_writes', 'quote', 'raise_for_status', 'raw', 'raw_decode', 'raw_host', 'raw_items', 'raw_path', 'raw_scheme', 'read', 'read_channel', 'read_channels', 'reason_phrase', 'receive_data', 'recursive_conversion', 'recv', 'register', 'release', 'replace', 'request', 'request_context', 'request_host', 'request_id', 'return_annotation', 'run_with_retry', 'search', 'select', 'send', 'send_data', 'send_eom', 'send_with_data_passthrough', 'set', 'set_config_context', 'set_handlers', 'setdefault', 'shutdown', 'signature', 'sleep', 'smart_deepcopy', 'start_next_cycle', 'stop', 'stream', 'stream_channels_asis', 'strip_not_given', 'sync_auth_flow', 'sync_call_fun', 'sync_forward', 'synth_node', 'task_path_str', 'their_state', 'tick', 'time', 'to_data', 'to_httpx_files', 'tracing_is_enabled', 'trailing_data', 'transform', 'unquote', 'unwrap', 'update', 'urlparse', 'user_agent', 'username', 'uuid4', 'uuid6', 'validate', 'validate_keys', 'validate_path', 'validate_python', 'validate_response_format', 'validate_type', 'value', 'wait_for_connection', 'wrap', 'wrapper', 'write', 'write_headers', 'write_request'], 'param_names': []}\n", + "\n", + "## otel+sysmon\n", + 
"best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", + "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", + "Plan: {plan}\n", + "answer_preview: [BENCH_OK] \n", + "\n", + "CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a groundbreaking gen\n", + "sysmon_summary: {'event_count': 9489, 'tgj_node_count': 9491, 'message_names': ['', '', 'RLock', '__and__', '__bool__', '__bytes__', '__call__', '__contains__', '__copy__', '__delitem__', '__enter__', '__eq__', '__exit__', '__get__', '__getattr__', '__getitem__', '__hash__', '__iadd__', '__init__', '__instancecheck__', '__iter__', '__len__', '__lt__', '__new__', '__newobj__', '__or__', '__post_init__', '__set__', '__setattr__', '__setitem__', '__str__', '__subclasscheck__', '_add_callback', '_add_filter', '_allow_reckless_class_checks', '_assemble_writes', '_assign_requests_to_connections', '_body_framing', '_build_headers', '_build_request', '_build_request_auth', '_call_soon', '_caller', '_checkClosed', '_check_class', '_check_closed', '_check_frozen', '_check_instance', '_check_mp_mode', '_clean_attribute', '_clean_attribute_value', '_clean_thread_parent_frames', '_close_connections', '_coerce_args', '_coerce_context', '_compile', '_configure', '_construct_field', '_control_branch', '_copy_immutable', '_create_exit_wrapper', '_cross_validate', '_current', '_debug', '_decode_header_lines', '_deepcopy_atomic', '_defaults', '_emit', '_ensure_tzinfo', '_event_pipe', '_event_pipe_gc', '_extract', '_extract_next_receive_event', '_fire_event_triggered_transitions', '_fire_state_triggered_transitions', '_first', '_flush', '_flush_buffers', '_get_attr_opt', '_get_content_decoder', '_get_debug', '_get_extra_fields_type', '_get_io_object', '_get_parent_trace_state', '_get_root', '_get_tracer_project', '_get_value', '_handle_event', '_handle_events', '_handle_fromlist', 
'_handle_recv', '_hooks', '_idempotency_key', '_is_dataclass_instance', '_is_disabled', '_is_master_process', '_is_owned', '_is_trainable', '_keep_alive', '_llm', '_maybe_override_cast_to', '_maybe_transform_key', '_merge_cookies', '_merge_headers', '_merge_mappings', '_merge_queryparams', '_merge_url', '_new_events', '_new_links', '_no_transform_needed', '_noop', '_normalize_header_key', '_normalize_header_value', '_obsolete_line_fold', '_output', '_parse', '_prepare', '_prepare_options', '_prepare_request', '_prepare_url', '_proc_input', '_process_event', '_process_events', '_process_response', '_process_response_data', '_push_cm_exit', '_push_exit_callback', '_put_checkpoint', '_qsize', '_readable_span', '_really_send', '_rebuild_io_state', '_receive_event', '_receive_response_body', '_receive_response_headers', '_reconstruct', '_record_llm_call', '_refresh_api_key', '_replace', '_respond_to_state_changes', '_response_closed', '_root_invocation_span', '_rotate_buffers', '_run', '_run_callback', '_run_once', '_schedule_in_thread', '_scratchpad', '_send_event', '_send_handling_auth', '_send_handling_redirects', '_send_request_body', '_send_request_headers', '_send_single_request', '_server_switch_event', '_set_config_context', '_set_result_unless_cancelled', '_set_timeout', '_shadowed_dict', '_should_stream_response_body', '_should_unflatten_callable_args', '_splithost', '_splittag', '_splittype', '_suppress_interrupt', '_timer_handle_cancelled', '_tracing_v2_is_enabled', '_transform_recursive', '_transform_typeddict', '_transport_for_url', '_triggers', '_unset_config_context', '_update_handler', '_validate', '_validate_bounds', '_validate_content', '_validate_headers', '_validate_non_model_type', '_value_and_type_iter', '_warn_on_import', '_write', '_xxhash_str', 'acquire', 'activate', 'add_header', 'add_metadata', 'add_request_id', 'add_tags', 'after_tick', 'apply_writes', 'as_dict', 'assign_to_connection', 'attach', 'auth_flow', 'auth_headers', 'authority', 
'base_url', 'build_request', 'bytesify', 'call_at', 'call_later', 'call_soon', 'can_handle_request', 'cancel', 'cast', 'channels_from_checkpoint', 'checkpoint_null_version', 'close', 'closed', 'commit', 'compile', 'configure', 'construct', 'construct_type', 'consume', 'content', 'context', 'cookies', 'copy', 'copy_with', 'create', 'create_checkpoint', 'create_future', 'custom_auth', 'data', 'debug', 'decode', 'deepcopy', 'deepvalues', 'default_headers', 'default_query', 'detach', 'detect_encoding', 'do_write', 'dumps', 'elapsed', 'empty', 'empty_checkpoint', 'encode', 'encode_host', 'encode_json', 'encode_request', 'encoding', 'end', 'enforce_bytes', 'enforce_headers', 'enforce_stream', 'enforce_url', 'ensure_config', 'enter_context', 'enumerate', 'env_var_is_set', 'export', 'extract_cookies', 'extract_header', 'extract_type_arg', 'field_get_default', 'filterwarnings', 'finditer', 'finish', 'flush', 'from_checkpoint', 'full_url', 'generate_span_id', 'generate_trace_id', 'get', 'get_all', 'get_args', 'get_callback_manager_for_config', 'get_child', 'get_comma_header', 'get_current', 'get_current_run_tree', 'get_current_span', 'get_debug', 'get_default', 'get_executor_for_config', 'get_extra_info', 'get_list', 'get_max_retries', 'get_model_config', 'get_model_fields', 'get_multipart_boundary_from_content_type', 'get_name', 'get_origin', 'get_span_context', 'get_tracing_context', 'get_value', 'getattr_static', 'handle_event', 'handle_request', 'has_expect_100_continue', 'has_expired', 'has_redirect_location', 'header_max_count', 'header_store_parse', 'headers', 'helper', 'hexdigest', 'host', 'http_version', 'ident', 'increment', 'info', 'inner', 'invoke', 'isEnabledFor', 'is_absolute_url', 'is_alive', 'is_annotated_type', 'is_available', 'is_closed', 'is_given', 'is_idle', 'is_interactive_env', 'is_iterable', 'is_iterable_type', 'is_list', 'is_list_type', 'is_literal_type', 'is_mapping', 'is_next_line_obviously_invalid_request_line', 'is_queued', 'is_recording', 
'is_relative_url', 'is_remote', 'is_required', 'is_sampled', 'is_sequence_type', 'is_set', 'is_socket_readable', 'is_success', 'is_type_alias_type', 'is_typeddict', 'is_union', 'is_union_type', 'is_valid', 'is_valid_field_name', 'isclass', 'iter_bytes', 'iter_raw', 'iterencode', 'json', 'json_default', 'json_packer', 'keys', 'kind', 'loads', 'make_cookies', 'make_request_options', 'map_exceptions', 'map_httpcore_exceptions', 'map_input', 'map_output_updates', 'map_output_values', 'match_cached_writes', 'maybe_extract_at_most', 'maybe_extract_lines', 'maybe_extract_next_line', 'maybe_read_from_SEND_RESPONSE_server', 'maybe_transform', 'merge', 'merge_configs', 'model_construct', 'model_copy', 'model_fields', 'msg', 'msg_header', 'msg_id', 'multi_items', 'netloc', 'next_event', 'node_call', 'normalize_and_validate', 'normalize_path', 'normalize_port', 'notify', 'notify_all', 'on_chain_end', 'on_chain_start', 'on_end', 'on_start', 'origin', 'our_state', 'output_writes', 'override', 'params', 'parent', 'parse', 'password', 'patch_config', 'patch_configurable', 'percent_encoded', 'planner', 'platform_headers', 'pop', 'port', 'post', 'prepare_next_tasks', 'prepare_single_task', 'process_cpu_count', 'process_event', 'push', 'put', 'put_writes', 'quote', 'raise_for_status', 'raw', 'raw_decode', 'raw_host', 'raw_items', 'raw_path', 'raw_scheme', 'read', 'read_channel', 'read_channels', 'reason_phrase', 'receive_data', 'receiving', 'recv', 'recv_multipart', 'release', 'replace', 'request', 'request_context', 'request_host', 'request_id', 'run_with_retry', 'sampled', 'schedule', 'select', 'send', 'send_data', 'send_eom', 'send_multipart', 'send_with_data_passthrough', 'sending', 'serialize', 'set', 'set_attribute', 'set_attributes', 'set_config_context', 'set_handlers', 'set_value', 'setdefault', 'should_sample', 'shutdown', 'sign', 'sleep', 'smart_deepcopy', 'start', 'start_as_current_span', 'start_next_cycle', 'start_span', 'stop', 'stream', 'stream_channels_asis', 
'strip_not_given', 'sync_auth_flow', 'synth', 'task_path_str', 'their_state', 'tick', 'time', 'to_httpx_files', 'trace_flags', 'trace_id', 'trace_state', 'tracer', 'tracing_is_enabled', 'trailing_data', 'transform', 'unquote', 'unwrap', 'update', 'urlparse', 'use_span', 'user_agent', 'username', 'utcnow', 'utcoffset', 'uuid4', 'uuid6', 'validate', 'validate_keys', 'validate_path', 'validate_python', 'validate_response_format', 'validate_type', 'value', 'wait_for_connection', 'wrapper', 'write', 'write_headers', 'write_request'], 'param_names': ['planner_prompt', 'synth_prompt']}\n", + "\n", + "## sysmon\n", + "best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", + "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", + "Plan: {plan}\n", + "answer_preview: [BENCH_OK] \n", + "\n", + "### What is CRISPR?\n", + "\n", + "#### 1. Introduction to CRISPR\n", + "- **Definition**: CRISPR stands for Clustered Regularly\n", + "sysmon_summary: {'event_count': 8604, 'tgj_node_count': 8606, 'message_names': ['', '', '__bool__', '__bytes__', '__call__', '__contains__', '__copy__', '__delitem__', '__enter__', '__eq__', '__exit__', '__get__', '__getattr__', '__getitem__', '__hash__', '__iadd__', '__init__', '__instancecheck__', '__iter__', '__len__', '__new__', '__newobj__', '__post_init__', '__setattr__', '__setitem__', '__str__', '__subclasscheck__', '_add_filter', '_allow_reckless_class_checks', '_assemble_writes', '_assign_requests_to_connections', '_body_framing', '_build_headers', '_build_request', '_build_request_auth', '_call_soon', '_caller', '_checkClosed', '_check_class', '_check_closed', '_check_frozen', '_check_instance', '_clean_thread_parent_frames', '_close_connections', '_coerce_args', '_coerce_context', '_compile', '_configure', '_construct_field', '_control_branch', '_copy_immutable', '_create_exit_wrapper', '_debug', '_decode_header_lines', 
'_deepcopy_atomic', '_defaults', '_emit', '_event_pipe_gc', '_extract', '_extract_next_receive_event', '_fire_event_triggered_transitions', '_fire_state_triggered_transitions', '_first', '_get_content_decoder', '_get_debug', '_get_extra_fields_type', '_get_io_object', '_get_root', '_get_tracer_project', '_handle_fromlist', '_idempotency_key', '_is_dataclass_instance', '_is_disabled', '_is_owned', '_keep_alive', '_llm', '_maybe_override_cast_to', '_maybe_transform_key', '_merge_cookies', '_merge_headers', '_merge_mappings', '_merge_queryparams', '_merge_url', '_no_transform_needed', '_noop', '_normalize_header_key', '_normalize_header_value', '_obsolete_line_fold', '_output', '_parse', '_prepare', '_prepare_options', '_prepare_request', '_prepare_url', '_proc_input', '_process_event', '_process_events', '_process_response', '_process_response_data', '_push_cm_exit', '_push_exit_callback', '_put_checkpoint', '_receive_event', '_receive_response_body', '_receive_response_headers', '_reconstruct', '_refresh_api_key', '_replace', '_respond_to_state_changes', '_response_closed', '_run', '_run_once', '_scratchpad', '_send_event', '_send_handling_auth', '_send_handling_redirects', '_send_request_body', '_send_request_headers', '_send_single_request', '_server_switch_event', '_set_config_context', '_set_result_unless_cancelled', '_set_timeout', '_shadowed_dict', '_should_stream_response_body', '_should_unflatten_callable_args', '_splithost', '_splittag', '_splittype', '_suppress_interrupt', '_timer_handle_cancelled', '_tracing_v2_is_enabled', '_transform_recursive', '_transform_typeddict', '_transport_for_url', '_triggers', '_unset_config_context', '_validate_headers', '_validate_non_model_type', '_value_and_type_iter', '_warn_on_import', '_write', '_xxhash_str', 'acquire', 'add_header', 'add_metadata', 'add_request_id', 'add_tags', 'after_tick', 'apply_writes', 'as_dict', 'assign_to_connection', 'auth_flow', 'auth_headers', 'authority', 'base_url', 'build_request', 
'bytesify', 'call_at', 'call_later', 'call_soon', 'can_handle_request', 'cancel', 'cast', 'channels_from_checkpoint', 'checkpoint_null_version', 'close', 'commit', 'compile', 'configure', 'construct', 'construct_type', 'consume', 'content', 'cookies', 'copy', 'copy_with', 'create', 'create_checkpoint', 'create_future', 'custom_auth', 'data', 'debug', 'decode', 'deepcopy', 'deepvalues', 'default_headers', 'default_query', 'detect_encoding', 'do_write', 'dumps', 'elapsed', 'empty_checkpoint', 'encode', 'encode_host', 'encode_json', 'encode_request', 'encoding', 'enforce_bytes', 'enforce_headers', 'enforce_stream', 'enforce_url', 'ensure_config', 'enter_context', 'enumerate', 'env_var_is_set', 'extract_cookies', 'extract_type_arg', 'field_get_default', 'filterwarnings', 'finditer', 'finish', 'flush', 'from_checkpoint', 'full_url', 'get', 'get_all', 'get_args', 'get_callback_manager_for_config', 'get_child', 'get_comma_header', 'get_current_run_tree', 'get_debug', 'get_default', 'get_executor_for_config', 'get_extra_info', 'get_list', 'get_max_retries', 'get_model_config', 'get_model_fields', 'get_multipart_boundary_from_content_type', 'get_name', 'get_origin', 'get_tracing_context', 'getattr_static', 'handle_event', 'handle_request', 'has_expect_100_continue', 'has_expired', 'has_redirect_location', 'header_max_count', 'header_store_parse', 'headers', 'helper', 'host', 'http_version', 'ident', 'increment', 'info', 'inner', 'invoke', 'isEnabledFor', 'is_absolute_url', 'is_alive', 'is_annotated_type', 'is_available', 'is_closed', 'is_given', 'is_idle', 'is_interactive_env', 'is_iterable', 'is_iterable_type', 'is_list', 'is_list_type', 'is_literal_type', 'is_mapping', 'is_next_line_obviously_invalid_request_line', 'is_queued', 'is_relative_url', 'is_required', 'is_sequence_type', 'is_set', 'is_socket_readable', 'is_success', 'is_type_alias_type', 'is_typeddict', 'is_union', 'is_union_type', 'is_valid_field_name', 'isclass', 'iter_bytes', 'iter_raw', 'iterencode', 'json', 
'keys', 'loads', 'make_cookies', 'make_request_options', 'map_exceptions', 'map_httpcore_exceptions', 'map_input', 'map_output_updates', 'map_output_values', 'match_cached_writes', 'maybe_extract_at_most', 'maybe_extract_lines', 'maybe_extract_next_line', 'maybe_read_from_SEND_RESPONSE_server', 'maybe_transform', 'merge', 'merge_configs', 'model_construct', 'model_copy', 'model_fields', 'multi_items', 'netloc', 'next_event', 'normalize_and_validate', 'normalize_path', 'normalize_port', 'notify', 'notify_all', 'on_chain_end', 'on_chain_start', 'origin', 'our_state', 'output_writes', 'override', 'params', 'parent', 'parse', 'password', 'patch_config', 'patch_configurable', 'percent_encoded', 'planner', 'platform_headers', 'pop', 'port', 'post', 'prepare_next_tasks', 'prepare_single_task', 'process_cpu_count', 'process_event', 'push', 'put', 'put_writes', 'quote', 'raise_for_status', 'raw', 'raw_decode', 'raw_host', 'raw_items', 'raw_path', 'raw_scheme', 'read', 'read_channel', 'read_channels', 'reason_phrase', 'receive_data', 'recv', 'release', 'replace', 'request', 'request_context', 'request_host', 'request_id', 'run_with_retry', 'select', 'send', 'send_data', 'send_eom', 'send_with_data_passthrough', 'set', 'set_config_context', 'set_handlers', 'setdefault', 'shutdown', 'sleep', 'smart_deepcopy', 'start_next_cycle', 'stop', 'stream', 'stream_channels_asis', 'strip_not_given', 'sync_auth_flow', 'synth', 'task_path_str', 'their_state', 'tick', 'time', 'to_httpx_files', 'tracing_is_enabled', 'trailing_data', 'transform', 'unquote', 'unwrap', 'update', 'urlparse', 'user_agent', 'username', 'uuid4', 'uuid6', 'validate', 'validate_keys', 'validate_path', 'validate_python', 'validate_response_format', 'validate_type', 'value', 'wait_for_connection', 'wrapper', 'write', 'write_headers', 'write_request'], 'param_names': ['planner_prompt', 'synth_prompt']}\n", + "\n" + ] + } + ], "source": [ - "import sys\n", - "import time\n", - "from langgraph.graph import StateGraph, 
START, END\n", - "from opto.trace.io import instrument_graph\n", - "\n", - "HAS_SYSMON = hasattr(sys, \"monitoring\")\n", - "\n", - "print(f\"Python {sys.version_info.major}.{sys.version_info.minor}\")\n", - "print(f\"sys.monitoring available: {HAS_SYSMON}\")" - ] - }, - { - "cell_type": "markdown", - "id": "ce9569a4", - "metadata": {}, - "source": [ - "## Define the Graph\n", - "\n", - "Create a simple planner -> synthesizer graph for demonstration." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "236820ff", - "metadata": {}, - "outputs": [], - "source": [ - "def build_graph():\n", - " \"\"\"Build a simple planner->synth graph.\"\"\"\n", - " def planner(state):\n", - " return {\"plan\": f\"plan::{state['query']}\"}\n", - "\n", - " def synth(state):\n", - " query = state.get(\"query\", \"\")\n", - " plan = state.get(\"plan\", \"\")\n", - " return {\"final_answer\": f\"answer::{query}::{plan}\"}\n", - "\n", - " g = StateGraph(dict)\n", - " g.add_node(\"planner\", planner)\n", - " g.add_node(\"synth\", synth)\n", - " g.add_edge(START, \"planner\")\n", - " g.add_edge(\"planner\", \"synth\")\n", - " g.add_edge(\"synth\", END)\n", - " return g\n", - "\n", - "# Test the base graph\n", - "test_graph = build_graph()\n", - "test_result = test_graph.compile().invoke({\"query\": \"What is CRISPR?\"})\n", - "print(f\"✓ Graph works: {test_result}\")" - ] - }, - { - "cell_type": "markdown", - "id": "907f7aa8", - "metadata": {}, - "source": [ - "## Test Different Backends\n", - "\n", - "Run the same graph with different instrumentation backends." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "17a2a026", - "metadata": {}, - "outputs": [], - "source": [ - "def run_test(name, instrument_kwargs):\n", - " \"\"\"Run a single instrumentation test.\"\"\"\n", - " print(f\"\\n{'='*60}\")\n", - " print(f\"Test: {name}\")\n", - " print(f\"{'='*60}\")\n", - " try:\n", - " t0 = time.perf_counter()\n", - " \n", - " # Build and instrument graph \n", - " graph = build_graph()\n", - " if \"backend\" in instrument_kwargs and instrument_kwargs[\"backend\"] == \"trace\":\n", - " # For trace backend, pass graph_factory and scope\n", - " instrumented = instrument_graph(\n", - " graph_factory=build_graph,\n", - " scope=globals(),\n", - " **instrument_kwargs\n", - " )\n", - " else:\n", - " # For otel/sysmon, pass compiled graph\n", - " instrumented = instrument_graph(\n", - " graph=graph.compile(),\n", - " **instrument_kwargs\n", - " )\n", - " \n", - " # Invoke\n", - " result = instrumented.invoke({\"query\": \"What is CRISPR?\"})\n", - " dt_ms = (time.perf_counter() - t0) * 1000.0\n", - " \n", - " # Extract answer\n", - " answer = result.get(\"final_answer\", result)\n", - " \n", - " print(f\"✓ SUCCESS in {dt_ms:.1f}ms\")\n", - " print(f\"Answer (preview): {str(answer)[:80]}\")\n", - " return True\n", - " except Exception as e:\n", - " print(f\"✗ FAIL: {e}\")\n", - " import traceback\n", - " traceback.print_exc()\n", - " return False\n", - "\n", - "results = {}" - ] - }, - { - "cell_type": "markdown", - "id": "59c8ed5b", - "metadata": {}, - "source": [ - "### Test 1: Trace Backend Only" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "189fa028", - "metadata": {}, - "outputs": [], - "source": [ - "results[\"trace\"] = run_test(\n", - " \"backend='trace'\",\n", - " {\"backend\": \"trace\", \"output_key\": \"final_answer\"}\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "d98cf881", - "metadata": {}, - "source": [ - "### Test 2: Trace + OpenTelemetry" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "id": "afec093b", - "metadata": {}, - "outputs": [], - "source": [ - "results[\"trace+otel\"] = run_test(\n", - " \"backend='trace', observe_with=('otel',)\",\n", - " {\n", - " \"backend\": \"trace\",\n", - " \"observe_with\": (\"otel\",),\n", - " \"output_key\": \"final_answer\"\n", - " }\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "d9abee4b", - "metadata": {}, - "source": [ - "### Test 3: OpenTelemetry Backend Only" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3994b2a5", - "metadata": {}, - "outputs": [], - "source": [ - "results[\"otel\"] = run_test(\n", - " \"backend='otel'\",\n", - " {\"backend\": \"otel\", \"output_key\": \"final_answer\"}\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "859a2dd9", - "metadata": {}, - "source": [ - "### Test 4: sys.monitoring Tests (Python 3.12+)\n", - "\n", - "These tests only run on Python 3.12+ where `sys.monitoring` is available." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6b8e27f3", - "metadata": {}, - "outputs": [], - "source": [ - "if HAS_SYSMON:\n", - " results[\"trace+sysmon\"] = run_test(\n", - " \"backend='trace', observe_with=('sysmon',)\",\n", - " {\n", - " \"backend\": \"trace\",\n", - " \"observe_with\": (\"sysmon\",),\n", - " \"output_key\": \"final_answer\"\n", - " }\n", - " )\n", - " \n", - " results[\"trace+otel+sysmon\"] = run_test(\n", - " \"backend='trace', observe_with=('otel', 'sysmon')\",\n", - " {\n", - " \"backend\": \"trace\",\n", - " \"observe_with\": (\"otel\", \"sysmon\"),\n", - " \"output_key\": \"final_answer\"\n", - " }\n", - " )\n", - " \n", - " results[\"otel+sysmon\"] = run_test(\n", - " \"backend='otel', observe_with=('sysmon',)\",\n", - " {\n", - " \"backend\": \"otel\",\n", - " \"observe_with\": (\"sysmon\",),\n", - " \"output_key\": \"final_answer\"\n", - " }\n", - " )\n", - " \n", - " results[\"sysmon\"] = run_test(\n", - " \"backend='sysmon'\",\n", - " 
{\n", - " \"backend\": \"sysmon\",\n", - " \"output_key\": \"final_answer\"\n", - " }\n", - " )\n", + "import runpy\n", + "from pathlib import Path\n", + "\n", + "candidates = [\n", + " Path('examples/notebooks/demo_langgraph_instrument_and_compare_observers.py'),\n", + " Path('demo_langgraph_instrument_and_compare_observers.py'),\n", + "]\n", + "\n", + "for candidate in candidates:\n", + " if candidate.exists():\n", + " runpy.run_path(str(candidate), run_name='__main__')\n", + " break\n", "else:\n", - " print(\"\\n⚠️ sys.monitoring tests skipped (requires Python 3.12+)\")" - ] - }, - { - "cell_type": "markdown", - "id": "94d26802", - "metadata": {}, - "source": [ - "## Results Summary" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b30074f3", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"\\n\" + \"=\"*80)\n", - "print(\"Test Results Summary\")\n", - "print(\"=\"*80)\n", - "\n", - "passed = sum(1 for v in results.values() if v)\n", - "total = len(results)\n", - "\n", - "for name, success in results.items():\n", - " status = \"✓ PASS\" if success else \"✗ FAIL\"\n", - " print(f\" {name:30s} {status}\")\n", - "\n", - "print(f\"\\nTotal: {passed}/{total} passed\")\n", - "\n", - "# Verify critical backends\n", - "assert results.get(\"trace\", False), \"trace backend should pass\"\n", - "assert results.get(\"otel\", False), \"otel backend should pass\"\n", - "\n", - "print(\"\\n✓ All critical tests passed!\")\n", - "print(\"=\"*80)" - ] - }, - { - "cell_type": "markdown", - "id": "3f7e2859", - "metadata": {}, - "source": [ - "# LangGraph trace / OTEL / sys.monitoring comparison demo\n", - "\n", - "Compact notebook comparing the new supported configurations:\n", - "\n", - "- `backend=\"trace\"`\n", - "- `backend=\"trace\", observe_with=(\"otel\",)`\n", - "- `backend=\"trace\", observe_with=(\"sysmon\",)`\n", - "- `backend=\"trace\", observe_with=(\"otel\", \"sysmon\")`\n", - "- `backend=\"otel\", observe_with=(\"sysmon\",)`\n", - "- 
`backend=\"sysmon\"`\n", - "\n", - "It prints result previews, observer artifacts, simple structure summaries, and a small timing comparison." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "445e1ff9", - "metadata": {}, - "outputs": [], - "source": [ - "import sys, time\n", - "from langgraph.graph import StateGraph, START, END\n", - "from opto.trace import node\n", - "from opto.trace.io import instrument_graph, optimize_graph\n", - "\n", - "HAS_SYSMON = hasattr(sys, 'monitoring')\n", - "\n", - "class StubLLM:\n", - " model = 'stub'\n", - " def __call__(self, messages=None, **kwargs):\n", - " class _Msg:\n", - " content = 'stub-response'\n", - " class _Choice:\n", - " message = _Msg()\n", - " class _Resp:\n", - " choices = [_Choice()]\n", - " return _Resp()\n", - "\n", - "def _raw(x):\n", - " return getattr(x, 'data', x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3404aa4b", - "metadata": {}, - "outputs": [], - "source": [ - "planner_prompt = node('Plan: {query}', trainable=True, name='planner_prompt')\n", - "synth_prompt = node('Answer: {query} :: {plan}', trainable=True, name='synth_prompt')\n", - "\n", - "def planner_node(state):\n", - " query = _raw(state['query'])\n", - " return {'plan': planner_prompt.data.replace('{query}', str(query))}\n", - "\n", - "def synth_node(state):\n", - " query = _raw(state['query'])\n", - " plan = _raw(state['plan'])\n", - " answer = synth_prompt.data.replace('{query}', str(query)).replace('{plan}', str(plan))\n", - " return {'final_answer': node(answer, name='final_answer_node')}\n", - "\n", - "def build_trace_graph():\n", - " g = StateGraph(dict)\n", - " g.add_node('planner', planner_node)\n", - " g.add_node('synth', synth_node)\n", - " g.add_edge(START, 'planner')\n", - " g.add_edge('planner', 'synth')\n", - " g.add_edge('synth', END)\n", - " return g\n", - "\n", - "def build_plain_graph():\n", - " def planner(state):\n", - " return {'plan': f\"plan::{state['query']}\"}\n", - 
" def synth(state):\n", - " return {'final_answer': f\"answer::{state['query']}::{state['plan']}\"}\n", - " g = StateGraph(dict)\n", - " g.add_node('planner', planner)\n", - " g.add_node('synth', synth)\n", - " g.add_edge(START, 'planner')\n", - " g.add_edge('planner', 'synth')\n", - " g.add_edge('synth', END)\n", - " return g" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d656d4a6", - "metadata": {}, - "outputs": [], - "source": [ - "def run_case(name, factory):\n", - " t0 = time.perf_counter()\n", - " graph = factory()\n", - " result = graph.invoke({'query': 'What is CRISPR?'})\n", - " dt_ms = (time.perf_counter() - t0) * 1000.0\n", - " answer = result.get('final_answer', result)\n", - " observer_summary = []\n", - " for art in getattr(graph, '_last_observer_artifacts', []):\n", - " if art.carrier == 'sysmon':\n", - " observer_summary.append({'carrier': 'sysmon', 'events': len(art.profile_doc.get('events', []))})\n", - " elif art.carrier == 'otel':\n", - " otlp = art.raw or {}\n", - " spans = otlp.get('resourceSpans', [{}])[0].get('scopeSpans', [{}])[0].get('spans', []) if otlp.get('resourceSpans') else []\n", - " observer_summary.append({'carrier': 'otel', 'spans': len(spans)})\n", - " sysmon_events = len(getattr(graph, '_last_profile_doc', {}).get('events', [])) if getattr(graph, '_last_profile_doc', None) else None\n", - " row = {\n", - " 'name': name,\n", - " 'answer_preview': str(getattr(answer, 'data', answer))[:80],\n", - " 'time_ms': round(dt_ms, 3),\n", - " 'observer_summary': observer_summary,\n", - " 'sysmon_events': sysmon_events,\n", - " }\n", - " print(row)\n", - " return row\n", - "\n", - "rows = []\n", - "\n", - "rows.append(run_case(\n", - " 'trace',\n", - " lambda: instrument_graph(\n", - " backend='trace',\n", - " graph_factory=build_trace_graph,\n", - " scope=globals(),\n", - " graph_agents_functions=['planner_node', 'synth_node'],\n", - " graph_prompts_list=[planner_prompt, synth_prompt],\n", - " 
output_key='final_answer',\n", - " ),\n", - "))\n", - "\n", - "rows.append(run_case(\n", - " 'trace+otel',\n", - " lambda: instrument_graph(\n", - " backend='trace',\n", - " observe_with=('otel',),\n", - " graph_factory=build_trace_graph,\n", - " scope=globals(),\n", - " graph_agents_functions=['planner_node', 'synth_node'],\n", - " graph_prompts_list=[planner_prompt, synth_prompt],\n", - " output_key='final_answer',\n", - " ),\n", - "))\n", - "\n", - "if HAS_SYSMON:\n", - " rows.append(run_case(\n", - " 'trace+sysmon',\n", - " lambda: instrument_graph(\n", - " backend='trace',\n", - " observe_with=('sysmon',),\n", - " graph_factory=build_trace_graph,\n", - " scope=globals(),\n", - " graph_agents_functions=['planner_node', 'synth_node'],\n", - " graph_prompts_list=[planner_prompt, synth_prompt],\n", - " output_key='final_answer',\n", - " ),\n", - " ))\n", - " rows.append(run_case(\n", - " 'trace+otel+sysmon',\n", - " lambda: instrument_graph(\n", - " backend='trace',\n", - " observe_with=('otel', 'sysmon'),\n", - " graph_factory=build_trace_graph,\n", - " scope=globals(),\n", - " graph_agents_functions=['planner_node', 'synth_node'],\n", - " graph_prompts_list=[planner_prompt, synth_prompt],\n", - " output_key='final_answer',\n", - " ),\n", - " ))\n", - "\n", - "rows.append(run_case(\n", - " 'otel',\n", - " lambda: instrument_graph(\n", - " graph=build_plain_graph(),\n", - " backend='otel',\n", - " llm=StubLLM(),\n", - " initial_templates={'planner_prompt': 'Plan {query}'},\n", - " output_key='final_answer',\n", - " ),\n", - "))\n", - "\n", - "if HAS_SYSMON:\n", - " rows.append(run_case(\n", - " 'otel+sysmon',\n", - " lambda: instrument_graph(\n", - " graph=build_plain_graph(),\n", - " backend='otel',\n", - " observe_with=('sysmon',),\n", - " llm=StubLLM(),\n", - " initial_templates={'planner_prompt': 'Plan {query}'},\n", - " output_key='final_answer',\n", - " ),\n", - " ))\n", - " rows.append(run_case(\n", - " 'sysmon',\n", - " lambda: instrument_graph(\n", - " 
graph=build_plain_graph(),\n", - " backend='sysmon',\n", - " initial_templates={'planner_prompt': 'Plan {query}'},\n", - " output_key='final_answer',\n", - " ),\n", - " ))\n", - "\n", - "assert any(r['name'] == 'trace' for r in rows)\n", - "assert any(r['name'] == 'otel' for r in rows)\n", - "if HAS_SYSMON:\n", - " assert any(r['name'] == 'sysmon' for r in rows)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f8254702", - "metadata": {}, - "outputs": [], - "source": [ - "# baseline-only optimization sanity checks\n", - "trace_graph = instrument_graph(\n", - " backend='trace',\n", - " observe_with=('otel',) if not HAS_SYSMON else ('otel', 'sysmon'),\n", - " graph_factory=build_trace_graph,\n", - " scope=globals(),\n", - " graph_agents_functions=['planner_node', 'synth_node'],\n", - " graph_prompts_list=[planner_prompt, synth_prompt],\n", - " output_key='final_answer',\n", - ")\n", - "trace_opt = optimize_graph(\n", - " trace_graph,\n", - " queries=['What is CRISPR?'],\n", - " iterations=0,\n", - " eval_fn=lambda payload: {\n", - " 'score': 1.0 if 'CRISPR' in str(payload['answer']) else 0.0,\n", - " 'feedback': 'Keep CRISPR in the final answer.',\n", - " },\n", - ")\n", - "assert trace_opt.best_iteration == 0\n", - "assert trace_opt.best_score == 1.0\n", - "\n", - "if HAS_SYSMON:\n", - " sysmon_graph = instrument_graph(\n", - " graph=build_plain_graph(),\n", - " backend='sysmon',\n", - " initial_templates={'planner_prompt': 'Plan {query}'},\n", - " output_key='final_answer',\n", - " )\n", - " sysmon_opt = optimize_graph(\n", - " sysmon_graph,\n", - " queries=['What is CRISPR?'],\n", - " iterations=0,\n", - " eval_fn=lambda payload: {\n", - " 'score': 1.0 if 'CRISPR' in str(payload['answer']) else 0.0,\n", - " 'feedback': 'Keep CRISPR in the answer.',\n", - " },\n", - " )\n", - " assert sysmon_opt.best_iteration == 0\n", - " assert sysmon_opt.best_score == 1.0" + " raise FileNotFoundError('Could not locate 
demo_langgraph_instrument_and_compare_observers.py')\n" ] } ], "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" } }, "nbformat": 4, diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py index 6f3a95f3..ea603739 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py @@ -1,171 +1,517 @@ #!/usr/bin/env python3 """ -LangGraph / OTEL / sys.monitoring comparison demo. +Live LangGraph optimization comparison across Trace / OTEL / sys.monitoring. -Demonstrates using instrument_graph with different backends: -- trace, trace+otel, trace+sysmon, trace+otel+sysmon -- otel, otel+sysmon -- sysmon +This script intentionally benchmarks optimization over 5 iterations using +a real OpenRouter-backed LLM when OPENROUTER_API_KEY is available. + +Compared configurations: + - trace + - trace + otel + - trace + sysmon + - trace + otel + sysmon + - otel + - otel + sysmon + - sysmon + +When OPENROUTER_API_KEY is not set, the script exits successfully after +printing a skip message. This keeps notebook CI deterministic while still +making the demo a true live benchmark for local/manual use. 
""" +from __future__ import annotations + +import os import sys -import time +from pathlib import Path +from typing import Any, Callable, Dict, Mapping, Tuple + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + from langgraph.graph import StateGraph, START, END -from opto.trace.io import instrument_graph +from opto.trace import node +from opto.trace.nodes import MessageNode, ParameterNode +from opto.trace.io import ( + instrument_graph, + optimize_graph, + make_dict_binding, + otlp_traces_to_trace_json, +) +from opto.trace.io.sysmonitoring import sysmon_profile_to_tgj +from opto.trace.io.tgj_ingest import ingest_tgj + +try: + from openai import OpenAI +except Exception: + OpenAI = None HAS_SYSMON = hasattr(sys, "monitoring") +OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "") +OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "gpt-4o-mini") +OPENROUTER_BASE_URL = os.environ.get("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") +ITERATIONS = 5 +QUERIES = [ + "What is CRISPR?", + "How does CRISPR enable gene editing?", +] +OPTIMIZED_SYNTH_PROMPT = ( + "Start the answer exactly with [BENCH_OK]. " + "Then answer carefully: {query}\nPlan: {plan}" +) +PLANNER_SYSTEM_PROMPT = "You are a careful planner." +SYNTH_SYSTEM_PROMPT = "You are a careful scientific assistant." 
+DEFAULT_TEMPLATES = { + "planner_prompt": "Create a short plan for: {query}", + "synth_prompt": "Answer briefly and factually: {query}\nPlan: {plan}", +} -def build_graph(): - """Build a simple planner->synth graph.""" - def planner(state): - return {"plan": f"plan::{state['query']}"} - - def synth(state): - query = state.get("query", "") - plan = state.get("plan", "") - return {"final_answer": f"answer::{query}::{plan}"} - - g = StateGraph(dict) - g.add_node("planner", planner) - g.add_node("synth", synth) - g.add_edge(START, "planner") - g.add_edge("planner", "synth") - g.add_edge("synth", END) - return g - - -def run_test(name, instrument_kwargs): - """Run a single instrumentation test.""" - print(f"\nTest: {name}") - try: - t0 = time.perf_counter() - - # Build and instrument graph - graph = build_graph() - if "backend" in instrument_kwargs and instrument_kwargs["backend"] == "trace": - # For trace backend, pass graph_factory and scope - instrumented = instrument_graph( - graph_factory=build_graph, - scope=globals(), - **instrument_kwargs - ) - else: - # For otel/sysmon, pass compiled graph - instrumented = instrument_graph( - graph=graph.compile(), - **instrument_kwargs - ) - - # Invoke - result = instrumented.invoke({"query": "What is CRISPR?"}) - dt_ms = (time.perf_counter() - t0) * 1000.0 - - # Extract answer - answer = result.get("final_answer", result) - - print(f" ✓ SUCCESS ({dt_ms:.1f}ms)") - print(f" Answer: {str(answer)[:80]}") - return True - except Exception as e: - print(f" ✗ FAIL: {e}") - import traceback - traceback.print_exc() - return False +def _raw(value: Any) -> Any: + return getattr(value, "data", value) -def main(): - print("\n" + "=" * 80) - print("LangGraph Instrumentation Backends Comparison") - print("=" * 80) - print(f"Python {sys.version_info.major}.{sys.version_info.minor}") - print(f"sys.monitoring available: {HAS_SYSMON}\n") - - results = {} - - # Test 1: trace backend - results["trace"] = run_test( - "backend='trace'", - 
{"backend": "trace", "output_key": "final_answer"} +def _str_map(values: Mapping[str, Any]) -> Dict[str, str]: + return {key: str(_raw(value)) for key, value in values.items()} + + +def render_template(template: str, **variables: Any) -> str: + return template.format(**_str_map(variables)) + + +def call_chat_text( + llm, + *, + system_prompt: str, + user_prompt: str, + **kwargs: Any, +) -> str: + response = llm( + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + temperature=kwargs.pop("temperature", 0), + **kwargs, + ) + return response.choices[0].message.content + + +def _message_names(nodes: Dict[str, Any]): + names = [] + seen = set() + for obj in nodes.values(): + if isinstance(obj, MessageNode): + nm = str(getattr(obj, "name", getattr(obj, "py_name", ""))) + base = nm.split("/")[-1].split(":")[0] + if base not in seen: + seen.add(base) + names.append(base) + return sorted(names) + + +class DictUpdateOptimizer: + def __init__(self, update_dict: Dict[str, Any]): + self.update_dict = dict(update_dict) + self.calls = 0 + + def zero_feedback(self): + return None + + def backward(self, *_args, **_kwargs): + return None + + def step(self): + self.calls += 1 + if self.calls == 1: + return dict(self.update_dict) + return {} + + +class TraceMutatingOptimizer: + def __init__(self, prompt_node, update_value: str, key: str): + self.prompt_node = prompt_node + self.update_value = update_value + self.key = key + self.calls = 0 + + def zero_feedback(self): + return None + + def backward(self, *_args, **_kwargs): + return None + + def step(self): + self.calls += 1 + if self.calls == 1: + self.prompt_node._set(self.update_value) + return {self.key: self.update_value} + return {} + + +def make_live_llm(): + if not OPENROUTER_API_KEY or OpenAI is None: + return None + + client = OpenAI( + base_url=OPENROUTER_BASE_URL, + api_key=OPENROUTER_API_KEY, + ) + + def _llm(messages=None, **kwargs): + return 
client.chat.completions.create( + model=OPENROUTER_MODEL, + messages=messages or [], + max_tokens=kwargs.get("max_tokens", 220), + temperature=kwargs.get("temperature", 0), + ) + + _llm.model = OPENROUTER_MODEL + return _llm + + +def eval_fn(payload: Dict[str, Any]) -> Dict[str, Any]: + answer = str(_raw(payload.get("answer", ""))).strip() + ok = answer.startswith("[BENCH_OK]") + return { + "score": 1.0 if ok else 0.0, + "feedback": "Start the answer exactly with [BENCH_OK].", + } + + +def summarize_otlp(otlp: Dict[str, Any]) -> Dict[str, Any]: + spans = otlp.get("resourceSpans", [{}])[0].get("scopeSpans", [{}])[0].get("spans", []) + param_keys = sorted( + { + a["key"] + for s in spans + for a in s.get("attributes", []) + if str(a.get("key", "")).startswith("param.") + } + ) + docs = otlp_traces_to_trace_json( + otlp, + agent_id_hint="compare", + use_temporal_hierarchy=True, ) - - # Test 2: trace + otel - results["trace+otel"] = run_test( - "backend='trace', observe_with=('otel',)", + nodes = ingest_tgj(docs[0]) if docs else {} + return { + "span_count": len(spans), + "span_names": [s.get("name") for s in spans], + "param_keys": param_keys, + "message_names": _message_names(nodes), + } + + +def summarize_sysmon(profile_doc: Dict[str, Any]) -> Dict[str, Any]: + tgj = sysmon_profile_to_tgj(profile_doc, run_id="compare", graph_id="demo", scope="compare/0") + nodes = ingest_tgj(tgj) + param_names = sorted( { - "backend": "trace", - "observe_with": ("otel",), - "output_key": "final_answer" + str(getattr(obj, "name", getattr(obj, "py_name", ""))).split("/")[-1].split(":")[0] + for obj in nodes.values() + if isinstance(obj, ParameterNode) } ) - - # Test 3-4: trace + sysmon variants (if available) - if HAS_SYSMON: - results["trace+sysmon"] = run_test( - "backend='trace', observe_with=('sysmon',)", - { - "backend": "trace", - "observe_with": ("sysmon",), - "output_key": "final_answer" - } + return { + "event_count": len(profile_doc.get("events", [])), + "tgj_node_count": 
len(tgj.get("nodes", {})), + "message_names": _message_names(nodes), + "param_names": param_names, + } + + +def build_semantic_graph(planner_fn, synth_fn): + graph = StateGraph(dict) + graph.add_node("planner", planner_fn) + graph.add_node("synth", synth_fn) + graph.add_edge(START, "planner") + graph.add_edge("planner", "synth") + graph.add_edge("synth", END) + return graph.compile() + + +def make_semantic_nodes( + *, + planner_call: Callable[[str], str], + synth_call: Callable[[str, str], Any], + wrap_final_answer: Callable[[Any], Any] | None = None, +): + def planner_node(state): + query = str(_raw(state["query"])) + return {"query": query, "plan": planner_call(query)} + + def synth_node(state): + query = str(_raw(state["query"])) + plan = str(_raw(state["plan"])) + answer = synth_call(query, plan) + if wrap_final_answer is not None: + answer = wrap_final_answer(answer) + return {"final_answer": answer} + + return planner_node, synth_node + + +def make_trace_case(llm, observe_with: Tuple[str, ...] 
= ()): + planner_prompt = node( + DEFAULT_TEMPLATES["planner_prompt"], + trainable=True, + name="planner_prompt", + ) + synth_prompt = node( + DEFAULT_TEMPLATES["synth_prompt"], + trainable=True, + name="synth_prompt", + ) + scope: Dict[str, Any] = {} + + def planner_node(state): + query = str(_raw(state["query"])) + prompt = render_template(planner_prompt.data, query=query) + plan = call_chat_text( + llm, + system_prompt=PLANNER_SYSTEM_PROMPT, + user_prompt=prompt, + temperature=0, ) - - results["trace+otel+sysmon"] = run_test( - "backend='trace', observe_with=('otel', 'sysmon')", - { - "backend": "trace", - "observe_with": ("otel", "sysmon"), - "output_key": "final_answer" - } + return {"query": query, "plan": plan} + + def synth_node(state): + query = str(_raw(state["query"])) + plan = str(_raw(state["plan"])) + prompt = render_template(synth_prompt.data, query=query, plan=plan) + answer = call_chat_text( + llm, + system_prompt=SYNTH_SYSTEM_PROMPT, + user_prompt=prompt, + temperature=0, ) - - # Test 5: otel backend - results["otel"] = run_test( - "backend='otel'", - {"backend": "otel", "output_key": "final_answer"} + return {"final_answer": node(answer, name="final_answer_node")} + + scope.update( + { + "llm": llm, + "planner_prompt": planner_prompt, + "synth_prompt": synth_prompt, + "render_template": render_template, + "call_chat_text": call_chat_text, + "PLANNER_SYSTEM_PROMPT": PLANNER_SYSTEM_PROMPT, + "SYNTH_SYSTEM_PROMPT": SYNTH_SYSTEM_PROMPT, + "node": node, + "_raw": _raw, + "planner_node": planner_node, + "synth_node": synth_node, + } ) - - # Test 6: otel + sysmon (if available) - if HAS_SYSMON: - results["otel+sysmon"] = run_test( - "backend='otel', observe_with=('sysmon',)", - { - "backend": "otel", - "observe_with": ("sysmon",), - "output_key": "final_answer" - } + + def build_graph(): + return build_semantic_graph(scope["planner_node"], scope["synth_node"]) + + instrumented = instrument_graph( + backend="trace", + observe_with=observe_with, + 
graph_factory=build_graph, + scope=scope, + graph_agents_functions=["planner_node", "synth_node"], + graph_prompts_list=[planner_prompt, synth_prompt], + train_graph_agents_functions=False, + output_key="final_answer", + ) + optimizer = TraceMutatingOptimizer(synth_prompt, OPTIMIZED_SYNTH_PROMPT, "synth_prompt") + return instrumented, optimizer, lambda: synth_prompt.data + + +def make_otel_case(llm, observe_with: Tuple[str, ...] = ()): + instrumented = instrument_graph( + graph=None, + backend="otel", + observe_with=observe_with, + llm=llm, + initial_templates=dict(DEFAULT_TEMPLATES), + output_key="final_answer", + ) + instrumented.backend = "otel" + templates = instrumented.templates + tracing_llm = instrumented.tracing_llm + + def planner_call(query: str) -> str: + return tracing_llm.template_prompt_call( + span_name="planner", + template_name="planner_prompt", + template=templates["planner_prompt"], + variables={"query": query}, + system_prompt=PLANNER_SYSTEM_PROMPT, + optimizable_key="planner", + temperature=0, + ) + + def synth_call(query: str, plan: str) -> str: + return tracing_llm.template_prompt_call( + span_name="synth", + template_name="synth_prompt", + template=templates["synth_prompt"], + variables={"query": query, "plan": plan}, + system_prompt=SYNTH_SYSTEM_PROMPT, + optimizable_key="synth", + temperature=0, + ) + + instrumented.graph = build_semantic_graph( + *make_semantic_nodes( + planner_call=planner_call, + synth_call=synth_call, + ) + ) + optimizer = DictUpdateOptimizer({"synth_prompt": OPTIMIZED_SYNTH_PROMPT}) + return instrumented, optimizer, lambda: instrumented.templates["synth_prompt"] + + +def make_sysmon_case(llm): + templates = dict(DEFAULT_TEMPLATES) + bindings = {k: make_dict_binding(templates, k, kind="prompt") for k in templates} + + def planner_call(query: str) -> str: + prompt = render_template(templates["planner_prompt"], query=query) + return call_chat_text( + llm, + system_prompt=PLANNER_SYSTEM_PROMPT, + user_prompt=prompt, + 
temperature=0, ) - - # Test 7: sysmon backend - results["sysmon"] = run_test( - "backend='sysmon'", - { - "backend": "sysmon", - "output_key": "final_answer" - } + + def synth_call(query: str, plan: str) -> str: + prompt = render_template(templates["synth_prompt"], query=query, plan=plan) + return call_chat_text( + llm, + system_prompt=SYNTH_SYSTEM_PROMPT, + user_prompt=prompt, + temperature=0, ) - - # Summary + + instrumented = instrument_graph( + graph=build_semantic_graph( + *make_semantic_nodes( + planner_call=planner_call, + synth_call=synth_call, + ) + ), + backend="sysmon", + bindings=bindings, + output_key="final_answer", + ) + optimizer = DictUpdateOptimizer({"synth_prompt": OPTIMIZED_SYNTH_PROMPT}) + return instrumented, optimizer, lambda: templates["synth_prompt"] + + +def run_case(name: str, builder): + instrumented, optimizer, prompt_getter = builder() + result = optimize_graph( + instrumented, + queries=QUERIES, + iterations=ITERATIONS, + optimizer=optimizer, + eval_fn=eval_fn, + output_key="final_answer", + ) + + probe = instrumented.invoke({"query": "What is CRISPR?"}) + answer_preview = str(_raw(probe.get("final_answer", probe)))[:120] + + summary = { + "config": name, + "score_history": [round(x, 3) for x in result.score_history], + "best_iteration": result.best_iteration, + "best_updates": dict(result.best_updates), + "final_synth_prompt": prompt_getter(), + "answer_preview": answer_preview, + "observers": [a.carrier for a in getattr(instrumented, "_last_observer_artifacts", [])], + "trace_summary": None, + "otel_summary": None, + "sysmon_summary": None, + } + + if getattr(instrumented, "backend", None) == "trace": + answer_node = probe.get("final_answer") + summary["trace_summary"] = { + "is_node": hasattr(answer_node, "parents"), + "parent_count": len(getattr(answer_node, "parents", [])), + "parameter_count": len(getattr(instrumented, "parameters", [])), + } + elif getattr(instrumented, "backend", None) == "otel": + otlp = 
instrumented.session.flush_otlp(clear=True) + summary["otel_summary"] = summarize_otlp(otlp) + elif getattr(instrumented, "backend", None) == "sysmon": + summary["sysmon_summary"] = summarize_sysmon(instrumented._last_profile_doc) + + for artifact in getattr(instrumented, "_last_observer_artifacts", []): + if artifact.carrier == "otel": + summary["otel_summary"] = summarize_otlp(artifact.raw) + elif artifact.carrier == "sysmon": + summary["sysmon_summary"] = summarize_sysmon(artifact.profile_doc) + + assert summary["best_iteration"] >= 2 + assert "Start the answer exactly with [BENCH_OK]." in summary["final_synth_prompt"] + return summary + + +def main(): print("\n" + "=" * 80) - print("Test Results Summary") - print("=" * 80) - - passed = sum(1 for v in results.values() if v) - total = len(results) - - for name, success in results.items(): - status = "✓ PASS" if success else "✗ FAIL" - print(f" {name:30s} {status}") - - print(f"\nTotal: {passed}/{total} passed") - - # Final assertions - assert results.get("trace", False), "trace backend must pass" - assert results.get("otel", False), "otel backend must pass" - - print("\n✓ All critical tests passed!") + print("LangGraph live optimization comparison") print("=" * 80) + print(f"Python {sys.version_info.major}.{sys.version_info.minor}") + print(f"sys.monitoring available: {HAS_SYSMON}") + print(f"OPENROUTER_MODEL={OPENROUTER_MODEL}") + + if not OPENROUTER_API_KEY: + print("\n[SKIP] OPENROUTER_API_KEY is not set.") + print("This demo is intentionally live-only. 
Set OPENROUTER_API_KEY to run the benchmark.") + return + if OpenAI is None: + print("\n[SKIP] openai package is unavailable.") + return + + llm = make_live_llm() + cases = [ + ("trace", lambda: make_trace_case(llm, ())), + ("trace+otel", lambda: make_trace_case(llm, ("otel",))), + ("otel", lambda: make_otel_case(llm, ())), + ] + if HAS_SYSMON: + cases.extend( + [ + ("trace+sysmon", lambda: make_trace_case(llm, ("sysmon",))), + ("trace+otel+sysmon", lambda: make_trace_case(llm, ("otel", "sysmon"))), + ("otel+sysmon", lambda: make_otel_case(llm, ("sysmon",))), + ("sysmon", lambda: make_sysmon_case(llm)), + ] + ) + + rows = [run_case(name, builder) for name, builder in cases] + + print("\nOptimization comparison (5 iterations)\n") + print("| config | score_history | best_iteration | observers |") + print("|---|---|---:|---|") + for row in rows: + print( + f"| {row['config']} | {row['score_history']} | {row['best_iteration']} " + f"| {','.join(row['observers']) or '-'} |" + ) + + print("\nBinding / update inspection\n") + for row in rows: + print(f"## {row['config']}") + print(f"best_updates: {row['best_updates']}") + print(f"final_synth_prompt: {row['final_synth_prompt']}") + print(f"answer_preview: {row['answer_preview']}") + if row['trace_summary'] is not None: + print(f"trace_summary: {row['trace_summary']}") + if row['otel_summary'] is not None: + print(f"otel_summary: {row['otel_summary']}") + if row['sysmon_summary'] is not None: + print(f"sysmon_summary: {row['sysmon_summary']}") + print() if __name__ == "__main__": diff --git a/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb b/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb index 6b361240..bc0d296a 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb @@ -53,16 +53,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "execution": { - 
"iopub.execute_input": "2026-02-12T07:58:37.234100Z", - "iopub.status.busy": "2026-02-12T07:58:37.233113Z", - "iopub.status.idle": "2026-02-12T07:58:48.042859Z", - "shell.execute_reply": "2026-02-12T07:58:48.039301Z" + "iopub.execute_input": "2026-04-19T09:32:20.692437Z", + "iopub.status.busy": "2026-04-19T09:32:20.691949Z", + "iopub.status.idle": "2026-04-19T09:32:22.337110Z", + "shell.execute_reply": "2026-04-19T09:32:22.335797Z" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "==================================================\n", + "All dependencies installed!\n", + "==================================================\n" + ] + } + ], "source": [ "!pip install -q langgraph>=1.0.0 opentelemetry-api>=1.38.0 opentelemetry-sdk>=1.38.0 \\\n", " python-dotenv>=1.0.0 requests>=2.28.0 typing_extensions>=4.0.0 graphviz>=0.20.1\n", @@ -109,13 +120,13 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:58:48.234683Z", - "iopub.status.busy": "2026-02-12T07:58:48.233679Z", - "iopub.status.idle": "2026-02-12T07:58:48.254178Z", - "shell.execute_reply": "2026-02-12T07:58:48.252166Z" + "iopub.execute_input": "2026-04-19T09:32:22.380381Z", + "iopub.status.busy": "2026-04-19T09:32:22.379864Z", + "iopub.status.idle": "2026-04-19T09:32:22.388884Z", + "shell.execute_reply": "2026-04-19T09:32:22.387589Z" } }, "outputs": [ @@ -169,13 +180,13 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:58:48.269399Z", - "iopub.status.busy": "2026-02-12T07:58:48.268397Z", - "iopub.status.idle": "2026-02-12T07:58:48.324887Z", - "shell.execute_reply": "2026-02-12T07:58:48.321207Z" + "iopub.execute_input": "2026-04-19T09:32:22.392813Z", + "iopub.status.busy": "2026-04-19T09:32:22.392359Z", + "iopub.status.idle": "2026-04-19T09:32:22.400816Z", + 
"shell.execute_reply": "2026-04-19T09:32:22.399809Z" } }, "outputs": [ @@ -251,13 +262,13 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:58:48.337340Z", - "iopub.status.busy": "2026-02-12T07:58:48.336340Z", - "iopub.status.idle": "2026-02-12T07:58:55.612322Z", - "shell.execute_reply": "2026-02-12T07:58:55.609666Z" + "iopub.execute_input": "2026-04-19T09:32:22.404203Z", + "iopub.status.busy": "2026-04-19T09:32:22.403715Z", + "iopub.status.idle": "2026-04-19T09:32:22.948136Z", + "shell.execute_reply": "2026-04-19T09:32:22.947079Z" } }, "outputs": [ @@ -529,13 +540,13 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 5, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:58:55.622865Z", - "iopub.status.busy": "2026-02-12T07:58:55.621865Z", - "iopub.status.idle": "2026-02-12T07:58:55.641281Z", - "shell.execute_reply": "2026-02-12T07:58:55.639271Z" + "iopub.execute_input": "2026-04-19T09:32:22.951549Z", + "iopub.status.busy": "2026-04-19T09:32:22.951334Z", + "iopub.status.idle": "2026-04-19T09:32:22.963711Z", + "shell.execute_reply": "2026-04-19T09:32:22.962817Z" } }, "outputs": [ @@ -722,13 +733,13 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 6, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:58:55.651617Z", - "iopub.status.busy": "2026-02-12T07:58:55.650609Z", - "iopub.status.idle": "2026-02-12T07:59:07.295195Z", - "shell.execute_reply": "2026-02-12T07:59:07.294185Z" + "iopub.execute_input": "2026-04-19T09:32:22.967148Z", + "iopub.status.busy": "2026-04-19T09:32:22.966882Z", + "iopub.status.idle": "2026-04-19T09:32:25.472657Z", + "shell.execute_reply": "2026-04-19T09:32:25.471707Z" } }, "outputs": [ @@ -776,13 +787,13 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 7, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:07.302370Z", 
- "iopub.status.busy": "2026-02-12T07:59:07.301358Z", - "iopub.status.idle": "2026-02-12T07:59:07.321120Z", - "shell.execute_reply": "2026-02-12T07:59:07.320110Z" + "iopub.execute_input": "2026-04-19T09:32:25.475519Z", + "iopub.status.busy": "2026-04-19T09:32:25.475199Z", + "iopub.status.idle": "2026-04-19T09:32:25.487970Z", + "shell.execute_reply": "2026-04-19T09:32:25.487325Z" } }, "outputs": [ @@ -848,13 +859,13 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 8, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:07.329697Z", - "iopub.status.busy": "2026-02-12T07:59:07.328119Z", - "iopub.status.idle": "2026-02-12T07:59:07.342552Z", - "shell.execute_reply": "2026-02-12T07:59:07.341545Z" + "iopub.execute_input": "2026-04-19T09:32:25.490914Z", + "iopub.status.busy": "2026-04-19T09:32:25.490604Z", + "iopub.status.idle": "2026-04-19T09:32:25.497811Z", + "shell.execute_reply": "2026-04-19T09:32:25.497339Z" } }, "outputs": [ @@ -865,30 +876,30 @@ "Total spans captured: 8\n", "\n", "Unique trace IDs: 1 (D9: should be 1)\n", - "Root invocation span: QA_research_graph.invoke (id=3d446653082f...)\n", + "Root invocation span: QA_research_graph.invoke (id=a8226d00c58b...)\n", "\n", - " Span: llm.chat.completion parent=bd8208b9\n", + " Span: llm.chat.completion parent=ad6064b6\n", " gen_ai.operation.name = chat\n", " gen_ai.output.preview = {\"1\": {\"agent\": \"web_researcher\", \"action\": \"search\", \"goal\": \"collect context\",\n", " gen_ai.provider.name = stub\n", " gen_ai.request.model = stub-llm\n", " trace.temporal_ignore = true\n", "\n", - " Span: planner parent=3d446653\n", + " Span: planner parent=a8226d00\n", " gen_ai.model = stub-llm\n", " inputs.gen_ai.prompt = Create a JSON plan for: What is reinforcement learning?. Use web_researcher and \n", " inputs.user_query = What is reinforcement learning?\n", " param.planner_prompt = Create a JSON plan for: {query}. 
Use web_researcher and synthesizer; include wik\n", " param.planner_prompt.trainable = True\n", "\n", - " Span: llm.chat.completion parent=c17b2240\n", + " Span: llm.chat.completion parent=45b79393\n", " gen_ai.operation.name = chat\n", " gen_ai.output.preview = {\"goto\": \"synthesizer\", \"query\": \"given step 1 of plan: {'agent': 'web_researche\n", " gen_ai.provider.name = stub\n", " gen_ai.request.model = stub-llm\n", " trace.temporal_ignore = true\n", "\n", - " Span: executor parent=3d446653\n", + " Span: executor parent=a8226d00\n", " gen_ai.model = stub-llm\n", " inputs.gen_ai.prompt = Given step 1 of plan: {'agent': 'web_researcher', 'action': 'search', 'goal': 'c\n", " inputs.step = 1\n", @@ -896,7 +907,7 @@ " param.executor_prompt = Given step {step} of plan: {plan_step} for query: {query}. Return JSON {goto,que\n", " param.executor_prompt.trainable = True\n", "\n", - " Span: llm.chat.completion parent=8b9ef57d\n", + " Span: llm.chat.completion parent=e64c876c\n", " gen_ai.operation.name = chat\n", " gen_ai.output.preview = Stub answer for: answer: what is reinforcement learning?\n", "context:\n", @@ -906,7 +917,7 @@ " gen_ai.request.model = stub-llm\n", " trace.temporal_ignore = true\n", "\n", - " Span: synthesizer parent=3d446653\n", + " Span: synthesizer parent=a8226d00\n", " gen_ai.model = stub-llm\n", " inputs.gen_ai.prompt = Answer: What is reinforcement learning?\n", "Context:\n", @@ -919,7 +930,7 @@ "If asked for IDs, include Wikidata QIDs.\n", " param.synthesizer_prompt.trainable = True\n", "\n", - " Span: evaluator parent=3d446653\n", + " Span: evaluator parent=a8226d00\n", " eval.reasons = \n", " eval.score = 0.25\n", "\n", @@ -985,13 +996,13 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 9, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:07.350295Z", - "iopub.status.busy": "2026-02-12T07:59:07.349305Z", - "iopub.status.idle": "2026-02-12T07:59:07.369083Z", - "shell.execute_reply": 
"2026-02-12T07:59:07.367068Z" + "iopub.execute_input": "2026-04-19T09:32:25.500800Z", + "iopub.status.busy": "2026-04-19T09:32:25.500610Z", + "iopub.status.idle": "2026-04-19T09:32:25.512929Z", + "shell.execute_reply": "2026-04-19T09:32:25.512363Z" } }, "outputs": [ @@ -1002,21 +1013,21 @@ "TGJ documents: 1\n", "\n", "ParameterNode (trainable): 3\n", - " QA_research_graph/0/planner_prompt4 trainable=True\n", - " QA_research_graph/0/executor_prompt4 trainable=True\n", - " QA_research_graph/0/synthesizer_prompt4 trainable=True\n", + " QA_research_graph/0/planner_prompt0 trainable=True\n", + " QA_research_graph/0/executor_prompt0 trainable=True\n", + " QA_research_graph/0/synthesizer_prompt0 trainable=True\n", "\n", - "Unique trainable params: ['executor_prompt4', 'planner_prompt4', 'synthesizer_prompt4']\n", + "Unique trainable params: ['executor_prompt0', 'planner_prompt0', 'synthesizer_prompt0']\n", "[OK] No duplicate ParameterNodes (C7).\n", "\n", "MessageNode: 7\n", - " QA_research_graph/0/planner14 parents=['lit_114', 'lit_603418', 'planner_prompt4']\n", - " QA_research_graph/0/llm.chat.completion42 parents=['planner14']\n", - " QA_research_graph/0/llm.chat.completion44 parents=['synthesizer14']\n", - " QA_research_graph/0/executor14 parents=['lit_21694', 'lit_97614', 'lit_603419', 'planner14', 'executor_prompt4']\n", - " QA_research_graph/0/llm.chat.completion43 parents=['executor14']\n", - " QA_research_graph/0/synthesizer14 parents=['lit_12886', 'lit_603420', 'executor14', 'synthesizer_prompt4']\n", - " QA_research_graph/0/evaluator14 parents=['synthesizer14']\n", + " QA_research_graph/0/planner0 parents=['lit_31030', 'lit_70270', 'planner_prompt0']\n", + " QA_research_graph/0/llm.chat.completion0 parents=['planner0']\n", + " QA_research_graph/0/llm.chat.completion2 parents=['synthesizer0']\n", + " QA_research_graph/0/executor0 parents=['lit_7850', 'lit_26280', 'lit_70271', 'planner0', 'executor_prompt0']\n", + " QA_research_graph/0/llm.chat.completion1 
parents=['executor0']\n", + " QA_research_graph/0/synthesizer0 parents=['lit_39320', 'lit_70272', 'executor0', 'synthesizer_prompt0']\n", + " QA_research_graph/0/evaluator0 parents=['synthesizer0']\n", "[WARN] No top-level message nodes found.\n" ] } @@ -1090,13 +1101,13 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 10, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:07.375448Z", - "iopub.status.busy": "2026-02-12T07:59:07.374447Z", - "iopub.status.idle": "2026-02-12T07:59:07.387535Z", - "shell.execute_reply": "2026-02-12T07:59:07.386526Z" + "iopub.execute_input": "2026-04-19T09:32:25.515225Z", + "iopub.status.busy": "2026-04-19T09:32:25.515034Z", + "iopub.status.idle": "2026-04-19T09:32:25.521268Z", + "shell.execute_reply": "2026-04-19T09:32:25.520614Z" } }, "outputs": [ @@ -1106,9 +1117,9 @@ "text": [ "Child LLM spans detected (via temporal_ignore): 3\n", "Top-level message nodes: 4\n", - " [OK] Node executor temporal parent → a409e8991e44... (not a child span)\n", - " [OK] Node synthesizer temporal parent → 7241c782d5e7... (not a child span)\n", - " [OK] Node evaluator temporal parent → de7e4a824c31... (not a child span)\n", + " [OK] Node executor temporal parent → 88934f9b385d... (not a child span)\n", + " [OK] Node synthesizer temporal parent → 714455f5f80c... (not a child span)\n", + " [OK] Node evaluator temporal parent → 457d0101fbd2... 
(not a child span)\n", "\n", "[OK] Temporal chaining verified — no top-level node points to child spans.\n" ] @@ -1165,13 +1176,13 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 11, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:07.394844Z", - "iopub.status.busy": "2026-02-12T07:59:07.394844Z", - "iopub.status.idle": "2026-02-12T07:59:07.406751Z", - "shell.execute_reply": "2026-02-12T07:59:07.404735Z" + "iopub.execute_input": "2026-04-19T09:32:25.523710Z", + "iopub.status.busy": "2026-04-19T09:32:25.523518Z", + "iopub.status.idle": "2026-04-19T09:32:25.527964Z", + "shell.execute_reply": "2026-04-19T09:32:25.527337Z" } }, "outputs": [ @@ -1223,13 +1234,13 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 12, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:07.413969Z", - "iopub.status.busy": "2026-02-12T07:59:07.412959Z", - "iopub.status.idle": "2026-02-12T07:59:07.428527Z", - "shell.execute_reply": "2026-02-12T07:59:07.427517Z" + "iopub.execute_input": "2026-04-19T09:32:25.529922Z", + "iopub.status.busy": "2026-04-19T09:32:25.529738Z", + "iopub.status.idle": "2026-04-19T09:32:25.537885Z", + "shell.execute_reply": "2026-04-19T09:32:25.536924Z" } }, "outputs": [ @@ -1264,13 +1275,13 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 13, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:07.436041Z", - "iopub.status.busy": "2026-02-12T07:59:07.435043Z", - "iopub.status.idle": "2026-02-12T07:59:07.444869Z", - "shell.execute_reply": "2026-02-12T07:59:07.443860Z" + "iopub.execute_input": "2026-04-19T09:32:25.540343Z", + "iopub.status.busy": "2026-04-19T09:32:25.540123Z", + "iopub.status.idle": "2026-04-19T09:32:25.544509Z", + "shell.execute_reply": "2026-04-19T09:32:25.543368Z" } }, "outputs": [ @@ -1313,13 +1324,13 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 14, "metadata": { "execution": { - 
"iopub.execute_input": "2026-02-12T07:59:07.451868Z", - "iopub.status.busy": "2026-02-12T07:59:07.450869Z", - "iopub.status.idle": "2026-02-12T07:59:07.466046Z", - "shell.execute_reply": "2026-02-12T07:59:07.465038Z" + "iopub.execute_input": "2026-04-19T09:32:25.547566Z", + "iopub.status.busy": "2026-04-19T09:32:25.547307Z", + "iopub.status.idle": "2026-04-19T09:32:25.555652Z", + "shell.execute_reply": "2026-04-19T09:32:25.555116Z" } }, "outputs": [ @@ -1387,32 +1398,16 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 15, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:07.472683Z", - "iopub.status.busy": "2026-02-12T07:59:07.471675Z", - "iopub.status.idle": "2026-02-12T07:59:07.552476Z", - "shell.execute_reply": "2026-02-12T07:59:07.550368Z" + "iopub.execute_input": "2026-04-19T09:32:25.558061Z", + "iopub.status.busy": "2026-04-19T09:32:25.557814Z", + "iopub.status.idle": "2026-04-19T09:32:25.596279Z", + "shell.execute_reply": "2026-04-19T09:32:25.595638Z" } }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "optimize_graph: running baseline ...\n", - "opto.trace.io.optimization: optimize_graph: running baseline ...\n", - "optimize_graph: running iteration 1 ...\n", - "opto.trace.io.optimization: optimize_graph: running iteration 1 ...\n", - "Applied updates: ['planner_prompt']\n", - "opto.trace.io.optimization: Applied updates: ['planner_prompt']\n", - "optimize_graph: running iteration 2 ...\n", - "opto.trace.io.optimization: optimize_graph: running iteration 2 ...\n", - "Applied updates: ['planner_prompt']\n", - "opto.trace.io.optimization: Applied updates: ['planner_prompt']\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -1498,13 +1493,13 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 16, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:07.559993Z", - "iopub.status.busy": "2026-02-12T07:59:07.558992Z", - 
"iopub.status.idle": "2026-02-12T07:59:07.571810Z", - "shell.execute_reply": "2026-02-12T07:59:07.570297Z" + "iopub.execute_input": "2026-04-19T09:32:25.598386Z", + "iopub.status.busy": "2026-04-19T09:32:25.598206Z", + "iopub.status.idle": "2026-04-19T09:32:25.605162Z", + "shell.execute_reply": "2026-04-19T09:32:25.603918Z" } }, "outputs": [ @@ -1580,13 +1575,13 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 17, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:07.581005Z", - "iopub.status.busy": "2026-02-12T07:59:07.579994Z", - "iopub.status.idle": "2026-02-12T07:59:07.603100Z", - "shell.execute_reply": "2026-02-12T07:59:07.602018Z" + "iopub.execute_input": "2026-04-19T09:32:25.608814Z", + "iopub.status.busy": "2026-04-19T09:32:25.608519Z", + "iopub.status.idle": "2026-04-19T09:32:28.134823Z", + "shell.execute_reply": "2026-04-19T09:32:28.133977Z" } }, "outputs": [ @@ -1594,7 +1589,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[OK] Live LLM smoke test passed: 'User says: \"Say hello in one word.\"'\n", + "[OK] Live LLM smoke test passed: 'The user asks: \"Say hello in one word'\n", "\n", "Live LLM ready (openai client -> https://openrouter.ai/api/v1)\n", " model: nvidia/nemotron-3-super-120b-a12b:free\n" @@ -1668,13 +1663,13 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 18, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:07.609620Z", - "iopub.status.busy": "2026-02-12T07:59:07.608112Z", - "iopub.status.idle": "2026-02-12T07:59:09.143370Z", - "shell.execute_reply": "2026-02-12T07:59:09.141411Z" + "iopub.execute_input": "2026-04-19T09:32:28.138289Z", + "iopub.status.busy": "2026-04-19T09:32:28.138020Z", + "iopub.status.idle": "2026-04-19T09:33:00.679220Z", + "shell.execute_reply": "2026-04-19T09:33:00.677605Z" } }, "outputs": [ @@ -1684,13 +1679,23 @@ "text": [ "============================================================\n", "LIVE LLM MODE 
(OpenRouter via openai client)\n", - "============================================================\n", + "============================================================\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\n", - "Live answer (574 chars):\n", - " Gradient descent is a first‑order iterative optimization algorithm used to find a (local) minimum of a differentiable multivariate function. At each step it computes the gradient of the function at the current point and moves in the opposite direction—that is, along the direction of steepest descent\n", + "Live answer (1279 chars):\n", + " Gradient descent is an optimization algorithm used to minimize a loss (or cost) function by iteratively moving the model’s parameters in the direction of steepest decrease of the function. \n", "\n", - "Spans: 14 trace_ids=1 root_invoke=True\n", - " gen_ai.provider.name = openrouter\n", + "At each step, the parameters θ are updated as \n", + "\n", + "\\[\n", + "\\theta \\leftarrow \\theta - \\eta \\,\\nabla_\\theta L(\\theta\n", + "\n", + "Spans: 11 trace_ids=1 root_invoke=True\n", " gen_ai.provider.name = openrouter\n", " gen_ai.provider.name = openrouter\n", " gen_ai.provider.name = openrouter\n", @@ -1767,13 +1772,13 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 19, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:09.152911Z", - "iopub.status.busy": "2026-02-12T07:59:09.151899Z", - "iopub.status.idle": "2026-02-12T07:59:09.728081Z", - "shell.execute_reply": "2026-02-12T07:59:09.727073Z" + "iopub.execute_input": "2026-04-19T09:33:00.682729Z", + "iopub.status.busy": "2026-04-19T09:33:00.682396Z", + "iopub.status.idle": "2026-04-19T09:37:56.539057Z", + "shell.execute_reply": "2026-04-19T09:37:56.538170Z" } }, "outputs": [ @@ -1781,7 +1786,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "optimize_graph: running baseline ...\n", + "optimize_graph: running baseline ...\n" + ] + }, + { + 
"name": "stderr", + "output_type": "stream", + "text": [ "opto.trace.io.optimization: optimize_graph: running baseline ...\n" ] }, @@ -1793,16 +1804,34 @@ "LIVE OPTIMIZATION (3 queries, 1 iteration)\n", "============================================================\n", " planner_prompt BEFORE: 'Create a JSON plan for: {query}. Use web_researcher and synthesizer; include wikidata_researcher if IDs are requested.'\n", - " Running baseline...\n", - " Query 1/3: Summarize the causes and key events of t... score=0.35\n", - " Query 2/3: Give 3 factual relationships about Tesla... score=0.6\n" + " Running baseline...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 1/3: Summarize the causes and key events of t... score=0.6\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/3: Give 3 factual relationships about Tesla... score=0.85\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "optimize_graph: running iteration 1 ...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "optimize_graph: running iteration 1 ...\n", "opto.trace.io.optimization: optimize_graph: running iteration 1 ...\n" ] }, @@ -1811,229 +1840,47 @@ "output_type": "stream", "text": [ " Query 3/3: What is the Wikidata ID for CRISPR and l... score=0.85\n", - " Baseline average: 0.6000\n", - " Iteration 1/1...\n", - " Query 1/3: Summarize the causes and key events of t... score=0.35\n", - " Query 2/3: Give 3 factual relationships about Tesla... score=0.85\n", - " Query 3/3: What is the Wikidata ID for CRISPR and l... 
score=0.85\n", - " Iteration 1 average: 0.6833 * NEW BEST\n" + " Baseline average: 0.7667\n", + " Iteration 1/1...\n" ] }, { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "Optimizer step failed: litellm.APIError: APIError: OpenAIException - \n", - "Traceback (most recent call last):\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/llms/openai/openai.py\", line 725, in completion\n", - " raise e\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/llms/openai/openai.py\", line 669, in completion\n", - " final_response_obj = convert_to_model_response_object(\n", - " response_object=stringified_response,\n", - " model_response_object=model_response,\n", - " _response_headers=headers,\n", - " )\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py\", line 488, in convert_to_model_response_object\n", - " raise raised_exception\n", - "Exception\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/main.py\", line 1973, in completion\n", - " raise e\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/main.py\", line 1946, in completion\n", - " response = openai_chat_completions.completion(\n", - " model=model,\n", - " ...<15 lines>...\n", - " custom_llm_provider=custom_llm_provider,\n", - " )\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/llms/openai/openai.py\", line 736, in completion\n", - " raise OpenAIError(\n", - " ...<4 lines>...\n", - " )\n", - "litellm.llms.openai.common_utils.OpenAIError\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/user/code/Trace/opto/trace/io/optimization.py\", line 550, in 
optimize_graph\n", - " raw_updates = _optimizer.step()\n", - " File \"/home/user/code/Trace/opto/optimizers/optimizer.py\", line 236, in step\n", - " update_dict = self.propose(*args, **kwargs)\n", - " File \"/home/user/code/Trace/opto/optimizers/optimizer.py\", line 281, in propose\n", - " return self._step(*args, **kwargs)\n", - " ~~~~~~~~~~^^^^^^^^^^^^^^^^^\n", - " File \"/home/user/code/Trace/opto/optimizers/optoprime_v2.py\", line 616, in _step\n", - " response = self.call_llm(\n", - " system_prompt=system_prompt,\n", - " ...<2 lines>...\n", - " max_tokens=self.max_tokens,\n", - " )\n", - " File \"/home/user/code/Trace/opto/optimizers/optoprime_v2.py\", line 674, in call_llm\n", - " response = self.llm(messages=messages, max_tokens=max_tokens, response_format=response_format)\n", - " File \"/home/user/code/Trace/opto/utils/llm.py\", line 84, in __call__\n", - " return self.model(*args, **kwargs)\n", - " ~~~~~~~~~~^^^^^^^^^^^^^^^^^\n", - " File \"/home/user/code/Trace/opto/utils/llm.py\", line 297, in \n", - " return lambda *args, **kwargs: self._model(*args, **kwargs)\n", - " ~~~~~~~~~~~^^^^^^^^^^^^^^^^^\n", - " File \"/home/user/code/Trace/opto/utils/llm.py\", line 282, in \n", - " return lambda *args, **kwargs: retry_with_exponential_backoff(\n", - " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", - " lambda: litellm.completion(model_name, *args, **kwargs),\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " ...<2 lines>...\n", - " operation_name=\"LiteLLM_completion\"\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " )\n", - " ^\n", - " File \"/home/user/code/Trace/opto/utils/auto_retry.py\", line 77, in retry_with_exponential_backoff\n", - " raise e\n", - " File \"/home/user/code/Trace/opto/utils/auto_retry.py\", line 23, in retry_with_exponential_backoff\n", - " return func()\n", - " File \"/home/user/code/Trace/opto/utils/llm.py\", line 283, in \n", - " lambda: litellm.completion(model_name, *args, **kwargs),\n", - " 
~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/utils.py\", line 1332, in wrapper\n", - " raise e\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/utils.py\", line 1207, in wrapper\n", - " result = original_function(*args, **kwargs)\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/main.py\", line 3452, in completion\n", - " raise exception_type(\n", - " ~~~~~~~~~~~~~~^\n", - " model=model,\n", - " ^^^^^^^^^^^^\n", - " ...<3 lines>...\n", - " extra_kwargs=kwargs,\n", - " ^^^^^^^^^^^^^^^^^^^^\n", - " )\n", - " ^\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/litellm_core_utils/exception_mapping_utils.py\", line 2301, in exception_type\n", - " raise e\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/litellm_core_utils/exception_mapping_utils.py\", line 528, in exception_type\n", - " raise APIError(\n", - " ...<6 lines>...\n", - " )\n", - "litellm.exceptions.APIError: litellm.APIError: APIError: OpenAIException - \n", - "opto.trace.io.optimization: Optimizer step failed: litellm.APIError: APIError: OpenAIException - \n", - "Traceback (most recent call last):\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/llms/openai/openai.py\", line 725, in completion\n", - " raise e\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/llms/openai/openai.py\", line 669, in completion\n", - " final_response_obj = convert_to_model_response_object(\n", - " response_object=stringified_response,\n", - " model_response_object=model_response,\n", - " _response_headers=headers,\n", - " )\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py\", line 488, in convert_to_model_response_object\n", - " raise raised_exception\n", - "Exception\n", - "\n", - "During handling of the above 
exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/main.py\", line 1973, in completion\n", - " raise e\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/main.py\", line 1946, in completion\n", - " response = openai_chat_completions.completion(\n", - " model=model,\n", - " ...<15 lines>...\n", - " custom_llm_provider=custom_llm_provider,\n", - " )\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/llms/openai/openai.py\", line 736, in completion\n", - " raise OpenAIError(\n", - " ...<4 lines>...\n", - " )\n", - "litellm.llms.openai.common_utils.OpenAIError\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/user/code/Trace/opto/trace/io/optimization.py\", line 550, in optimize_graph\n", - " raw_updates = _optimizer.step()\n", - " File \"/home/user/code/Trace/opto/optimizers/optimizer.py\", line 236, in step\n", - " update_dict = self.propose(*args, **kwargs)\n", - " File \"/home/user/code/Trace/opto/optimizers/optimizer.py\", line 281, in propose\n", - " return self._step(*args, **kwargs)\n", - " ~~~~~~~~~~^^^^^^^^^^^^^^^^^\n", - " File \"/home/user/code/Trace/opto/optimizers/optoprime_v2.py\", line 616, in _step\n", - " response = self.call_llm(\n", - " system_prompt=system_prompt,\n", - " ...<2 lines>...\n", - " max_tokens=self.max_tokens,\n", - " )\n", - " File \"/home/user/code/Trace/opto/optimizers/optoprime_v2.py\", line 674, in call_llm\n", - " response = self.llm(messages=messages, max_tokens=max_tokens, response_format=response_format)\n", - " File \"/home/user/code/Trace/opto/utils/llm.py\", line 84, in __call__\n", - " return self.model(*args, **kwargs)\n", - " ~~~~~~~~~~^^^^^^^^^^^^^^^^^\n", - " File \"/home/user/code/Trace/opto/utils/llm.py\", line 297, in \n", - " return lambda *args, **kwargs: 
self._model(*args, **kwargs)\n", - " ~~~~~~~~~~~^^^^^^^^^^^^^^^^^\n", - " File \"/home/user/code/Trace/opto/utils/llm.py\", line 282, in \n", - " return lambda *args, **kwargs: retry_with_exponential_backoff(\n", - " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", - " lambda: litellm.completion(model_name, *args, **kwargs),\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " ...<2 lines>...\n", - " operation_name=\"LiteLLM_completion\"\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " )\n", - " ^\n", - " File \"/home/user/code/Trace/opto/utils/auto_retry.py\", line 77, in retry_with_exponential_backoff\n", - " raise e\n", - " File \"/home/user/code/Trace/opto/utils/auto_retry.py\", line 23, in retry_with_exponential_backoff\n", - " return func()\n", - " File \"/home/user/code/Trace/opto/utils/llm.py\", line 283, in \n", - " lambda: litellm.completion(model_name, *args, **kwargs),\n", - " ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/utils.py\", line 1332, in wrapper\n", - " raise e\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/utils.py\", line 1207, in wrapper\n", - " result = original_function(*args, **kwargs)\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/main.py\", line 3452, in completion\n", - " raise exception_type(\n", - " ~~~~~~~~~~~~~~^\n", - " model=model,\n", - " ^^^^^^^^^^^^\n", - " ...<3 lines>...\n", - " extra_kwargs=kwargs,\n", - " ^^^^^^^^^^^^^^^^^^^^\n", - " )\n", - " ^\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/litellm_core_utils/exception_mapping_utils.py\", line 2301, in exception_type\n", - " raise e\n", - " File \"/home/user/miniconda3/lib/python3.13/site-packages/litellm/litellm_core_utils/exception_mapping_utils.py\", line 528, in exception_type\n", - " raise APIError(\n", - " ...<6 lines>...\n", - " )\n", - "litellm.exceptions.APIError: litellm.APIError: APIError: 
OpenAIException - \n" + " Query 1/3: Summarize the causes and key events of t... score=0.6\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 2/3: Give 3 factual relationships about Tesla... score=0.85\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query 3/3: What is the Wikidata ID for CRISPR and l... score=0.85\n", + " Iteration 1 average: 0.7667\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\n", - "\u001b[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\u001b[0m\n", - "LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.\n", - "\n", - "\n", - "\u001b[1;31mProvider List: https://docs.litellm.ai/docs/providers\u001b[0m\n", - "\n", - "LiteLLM_completion: Non-retryable error: litellm.APIError: APIError: OpenAIException - \n", "\n", " planner_prompt AFTER: 'Create a JSON plan for: {query}. Use web_researcher and synthesizer; include wikidata_researcher if IDs are requested.'\n", - " Baseline score: 0.6000\n", - " Best score: 0.6833\n", - " Score history: [0.6, 0.6833]\n", - " Total LLM calls: 29\n", + " Baseline score: 0.7667\n", + " Best score: 0.7667\n", + " Score history: [0.7667, 0.7667]\n", + " Total LLM calls: 32\n", "\n", "Iter Avg Score Best Score \n", "------------------------------\n", - "0 0.6000 0.6000 \n", - "1 0.6833 0.6833 \n", + "0 0.7667 0.7667 \n", + "1 0.7667 0.7667 \n", "\n", " Live OTLP: 0 spans, 0 trace IDs, root_invoke=False\n" ] @@ -2114,13 +1961,13 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 20, "metadata": { "execution": { - "iopub.execute_input": "2026-02-12T07:59:09.732598Z", - "iopub.status.busy": "2026-02-12T07:59:09.732598Z", - "iopub.status.idle": "2026-02-12T07:59:09.818823Z", - "shell.execute_reply": "2026-02-12T07:59:09.817814Z" + "iopub.execute_input": "2026-04-19T09:37:56.542223Z", + "iopub.status.busy": "2026-04-19T09:37:56.541953Z", + "iopub.status.idle": 
"2026-04-19T09:37:56.571430Z", + "shell.execute_reply": "2026-04-19T09:37:56.570551Z" } }, "outputs": [ diff --git a/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb b/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb index a5993ee0..f34fa51d 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb @@ -2,18 +2,26 @@ "cells": [ { "cell_type": "markdown", + "id": "ec84920b", "metadata": {}, "source": [ "# LangGraph trace-native optimization demo\n", "\n", "Compact, deterministic demo for `backend=\"trace\"` without OTEL ingestion.\n" - ], - "id": "ec84920b" + ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 1, + "id": "a6bb3b02", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-19T09:38:01.915777Z", + "iopub.status.busy": "2026-04-19T09:38:01.915407Z", + "iopub.status.idle": "2026-04-19T09:38:04.431013Z", + "shell.execute_reply": "2026-04-19T09:38:04.430293Z" + } + }, "outputs": [], "source": [ "from langgraph.graph import StateGraph, START, END\n", @@ -22,13 +30,20 @@ "\n", "def _raw(value):\n", " return getattr(value, 'data', value)\n" - ], - "id": "a6bb3b02" + ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 2, + "id": "68f6f76b", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-19T09:38:04.433760Z", + "iopub.status.busy": "2026-04-19T09:38:04.433546Z", + "iopub.status.idle": "2026-04-19T09:38:04.438579Z", + "shell.execute_reply": "2026-04-19T09:38:04.437906Z" + } + }, "outputs": [], "source": [ "planner_prompt = node('Create a plan for: {query}', trainable=True, name='planner_prompt')\n", @@ -52,14 +67,32 @@ " g.add_edge('planner', 'synth')\n", " g.add_edge('synth', END)\n", " return g\n" - ], - "id": "68f6f76b" + ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": 
[], + "execution_count": 3, + "id": "9cb6347f", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-19T09:38:04.440522Z", + "iopub.status.busy": "2026-04-19T09:38:04.440330Z", + "iopub.status.idle": "2026-04-19T09:38:04.454739Z", + "shell.execute_reply": "2026-04-19T09:38:04.453807Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Answer: What is CRISPR?\\nPlan: Create a plan for: What is CRISPR?'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ig = instrument_graph(\n", " backend='trace',\n", @@ -72,14 +105,32 @@ "result = ig.invoke({'query': 'What is CRISPR?'})\n", "assert 'CRISPR' in result['final_answer'].data\n", "result['final_answer'].data\n" - ], - "id": "9cb6347f" + ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 4, + "id": "6f15abf5", + "metadata": { + "execution": { + "iopub.execute_input": "2026-04-19T09:38:04.457142Z", + "iopub.status.busy": "2026-04-19T09:38:04.456930Z", + "iopub.status.idle": "2026-04-19T09:38:04.482135Z", + "shell.execute_reply": "2026-04-19T09:38:04.481453Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1.0, {'synth_prompt': 'CRISPR optimized :: {query} :: {plan}'})" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "class NotebookOptimizer:\n", " def __init__(self, prompt):\n", @@ -117,8 +168,7 @@ "assert opt.best_score == 1.0\n", "assert opt.best_updates['synth_prompt'].startswith('CRISPR optimized')\n", "opt.best_score, opt.best_updates\n" - ], - "id": "6f15abf5" + ] } ], "metadata": { @@ -128,10 +178,18 @@ "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "3" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" } }, "nbformat": 4, 
"nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/opto/features/graph/__init__.py b/opto/features/graph/__init__.py new file mode 100644 index 00000000..da59b890 --- /dev/null +++ b/opto/features/graph/__init__.py @@ -0,0 +1,12 @@ +from opto.features.graph.sidecars import GraphRunSidecar, OTELRunSidecar, GraphCandidateSnapshot +from opto.features.graph.adapter import GraphAdapter, LangGraphAdapter +from opto.features.graph.module import GraphModule + +__all__ = [ + "GraphRunSidecar", + "OTELRunSidecar", + "GraphCandidateSnapshot", + "GraphAdapter", + "LangGraphAdapter", + "GraphModule", +] diff --git a/opto/trace/graph/adapter.py b/opto/features/graph/adapter.py similarity index 94% rename from opto/trace/graph/adapter.py rename to opto/features/graph/adapter.py index 6c40e359..b1b97aab 100644 --- a/opto/trace/graph/adapter.py +++ b/opto/features/graph/adapter.py @@ -7,10 +7,10 @@ from opto.trace import bundle, node from opto.trace.bundle import FunModule, to_data -from opto.trace.graph.module import GraphModule -from opto.trace.graph.sidecars import GraphRunSidecar, OTELRunSidecar +from opto.features.graph.module import GraphModule +from opto.features.graph.sidecars import GraphRunSidecar, OTELRunSidecar from opto.trace.io.bindings import Binding -from opto.trace.io.graph_instrumentation import TraceGraph +from opto.features.graph.graph_instrumentation import TraceGraph from opto.trace.nodes import Node, ParameterNode @@ -77,14 +77,17 @@ def as_module(self) -> GraphModule: def instrument(self, backend: Optional[str] = None, **kwargs: Any): effective_backend = backend or self.backend + service_name = kwargs.pop("service_name", self.service_name) + input_key = kwargs.pop("input_key", self.input_key) + output_key = kwargs.pop("output_key", self.output_key) if effective_backend == "trace": return TraceGraph( graph=self, parameters=self.parameters(), bindings=self.bindings_dict(), - service_name=self.service_name, - input_key=self.input_key, - 
output_key=self.output_key, + service_name=service_name, + input_key=input_key, + output_key=output_key, ) if effective_backend == "otel": from opto.trace.io.instrumentation import instrument_graph @@ -96,9 +99,9 @@ def instrument(self, backend: Optional[str] = None, **kwargs: Any): graph=graph, backend="otel", bindings=merged, - service_name=self.service_name, - input_key=self.input_key, - output_key=self.output_key, + service_name=service_name, + input_key=input_key, + output_key=output_key, **kwargs, ) raise ValueError(f"Unsupported backend: {effective_backend!r}") diff --git a/opto/trace/io/graph_instrumentation.py b/opto/features/graph/graph_instrumentation.py similarity index 80% rename from opto/trace/io/graph_instrumentation.py rename to opto/features/graph/graph_instrumentation.py index 812c5480..e38f1b03 100644 --- a/opto/trace/io/graph_instrumentation.py +++ b/opto/features/graph/graph_instrumentation.py @@ -6,8 +6,9 @@ from dataclasses import dataclass, field from typing import Any, Callable, Dict, List, Optional -from opto.trace import bundle, node +from opto.trace import node from opto.trace.bundle import FunModule +from opto.trace.io.bindings import Binding from opto.trace.io.observers import GraphObserver @@ -30,10 +31,13 @@ class TraceGraph: _last_sidecar: Any = field(default=None, repr=False, init=False) observers: List[GraphObserver] = field(default_factory=list) _last_observer_artifacts: List[Any] = field(default_factory=list, init=False, repr=False) + observer_meta: Dict[str, Any] = field(default_factory=dict) def invoke(self, state: Any, **kwargs: Any) -> Any: for obs in self.observers: - obs.start(bindings=self.bindings, meta={"service_name": self.service_name}) + meta = {"service_name": self.service_name} + meta.update(self.observer_meta) + obs.start(bindings=self.bindings, meta=meta) result = None error = None @@ -74,15 +78,18 @@ def _to_funmodule( trainable: bool = True, traceable_code: bool = True, allow_external_dependencies: bool = 
True, + scope: Optional[Dict[str, Any]] = None, ) -> Any: if isinstance(func, FunModule) or hasattr(func, "_fun"): return func - wrapped = bundle( + wrapped = FunModule( + fun=func, trainable=trainable, traceable_code=traceable_code, allow_external_dependencies=allow_external_dependencies, - )(func) + _ldict=(scope or {}), + ) try: wrapped.__signature__ = inspect.signature(wrapped._fun) @@ -125,6 +132,7 @@ def instrument_trace_graph( raise ValueError("backend='trace' requires a callable graph_factory") parameters: List[Any] = [] + bindings: Dict[str, Binding] = {} for name in graph_agents_functions: if name not in scope: @@ -138,6 +146,7 @@ def instrument_trace_graph( trainable=train_graph_agents_functions, traceable_code=True, allow_external_dependencies=True, + scope=scope, ) scope[name] = wrapped if hasattr(wrapped, "parameters"): @@ -147,6 +156,13 @@ def instrument_trace_graph( for idx, prompt in enumerate(list(graph_prompts_list)): if hasattr(prompt, "data") and hasattr(prompt, "name"): parameters.append(prompt) + prompt_name = str(getattr(prompt, "name", f"prompt_{idx}")).split(":")[0] + if hasattr(prompt, "_set"): + bindings[prompt_name] = Binding( + get=lambda p=prompt: p.data, + set=lambda v, p=prompt: p._set(v), + kind="prompt", + ) continue new_prompt = node(str(getattr(prompt, "data", prompt)), trainable=True) @@ -158,6 +174,13 @@ def instrument_trace_graph( graph_prompts_list[idx] = new_prompt parameters.append(new_prompt) + prompt_name = str(getattr(new_prompt, "name", f"prompt_{idx}")).split(":")[0] + if hasattr(new_prompt, "_set"): + bindings[prompt_name] = Binding( + get=lambda p=new_prompt: p.data, + set=lambda v, p=new_prompt: p._set(v), + kind="prompt", + ) graph = graph_factory() compiled = graph.compile() if hasattr(graph, "compile") else graph @@ -165,8 +188,11 @@ def instrument_trace_graph( return TraceGraph( graph=compiled, parameters=_dedupe_identity(parameters), - bindings={}, + bindings=bindings, service_name=service_name, 
input_key=input_key, output_key=output_key, + observer_meta={ + "semantic_names": [str(name).split(".")[-1] for name in (graph_agents_functions or [])] + }, ) diff --git a/opto/trace/graph/module.py b/opto/features/graph/module.py similarity index 94% rename from opto/trace/graph/module.py rename to opto/features/graph/module.py index a3ac8646..eef3b4cd 100644 --- a/opto/trace/graph/module.py +++ b/opto/features/graph/module.py @@ -5,7 +5,7 @@ from opto.trace.modules import Module if TYPE_CHECKING: - from opto.trace.graph.adapter import GraphAdapter + from opto.features.graph.adapter import GraphAdapter class GraphModule(Module): diff --git a/opto/trace/graph/sidecars.py b/opto/features/graph/sidecars.py similarity index 100% rename from opto/trace/graph/sidecars.py rename to opto/features/graph/sidecars.py diff --git a/opto/trace/bundle.py b/opto/trace/bundle.py index a6595b72..f3817632 100644 --- a/opto/trace/bundle.py +++ b/opto/trace/bundle.py @@ -546,6 +546,20 @@ def postprocess_output(self, output, fun, _args, _kwargs, used_nodes, inputs): ) # We don't need to keep track of the inputs if we are not tracing. 
# Wrap the output as a MessageNode or an ExceptionNode nodes = self.wrap(output, inputs, external_dependencies) + try: + from opto.trace.io.telemetry_session import TelemetrySession + + session = TelemetrySession.current() + if session is not None and isinstance(nodes, MessageNode): + observer_inputs = dict(inputs) + for idx, dep in enumerate(external_dependencies): + observer_inputs.setdefault( + getattr(dep, "name", f"dep_{idx}"), + dep, + ) + session.on_message_node_created(nodes, inputs=observer_inputs) + except Exception: + pass return nodes def forward(self, *args, **kwargs): @@ -567,14 +581,28 @@ def sync_forward(self, fun, *args, **kwargs): """ # Wrap the inputs as nodes inputs, args, kwargs, _args, _kwargs = self._wrap_inputs(fun, args, kwargs) - # Execute fun - with trace_nodes() as used_nodes: - # After exit, used_nodes contains the nodes whose data attribute is read in the operator fun. - _args, _kwargs = self.preprocess_inputs(args, kwargs, _args, _kwargs) - output = self.sync_call_fun(fun, *_args, **_kwargs) - # Wrap the output as a MessageNode or an ExceptionNode - nodes = self.postprocess_output(output, fun, _args, _kwargs, used_nodes, inputs) - return nodes + try: + from opto.trace.io.telemetry_session import TelemetrySession + + session = TelemetrySession.current() + except Exception: + session = None + + if session is None: + with trace_nodes() as used_nodes: + _args, _kwargs = self.preprocess_inputs(args, kwargs, _args, _kwargs) + output = self.sync_call_fun(fun, *_args, **_kwargs) + return self.postprocess_output(output, fun, _args, _kwargs, used_nodes, inputs) + + with session.bundle_span( + fun_name=self.info["fun_name"], + file_path=self.info["file"], + inputs=inputs, + ): + with trace_nodes() as used_nodes: + _args, _kwargs = self.preprocess_inputs(args, kwargs, _args, _kwargs) + output = self.sync_call_fun(fun, *_args, **_kwargs) + return self.postprocess_output(output, fun, _args, _kwargs, used_nodes, inputs) async def async_forward(self, 
fun, *args, **kwargs): """ @@ -585,16 +613,28 @@ async def async_forward(self, fun, *args, **kwargs): """ # Wrap the inputs as nodes inputs, args, kwargs, _args, _kwargs = self._wrap_inputs(fun, args, kwargs) - # Execute fun - with trace_nodes() as used_nodes: - # After exit, used_nodes contains the nodes whose data attribute is read in the operator fun. - _args, _kwargs = self.preprocess_inputs(args, kwargs, _args, _kwargs) - output = await self.async_call_fun( - fun, *_args, **_kwargs - ) # use await to call the async function - # Wrap the output as a MessageNode or an ExceptionNode - nodes = self.postprocess_output(output, fun, _args, _kwargs, used_nodes, inputs) - return nodes + try: + from opto.trace.io.telemetry_session import TelemetrySession + + session = TelemetrySession.current() + except Exception: + session = None + + if session is None: + with trace_nodes() as used_nodes: + _args, _kwargs = self.preprocess_inputs(args, kwargs, _args, _kwargs) + output = await self.async_call_fun(fun, *_args, **_kwargs) + return self.postprocess_output(output, fun, _args, _kwargs, used_nodes, inputs) + + with session.bundle_span( + fun_name=self.info["fun_name"], + file_path=self.info["file"], + inputs=inputs, + ): + with trace_nodes() as used_nodes: + _args, _kwargs = self.preprocess_inputs(args, kwargs, _args, _kwargs) + output = await self.async_call_fun(fun, *_args, **_kwargs) + return self.postprocess_output(output, fun, _args, _kwargs, used_nodes, inputs) def wrap( self, diff --git a/opto/trace/graph/__init__.py b/opto/trace/graph/__init__.py deleted file mode 100644 index 1e60ae64..00000000 --- a/opto/trace/graph/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from opto.trace.graph.sidecars import GraphRunSidecar, OTELRunSidecar, GraphCandidateSnapshot -from opto.trace.graph.adapter import GraphAdapter, LangGraphAdapter -from opto.trace.graph.module import GraphModule - -__all__ = [ - "GraphRunSidecar", - "OTELRunSidecar", - "GraphCandidateSnapshot", - 
"GraphAdapter", - "LangGraphAdapter", - "GraphModule", -] diff --git a/opto/trace/io/__init__.py b/opto/trace/io/__init__.py index 90960aa7..44e6df32 100644 --- a/opto/trace/io/__init__.py +++ b/opto/trace/io/__init__.py @@ -22,7 +22,7 @@ # -- high-level API -------------------------------------------------------- from opto.trace.io.instrumentation import instrument_graph, InstrumentedGraph, SysMonInstrumentedGraph -from opto.trace.io.graph_instrumentation import instrument_trace_graph, TraceGraph +from opto.features.graph.graph_instrumentation import instrument_trace_graph, TraceGraph from opto.trace.io.optimization import ( optimize_graph, EvalResult, @@ -54,7 +54,7 @@ from opto.trace.io.observers import ObserverArtifact, GraphObserver, OTelObserver from opto.trace.io.sysmonitoring import SysMonitoringSession, SysMonObserver, sysmon_profile_to_tgj try: - from opto.trace.graph import ( + from opto.features.graph import ( GraphAdapter, LangGraphAdapter, GraphModule, diff --git a/opto/trace/io/instrumentation.py b/opto/trace/io/instrumentation.py index 944ed41b..8f5d89f4 100644 --- a/opto/trace/io/instrumentation.py +++ b/opto/trace/io/instrumentation.py @@ -16,7 +16,7 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, Set from opto.trace.io.bindings import Binding, make_dict_binding -from opto.trace.io.graph_instrumentation import instrument_trace_graph +from opto.features.graph.graph_instrumentation import instrument_trace_graph from opto.trace.io.langgraph_otel_runtime import TracingLLM from opto.trace.io.observers import GraphObserver, OTelObserver from opto.trace.io.sysmonitoring import SysMonObserver, SysMonitoringSession @@ -235,7 +235,7 @@ def instrument_graph( InstrumentedGraph """ try: - from opto.trace.graph.adapter import GraphAdapter + from opto.features.graph.adapter import GraphAdapter except Exception: GraphAdapter = None diff --git a/opto/trace/io/langgraph_otel_runtime.py b/opto/trace/io/langgraph_otel_runtime.py index 
dc0addab..01010ed7 100644 --- a/opto/trace/io/langgraph_otel_runtime.py +++ b/opto/trace/io/langgraph_otel_runtime.py @@ -375,6 +375,54 @@ def node_call( return content + def template_prompt_call( + self, + *, + span_name: str, + template_name: str, + template: str, + variables: Optional[Mapping[str, Any]] = None, + system_prompt: Optional[str] = None, + optimizable_key: Optional[str] = None, + code_key: Optional[str] = None, + code_fn: Any = None, + user_query: Optional[str] = None, + extra_inputs: Optional[Dict[str, Any]] = None, + **llm_kwargs: Any, + ) -> str: + """Render a prompt template, then forward to ``node_call``.""" + rendered_vars = { + key: str(getattr(value, "data", value)) + for key, value in (variables or {}).items() + } + prompt = template.format(**rendered_vars) + + messages: List[Dict[str, Any]] = [] + if system_prompt is not None: + messages.append({"role": "system", "content": system_prompt}) + messages.append({"role": "user", "content": prompt}) + + merged_inputs: Dict[str, str] = { + key: str(value) for key, value in (extra_inputs or {}).items() + } + for key, value in rendered_vars.items(): + merged_inputs.setdefault(key, value) + if user_query is None and "query" in rendered_vars: + user_query = rendered_vars["query"] + + return self.node_call( + span_name=span_name, + template_name=template_name, + template=template, + optimizable_key=optimizable_key, + code_key=code_key, + code_fn=code_fn, + user_query=user_query, + extra_inputs=merged_inputs, + messages=messages, + **llm_kwargs, + ) + DEFAULT_EVAL_METRIC_KEYS: Mapping[str, str] = { "score": "eval.score", diff --git a/opto/trace/io/optimization.py b/opto/trace/io/optimization.py index e5a3ea04..9486c95c 100644 --- a/opto/trace/io/optimization.py +++ b/opto/trace/io/optimization.py @@ -27,7 +27,7 @@ ) from opto.trace.io.bindings import Binding, apply_updates -from opto.trace.io.graph_instrumentation import TraceGraph +from opto.features.graph.graph_instrumentation import TraceGraph from 
opto.trace.io.instrumentation import InstrumentedGraph, SysMonInstrumentedGraph from opto.trace.io.sysmonitoring import sysmon_profile_to_tgj @@ -819,8 +819,13 @@ def _snapshot_parameters_from_bindings(bindings_dict: Dict[str, Binding]) -> Dic ) ) - merged_doc = merge_tgj(docs) if len(docs) > 1 else docs[0] - nodes = ingest_tgj(merged_doc) + if len(docs) > 1: + merged_docs = merge_tgj(docs) + nodes = {} + for doc_nodes in merged_docs.values(): + nodes.update(doc_nodes) + else: + nodes = ingest_tgj(docs[0]) avg_score = sum((r.score or 0.0) for r in runs) / max(1, len(runs)) score_history.append(avg_score) diff --git a/opto/trace/io/sysmonitoring.py b/opto/trace/io/sysmonitoring.py index 78deb67d..bbf2fdff 100644 --- a/opto/trace/io/sysmonitoring.py +++ b/opto/trace/io/sysmonitoring.py @@ -36,19 +36,50 @@ def __init__(self, tool_id: int = 7, service_name: str = "langgraph-sysmon") -> self._tls = threading.local() self._bindings_snapshot: Dict[str, Dict[str, Any]] = {} + def _claim_tool_id(self) -> int: + """Claim a valid sys.monitoring tool id. + + Python accepts tool ids in a small runtime-defined range (commonly 0..5). + If the configured id is invalid or already taken, fall back to the first + available id in that valid range. 
+ """ + candidate_ids = [self.tool_id] + [i for i in range(5, -1, -1) if i != self.tool_id] + for candidate in candidate_ids: + try: + sys.monitoring.use_tool_id(candidate, self.service_name) + self.tool_id = candidate + return candidate + except Exception: + continue + raise RuntimeError("Unable to claim a valid sys.monitoring tool id") + def _stack(self) -> List[SysMonEvent]: if not hasattr(self._tls, "stack"): self._tls.stack = [] return self._tls.stack - def start(self, *, bindings: Dict[str, Any]) -> None: + def start( + self, + *, + bindings: Dict[str, Any], + meta: Optional[Dict[str, Any]] = None, + ) -> None: self._events.clear() self._bindings_snapshot = { k: {"value": b.get(), "kind": b.kind, "trainable": True} for k, b in (bindings or {}).items() } + semantic_names = set((meta or {}).get("semantic_names") or ()) + + def _safe_preview(value: Any) -> str: + try: + return repr(value)[:200] + except Exception: + return f"<{type(value).__name__}>" def on_start(code, instruction_offset): + if semantic_names and code.co_name not in semantic_names: + return stack = self._stack() eid = uuid.uuid4().hex[:16] ev = SysMonEvent( @@ -65,27 +96,27 @@ def on_start(code, instruction_offset): def on_return(code, instruction_offset, retval): stack = self._stack() - if not stack: + if not stack or stack[-1].name != code.co_name: return ev = stack.pop() ev.end_ns = time.perf_counter_ns() ev.duration_ns = ev.end_ns - ev.start_ns - ev.return_preview = repr(retval)[:200] + ev.return_preview = _safe_preview(retval) def on_unwind(code, instruction_offset, exc): stack = self._stack() - if not stack: + if not stack or stack[-1].name != code.co_name: return ev = stack.pop() ev.end_ns = time.perf_counter_ns() ev.duration_ns = ev.end_ns - ev.start_ns - ev.return_preview = f"[UNWIND] {type(exc).__name__}: {exc}" + ev.return_preview = f"[UNWIND] {type(exc).__name__}: {_safe_preview(exc)}" self._on_start = on_start self._on_return = on_return self._on_unwind = on_unwind - 
sys.monitoring.use_tool_id(self.tool_id, self.service_name) + self._claim_tool_id() sys.monitoring.register_callback(self.tool_id, sys.monitoring.events.PY_START, on_start) sys.monitoring.register_callback(self.tool_id, sys.monitoring.events.PY_RETURN, on_return) sys.monitoring.register_callback(self.tool_id, sys.monitoring.events.PY_UNWIND, on_unwind) @@ -153,7 +184,7 @@ def start( bindings: Dict[str, Any], meta: Optional[Dict[str, Any]] = None, ) -> None: - self.session.start(bindings=bindings) + self.session.start(bindings=bindings, meta=meta) def stop( self, @@ -188,7 +219,7 @@ def sysmon_profile_to_tgj( for ev in doc.get("events", []): inputs = {} if ev.get("parent_id"): - inputs["parent"] = f"message:msg:{ev['parent_id']}" + inputs["parent"] = {"ref": f"msg:{ev['parent_id']}"} nodes[f"msg:{ev['id']}"] = { "id": f"msg:{ev['id']}", "kind": "message", diff --git a/opto/trace/io/telemetry_session.py b/opto/trace/io/telemetry_session.py index 8f190f39..06ef1567 100644 --- a/opto/trace/io/telemetry_session.py +++ b/opto/trace/io/telemetry_session.py @@ -369,6 +369,14 @@ def on_message_node_created(self, node: Any, *, inputs: Optional[Dict[str, Any]] try: ctx = cur.get_span_context() if getattr(ctx, "is_valid", False) and cur.is_recording(): + if inputs: + in_attrs, p_attrs = self._inputs_and_params_from_trace_inputs(inputs) + for attrs in (in_attrs, p_attrs): + for key, value in attrs.items(): + try: + cur.set_attribute(key, value) + except Exception: + cur.set_attribute(key, str(value)) cur.set_attribute("message.id", str(getattr(node, "name", ""))) self._remember_node_span(node, cur) return diff --git a/tests/features_tests/test_graph_module_prioritysearch.py b/tests/features_tests/test_graph_module_prioritysearch.py index 1839d28a..7fa5d13d 100644 --- a/tests/features_tests/test_graph_module_prioritysearch.py +++ b/tests/features_tests/test_graph_module_prioritysearch.py @@ -8,7 +8,7 @@ from opto.features.priority_search import PrioritySearch from 
opto.optimizers.optimizer import Optimizer from opto.trace import node -from opto.trace.graph import LangGraphAdapter +from opto.features.graph import LangGraphAdapter from opto.trainer.guide import Guide diff --git a/tests/features_tests/test_graph_module_train.py b/tests/features_tests/test_graph_module_train.py index e691e85f..a1c4d840 100644 --- a/tests/features_tests/test_graph_module_train.py +++ b/tests/features_tests/test_graph_module_train.py @@ -7,7 +7,7 @@ from opto.optimizers.optimizer import Optimizer from opto.trace import node -from opto.trace.graph import LangGraphAdapter +from opto.features.graph import LangGraphAdapter from opto.trainer.train import train from opto.trainer.guide import Guide diff --git a/tests/features_tests/test_sysmon_backend.py b/tests/features_tests/test_sysmon_backend.py index b1fdcbe3..72f36212 100644 --- a/tests/features_tests/test_sysmon_backend.py +++ b/tests/features_tests/test_sysmon_backend.py @@ -6,18 +6,37 @@ START = langgraph.START END = langgraph.END -from opto.trace.io import instrument_graph, optimize_graph, SysMonInstrumentedGraph +from opto.trace.io import ( + instrument_graph, + optimize_graph, + SysMonInstrumentedGraph, + make_dict_binding, +) +from opto.trace.io.sysmonitoring import sysmon_profile_to_tgj +from opto.trace.io.tgj_ingest import ingest_tgj pytestmark = pytest.mark.skipif(not hasattr(sys, "monitoring"), reason="sys.monitoring unavailable") -def build_graph(): +def build_graph(templates=None): + templates = templates or { + "planner_prompt": "Plan {query}", + "synth_prompt": "answer::{query}::{plan}", + } + def planner(state): - return {"plan": f"plan::{state['query']}"} + return { + "query": state["query"], + "plan": templates["planner_prompt"].replace("{query}", str(state["query"])), + } def synth(state): - return {"final_answer": f"answer::{state['query']}::{state['plan']}"} + return { + "final_answer": templates["synth_prompt"] + .replace("{query}", str(state["query"])) + .replace("{plan}", 
str(state["plan"])), + } graph = StateGraph(dict) graph.add_node("planner", planner) @@ -32,7 +51,7 @@ def test_sysmon_backend_invoke_exports_profile_doc(): ig = instrument_graph( graph=build_graph(), backend="sysmon", - initial_templates={"planner_prompt": "Plan {query}"}, + initial_templates={"planner_prompt": "Plan {query}", "synth_prompt": "answer::{query}::{plan}"}, output_key="final_answer", ) assert isinstance(ig, SysMonInstrumentedGraph) @@ -42,21 +61,60 @@ def test_sysmon_backend_invoke_exports_profile_doc(): assert len(ig._last_profile_doc["events"]) > 0 -def test_sysmon_backend_optimize_baseline_only(): +class _DictUpdateOptimizer: + def __init__(self): + self.calls = 0 + + def zero_feedback(self): + return None + + def backward(self, *_args, **_kwargs): + return None + + def step(self): + self.calls += 1 + if self.calls == 1: + return {"synth_prompt": "CRISPR optimized :: {query} :: {plan}"} + return {} + + +def test_sysmon_profile_to_tgj_preserves_parent_chain(): + profile_doc = { + "version": "trace-json/1.0+sysmon", + "agent": {"id": "demo"}, + "bindings": {}, + "events": [ + {"id": "p", "parent_id": None, "name": "planner", "file": "demo.py", "lineno": 1}, + {"id": "c", "parent_id": "p", "name": "synth", "file": "demo.py", "lineno": 2}, + ], + } + tgj = sysmon_profile_to_tgj(profile_doc, run_id="r", graph_id="g", scope="demo/0") + mp = ingest_tgj(tgj) + assert mp["synth"].parents[0] is mp["planner"] + + +def test_sysmon_backend_optimize_applies_binding_updates(): + templates = { + "planner_prompt": "Plan {query}", + "synth_prompt": "answer::{query}::{plan}", + } + bindings = {k: make_dict_binding(templates, k, kind="prompt") for k in templates} ig = instrument_graph( - graph=build_graph(), + graph=build_graph(templates), backend="sysmon", - initial_templates={"planner_prompt": "Plan {query}"}, + bindings=bindings, output_key="final_answer", ) result = optimize_graph( ig, queries=["What is CRISPR?"], - iterations=0, + iterations=2, + 
optimizer=_DictUpdateOptimizer(), eval_fn=lambda payload: { - "score": 1.0 if "CRISPR" in str(payload["answer"]) else 0.0, - "feedback": "Keep CRISPR in the answer.", + "score": 1.0 if "CRISPR optimized" in str(payload["answer"]) else 0.0, + "feedback": "Use the optimized synth prompt.", }, ) - assert result.best_iteration == 0 + assert result.best_iteration == 2 assert result.best_score == 1.0 + assert templates["synth_prompt"].startswith("CRISPR optimized") diff --git a/tests/unit_tests/test_graph_adapter_modulecandidate.py b/tests/unit_tests/test_graph_adapter_modulecandidate.py index 6f0c7771..7dd68ea4 100644 --- a/tests/unit_tests/test_graph_adapter_modulecandidate.py +++ b/tests/unit_tests/test_graph_adapter_modulecandidate.py @@ -8,7 +8,7 @@ from opto.features.priority_search.priority_search import ModuleCandidate from opto.optimizers.optimizer import Optimizer from opto.trace import node -from opto.trace.graph import LangGraphAdapter +from opto.features.graph import LangGraphAdapter def _raw(x): diff --git a/tests/unit_tests/test_graph_adapter_trace.py b/tests/unit_tests/test_graph_adapter_trace.py index 03ba97a3..086064a2 100644 --- a/tests/unit_tests/test_graph_adapter_trace.py +++ b/tests/unit_tests/test_graph_adapter_trace.py @@ -6,8 +6,8 @@ END = langgraph.END from opto.trace import node -from opto.trace.graph import GraphModule, GraphRunSidecar, LangGraphAdapter -from opto.trace.io import TraceGraph, instrument_graph, optimize_graph +from opto.features.graph import GraphModule, GraphRunSidecar, LangGraphAdapter +from opto.trace.io import TraceGraph, InstrumentedGraph, instrument_graph, optimize_graph def _raw(x): @@ -35,7 +35,10 @@ def make_adapter(): def planner_node(state): query = _raw(state["query"]) - return {"plan": planner_prompt.data.replace("{query}", str(query))} + return { + "query": query, + "plan": planner_prompt.data.replace("{query}", str(query)), + } def synth_node(state): query = _raw(state["query"]) @@ -125,3 +128,21 @@ def 
test_instrument_graph_accepts_graph_argument_when_it_is_a_graph_adapter(): out = graph.invoke({"query": "What is CRISPR?"}) assert isinstance(out, dict) assert "final_answer" in out + + +def test_adapter_dispatch_respects_service_override_in_trace_and_otel_modes(): + adapter = make_adapter() + + trace_graph = instrument_graph( + graph=adapter, + backend="trace", + service_name="trace-override", + ) + assert isinstance(trace_graph, TraceGraph) + assert trace_graph.service_name == "trace-override" + + otel_graph = instrument_graph(graph=adapter, backend="otel", service_name="otel-override") + assert isinstance(otel_graph, InstrumentedGraph) + assert otel_graph.service_name == "otel-override" + out = otel_graph.invoke({"query": "What is CRISPR?"}) + assert "final_answer" in out diff --git a/tests/unit_tests/test_graph_observers.py b/tests/unit_tests/test_graph_observers.py index 5e8dd677..ff86b932 100644 --- a/tests/unit_tests/test_graph_observers.py +++ b/tests/unit_tests/test_graph_observers.py @@ -22,7 +22,10 @@ def _make_trace_graph(): def planner_node(state): query = _raw(state["query"]) - return {"plan": planner_prompt.data.replace("{query}", str(query))} + return { + "query": query, + "plan": planner_prompt.data.replace("{query}", str(query)), + } def synth_node(state): query = _raw(state["query"]) diff --git a/tests/unit_tests/test_langgraph_otel_runtime.py b/tests/unit_tests/test_langgraph_otel_runtime.py index 9dc4d05b..94b9c34e 100644 --- a/tests/unit_tests/test_langgraph_otel_runtime.py +++ b/tests/unit_tests/test_langgraph_otel_runtime.py @@ -178,3 +178,40 @@ def test_extract_eval_metrics_from_otlp_defaults_when_missing(): for v in metrics.values(): assert 0.0 <= v <= 1.0 assert reasons == "" + + +def test_template_prompt_call_records_raw_template_and_rendered_prompt(): + tracer, exporter = init_otel_runtime("test-template-helper") + llm = FakeLLM("ANSWER") + tllm = TracingLLM( + llm=llm, + tracer=tracer, + trainable_keys={"planner"}, + 
emit_llm_child_span=False, + ) + + result = tllm.template_prompt_call( + span_name="planner", + template_name="planner_prompt", + template="Plan for: {query}", + variables={"query": "What is CRISPR?"}, + system_prompt="sys", + optimizable_key="planner", + ) + + assert result == "ANSWER" + assert len(llm.calls) == 1 + assert llm.calls[0]["messages"] == [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "Plan for: What is CRISPR?"}, + ] + + otlp = flush_otlp(exporter, scope_name="test-template-helper") + spans = otlp["resourceSpans"][0]["scopeSpans"][0]["spans"] + assert len(spans) == 1 + + attrs = _attrs_to_dict(spans[0]["attributes"]) + assert attrs["param.planner_prompt"] == "Plan for: {query}" + assert attrs["inputs.gen_ai.prompt"] == "Plan for: What is CRISPR?" + assert attrs["inputs.query"] == "What is CRISPR?" + assert attrs["inputs.user_query"] == "What is CRISPR?" From e2ad5ce9c5d91ff1e4f5f4908b43c0654d0c74fc Mon Sep 17 00:00:00 2001 From: doxav Date: Mon, 20 Apr 2026 22:35:55 +0200 Subject: [PATCH 06/16] improved comparisons --- ...aph_instrument_and_compare_observers.ipynb | 2267 ++++++++++++++--- ...ggraph_instrument_and_compare_observers.py | 778 ++++-- opto/trace/io/instrumentation.py | 21 +- opto/trace/io/optimization.py | 4 +- opto/trace/io/tgj_export.py | 3 + tests/features_tests/test_sysmon_backend.py | 3 + .../test_trace_graph_optimization.py | 13 +- 7 files changed, 2581 insertions(+), 508 deletions(-) diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb index b60268bb..7b5e132c 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb @@ -9,7 +9,7 @@ "source": [ "# LangGraph live optimization comparison across Trace / OTEL / sys.monitoring\n", "\n", - "This notebook runs the live comparison script.\n", + "This 
notebook runs the live comparison module and renders a shared analysis report.\n", "\n", "- It uses the OpenRouter API when `OPENROUTER_API_KEY` is set.\n", "- It defaults to `OPENROUTER_MODEL=gpt-4o-mini` when the model env var is absent.\n", @@ -22,336 +22,1967 @@ "id": "run-live-compare-script", "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T20:13:07.481092Z", - "iopub.status.busy": "2026-04-19T20:13:07.480782Z", - "iopub.status.idle": "2026-04-19T20:24:26.123314Z", - "shell.execute_reply": "2026-04-19T20:24:26.122779Z" + "iopub.execute_input": "2026-04-20T19:43:28.048220Z", + "iopub.status.busy": "2026-04-20T19:43:28.047990Z", + "iopub.status.idle": "2026-04-20T19:53:35.512496Z", + "shell.execute_reply": "2026-04-20T19:53:35.511662Z" }, "language": "python" }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "================================================================================\n", - "LangGraph live optimization comparison\n", - "================================================================================\n", - "Python 3.13\n", - "sys.monitoring available: True\n", - "OPENROUTER_MODEL=gpt-4o-mini\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Running baseline...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 1/2: What is CRISPR?... 
score=0.0\n" - ] + "data": { + "text/markdown": [ + "## Optimization comparison\n", + "\n", + "| config | runtime_s | baseline | best | gain | best_iteration | stability_std | score_history |\n", + "|---|---:|---:|---:|---:|---:|---:|---|\n", + "| trace | 71.749 | 0.752 | 0.928 | 0.176 | 4 | 0.003 | [0.752, 0.805, 0.92, 0.898, 0.928, 0.923] |\n", + "| trace+otel | 76.288 | 0.822 | 0.950 | 0.128 | 2 | 0.025 | [0.822, 0.715, 0.95, 0.937, 0.907, 0.885] |\n", + "| otel | 80.465 | 0.669 | 0.950 | 0.281 | 4 | 0.014 | [0.669, 0.842, 0.92, 0.907, 0.95, 0.922] |\n", + "| trace+sysmon | 78.316 | 0.732 | 0.923 | 0.192 | 4 | 0.022 | [0.732, 0.714, 0.92, 0.92, 0.923, 0.88] |\n", + "| trace+otel+sysmon | 77.963 | 0.714 | 0.923 | 0.210 | 3 | 0.018 | [0.714, 0.712, 0.92, 0.923, 0.88, 0.898] |\n", + "| otel+sysmon | 93.479 | 0.842 | 0.950 | 0.108 | 2 | 0.023 | [0.842, 0.715, 0.95, 0.923, 0.923, 0.887] |\n", + "| sysmon | 77.312 | 0.822 | 0.944 | 0.122 | 2 | 0.026 | [0.822, 0.669, 0.944, 0.923, 0.88, 0.887] |" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 2/2: How does CRISPR enable gene editing?... score=0.0\n", - " Baseline average: 0.0000\n", - " Iteration 1/5...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 1/2: What is CRISPR?... score=0.0\n" - ] + { + "data": { + "text/markdown": [ + "## trace\n", + "- Runtime: `71.749s`\n", + "- Baseline score: `0.752`\n", + "- Best score: `0.928`\n", + "- Score gain: `0.176`\n", + "- Best iteration: `4`\n", + "- Post-update stability std: `0.003`\n", + "- Score history: `[0.752, 0.805, 0.92, 0.898, 0.928, 0.923]`\n", + "- Best updates: `[]`\n", + "\n", + "### Final synth prompt\n", + "```text\n", + "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\n", + "Plan: {plan}\n", + "```\n", + "### Final answer\n", + "```text\n", + "CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\n", + "\n", + "### Mechanism of CRISPR\n", + "**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \n", + "**Process**: The CRISPR system works by first introducing the gRNA into a cell, where it binds to the tar...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### backend trace\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "\n", + "```json\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + "}\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "state\n", + "\n", + "state\n", + "[value]\n", + "{'query': 'What is CRISPR?'}\n", + "\n", + "\n", + "\n", + "make_trace_case.planner_node\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", + "####...\n", + "\n", + "\n", + "\n", + "state->make_trace_case.planner_node\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "make_trace_case.synth_node\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x747a19e17ef0>}\n", + "\n", + "\n", + "\n", + "make_trace_case.planner_node->make_trace_case.synth_node\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "planner_prompt\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "synth_prompt\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. Then add three short titled sections w...\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "## trace+otel\n", + "- Runtime: `76.288s`\n", + "- Baseline score: `0.822`\n", + "- Best score: `0.950`\n", + "- Score gain: `0.128`\n", + "- Best iteration: `2`\n", + "- Post-update stability std: `0.025`\n", + "- Score history: `[0.822, 0.715, 0.95, 0.937, 0.907, 0.885]`\n", + "- Best updates: `['synth_prompt']`\n", + "\n", + "### Final synth prompt\n", + "```text\n", + "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\n", + "Plan: {plan}\n", + "```\n", + "### Final answer\n", + "```text\n", + "CRISPR, or Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\n", + "\n", + "### Mechanism of CRISPR\n", + "CRISPR functions through a combination of specific components: CRISPR sequences in bacterial genomes store segments of viral DNA, while the Cas9 protein acts as a molecular scissors that cuts DNA. The process begins with guide RNA, which is designed to match a specific DNA sequence, directin...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### backend trace\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "\n", + "```json\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + "}\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "state\n", + "\n", + "state\n", + "[value]\n", + "{'query': 'What is CRISPR?'}\n", + "\n", + "\n", + "\n", + "make_trace_case.planner_node\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", + "####...\n", + "\n", + "\n", + "\n", + "state->make_trace_case.planner_node\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "make_trace_case.synth_node\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x747a19e6f620>}\n", + "\n", + "\n", + "\n", + "make_trace_case.planner_node->make_trace_case.synth_node\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "planner_prompt\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "synth_prompt\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. Then add three short titled sections w...\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### observer otel\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "- Span count: `2`\n", + "- Span names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "\n", + "```json\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Answer directly in the first sentence. 
Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + "}\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "None\n", + "\n", + "make_trace_case.synth_node\n", + "[msg]\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "## otel\n", + "- Runtime: `80.465s`\n", + "- Baseline score: `0.669`\n", + "- Best score: `0.950`\n", + "- Score gain: `0.281`\n", + "- Best iteration: `4`\n", + "- Post-update stability std: `0.014`\n", + "- Score history: `[0.669, 0.842, 0.92, 0.907, 0.95, 0.922]`\n", + "- Best updates: `['synth_prompt']`\n", + "\n", + "### Final synth prompt\n", + "```text\n", + "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\n", + "Plan: {plan}\n", + "```\n", + "### Final answer\n", + "```text\n", + "CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\n", + "\n", + "### Mechanism of CRISPR\n", + "**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \n", + "**Process**: The CRISPR system works by first using the gRNA to locate the target DNA sequence. 
Once boun...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### backend otel\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['llm.chat.completion', 'llm.chat.completion', 'planner_node', 'synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "- Span count: `5`\n", + "- Span names: `['llm.chat.completion', 'planner_node', 'llm.chat.completion', 'synth_node', 'langgraph-agent.invoke']`\n", + "\n", + "```json\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + "}\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "None\n", + "\n", + "llm.chat.completion\n", + "[msg]\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "## trace+sysmon\n", + "- Runtime: `78.316s`\n", + "- Baseline score: `0.732`\n", + "- Best score: `0.923`\n", + "- Score gain: `0.192`\n", + "- Best iteration: `4`\n", + "- Post-update stability std: `0.022`\n", + "- Score history: `[0.732, 0.714, 0.92, 0.92, 0.923, 0.88]`\n", + "- Best updates: `[]`\n", + "\n", + "### Final synth prompt\n", + "```text\n", + "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\n", + "Plan: {plan}\n", + "```\n", + "### Final answer\n", + "```text\n", + "CRISPR is a revolutionary gene-editing technology that allows for precise modifications to DNA in living organisms.\n", + "\n", + "### Mechanism of CRISPR\n", + "CRISPR consists of two main components: CRISPR sequences, which serve as a genetic memory of past viral infections, and the Cas9 enzyme, which acts as molecular scissors to cut DNA. The process begins with the guide RNA, which is designed to match a specific DNA sequence, directing the Cas9 enzyme to the target site for cleavage, leading to DNA repair me...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### backend trace\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "\n", + "```json\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + "}\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "state\n", + "\n", + "state\n", + "[value]\n", + "{'query': 'What is CRISPR?'}\n", + "\n", + "\n", + "\n", + "make_trace_case.planner_node\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", + "####...\n", + "\n", + "\n", + "\n", + "state->make_trace_case.planner_node\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "make_trace_case.synth_node\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x747a19e14a10>}\n", + "\n", + "\n", + "\n", + "make_trace_case.planner_node->make_trace_case.synth_node\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "planner_prompt\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "synth_prompt\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. Then add three short titled sections w...\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 2/2: How does CRISPR enable gene editing?... score=0.0\n", - " Iteration 1 average: 0.0000\n", - " Iteration 2/5...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 1/2: What is CRISPR?... 
score=1.0\n" - ] + { + "data": { + "text/markdown": [ + "### observer sysmon\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['planner_node', 'synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "- Event count: `2`\n", + "\n", + "```json\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + "}\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "param:planner_prompt\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "param:synth_prompt\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. 
Then add three short titled sections w...\n", + "\n", + "\n", + "\n", + "msg:e4b0ccde1af94263\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", + "####...\n", + "\n", + "\n", + "\n", + "msg:363310e6046f4844\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x747a19e14a10>}\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "## trace+otel+sysmon\n", + "- Runtime: `77.963s`\n", + "- Baseline score: `0.714`\n", + "- Best score: `0.923`\n", + "- Score gain: `0.210`\n", + "- Best iteration: `3`\n", + "- Post-update stability std: `0.018`\n", + "- Score history: `[0.714, 0.712, 0.92, 0.923, 0.88, 0.898]`\n", + "- Best updates: `['synth_prompt']`\n", + "\n", + "### Final synth prompt\n", + "```text\n", + "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\n", + "Plan: {plan}\n", + "```\n", + "### Final answer\n", + "```text\n", + "CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\n", + "\n", + "### Mechanism of CRISPR\n", + "**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. 
\n", + "**Process**: The CRISPR system works by first introducing the gRNA into a cell, where it binds to the tar...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### backend trace\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "\n", + "```json\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + "}\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", - " Iteration 2 average: 1.0000 * NEW BEST\n", - " Iteration 3/5...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 1/2: What is CRISPR?... 
score=1.0\n" - ] + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "state\n", + "\n", + "state\n", + "[value]\n", + "{'query': 'What is CRISPR?'}\n", + "\n", + "\n", + "\n", + "make_trace_case.planner_node\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", + "####...\n", + "\n", + "\n", + "\n", + "state->make_trace_case.planner_node\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "make_trace_case.synth_node\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x747a19efc050>}\n", + "\n", + "\n", + "\n", + "make_trace_case.planner_node->make_trace_case.synth_node\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "planner_prompt\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "synth_prompt\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. Then add three short titled sections w...\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### observer sysmon\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['planner_node', 'synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "- Event count: `2`\n", + "\n", + "```json\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + "}\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", - " Iteration 3 average: 1.0000\n", - " Iteration 4/5...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 1/2: What is CRISPR?... score=1.0\n" - ] + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "param:planner_prompt\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "param:synth_prompt\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. Then add three short titled sections w...\n", + "\n", + "\n", + "\n", + "msg:f8b47aac286c4268\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", + "####...\n", + "\n", + "\n", + "\n", + "msg:03dd0fe79ab14207\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x747a19efc050>}\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", - " Iteration 4 average: 1.0000\n", - " Iteration 5/5...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 1/2: What is CRISPR?... 
score=1.0\n" - ] + { + "data": { + "text/markdown": [ + "### observer otel\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "- Span count: `2`\n", + "- Span names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "\n", + "```json\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + "}\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "None\n", + "\n", + "make_trace_case.synth_node\n", + "[msg]\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", - " Iteration 5 average: 1.0000\n" - ] + "data": { + "text/markdown": [ + "## otel+sysmon\n", + "- Runtime: `93.479s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.950`\n", + "- Score gain: `0.108`\n", + "- Best iteration: `2`\n", + "- Post-update stability std: `0.023`\n", + "- Score history: `[0.842, 0.715, 0.95, 0.923, 0.923, 0.887]`\n", + "- Best updates: `['synth_prompt']`\n", + "\n", + "### Final synth prompt\n", + "```text\n", + "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\n", + "Plan: {plan}\n", + "```\n", + "### Final answer\n", + "```text\n", + "CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\n", + "\n", + "### Mechanism of CRISPR\n", + "**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \n", + "**Process**: The CRISPR system works by first introducing the gRNA into a cell, where it binds to the tar...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " Running baseline...\n" - ] + "data": { + "text/markdown": [ + "### backend otel\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['llm.chat.completion', 'llm.chat.completion', 'planner_node', 'synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "- Span count: `5`\n", + "- Span names: `['llm.chat.completion', 'planner_node', 'llm.chat.completion', 'synth_node', 'langgraph-agent.invoke']`\n", + "\n", + "```json\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + "}\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 1/2: What is CRISPR?... score=0.0\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 2/2: How does CRISPR enable gene editing?... 
score=0.0\n", - " Baseline average: 0.0000\n", - " Iteration 1/5...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 1/2: What is CRISPR?... score=0.0\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 2/2: How does CRISPR enable gene editing?... score=0.0\n", - " Iteration 1 average: 0.0000\n", - " Iteration 2/5...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 1/2: What is CRISPR?... score=1.0\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", - " Iteration 2 average: 1.0000 * NEW BEST\n", - " Iteration 3/5...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 1/2: What is CRISPR?... score=1.0\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", - " Iteration 3 average: 1.0000\n", - " Iteration 4/5...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 1/2: What is CRISPR?... score=1.0\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 2/2: How does CRISPR enable gene editing?... score=1.0\n", - " Iteration 4 average: 1.0000\n", - " Iteration 5/5...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 1/2: What is CRISPR?... score=1.0\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Query 2/2: How does CRISPR enable gene editing?... 
score=1.0\n", - " Iteration 5 average: 1.0000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Optimization comparison (5 iterations)\n", - "\n", - "| config | score_history | best_iteration | observers |\n", - "|---|---|---:|---|\n", - "| trace | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | - |\n", - "| trace+otel | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | otel |\n", - "| otel | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | - |\n", - "| trace+sysmon | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | sysmon |\n", - "| trace+otel+sysmon | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | sysmon,otel |\n", - "| otel+sysmon | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | sysmon |\n", - "| sysmon | [0.0, 0.0, 1.0, 1.0, 1.0, 1.0] | 2 | - |\n", - "\n", - "Binding / update inspection\n", - "\n", - "## trace\n", - "best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", - "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", - "Plan: {plan}\n", - "answer_preview: [BENCH_OK] \n", - "\n", - "### What is CRISPR?\n", - "\n", - "#### 1. Introduction to CRISPR\n", - "- **Definition**: CRISPR stands for Clustered Regularly\n", - "trace_summary: {'is_node': True, 'parent_count': 3, 'parameter_count': 4}\n", - "\n", - "## trace+otel\n", - "best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", - "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", - "Plan: {plan}\n", - "answer_preview: [BENCH_OK] \n", - "\n", - "### What is CRISPR?\n", - "\n", - "#### 1. 
Introduction to CRISPR\n", - "- **Definition**: CRISPR stands for Clustered Regularly\n", - "trace_summary: {'is_node': True, 'parent_count': 3, 'parameter_count': 4}\n", - "otel_summary: {'span_count': 0, 'span_names': [], 'param_keys': [], 'message_names': []}\n", - "\n", - "## otel\n", - "best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", - "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", - "Plan: {plan}\n", - "answer_preview: [BENCH_OK] \n", - "\n", - "### What is CRISPR?\n", - "\n", - "#### 1. Introduction to CRISPR\n", - "- **Definition**: CRISPR, which stands for Clustered Re\n", - "\n", - "## trace+sysmon\n", - "best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", - "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", - "Plan: {plan}\n", - "answer_preview: [BENCH_OK] \n", - "\n", - "CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a groundbreaking gen\n", - "trace_summary: {'is_node': True, 'parent_count': 3, 'parameter_count': 4}\n", - "sysmon_summary: {'event_count': 9665, 'tgj_node_count': 9665, 'message_names': ['', '', '', '__bool__', '__bytes__', '__call__', '__contains__', '__copy__', '__deepcopy__', '__delitem__', '__enter__', '__eq__', '__exit__', '__get__', '__getattr__', '__getitem__', '__hash__', '__iadd__', '__init__', '__instancecheck__', '__iter__', '__len__', '__new__', '__newobj__', '__post_init__', '__reduce__', '__setattr__', '__setitem__', '__setstate__', '__str__', '__subclasscheck__', '_add_dependencies', '_add_filter', '_add_parent', '_allow_reckless_class_checks', '_assemble_writes', '_assign_requests_to_connections', '_bind', '_body_framing', '_build_headers', '_build_request', '_build_request_auth', '_caller', '_checkClosed', '_check_class', 
'_check_frozen', '_check_instance', '_close_connections', '_coerce_args', '_coerce_context', '_compile', '_configure', '_construct_field', '_control_branch', '_copy_immutable', '_create_exit_wrapper', '_debug', '_decode_header_lines', '_deepcopy_atomic', '_deepcopy_dict', '_deepcopy_list', '_deepcopy_tuple', '_defaults', '_emit', '_extract', '_extract_next_receive_event', '_fire_event_triggered_transitions', '_fire_state_triggered_transitions', '_first', '_get_content_decoder', '_get_debug', '_get_extra_fields_type', '_get_io_object', '_get_root', '_get_tracer_project', '_handle_fromlist', '_has_code_flag', '_has_coroutine_mark', '_idempotency_key', '_is_dataclass_instance', '_is_disabled', '_is_owned', '_keep_alive', '_llm', '_maybe_override_cast_to', '_maybe_transform_key', '_merge_cookies', '_merge_headers', '_merge_mappings', '_merge_queryparams', '_merge_url', '_no_transform_needed', '_noop', '_normalize_header_key', '_normalize_header_value', '_obsolete_line_fold', '_output', '_parse', '_prepare', '_prepare_options', '_prepare_request', '_prepare_url', '_proc_input', '_process_event', '_process_response', '_process_response_data', '_push_cm_exit', '_push_exit_callback', '_put_checkpoint', '_raw', '_receive_event', '_receive_response_body', '_receive_response_headers', '_reconstruct', '_refresh_api_key', '_replace', '_respond_to_state_changes', '_response_closed', '_scratchpad', '_send_event', '_send_handling_auth', '_send_handling_redirects', '_send_request_body', '_send_request_headers', '_send_single_request', '_server_switch_event', '_set_config_context', '_set_timeout', '_shadowed_dict', '_should_stream_response_body', '_should_unflatten_callable_args', '_signature_from_callable', '_signature_from_function', '_slotnames', '_splithost', '_splittag', '_splittype', '_suppress_interrupt', '_tracing_v2_is_enabled', '_transform_recursive', '_transform_typeddict', '_transport_for_url', '_triggers', '_unset_config_context', '_unwrap_partial', 
'_unwrap_partialmethod', '_update_level', '_validate_headers', '_validate_non_model_type', '_value_and_type_iter', '_warn_on_import', '_wrap_inputs', '_write', '_xxhash_str', 'acquire', 'add_header', 'add_metadata', 'add_request_id', 'add_tags', 'after_tick', 'annotation', 'apply_defaults', 'apply_writes', 'as_dict', 'assign_to_connection', 'auth_flow', 'auth_headers', 'authority', 'base_url', 'bind', 'build_request', 'bytesify', 'can_handle_request', 'cast', 'channels_from_checkpoint', 'checkpoint_null_version', 'close', 'commit', 'compile', 'configure', 'construct', 'construct_type', 'consume', 'contain', 'content', 'cookies', 'copy', 'copy_with', 'count', 'create', 'create_checkpoint', 'custom_auth', 'data', 'debug', 'decode', 'deepcopy', 'deepvalues', 'default', 'default_headers', 'default_query', 'detach', 'detach_inputs', 'detect_encoding', 'do_write', 'dumps', 'elapsed', 'empty_checkpoint', 'encode', 'encode_host', 'encode_json', 'encode_request', 'encoding', 'enforce_bytes', 'enforce_headers', 'enforce_stream', 'enforce_url', 'ensure_config', 'enter_context', 'env_var_is_set', 'extract_cookies', 'extract_param', 'extract_type_arg', 'field_get_default', 'filterwarnings', 'finditer', 'finish', 'flush', 'forward', 'from_callable', 'from_checkpoint', 'full_url', 'fun', 'func', 'get', 'get_all', 'get_annotations', 'get_args', 'get_callback_manager_for_config', 'get_child', 'get_comma_header', 'get_current_run_tree', 'get_debug', 'get_default', 'get_executor_for_config', 'get_extra_info', 'get_list', 'get_max_retries', 'get_model_config', 'get_model_fields', 'get_multipart_boundary_from_content_type', 'get_name', 'get_op_name', 'get_origin', 'get_tracing_context', 'getattr_static', 'getfullargspec', 'getitem', 'handle_event', 'handle_request', 'has_expect_100_continue', 'has_expired', 'has_redirect_location', 'header_max_count', 'header_store_parse', 'headers', 'helper', 'hidden_dependencies', 'host', 'http_version', 'increment', 'info', 'inner', 'invoke', 
'isEnabledFor', 'is_absolute_url', 'is_annotated_type', 'is_available', 'is_closed', 'is_given', 'is_idle', 'is_interactive_env', 'is_iterable', 'is_iterable_type', 'is_list', 'is_list_type', 'is_literal_type', 'is_mapping', 'is_next_line_obviously_invalid_request_line', 'is_queued', 'is_relative_url', 'is_required', 'is_sequence_type', 'is_socket_readable', 'is_success', 'is_type_alias_type', 'is_typeddict', 'is_union', 'is_union_type', 'is_valid_field_name', 'isclass', 'iscoroutinefunction', 'isfunction', 'ismethod', 'iter_bytes', 'iter_raw', 'iterencode', 'json', 'keys', 'kind', 'loads', 'make_cookies', 'make_request_options', 'map_exceptions', 'map_httpcore_exceptions', 'map_input', 'map_output_updates', 'map_output_values', 'match', 'match_cached_writes', 'maybe_extract_at_most', 'maybe_extract_lines', 'maybe_extract_next_line', 'maybe_read_from_SEND_RESPONSE_server', 'maybe_transform', 'merge', 'merge_configs', 'model_construct', 'model_copy', 'model_fields', 'multi_items', 'name', 'netloc', 'next_event', 'node', 'normalize_and_validate', 'normalize_path', 'normalize_port', 'notify', 'notify_all', 'on_chain_end', 'on_chain_start', 'origin', 'our_state', 'output_writes', 'override', 'parameters', 'params', 'parent', 'parse', 'password', 'patch_config', 'patch_configurable', 'percent_encoded', 'planner_node', 'platform_headers', 'pop', 'port', 'post', 'postprocess_output', 'prepare_next_tasks', 'prepare_single_task', 'preprocess_inputs', 'process_cpu_count', 'process_event', 'push', 'put', 'put_writes', 'quote', 'raise_for_status', 'raw', 'raw_decode', 'raw_host', 'raw_items', 'raw_path', 'raw_scheme', 'read', 'read_channel', 'read_channels', 'reason_phrase', 'receive_data', 'recursive_conversion', 'recv', 'register', 'release', 'replace', 'request', 'request_context', 'request_host', 'request_id', 'return_annotation', 'run_with_retry', 'search', 'send', 'send_data', 'send_eom', 'send_with_data_passthrough', 'set', 'set_config_context', 'set_handlers', 
'setdefault', 'shutdown', 'signature', 'smart_deepcopy', 'start_next_cycle', 'stop', 'stream', 'stream_channels_asis', 'strip_not_given', 'sync_auth_flow', 'sync_call_fun', 'sync_forward', 'synth_node', 'task_path_str', 'their_state', 'tick', 'to_data', 'to_httpx_files', 'tracing_is_enabled', 'trailing_data', 'transform', 'unquote', 'unwrap', 'update', 'urlparse', 'user_agent', 'username', 'uuid4', 'uuid6', 'validate', 'validate_keys', 'validate_path', 'validate_python', 'validate_response_format', 'validate_type', 'value', 'wait_for_connection', 'wrap', 'wrapper', 'write', 'write_headers', 'write_request'], 'param_names': []}\n", - "\n", - "## trace+otel+sysmon\n", - "best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", - "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", - "Plan: {plan}\n", - "answer_preview: [BENCH_OK] \n", - "\n", - "### What is CRISPR?\n", - "\n", - "#### 1. 
Introduction to CRISPR\n", - "- **Definition**: CRISPR stands for Clustered Regularly\n", - "trace_summary: {'is_node': True, 'parent_count': 3, 'parameter_count': 4}\n", - "otel_summary: {'span_count': 0, 'span_names': [], 'param_keys': [], 'message_names': []}\n", - "sysmon_summary: {'event_count': 9744, 'tgj_node_count': 9744, 'message_names': ['', '', '', '__bool__', '__bytes__', '__call__', '__contains__', '__copy__', '__deepcopy__', '__delitem__', '__enter__', '__eq__', '__exit__', '__get__', '__getattr__', '__getitem__', '__hash__', '__iadd__', '__init__', '__instancecheck__', '__iter__', '__len__', '__new__', '__newobj__', '__post_init__', '__reduce__', '__setattr__', '__setitem__', '__setstate__', '__str__', '__subclasscheck__', '_add_dependencies', '_add_filter', '_add_parent', '_allow_reckless_class_checks', '_assemble_writes', '_assign_requests_to_connections', '_bind', '_body_framing', '_build_headers', '_build_request', '_build_request_auth', '_call_soon', '_caller', '_checkClosed', '_check_class', '_check_closed', '_check_frozen', '_check_instance', '_clean_thread_parent_frames', '_close_connections', '_coerce_args', '_coerce_context', '_compile', '_configure', '_construct_field', '_control_branch', '_copy_immutable', '_create_exit_wrapper', '_debug', '_decode_header_lines', '_deepcopy_atomic', '_deepcopy_dict', '_deepcopy_list', '_deepcopy_tuple', '_defaults', '_emit', '_event_pipe_gc', '_extract', '_extract_next_receive_event', '_fire_event_triggered_transitions', '_fire_state_triggered_transitions', '_first', '_get_content_decoder', '_get_debug', '_get_extra_fields_type', '_get_io_object', '_get_root', '_get_tracer_project', '_handle_fromlist', '_has_code_flag', '_has_coroutine_mark', '_idempotency_key', '_is_dataclass_instance', '_is_disabled', '_is_owned', '_keep_alive', '_llm', '_maybe_override_cast_to', '_maybe_transform_key', '_merge_cookies', '_merge_headers', '_merge_mappings', '_merge_queryparams', '_merge_url', '_no_transform_needed', 
'_noop', '_normalize_header_key', '_normalize_header_value', '_obsolete_line_fold', '_output', '_parse', '_prepare', '_prepare_options', '_prepare_request', '_prepare_url', '_proc_input', '_process_event', '_process_events', '_process_response', '_process_response_data', '_push_cm_exit', '_push_exit_callback', '_put_checkpoint', '_raw', '_receive_event', '_receive_response_body', '_receive_response_headers', '_reconstruct', '_refresh_api_key', '_replace', '_respond_to_state_changes', '_response_closed', '_run', '_run_once', '_scratchpad', '_send_event', '_send_handling_auth', '_send_handling_redirects', '_send_request_body', '_send_request_headers', '_send_single_request', '_server_switch_event', '_set_config_context', '_set_result_unless_cancelled', '_set_timeout', '_shadowed_dict', '_should_stream_response_body', '_should_unflatten_callable_args', '_signature_from_callable', '_signature_from_function', '_slotnames', '_splithost', '_splittag', '_splittype', '_suppress_interrupt', '_timer_handle_cancelled', '_tracing_v2_is_enabled', '_transform_recursive', '_transform_typeddict', '_transport_for_url', '_triggers', '_unset_config_context', '_unwrap_partial', '_unwrap_partialmethod', '_update_level', '_validate_headers', '_validate_non_model_type', '_value_and_type_iter', '_warn_on_import', '_wrap_inputs', '_write', '_xxhash_str', 'acquire', 'add_header', 'add_metadata', 'add_request_id', 'add_tags', 'after_tick', 'annotation', 'apply_defaults', 'apply_writes', 'as_dict', 'assign_to_connection', 'auth_flow', 'auth_headers', 'authority', 'base_url', 'bind', 'build_request', 'bytesify', 'call_at', 'call_later', 'call_soon', 'can_handle_request', 'cancel', 'cast', 'channels_from_checkpoint', 'checkpoint_null_version', 'close', 'commit', 'compile', 'configure', 'construct', 'construct_type', 'consume', 'contain', 'content', 'cookies', 'copy', 'copy_with', 'count', 'create', 'create_checkpoint', 'create_future', 'custom_auth', 'data', 'debug', 'decode', 'deepcopy', 
'deepvalues', 'default', 'default_headers', 'default_query', 'detach', 'detach_inputs', 'detect_encoding', 'do_write', 'dumps', 'elapsed', 'empty_checkpoint', 'encode', 'encode_host', 'encode_json', 'encode_request', 'encoding', 'enforce_bytes', 'enforce_headers', 'enforce_stream', 'enforce_url', 'ensure_config', 'enter_context', 'enumerate', 'env_var_is_set', 'extract_cookies', 'extract_param', 'extract_type_arg', 'field_get_default', 'filterwarnings', 'finditer', 'finish', 'flush', 'forward', 'from_callable', 'from_checkpoint', 'full_url', 'fun', 'func', 'get', 'get_all', 'get_annotations', 'get_args', 'get_callback_manager_for_config', 'get_child', 'get_comma_header', 'get_current_run_tree', 'get_debug', 'get_default', 'get_executor_for_config', 'get_extra_info', 'get_list', 'get_max_retries', 'get_model_config', 'get_model_fields', 'get_multipart_boundary_from_content_type', 'get_name', 'get_op_name', 'get_origin', 'get_tracing_context', 'getattr_static', 'getfullargspec', 'getitem', 'handle_event', 'handle_request', 'has_expect_100_continue', 'has_expired', 'has_redirect_location', 'header_max_count', 'header_store_parse', 'headers', 'helper', 'hidden_dependencies', 'host', 'http_version', 'ident', 'increment', 'info', 'inner', 'invoke', 'isEnabledFor', 'is_absolute_url', 'is_alive', 'is_annotated_type', 'is_available', 'is_closed', 'is_given', 'is_idle', 'is_interactive_env', 'is_iterable', 'is_iterable_type', 'is_list', 'is_list_type', 'is_literal_type', 'is_mapping', 'is_next_line_obviously_invalid_request_line', 'is_queued', 'is_relative_url', 'is_required', 'is_sequence_type', 'is_set', 'is_socket_readable', 'is_success', 'is_type_alias_type', 'is_typeddict', 'is_union', 'is_union_type', 'is_valid_field_name', 'isclass', 'iscoroutinefunction', 'isfunction', 'ismethod', 'iter_bytes', 'iter_raw', 'iterencode', 'json', 'keys', 'kind', 'loads', 'make_cookies', 'make_request_options', 'map_exceptions', 'map_httpcore_exceptions', 'map_input', 
'map_output_updates', 'map_output_values', 'match', 'match_cached_writes', 'maybe_extract_at_most', 'maybe_extract_lines', 'maybe_extract_next_line', 'maybe_read_from_SEND_RESPONSE_server', 'maybe_transform', 'merge', 'merge_configs', 'model_construct', 'model_copy', 'model_fields', 'multi_items', 'name', 'netloc', 'next_event', 'node', 'normalize_and_validate', 'normalize_path', 'normalize_port', 'notify', 'notify_all', 'on_chain_end', 'on_chain_start', 'origin', 'our_state', 'output_writes', 'override', 'parameters', 'params', 'parent', 'parse', 'password', 'patch_config', 'patch_configurable', 'percent_encoded', 'planner_node', 'platform_headers', 'pop', 'port', 'post', 'postprocess_output', 'prepare_next_tasks', 'prepare_single_task', 'preprocess_inputs', 'process_cpu_count', 'process_event', 'push', 'put', 'put_writes', 'quote', 'raise_for_status', 'raw', 'raw_decode', 'raw_host', 'raw_items', 'raw_path', 'raw_scheme', 'read', 'read_channel', 'read_channels', 'reason_phrase', 'receive_data', 'recursive_conversion', 'recv', 'register', 'release', 'replace', 'request', 'request_context', 'request_host', 'request_id', 'return_annotation', 'run_with_retry', 'search', 'select', 'send', 'send_data', 'send_eom', 'send_with_data_passthrough', 'set', 'set_config_context', 'set_handlers', 'setdefault', 'shutdown', 'signature', 'sleep', 'smart_deepcopy', 'start_next_cycle', 'stop', 'stream', 'stream_channels_asis', 'strip_not_given', 'sync_auth_flow', 'sync_call_fun', 'sync_forward', 'synth_node', 'task_path_str', 'their_state', 'tick', 'time', 'to_data', 'to_httpx_files', 'tracing_is_enabled', 'trailing_data', 'transform', 'unquote', 'unwrap', 'update', 'urlparse', 'user_agent', 'username', 'uuid4', 'uuid6', 'validate', 'validate_keys', 'validate_path', 'validate_python', 'validate_response_format', 'validate_type', 'value', 'wait_for_connection', 'wrap', 'wrapper', 'write', 'write_headers', 'write_request'], 'param_names': []}\n", - "\n", - "## otel+sysmon\n", - 
"best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", - "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", - "Plan: {plan}\n", - "answer_preview: [BENCH_OK] \n", - "\n", - "CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a groundbreaking gen\n", - "sysmon_summary: {'event_count': 9489, 'tgj_node_count': 9491, 'message_names': ['', '', 'RLock', '__and__', '__bool__', '__bytes__', '__call__', '__contains__', '__copy__', '__delitem__', '__enter__', '__eq__', '__exit__', '__get__', '__getattr__', '__getitem__', '__hash__', '__iadd__', '__init__', '__instancecheck__', '__iter__', '__len__', '__lt__', '__new__', '__newobj__', '__or__', '__post_init__', '__set__', '__setattr__', '__setitem__', '__str__', '__subclasscheck__', '_add_callback', '_add_filter', '_allow_reckless_class_checks', '_assemble_writes', '_assign_requests_to_connections', '_body_framing', '_build_headers', '_build_request', '_build_request_auth', '_call_soon', '_caller', '_checkClosed', '_check_class', '_check_closed', '_check_frozen', '_check_instance', '_check_mp_mode', '_clean_attribute', '_clean_attribute_value', '_clean_thread_parent_frames', '_close_connections', '_coerce_args', '_coerce_context', '_compile', '_configure', '_construct_field', '_control_branch', '_copy_immutable', '_create_exit_wrapper', '_cross_validate', '_current', '_debug', '_decode_header_lines', '_deepcopy_atomic', '_defaults', '_emit', '_ensure_tzinfo', '_event_pipe', '_event_pipe_gc', '_extract', '_extract_next_receive_event', '_fire_event_triggered_transitions', '_fire_state_triggered_transitions', '_first', '_flush', '_flush_buffers', '_get_attr_opt', '_get_content_decoder', '_get_debug', '_get_extra_fields_type', '_get_io_object', '_get_parent_trace_state', '_get_root', '_get_tracer_project', '_get_value', '_handle_event', '_handle_events', '_handle_fromlist', 
'_handle_recv', '_hooks', '_idempotency_key', '_is_dataclass_instance', '_is_disabled', '_is_master_process', '_is_owned', '_is_trainable', '_keep_alive', '_llm', '_maybe_override_cast_to', '_maybe_transform_key', '_merge_cookies', '_merge_headers', '_merge_mappings', '_merge_queryparams', '_merge_url', '_new_events', '_new_links', '_no_transform_needed', '_noop', '_normalize_header_key', '_normalize_header_value', '_obsolete_line_fold', '_output', '_parse', '_prepare', '_prepare_options', '_prepare_request', '_prepare_url', '_proc_input', '_process_event', '_process_events', '_process_response', '_process_response_data', '_push_cm_exit', '_push_exit_callback', '_put_checkpoint', '_qsize', '_readable_span', '_really_send', '_rebuild_io_state', '_receive_event', '_receive_response_body', '_receive_response_headers', '_reconstruct', '_record_llm_call', '_refresh_api_key', '_replace', '_respond_to_state_changes', '_response_closed', '_root_invocation_span', '_rotate_buffers', '_run', '_run_callback', '_run_once', '_schedule_in_thread', '_scratchpad', '_send_event', '_send_handling_auth', '_send_handling_redirects', '_send_request_body', '_send_request_headers', '_send_single_request', '_server_switch_event', '_set_config_context', '_set_result_unless_cancelled', '_set_timeout', '_shadowed_dict', '_should_stream_response_body', '_should_unflatten_callable_args', '_splithost', '_splittag', '_splittype', '_suppress_interrupt', '_timer_handle_cancelled', '_tracing_v2_is_enabled', '_transform_recursive', '_transform_typeddict', '_transport_for_url', '_triggers', '_unset_config_context', '_update_handler', '_validate', '_validate_bounds', '_validate_content', '_validate_headers', '_validate_non_model_type', '_value_and_type_iter', '_warn_on_import', '_write', '_xxhash_str', 'acquire', 'activate', 'add_header', 'add_metadata', 'add_request_id', 'add_tags', 'after_tick', 'apply_writes', 'as_dict', 'assign_to_connection', 'attach', 'auth_flow', 'auth_headers', 'authority', 
'base_url', 'build_request', 'bytesify', 'call_at', 'call_later', 'call_soon', 'can_handle_request', 'cancel', 'cast', 'channels_from_checkpoint', 'checkpoint_null_version', 'close', 'closed', 'commit', 'compile', 'configure', 'construct', 'construct_type', 'consume', 'content', 'context', 'cookies', 'copy', 'copy_with', 'create', 'create_checkpoint', 'create_future', 'custom_auth', 'data', 'debug', 'decode', 'deepcopy', 'deepvalues', 'default_headers', 'default_query', 'detach', 'detect_encoding', 'do_write', 'dumps', 'elapsed', 'empty', 'empty_checkpoint', 'encode', 'encode_host', 'encode_json', 'encode_request', 'encoding', 'end', 'enforce_bytes', 'enforce_headers', 'enforce_stream', 'enforce_url', 'ensure_config', 'enter_context', 'enumerate', 'env_var_is_set', 'export', 'extract_cookies', 'extract_header', 'extract_type_arg', 'field_get_default', 'filterwarnings', 'finditer', 'finish', 'flush', 'from_checkpoint', 'full_url', 'generate_span_id', 'generate_trace_id', 'get', 'get_all', 'get_args', 'get_callback_manager_for_config', 'get_child', 'get_comma_header', 'get_current', 'get_current_run_tree', 'get_current_span', 'get_debug', 'get_default', 'get_executor_for_config', 'get_extra_info', 'get_list', 'get_max_retries', 'get_model_config', 'get_model_fields', 'get_multipart_boundary_from_content_type', 'get_name', 'get_origin', 'get_span_context', 'get_tracing_context', 'get_value', 'getattr_static', 'handle_event', 'handle_request', 'has_expect_100_continue', 'has_expired', 'has_redirect_location', 'header_max_count', 'header_store_parse', 'headers', 'helper', 'hexdigest', 'host', 'http_version', 'ident', 'increment', 'info', 'inner', 'invoke', 'isEnabledFor', 'is_absolute_url', 'is_alive', 'is_annotated_type', 'is_available', 'is_closed', 'is_given', 'is_idle', 'is_interactive_env', 'is_iterable', 'is_iterable_type', 'is_list', 'is_list_type', 'is_literal_type', 'is_mapping', 'is_next_line_obviously_invalid_request_line', 'is_queued', 'is_recording', 
'is_relative_url', 'is_remote', 'is_required', 'is_sampled', 'is_sequence_type', 'is_set', 'is_socket_readable', 'is_success', 'is_type_alias_type', 'is_typeddict', 'is_union', 'is_union_type', 'is_valid', 'is_valid_field_name', 'isclass', 'iter_bytes', 'iter_raw', 'iterencode', 'json', 'json_default', 'json_packer', 'keys', 'kind', 'loads', 'make_cookies', 'make_request_options', 'map_exceptions', 'map_httpcore_exceptions', 'map_input', 'map_output_updates', 'map_output_values', 'match_cached_writes', 'maybe_extract_at_most', 'maybe_extract_lines', 'maybe_extract_next_line', 'maybe_read_from_SEND_RESPONSE_server', 'maybe_transform', 'merge', 'merge_configs', 'model_construct', 'model_copy', 'model_fields', 'msg', 'msg_header', 'msg_id', 'multi_items', 'netloc', 'next_event', 'node_call', 'normalize_and_validate', 'normalize_path', 'normalize_port', 'notify', 'notify_all', 'on_chain_end', 'on_chain_start', 'on_end', 'on_start', 'origin', 'our_state', 'output_writes', 'override', 'params', 'parent', 'parse', 'password', 'patch_config', 'patch_configurable', 'percent_encoded', 'planner', 'platform_headers', 'pop', 'port', 'post', 'prepare_next_tasks', 'prepare_single_task', 'process_cpu_count', 'process_event', 'push', 'put', 'put_writes', 'quote', 'raise_for_status', 'raw', 'raw_decode', 'raw_host', 'raw_items', 'raw_path', 'raw_scheme', 'read', 'read_channel', 'read_channels', 'reason_phrase', 'receive_data', 'receiving', 'recv', 'recv_multipart', 'release', 'replace', 'request', 'request_context', 'request_host', 'request_id', 'run_with_retry', 'sampled', 'schedule', 'select', 'send', 'send_data', 'send_eom', 'send_multipart', 'send_with_data_passthrough', 'sending', 'serialize', 'set', 'set_attribute', 'set_attributes', 'set_config_context', 'set_handlers', 'set_value', 'setdefault', 'should_sample', 'shutdown', 'sign', 'sleep', 'smart_deepcopy', 'start', 'start_as_current_span', 'start_next_cycle', 'start_span', 'stop', 'stream', 'stream_channels_asis', 
'strip_not_given', 'sync_auth_flow', 'synth', 'task_path_str', 'their_state', 'tick', 'time', 'to_httpx_files', 'trace_flags', 'trace_id', 'trace_state', 'tracer', 'tracing_is_enabled', 'trailing_data', 'transform', 'unquote', 'unwrap', 'update', 'urlparse', 'use_span', 'user_agent', 'username', 'utcnow', 'utcoffset', 'uuid4', 'uuid6', 'validate', 'validate_keys', 'validate_path', 'validate_python', 'validate_response_format', 'validate_type', 'value', 'wait_for_connection', 'wrapper', 'write', 'write_headers', 'write_request'], 'param_names': ['planner_prompt', 'synth_prompt']}\n", - "\n", - "## sysmon\n", - "best_updates: {'synth_prompt': 'Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\\nPlan: {plan}'}\n", - "final_synth_prompt: Start the answer exactly with [BENCH_OK]. Then answer carefully: {query}\n", - "Plan: {plan}\n", - "answer_preview: [BENCH_OK] \n", - "\n", - "### What is CRISPR?\n", - "\n", - "#### 1. Introduction to CRISPR\n", - "- **Definition**: CRISPR stands for Clustered Regularly\n", - "sysmon_summary: {'event_count': 8604, 'tgj_node_count': 8606, 'message_names': ['', '', '__bool__', '__bytes__', '__call__', '__contains__', '__copy__', '__delitem__', '__enter__', '__eq__', '__exit__', '__get__', '__getattr__', '__getitem__', '__hash__', '__iadd__', '__init__', '__instancecheck__', '__iter__', '__len__', '__new__', '__newobj__', '__post_init__', '__setattr__', '__setitem__', '__str__', '__subclasscheck__', '_add_filter', '_allow_reckless_class_checks', '_assemble_writes', '_assign_requests_to_connections', '_body_framing', '_build_headers', '_build_request', '_build_request_auth', '_call_soon', '_caller', '_checkClosed', '_check_class', '_check_closed', '_check_frozen', '_check_instance', '_clean_thread_parent_frames', '_close_connections', '_coerce_args', '_coerce_context', '_compile', '_configure', '_construct_field', '_control_branch', '_copy_immutable', '_create_exit_wrapper', '_debug', '_decode_header_lines', 
'_deepcopy_atomic', '_defaults', '_emit', '_event_pipe_gc', '_extract', '_extract_next_receive_event', '_fire_event_triggered_transitions', '_fire_state_triggered_transitions', '_first', '_get_content_decoder', '_get_debug', '_get_extra_fields_type', '_get_io_object', '_get_root', '_get_tracer_project', '_handle_fromlist', '_idempotency_key', '_is_dataclass_instance', '_is_disabled', '_is_owned', '_keep_alive', '_llm', '_maybe_override_cast_to', '_maybe_transform_key', '_merge_cookies', '_merge_headers', '_merge_mappings', '_merge_queryparams', '_merge_url', '_no_transform_needed', '_noop', '_normalize_header_key', '_normalize_header_value', '_obsolete_line_fold', '_output', '_parse', '_prepare', '_prepare_options', '_prepare_request', '_prepare_url', '_proc_input', '_process_event', '_process_events', '_process_response', '_process_response_data', '_push_cm_exit', '_push_exit_callback', '_put_checkpoint', '_receive_event', '_receive_response_body', '_receive_response_headers', '_reconstruct', '_refresh_api_key', '_replace', '_respond_to_state_changes', '_response_closed', '_run', '_run_once', '_scratchpad', '_send_event', '_send_handling_auth', '_send_handling_redirects', '_send_request_body', '_send_request_headers', '_send_single_request', '_server_switch_event', '_set_config_context', '_set_result_unless_cancelled', '_set_timeout', '_shadowed_dict', '_should_stream_response_body', '_should_unflatten_callable_args', '_splithost', '_splittag', '_splittype', '_suppress_interrupt', '_timer_handle_cancelled', '_tracing_v2_is_enabled', '_transform_recursive', '_transform_typeddict', '_transport_for_url', '_triggers', '_unset_config_context', '_validate_headers', '_validate_non_model_type', '_value_and_type_iter', '_warn_on_import', '_write', '_xxhash_str', 'acquire', 'add_header', 'add_metadata', 'add_request_id', 'add_tags', 'after_tick', 'apply_writes', 'as_dict', 'assign_to_connection', 'auth_flow', 'auth_headers', 'authority', 'base_url', 'build_request', 
'bytesify', 'call_at', 'call_later', 'call_soon', 'can_handle_request', 'cancel', 'cast', 'channels_from_checkpoint', 'checkpoint_null_version', 'close', 'commit', 'compile', 'configure', 'construct', 'construct_type', 'consume', 'content', 'cookies', 'copy', 'copy_with', 'create', 'create_checkpoint', 'create_future', 'custom_auth', 'data', 'debug', 'decode', 'deepcopy', 'deepvalues', 'default_headers', 'default_query', 'detect_encoding', 'do_write', 'dumps', 'elapsed', 'empty_checkpoint', 'encode', 'encode_host', 'encode_json', 'encode_request', 'encoding', 'enforce_bytes', 'enforce_headers', 'enforce_stream', 'enforce_url', 'ensure_config', 'enter_context', 'enumerate', 'env_var_is_set', 'extract_cookies', 'extract_type_arg', 'field_get_default', 'filterwarnings', 'finditer', 'finish', 'flush', 'from_checkpoint', 'full_url', 'get', 'get_all', 'get_args', 'get_callback_manager_for_config', 'get_child', 'get_comma_header', 'get_current_run_tree', 'get_debug', 'get_default', 'get_executor_for_config', 'get_extra_info', 'get_list', 'get_max_retries', 'get_model_config', 'get_model_fields', 'get_multipart_boundary_from_content_type', 'get_name', 'get_origin', 'get_tracing_context', 'getattr_static', 'handle_event', 'handle_request', 'has_expect_100_continue', 'has_expired', 'has_redirect_location', 'header_max_count', 'header_store_parse', 'headers', 'helper', 'host', 'http_version', 'ident', 'increment', 'info', 'inner', 'invoke', 'isEnabledFor', 'is_absolute_url', 'is_alive', 'is_annotated_type', 'is_available', 'is_closed', 'is_given', 'is_idle', 'is_interactive_env', 'is_iterable', 'is_iterable_type', 'is_list', 'is_list_type', 'is_literal_type', 'is_mapping', 'is_next_line_obviously_invalid_request_line', 'is_queued', 'is_relative_url', 'is_required', 'is_sequence_type', 'is_set', 'is_socket_readable', 'is_success', 'is_type_alias_type', 'is_typeddict', 'is_union', 'is_union_type', 'is_valid_field_name', 'isclass', 'iter_bytes', 'iter_raw', 'iterencode', 'json', 
'keys', 'loads', 'make_cookies', 'make_request_options', 'map_exceptions', 'map_httpcore_exceptions', 'map_input', 'map_output_updates', 'map_output_values', 'match_cached_writes', 'maybe_extract_at_most', 'maybe_extract_lines', 'maybe_extract_next_line', 'maybe_read_from_SEND_RESPONSE_server', 'maybe_transform', 'merge', 'merge_configs', 'model_construct', 'model_copy', 'model_fields', 'multi_items', 'netloc', 'next_event', 'normalize_and_validate', 'normalize_path', 'normalize_port', 'notify', 'notify_all', 'on_chain_end', 'on_chain_start', 'origin', 'our_state', 'output_writes', 'override', 'params', 'parent', 'parse', 'password', 'patch_config', 'patch_configurable', 'percent_encoded', 'planner', 'platform_headers', 'pop', 'port', 'post', 'prepare_next_tasks', 'prepare_single_task', 'process_cpu_count', 'process_event', 'push', 'put', 'put_writes', 'quote', 'raise_for_status', 'raw', 'raw_decode', 'raw_host', 'raw_items', 'raw_path', 'raw_scheme', 'read', 'read_channel', 'read_channels', 'reason_phrase', 'receive_data', 'recv', 'release', 'replace', 'request', 'request_context', 'request_host', 'request_id', 'run_with_retry', 'select', 'send', 'send_data', 'send_eom', 'send_with_data_passthrough', 'set', 'set_config_context', 'set_handlers', 'setdefault', 'shutdown', 'sleep', 'smart_deepcopy', 'start_next_cycle', 'stop', 'stream', 'stream_channels_asis', 'strip_not_given', 'sync_auth_flow', 'synth', 'task_path_str', 'their_state', 'tick', 'time', 'to_httpx_files', 'tracing_is_enabled', 'trailing_data', 'transform', 'unquote', 'unwrap', 'update', 'urlparse', 'user_agent', 'username', 'uuid4', 'uuid6', 'validate', 'validate_keys', 'validate_path', 'validate_python', 'validate_response_format', 'validate_type', 'value', 'wait_for_connection', 'wrapper', 'write', 'write_headers', 'write_request'], 'param_names': ['planner_prompt', 'synth_prompt']}\n", - "\n" - ] + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", 
+ "None\n", + "\n", + "llm.chat.completion\n", + "[msg]\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### observer sysmon\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['planner_node', 'synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "- Event count: `2`\n", + "\n", + "```json\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + "}\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "param:planner_prompt\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "param:synth_prompt\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. 
Then add three short titled sections w...\n", + "\n", + "\n", + "\n", + "msg:16912ef8e4a24c73\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", + "####...\n", + "\n", + "\n", + "\n", + "msg:f3668974d5434f72\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic R...\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "## sysmon\n", + "- Runtime: `77.312s`\n", + "- Baseline score: `0.822`\n", + "- Best score: `0.944`\n", + "- Score gain: `0.122`\n", + "- Best iteration: `2`\n", + "- Post-update stability std: `0.026`\n", + "- Score history: `[0.822, 0.669, 0.944, 0.923, 0.88, 0.887]`\n", + "- Best updates: `['synth_prompt']`\n", + "\n", + "### Final synth prompt\n", + "```text\n", + "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\n", + "Plan: {plan}\n", + "```\n", + "### Final answer\n", + "```text\n", + "CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\n", + "\n", + "### Mechanism of CRISPR\n", + "**Components**: CRISPR technology primarily consists of the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. 
\n", + "**Process**: The CRISPR process involves the gRNA binding to the target DNA sequence, the Cas9 enzyme ...\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### backend sysmon\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['planner_node', 'synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "- Event count: `2`\n", + "\n", + "```json\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + "}\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "param:planner_prompt\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "param:synth_prompt\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. 
Then add three short titled sections w...\n", + "\n", + "\n", + "\n", + "msg:259d9c51cfc54755\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", + "####...\n", + "\n", + "\n", + "\n", + "msg:11a94c306b234ca1\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': "CRISPR (Clustered Regularly Interspaced Short Palindromic R...\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "[{'config': 'trace',\n", + " 'runtime_s': 71.749,\n", + " 'baseline_score': 0.752,\n", + " 'best_score': 0.928,\n", + " 'score_gain': 0.176,\n", + " 'best_iteration': 4,\n", + " 'score_history': [0.752, 0.805, 0.92, 0.898, 0.928, 0.923],\n", + " 'stability_std': 0.003,\n", + " 'best_updates': {},\n", + " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'final_answer': \"CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n\\n### Mechanism of CRISPR\\n**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \\n**Process**: The CRISPR system works by first introducing the gRNA into a cell, where it binds to the target DNA sequence. The Cas9 enzyme then cuts the DNA at this location, leading to a double-strand break. 
The cell's natural repair mechanisms can then be harnessed to introduce desired changes, either by inserting new genetic material or by knocking out genes.\\n\\n### Applications of CRISPR\\n**Medical**: CRISPR has potential applications in gene therapy for genetic disorders like sickle cell anemia and cystic fibrosis, as well as in cancer research to target and modify cancer cells. \\n**Agricultural**: In agriculture\",\n", + " 'answer_preview': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n...',\n", + " 'observers': [],\n", + " 'views': [{'carrier': 'trace',\n", + " 'origin': 'backend',\n", + " 'doc': {'tgj': '1.0',\n", + " 'run_id': 'compare',\n", + " 'agent_id': 'trace',\n", + " 'graph_id': 'trace',\n", + " 'scope': 'trace/backend',\n", + " 'nodes': [{'id': 'state',\n", + " 'kind': 'value',\n", + " 'name': 'state',\n", + " 'value': {'query': 'What is CRISPR?'},\n", + " 'description': '[Node]'},\n", + " {'id': 'make_trace_case.planner_node',\n", + " 'kind': 'message',\n", + " 'name': 'make_trace_case.planner_node',\n", + " 'op': 'make_trace_case.planner_node',\n", + " 'description': '[make_trace_case.planner_node]',\n", + " 'inputs': {'in_0': {'ref': 'state'}},\n", + " 'output': {'name': 'make_trace_case.planner_node:out',\n", + " 'value': {'query': 'What is CRISPR?',\n", + " 'plan': '### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. **Introduction to CRISPR**\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\n - Historical Context: Brief overview of its discovery in bacteria and its adaptation for genetic engineering.\\n\\n2. 
**Mechanism of CRISPR**\\n - Components: Describe the key components (Cas9 enzyme, guide RNA).\\n - Process: Outline the steps of how CRISPR works (targeting DNA, cutting, and repairing).\\n\\n3. **Applications of CRISPR**\\n - Medical: Discuss potential uses in gene therapy, treatment of genetic disorders, and cancer research.\\n - Agricultural: Explain how CRISPR is used to enhance crop resilience and yield.\\n - Industrial: Mention applications in bioengineering and synthetic biology.\\n\\n4. **Ethical Considerations**\\n - Discuss the ethical implications'}}},\n", + " {'id': 'make_trace_case.synth_node',\n", + " 'kind': 'message',\n", + " 'name': 'make_trace_case.synth_node',\n", + " 'op': 'make_trace_case.synth_node',\n", + " 'description': '[make_trace_case.synth_node]',\n", + " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", + " 'output': {'name': 'make_trace_case.synth_node:out',\n", + " 'value': {'final_answer': }}},\n", + " {'id': 'planner_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'value': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'description': '[Parameter]'},\n", + " {'id': 'synth_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'description': '[Parameter]'}]},\n", + " 'summary': {'node_count': 5,\n", + " 'message_names': ['make_trace_case.planner_node',\n", + " 'make_trace_case.synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Answer directly in the first sentence. 
Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'}}}]},\n", + " {'config': 'trace+otel',\n", + " 'runtime_s': 76.288,\n", + " 'baseline_score': 0.822,\n", + " 'best_score': 0.95,\n", + " 'score_gain': 0.128,\n", + " 'best_iteration': 2,\n", + " 'score_history': [0.822, 0.715, 0.95, 0.937, 0.907, 0.885],\n", + " 'stability_std': 0.025,\n", + " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add two short titled sections with concrete details: {query}\\nPlan: {plan}'},\n", + " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'final_answer': 'CRISPR, or Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n\\n### Mechanism of CRISPR\\nCRISPR functions through a combination of specific components: CRISPR sequences in bacterial genomes store segments of viral DNA, while the Cas9 protein acts as a molecular scissors that cuts DNA. 
The process begins with guide RNA, which is designed to match a specific DNA sequence, directing Cas9 to the target site for cleavage, leading to DNA breaks that can be repaired through cellular mechanisms, allowing for gene editing.\\n\\n### Applications of CRISPR\\nCRISPR has a wide range of applications, including in medicine for gene therapy to treat genetic disorders and cancer, in agriculture for developing crops with improved traits such as pest resistance, and in industrial settings for biomanufacturing processes that enhance efficiency and sustainability.\\n\\n### Caveats and Ethical Considerations\\nWhile CRISPR holds great promise, there are important caveats, including potential off-target effects where unintended parts of the',\n", + " 'answer_preview': 'CRISPR, or Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organism...',\n", + " 'observers': ['otel'],\n", + " 'views': [{'carrier': 'trace',\n", + " 'origin': 'backend',\n", + " 'doc': {'tgj': '1.0',\n", + " 'run_id': 'compare',\n", + " 'agent_id': 'trace+otel',\n", + " 'graph_id': 'trace',\n", + " 'scope': 'trace+otel/backend',\n", + " 'nodes': [{'id': 'state',\n", + " 'kind': 'value',\n", + " 'name': 'state',\n", + " 'value': {'query': 'What is CRISPR?'},\n", + " 'description': '[Node]'},\n", + " {'id': 'make_trace_case.planner_node',\n", + " 'kind': 'message',\n", + " 'name': 'make_trace_case.planner_node',\n", + " 'op': 'make_trace_case.planner_node',\n", + " 'description': '[make_trace_case.planner_node]',\n", + " 'inputs': {'in_0': {'ref': 'state'}},\n", + " 'output': {'name': 'make_trace_case.planner_node:out',\n", + " 'value': {'query': 'What is CRISPR?',\n", + " 'plan': '### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. 
**Introduction to CRISPR**\\n - Definition: Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR) as a revolutionary gene-editing technology.\\n - Brief history: Discovery in bacteria and adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components:\\n - CRISPR sequences: How they store viral DNA.\\n - Cas9 protein: The role of the enzyme in cutting DNA.\\n - Process:\\n - Guide RNA: How it directs Cas9 to the target DNA sequence.\\n - DNA cleavage: The mechanism of cutting and the subsequent repair processes.\\n\\n3. **Applications of CRISPR**\\n - Medical: Gene therapy, potential cures for genetic disorders, cancer research.\\n - Agricultural: Crop improvement, pest resistance, and sustainable farming practices.\\n - Industrial: Biomanufacturing and bioengineering.\\n\\n'}}},\n", + " {'id': 'make_trace_case.synth_node',\n", + " 'kind': 'message',\n", + " 'name': 'make_trace_case.synth_node',\n", + " 'op': 'make_trace_case.synth_node',\n", + " 'description': '[make_trace_case.synth_node]',\n", + " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", + " 'output': {'name': 'make_trace_case.synth_node:out',\n", + " 'value': {'final_answer': }}},\n", + " {'id': 'planner_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'value': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'description': '[Parameter]'},\n", + " {'id': 'synth_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'description': '[Parameter]'}]},\n", + " 'summary': {'node_count': 5,\n", + " 'message_names': ['make_trace_case.planner_node',\n", + " 'make_trace_case.synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'}}},\n", + " {'carrier': 'otel',\n", + " 'origin': 'observer',\n", + " 'doc': {'version': 'trace-json/1.0+otel',\n", + " 'agent': {'id': 'trace+otel', 'service': 'trace+otel'},\n", + " 'otel_meta': {'trace_id': 'e084ed906c9eb65d3e02bdce213a13d5'},\n", + " 'nodes': {'trace+otel:param_planner_prompt': {'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'data': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'info': {'otel': {'span_id': 'eb31066d3502e810'}}},\n", + " 'trace+otel:make_trace_case.planner_node:25': {'kind': 'msg',\n", + " 'name': 'make_trace_case.planner_node',\n", + " 'op': 'unspecified',\n", + " 'inputs': {'x': \"lit:{'query': 'What is CRISPR?'}\",\n", + " 'index': 'lit:query',\n", + " 'state': \"lit:{'query': 'What is CRISPR?'}\",\n", + " 'str:75': 'lit:query',\n", + " 'getitem:75': 'langgraph-agent-otel-observer:getitem:75',\n", + " 'state25_copy:0': \"lit:{'query': 'What is CRISPR?'}\",\n", + " 'planner_prompt:1': 'lit:Create a short plan for: {query}',\n", + " 'param_planner_prompt': 'trace+otel:param_planner_prompt'},\n", + " 'data': {'message_id': 'make_trace_case.planner_node:25'},\n", + " 'info': {'otel': {'trace_id': '9efac139be15dd6c8fd007efaa723dd4',\n", + " 'span_id': 'eb31066d3502e810',\n", + " 'parent_span_id': '',\n", + " 'service': 
'trace+otel',\n", + " 'temporal_ignore': False}}},\n", + " 'trace+otel:param_synth_prompt': {'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'data': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'info': {'otel': {'span_id': 'c9bb4019e1243e7f'}}},\n", + " 'trace+otel:make_trace_case.synth_node:25': {'kind': 'msg',\n", + " 'name': 'make_trace_case.synth_node',\n", + " 'op': 'unspecified',\n", + " 'inputs': {'x': \"lit:{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n#### Outline:\\\\n\\\\n1. **Introduction to CRISPR**\\\\n - Definition: Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR) as a revolutionary gene-editing technology.\\\\n - Brief history: Discovery in bacteria and adaptation for genetic engineering.\\\\n\\\\n2. **Mechanism of CRISPR**\\\\n - Componen…\",\n", + " 'index': 'lit:plan',\n", + " 'state': 'langgraph-agent-otel-observer:make_trace_case.planner_node:25',\n", + " 'str:76': 'lit:query',\n", + " 'make_trace_case.planner_node25_copy:0': \"lit:{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n#### Outline:\\\\n\\\\n1. **Introduction to CRISPR**\\\\n - Definition: Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR) as a revolutionary gene-editing technology.\\\\n - Brief history: Discovery in bacteria and adaptation for genetic engineering.\\\\n\\\\n2. 
**Mechanism of CRISPR**\\\\n - Componen…\",\n", + " 'str:77': 'lit:plan',\n", + " 'getitem:76': 'langgraph-agent-otel-observer:getitem:76',\n", + " 'synth_prompt:1': 'lit:Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'getitem:77': 'langgraph-agent-otel-observer:getitem:77',\n", + " 'parent': 'trace+otel:make_trace_case.planner_node:25',\n", + " 'param_synth_prompt': 'trace+otel:param_synth_prompt'},\n", + " 'data': {'message_id': 'make_trace_case.synth_node:25'},\n", + " 'info': {'otel': {'trace_id': 'e084ed906c9eb65d3e02bdce213a13d5',\n", + " 'span_id': 'c9bb4019e1243e7f',\n", + " 'parent_span_id': 'eb31066d3502e810',\n", + " 'service': 'trace+otel',\n", + " 'temporal_ignore': False}}}},\n", + " 'context': {}},\n", + " 'summary': {'node_count': 4,\n", + " 'message_names': ['make_trace_case.planner_node',\n", + " 'make_trace_case.synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'span_count': 2,\n", + " 'span_names': ['make_trace_case.planner_node',\n", + " 'make_trace_case.synth_node'],\n", + " 'param_keys': ['param.planner_prompt',\n", + " 'param.planner_prompt.trainable',\n", + " 'param.synth_prompt',\n", + " 'param.synth_prompt.trainable']}}]},\n", + " {'config': 'otel',\n", + " 'runtime_s': 80.465,\n", + " 'baseline_score': 0.669,\n", + " 'best_score': 0.95,\n", + " 'score_gain': 0.281,\n", + " 'best_iteration': 4,\n", + " 'score_history': [0.669, 0.842, 0.92, 0.907, 0.95, 0.922],\n", + " 'stability_std': 0.014,\n", + " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'final_answer': \"CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n\\n### Mechanism of CRISPR\\n**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \\n**Process**: The CRISPR system works by first using the gRNA to locate the target DNA sequence. Once bound, the Cas9 enzyme makes a double-strand break in the DNA. 
The cell's natural repair mechanisms then kick in, allowing for either the insertion of new genetic material or the deletion of existing sequences.\\n\\n### Applications of CRISPR\\n**Medical**: CRISPR has potential applications in gene therapy for genetic disorders like cystic fibrosis and sickle cell anemia, as well as in cancer research to target and modify cancer cells. \\n**Agricultural**: In agriculture, CRISPR is used to create crops that are\",\n", + " 'answer_preview': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n...',\n", + " 'observers': [],\n", + " 'views': [{'carrier': 'otel',\n", + " 'origin': 'backend',\n", + " 'doc': {'version': 'trace-json/1.0+otel',\n", + " 'agent': {'id': 'otel', 'service': 'otel'},\n", + " 'otel_meta': {'trace_id': 'f8b383c2cc0ce3915b61ae951e18c79d'},\n", + " 'nodes': {'otel:param_planner_prompt': {'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'data': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'info': {'otel': {'span_id': '88723fd007309194'}}},\n", + " 'otel:88723fd007309194': {'kind': 'msg',\n", + " 'name': 'planner_node',\n", + " 'op': 'llm_call',\n", + " 'inputs': {'gen_ai.prompt': 'Create a short plan for: What is CRISPR?',\n", + " 'user_query': 'otel:What is CRISPR?',\n", + " 'query': 'otel:What is CRISPR?',\n", + " 'param_planner_prompt': 'otel:param_planner_prompt'},\n", + " 'data': {'message_id': None},\n", + " 'info': {'otel': {'trace_id': 'f8b383c2cc0ce3915b61ae951e18c79d',\n", + " 'span_id': '88723fd007309194',\n", + " 'parent_span_id': None,\n", + " 'service': 'otel',\n", + " 'temporal_ignore': False}}},\n", + " 'otel:6fde46abcebec3c1': {'kind': 'msg',\n", + " 'name': 'llm.chat.completion',\n", + " 'op': 'unspecified',\n", + " 'inputs': {'parent': 'otel:88723fd007309194'},\n", + " 'data': {'message_id': None},\n", + " 'info': {'otel': 
{'trace_id': 'f8b383c2cc0ce3915b61ae951e18c79d',\n", + " 'span_id': '6fde46abcebec3c1',\n", + " 'parent_span_id': '88723fd007309194',\n", + " 'service': 'otel',\n", + " 'temporal_ignore': True}}},\n", + " 'otel:param_synth_prompt': {'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'data': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'info': {'otel': {'span_id': 'd8b85ff3c280e8d1'}}},\n", + " 'otel:d8b85ff3c280e8d1': {'kind': 'msg',\n", + " 'name': 'synth_node',\n", + " 'op': 'llm_call',\n", + " 'inputs': {'gen_ai.prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: What is CRISPR?\\nPlan: ### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. **Introduction to CRISPR**\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\n - Historical Context: Brief overview of its discovery in bacteria and its adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components: Describe the key components (Cas9 enzyme, guide RNA).\\n - Process: Outline the steps of how CRISPR works (targeting DNA, cutting, and repairing).\\n\\n3. **Applications of CRISPR**\\n - Medical: Discuss potential uses in gene therapy, treatment of genetic disorders, and cancer research.\\n - Agricultural: Explain how CRISPR is used to enhance crop resilience and yield.\\n - Industrial: Mention applications in bioengineering and synthetic biology.\\n\\n4. 
**Ethical Considerations**\\n - Discuss the ethical implications',\n", + " 'user_query': 'otel:What is CRISPR?',\n", + " 'query': 'otel:What is CRISPR?',\n", + " 'plan': '### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. **Introduction to CRISPR**\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\n - Historical Context: Brief overview of its discovery in bacteria and its adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components: Describe the key components (Cas9 enzyme, guide RNA).\\n - Process: Outline the steps of how CRISPR works (targeting DNA, cutting, and repairing).\\n\\n3. **Applications of CRISPR**\\n - Medical: Discuss potential uses in gene therapy, treatment of genetic disorders, and cancer research.\\n - Agricultural: Explain how CRISPR is used to enhance crop resilience and yield.\\n - Industrial: Mention applications in bioengineering and synthetic biology.\\n\\n4. 
**Ethical Considerations**\\n - Discuss the ethical implications',\n", + " 'parent': 'otel:88723fd007309194',\n", + " 'param_synth_prompt': 'otel:param_synth_prompt'},\n", + " 'data': {'message_id': None},\n", + " 'info': {'otel': {'trace_id': 'f8b383c2cc0ce3915b61ae951e18c79d',\n", + " 'span_id': 'd8b85ff3c280e8d1',\n", + " 'parent_span_id': '88723fd007309194',\n", + " 'service': 'otel',\n", + " 'temporal_ignore': False}}},\n", + " 'otel:b07a737c463ca52f': {'kind': 'msg',\n", + " 'name': 'llm.chat.completion',\n", + " 'op': 'unspecified',\n", + " 'inputs': {'parent': 'otel:d8b85ff3c280e8d1'},\n", + " 'data': {'message_id': None},\n", + " 'info': {'otel': {'trace_id': 'f8b383c2cc0ce3915b61ae951e18c79d',\n", + " 'span_id': 'b07a737c463ca52f',\n", + " 'parent_span_id': 'd8b85ff3c280e8d1',\n", + " 'service': 'otel',\n", + " 'temporal_ignore': True}}}},\n", + " 'context': {}},\n", + " 'summary': {'node_count': 6,\n", + " 'message_names': ['llm.chat.completion',\n", + " 'llm.chat.completion',\n", + " 'planner_node',\n", + " 'synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'span_count': 5,\n", + " 'span_names': ['llm.chat.completion',\n", + " 'planner_node',\n", + " 'llm.chat.completion',\n", + " 'synth_node',\n", + " 'langgraph-agent.invoke'],\n", + " 'param_keys': ['param.planner_prompt',\n", + " 'param.planner_prompt.trainable',\n", + " 'param.synth_prompt',\n", + " 'param.synth_prompt.trainable']}}]},\n", + " {'config': 'trace+sysmon',\n", + " 'runtime_s': 78.316,\n", + " 'baseline_score': 0.732,\n", + " 'best_score': 0.923,\n", + " 'score_gain': 0.192,\n", + " 'best_iteration': 4,\n", + " 'score_history': [0.732, 0.714, 0.92, 0.92, 0.923, 0.88],\n", + " 'stability_std': 0.022,\n", + " 'best_updates': {},\n", + " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows for precise modifications to DNA in living organisms.\\n\\n### Mechanism of CRISPR\\nCRISPR consists of two main components: CRISPR sequences, which serve as a genetic memory of past viral infections, and the Cas9 enzyme, which acts as molecular scissors to cut DNA. 
The process begins with the guide RNA, which is designed to match a specific DNA sequence, directing the Cas9 enzyme to the target site for cleavage, leading to DNA repair mechanisms that can introduce desired genetic changes.\\n\\n### Applications of CRISPR\\nCRISPR has a wide range of applications, including in medicine for gene therapy to treat genetic disorders and cancer, in agriculture for developing crops with improved traits such as pest resistance and enhanced nutritional value, and in industrial settings for biomanufacturing processes that require precise genetic modifications.\\n\\n### Ethical Considerations\\nThe use of CRISPR raises significant ethical concerns, particularly regarding potential unintended consequences of gene editing, the implications of germline modifications that can be inherited, and the need for regulations to prevent misuse in areas',\n", + " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows for precise modifications to DNA in living organisms.\\n\\n### Mechanism of CRISPR\\nCRISPR consists of two main componen...',\n", + " 'observers': ['sysmon'],\n", + " 'views': [{'carrier': 'trace',\n", + " 'origin': 'backend',\n", + " 'doc': {'tgj': '1.0',\n", + " 'run_id': 'compare',\n", + " 'agent_id': 'trace+sysmon',\n", + " 'graph_id': 'trace',\n", + " 'scope': 'trace+sysmon/backend',\n", + " 'nodes': [{'id': 'state',\n", + " 'kind': 'value',\n", + " 'name': 'state',\n", + " 'value': {'query': 'What is CRISPR?'},\n", + " 'description': '[Node]'},\n", + " {'id': 'make_trace_case.planner_node',\n", + " 'kind': 'message',\n", + " 'name': 'make_trace_case.planner_node',\n", + " 'op': 'make_trace_case.planner_node',\n", + " 'description': '[make_trace_case.planner_node]',\n", + " 'inputs': {'in_0': {'ref': 'state'}},\n", + " 'output': {'name': 'make_trace_case.planner_node:out',\n", + " 'value': {'query': 'What is CRISPR?',\n", + " 'plan': '### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear 
and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. **Introduction to CRISPR**\\n - Definition: Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR) as a revolutionary gene-editing technology.\\n - Brief history: Discovery and development timeline.\\n\\n2. **Mechanism of CRISPR**\\n - Components:\\n - CRISPR sequences: Explanation of how they function as a genetic memory.\\n - Cas9 enzyme: Role in cutting DNA.\\n - Process:\\n - Guide RNA: How it directs Cas9 to the target DNA.\\n - DNA cleavage: Mechanism of action and repair processes.\\n\\n3. **Applications of CRISPR**\\n - Medical: Gene therapy, treatment of genetic disorders, cancer research.\\n - Agricultural: Crop improvement, pest resistance, and sustainability.\\n - Industrial: Biomanufacturing and bioengineering.\\n\\n4. **Ethical Considerations**\\n -'}}},\n", + " {'id': 'make_trace_case.synth_node',\n", + " 'kind': 'message',\n", + " 'name': 'make_trace_case.synth_node',\n", + " 'op': 'make_trace_case.synth_node',\n", + " 'description': '[make_trace_case.synth_node]',\n", + " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", + " 'output': {'name': 'make_trace_case.synth_node:out',\n", + " 'value': {'final_answer': }}},\n", + " {'id': 'planner_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'value': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'description': '[Parameter]'},\n", + " {'id': 'synth_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'description': '[Parameter]'}]},\n", + " 'summary': {'node_count': 5,\n", + " 'message_names': ['make_trace_case.planner_node',\n", + " 'make_trace_case.synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'}}},\n", + " {'carrier': 'sysmon',\n", + " 'origin': 'observer',\n", + " 'doc': {'tgj': '1.0',\n", + " 'run_id': 'compare',\n", + " 'agent_id': 'langgraph-agent-sysmon-observer',\n", + " 'graph_id': 'trace+sysmon',\n", + " 'scope': 'trace+sysmon/observer',\n", + " 'nodes': {'param:planner_prompt': {'id': 'param:planner_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'value': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'description': '[prompt]'},\n", + " 'param:synth_prompt': {'id': 'param:synth_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'description': '[prompt]'},\n", + " 'msg:e4b0ccde1af94263': {'id': 'msg:e4b0ccde1af94263',\n", + " 'kind': 'message',\n", + " 'name': 'planner_node',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:496',\n", + " 'inputs': {},\n", + " 'output': {'name': 'planner_node:out',\n", + " 'value': \"{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n###\"},\n", + " 'info': {'sysmon': {'duration_ns': 4389947208,\n", + " 'thread_id': 128068499416896}}},\n", + " 'msg:363310e6046f4844': {'id': 'msg:363310e6046f4844',\n", + " 'kind': 'message',\n", + " 'name': 'synth_node',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:507',\n", + " 'inputs': {},\n", + " 'output': {'name': 'synth_node:out',\n", + " 'value': \"{'final_answer': }\"},\n", + " 'info': {'sysmon': {'duration_ns': 2210177279,\n", + " 'thread_id': 128068499416896}}}}},\n", + " 'summary': {'node_count': 4,\n", + " 'message_names': ['planner_node', 'synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'event_count': 2}}]},\n", + " {'config': 'trace+otel+sysmon',\n", + " 'runtime_s': 77.963,\n", + " 'baseline_score': 0.714,\n", + " 'best_score': 0.923,\n", + " 'score_gain': 0.21,\n", + " 'best_iteration': 3,\n", + " 'score_history': [0.714, 0.712, 0.92, 0.923, 0.88, 0.898],\n", + " 'stability_std': 0.018,\n", + " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n\\n### Mechanism of CRISPR\\n**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \\n**Process**: The CRISPR system works by first introducing the gRNA into a cell, where it binds to the target DNA sequence. The Cas9 enzyme then cuts the DNA at this location, allowing for either the disruption of the gene or the insertion of new genetic material during the repair process.\\n\\n### Applications of CRISPR\\n**Medical**: CRISPR has potential applications in gene therapy for genetic disorders like cystic fibrosis and sickle cell anemia, as well as in cancer research to target and modify cancer cells. 
\\n**Agricultural**: It is used to create genetically modified organisms (GMOs) that are resistant to diseases,',\n", + " 'answer_preview': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n...',\n", + " 'observers': ['sysmon', 'otel'],\n", + " 'views': [{'carrier': 'trace',\n", + " 'origin': 'backend',\n", + " 'doc': {'tgj': '1.0',\n", + " 'run_id': 'compare',\n", + " 'agent_id': 'trace+otel+sysmon',\n", + " 'graph_id': 'trace',\n", + " 'scope': 'trace+otel+sysmon/backend',\n", + " 'nodes': [{'id': 'state',\n", + " 'kind': 'value',\n", + " 'name': 'state',\n", + " 'value': {'query': 'What is CRISPR?'},\n", + " 'description': '[Node]'},\n", + " {'id': 'make_trace_case.planner_node',\n", + " 'kind': 'message',\n", + " 'name': 'make_trace_case.planner_node',\n", + " 'op': 'make_trace_case.planner_node',\n", + " 'description': '[make_trace_case.planner_node]',\n", + " 'inputs': {'in_0': {'ref': 'state'}},\n", + " 'output': {'name': 'make_trace_case.planner_node:out',\n", + " 'value': {'query': 'What is CRISPR?',\n", + " 'plan': '### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. **Introduction to CRISPR**\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\n - Historical Context: Brief overview of its discovery in bacteria and its adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components: Describe the key components (Cas9 enzyme, guide RNA).\\n - Process: Outline the steps of how CRISPR works (targeting DNA, cutting, and repairing).\\n\\n3. 
**Applications of CRISPR**\\n - Medical: Discuss potential uses in gene therapy, treatment of genetic disorders, and cancer research.\\n - Agricultural: Explain its role in developing disease-resistant crops and improving food security.\\n - Industrial: Mention applications in bioengineering and synthetic biology.\\n\\n4. **Ethical Considerations**\\n - Discuss the ethical implications of'}}},\n", + " {'id': 'make_trace_case.synth_node',\n", + " 'kind': 'message',\n", + " 'name': 'make_trace_case.synth_node',\n", + " 'op': 'make_trace_case.synth_node',\n", + " 'description': '[make_trace_case.synth_node]',\n", + " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", + " 'output': {'name': 'make_trace_case.synth_node:out',\n", + " 'value': {'final_answer': }}},\n", + " {'id': 'planner_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'value': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'description': '[Parameter]'},\n", + " {'id': 'synth_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'description': '[Parameter]'}]},\n", + " 'summary': {'node_count': 5,\n", + " 'message_names': ['make_trace_case.planner_node',\n", + " 'make_trace_case.synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}'}}},\n", + " {'carrier': 'sysmon',\n", + " 'origin': 'observer',\n", + " 'doc': {'tgj': '1.0',\n", + " 'run_id': 'compare',\n", + " 'agent_id': 'langgraph-agent-sysmon-observer',\n", + " 'graph_id': 'trace+otel+sysmon',\n", + " 'scope': 'trace+otel+sysmon/observer',\n", + " 'nodes': {'param:planner_prompt': {'id': 'param:planner_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'value': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'description': '[prompt]'},\n", + " 'param:synth_prompt': {'id': 'param:synth_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'description': '[prompt]'},\n", + " 'msg:f8b47aac286c4268': {'id': 'msg:f8b47aac286c4268',\n", + " 'kind': 'message',\n", + " 'name': 'planner_node',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:496',\n", + " 'inputs': {},\n", + " 'output': {'name': 'planner_node:out',\n", + " 'value': \"{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n###\"},\n", + " 'info': {'sysmon': {'duration_ns': 2640491933,\n", + " 'thread_id': 128068499416896}}},\n", + " 'msg:03dd0fe79ab14207': {'id': 'msg:03dd0fe79ab14207',\n", + " 'kind': 'message',\n", + " 'name': 'synth_node',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:507',\n", + " 'inputs': {},\n", + " 'output': {'name': 'synth_node:out',\n", + " 'value': \"{'final_answer': }\"},\n", + " 'info': 
{'sysmon': {'duration_ns': 2981741181,\n", + " 'thread_id': 128068499416896}}}}},\n", + " 'summary': {'node_count': 4,\n", + " 'message_names': ['planner_node', 'synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'event_count': 2}},\n", + " {'carrier': 'otel',\n", + " 'origin': 'observer',\n", + " 'doc': {'version': 'trace-json/1.0+otel',\n", + " 'agent': {'id': 'trace+otel+sysmon', 'service': 'trace+otel+sysmon'},\n", + " 'otel_meta': {'trace_id': '7de6ce7664146c258b9e14edf163903d'},\n", + " 'nodes': {'trace+otel+sysmon:param_planner_prompt': {'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'data': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'info': {'otel': {'span_id': '135a9e1427adf34d'}}},\n", + " 'trace+otel+sysmon:make_trace_case.planner_node:51': {'kind': 'msg',\n", + " 'name': 'make_trace_case.planner_node',\n", + " 'op': 'unspecified',\n", + " 'inputs': {'x': \"lit:{'query': 'What is CRISPR?'}\",\n", + " 'index': 'lit:query',\n", + " 'state': \"lit:{'query': 'What is CRISPR?'}\",\n", + " 'state51_copy:0': \"lit:{'query': 'What is CRISPR?'}\",\n", + " 'str:153': 'lit:query',\n", + " 'planner_prompt:3': 'lit:Create a short plan for: {query}',\n", + " 'getitem:153': 'langgraph-agent-otel-observer:getitem:153',\n", + " 'param_planner_prompt': 'trace+otel+sysmon:param_planner_prompt'},\n", + " 'data': {'message_id': 'make_trace_case.planner_node:51'},\n", + " 'info': {'otel': {'trace_id': 'c6e4d39130951e78f594615b3d418776',\n", + " 'span_id': '135a9e1427adf34d',\n", + " 'parent_span_id': '',\n", + " 'service': 'trace+otel+sysmon',\n", + " 
'temporal_ignore': False}}},\n", + " 'trace+otel+sysmon:param_synth_prompt': {'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'data': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'info': {'otel': {'span_id': '759cde4e161d78f2'}}},\n", + " 'trace+otel+sysmon:make_trace_case.synth_node:51': {'kind': 'msg',\n", + " 'name': 'make_trace_case.synth_node',\n", + " 'op': 'unspecified',\n", + " 'inputs': {'x': \"lit:{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n#### Outline:\\\\n\\\\n1. **Introduction to CRISPR**\\\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\\\n - Historical Context: Brief overview of its discovery in bacteria and its adaptation for genetic engineering.\\\\n\\\\n2.…\",\n", + " 'index': 'lit:plan',\n", + " 'state': 'langgraph-agent-otel-observer:make_trace_case.planner_node:51',\n", + " 'getitem:155': 'langgraph-agent-otel-observer:getitem:155',\n", + " 'synth_prompt:3': 'lit:Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'make_trace_case.planner_node51_copy:0': \"lit:{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n#### Outline:\\\\n\\\\n1. 
**Introduction to CRISPR**\\\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\\\n - Historical Context: Brief overview of its discovery in bacteria and its adaptation for genetic engineering.\\\\n\\\\n2.…\",\n", + " 'str:155': 'lit:plan',\n", + " 'str:154': 'lit:query',\n", + " 'getitem:154': 'langgraph-agent-otel-observer:getitem:154',\n", + " 'parent': 'trace+otel+sysmon:make_trace_case.planner_node:51',\n", + " 'param_synth_prompt': 'trace+otel+sysmon:param_synth_prompt'},\n", + " 'data': {'message_id': 'make_trace_case.synth_node:51'},\n", + " 'info': {'otel': {'trace_id': '7de6ce7664146c258b9e14edf163903d',\n", + " 'span_id': '759cde4e161d78f2',\n", + " 'parent_span_id': '135a9e1427adf34d',\n", + " 'service': 'trace+otel+sysmon',\n", + " 'temporal_ignore': False}}}},\n", + " 'context': {}},\n", + " 'summary': {'node_count': 4,\n", + " 'message_names': ['make_trace_case.planner_node',\n", + " 'make_trace_case.synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'span_count': 2,\n", + " 'span_names': ['make_trace_case.planner_node',\n", + " 'make_trace_case.synth_node'],\n", + " 'param_keys': ['param.planner_prompt',\n", + " 'param.planner_prompt.trainable',\n", + " 'param.synth_prompt',\n", + " 'param.synth_prompt.trainable']}}]},\n", + " {'config': 'otel+sysmon',\n", + " 'runtime_s': 93.479,\n", + " 'baseline_score': 0.842,\n", + " 'best_score': 0.95,\n", + " 'score_gain': 0.108,\n", + " 'best_iteration': 2,\n", + " 'score_history': [0.842, 0.715, 0.95, 0.923, 0.923, 0.887],\n", + " 'stability_std': 0.023,\n", + " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add two short titled sections with concrete details: {query}\\nPlan: {plan}'},\n", + " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n\\n### Mechanism of CRISPR\\n**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \\n**Process**: The CRISPR system works by first introducing the gRNA into a cell, where it binds to the target DNA sequence. The Cas9 enzyme then cuts the DNA at this location, allowing for either the disruption of the gene or the insertion of new genetic material during the repair process.\\n\\n### Applications of CRISPR\\n**Medical**: CRISPR has potential applications in gene therapy for genetic disorders like sickle cell anemia and cystic fibrosis, as well as in cancer research to target and modify cancer cells. 
\\n**Agricultural**: In agriculture, CRISPR is used to create crops that are more resistant to pests,',\n", + " 'answer_preview': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n...',\n", + " 'observers': ['sysmon'],\n", + " 'views': [{'carrier': 'otel',\n", + " 'origin': 'backend',\n", + " 'doc': {'version': 'trace-json/1.0+otel',\n", + " 'agent': {'id': 'otel+sysmon', 'service': 'otel+sysmon'},\n", + " 'otel_meta': {'trace_id': '82b3197cb086218de00a317fce630480'},\n", + " 'nodes': {'otel+sysmon:param_planner_prompt': {'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'data': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'info': {'otel': {'span_id': '37486a6f8ae2a237'}}},\n", + " 'otel+sysmon:37486a6f8ae2a237': {'kind': 'msg',\n", + " 'name': 'planner_node',\n", + " 'op': 'llm_call',\n", + " 'inputs': {'gen_ai.prompt': 'Create a short plan for: What is CRISPR?',\n", + " 'user_query': 'otel+sysmon:What is CRISPR?',\n", + " 'query': 'otel+sysmon:What is CRISPR?',\n", + " 'param_planner_prompt': 'otel+sysmon:param_planner_prompt'},\n", + " 'data': {'message_id': None},\n", + " 'info': {'otel': {'trace_id': '82b3197cb086218de00a317fce630480',\n", + " 'span_id': '37486a6f8ae2a237',\n", + " 'parent_span_id': None,\n", + " 'service': 'otel+sysmon',\n", + " 'temporal_ignore': False}}},\n", + " 'otel+sysmon:1888c14cd5a95e1d': {'kind': 'msg',\n", + " 'name': 'llm.chat.completion',\n", + " 'op': 'unspecified',\n", + " 'inputs': {'parent': 'otel+sysmon:37486a6f8ae2a237'},\n", + " 'data': {'message_id': None},\n", + " 'info': {'otel': {'trace_id': '82b3197cb086218de00a317fce630480',\n", + " 'span_id': '1888c14cd5a95e1d',\n", + " 'parent_span_id': '37486a6f8ae2a237',\n", + " 'service': 'otel+sysmon',\n", + " 'temporal_ignore': True}}},\n", + " 'otel+sysmon:param_synth_prompt': {'kind': 'parameter',\n", + " 
'name': 'synth_prompt',\n", + " 'data': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'info': {'otel': {'span_id': '6b579b7918ca7492'}}},\n", + " 'otel+sysmon:6b579b7918ca7492': {'kind': 'msg',\n", + " 'name': 'synth_node',\n", + " 'op': 'llm_call',\n", + " 'inputs': {'gen_ai.prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: What is CRISPR?\\nPlan: ### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. **Introduction to CRISPR**\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\n - Historical Context: Briefly discuss its discovery in bacteria and its adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components: Describe the key components (Cas9 enzyme, guide RNA).\\n - Process: Outline the steps of how CRISPR works (targeting DNA, cutting, and repairing).\\n\\n3. **Applications of CRISPR**\\n - Medical: Discuss potential uses in gene therapy, treatment of genetic disorders, and cancer research.\\n - Agricultural: Explain how CRISPR is used to enhance crop resilience and yield.\\n - Industrial: Mention applications in bioengineering and synthetic biology.\\n\\n4. **Ethical Considerations**\\n - Discuss the ethical implications',\n", + " 'user_query': 'otel+sysmon:What is CRISPR?',\n", + " 'query': 'otel+sysmon:What is CRISPR?',\n", + " 'plan': '### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. 
**Introduction to CRISPR**\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\n - Historical Context: Briefly discuss its discovery in bacteria and its adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components: Describe the key components (Cas9 enzyme, guide RNA).\\n - Process: Outline the steps of how CRISPR works (targeting DNA, cutting, and repairing).\\n\\n3. **Applications of CRISPR**\\n - Medical: Discuss potential uses in gene therapy, treatment of genetic disorders, and cancer research.\\n - Agricultural: Explain how CRISPR is used to enhance crop resilience and yield.\\n - Industrial: Mention applications in bioengineering and synthetic biology.\\n\\n4. **Ethical Considerations**\\n - Discuss the ethical implications',\n", + " 'parent': 'otel+sysmon:37486a6f8ae2a237',\n", + " 'param_synth_prompt': 'otel+sysmon:param_synth_prompt'},\n", + " 'data': {'message_id': None},\n", + " 'info': {'otel': {'trace_id': '82b3197cb086218de00a317fce630480',\n", + " 'span_id': '6b579b7918ca7492',\n", + " 'parent_span_id': '37486a6f8ae2a237',\n", + " 'service': 'otel+sysmon',\n", + " 'temporal_ignore': False}}},\n", + " 'otel+sysmon:fe2a22580e1b3dbd': {'kind': 'msg',\n", + " 'name': 'llm.chat.completion',\n", + " 'op': 'unspecified',\n", + " 'inputs': {'parent': 'otel+sysmon:6b579b7918ca7492'},\n", + " 'data': {'message_id': None},\n", + " 'info': {'otel': {'trace_id': '82b3197cb086218de00a317fce630480',\n", + " 'span_id': 'fe2a22580e1b3dbd',\n", + " 'parent_span_id': '6b579b7918ca7492',\n", + " 'service': 'otel+sysmon',\n", + " 'temporal_ignore': True}}}},\n", + " 'context': {}},\n", + " 'summary': {'node_count': 6,\n", + " 'message_names': ['llm.chat.completion',\n", + " 'llm.chat.completion',\n", + " 'planner_node',\n", + " 'synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", 
+ " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'span_count': 5,\n", + " 'span_names': ['llm.chat.completion',\n", + " 'planner_node',\n", + " 'llm.chat.completion',\n", + " 'synth_node',\n", + " 'langgraph-agent.invoke'],\n", + " 'param_keys': ['param.planner_prompt',\n", + " 'param.planner_prompt.trainable',\n", + " 'param.synth_prompt',\n", + " 'param.synth_prompt.trainable']}},\n", + " {'carrier': 'sysmon',\n", + " 'origin': 'observer',\n", + " 'doc': {'tgj': '1.0',\n", + " 'run_id': 'compare',\n", + " 'agent_id': 'langgraph-agent-sysmon-observer',\n", + " 'graph_id': 'otel+sysmon',\n", + " 'scope': 'otel+sysmon/observer',\n", + " 'nodes': {'param:planner_prompt': {'id': 'param:planner_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'value': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'description': '[prompt]'},\n", + " 'param:synth_prompt': {'id': 'param:synth_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'description': '[prompt]'},\n", + " 'msg:16912ef8e4a24c73': {'id': 'msg:16912ef8e4a24c73',\n", + " 'kind': 'message',\n", + " 'name': 'planner_node',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:468',\n", + " 'inputs': {},\n", + " 'output': {'name': 'planner_node:out',\n", + " 'value': \"{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n###\"},\n", + " 'info': {'sysmon': {'duration_ns': 2786715131,\n", + " 'thread_id': 128068499416896}}},\n", + " 'msg:f3668974d5434f72': {'id': 'msg:f3668974d5434f72',\n", + " 'kind': 'message',\n", + " 'name': 'synth_node',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:472',\n", + " 'inputs': {},\n", + " 'output': {'name': 'synth_node:out',\n", + " 'value': \"{'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\\\n\\\\n##\"},\n", + " 'info': {'sysmon': {'duration_ns': 2238104071,\n", + " 'thread_id': 128068499416896}}}}},\n", + " 'summary': {'node_count': 4,\n", + " 'message_names': ['planner_node', 'synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'event_count': 2}}]},\n", + " {'config': 'sysmon',\n", + " 'runtime_s': 77.312,\n", + " 'baseline_score': 0.822,\n", + " 'best_score': 0.944,\n", + " 'score_gain': 0.122,\n", + " 'best_iteration': 2,\n", + " 'score_history': [0.822, 0.669, 0.944, 0.923, 0.88, 0.887],\n", + " 'stability_std': 0.026,\n", + " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'final_answer': \"CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n\\n### Mechanism of CRISPR\\n**Components**: CRISPR technology primarily consists of the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \\n**Process**: The CRISPR process involves the gRNA binding to the target DNA sequence, the Cas9 enzyme creating a double-strand break in the DNA, and the cell's natural repair mechanisms either introducing mutations or incorporating new genetic material during the repair process.\\n\\n### Applications of CRISPR\\n**Medical**: CRISPR has potential applications in gene therapy for genetic disorders like sickle cell anemia, as well as in cancer research to target and modify cancer cells. 
\\n**Agricultural**: It is used to create genetically modified organisms (GMOs) that are resistant to diseases, pests, and environmental stresses, thereby enhancing food security\",\n", + " 'answer_preview': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n...',\n", + " 'observers': [],\n", + " 'views': [{'carrier': 'sysmon',\n", + " 'origin': 'backend',\n", + " 'doc': {'tgj': '1.0',\n", + " 'run_id': 'compare',\n", + " 'agent_id': 'langgraph-agent',\n", + " 'graph_id': 'sysmon',\n", + " 'scope': 'sysmon/backend',\n", + " 'nodes': {'param:planner_prompt': {'id': 'param:planner_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'value': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'description': '[prompt]'},\n", + " 'param:synth_prompt': {'id': 'param:synth_prompt',\n", + " 'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'description': '[prompt]'},\n", + " 'msg:259d9c51cfc54755': {'id': 'msg:259d9c51cfc54755',\n", + " 'kind': 'message',\n", + " 'name': 'planner_node',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:468',\n", + " 'inputs': {},\n", + " 'output': {'name': 'planner_node:out',\n", + " 'value': \"{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n###\"},\n", + " 'info': {'sysmon': {'duration_ns': 2861239637,\n", + " 'thread_id': 128068499416896}}},\n", + " 'msg:11a94c306b234ca1': {'id': 'msg:11a94c306b234ca1',\n", + " 'kind': 'message',\n", + " 'name': 'synth_node',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:472',\n", + " 'inputs': {},\n", + " 'output': {'name': 'synth_node:out',\n", + " 'value': '{\\'final_answer\\': \"CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\\\n\\\\n##'},\n", + " 'info': {'sysmon': {'duration_ns': 8902208450,\n", + " 'thread_id': 128068499416896}}}}},\n", + " 'summary': {'node_count': 4,\n", + " 'message_names': ['planner_node', 'synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'event_count': 2}}]}]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "import runpy\n", + "import importlib.util\n", "from pathlib import Path\n", "\n", "candidates = [\n", @@ -361,10 +1992,16 @@ "\n", "for candidate in candidates:\n", " if candidate.exists():\n", - " runpy.run_path(str(candidate), run_name='__main__')\n", + " spec = importlib.util.spec_from_file_location('compare_observers_demo', candidate)\n", + " module = importlib.util.module_from_spec(spec)\n", + " assert spec.loader is not None\n", + " spec.loader.exec_module(module)\n", + " rows = module.run_notebook_demo()\n", " break\n", "else:\n", - " raise FileNotFoundError('Could not locate demo_langgraph_instrument_and_compare_observers.py')\n" + " raise FileNotFoundError('Could not locate demo_langgraph_instrument_and_compare_observers.py')\n", + "\n", + "rows\n" ] } ], @@ -384,7 +2021,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.12.11" } }, "nbformat": 4, diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py index ea603739..aecd46d0 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py @@ -2,29 +2,24 @@ """ Live LangGraph optimization comparison across Trace / OTEL / sys.monitoring. -This script intentionally benchmarks optimization over 5 iterations using -a real OpenRouter-backed LLM when OPENROUTER_API_KEY is available. - -Compared configurations: - - trace - - trace + otel - - trace + sysmon - - trace + otel + sysmon - - otel - - otel + sysmon - - sysmon - -When OPENROUTER_API_KEY is not set, the script exits successfully after -printing a skip message. 
This keeps notebook CI deterministic while still -making the demo a true live benchmark for local/manual use. +This script benchmarks optimization over 5 iterations using a real +OpenRouter-backed LLM when OPENROUTER_API_KEY is available, then converts +every backend's captured artifacts to a shared TGJ view so the notebook can +show the same semantic graph logic across configurations. """ from __future__ import annotations +import json import os +import re +import statistics import sys +import time +from contextlib import nullcontext, redirect_stdout +from io import StringIO from pathlib import Path -from typing import Any, Callable, Dict, Mapping, Tuple +from typing import Any, Callable, Dict, List, Mapping, Tuple ROOT = Path(__file__).resolve().parents[2] if str(ROOT) not in sys.path: @@ -32,15 +27,17 @@ from langgraph.graph import StateGraph, START, END from opto.trace import node -from opto.trace.nodes import MessageNode, ParameterNode from opto.trace.io import ( + EvalResult, instrument_graph, - optimize_graph, make_dict_binding, + optimize_graph, otlp_traces_to_trace_json, ) from opto.trace.io.sysmonitoring import sysmon_profile_to_tgj +from opto.trace.io.tgj_export import export_subgraph_to_tgj from opto.trace.io.tgj_ingest import ingest_tgj +from opto.trace.nodes import MessageNode, ParameterNode try: from openai import OpenAI @@ -51,32 +48,166 @@ HAS_SYSMON = hasattr(sys, "monitoring") OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "") OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "gpt-4o-mini") -OPENROUTER_BASE_URL = os.environ.get("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") +OPENROUTER_BASE_URL = os.environ.get( + "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1" +) ITERATIONS = 5 QUERIES = [ "What is CRISPR?", "How does CRISPR enable gene editing?", ] -OPTIMIZED_SYNTH_PROMPT = ( - "Start the answer exactly with [BENCH_OK]. 
" - "Then answer carefully: {query}\nPlan: {plan}" -) +SYNTH_UPDATE_SCHEDULE = [ + { + "synth_prompt": ( + "Answer directly in the first sentence. " + "Then add two short titled sections with concrete details: {query}\nPlan: {plan}" + ) + }, + { + "synth_prompt": ( + "Answer directly in the first sentence. " + "Then add three short titled sections with concrete mechanisms, examples, " + "and caveats when useful. Keep it factual and concise: {query}\nPlan: {plan}" + ) + }, +] PLANNER_SYSTEM_PROMPT = "You are a careful planner." SYNTH_SYSTEM_PROMPT = "You are a careful scientific assistant." DEFAULT_TEMPLATES = { "planner_prompt": "Create a short plan for: {query}", "synth_prompt": "Answer briefly and factually: {query}\nPlan: {plan}", } +SEMANTIC_NAMES = ("planner_node", "synth_node") +STOPWORDS = { + "about", + "add", + "also", + "answer", + "briefly", + "carefully", + "concise", + "does", + "directly", + "exactly", + "factually", + "from", + "give", + "have", + "into", + "keep", + "plan", + "short", + "start", + "than", + "that", + "their", + "them", + "then", + "there", + "this", + "using", + "what", + "when", + "where", + "which", + "with", + "would", + "your", +} def _raw(value: Any) -> Any: return getattr(value, "data", value) +def _truncate(value: Any, limit: int = 140) -> str: + text = str(_raw(value)) + if len(text) <= limit: + return text + return text[: limit - 3] + "..." 
+ + +def _base_name(value: Any) -> str: + name = str(getattr(value, "name", getattr(value, "py_name", ""))) + return name.split("/")[-1].split(":")[0] + + +def _semantic_alias(name: str) -> str | None: + suffix = name.split(".")[-1] + if suffix in SEMANTIC_NAMES: + return suffix + return None + + def _str_map(values: Mapping[str, Any]) -> Dict[str, str]: return {key: str(_raw(value)) for key, value in values.items()} +def _node_records(doc: Dict[str, Any]) -> List[Dict[str, Any]]: + raw_nodes = doc.get("nodes") or {} + if isinstance(raw_nodes, dict): + return list(raw_nodes.values()) + return list(raw_nodes) + + +def _unique_nodes(nodes: Dict[str, Any], cls: type) -> List[Any]: + return list( + { + id(obj): obj + for obj in nodes.values() + if isinstance(obj, cls) + }.values() + ) + + +def _terms(text: str) -> List[str]: + return [ + token + for token in re.findall(r"[a-z0-9]+", text.lower()) + if len(token) >= 4 and token not in STOPWORDS + ] + + +def _coverage(needles: List[str], haystack: str) -> float: + keys = list(dict.fromkeys(needles)) + if not keys: + return 1.0 + body = haystack.lower() + return sum(token in body for token in keys) / len(keys) + + +def _lead_text(answer: str) -> str: + first_line = answer.splitlines()[0] if answer.splitlines() else answer + first_sentence = re.split(r"(?<=[.!?])\s+", answer, maxsplit=1)[0] + return (first_sentence if len(first_sentence) >= len(first_line) else first_line)[:220] + + +def _structure_score(answer: str) -> float: + lines = [line.strip() for line in answer.splitlines() if line.strip()] + headings = sum(line.startswith("#") for line in lines) + bullets = sum( + line.startswith(("-", "*")) or re.match(r"^\d+[.)]\s", line) is not None + for line in lines + ) + return min( + 1.0, + 0.35 * min(headings, 3) / 3 + + 0.45 * min(bullets, 4) / 4 + + 0.20 * min(len(lines), 12) / 12, + ) + + +def _length_score(answer: str) -> float: + return min(max(len(answer) - 120, 0) / 720.0, 1.0) + + +def _directness_score(query: 
str, answer: str) -> float: + lead = _lead_text(answer).strip() + if not lead or lead.startswith(("#", "-", "*")): + return 0.0 + return 0.5 * _coverage(_terms(query), lead) + 0.5 * float(len(lead) >= 60) + + def render_template(template: str, **variables: Any) -> str: return template.format(**_str_map(variables)) @@ -99,42 +230,167 @@ def call_chat_text( return response.choices[0].message.content -def _message_names(nodes: Dict[str, Any]): - names = [] - seen = set() - for obj in nodes.values(): - if isinstance(obj, MessageNode): - nm = str(getattr(obj, "name", getattr(obj, "py_name", ""))) - base = nm.split("/")[-1].split(":")[0] - if base not in seen: - seen.add(base) - names.append(base) - return sorted(names) +def summarize_tgj(doc: Dict[str, Any]) -> Dict[str, Any]: + nodes = ingest_tgj(doc) + message_nodes = _unique_nodes(nodes, MessageNode) + param_nodes = _unique_nodes(nodes, ParameterNode) + message_names = sorted(_base_name(obj) for obj in message_nodes if _base_name(obj)) + param_names = sorted(_base_name(obj) for obj in param_nodes if _base_name(obj)) + semantic_messages = sorted( + { + alias + for name in message_names + if (alias := _semantic_alias(name)) is not None + } + ) + param_values = { + name: _truncate(obj.data, 220) + for obj in param_nodes + if (name := _base_name(obj)).endswith("_prompt") + } + return { + "node_count": len(_node_records(doc)), + "message_names": message_names, + "semantic_messages": semantic_messages, + "param_names": param_names, + "param_values": param_values, + } -class DictUpdateOptimizer: - def __init__(self, update_dict: Dict[str, Any]): - self.update_dict = dict(update_dict) - self.calls = 0 +def _make_trace_view( + trace_nodes: List[Any], + *, + config: str, + origin: str, +) -> Dict[str, Any]: + doc = export_subgraph_to_tgj( + trace_nodes, + run_id="compare", + agent_id=config, + graph_id="trace", + scope=f"{config}/{origin}", + ) + return { + "carrier": "trace", + "origin": origin, + "doc": doc, + "summary": 
summarize_tgj(doc), + } - def zero_feedback(self): - return None - def backward(self, *_args, **_kwargs): +def _make_otel_view( + otlp: Dict[str, Any], + *, + config: str, + origin: str, +) -> Dict[str, Any]: + spans = ( + otlp.get("resourceSpans", [{}])[0] + .get("scopeSpans", [{}])[0] + .get("spans", []) + ) + param_keys = sorted( + { + attr["key"] + for span in spans + for attr in span.get("attributes", []) + if str(attr.get("key", "")).startswith("param.") + } + ) + docs = otlp_traces_to_trace_json( + otlp, + agent_id_hint=config, + use_temporal_hierarchy=True, + ) + doc = docs[0] if docs else {"tgj": "1.0", "nodes": {}} + summary = summarize_tgj(doc) + summary["span_count"] = len(spans) + summary["span_names"] = [span.get("name") for span in spans] + summary["param_keys"] = param_keys + return { + "carrier": "otel", + "origin": origin, + "doc": doc, + "summary": summary, + } + + +def _make_sysmon_view( + profile_doc: Dict[str, Any], + *, + config: str, + origin: str, +) -> Dict[str, Any]: + doc = sysmon_profile_to_tgj( + profile_doc, + run_id="compare", + graph_id=config, + scope=f"{config}/{origin}", + ) + summary = summarize_tgj(doc) + summary["event_count"] = len(profile_doc.get("events", [])) + return { + "carrier": "sysmon", + "origin": origin, + "doc": doc, + "summary": summary, + } + + +def tgj_to_digraph(doc: Dict[str, Any], *, title: str): + try: + from graphviz import Digraph + except Exception: return None - def step(self): - self.calls += 1 - if self.calls == 1: - return dict(self.update_dict) - return {} + records = _node_records(doc) + known_ids = {str(record.get("id")) for record in records} + graph = Digraph(comment=title) + graph.attr(rankdir="LR") + + for record in records: + node_id = str(record.get("id")) + kind = str(record.get("kind", "value")) + name = str(record.get("name", node_id)) + if kind == "parameter": + preview = record.get("value", "") + fill = "khaki1" + elif kind == "message": + preview = (record.get("output") or 
{}).get("value", "") + fill = "lightblue" + elif kind == "exception": + preview = (record.get("error") or {}).get("message", "") + fill = "mistyrose" + else: + preview = record.get("value", "") + fill = "white" + label = f"{name}\\n[{kind}]" + if preview not in (None, ""): + label += f"\\n{_truncate(preview, 80)}" + graph.node( + node_id, + label=label, + shape="box", + style="rounded,filled", + fillcolor=fill, + ) + + for record in records: + child_id = str(record.get("id")) + for ref in (record.get("inputs") or {}).values(): + parent_id = ref.get("ref") if isinstance(ref, dict) else ref + if parent_id is not None and str(parent_id) in known_ids: + graph.edge(str(parent_id), child_id) + return graph -class TraceMutatingOptimizer: - def __init__(self, prompt_node, update_value: str, key: str): - self.prompt_node = prompt_node - self.update_value = update_value - self.key = key + +class DictUpdateOptimizer: + def __init__(self, update_spec: Dict[str, Any] | List[Dict[str, Any]]): + if isinstance(update_spec, list): + self.update_schedule = [dict(update) for update in update_spec] + else: + self.update_schedule = [dict(update_spec)] self.calls = 0 def zero_feedback(self): @@ -144,10 +400,11 @@ def backward(self, *_args, **_kwargs): return None def step(self): + if self.calls < len(self.update_schedule): + update = dict(self.update_schedule[self.calls]) + self.calls += 1 + return update self.calls += 1 - if self.calls == 1: - self.prompt_node._set(self.update_value) - return {self.key: self.update_value} return {} @@ -172,55 +429,24 @@ def _llm(messages=None, **kwargs): return _llm -def eval_fn(payload: Dict[str, Any]) -> Dict[str, Any]: +def eval_fn(payload: Dict[str, Any]) -> EvalResult: + query = str(payload.get("query", "")) answer = str(_raw(payload.get("answer", ""))).strip() - ok = answer.startswith("[BENCH_OK]") - return { - "score": 1.0 if ok else 0.0, - "feedback": "Start the answer exactly with [BENCH_OK].", - } - - -def summarize_otlp(otlp: Dict[str, Any]) 
-> Dict[str, Any]: - spans = otlp.get("resourceSpans", [{}])[0].get("scopeSpans", [{}])[0].get("spans", []) - param_keys = sorted( - { - a["key"] - for s in spans - for a in s.get("attributes", []) - if str(a.get("key", "")).startswith("param.") - } - ) - docs = otlp_traces_to_trace_json( - otlp, - agent_id_hint="compare", - use_temporal_hierarchy=True, - ) - nodes = ingest_tgj(docs[0]) if docs else {} - return { - "span_count": len(spans), - "span_names": [s.get("name") for s in spans], - "param_keys": param_keys, - "message_names": _message_names(nodes), - } - - -def summarize_sysmon(profile_doc: Dict[str, Any]) -> Dict[str, Any]: - tgj = sysmon_profile_to_tgj(profile_doc, run_id="compare", graph_id="demo", scope="compare/0") - nodes = ingest_tgj(tgj) - param_names = sorted( - { - str(getattr(obj, "name", getattr(obj, "py_name", ""))).split("/")[-1].split(":")[0] - for obj in nodes.values() - if isinstance(obj, ParameterNode) - } + if answer.startswith("[ERROR]") or not answer: + return EvalResult(score=0.0, feedback="LLM failure/empty answer") + + coverage = _coverage(_terms(query), answer) + directness = _directness_score(query, answer) + structure = _structure_score(answer) + length = _length_score(answer) + score = 0.08 + 0.30 * coverage + 0.26 * directness + 0.20 * structure + 0.16 * length + return EvalResult( + score=round(min(score, 0.95), 4), + feedback=( + f"coverage={coverage:.2f}, directness={directness:.2f}, " + f"structure={structure:.2f}, length={length:.2f}" + ), ) - return { - "event_count": len(profile_doc.get("events", [])), - "tgj_node_count": len(tgj.get("nodes", {})), - "message_names": _message_names(nodes), - "param_names": param_names, - } def build_semantic_graph(planner_fn, synth_fn): @@ -314,12 +540,12 @@ def build_graph(): observe_with=observe_with, graph_factory=build_graph, scope=scope, - graph_agents_functions=["planner_node", "synth_node"], + graph_agents_functions=list(SEMANTIC_NAMES), graph_prompts_list=[planner_prompt, 
synth_prompt], train_graph_agents_functions=False, output_key="final_answer", ) - optimizer = TraceMutatingOptimizer(synth_prompt, OPTIMIZED_SYNTH_PROMPT, "synth_prompt") + optimizer = DictUpdateOptimizer(SYNTH_UPDATE_SCHEDULE) return instrumented, optimizer, lambda: synth_prompt.data @@ -328,6 +554,7 @@ def make_otel_case(llm, observe_with: Tuple[str, ...] = ()): graph=None, backend="otel", observe_with=observe_with, + graph_agents_functions=list(SEMANTIC_NAMES), llm=llm, initial_templates=dict(DEFAULT_TEMPLATES), output_key="final_answer", @@ -338,7 +565,7 @@ def make_otel_case(llm, observe_with: Tuple[str, ...] = ()): def planner_call(query: str) -> str: return tracing_llm.template_prompt_call( - span_name="planner", + span_name="planner_node", template_name="planner_prompt", template=templates["planner_prompt"], variables={"query": query}, @@ -349,7 +576,7 @@ def planner_call(query: str) -> str: def synth_call(query: str, plan: str) -> str: return tracing_llm.template_prompt_call( - span_name="synth", + span_name="synth_node", template_name="synth_prompt", template=templates["synth_prompt"], variables={"query": query, "plan": plan}, @@ -364,7 +591,7 @@ def synth_call(query: str, plan: str) -> str: synth_call=synth_call, ) ) - optimizer = DictUpdateOptimizer({"synth_prompt": OPTIMIZED_SYNTH_PROMPT}) + optimizer = DictUpdateOptimizer(SYNTH_UPDATE_SCHEDULE) return instrumented, optimizer, lambda: instrumented.templates["synth_prompt"] @@ -399,14 +626,37 @@ def synth_call(query: str, plan: str) -> str: ), backend="sysmon", bindings=bindings, + graph_agents_functions=list(SEMANTIC_NAMES), output_key="final_answer", ) - optimizer = DictUpdateOptimizer({"synth_prompt": OPTIMIZED_SYNTH_PROMPT}) + optimizer = DictUpdateOptimizer(SYNTH_UPDATE_SCHEDULE) return instrumented, optimizer, lambda: templates["synth_prompt"] +def build_cases(llm): + cases = [ + ("trace", lambda: make_trace_case(llm, ())), + ("trace+otel", lambda: make_trace_case(llm, ("otel",))), + ("otel", 
lambda: make_otel_case(llm, ())), + ] + if HAS_SYSMON: + cases.extend( + [ + ("trace+sysmon", lambda: make_trace_case(llm, ("sysmon",))), + ( + "trace+otel+sysmon", + lambda: make_trace_case(llm, ("otel", "sysmon")), + ), + ("otel+sysmon", lambda: make_otel_case(llm, ("sysmon",))), + ("sysmon", lambda: make_sysmon_case(llm)), + ] + ) + return cases + + def run_case(name: str, builder): instrumented, optimizer, prompt_getter = builder() + started_at = time.perf_counter() result = optimize_graph( instrumented, queries=QUERIES, @@ -415,104 +665,268 @@ def run_case(name: str, builder): eval_fn=eval_fn, output_key="final_answer", ) + runtime_s = time.perf_counter() - started_at + + probe = instrumented.invoke({"query": QUERIES[0]}) + if hasattr(probe, "data") and isinstance(probe.data, dict): + answer_value = probe.data.get("final_answer", probe.data) + elif isinstance(probe, dict): + answer_value = probe.get("final_answer", probe) + else: + answer_value = probe + answer_text = str(_raw(answer_value)) + views = [] + + backend = getattr(instrumented, "backend", None) + if backend == "trace": + views.append( + _make_trace_view( + [probe, *list(getattr(instrumented, "parameters", []))], + config=name, + origin="backend", + ) + ) + elif backend == "otel": + views.append( + _make_otel_view( + instrumented.session.flush_otlp(clear=True), + config=name, + origin="backend", + ) + ) + elif backend == "sysmon": + views.append( + _make_sysmon_view( + instrumented._last_profile_doc or {}, + config=name, + origin="backend", + ) + ) - probe = instrumented.invoke({"query": "What is CRISPR?"}) - answer_preview = str(_raw(probe.get("final_answer", probe)))[:120] + for artifact in getattr(instrumented, "_last_observer_artifacts", []): + if artifact.carrier == "otel": + views.append( + _make_otel_view( + artifact.raw, + config=name, + origin="observer", + ) + ) + elif artifact.carrier == "sysmon": + views.append( + _make_sysmon_view( + artifact.profile_doc, + config=name, + 
origin="observer", + ) + ) + + assert result.best_iteration >= 2 + final_prompt = prompt_getter() + assert final_prompt == SYNTH_UPDATE_SCHEDULE[-1]["synth_prompt"] + tail_scores = result.score_history[max(2, result.best_iteration):] - summary = { + return { "config": name, - "score_history": [round(x, 3) for x in result.score_history], + "runtime_s": round(runtime_s, 3), + "baseline_score": round(result.baseline_score, 3), + "best_score": round(result.best_score, 3), + "score_gain": round(result.best_score - result.baseline_score, 3), "best_iteration": result.best_iteration, + "score_history": [round(x, 3) for x in result.score_history], + "stability_std": round( + statistics.pstdev(tail_scores) if len(tail_scores) > 1 else 0.0, + 3, + ), "best_updates": dict(result.best_updates), - "final_synth_prompt": prompt_getter(), - "answer_preview": answer_preview, - "observers": [a.carrier for a in getattr(instrumented, "_last_observer_artifacts", [])], - "trace_summary": None, - "otel_summary": None, - "sysmon_summary": None, + "final_synth_prompt": final_prompt, + "final_answer": answer_text, + "answer_preview": _truncate(answer_text, 180), + "observers": [ + artifact.carrier + for artifact in getattr(instrumented, "_last_observer_artifacts", []) + ], + "views": views, } - if getattr(instrumented, "backend", None) == "trace": - answer_node = probe.get("final_answer") - summary["trace_summary"] = { - "is_node": hasattr(answer_node, "parents"), - "parent_count": len(getattr(answer_node, "parents", [])), - "parameter_count": len(getattr(instrumented, "parameters", [])), - } - elif getattr(instrumented, "backend", None) == "otel": - otlp = instrumented.session.flush_otlp(clear=True) - summary["otel_summary"] = summarize_otlp(otlp) - elif getattr(instrumented, "backend", None) == "sysmon": - summary["sysmon_summary"] = summarize_sysmon(instrumented._last_profile_doc) - - for artifact in getattr(instrumented, "_last_observer_artifacts", []): - if artifact.carrier == "otel": - 
summary["otel_summary"] = summarize_otlp(artifact.raw) - elif artifact.carrier == "sysmon": - summary["sysmon_summary"] = summarize_sysmon(artifact.profile_doc) - - assert summary["best_iteration"] >= 2 - assert "Start the answer exactly with [BENCH_OK]." in summary["final_synth_prompt"] - return summary - - -def main(): - print("\n" + "=" * 80) - print("LangGraph live optimization comparison") - print("=" * 80) - print(f"Python {sys.version_info.major}.{sys.version_info.minor}") - print(f"sys.monitoring available: {HAS_SYSMON}") - print(f"OPENROUTER_MODEL={OPENROUTER_MODEL}") +def live_skip_reason() -> str | None: if not OPENROUTER_API_KEY: - print("\n[SKIP] OPENROUTER_API_KEY is not set.") - print("This demo is intentionally live-only. Set OPENROUTER_API_KEY to run the benchmark.") - return + return ( + "[SKIP] OPENROUTER_API_KEY is not set. " + "This comparison stays live-only so notebook CI can skip cleanly." + ) if OpenAI is None: - print("\n[SKIP] openai package is unavailable.") - return + return "[SKIP] openai package is unavailable." 
+ return None - llm = make_live_llm() - cases = [ - ("trace", lambda: make_trace_case(llm, ())), - ("trace+otel", lambda: make_trace_case(llm, ("otel",))), - ("otel", lambda: make_otel_case(llm, ())), - ] - if HAS_SYSMON: - cases.extend( - [ - ("trace+sysmon", lambda: make_trace_case(llm, ("sysmon",))), - ("trace+otel+sysmon", lambda: make_trace_case(llm, ("otel", "sysmon"))), - ("otel+sysmon", lambda: make_otel_case(llm, ("sysmon",))), - ("sysmon", lambda: make_sysmon_case(llm)), - ] - ) - rows = [run_case(name, builder) for name, builder in cases] +def run_live_comparison(*, echo_progress: bool = True) -> List[Dict[str, Any]]: + reason = live_skip_reason() + if reason is not None: + if echo_progress: + print(reason) + return [] + + llm = make_live_llm() + context = nullcontext() + sink = None + if not echo_progress: + sink = StringIO() + context = redirect_stdout(sink) + with context: + rows = [run_case(name, builder) for name, builder in build_cases(llm)] + return rows + + +def print_cli_report(rows: List[Dict[str, Any]]) -> None: + if not rows: + return - print("\nOptimization comparison (5 iterations)\n") - print("| config | score_history | best_iteration | observers |") - print("|---|---|---:|---|") + print(f"\nOptimization comparison ({ITERATIONS} iterations)\n") + print( + "| config | runtime_s | baseline | best | gain | best_iteration | stability_std | " + "score_history | semantic_messages | params |" + ) + print("|---|---:|---:|---:|---:|---:|---:|---|---|---|") for row in rows: + primary = row["views"][0]["summary"] if row["views"] else {} print( - f"| {row['config']} | {row['score_history']} | {row['best_iteration']} " - f"| {','.join(row['observers']) or '-'} |" + f"| {row['config']} | {row['runtime_s']:.3f} | {row['baseline_score']:.3f} " + f"| {row['best_score']:.3f} | {row['score_gain']:.3f} | {row['best_iteration']} " + f"| {row['stability_std']:.3f} | {row['score_history']} " + f"| {primary.get('semantic_messages', [])} | 
{primary.get('param_names', [])} |" ) - print("\nBinding / update inspection\n") + print("\nPer-configuration artifacts\n") for row in rows: print(f"## {row['config']}") + print(f"runtime_s: {row['runtime_s']:.3f}") + print(f"baseline_score: {row['baseline_score']:.3f}") + print(f"best_score: {row['best_score']:.3f}") + print(f"score_gain: {row['score_gain']:.3f}") + print(f"stability_std: {row['stability_std']:.3f}") + print(f"score_history: {row['score_history']}") print(f"best_updates: {row['best_updates']}") print(f"final_synth_prompt: {row['final_synth_prompt']}") - print(f"answer_preview: {row['answer_preview']}") - if row['trace_summary'] is not None: - print(f"trace_summary: {row['trace_summary']}") - if row['otel_summary'] is not None: - print(f"otel_summary: {row['otel_summary']}") - if row['sysmon_summary'] is not None: - print(f"sysmon_summary: {row['sysmon_summary']}") + print(f"final_answer: {row['answer_preview']}") + for view in row["views"]: + summary = view["summary"] + extras = [] + if "span_count" in summary: + extras.append(f"span_count={summary['span_count']}") + if "event_count" in summary: + extras.append(f"event_count={summary['event_count']}") + extra_text = f" ({', '.join(extras)})" if extras else "" + print( + f" - {view['origin']} {view['carrier']}{extra_text}: " + f"messages={summary['message_names']} " + f"params={summary['param_names']}" + ) print() +def display_notebook_report(rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + try: + from IPython.display import Markdown, display + except Exception: + print_cli_report(rows) + return rows + + if not rows: + display(Markdown(live_skip_reason() or "_No rows captured._")) + return rows + + lines = [ + "| config | runtime_s | baseline | best | gain | best_iteration | stability_std | score_history |", + "|---|---:|---:|---:|---:|---:|---:|---|", + ] + for row in rows: + lines.append( + f"| {row['config']} | {row['runtime_s']:.3f} | {row['baseline_score']:.3f} " + f"| 
{row['best_score']:.3f} | {row['score_gain']:.3f} | {row['best_iteration']} " + f"| {row['stability_std']:.3f} | {row['score_history']} |" + ) + display(Markdown("## Optimization comparison\n\n" + "\n".join(lines))) + + for row in rows: + display( + Markdown( + "\n".join( + [ + f"## {row['config']}", + f"- Runtime: `{row['runtime_s']:.3f}s`", + f"- Baseline score: `{row['baseline_score']:.3f}`", + f"- Best score: `{row['best_score']:.3f}`", + f"- Score gain: `{row['score_gain']:.3f}`", + f"- Best iteration: `{row['best_iteration']}`", + f"- Post-update stability std: `{row['stability_std']:.3f}`", + f"- Score history: `{row['score_history']}`", + f"- Best updates: `{list(row['best_updates'].keys())}`", + "", + "### Final synth prompt", + "```text", + str(row["final_synth_prompt"]), + "```", + "### Final answer", + "```text", + _truncate(row["final_answer"], 500), + "```", + ] + ) + ) + ) + for view in row["views"]: + summary = view["summary"] + extra_lines = [] + if "span_count" in summary: + extra_lines.append(f"- Span count: `{summary['span_count']}`") + extra_lines.append(f"- Span names: `{summary['span_names']}`") + if "event_count" in summary: + extra_lines.append(f"- Event count: `{summary['event_count']}`") + display( + Markdown( + "\n".join( + [ + f"### {view['origin']} {view['carrier']}", + f"- Semantic message names: `{summary['semantic_messages']}`", + f"- All message names: `{summary['message_names']}`", + f"- Parameter names: `{summary['param_names']}`", + *extra_lines, + "", + "```json", + json.dumps(summary["param_values"], indent=2), + "```", + ] + ) + ) + ) + graph = tgj_to_digraph( + view["doc"], + title=f"{row['config']} {view['origin']} {view['carrier']}", + ) + if graph is not None: + display(graph) + + return rows + + +def run_notebook_demo() -> List[Dict[str, Any]]: + rows = run_live_comparison(echo_progress=False) + return display_notebook_report(rows) + + +def main(): + print("\n" + "=" * 80) + print("LangGraph live optimization 
comparison") + print("=" * 80) + print(f"Python {sys.version_info.major}.{sys.version_info.minor}") + print(f"sys.monitoring available: {HAS_SYSMON}") + print(f"OPENROUTER_MODEL={OPENROUTER_MODEL}") + + rows = run_live_comparison(echo_progress=True) + print_cli_report(rows) + + if __name__ == "__main__": main() diff --git a/opto/trace/io/instrumentation.py b/opto/trace/io/instrumentation.py index 8f5d89f4..bd475989 100644 --- a/opto/trace/io/instrumentation.py +++ b/opto/trace/io/instrumentation.py @@ -54,6 +54,7 @@ class InstrumentedGraph: input_key: str = "query" output_key: Optional[str] = None observers: List[GraphObserver] = field(default_factory=list) + observer_meta: Dict[str, Any] = field(default_factory=dict) _last_observer_artifacts: List[Any] = field(default_factory=list, init=False, repr=False) # Holds the active root span context for eval_fn to attach reward spans @@ -88,8 +89,10 @@ def invoke(self, state: Any, **kwargs: Any) -> Dict[str, Any]: if isinstance(state, dict): query_hint = str(state.get(self.input_key, "")) + meta = {"service_name": self.service_name} + meta.update(self.observer_meta) for obs in self.observers: - obs.start(bindings=self.bindings, meta={"service_name": self.service_name}) + obs.start(bindings=self.bindings, meta=meta) result = None error = None @@ -130,10 +133,13 @@ class SysMonInstrumentedGraph: input_key: str = "query" output_key: Optional[str] = None backend: str = "sysmon" + observer_meta: Dict[str, Any] = field(default_factory=dict) _last_profile_doc: Optional[dict] = field(default=None, init=False, repr=False) def invoke(self, state: Any, **kwargs: Any): - self.session.start(bindings=self.bindings) + meta = {"service_name": self.service_name} + meta.update(self.observer_meta) + self.session.start(bindings=self.bindings, meta=meta) result = None error = None try: @@ -238,6 +244,11 @@ def instrument_graph( from opto.features.graph.adapter import GraphAdapter except Exception: GraphAdapter = None + observer_meta = { + 
"semantic_names": [ + str(name).split(".")[-1] for name in (graph_agents_functions or []) + ] + } if adapter is not None: if GraphAdapter is not None and not isinstance(adapter, GraphAdapter): @@ -260,6 +271,8 @@ def instrument_graph( ) if hasattr(out, "observers"): out.observers = _make_observers(observe_with, service_name=service_name) + if hasattr(out, "observer_meta"): + out.observer_meta = dict(observer_meta) return out if GraphAdapter is not None and isinstance(graph, GraphAdapter): @@ -281,6 +294,8 @@ def instrument_graph( ) if hasattr(out, "observers"): out.observers = _make_observers(observe_with, service_name=service_name) + if hasattr(out, "observer_meta"): + out.observer_meta = dict(observer_meta) return out if backend == "trace": @@ -322,6 +337,7 @@ def instrument_graph( service_name=service_name, input_key=input_key, output_key=output_key, + observer_meta=dict(observer_meta), ) if backend != "otel": @@ -395,4 +411,5 @@ def _emit_code_param(span, code_key: str, code_fn: Any) -> None: input_key=input_key, output_key=output_key, observers=_make_observers(observe_with, service_name=service_name), + observer_meta=dict(observer_meta), ) diff --git a/opto/trace/io/optimization.py b/opto/trace/io/optimization.py index 9486c95c..134e9821 100644 --- a/opto/trace/io/optimization.py +++ b/opto/trace/io/optimization.py @@ -726,14 +726,12 @@ def _extract_output(result: Any, sidecar: Any = None) -> Tuple[Any, Any]: ).data opt.backward(target, feedback) raw_updates = opt.step() - if isinstance(raw_updates, dict): + if isinstance(raw_updates, dict) and raw_updates: updates = raw_updates if getattr(graph, "bindings", None) and all(isinstance(k, str) for k in raw_updates): last_applied_updates = apply_updates(raw_updates, graph.bindings, strict=False) else: last_applied_updates = dict(raw_updates) - else: - last_applied_updates = {} if on_iteration: on_iteration(iteration, runs, updates) diff --git a/opto/trace/io/tgj_export.py b/opto/trace/io/tgj_export.py index 
fe3ae555..f81f0926 100644 --- a/opto/trace/io/tgj_export.py +++ b/opto/trace/io/tgj_export.py @@ -76,6 +76,9 @@ def nid(n: Node) -> str: q.append(p) inputs = {f"in_{i}": {"ref": nid(p)} for i, p in enumerate(n.parents)} + for i, dep in enumerate(getattr(n, "hidden_dependencies", ()) or ()): + q.append(dep) + inputs[f"hidden_{i}"] = {"ref": nid(dep)} op = getattr(n, "op_name", None) if not op: diff --git a/tests/features_tests/test_sysmon_backend.py b/tests/features_tests/test_sysmon_backend.py index 72f36212..bf40ab10 100644 --- a/tests/features_tests/test_sysmon_backend.py +++ b/tests/features_tests/test_sysmon_backend.py @@ -52,6 +52,7 @@ def test_sysmon_backend_invoke_exports_profile_doc(): graph=build_graph(), backend="sysmon", initial_templates={"planner_prompt": "Plan {query}", "synth_prompt": "answer::{query}::{plan}"}, + graph_agents_functions=["planner", "synth"], output_key="final_answer", ) assert isinstance(ig, SysMonInstrumentedGraph) @@ -59,6 +60,7 @@ def test_sysmon_backend_invoke_exports_profile_doc(): assert "final_answer" in out assert ig._last_profile_doc["version"] == "trace-json/1.0+sysmon" assert len(ig._last_profile_doc["events"]) > 0 + assert [ev["name"] for ev in ig._last_profile_doc["events"]] == ["planner", "synth"] class _DictUpdateOptimizer: @@ -103,6 +105,7 @@ def test_sysmon_backend_optimize_applies_binding_updates(): graph=build_graph(templates), backend="sysmon", bindings=bindings, + graph_agents_functions=["planner", "synth"], output_key="final_answer", ) result = optimize_graph( diff --git a/tests/features_tests/test_trace_graph_optimization.py b/tests/features_tests/test_trace_graph_optimization.py index ed3df753..a882ce1f 100644 --- a/tests/features_tests/test_trace_graph_optimization.py +++ b/tests/features_tests/test_trace_graph_optimization.py @@ -154,10 +154,10 @@ def test_optimize_graph_trace_backend_reports_progress_and_best_updates(): result = optimize_graph( graph, queries=["What is gene editing?"], - iterations=2, + 
iterations=3, optimizer=optimizer, eval_fn=lambda payload: { - "score": 1.0 if "CRISPR optimized" in str(payload["answer"]) else 0.0, + "score": {0: 0.0, 1: 0.2, 2: 0.8, 3: 1.0}[payload["iteration"]], "feedback": "Prefer mentioning CRISPR optimized explicitly.", }, on_iteration=lambda i, runs, updates: callbacks.append((i, len(runs), dict(updates))), @@ -165,15 +165,16 @@ def test_optimize_graph_trace_backend_reports_progress_and_best_updates(): assert result.baseline_score == 0.0 assert result.best_score == 1.0 - assert result.best_iteration == 2 + assert result.best_iteration == 3 assert result.best_updates == {"synth_prompt": "CRISPR optimized :: {query} :: {plan}"} - assert optimizer.zero_calls == 2 - assert optimizer.backward_calls == 2 - assert optimizer.step_calls == 2 + assert optimizer.zero_calls == 3 + assert optimizer.backward_calls == 3 + assert optimizer.step_calls == 3 assert callbacks == [ (0, 1, {}), (1, 1, {"synth_prompt": "CRISPR optimized :: {query} :: {plan}"}), (2, 1, {}), + (3, 1, {}), ] From ae5b434f28c9d00af0e0b4e4e473b394078d84fd Mon Sep 17 00:00:00 2001 From: doxav Date: Tue, 21 Apr 2026 08:25:17 +0200 Subject: [PATCH 07/16] cleaner code and documentation --- docs/GraphOptimization.md | 717 +++++++++--------- opto/features/mlflow/__init__.py | 18 + opto/features/mlflow/autolog.py | 82 ++ opto/trace/io/__init__.py | 2 +- opto/trace/io/instrumentation.py | 2 +- opto/trace/io/optimization.py | 2 +- ...ggraph_otel_runtime.py => otel_runtime.py} | 2 +- opto/trace/io/telemetry_session.py | 2 +- ...h_otel_runtime.py => test_otel_runtime.py} | 2 +- 9 files changed, 472 insertions(+), 357 deletions(-) create mode 100644 opto/features/mlflow/__init__.py create mode 100644 opto/features/mlflow/autolog.py rename opto/trace/io/{langgraph_otel_runtime.py => otel_runtime.py} (99%) rename tests/unit_tests/{test_langgraph_otel_runtime.py => test_otel_runtime.py} (99%) diff --git a/docs/GraphOptimization.md b/docs/GraphOptimization.md index 
d3f86fad..606f9259 100644 --- a/docs/GraphOptimization.md +++ b/docs/GraphOptimization.md @@ -1,439 +1,454 @@ # Graph Optimization -OpenTrace provides a unified API for instrumenting LangGraph agents with OpenTelemetry (OTEL) tracing and running prompt optimization loops. It reduces ~645 lines of manual instrumentation boilerplate to two function calls: `instrument_graph()` and `optimize_graph()`. Traces are emitted with dual semantic conventions compatible with both Trace (TGJ) and Agent Lightning, enabling optimization via the Trace framework while supporting standard observability tooling. +This document describes the current graph optimization stack in Trace after the graph adapter, sidecar, observer, and sys.monitoring work landed. ---- - -## Table of Contents - -1. [Before / After](#1-before--after) -2. [Architecture](#2-architecture) -3. [Public API Reference](#3-public-api-reference) -4. [Data Flow Pipeline](#4-data-flow-pipeline) -5. [Semantic Conventions](#5-semantic-conventions) -6. [Temporal Chaining](#6-temporal-chaining) -7. [Core Modules](#7-core-modules) +It is intentionally aligned with the current codebase, not with earlier intermediate branches: +- graph abstractions live under `opto.features.graph.*` +- the OTEL runtime helper is `opto.trace.io.otel_runtime` +- trace graph instrumentation is `opto.features.graph.graph_instrumentation` +- `instrument_graph(...)` now supports three primary backends: `trace`, `otel`, and `sysmon` +- `observe_with=(...)` adds passive observers on top of the primary backend --- -## 1. Before / After +## Table of contents +1. Goals +2. Current codebase map +3. Main concepts +4. Architecture schema +5. Backend modes +6. Observer combinations +7. Adapter model +8. Multiple traces and observers +9. Public API cheat sheet +10. Optimization carriers and update path +11. OTEL semantic conventions and temporal chaining +12. Notebook and demo coverage +13. 
Open questions + +## Goals -### Boilerplate Comparison +The current design aims to optimize: +- prompts +- agent or node functions +- graph knobs and routing or workflow policies +- LangGraph graphs today, while keeping the adapter shape reusable for other graph-like runtimes later -| Step | Before (manual) | After (this API) | -|------|-----------------|------------------| -| **Create session** | ~50 lines: TracerProvider, InMemorySpanExporter, SimpleSpanProcessor, tracer init | Created inside `instrument_graph()`; no explicit session code | -| **Instrument graph** | ~25 lines per node: manual span creation, attribute setting, TracingLLM wiring | `instrument_graph(graph, ...)` | -| **Run optimize loop** | ~150 lines: loop, trace capture, TGJ conversion, score tracking, template update | `optimize_graph(ig, queries, iterations=5)` | -| **Persist artifacts** | ~50 lines: OTLP export, file write, optional MLflow log | `ig.session.flush_otlp()` | +A second goal is to separate: +- **runtime return types**: plain Python objects, dicts, strings, etc. +- **optimization state**: Trace nodes, parameters, sidecars, converted TGJ documents, observer artifacts -**Total: ~645 lines reduced to ~10 lines.** +That separation is what lets the system remain compatible with LangGraph while still feeding Trace-native optimizers and trainers. -### Backend modes +## Current codebase map -`instrument_graph()` and `optimize_graph()` support two backends: +### Main packages -| Backend | Carrier | Best for | +| Package | Role | Key files | |---|---|---| -| `backend="otel"` (default) | OTLP spans → TGJ → ingest | observability-first optimization | -| `backend="trace"` | native Trace nodes (`bundle()` / `node()`) | direct graph-native optimization | - -The OTEL path remains the default and most interoperable mode. 
- -### Code Diff - -```diff -- # --- BEFORE: Manual setup (~255+ lines for 4 steps) --- -- from opentelemetry.sdk.trace import TracerProvider -- from opentelemetry.sdk.trace.export import SimpleSpanProcessor, InMemorySpanExporter -- exporter = InMemorySpanExporter() -- provider = TracerProvider() -- provider.add_span_processor(SimpleSpanProcessor(exporter)) -- tracer = provider.get_tracer("my-agent") -- # ... per-node: with tracer.start_as_current_span(name): ... -- # ... manual optimization loop with flush, TGJ, optimizer.step() ... -- # ... manual export to JSON / MLflow ... - -+ # --- AFTER: Minimal API --- -+ from opto.trace.io import instrument_graph, optimize_graph -+ -+ ig = instrument_graph( -+ graph=my_graph, -+ llm=my_llm, -+ initial_templates={"planner_prompt": "Plan for: {query}"}, -+ trainable_keys={"planner", "synthesizer"}, -+ ) -+ result = optimize_graph(ig, queries=["Q1", "Q2"], iterations=5) -+ otlp = ig.session.flush_otlp() +| `opto.features.graph` | Graph-specific abstractions and trace-side runtime bridge | `adapter.py`, `graph_instrumentation.py`, `module.py`, `sidecars.py` | +| `opto.trace.io` | Instrumentation and optimization entrypoints, OTEL/sysmon conversion, bindings | `instrumentation.py`, `optimization.py`, `otel_runtime.py`, `otel_adapter.py`, `observers.py`, `sysmonitoring.py`, `bindings.py` | +| `opto.trace` | Native Trace primitives | `bundle.py`, `nodes.py`, `modules.py` | +| `opto.trainer` | Training algorithms and guides | `train.py`, `algorithms/*`, `guide.py` | +| `opto.features.priority_search` | Search-oriented optimization on top of Trace modules | `priority_search.py`, `utils.py` | +| `examples/notebooks` | Executable demos | `demo_langgraph_instrument_and_optimize.ipynb`, `demo_langgraph_instrument_and_optimize_trace.ipynb`, `demo_langgraph_instrument_and_compare_observers.ipynb` | + +### File placement that matters for this doc + +| Concept | Current file | +|---|---| +| Graph adapters | `opto.features.graph.adapter` | 
+| Trace graph wrapper | `opto.features.graph.graph_instrumentation` | +| Graph sidecars | `opto.features.graph.sidecars` | +| IO entrypoints | `opto.trace.io.instrumentation`, `opto.trace.io.optimization` | +| OTEL runtime helper | `opto.trace.io.otel_runtime` | +| Passive observers | `opto.trace.io.observers` | +| sys.monitoring support | `opto.trace.io.sysmonitoring` | + +## Main concepts + +| Concept | Purpose | Why it exists | +|---|---|---| +| `GraphAdapter` | Runtime-agnostic graph abstraction | Keeps the graph integration reusable beyond LangGraph | +| `LangGraphAdapter` | Concrete adapter for LangGraph | Bridges LangGraph runtime rules with Trace optimization | +| `GraphModule` | `Module` view over an adapter | Reuses `train()` and `PrioritySearch` without a special graph-only trainer | +| `TraceGraph` | Trace-facing instrumented wrapper | Presents graph optimization through the same `instrument_graph(...)` façade | +| `GraphRunSidecar` | Per-run optimization state | Keeps Trace nodes out of the runtime return value | +| `OTELRunSidecar` | Per-run OTEL artifact container | Keeps secondary observation artifacts explicit | +| `Binding` | String key -> live getter/setter mapping | Lets update dictionaries mutate prompts, code params, and graph knobs safely | +| `ObserverArtifact` | Normalized passive observation payload | Makes optional OTEL/sysmon observers composable across backends | + +## Architecture schema + +```mermaid +flowchart TD + U[User] + IG[instrument_graph(...)] + OG[optimize_graph(...)] + + subgraph FG[opto.features.graph] + GA[GraphAdapter] + LGA[LangGraphAdapter] + GM[GraphModule] + TG[TraceGraph] + GRS[GraphRunSidecar] + ORS[OTELRunSidecar] + GCS[GraphCandidateSnapshot] + GI[instrument_trace_graph] + end + + subgraph IO[opto.trace.io] + INST[InstrumentedGraph] + SMIG[SysMonInstrumentedGraph] + TS[TelemetrySession] + OTR[otel_runtime.py / TracingLLM] + OBS[observers.py] + OTA[otlp_traces_to_trace_json] + SYS[sysmonitoring.py] + 
STTGJ[sysmon_profile_to_tgj] + BIND[Binding / apply_updates] + OPTG[optimization.py] + INSTG[instrumentation.py] + end + + subgraph TRACE[Trace core] + BUNDLE[bundle / FunModule] + NODE[node / ParameterNode / MessageNode] + MOD[Module] + OPT[Optimizer] + TRAIN[train()] + PS[PrioritySearch] + MC[ModuleCandidate] + end + + subgraph DEMO[examples/notebooks] + N1[demo_langgraph_instrument_and_optimize.ipynb] + N2[demo_langgraph_instrument_and_optimize_trace.ipynb] + N3[demo_langgraph_instrument_and_compare_observers.ipynb] + end + + U --> IG + U --> OG + + IG --> INSTG + INSTG -->|backend='trace'| GI + INSTG -->|backend='otel'| INST + INSTG -->|backend='sysmon'| SMIG + + GI --> TG + GA --> LGA + LGA --> TG + LGA --> GM + LGA --> GRS + LGA --> ORS + LGA --> BIND + LGA --> BUNDLE + LGA --> NODE + + GM --> MOD + GM --> TRAIN + GM --> PS + PS --> MC + + INST --> TS + INST --> OTR + INST --> OBS + + SMIG --> SYS + SYS --> STTGJ + + OBS --> ORS + OBS -->|OTEL observer| TS + OBS -->|sysmon observer| SYS + + OG --> OPTG + OPTG -->|trace backend| TG + OPTG -->|otel backend| INST + OPTG -->|sysmon backend| SMIG + + TS --> OTA + OTA --> NODE + STTGJ --> NODE + TG --> NODE + NODE --> OPT + OPT --> BIND + + N1 --> IG + N1 --> OG + N2 --> IG + N2 --> OG + N3 --> IG + N3 --> TS + N3 --> OTA + N3 --> STTGJ ``` ---- +## Backend modes -## 2. Architecture +### Primary backends -``` -+---------------------------------------------------------------------+ -| User Code | -| | -| graph = StateGraph(...) # define LangGraph | -| graph.add_node("planner", ...) 
# add nodes | -| | -| ig = instrument_graph( # ONE-LINER instrumentation | -| graph=graph, llm=my_llm, | -| initial_templates={...}, | -| ) | -| result = optimize_graph(ig, queries=[...]) # ONE-LINER optimize | -+-------------------------------------+-------------------------------+ - | - +---------------------------v---------------------------+ - | instrument_graph() | - | | - | +--------------+ +-------------+ +-------------+ | - | | Telemetry | | TracingLLM | | Bindings | | - | | Session | | (dual | | (param -> | | - | | | | semconv) | | setter) | | - | | TracerProv. | | | | | | - | | InMemoryExp. | | param.* | | get() / | | - | | flush_otlp() | | gen_ai.* | | set() | | - | +------+-------+ +------+------+ +------+------+ | - | | | | | - | +--------+--------+ | | - | | | | - | +-------------v-----------------+ | | - | | InstrumentedGraph | | | - | | .graph (CompiledGraph) |--------+ | - | | .session (TelemetrySession) | | - | | .tracing_llm (TracingLLM) | | - | | .templates (dict) | | - | | .bindings (dict) | | - | | .invoke() .stream() | | - | +-------------------------------+ | - +--------------------------------------------------------+ -``` +| Primary backend | Runtime carrier | Optimization carrier | Typical object returned by `instrument_graph(...)` | Main use | +|---|---|---|---|---| +| `trace` | native Python runtime with graph adapter or wrapped functions | native Trace nodes and parameters | `TraceGraph` | direct graph optimization | +| `otel` | original runtime plus OTEL spans | OTLP -> TGJ -> Trace nodes | `InstrumentedGraph` | observability-first optimization | +| `sysmon` | original runtime plus `sys.monitoring` profile | sysmon profile -> TGJ -> Trace nodes | `SysMonInstrumentedGraph` | low-level execution profiling and optimization | -### Component Responsibilities +### Why sysmon should appear in the doc -| Component | Module | Purpose | -|-----------|--------|---------| -| `InstrumentedGraph` | `instrumentation.py` | Wrapper returned by 
`instrument_graph()`; holds graph, session, tracing_llm, templates, bindings | -| `TelemetrySession` | `telemetry_session.py` | Manages `TracerProvider` + `InMemorySpanExporter`; provides `flush_otlp()`, `flush_tgj()`, `export_run_bundle()` | -| `TracingLLM` | `langgraph_otel_runtime.py` | Wraps any OpenAI-compatible LLM; emits parent spans (`param.*`) and child spans (`gen_ai.*`) | -| `Binding` | `bindings.py` | Dataclass with `get()`/`set()` callables mapping optimizer keys to live variables | -| `optimize_graph()` | `optimization.py` | Orchestrates the optimization loop: invoke, flush OTLP, convert to TGJ, run optimizer, apply updates | -| `otel_adapter` | `otel_adapter.py` | Converts OTLP JSON to Trace-Graph JSON (TGJ) with temporal hierarchy | -| `tgj_ingest` | `tgj_ingest.py` | Ingests TGJ documents into `ParameterNode` / `MessageNode` objects | -| `otel_semconv` | `otel_semconv.py` | Helpers: `emit_reward()`, `emit_trace()`, `record_genai_chat()` | -| `graph_instrumentation` | `graph_instrumentation.py` | Trace-native graph instrumentation (`TraceGraph`) | +`sysmon` is no longer only a notebook curiosity. In the current code it exists in two places: +1. as a **primary backend** via `backend="sysmon"` +2. 
as a **passive observer** via `observe_with=("sysmon",)` on `trace` or `otel` -### Supported Graph Kinds +So it must be present in: +- the backend table +- the architecture schema +- the end-to-end flow discussion +- the compare-observers notebook section -| Kind | Support | Notes | -|------|---------|--------| -| Sync graphs | Yes | `invoke()` on compiled `StateGraph`; node wrappers run synchronously | -| Async graphs | Planned | `ainvoke()` / `astream()`; same wrapper model, async span handling | -| Streaming | Planned | `stream()` / `astream()`; spans emitted per node completion | -| Tools | Yes | Tool calls inside nodes traced via the same LLM wrapper | -| Loops | Yes | Cyclic graphs and conditional edges; each node execution gets a span | +It does **not** need to dominate the document. It is best documented as a third execution/observation carrier next to trace and OTEL. -Instrumentation uses **node-level wrappers** (not LangChain/LangGraph callbacks). This provides full control over span boundaries and parent-child relationships, guarantees `param.*` and `gen_ai.*` attributes for TGJ and Agent Lightning, and works identically for custom and default graphs. +## Observer combinations ---- +Passive observers are optional and sit next to the primary backend. They are not the primary optimization carrier unless the primary backend itself is `sysmon` or `otel`. -## 3. Public API Reference +| Primary backend | Allowed `observe_with` | Result | +|---|---|---| +| `trace` | `()`, `("otel",)`, `("sysmon",)`, `("otel", "sysmon")` | primary optimization still uses Trace output nodes; observer artifacts are extra | +| `otel` | `()`, `("sysmon",)` | primary optimization still uses OTEL -> TGJ -> Trace | +| `sysmon` | not supported | sysmon is already the primary backend | -### `instrument_graph()` +### Practical meaning -Wraps a LangGraph with automatic OTEL instrumentation. 
+- `trace + observer` is mainly for **comparison** and **debugging** +- `otel + sysmon observer` is useful when you want the OTEL optimization path plus a second profiling view +- the current compare-observers demo exercises exactly these combinations -```python -from opto.trace.io import instrument_graph +## Adapter model -ig = instrument_graph( - graph=my_state_graph, # StateGraph or CompiledGraph (auto-compiled) - service_name="my-agent", # OTEL service name - trainable_keys={"planner"}, # None = all trainable - llm=my_llm_client, # Any OpenAI-compatible client - initial_templates={ # Starting prompt templates - "planner_prompt": "Plan for: {query}", - }, - emit_genai_child_spans=True, # Agent Lightning gen_ai.* child spans - bindings=None, # Auto-derived from templates if None - in_place=False, # Don't permanently mutate original graph - provider_name="openai", # For gen_ai.provider.name attribute -) -> InstrumentedGraph -``` +### GraphAdapter -**Returns** an `InstrumentedGraph` with `.invoke()`, `.session`, `.tracing_llm`, `.templates`, and `.bindings`. +`GraphAdapter` is the runtime-agnostic abstraction for graph-like systems. -### `optimize_graph()` +Responsibilities: +- expose parameters +- expose bindings +- build a backend-specific runtime graph +- provide `invoke_runtime(...)` +- provide `invoke_trace(...)` +- provide `as_module()` so the graph can participate in the existing trainer/search stack -Runs the optimization loop on an instrumented graph. +### LangGraphAdapter -```python -from opto.trace.io import optimize_graph, EvalResult +`LangGraphAdapter` is the LangGraph-specific adapter. 
-result = optimize_graph( - graph=ig, # InstrumentedGraph from instrument_graph() - queries=["q1", "q2"], # List of queries or state dicts - iterations=5, # Optimization iterations (after baseline) - optimizer=None, # Auto-creates OptoPrime if None - eval_fn=my_eval_fn, # float | str | dict | EvalResult -> normalized - apply_updates_flag=True, # Apply optimizer suggestions via bindings - on_iteration=my_callback, # (iter, runs, updates) progress callback -) -> OptimizationResult -``` +Responsibilities: +- normalize function targets, prompt targets, and graph knobs +- wrap selected functions as `FunModule`s +- auto-build prompt/code/graph bindings +- cache compiled runtime graphs by backend and knob values +- execute the graph while preserving native runtime outputs +- populate a sidecar with optimization-facing state -### `EvalResult` +### GraphModule -```python -@dataclass -class EvalResult: - score: float | None = None # Numeric reward - feedback: str = "" # Textual feedback (Trace/TextGrad-compatible) - metrics: dict = {} # Free-form metrics -``` +`GraphModule` is the Trace `Module` view over an adapter. -The `EvalFn` type accepts any of these return types and auto-normalizes: +This is what makes the graph stack compatible with: +- `train(...)` +- `PrioritySearch` +- `ModuleCandidate` -| Return type | Conversion | -|-------------|------------| -| `float` / `int` | `EvalResult(score=value)` | -| `str` | Tries JSON parse, falls back to `EvalResult(feedback=value)` | -| `dict` | `EvalResult(score=d["score"], feedback=d["feedback"])` | -| `EvalResult` | Passed through | +The important point is that graph optimization did **not** introduce a separate trainer abstraction. It reuses the existing Trace module ecosystem. 
-### `OptimizationResult` +### TraceGraph -```python -@dataclass -class OptimizationResult: - baseline_score: float # Average score of the baseline run - best_score: float # Best average score across iterations - best_iteration: int # Which iteration achieved best_score - best_updates: dict # The parameter updates that achieved best - final_parameters: dict # Current values of all bound parameters - score_history: list[float] # Average score per iteration [baseline, iter1, ...] - all_runs: list[list[RunResult]] # Nested: all_runs[iteration][query_idx] -``` +`TraceGraph` is the trace-facing wrapper returned by `instrument_graph(..., backend="trace")`. -### `Binding` and `apply_updates()` +Current responsibilities: +- store parameters and bindings +- delegate runtime execution either to a compiled graph or to an adapter +- capture the latest sidecar +- optionally start/stop passive observers +- preserve `input_key`, `output_key`, `service_name`, and semantic metadata -Bindings decouple the optimizer's string-keyed updates from the runtime location of the actual variable. This makes optimization generic -- no hard-coded node names. +### Sidecars -```python -from opto.trace.io import Binding, apply_updates, make_dict_binding +A sidecar stores optimization-facing state without changing the original runtime return type. 
-# Binding wraps any get/set pair -binding = Binding( - get=lambda: my_config["prompt"], - set=lambda v: my_config.__setitem__("prompt", v), - kind="prompt", # "prompt" | "code" | "graph" -) +Current sidecar roles: -# Convenience: bind to a dict entry -binding = make_dict_binding(my_dict, "key_name", kind="prompt") +| Sidecar | Purpose | +|---|---| +| `GraphRunSidecar` | shadow state, traced node outputs, final output node, runtime result | +| `OTELRunSidecar` | OTEL payload placeholders and associated metadata | +| `GraphCandidateSnapshot` | debugging and introspection for graph candidates | -# Apply optimizer output -apply_updates( - {"prompt_key": "new value"}, - {"prompt_key": binding}, - strict=True, # raise KeyError on unknown keys -) -``` +The sidecar pattern is especially important for LangGraph because LangGraph nodes expect dict-like Python state, while Trace optimizers expect `Node` objects. + +## Multiple traces and observers + +“Multiple traces” means several different but related objects can coexist for the same run. + +| Kind | Meaning | Current location | +|---|---|---| +| runtime execution | the actual graph or function execution | LangGraph runtime / Python runtime | +| trace-native optimization graph | the Trace node graph used for backward/step | `TraceGraph` and sidecar output nodes | +| converted OTEL trace | external span graph converted to TGJ then Trace nodes | `otlp_traces_to_trace_json(...)` | +| converted sysmon profile | Python execution profile converted to TGJ then Trace nodes | `sysmon_profile_to_tgj(...)` | +| passive observer artifacts | extra captured views of the same run | `_last_observer_artifacts` on trace/otel objects | + +A key current invariant is: -**Binding kinds:** +> the runtime carrier and the optimization carrier do not have to be the same object. 
-| Kind | Description | Example | -|------|-------------|---------| -| `"prompt"` | Text template / system prompt | `"Plan for: {query}"` | -| `"code"` | Function source code (via `param.__code_*`) | `"def route(state): ..."` | -| `"graph"` | Graph routing knob | `"param.route_threshold"` | +That is why: +- `trace` can keep returning plain dicts while optimizing through sidecar output nodes +- `otel` can optimize through ingested TGJ nodes instead of through the live runtime return value +- `sysmon` can optimize through converted execution profiles -**How bindings are created:** +## Public API cheat sheet -1. **Auto-derived** (default): When `bindings=None` and `initial_templates` is provided, `instrument_graph()` creates one `Binding` per template key, backed by the `templates` dict. -2. **Explicit**: Pass `bindings={"key": Binding(get=..., set=...)}` for custom targets (e.g., class attributes, database rows, config files). +### `instrument_graph(...)` -### Span Helpers +Current high-level modes: ```python -from opto.trace.io import emit_reward, emit_trace +from opto.trace.io import instrument_graph -# Emit a reward span (Agent Lightning compatible) -emit_reward(session, value=0.85, name="eval_score") +# Trace-native graph optimization +trace_graph = instrument_graph( + adapter=my_adapter, + backend="trace", + output_key="final_answer", +) -# Emit a custom debug span -emit_trace(session, name="my_debug_span", attrs={"key": "value"}) +# OTEL-backed optimization +otel_graph = instrument_graph( + graph=my_graph, + backend="otel", + llm=my_llm, + bindings=my_bindings, + output_key="final_answer", +) + +# sys.monitoring-backed optimization +sysmon_graph = instrument_graph( + graph=my_graph, + backend="sysmon", + bindings=my_bindings, + output_key="final_answer", +) ``` ---- +### Passive observers -## 4. 
Data Flow Pipeline +```python +# Trace primary backend with additional OTEL and sysmon observer artifacts +trace_graph = instrument_graph( + adapter=my_adapter, + backend="trace", + observe_with=("otel", "sysmon"), + output_key="final_answer", +) +``` -The end-to-end pipeline executed by `optimize_graph()` per iteration: +### `optimize_graph(...)` -``` - +---------+ +----------+ +-----------+ +-----------+ - | invoke()|---->| flush |---->| OTLP->TGJ |---->| ingest | - | LangGraph| | _otlp() | | adapter | | _tgj() | - +---------+ +----------+ +-----------+ +-----+-----+ - | - v - +---------+ +----------+ +-----------+ +-----------+ - | apply |<----| optimizer|<----| backward() |<----| Parameter | - |_updates()| | .step() | | feedback | | Node + | - +----+----+ +----------+ +-----------+ | Message | - | | Node | - v +-----------+ - +---------+ - |templates| <- updated via Binding.set() - | dict | -> next invoke() uses new prompts - +---------+ +```python +result = optimize_graph( + instrumented_graph, + queries=["What is CRISPR?"], + iterations=5, + eval_fn=my_eval_fn, + output_key="final_answer", +) ``` -### Step-by-step +The primary optimization carrier depends on `instrumented_graph.backend`. -1. **`invoke()`** -- Execute the LangGraph. Each node calls `TracingLLM.node_call()` which creates OTEL spans with `param.*` attributes. -2. **`flush_otlp()`** -- Extract all collected spans from the `InMemorySpanExporter` as an OTLP JSON payload and clear the exporter. -3. **`eval_fn()`** -- Evaluate the graph output. The `EvalFn` signature accepts `float | str | dict | EvalResult` and auto-normalizes. -4. **OTLP to TGJ** -- `otlp_traces_to_trace_json()` converts OTLP spans into Trace-Graph JSON format with temporal hierarchy. -5. **`ingest_tgj()`** -- Parse TGJ into `ParameterNode` (trainable prompts) and `MessageNode` (span outputs) objects. -6. **`backward()`** -- Propagate evaluation feedback through the trace graph to trainable parameters. -7. 
**`optimizer.step()`** -- The optimizer (e.g., `OptoPrime`) suggests parameter updates based on the feedback. -8. **`apply_updates()`** -- Push the optimizer's output through `Binding.set()` to update live template values. -9. **Next iteration** -- The updated templates are automatically used by `TracingLLM.node_call()` on the next `invoke()`. +## Optimization carriers and update path ---- +### Update path by backend -## 5. Semantic Conventions +| Backend | What `optimize_graph(...)` reads | What the optimizer sees | How updates are applied | +|---|---|---|---| +| `trace` | sidecar `output_node` or Trace node result | native Trace nodes | direct parameter mutation or string-keyed `apply_updates(...)` through bindings | +| `otel` | OTLP payload flushed from `TelemetrySession` | ingested TGJ -> Trace nodes | `apply_updates(...)` through bindings | +| `sysmon` | sysmon profile document | converted TGJ -> Trace nodes | `apply_updates(...)` through bindings | -`TracingLLM` implements **dual semantic conventions** -- a single LLM call emits two spans: +### Why `Binding` is still central -``` -+--------------------------------------------------+ -| Parent span: "planner" | -| | -| param.planner_prompt = "Plan for: {query}" | <- Trace/TGJ optimization -| param.planner_prompt.trainable = true | -| inputs.gen_ai.prompt = "Plan for: cats" | -| gen_ai.model = "llama-3.1-8b" | -| | -| +--------------------------------------------+ | -| | Child span: "openai.chat.completion" | | -| | | | -| | gen_ai.operation.name = "chat" | | <- Agent Lightning observability -| | gen_ai.provider.name = "openai" | | -| | gen_ai.request.model = "llama-3.1-8b" | | -| | gen_ai.output.preview = "Step 1: ..." 
| | -| | trace.temporal_ignore = "true" | | <- prevents TGJ chain break -| +--------------------------------------------+ | -+--------------------------------------------------+ -``` +`Binding` remains the stable mutation surface for: +- prompt text +- code parameters +- graph knobs -### Attribute Reference +That is what keeps update application generic across backends. -| Attribute | Purpose | Span Level | Consumed By | -|-----------|---------|------------|-------------| -| `param.*` | Trainable parameter values | Parent | Optimizer (via TGJ `ParameterNode`) | -| `param.*.trainable` | Whether the parameter is optimizable | Parent | TGJ adapter | -| `inputs.*` | Input signals to the node | Parent | TGJ `MessageNode` edges | -| `gen_ai.operation.name` | LLM operation type (e.g., `"chat"`) | Child | Agent Lightning dashboards | -| `gen_ai.provider.name` | LLM provider (e.g., `"openai"`, `"openrouter"`) | Child | Agent Lightning dashboards | -| `gen_ai.request.model` | Model identifier | Child | Agent Lightning dashboards | -| `gen_ai.input.messages` | JSON array of input messages | Child | Agent Lightning dashboards | -| `gen_ai.output.messages` | JSON array of response messages | Child | Agent Lightning dashboards | -| `trace.temporal_ignore` | Exclude from TGJ temporal chain (`"true"`) | Child | `otel_adapter.py` | -| `agentlightning.reward.0.name` | Evaluation reward name | Reward span | Agent Lightning | -| `agentlightning.reward.0.value` | Stringified numeric reward (e.g., `"0.933"`) | Reward span | Agent Lightning | +Binding kinds currently used in the graph stack: -### OTEL Span Types +| Kind | Meaning | +|---|---| +| `prompt` | prompt or template text | +| `code` | code parameter associated with a bundled function | +| `graph` | workflow policy, routing knob, edge policy, or similar graph-level parameter | -**Node spans** (one per node execution): -- `param.{template_name}` -- prompt template text (if node has a trainable template) -- 
`param.{template_name}.trainable` -- `"True"` or `"False"` -- `inputs.gen_ai.prompt` -- user-facing input snippet -- `gen_ai.model` -- model identifier +## OTEL semantic conventions and temporal chaining -**LLM spans** (child of node span): -- `gen_ai.operation.name`, `gen_ai.provider.name`, `gen_ai.request.model` -- `gen_ai.input.messages`, `gen_ai.output.messages` -- `trace.temporal_ignore` = `"true"` +The current doc should keep the old OTEL details because they are still relevant for the OTEL path. -**Evaluation / reward spans** (Agent Lightning compatibility): -- Span name: `agentlightning.annotation` -- `trace.temporal_ignore` = `"true"` -- `agentlightning.reward.0.name`, `agentlightning.reward.0.value` +### Dual semantic conventions -### `message.id` +The OTEL runtime emits: +- Trace-relevant `param.*` attributes for optimization +- `gen_ai.*` attributes for broader OTEL/Agent-Lightning-style observability -Each span is assigned a unique `message.id` (span ID) used by the TGJ adapter to reconstruct parent-child and temporal edges in the trace graph. The `traceId` groups all spans from a single `invoke()` call. +### Temporal chaining ---- +The OTEL conversion path still relies on temporal structure when building TGJ from spans. The important rule is unchanged: -## 6. Temporal Chaining +- child spans should not incorrectly advance the top-level optimization chain +- `trace.temporal_ignore` remains the mechanism used to keep child spans from breaking the sequential graph view -When `use_temporal_hierarchy=True`, the OTLP-to-TGJ adapter creates parent-child edges between sequential top-level spans. This enables the optimizer to propagate feedback **backward** through the full execution chain. 
+### Why these old sections are still worth keeping -### The Critical Invariant +Even after adding adapters and sysmon, the OTEL path still depends on: +- span semantics +- OTLP -> TGJ conversion +- temporal hierarchy reconstruction -Child spans (those with a `parentSpanId` in OTEL) must **not** advance the temporal chain. Without this rule, a child LLM span from node A could become the temporal parent of node B, breaking sequential optimization. +So the previous OTEL semantic and temporal sections should be retained, but updated to reference `otel_runtime.py` and the current `opto.features.graph.graph_instrumentation` location. -``` - OTEL spans (time order) TGJ temporal chain - ----------------------- ------------------ - planner (root) --------> planner - +- openai.chat (child) (skipped -- has parentSpanId) - synthesizer (root) --------> synthesizer (parent = planner) - +- openai.chat (child) (skipped) -``` +## Notebook and demo coverage -The adapter achieves this with a simple check: +### Core notebooks -```python -# Only advance the temporal chain on spans that were NOT children in OTEL -if not orig_has_parent: - prev_span_id = sid -``` +| Notebook | Purpose | +|---|---| +| `demo_langgraph_instrument_and_optimize.ipynb` | OTEL-backed graph instrumentation and optimization | +| `demo_langgraph_instrument_and_optimize_trace.ipynb` | trace-native graph instrumentation and optimization | +| `demo_langgraph_instrument_and_compare_observers.ipynb` | compare trace / OTEL / sysmon carriers and observer combinations | -Child spans carry `trace.temporal_ignore = "true"` as an additional signal for downstream consumers. +### Compare-observers demo -### Without vs. With temporal_ignore +The compare-observers demo is the main place where the three carriers are made comparable. 
-``` -Without temporal_ignore: - planner -> openrouter.chat.completion -> researcher (WRONG) +It builds views for: +- trace-native subgraphs +- OTEL spans converted through `otlp_traces_to_trace_json(...)` +- sys.monitoring profiles converted through `sysmon_profile_to_tgj(...)` -With temporal_ignore: - planner -> researcher (CORRECT -- child span excluded from chain) -``` +This is the right place in the documentation to mention: +- passive observers +- observer artifacts +- cross-carrier comparison +- why sysmon exists in the stack without making it sound like the primary design center ---- +## Open questions + +The current structure is robust enough for the current PR, but a few topics are still open: -## 7. Core Modules - -### `opto/trace/io/` - -| File | Lines | Purpose | -|------|-------|---------| -| `__init__.py` | 82 | Public API surface -- exports all symbols | -| `instrumentation.py` | 138 | `instrument_graph()` + `InstrumentedGraph` dataclass | -| `optimization.py` | 412 | `optimize_graph()` loop + `EvalResult`, `EvalFn`, `RunResult`, `OptimizationResult` | -| `telemetry_session.py` | 188 | `TelemetrySession` -- unified OTEL session manager | -| `bindings.py` | 105 | `Binding` dataclass + `apply_updates()` + `make_dict_binding()` | -| `otel_semconv.py` | 126 | `emit_reward()`, `emit_trace()`, `record_genai_chat()`, `set_span_attributes()` | -| `langgraph_otel_runtime.py` | 367 | `TracingLLM` (dual semconv), `InMemorySpanExporter`, `flush_otlp()` | -| `otel_adapter.py` | 168 | `otlp_traces_to_trace_json()` -- OTLP to TGJ with temporal hierarchy | -| `tgj_ingest.py` | 234 | `ingest_tgj()`, `merge_tgj()` -- TGJ to `ParameterNode`/`MessageNode` | -| `tgj_export.py` | -- | Export Trace subgraphs back to TGJ (pre-existing) | -| `eval_hooks.py` | -- | Evaluation hook utilities (pre-existing) | - -### Tests - -| File | Tests | Scope | -|------|-------|-------| -| `tests/unit_tests/test_bindings.py` | 10 | `Binding`, `apply_updates()`, `make_dict_binding()` | -| 
`tests/unit_tests/test_otel_semconv.py` | 5 | `emit_reward()`, `emit_trace()`, `record_genai_chat()` | -| `tests/unit_tests/test_telemetry_session.py` | 6 | `TelemetrySession` flush, clear, filter, export | -| `tests/unit_tests/test_instrumentation.py` | 10 | `instrument_graph()`, `TracingLLM` child spans, temporal chaining | -| `tests/unit_tests/test_optimization.py` | 11 | `EvalResult`, `_normalise_eval()`, data classes | -| `tests/features_tests/test_e2e_m1_pipeline.py` | 21 | Full E2E: instrument, invoke, OTLP, TGJ, optimizer, apply_updates | +1. Should observer concepts become more generic beyond graph optimization, or stay graph-local for now? +2. Should `sysmon` remain a peer primary backend, or mostly be documented as a profiling backend plus observer? +3. Should some OTEL-specific explanatory material be split into a dedicated OTEL section to keep this document shorter? +4. If a non-LangGraph runtime is added next, should it implement only `GraphAdapter`, or also a richer observer-aware adapter helper? diff --git a/opto/features/mlflow/__init__.py b/opto/features/mlflow/__init__.py new file mode 100644 index 00000000..12dec7ff --- /dev/null +++ b/opto/features/mlflow/__init__.py @@ -0,0 +1,18 @@ +""" +opto.features.mlflow +=================== + +Optional MLflow integration for Trace. + +Importing this package should be safe even when MLflow is not installed. +Use ``opto.features.mlflow.autolog`` to enable tracing/metrics capture. +""" + +from .autolog import autolog, disable_autolog, get_autolog_config, is_autolog_enabled + +__all__ = [ + "autolog", + "disable_autolog", + "get_autolog_config", + "is_autolog_enabled", +] diff --git a/opto/features/mlflow/autolog.py b/opto/features/mlflow/autolog.py new file mode 100644 index 00000000..1ba99f5d --- /dev/null +++ b/opto/features/mlflow/autolog.py @@ -0,0 +1,82 @@ +""" +opto.features.mlflow.autolog +=========================== + +Best-effort MLflow autologging integration. 
+ +Design goals +------------ +- Keep MLflow as an *optional* dependency. +- Defaults should be "off" so existing code paths are unchanged. +- When enabled, ``@trace.bundle`` operations may be wrapped by ``mlflow.trace`` + (see ``opto.trace.bundle.bundle``), and LiteLLM calls can be autologged + when supported by the installed MLflow version. +""" + +from __future__ import annotations + +import logging +from typing import Any, Dict, Optional + +from opto.trace import settings + +logger = logging.getLogger(__name__) + +mlflow_autologging = False +mlflow_config = {} + +def autolog( + *, + log_models: bool = True, + disable_default_op_logging: bool = True, + extra_tags: Optional[Dict[str, Any]] = None, + silent: bool = False, +) -> None: + """Enable MLflow autologging for Trace. + + Parameters + ---------- + log_models + If True, enable tracing spans (via ``mlflow.trace`` wrapping). + disable_default_op_logging + If True, suppress spans for low-level "default ops" (heuristically detected). + extra_tags + Optional tag dict to be attached by downstream MLflow tooling. + silent + If True, suppress warnings when MLflow isn't installed. 
+ """ + global mlflow_autologging, mlflow_config + mlflow_autologging = True + mlflow_config = { + "log_models": log_models, + "disable_default_op_logging": disable_default_op_logging, + "extra_tags": extra_tags or {}, + } + + try: + import mlflow # type: ignore + except Exception: + settings.mlflow_autologging = False + if not silent: + logger.warning("MLflow is not installed; MLflow autologging disabled.") + return + + try: + if hasattr(mlflow, "litellm") and hasattr(mlflow.litellm, "autolog"): + mlflow.litellm.autolog() + except Exception: + pass + + +def disable_autolog() -> None: + """Disable MLflow autologging.""" + settings.mlflow_autologging = False + settings.mlflow_config = {} + + +def is_autolog_enabled() -> bool: + return bool(settings.mlflow_autologging) + + +def get_autolog_config() -> Dict[str, Any]: + return dict(settings.mlflow_config or {}) diff --git a/opto/trace/io/__init__.py b/opto/trace/io/__init__.py index 44e6df32..1e0983f6 100644 --- a/opto/trace/io/__init__.py +++ b/opto/trace/io/__init__.py @@ -41,7 +41,7 @@ ) # -- lower-level ----------------------------------------------------------- -from opto.trace.io.langgraph_otel_runtime import ( +from opto.trace.io.otel_runtime import ( TracingLLM, LLMCallError, InMemorySpanExporter, diff --git a/opto/trace/io/instrumentation.py b/opto/trace/io/instrumentation.py index bd475989..422b34c0 100644 --- a/opto/trace/io/instrumentation.py +++ b/opto/trace/io/instrumentation.py @@ -17,7 +17,7 @@ from opto.trace.io.bindings import Binding, make_dict_binding from opto.features.graph.graph_instrumentation import instrument_trace_graph -from opto.trace.io.langgraph_otel_runtime import TracingLLM +from opto.trace.io.otel_runtime import TracingLLM from opto.trace.io.observers import GraphObserver, OTelObserver from opto.trace.io.sysmonitoring import SysMonObserver, SysMonitoringSession from opto.trace.io.telemetry_session import TelemetrySession diff --git a/opto/trace/io/optimization.py 
b/opto/trace/io/optimization.py index 134e9821..0788d52b 100644 --- a/opto/trace/io/optimization.py +++ b/opto/trace/io/optimization.py @@ -147,7 +147,7 @@ class OptimizationResult: def _default_eval_fn(payload: Dict[str, Any]) -> EvalResult: """Extract evaluation from the OTLP trace's evaluator span, if present.""" - from opto.trace.io.langgraph_otel_runtime import extract_eval_metrics_from_otlp + from opto.trace.io.otel_runtime import extract_eval_metrics_from_otlp otlp = payload.get("otlp", {}) score, metrics, reasons = extract_eval_metrics_from_otlp(otlp) diff --git a/opto/trace/io/langgraph_otel_runtime.py b/opto/trace/io/otel_runtime.py similarity index 99% rename from opto/trace/io/langgraph_otel_runtime.py rename to opto/trace/io/otel_runtime.py index 01010ed7..8ab4be5d 100644 --- a/opto/trace/io/langgraph_otel_runtime.py +++ b/opto/trace/io/otel_runtime.py @@ -24,7 +24,7 @@ def __init__(self, message: str, *, status_code: Optional[int] = None): class InMemorySpanExporter(SpanExporter): - """In-memory span exporter used by LangGraph + OTEL demos.""" + """In-memory OTEL span exporter used by tests and demos.""" def __init__(self) -> None: self._finished_spans: List[ReadableSpan] = [] diff --git a/opto/trace/io/telemetry_session.py b/opto/trace/io/telemetry_session.py index 06ef1567..8c24a339 100644 --- a/opto/trace/io/telemetry_session.py +++ b/opto/trace/io/telemetry_session.py @@ -31,7 +31,7 @@ from opentelemetry import trace as oteltrace from opentelemetry.sdk.trace import TracerProvider -from opto.trace.io.langgraph_otel_runtime import ( +from opto.trace.io.otel_runtime import ( InMemorySpanExporter, flush_otlp as _flush_otlp_raw, ) diff --git a/tests/unit_tests/test_langgraph_otel_runtime.py b/tests/unit_tests/test_otel_runtime.py similarity index 99% rename from tests/unit_tests/test_langgraph_otel_runtime.py rename to tests/unit_tests/test_otel_runtime.py index 94b9c34e..9d225dd0 100644 --- a/tests/unit_tests/test_langgraph_otel_runtime.py +++ 
b/tests/unit_tests/test_otel_runtime.py @@ -1,6 +1,6 @@ import pytest -from opto.trace.io.langgraph_otel_runtime import ( +from opto.trace.io.otel_runtime import ( init_otel_runtime, TracingLLM, flush_otlp, From acbb139ca1cd64c59ab9b6f5bfbe0b36cd96db53 Mon Sep 17 00:00:00 2001 From: doxav Date: Tue, 21 Apr 2026 09:42:28 +0200 Subject: [PATCH 08/16] minimal inline comments --- opto/features/graph/__init__.py | 2 ++ opto/features/graph/adapter.py | 30 ++++++++++++++++++++ opto/features/graph/graph_instrumentation.py | 5 ++++ opto/features/graph/module.py | 7 +++++ opto/features/graph/sidecars.py | 5 ++++ opto/trace/io/bindings.py | 1 + opto/trace/io/instrumentation.py | 6 ++++ opto/trace/io/observers.py | 11 +++++++ opto/trace/io/optimization.py | 8 ++++++ opto/trace/io/otel_adapter.py | 8 +++++- opto/trace/io/otel_runtime.py | 13 +++++++++ opto/trace/io/sysmonitoring.py | 17 +++++++++++ opto/trace/io/telemetry_session.py | 12 ++++++++ opto/trace/io/tgj_export.py | 8 ++++-- opto/trace/io/tgj_ingest.py | 12 ++++++++ 15 files changed, 142 insertions(+), 3 deletions(-) diff --git a/opto/features/graph/__init__.py b/opto/features/graph/__init__.py index da59b890..b3b73fe3 100644 --- a/opto/features/graph/__init__.py +++ b/opto/features/graph/__init__.py @@ -1,3 +1,5 @@ +"""Public graph adapter helpers used by the instrumentation layer.""" + from opto.features.graph.sidecars import GraphRunSidecar, OTELRunSidecar, GraphCandidateSnapshot from opto.features.graph.adapter import GraphAdapter, LangGraphAdapter from opto.features.graph.module import GraphModule diff --git a/opto/features/graph/adapter.py b/opto/features/graph/adapter.py index b1b97aab..458985c9 100644 --- a/opto/features/graph/adapter.py +++ b/opto/features/graph/adapter.py @@ -1,3 +1,5 @@ +"""Adapters that build graph objects for trace-native and OTEL execution.""" + from __future__ import annotations import contextlib @@ -15,6 +17,7 @@ def _raw(value: Any) -> Any: + """Return the underlying Python value for 
Trace nodes and wrappers.""" return getattr(value, "data", value) @@ -22,6 +25,7 @@ def _normalize_named_callables( targets: Union[None, List[str], List[Callable[..., Any]], Mapping[str, Callable[..., Any]]], scope: Optional[Dict[str, Any]] = None, ) -> Dict[str, Callable[..., Any]]: + """Normalize function targets into a ``{name: callable}`` mapping.""" if targets is None: return {} if isinstance(targets, Mapping): @@ -38,6 +42,7 @@ def _normalize_named_callables( def _as_parameter(name: str, value: Any) -> ParameterNode: + """Coerce a raw value or node into a trainable ``ParameterNode``.""" if isinstance(value, ParameterNode): return value if isinstance(value, Node): @@ -47,6 +52,8 @@ def _as_parameter(name: str, value: Any) -> ParameterNode: @dataclass class GraphAdapter: + """Abstract adapter that exposes a graph factory as an instrumentable object.""" + graph_factory: Callable[..., Any] backend: str = "trace" bindings: Dict[str, Binding] = field(default_factory=dict) @@ -55,27 +62,35 @@ class GraphAdapter: output_key: Optional[str] = None def build_graph(self, backend: Optional[str] = None): + """Build and compile the graph for the requested backend.""" raise NotImplementedError def invoke_runtime(self, state: Dict[str, Any], **kwargs: Any): + """Execute the runtime-facing graph and return ``(result, sidecar)``.""" raise NotImplementedError def invoke_trace(self, state: Dict[str, Any], **kwargs: Any): + """Execute the trace-native graph and return ``(result, sidecar)``.""" raise NotImplementedError def new_run_sidecar(self): + """Create the per-invocation sidecar used to preserve traced state.""" return GraphRunSidecar() def bindings_dict(self) -> Dict[str, Binding]: + """Return a shallow copy of the adapter bindings.""" return dict(self.bindings) def parameters(self) -> List[ParameterNode]: + """Return the trainable parameters surfaced by this adapter.""" raise NotImplementedError def as_module(self) -> GraphModule: + """Expose the adapter through the 
``trace.modules.Module`` interface.""" return GraphModule(self) def instrument(self, backend: Optional[str] = None, **kwargs: Any): + """Wrap the adapter with the instrumentation backend requested by the caller.""" effective_backend = backend or self.backend service_name = kwargs.pop("service_name", self.service_name) input_key = kwargs.pop("input_key", self.input_key) @@ -109,6 +124,8 @@ def instrument(self, backend: Optional[str] = None, **kwargs: Any): @dataclass class LangGraphAdapter(GraphAdapter): + """Concrete adapter for LangGraph-style factories and scoped callables.""" + function_targets: Union[None, List[str], List[Callable[..., Any]], Mapping[str, Callable[..., Any]]] = None prompt_targets: Optional[Mapping[str, Any]] = None graph_knobs: Optional[Mapping[str, Any]] = None @@ -116,6 +133,7 @@ class LangGraphAdapter(GraphAdapter): train_graph_agents_functions: bool = True def __post_init__(self) -> None: + """Normalize targets, create traced wrappers, and auto-build bindings.""" self.function_targets = _normalize_named_callables(self.function_targets, self.scope) self.prompt_targets = {k: _as_parameter(k, v) for k, v in dict(self.prompt_targets or {}).items()} self.graph_knobs = {k: _as_parameter(k, v) for k, v in dict(self.graph_knobs or {}).items()} @@ -139,12 +157,14 @@ def __post_init__(self) -> None: self._build_bindings() def __getstate__(self): + """Drop transient runtime state so the adapter remains pickle-friendly.""" state = self.__dict__.copy() state["_active_sidecar"] = None state["_compiled_cache"] = {} return state def _build_bindings(self) -> None: + """Derive bindings for prompts, graph knobs, and traced code parameters.""" auto: Dict[str, Binding] = {} for name, prompt in self.prompt_targets.items(): auto[name] = Binding( @@ -171,6 +191,7 @@ def _build_bindings(self) -> None: self.bindings = auto def parameters(self) -> List[ParameterNode]: + """Collect the unique trainable parameters owned by the adapter.""" params: List[ParameterNode] = 
[] params.extend(self.prompt_targets.values()) params.extend(self.graph_knobs.values()) @@ -191,13 +212,16 @@ def parameters(self) -> List[ParameterNode]: return out def _knob_values(self) -> Dict[str, Any]: + """Read graph knob values as raw Python objects.""" return {k: _raw(v) for k, v in self.graph_knobs.items()} def _cache_key(self, backend: str): + """Build the compiled-graph cache key for a backend/knob combination.""" return backend, tuple(sorted((k, repr(v)) for k, v in self._knob_values().items())) @contextlib.contextmanager def _scope_override(self, overrides: Dict[str, Any]): + """Temporarily patch adapter scope entries while constructing the graph.""" if not self.scope: yield return @@ -211,6 +235,7 @@ def _scope_override(self, overrides: Dict[str, Any]): self.scope[key] = backup[key] def _merge_shadow(self, sidecar: GraphRunSidecar, runtime_out: Any, traced_out: Any) -> None: + """Merge traced outputs back into the sidecar's shadow state.""" if not isinstance(runtime_out, dict): return if isinstance(traced_out, Node) and isinstance(getattr(traced_out, "data", None), dict): @@ -224,7 +249,9 @@ def _merge_shadow(self, sidecar: GraphRunSidecar, runtime_out: Any, traced_out: sidecar.shadow_state[key] = value if isinstance(value, Node) else node(value, name=key) def _trace_runtime_wrapper(self, name: str, traced_fn: FunModule): + """Wrap a traced function so runtime execution still updates Trace state.""" def _wrapped(state: Dict[str, Any], *args: Any, **kwargs: Any): + """Replay shadow inputs through the traced callable for one graph node.""" if self._active_sidecar is None: raise RuntimeError("Trace runtime wrapper called without active sidecar") sidecar = self._active_sidecar @@ -241,6 +268,7 @@ def _wrapped(state: Dict[str, Any], *args: Any, **kwargs: Any): return _wrapped def build_graph(self, backend: Optional[str] = None): + """Build, compile, and cache the graph for ``trace`` or ``otel`` execution.""" effective_backend = backend or self.backend key 
= self._cache_key(effective_backend) if key in self._compiled_cache: @@ -270,6 +298,7 @@ def build_graph(self, backend: Optional[str] = None): return compiled def invoke_runtime(self, state: Dict[str, Any], backend: Optional[str] = None, **kwargs: Any): + """Run the adapter using the runtime backend selected for this call.""" effective_backend = backend or self.backend if effective_backend == "otel": graph = self.build_graph(backend="otel") @@ -281,6 +310,7 @@ def invoke_runtime(self, state: Dict[str, Any], backend: Optional[str] = None, * return self.invoke_trace(state, **kwargs) def invoke_trace(self, state: Dict[str, Any], **kwargs: Any): + """Execute the graph with traced wrappers and capture the output node.""" sidecar = self.new_run_sidecar() for key, value in state.items(): sidecar.shadow_state[key] = value if isinstance(value, Node) else node(value, name=key) diff --git a/opto/features/graph/graph_instrumentation.py b/opto/features/graph/graph_instrumentation.py index e38f1b03..633acee2 100644 --- a/opto/features/graph/graph_instrumentation.py +++ b/opto/features/graph/graph_instrumentation.py @@ -34,6 +34,7 @@ class TraceGraph: observer_meta: Dict[str, Any] = field(default_factory=dict) def invoke(self, state: Any, **kwargs: Any) -> Any: + """Invoke the wrapped graph while notifying any configured observers.""" for obs in self.observers: meta = {"service_name": self.service_name} meta.update(self.observer_meta) @@ -57,10 +58,12 @@ def invoke(self, state: Any, **kwargs: Any) -> Any: self._last_observer_artifacts.append(obs.stop(result=result, error=error)) def stream(self, state: Any, **kwargs: Any): + """Delegate streaming execution to the wrapped graph object.""" yield from self.graph.stream(state, **kwargs) def _dedupe_identity(values: List[Any]) -> List[Any]: + """Remove duplicates while preserving the first occurrence by object identity.""" seen = set() out = [] for value in values: @@ -80,6 +83,7 @@ def _to_funmodule( allow_external_dependencies: bool 
= True, scope: Optional[Dict[str, Any]] = None, ) -> Any: + """Return a ``FunModule`` wrapper for a callable when needed.""" if isinstance(func, FunModule) or hasattr(func, "_fun"): return func @@ -101,6 +105,7 @@ def _to_funmodule( def _replace_scope_object(scope: Dict[str, Any], old_obj: Any, new_obj: Any) -> bool: + """Replace all identity matches in ``scope`` and report whether one was found.""" replaced = False for key, value in list(scope.items()): if value is old_obj: diff --git a/opto/features/graph/module.py b/opto/features/graph/module.py index eef3b4cd..248e89b8 100644 --- a/opto/features/graph/module.py +++ b/opto/features/graph/module.py @@ -1,3 +1,5 @@ +"""``Module`` wrapper that lets graph adapters participate in Trace models.""" + from __future__ import annotations from typing import Any, TYPE_CHECKING @@ -12,10 +14,12 @@ class GraphModule(Module): """Module view over a graph adapter.""" def __init__(self, adapter: "GraphAdapter"): + """Store the adapter and initialize per-run sidecar tracking.""" self.adapter = adapter self._last_sidecar = None def forward(self, x: Any): + """Run the adapter in trace mode and return the traced output node.""" state = x if isinstance(x, dict) else {self.adapter.input_key: x} _runtime, sidecar = self.adapter.invoke_trace(state) self._last_sidecar = sidecar @@ -24,14 +28,17 @@ def forward(self, x: Any): return sidecar.output_node def invoke(self, state: Any, **kwargs: Any) -> Any: + """Invoke the underlying runtime graph and preserve the latest sidecar.""" result, sidecar = self.adapter.invoke_runtime(state, **kwargs) self._last_sidecar = sidecar return result def parameters(self): + """Expose the adapter's trainable parameters.""" return self.adapter.parameters() def __getstate__(self): + """Clear transient sidecar state before serialization.""" state = self.__dict__.copy() state["_last_sidecar"] = None return state diff --git a/opto/features/graph/sidecars.py b/opto/features/graph/sidecars.py index 
1ad3cda1..d97eeb50 100644 --- a/opto/features/graph/sidecars.py +++ b/opto/features/graph/sidecars.py @@ -1,3 +1,5 @@ +"""Lightweight per-run sidecars shared by graph instrumentation backends.""" + from __future__ import annotations from dataclasses import dataclass, field @@ -19,15 +21,18 @@ def record_node_output( traced_output: Any, runtime_value: Any = None, ) -> None: + """Store the traced node output and any dict-shaped runtime shadow state.""" self.node_outputs[node_name] = traced_output if runtime_value is not None and isinstance(runtime_value, dict): self.shadow_state.update(runtime_value) def set_output(self, output_node: Any, runtime_result: Any) -> None: + """Record the final traced output node alongside the raw runtime result.""" self.output_node = output_node self.runtime_result = runtime_result def clear(self) -> None: + """Reset the sidecar for reuse in tests or debugging flows.""" self.node_outputs.clear() self.shadow_state.clear() self.output_node = None diff --git a/opto/trace/io/bindings.py b/opto/trace/io/bindings.py index 4dce6373..a6eff7bd 100644 --- a/opto/trace/io/bindings.py +++ b/opto/trace/io/bindings.py @@ -75,6 +75,7 @@ def apply_updates( """ def _normalize_key(k: Any) -> str: + """Coerce update keys into binding names used by the runtime.""" if isinstance(k, str): s = k else: diff --git a/opto/trace/io/instrumentation.py b/opto/trace/io/instrumentation.py index 422b34c0..522c57cf 100644 --- a/opto/trace/io/instrumentation.py +++ b/opto/trace/io/instrumentation.py @@ -126,6 +126,8 @@ def stream(self, state: Any, **kwargs: Any) -> Iterator[Dict[str, Any]]: @dataclass class SysMonInstrumentedGraph: + """Graph wrapper that captures execution profiles through ``sys.monitoring``.""" + graph: Any session: SysMonitoringSession bindings: Dict[str, Binding] = field(default_factory=dict) @@ -137,6 +139,7 @@ class SysMonInstrumentedGraph: _last_profile_doc: Optional[dict] = field(default=None, init=False, repr=False) def invoke(self, state: Any, 
**kwargs: Any): + """Run the graph while recording a sys.monitoring profile document.""" meta = {"service_name": self.service_name} meta.update(self.observer_meta) self.session.start(bindings=self.bindings, meta=meta) @@ -152,6 +155,7 @@ def invoke(self, state: Any, **kwargs: Any): self._last_profile_doc = self.session.stop(result=result, error=error) def stream(self, state: Any, **kwargs: Any): + """Streaming is intentionally unsupported for the sys.monitoring backend.""" raise NotImplementedError("SysMonInstrumentedGraph.stream is not implemented") @@ -160,6 +164,7 @@ def _make_observers( *, service_name: str, ) -> List[GraphObserver]: + """Instantiate the passive observers requested by ``instrument_graph``.""" observers: List[GraphObserver] = [] for name in observe_with: if name == "otel": @@ -370,6 +375,7 @@ def instrument_graph( CODE_ATTR_MAX_CHARS = 10_000 def _emit_code_param(span, code_key: str, code_fn: Any) -> None: + """Serialize code into bounded OTEL attributes for optimizer consumption.""" try: src = inspect.getsource(code_fn) except Exception: diff --git a/opto/trace/io/observers.py b/opto/trace/io/observers.py index 1cf3d3ae..03fe48d2 100644 --- a/opto/trace/io/observers.py +++ b/opto/trace/io/observers.py @@ -1,3 +1,5 @@ +"""Observer protocols used to collect passive artifacts alongside graph runs.""" + from __future__ import annotations from dataclasses import dataclass @@ -8,12 +10,16 @@ @dataclass class ObserverArtifact: + """Container for the raw payload emitted by an observer backend.""" + carrier: str raw: Any profile_doc: Optional[Dict[str, Any]] = None class GraphObserver(Protocol): + """Protocol implemented by passive observers attached to graph executions.""" + name: str def start( @@ -22,6 +28,7 @@ def start( bindings: Dict[str, Any], meta: Optional[Dict[str, Any]] = None, ) -> None: + """Begin collecting artifacts for a new graph invocation.""" ... 
def stop( @@ -30,6 +37,7 @@ def stop( result: Any = None, error: BaseException | None = None, ) -> ObserverArtifact: + """Finish collection and return the observer-specific artifact bundle.""" ... @@ -44,6 +52,7 @@ def __init__( *, service_name: str = "langgraph-otel-observer", ) -> None: + """Create an observer backed by its own or a shared telemetry session.""" self.session = session or TelemetrySession(service_name=service_name) self._ctx = None @@ -53,6 +62,7 @@ def start( bindings: Dict[str, Any], meta: Optional[Dict[str, Any]] = None, ) -> None: + """Activate the telemetry session before the primary graph run starts.""" self._ctx = self.session.activate() self._ctx.__enter__() @@ -62,6 +72,7 @@ def stop( result: Any = None, error: BaseException | None = None, ) -> ObserverArtifact: + """Flush OTLP artifacts and close the activation context.""" try: otlp = self.session.flush_otlp(clear=True) finally: diff --git a/opto/trace/io/optimization.py b/opto/trace/io/optimization.py index 0788d52b..f9c4accf 100644 --- a/opto/trace/io/optimization.py +++ b/opto/trace/io/optimization.py @@ -381,6 +381,7 @@ def optimize_graph( _optimizer = optimizer def _ensure_trace_imports(): + """Lazily import Trace ingestion and propagation helpers on demand.""" nonlocal _ingest_tgj, _GraphPropagator, _batchify if _ingest_tgj is None: from opto.trace.io.tgj_ingest import ingest_tgj as _fn @@ -395,6 +396,7 @@ def _ensure_trace_imports(): _batchify = _batchify_items def _ensure_optimizer(param_nodes): + """Instantiate the default optimizer only when trainable params exist.""" nonlocal _optimizer if _optimizer is not None: return @@ -411,6 +413,7 @@ def _ensure_optimizer(param_nodes): _input_key = getattr(graph, "input_key", "query") or "query" def _make_state(query: Any) -> Dict[str, Any]: + """Normalize a query payload into the graph's expected input-state shape.""" if isinstance(query, dict): return query return {_input_key: query} @@ -636,6 +639,7 @@ def _optimize_trace_graph( 
output_key: Optional[str] = None, on_iteration: Optional[Callable[[int, List[RunResult], Dict[str, Any]], None]] = None, ) -> OptimizationResult: + """Optimize a trace-native graph using traced output nodes directly.""" from opto.optimizers.optoprime_v2 import OptoPrimeV2 from opto.trace.nodes import Node @@ -651,6 +655,7 @@ def _optimize_trace_graph( last_applied_updates: Dict[str, Any] = {} def _snapshot(parameters: List[Any]) -> Dict[str, Any]: + """Capture the current parameter payload keyed by node name.""" snapshot: Dict[str, Any] = {} for p in parameters: snapshot[getattr(p, "name", repr(p))] = getattr(p, "data", None) @@ -660,6 +665,7 @@ def _snapshot(parameters: List[Any]) -> Dict[str, Any]: opt = optimizer or OptoPrimeV2(parameters=list(graph.parameters), **dict(optimizer_kwargs or {})) def _extract_output(result: Any, sidecar: Any = None) -> Tuple[Any, Any]: + """Resolve the traced output node and the plain answer value for evaluation.""" if sidecar is not None and getattr(sidecar, "output_node", None) is not None: output_node = sidecar.output_node return output_node, getattr(output_node, "data", output_node) @@ -761,6 +767,7 @@ def _optimize_sysmon_graph( output_key: Optional[str] = None, on_iteration: Optional[Callable[[int, List[RunResult], Dict[str, Any]], None]] = None, ) -> OptimizationResult: + """Optimize a sys.monitoring-backed graph via TGJ conversion and replay.""" from opto.optimizers.optoprime_v2 import OptoPrimeV2 from opto.trace.io.tgj_ingest import ingest_tgj from opto.trace.io.tgj_ingest import merge_tgj @@ -770,6 +777,7 @@ def _optimize_sysmon_graph( raise ValueError("backend='sysmon' requires an explicit eval_fn") def _snapshot_parameters_from_bindings(bindings_dict: Dict[str, Binding]) -> Dict[str, Any]: + """Read the current values exposed through the active bindings.""" return {k: b.get() for k, b in bindings_dict.items()} score_history: List[float] = [] diff --git a/opto/trace/io/otel_adapter.py b/opto/trace/io/otel_adapter.py 
index b469a85f..9699df1a 100644 --- a/opto/trace/io/otel_adapter.py +++ b/opto/trace/io/otel_adapter.py @@ -1,3 +1,5 @@ +"""Adapters that convert OTLP span payloads into Trace-Graph JSON documents.""" + from __future__ import annotations from typing import Dict, Any, List @@ -6,10 +8,12 @@ def _sanitize(name: str) -> str: + """Make span names safe for use as TGJ node names.""" return (name or "node").replace(":", "_") def _op(attrs, span): + """Infer a TGJ operation name from OTEL attributes and span metadata.""" if "gen_ai.operation" in attrs or "gen_ai.model" in attrs: return "llm_call" if "rpc.system" in attrs: @@ -22,6 +26,7 @@ def _op(attrs, span): def _attrs(l): + """Flatten OTLP attribute records into a plain ``dict``.""" out = {} for a in l or []: k = a["key"] @@ -32,6 +37,7 @@ def _attrs(l): def _lift_inputs(attrs: Dict[str, Any]) -> Dict[str, str]: + """Extract ``inputs.*`` references and synthesize key literal inputs.""" inputs = {} for k, v in list(attrs.items()): if k.startswith("inputs.") and isinstance(v, str): @@ -47,6 +53,7 @@ def _lift_inputs(attrs: Dict[str, Any]) -> Dict[str, str]: def _params(attrs: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: + """Extract ``param.*`` attributes and trainable flags from a span.""" out = {} for k, v in attrs.items(): if k.startswith("param.") and not k.endswith(".trainable"): @@ -220,4 +227,3 @@ def otlp_traces_to_trace_json(otlp: Dict[str, Any], agent_id_hint: str = "", use } ) return docs - diff --git a/opto/trace/io/otel_runtime.py b/opto/trace/io/otel_runtime.py index 8ab4be5d..b50f7bc3 100644 --- a/opto/trace/io/otel_runtime.py +++ b/opto/trace/io/otel_runtime.py @@ -1,3 +1,5 @@ +"""Runtime helpers for recording, exporting, and annotating OTEL spans.""" + from __future__ import annotations import logging @@ -19,6 +21,7 @@ class LLMCallError(Exception): """Raised when the underlying LLM provider returns a non-success response.""" def __init__(self, message: str, *, status_code: Optional[int] = None): + 
"""Store the provider-facing error message and optional status code.""" super().__init__(message) self.status_code = status_code @@ -27,19 +30,24 @@ class InMemorySpanExporter(SpanExporter): """In-memory OTEL span exporter used by tests and demos.""" def __init__(self) -> None: + """Initialize the exporter with an empty finished-span buffer.""" self._finished_spans: List[ReadableSpan] = [] def export(self, spans: List[ReadableSpan]) -> SpanExportResult: + """Append spans to the in-memory buffer.""" self._finished_spans.extend(spans) return SpanExportResult.SUCCESS def shutdown(self) -> None: + """Clear all recorded spans.""" self._finished_spans.clear() def get_finished_spans(self) -> List[ReadableSpan]: + """Return a copy of the spans collected so far.""" return list(self._finished_spans) def clear(self) -> None: + """Discard any spans currently held in memory.""" self._finished_spans.clear() @@ -99,6 +107,7 @@ def flush_otlp( spans = exporter.get_finished_spans() def hex_id(x: int, n: int) -> str: + """Format trace/span ids as zero-padded lowercase hex strings.""" return f"{x:0{2*n}x}" otlp_spans: List[Dict[str, Any]] = [] @@ -206,6 +215,7 @@ def __init__( llm_span_name: str = "llm.chat.completion", emit_llm_child_span: bool = True, ) -> None: + """Configure how OTEL spans are emitted around provider calls.""" self.llm = llm self.tracer = tracer # None -> all trainable; explicit set otherwise @@ -224,6 +234,7 @@ def __init__( # ---- helpers --------------------------------------------------------- def _is_trainable(self, optimizable_key: Optional[str]) -> bool: + """Return whether a prompt key should be exposed as trainable.""" if optimizable_key is None: return False if self._trainable_keys_all: @@ -245,6 +256,7 @@ def _record_llm_call( prompt: str, extra_inputs: Optional[Dict[str, str]] = None, ) -> None: + """Attach prompt, code, and input metadata to the parent LLM span.""" if template_name and template is not None: sp.set_attribute(f"param.{template_name}", 
template) sp.set_attribute( @@ -432,6 +444,7 @@ def template_prompt_call( def _attrs_to_dict(attrs: List[Dict[str, Any]]) -> Dict[str, str]: + """Convert OTLP-style attribute records into a string-keyed mapping.""" out: Dict[str, str] = {} for a in attrs or []: key = a.get("key") diff --git a/opto/trace/io/sysmonitoring.py b/opto/trace/io/sysmonitoring.py index bbf2fdff..2cea1cb7 100644 --- a/opto/trace/io/sysmonitoring.py +++ b/opto/trace/io/sysmonitoring.py @@ -1,3 +1,5 @@ +"""Lightweight graph observers built on Python's ``sys.monitoring`` hooks.""" + from __future__ import annotations import sys @@ -12,6 +14,8 @@ @dataclass class SysMonEvent: + """Recorded function-level event captured during a sys.monitoring session.""" + id: str parent_id: str | None name: str @@ -28,6 +32,7 @@ class SysMonitoringSession: """Small execution observer built on Python's sys.monitoring API.""" def __init__(self, tool_id: int = 7, service_name: str = "langgraph-sysmon") -> None: + """Prepare a reusable session for collecting Python call events.""" if not hasattr(sys, "monitoring"): raise RuntimeError("sys.monitoring is unavailable on this Python runtime") self.tool_id = tool_id @@ -54,6 +59,7 @@ def _claim_tool_id(self) -> int: raise RuntimeError("Unable to claim a valid sys.monitoring tool id") def _stack(self) -> List[SysMonEvent]: + """Return the thread-local event stack for nested Python calls.""" if not hasattr(self._tls, "stack"): self._tls.stack = [] return self._tls.stack @@ -64,6 +70,7 @@ def start( bindings: Dict[str, Any], meta: Optional[Dict[str, Any]] = None, ) -> None: + """Start collecting Python start/return/unwind events for a graph run.""" self._events.clear() self._bindings_snapshot = { k: {"value": b.get(), "kind": b.kind, "trainable": True} @@ -72,12 +79,14 @@ def start( semantic_names = set((meta or {}).get("semantic_names") or ()) def _safe_preview(value: Any) -> str: + """Render a short preview without letting ``repr`` failures escape.""" try: return 
repr(value)[:200] except Exception: return f"<{type(value).__name__}>" def on_start(code, instruction_offset): + """Record the beginning of a monitored Python call.""" if semantic_names and code.co_name not in semantic_names: return stack = self._stack() @@ -95,6 +104,7 @@ def on_start(code, instruction_offset): self._events.append(ev) def on_return(code, instruction_offset, retval): + """Close the most recent matching event on normal return.""" stack = self._stack() if not stack or stack[-1].name != code.co_name: return @@ -104,6 +114,7 @@ def on_return(code, instruction_offset, retval): ev.return_preview = _safe_preview(retval) def on_unwind(code, instruction_offset, exc): + """Close the most recent matching event when the frame unwinds.""" stack = self._stack() if not stack or stack[-1].name != code.co_name: return @@ -128,6 +139,7 @@ def on_unwind(code, instruction_offset, exc): ) def stop(self, *, result: Any = None, error: BaseException | None = None) -> Dict[str, Any]: + """Stop monitoring and export the captured profile document.""" try: sys.monitoring.set_events(self.tool_id, 0) sys.monitoring.register_callback(self.tool_id, sys.monitoring.events.PY_START, None) @@ -173,9 +185,12 @@ def stop(self, *, result: Any = None, error: BaseException | None = None) -> Dic class SysMonObserver: + """Observer adapter that exposes ``SysMonitoringSession`` through the protocol.""" + name = "sysmon" def __init__(self, session: Optional[SysMonitoringSession] = None) -> None: + """Reuse or create a monitoring session for passive observation.""" self.session = session or SysMonitoringSession() def start( @@ -184,6 +199,7 @@ def start( bindings: Dict[str, Any], meta: Optional[Dict[str, Any]] = None, ) -> None: + """Delegate observer startup to the underlying monitoring session.""" self.session.start(bindings=bindings, meta=meta) def stop( @@ -192,6 +208,7 @@ def stop( result: Any = None, error: BaseException | None = None, ) -> ObserverArtifact: + """Stop monitoring and 
package the resulting profile as an artifact.""" doc = self.session.stop(result=result, error=error) return ObserverArtifact(carrier="sysmon", raw=doc, profile_doc=doc) diff --git a/opto/trace/io/telemetry_session.py b/opto/trace/io/telemetry_session.py index 8c24a339..edbd266b 100644 --- a/opto/trace/io/telemetry_session.py +++ b/opto/trace/io/telemetry_session.py @@ -113,6 +113,7 @@ def __init__( mlflow_autolog: bool = False, mlflow_autolog_kwargs: Optional[Dict[str, Any]] = None, ) -> None: + """Initialize OTEL exporters, optional MLflow bridges, and node/span bookkeeping.""" self.service_name = service_name self.record_spans = record_spans self.span_attribute_filter = span_attribute_filter @@ -182,11 +183,13 @@ def activate(self): _CURRENT_SESSION.reset(token) def __enter__(self) -> "TelemetrySession": + """Activate the session for the current context-manager scope.""" token = _CURRENT_SESSION.set(self) self._token_stack.append(token) return self def __exit__(self, exc_type, exc, tb) -> None: + """Restore the previous active session when leaving a ``with`` block.""" if self._token_stack: token = self._token_stack.pop() _CURRENT_SESSION.reset(token) @@ -226,6 +229,7 @@ def exporter(self) -> InMemorySpanExporter: @staticmethod def _span_id_hex(span) -> Optional[str]: + """Return the current span id as a zero-padded hex string.""" try: ctx = span.get_span_context() if not getattr(ctx, "is_valid", False): @@ -235,23 +239,28 @@ def _span_id_hex(span) -> Optional[str]: return None def _truncate(self, v: Any) -> str: + """Convert a value to a bounded string suitable for span attributes.""" s = str(v) if self.max_attr_chars and len(s) > self.max_attr_chars: return s[: self.max_attr_chars] + "…" return s def _is_trace_node(self, obj: Any) -> bool: + """Best-effort check for Trace node-like objects without importing node classes.""" mod = getattr(obj.__class__, "__module__", "") return mod.startswith("opto.trace") and hasattr(obj, "name") and hasattr(obj, "data") def 
_is_parameter_node(self, obj: Any) -> bool: + """Return whether an object looks like a Trace ``ParameterNode``.""" return self._is_trace_node(obj) and obj.__class__.__name__ == "ParameterNode" def _param_key(self, param_node: Any) -> str: + """Derive the binding key used for a Trace parameter node.""" raw = getattr(param_node, "name", "param") return str(raw).split(":")[0] def _remember_node_span(self, node: Any, span) -> None: + """Remember the span associated with a node for later input lifting.""" sid = self._span_id_hex(span) if sid is None: return @@ -261,6 +270,7 @@ def _remember_node_span(self, node: Any, span) -> None: return def _lookup_node_ref(self, node: Any) -> Optional[str]: + """Resolve a node into the stable reference format expected by TGJ export.""" try: sid = self._node_span_ids.get(node) except Exception: @@ -305,6 +315,7 @@ def _inputs_and_params_from_trace_inputs( return inputs_attrs, params_attrs def _is_default_op(self, fun_name: str, file_path: str) -> bool: + """Detect default Trace operators that should be skipped when configured.""" if fun_name == "call_llm": return False norm = str(file_path).replace("\\", "/") @@ -503,6 +514,7 @@ def _flush_tgj_from_otlp(self, otlp: Dict[str, Any]) -> List[Dict[str, Any]]: # -- MLflow helpers (best-effort) ----------------------------------------- def _mlflow_log_artifacts(self, output_dir: str) -> None: + """Best-effort bridge that mirrors exported bundles into MLflow artifacts.""" if not self.mlflow_log_artifacts: return try: diff --git a/opto/trace/io/tgj_export.py b/opto/trace/io/tgj_export.py index f81f0926..9ac7ba6b 100644 --- a/opto/trace/io/tgj_export.py +++ b/opto/trace/io/tgj_export.py @@ -1,5 +1,3 @@ -from __future__ import annotations - """ Utilities to export an already-built Trace graph (Node / MessageNode / ParameterNode) to TGJ format. 
@@ -13,6 +11,8 @@ - this does NOT reconstruct a graph from telemetry; it only exports an existing Trace graph """ +from __future__ import annotations + from typing import Dict, Any, Iterable, Set from opto.trace.nodes import ( Node, @@ -25,6 +25,7 @@ def _base_name(n: Node) -> str: + """Drop Trace scope suffixes from a node name for export readability.""" return n.name.split(":")[0] @@ -35,6 +36,7 @@ def export_subgraph_to_tgj( graph_id: str, scope: str = "", ) -> Dict[str, Any]: + """Export a reachable Trace subgraph to TGJ v1.0.""" seen: Set[Node] = set() q = list(nodes) tgj_nodes = [] @@ -42,6 +44,7 @@ def export_subgraph_to_tgj( used_ids: Set[str] = set() def nid(n: Node) -> str: + """Assign a stable, collision-free TGJ id to each exported node.""" if n not in idmap: base = _base_name(n) candidate = base @@ -159,5 +162,6 @@ def export_full_graph_to_tgj( graph_id: str, scope: str = "", ) -> Dict[str, Any]: + """Export every node currently held in the global Trace graph registry.""" all_nodes = [n for lst in GRAPH._nodes.values() for n in lst] return export_subgraph_to_tgj(all_nodes, run_id, agent_id, graph_id, scope) diff --git a/opto/trace/io/tgj_ingest.py b/opto/trace/io/tgj_ingest.py index 6bc6d46f..b95d1018 100644 --- a/opto/trace/io/tgj_ingest.py +++ b/opto/trace/io/tgj_ingest.py @@ -1,3 +1,5 @@ +"""Helpers for rebuilding Trace nodes from TGJ and OTEL-derived documents.""" + from __future__ import annotations from typing import Dict, Any, List, Optional, Union from contextlib import contextmanager @@ -8,6 +10,7 @@ @contextmanager def _scoped(scope: str): + """Temporarily push a Trace name scope while ingesting one document.""" if scope: NAME_SCOPES.append(scope) try: @@ -17,10 +20,12 @@ def _scoped(scope: str): NAME_SCOPES.pop() def _mk_value(name: str, value: Any, desc: str="[Node]") -> Node: + """Create a plain ``Node`` using a TGJ-safe name.""" safe = name.replace(":", "_") return Node(value, name=safe, description=desc) def _as_node(ref: Union[str, 
Dict[str,Any]], local: Dict[str,Node], ports: Dict[str,Node], port_index: Optional[Dict[str,Node]] = None) -> Node: + """Resolve a TGJ input/output reference into a concrete Trace node.""" if isinstance(ref, str): ref = {"ref": ref} if "ref" in ref: @@ -48,6 +53,7 @@ def _as_node(ref: Union[str, Dict[str,Any]], local: Dict[str,Node], ports: Dict[ def _kind_norm(k: str) -> str: + """Normalize legacy TGJ kind aliases to canonical names.""" k = (k or "").lower() if k in ("param", "parameter"): return "parameter" @@ -61,6 +67,7 @@ def _kind_norm(k: str) -> str: def _nodes_iter(nodes_field: Union[List[Dict[str,Any]], Dict[str,Dict[str,Any]]]) -> List[Dict[str,Any]]: + """Accept either list- or dict-shaped TGJ node collections.""" if isinstance(nodes_field, dict): out = [] for nid, rec in nodes_field.items(): @@ -72,6 +79,7 @@ def _nodes_iter(nodes_field: Union[List[Dict[str,Any]], Dict[str,Dict[str,Any]]] def _convert_otel_profile(doc: Dict[str,Any]) -> Dict[str,Any]: + """Convert ``trace-json/1.0+otel`` payloads into canonical TGJ v1 records.""" raw_nodes = _nodes_iter(doc.get("nodes", {})) known_ids = { rec.get("id") or rec.get("name") @@ -154,6 +162,7 @@ def ingest_tgj( *, param_cache: Optional[Dict[str,"ParameterNode"]] = None, ) -> Dict[str,Node]: + """Rebuild Trace nodes from a TGJ document and return them by id/name.""" version = doc.get("tgj") or doc.get("version") if version == OTEL_PROFILE_VERSION: doc = _convert_otel_profile(doc) @@ -250,6 +259,7 @@ def ingest_tgj( return nodes def merge_tgj(docs: List[Dict[str,Any]]) -> Dict[str,Dict[str,Node]]: + """Ingest multiple TGJ documents while resolving cross-document exports.""" merged: Dict[str,Dict[str,Node]] = {} port_index: Dict[str,Node] = {} for d in docs: @@ -264,6 +274,7 @@ class TLSFIngestor: """Minimal TLSF ingestor supporting TGJ/trace-json documents.""" def __init__(self, run_id: Optional[str] = None): + """Initialize the ingestor and its accumulated node index.""" self.run_id = run_id self._nodes: 
Dict[str, Node] = {} @@ -272,4 +283,5 @@ def ingest_tgj(self, doc: Dict[str, Any]) -> None: self._nodes.update(ingest_tgj(doc)) def get(self, name_or_event_id: str) -> Optional[Node]: + """Look up a previously ingested node by name or event id.""" return self._nodes.get(name_or_event_id) From 842fbd85f93c814525bf978896d4d97b1e549926 Mon Sep 17 00:00:00 2001 From: doxav Date: Fri, 24 Apr 2026 16:02:49 +0200 Subject: [PATCH 09/16] Sync unchanged files with experimental --- opto/features/priority_search/utils.py | 2 +- opto/optimizers/optimizer.py | 4 +- opto/trace/bundle.py | 76 ++++++-------------------- opto/trainer/__init__.py | 12 +--- opto/trainer/algorithms/__init__.py | 43 ++------------- setup.py | 2 +- 6 files changed, 27 insertions(+), 112 deletions(-) diff --git a/opto/features/priority_search/utils.py b/opto/features/priority_search/utils.py index ec86e8e2..c61c81c2 100644 --- a/opto/features/priority_search/utils.py +++ b/opto/features/priority_search/utils.py @@ -71,7 +71,7 @@ def set_module_parameters(agent, update_dict): """ remapped_update_dict = remap_update_dict(agent, update_dict) # remap the update dict to the agent's parameters for k, v in remapped_update_dict.items(): - k._set(v) # preserve Node-unwrapping semantics consistently + k._data = v # set the parameter's data to the value in the update_dict def create_module_from_update_dict(agent, update_dict): """ Create a new agent from the update_dict. diff --git a/opto/optimizers/optimizer.py b/opto/optimizers/optimizer.py index eefa827f..79b37370 100644 --- a/opto/optimizers/optimizer.py +++ b/opto/optimizers/optimizer.py @@ -295,7 +295,7 @@ def update(self, update_dict: Dict[ParameterNode, Any]): """ for p, d in update_dict.items(): if p.trainable: - p._set(d) + p._data = d def zero_feedback(self): """Clear accumulated feedback from all parameters. 
@@ -403,4 +403,4 @@ def __deepcopy__(self, memo): setattr(result, k, copy.deepcopy(v, memo)) else: setattr(result, k, v) # parameters is not copied, it is the original parameters - return result + return result \ No newline at end of file diff --git a/opto/trace/bundle.py b/opto/trace/bundle.py index f3817632..a6595b72 100644 --- a/opto/trace/bundle.py +++ b/opto/trace/bundle.py @@ -546,20 +546,6 @@ def postprocess_output(self, output, fun, _args, _kwargs, used_nodes, inputs): ) # We don't need to keep track of the inputs if we are not tracing. # Wrap the output as a MessageNode or an ExceptionNode nodes = self.wrap(output, inputs, external_dependencies) - try: - from opto.trace.io.telemetry_session import TelemetrySession - - session = TelemetrySession.current() - if session is not None and isinstance(nodes, MessageNode): - observer_inputs = dict(inputs) - for idx, dep in enumerate(external_dependencies): - observer_inputs.setdefault( - getattr(dep, "name", f"dep_{idx}"), - dep, - ) - session.on_message_node_created(nodes, inputs=observer_inputs) - except Exception: - pass return nodes def forward(self, *args, **kwargs): @@ -581,28 +567,14 @@ def sync_forward(self, fun, *args, **kwargs): """ # Wrap the inputs as nodes inputs, args, kwargs, _args, _kwargs = self._wrap_inputs(fun, args, kwargs) - try: - from opto.trace.io.telemetry_session import TelemetrySession - - session = TelemetrySession.current() - except Exception: - session = None - - if session is None: - with trace_nodes() as used_nodes: - _args, _kwargs = self.preprocess_inputs(args, kwargs, _args, _kwargs) - output = self.sync_call_fun(fun, *_args, **_kwargs) - return self.postprocess_output(output, fun, _args, _kwargs, used_nodes, inputs) - - with session.bundle_span( - fun_name=self.info["fun_name"], - file_path=self.info["file"], - inputs=inputs, - ): - with trace_nodes() as used_nodes: - _args, _kwargs = self.preprocess_inputs(args, kwargs, _args, _kwargs) - output = self.sync_call_fun(fun, *_args, 
**_kwargs) - return self.postprocess_output(output, fun, _args, _kwargs, used_nodes, inputs) + # Execute fun + with trace_nodes() as used_nodes: + # After exit, used_nodes contains the nodes whose data attribute is read in the operator fun. + _args, _kwargs = self.preprocess_inputs(args, kwargs, _args, _kwargs) + output = self.sync_call_fun(fun, *_args, **_kwargs) + # Wrap the output as a MessageNode or an ExceptionNode + nodes = self.postprocess_output(output, fun, _args, _kwargs, used_nodes, inputs) + return nodes async def async_forward(self, fun, *args, **kwargs): """ @@ -613,28 +585,16 @@ async def async_forward(self, fun, *args, **kwargs): """ # Wrap the inputs as nodes inputs, args, kwargs, _args, _kwargs = self._wrap_inputs(fun, args, kwargs) - try: - from opto.trace.io.telemetry_session import TelemetrySession - - session = TelemetrySession.current() - except Exception: - session = None - - if session is None: - with trace_nodes() as used_nodes: - _args, _kwargs = self.preprocess_inputs(args, kwargs, _args, _kwargs) - output = await self.async_call_fun(fun, *_args, **_kwargs) - return self.postprocess_output(output, fun, _args, _kwargs, used_nodes, inputs) - - with session.bundle_span( - fun_name=self.info["fun_name"], - file_path=self.info["file"], - inputs=inputs, - ): - with trace_nodes() as used_nodes: - _args, _kwargs = self.preprocess_inputs(args, kwargs, _args, _kwargs) - output = await self.async_call_fun(fun, *_args, **_kwargs) - return self.postprocess_output(output, fun, _args, _kwargs, used_nodes, inputs) + # Execute fun + with trace_nodes() as used_nodes: + # After exit, used_nodes contains the nodes whose data attribute is read in the operator fun. 
+ _args, _kwargs = self.preprocess_inputs(args, kwargs, _args, _kwargs) + output = await self.async_call_fun( + fun, *_args, **_kwargs + ) # use await to call the async function + # Wrap the output as a MessageNode or an ExceptionNode + nodes = self.postprocess_output(output, fun, _args, _kwargs, used_nodes, inputs) + return nodes def wrap( self, diff --git a/opto/trainer/__init__.py b/opto/trainer/__init__.py index 2a3efa7e..fdb4b478 100644 --- a/opto/trainer/__init__.py +++ b/opto/trainer/__init__.py @@ -1,11 +1 @@ -"""Trainer package public facade.""" - -from importlib import import_module - -__all__ = ["train"] - - -def __getattr__(name): - if name == "train": - return import_module("opto.trainer.train").train - raise AttributeError(f"module {__name__!r} has no attribute {name!r}") \ No newline at end of file +from opto.trainer.train import train #, resume \ No newline at end of file diff --git a/opto/trainer/algorithms/__init__.py b/opto/trainer/algorithms/__init__.py index 8485b46e..09333a7f 100644 --- a/opto/trainer/algorithms/__init__.py +++ b/opto/trainer/algorithms/__init__.py @@ -1,39 +1,4 @@ -"""Lazy public facade for trainer algorithms.""" - -__all__ = [ - "Trainer", - "Minibatch", - "MinibatchAlgorithm", - "BasicSearchAlgorithm", - "MinibatchCurriculumAccumulationCommonFeedbackAlgorithm", - "BasicSearchCurriculumAccumulationCommonFeedbackAlgorithm", - "BeamsearchAlgorithm", - "BeamsearchHistoryAlgorithm", - "UCBSearchAlgorithm", -] - - -def __getattr__(name): - if name == "Trainer": - from opto.trainer.algorithms.algorithm import Trainer - - return Trainer - if name in { - "Minibatch", - "MinibatchAlgorithm", - "BasicSearchAlgorithm", - "MinibatchCurriculumAccumulationCommonFeedbackAlgorithm", - "BasicSearchCurriculumAccumulationCommonFeedbackAlgorithm", - }: - from opto.trainer.algorithms import basic_algorithms as module - - return getattr(module, name) - if name in {"BeamsearchAlgorithm", "BeamsearchHistoryAlgorithm"}: - from 
opto.trainer.algorithms import beamsearch_algorithm as module - - return getattr(module, name) - if name == "UCBSearchAlgorithm": - from opto.trainer.algorithms.UCBsearch import UCBSearchAlgorithm - - return UCBSearchAlgorithm - raise AttributeError(f"module {__name__!r} has no attribute {name!r}") +from opto.trainer.algorithms.algorithm import Trainer +from opto.trainer.algorithms.basic_algorithms import Minibatch, MinibatchAlgorithm, BasicSearchAlgorithm +from opto.trainer.algorithms.beamsearch_algorithm import BeamsearchAlgorithm, BeamsearchHistoryAlgorithm +from opto.trainer.algorithms.UCBsearch import UCBSearchAlgorithm diff --git a/setup.py b/setup.py index 73394ff7..dbd60be5 100644 --- a/setup.py +++ b/setup.py @@ -29,5 +29,5 @@ long_description=open('README.md', encoding="utf8").read(), packages=setuptools.find_packages(include=["opto*"]), install_requires=install_requires, - python_requires=">=3.12", + python_requires=">=3.10", ) From a73e52e472ee3820882fea31cf26b1347f7f48ee Mon Sep 17 00:00:00 2001 From: doxav Date: Fri, 24 Apr 2026 16:24:01 +0200 Subject: [PATCH 10/16] otel did not track nodes/functions :-) - will commit results --- docs/GraphOptimization.md | 6 +- opto/features/graph/adapter.py | 177 +++++++++++++- .../test_graph_adapter_otel_node_spans.py | 227 ++++++++++++++++++ 3 files changed, 406 insertions(+), 4 deletions(-) create mode 100644 tests/features_tests/test_graph_adapter_otel_node_spans.py diff --git a/docs/GraphOptimization.md b/docs/GraphOptimization.md index 606f9259..acff7a77 100644 --- a/docs/GraphOptimization.md +++ b/docs/GraphOptimization.md @@ -83,8 +83,8 @@ That separation is what lets the system remain compatible with LangGraph while s ```mermaid flowchart TD U[User] - IG[instrument_graph(...)] - OG[optimize_graph(...)] + IG[instrument_graph] + OG[optimize_graph] subgraph FG[opto.features.graph] GA[GraphAdapter] @@ -116,7 +116,7 @@ flowchart TD NODE[node / ParameterNode / MessageNode] MOD[Module] OPT[Optimizer] - 
TRAIN[train()] + TRAIN[train] PS[PrioritySearch] MC[ModuleCandidate] end diff --git a/opto/features/graph/adapter.py b/opto/features/graph/adapter.py index 458985c9..276fec16 100644 --- a/opto/features/graph/adapter.py +++ b/opto/features/graph/adapter.py @@ -3,6 +3,7 @@ from __future__ import annotations import contextlib +import json import inspect from dataclasses import dataclass, field from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union @@ -21,6 +22,28 @@ def _raw(value: Any) -> Any: return getattr(value, "data", value) +def _otel_attr_value(value: Any, *, max_chars: int = 2_000) -> str: + """Serialize runtime values into bounded OTEL string attributes.""" + value = _raw(value) + if isinstance(value, str): + out = value + elif isinstance(value, (int, float, bool)) or value is None: + out = str(value) + else: + try: + out = json.dumps(value, ensure_ascii=False, default=str) + except Exception: + out = repr(value) + if len(out) > max_chars: + return out[:max_chars] + "...[truncated]" + return out + + +def _trainable_attr(value: Any, *, default: bool = True) -> bool: + """Best-effort trainability flag for ParameterNode-like values.""" + return bool(getattr(value, "trainable", default)) + + def _normalize_named_callables( targets: Union[None, List[str], List[Callable[..., Any]], Mapping[str, Callable[..., Any]]], scope: Optional[Dict[str, Any]] = None, @@ -122,6 +145,31 @@ def instrument(self, backend: Optional[str] = None, **kwargs: Any): raise ValueError(f"Unsupported backend: {effective_backend!r}") +@dataclass +class _AdapterOTELRuntimeGraph: + """Invoke OTEL graphs through the adapter so knobs stay live per run.""" + + adapter: "LangGraphAdapter" + + def _runtime_state(self, state: Any) -> Any: + """Inject current graph knobs into dict-like runtime state.""" + if not isinstance(state, dict): + return state + runtime_state = dict(state) + runtime_state.update(self.adapter._knob_values()) + return runtime_state + + def invoke(self, 
state: Any, **kwargs: Any): + """Build or reuse the current OTEL graph and invoke it.""" + graph = self.adapter.build_graph(backend="otel") + return graph.invoke(self._runtime_state(state), **kwargs) + + def stream(self, state: Any, **kwargs: Any): + """Build or reuse the current OTEL graph and stream it.""" + graph = self.adapter.build_graph(backend="otel") + yield from graph.stream(self._runtime_state(state), **kwargs) + + @dataclass class LangGraphAdapter(GraphAdapter): """Concrete adapter for LangGraph-style factories and scoped callables.""" @@ -163,6 +211,31 @@ def __getstate__(self): state["_compiled_cache"] = {} return state + def instrument(self, backend: Optional[str] = None, **kwargs: Any): + """Wrap the adapter, keeping OTEL graph knobs live across invocations.""" + effective_backend = backend or self.backend + if effective_backend != "otel": + return super().instrument(backend=backend, **kwargs) + + from opto.trace.io.instrumentation import instrument_graph + + service_name = kwargs.pop("service_name", self.service_name) + input_key = kwargs.pop("input_key", self.input_key) + output_key = kwargs.pop("output_key", self.output_key) + + merged = self.bindings_dict() + merged.update(kwargs.pop("bindings", {}) or {}) + runtime_graph = _AdapterOTELRuntimeGraph(self) + return instrument_graph( + graph=runtime_graph, + backend="otel", + bindings=merged, + service_name=service_name, + input_key=input_key, + output_key=output_key, + **kwargs, + ) + def _build_bindings(self) -> None: """Derive bindings for prompts, graph knobs, and traced code parameters.""" auto: Dict[str, Binding] = {} @@ -267,6 +340,105 @@ def _wrapped(state: Dict[str, Any], *args: Any, **kwargs: Any): _wrapped.__name__ = name return _wrapped + def _emit_otel_parameters(self, span: Any, *, node_name: str) -> None: + """Emit adapter parameters on a node-level OTEL span. + + The converter looks for ``param.*`` attributes and their + ``.trainable`` flags. 
The binding layer later normalizes optimizer + keys like ``param.route_policy:0`` back to ``route_policy``. + """ + for key, param in self.prompt_targets.items(): + span.set_attribute(f"param.{key}", _otel_attr_value(param, max_chars=10_000)) + span.set_attribute(f"param.{key}.trainable", _trainable_attr(param)) + + for key, param in self.graph_knobs.items(): + span.set_attribute(f"param.{key}", _otel_attr_value(param, max_chars=10_000)) + span.set_attribute(f"param.{key}.trainable", _trainable_attr(param)) + span.set_attribute(f"graph.knob.{key}", _otel_attr_value(param)) + + traced_fn = self._traced_functions.get(node_name) + code_param = getattr(traced_fn, "parameter", None) + if code_param is not None: + span.set_attribute(f"param.__code_{node_name}", _otel_attr_value(code_param, max_chars=50_000)) + span.set_attribute(f"param.__code_{node_name}.trainable", _trainable_attr(code_param)) + + def _emit_otel_inputs(self, span: Any, state: Any, *args: Any, **kwargs: Any) -> None: + """Emit bounded input/state previews for graph reconstruction.""" + if isinstance(state, Mapping): + for key, value in state.items(): + span.set_attribute(f"inputs.{key}", _otel_attr_value(value)) + else: + span.set_attribute("inputs.state", _otel_attr_value(state)) + if args: + span.set_attribute("inputs.args", _otel_attr_value(args)) + for key, value in kwargs.items(): + span.set_attribute(f"inputs.kwargs.{key}", _otel_attr_value(value)) + + def _resolve_otel_runtime_fn( + self, + name: str, + fallback_fn: Callable[..., Any], + ) -> Callable[..., Any]: + """Resolve the live callable used by OTEL execution. + + OTEL optimization updates land on adapter bindings, including + ``__code_*`` entries backed by ``FunModule.parameter``. To make those + updates affect runtime behavior, OTEL execution must consult the + traced wrapper's dynamic ``.fun`` property at call time instead of the + original function object captured when the graph was built. 
+ """ + traced_fn = self._traced_functions.get(name) + if isinstance(traced_fn, FunModule): + return traced_fn.fun + runtime_fn = getattr(traced_fn, "fun", None) + if callable(runtime_fn): + return runtime_fn + return fallback_fn + + def _otel_runtime_wrapper(self, name: str, fn: Callable[..., Any]): + """Wrap a graph function with an OTEL node span. + + ``InstrumentedGraph`` creates the root invocation span and activates a + ``TelemetrySession``. This wrapper emits the per-node spans that the + old LangGraph+OTEL prototype emitted manually inside each node. + """ + def _wrapped(state: Any, *args: Any, **kwargs: Any) -> Any: + try: + from opto.trace.io.telemetry_session import TelemetrySession + except Exception: + return fn(state, *args, **kwargs) + + session = TelemetrySession.current() + if session is None: + return fn(state, *args, **kwargs) + + runtime_state = state + if isinstance(state, Mapping): + runtime_state = dict(state) + runtime_state.update(self._knob_values()) + + with session.tracer.start_as_current_span(name) as span: + span.set_attribute("message.id", name) + span.set_attribute("graph.node.name", name) + span.set_attribute("graph.backend", "otel") + self._emit_otel_inputs(span, runtime_state, *args, **kwargs) + self._emit_otel_parameters(span, node_name=name) + + try: + runtime_fn = self._resolve_otel_runtime_fn(name, fn) + output = runtime_fn(runtime_state, *args, **kwargs) + except BaseException as exc: + span.set_attribute("error", True) + span.set_attribute("error.type", type(exc).__name__) + span.set_attribute("error.message", str(exc)) + raise + + span.set_attribute("outputs.preview", _otel_attr_value(output)) + return output + + _wrapped.__name__ = name + return _wrapped + def build_graph(self, backend: Optional[str] = None): """Build, compile, and cache the graph for ``trace`` or ``otel`` execution.""" effective_backend = backend or self.backend @@ -280,7 +452,10 @@ def build_graph(self, backend: Optional[str] = None): for name, fn in 
self._traced_functions.items() } elif effective_backend == "otel": - fn_overrides = dict(self._original_functions) + fn_overrides = { + name: self._otel_runtime_wrapper(name, fn) + for name, fn in self._original_functions.items() + } else: raise ValueError(f"Unsupported backend: {effective_backend!r}") diff --git a/tests/features_tests/test_graph_adapter_otel_node_spans.py b/tests/features_tests/test_graph_adapter_otel_node_spans.py new file mode 100644 index 00000000..35da8767 --- /dev/null +++ b/tests/features_tests/test_graph_adapter_otel_node_spans.py @@ -0,0 +1,227 @@ +from __future__ import annotations + +import pytest + +pytest.importorskip("langgraph") + +from langgraph.graph import END, START, StateGraph + +from opto.features.graph.adapter import LangGraphAdapter +from opto.trace import node +from opto.trace.io import instrument_graph +from opto.trace.io.bindings import apply_updates +from opto.trace.io.otel_adapter import otlp_traces_to_trace_json +from opto.trace.io.tgj_ingest import ingest_tgj +from opto.trace.nodes import MessageNode, ParameterNode + + +def _spans(otlp): + out = [] + for rs in otlp.get("resourceSpans", []): + for ss in rs.get("scopeSpans", []): + out.extend(ss.get("spans", [])) + return out + + +def _attrs(span): + attrs = {} + for item in span.get("attributes", []): + key = item.get("key") + value = item.get("value", {}) + if "stringValue" in value: + attrs[key] = value["stringValue"] + elif "boolValue" in value: + attrs[key] = value["boolValue"] + elif "intValue" in value: + attrs[key] = value["intValue"] + elif "doubleValue" in value: + attrs[key] = value["doubleValue"] + else: + attrs[key] = value + return attrs + + +def _truthy(value): + return value is True or str(value).strip().lower() in {"true", "1", "yes"} + + +def _span_named(otlp, name): + for span in _spans(otlp): + if span.get("name") == name: + return span + raise AssertionError(f"span not found: {name}; saw {[s.get('name') for s in _spans(otlp)]}") + + +def 
_make_adapter(): + planner_prompt = node("Plan: {query}", trainable=True, name="planner_prompt") + synth_prompt = node("Answer: {plan}", trainable=True, name="synth_prompt") + + def planner_node(state): + query = state.get("query", "") + return {"query": query, "plan": f"plan({query})"} + + def synth_node(state): + route = state.get("route_policy", "direct") + answer = f"answer({state.get('plan', '')})" + if route == "review": + answer = "Reviewed " + answer + return {"query": state.get("query", ""), "plan": state.get("plan", ""), "final_answer": answer} + + def build_graph(planner_node=planner_node, synth_node=synth_node, route_policy="direct"): + graph = StateGraph(dict) + graph.add_node("planner_node", planner_node) + graph.add_node("synth_node", synth_node) + graph.add_edge(START, "planner_node") + graph.add_edge("planner_node", "synth_node") + graph.add_edge("synth_node", END) + return graph.compile() + + return LangGraphAdapter( + graph_factory=build_graph, + function_targets={ + "planner_node": planner_node, + "synth_node": synth_node, + }, + prompt_targets={ + "planner_prompt": planner_prompt, + "synth_prompt": synth_prompt, + }, + graph_knobs={"route_policy": "direct"}, + input_key="query", + output_key="final_answer", + ) + + +def test_otel_adapter_emits_span_for_each_function_target_with_params(): + adapter = _make_adapter() + graph = instrument_graph( + adapter=adapter, + backend="otel", + service_name="test-langgraph-otel", + output_key="final_answer", + ) + + result = graph.invoke({"query": "CRISPR"}) + assert result["final_answer"] == "answer(plan(CRISPR))" + + otlp = graph.session.flush_otlp(clear=True) + names = [span.get("name") for span in _spans(otlp)] + + assert any(name.endswith(".invoke") for name in names) + assert "planner_node" in names + assert "synth_node" in names + + planner_attrs = _attrs(_span_named(otlp, "planner_node")) + assert planner_attrs["message.id"] == "planner_node" + assert planner_attrs["graph.node.name"] == 
"planner_node" + assert planner_attrs["graph.backend"] == "otel" + assert planner_attrs["inputs.query"] == "CRISPR" + + assert planner_attrs["param.planner_prompt"] == "Plan: {query}" + assert _truthy(planner_attrs["param.planner_prompt.trainable"]) + assert planner_attrs["param.synth_prompt"] == "Answer: {plan}" + assert _truthy(planner_attrs["param.synth_prompt.trainable"]) + assert planner_attrs["param.route_policy"] == "direct" + assert _truthy(planner_attrs["param.route_policy.trainable"]) + assert "outputs.preview" in planner_attrs + assert "param.__code_planner_node" in planner_attrs + + +def test_otel_adapter_params_convert_to_tgj_and_trace_nodes(): + adapter = _make_adapter() + graph = instrument_graph(adapter=adapter, backend="otel", output_key="final_answer") + + graph.invoke({"query": "CRISPR"}) + otlp = graph.session.flush_otlp(clear=True) + + tgj_docs = list( + otlp_traces_to_trace_json( + otlp, + agent_id_hint="test-langgraph-otel", + use_temporal_hierarchy=True, + ) + ) + nodes = ingest_tgj(tgj_docs[0]) + + param_names = { + getattr(n, "name", "").split(":")[0].split("/")[-1] + for n in nodes.values() + if isinstance(n, ParameterNode) and getattr(n, "trainable", False) + } + assert {"planner_prompt", "synth_prompt", "route_policy"}.issubset(param_names) + + message_names = { + getattr(n, "name", "") + for n in nodes.values() + if isinstance(n, MessageNode) + } + assert any(name.split("/")[-1].split(":")[0] == "planner_node" for name in message_names) + assert any(name.split("/")[-1].split(":")[0] == "synth_node" for name in message_names) + + +def test_otel_adapter_apply_updates_updates_graph_knob_and_next_run_behavior(): + adapter = _make_adapter() + graph = instrument_graph(adapter=adapter, backend="otel", output_key="final_answer") + + result = graph.invoke({"query": "CRISPR"}) + assert result["final_answer"] == "answer(plan(CRISPR))" + graph.session.flush_otlp(clear=True) + + apply_updates({"param.route_policy:0": "review"}, graph.bindings, 
strict=True) + + result = graph.invoke({"query": "CRISPR"}) + assert result["final_answer"] == "Reviewed answer(plan(CRISPR))" + + otlp = graph.session.flush_otlp(clear=True) + synth_attrs = _attrs(_span_named(otlp, "synth_node")) + assert synth_attrs["param.route_policy"] == "review" + assert synth_attrs["graph.knob.route_policy"] == "review" + + +def test_otel_adapter_apply_updates_updates_code_parameter_without_recompile(): + adapter = _make_adapter() + graph = instrument_graph(adapter=adapter, backend="otel", output_key="final_answer") + + first = graph.invoke({"query": "CRISPR"}) + assert first["plan"] == "plan(CRISPR)" + assert first["final_answer"] == "answer(plan(CRISPR))" + assert len(adapter._compiled_cache) == 1 + graph.session.flush_otlp(clear=True) + + code_key = "__code_planner_node" + original_code = graph.bindings[code_key].get() + updated_code = original_code.replace('plan({query})', 'ALT({query})') + + applied = apply_updates({f"param.{code_key}:0": updated_code}, graph.bindings, strict=True) + assert applied == {code_key: updated_code} + assert graph.bindings[code_key].get() == updated_code + + second = graph.invoke({"query": "CRISPR"}) + assert second["plan"] == "ALT(CRISPR)" + assert second["final_answer"] == "answer(ALT(CRISPR))" + assert len(adapter._compiled_cache) == 1 + + otlp = graph.session.flush_otlp(clear=True) + planner_attrs = _attrs(_span_named(otlp, "planner_node")) + assert planner_attrs[f"param.{code_key}"] == updated_code + assert "ALT(CRISPR)" in planner_attrs["outputs.preview"] + + +def test_otel_adapter_invalid_code_update_raises_and_marks_node_span_error(): + adapter = _make_adapter() + graph = instrument_graph(adapter=adapter, backend="otel", output_key="final_answer") + + code_key = "__code_planner_node" + bad_code = """def planner_node(state): + return {"query": state.get("query", ""), "plan": +""" + apply_updates({f"param.{code_key}:0": bad_code}, graph.bindings, strict=True) + + with pytest.raises(Exception): + 
graph.invoke({"query": "CRISPR"}) + + otlp = graph.session.flush_otlp(clear=True) + planner_attrs = _attrs(_span_named(otlp, "planner_node")) + assert _truthy(planner_attrs["error"]) + assert planner_attrs["error.type"] == "ExecutionError" + assert "SyntaxError" in planner_attrs["error.message"] From 6e8ae4c3f9eba97927d8f2c458ac9170a9a8680f Mon Sep 17 00:00:00 2001 From: doxav Date: Fri, 24 Apr 2026 16:36:06 +0200 Subject: [PATCH 11/16] first results compared with fixed OTEL --- ...aph_instrument_and_compare_observers.ipynb | 2 +- ...ggraph_instrument_and_compare_observers.py | 89 +++++++++++++++++-- ...mo_langgraph_instrument_and_optimize.ipynb | 54 +++++++++-- ...ggraph_instrument_and_optimize_trace.ipynb | 34 +++---- opto/trace/io/otel_runtime.py | 83 ++++++++++++++++- tests/unit_tests/test_otel_runtime.py | 66 ++++++++++++++ 6 files changed, 289 insertions(+), 39 deletions(-) diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb index 7b5e132c..a8a03f98 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb @@ -12,7 +12,7 @@ "This notebook runs the live comparison module and renders a shared analysis report.\n", "\n", "- It uses the OpenRouter API when `OPENROUTER_API_KEY` is set.\n", - "- It defaults to `OPENROUTER_MODEL=gpt-4o-mini` when the model env var is absent.\n", + "- It defaults to `OPENROUTER_MODEL=google/gemini-3-flash-preview` when the model env var is absent.\n", "- In CI or local runs without credentials, it prints a skip message and exits successfully.\n" ] }, diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py index aecd46d0..0403db17 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py +++ 
b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py @@ -33,6 +33,7 @@ make_dict_binding, optimize_graph, otlp_traces_to_trace_json, + LLMCallError, ) from opto.trace.io.sysmonitoring import sysmon_profile_to_tgj from opto.trace.io.tgj_export import export_subgraph_to_tgj @@ -47,7 +48,7 @@ HAS_SYSMON = hasattr(sys, "monitoring") OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "") -OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "gpt-4o-mini") +OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "google/gemini-3-flash-preview") OPENROUTER_BASE_URL = os.environ.get( "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1" ) @@ -212,6 +213,34 @@ def render_template(template: str, **variables: Any) -> str: return template.format(**_str_map(variables)) +def _extract_response_text(response: Any) -> str: + """Return assistant text from OpenAI-compatible demo responses.""" + choices = getattr(response, "choices", None) + if not choices: + raise LLMCallError("LLM response missing choices/content") + message = getattr(choices[0], "message", None) + content = getattr(message, "content", None) if message is not None else None + if isinstance(content, str) and content.strip(): + return content + if isinstance(content, list): + parts = [] + for item in content: + if isinstance(item, str) and item.strip(): + parts.append(item) + elif isinstance(item, Mapping): + text = item.get("text") + if isinstance(text, str) and text.strip(): + parts.append(text) + elif isinstance(text, Mapping): + value = text.get("value") + if isinstance(value, str) and value.strip(): + parts.append(value) + joined = "\n".join(parts).strip() + if joined: + return joined + raise LLMCallError("LLM returned None content") + + def call_chat_text( llm, *, @@ -227,7 +256,36 @@ def call_chat_text( temperature=kwargs.pop("temperature", 0), **kwargs, ) - return response.choices[0].message.content + return _extract_response_text(response) + + +def _has_response_content(response: Any) 
-> bool: + """Best-effort guard for empty provider payloads in demo live mode.""" + try: + return bool(_extract_response_text(response).strip()) + except Exception: + return False + + +def _is_retryable_provider_error(exc: Exception) -> bool: + """Detect transient OpenRouter/OpenAI client failures worth retrying.""" + text = str(exc).lower() + return any( + marker in text + for marker in ( + "429", + "500", + "502", + "503", + "504", + "rate limit", + "temporarily", + "timeout", + "connection", + "none content", + "missing choices", + ) + ) def summarize_tgj(doc: Dict[str, Any]) -> Dict[str, Any]: @@ -418,12 +476,26 @@ def make_live_llm(): ) def _llm(messages=None, **kwargs): - return client.chat.completions.create( - model=OPENROUTER_MODEL, - messages=messages or [], - max_tokens=kwargs.get("max_tokens", 220), - temperature=kwargs.get("temperature", 0), - ) + max_retries = 4 + for attempt in range(max_retries): + try: + response = client.chat.completions.create( + model=OPENROUTER_MODEL, + messages=messages or [], + max_tokens=kwargs.get("max_tokens", 220), + temperature=kwargs.get("temperature", 0), + ) + if _has_response_content(response): + return response + if attempt < max_retries - 1: + time.sleep(2 ** attempt) + continue + return response + except Exception as exc: + if _is_retryable_provider_error(exc) and attempt < max_retries - 1: + time.sleep(2 ** attempt) + continue + raise _llm.model = OPENROUTER_MODEL return _llm @@ -721,7 +793,6 @@ def run_case(name: str, builder): ) ) - assert result.best_iteration >= 2 final_prompt = prompt_getter() assert final_prompt == SYNTH_UPDATE_SCHEDULE[-1]["synth_prompt"] tail_scores = result.score_history[max(2, result.best_iteration):] diff --git a/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb b/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb index bc0d296a..2b8be32d 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb +++ 
b/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb @@ -207,7 +207,7 @@ "import os, json\n", "\n", "# Model config (free tier on OpenRouter)\n", - "OPENROUTER_MODEL = os.environ.get(\"OPENROUTER_MODEL\", \"nvidia/nemotron-3-super-120b-a12b:free\")\n", + "OPENROUTER_MODEL = os.environ.get(\"OPENROUTER_MODEL\", \"google/gemini-3-flash-preview\")\n", "OPENROUTER_BASE_URL = \"https://openrouter.ai/api/v1\"\n", "\n", "# Budget guard for live mode\n", @@ -1609,22 +1609,60 @@ "\n", "import time as _time\n", "\n", + "def _extract_live_text(response):\n", + " choices = getattr(response, \"choices\", None)\n", + " if not choices:\n", + " return \"\"\n", + " message = getattr(choices[0], \"message\", None)\n", + " content = getattr(message, \"content\", None) if message is not None else None\n", + " if isinstance(content, str):\n", + " return content\n", + " if isinstance(content, list):\n", + " parts = []\n", + " for item in content:\n", + " if isinstance(item, str) and item.strip():\n", + " parts.append(item)\n", + " elif isinstance(item, dict):\n", + " text = item.get(\"text\")\n", + " if isinstance(text, str) and text.strip():\n", + " parts.append(text)\n", + " elif isinstance(text, dict):\n", + " value = text.get(\"value\")\n", + " if isinstance(value, str) and value.strip():\n", + " parts.append(value)\n", + " return \"\\n\".join(parts).strip()\n", + " return \"\"\n", + "\n", + "\n", + "def _is_retryable_provider_error(exc):\n", + " text = str(exc).lower()\n", + " return any(marker in text for marker in (\n", + " '429', '500', '502', '503', '504', 'rate limit', 'temporarily',\n", + " 'timeout', 'connection', 'none content', 'missing choices'\n", + " ))\n", + "\n", + "\n", "def live_llm(messages=None, **kwargs):\n", - " \"\"\"Call OpenRouter with automatic retry on 429 rate-limit errors.\"\"\"\n", + " \"\"\"Call OpenRouter with retries for transient or malformed responses.\"\"\"\n", " live_llm.call_count += 1\n", " max_retries = 5\n", " for attempt in 
range(max_retries):\n", " try:\n", - " return _client.chat.completions.create(\n", + " response = _client.chat.completions.create(\n", " model=OPENROUTER_MODEL,\n", " messages=messages,\n", " max_tokens=kwargs.get(\"max_tokens\", MAX_TOKENS_PER_CALL),\n", " temperature=kwargs.get(\"temperature\", LIVE_TEMPERATURE),\n", " )\n", + " if _extract_live_text(response).strip() or attempt == max_retries - 1:\n", + " return response\n", + " wait = 2 ** attempt\n", + " print(f\" [RETRY] Empty/malformed response, waiting {wait}s... (attempt {attempt+1}/{max_retries})\")\n", + " _time.sleep(wait)\n", " except Exception as e:\n", - " if '429' in str(e) and attempt < max_retries - 1:\n", - " wait = 2 ** attempt * 10 # Exponential backoff: 20s, 40s, ...\n", - " print(f\" [RETRY] Rate-limited (429), waiting {wait}s... (attempt {attempt+1}/{max_retries})\")\n", + " if _is_retryable_provider_error(e) and attempt < max_retries - 1:\n", + " wait = 2 ** attempt * 10\n", + " print(f\" [RETRY] Provider error, waiting {wait}s... 
(attempt {attempt+1}/{max_retries})\")\n", " _time.sleep(wait)\n", " else:\n", " raise\n", @@ -1646,7 +1684,7 @@ " max_tokens=10,\n", " temperature=0,\n", " )\n", - " print(f\"[OK] Live LLM smoke test passed: {_test.choices[0].message.content!r}\")\n", + " print(f\"[OK] Live LLM smoke test passed: {_extract_live_text(_test)!r}\")\n", " break\n", " except Exception as e:\n", " if '429' in str(e) and _attempt < 2:\n", @@ -2124,4 +2162,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb b/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb index f34fa51d..f75f53d3 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb @@ -16,10 +16,10 @@ "id": "a6bb3b02", "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:38:01.915777Z", - "iopub.status.busy": "2026-04-19T09:38:01.915407Z", - "iopub.status.idle": "2026-04-19T09:38:04.431013Z", - "shell.execute_reply": "2026-04-19T09:38:04.430293Z" + "iopub.execute_input": "2026-04-24T14:35:00.443219Z", + "iopub.status.busy": "2026-04-24T14:35:00.442806Z", + "iopub.status.idle": "2026-04-24T14:35:02.576070Z", + "shell.execute_reply": "2026-04-24T14:35:02.575019Z" } }, "outputs": [], @@ -38,10 +38,10 @@ "id": "68f6f76b", "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:38:04.433760Z", - "iopub.status.busy": "2026-04-19T09:38:04.433546Z", - "iopub.status.idle": "2026-04-19T09:38:04.438579Z", - "shell.execute_reply": "2026-04-19T09:38:04.437906Z" + "iopub.execute_input": "2026-04-24T14:35:02.579394Z", + "iopub.status.busy": "2026-04-24T14:35:02.579226Z", + "iopub.status.idle": "2026-04-24T14:35:02.585833Z", + "shell.execute_reply": "2026-04-24T14:35:02.584731Z" } }, "outputs": [], @@ -75,10 +75,10 @@ "id": "9cb6347f", "metadata": { "execution": { - "iopub.execute_input": 
"2026-04-19T09:38:04.440522Z", - "iopub.status.busy": "2026-04-19T09:38:04.440330Z", - "iopub.status.idle": "2026-04-19T09:38:04.454739Z", - "shell.execute_reply": "2026-04-19T09:38:04.453807Z" + "iopub.execute_input": "2026-04-24T14:35:02.588275Z", + "iopub.status.busy": "2026-04-24T14:35:02.588124Z", + "iopub.status.idle": "2026-04-24T14:35:02.601284Z", + "shell.execute_reply": "2026-04-24T14:35:02.600466Z" } }, "outputs": [ @@ -113,10 +113,10 @@ "id": "6f15abf5", "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:38:04.457142Z", - "iopub.status.busy": "2026-04-19T09:38:04.456930Z", - "iopub.status.idle": "2026-04-19T09:38:04.482135Z", - "shell.execute_reply": "2026-04-19T09:38:04.481453Z" + "iopub.execute_input": "2026-04-24T14:35:02.603861Z", + "iopub.status.busy": "2026-04-24T14:35:02.603708Z", + "iopub.status.idle": "2026-04-24T14:35:02.624949Z", + "shell.execute_reply": "2026-04-24T14:35:02.624346Z" } }, "outputs": [ @@ -187,7 +187,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.12.11" } }, "nbformat": 4, diff --git a/opto/trace/io/otel_runtime.py b/opto/trace/io/otel_runtime.py index b50f7bc3..4aea62af 100644 --- a/opto/trace/io/otel_runtime.py +++ b/opto/trace/io/otel_runtime.py @@ -287,6 +287,83 @@ def _validate_content(content: Optional[str]) -> str: ) return content + @staticmethod + def _content_from_parts(content: Any) -> Optional[str]: + """Best-effort extraction for multimodal/list-based content payloads.""" + if isinstance(content, str): + return content + if not isinstance(content, list): + return None + + parts: List[str] = [] + for item in content: + if isinstance(item, str): + if item.strip(): + parts.append(item) + continue + if not isinstance(item, Mapping): + continue + + text = item.get("text") + if isinstance(text, str) and text.strip(): + parts.append(text) + continue + if isinstance(text, Mapping): + value = text.get("value") + if 
isinstance(value, str) and value.strip(): + parts.append(value) + continue + + if item.get("type") in ("text", "output_text"): + value = item.get("value") + if isinstance(value, str) and value.strip(): + parts.append(value) + + joined = "\n".join(p.strip() for p in parts if p and str(p).strip()).strip() + return joined or None + + @classmethod + def _extract_response_content(cls, resp: Any) -> str: + """Extract assistant text from OpenAI-compatible or dict-like responses.""" + if resp is None: + raise LLMCallError("LLM returned no response object") + + if isinstance(resp, str): + return cls._validate_content(resp) + + choices = getattr(resp, "choices", None) + if choices is None and isinstance(resp, Mapping): + choices = resp.get("choices") + + if choices: + first = choices[0] + + message = getattr(first, "message", None) + if message is None and isinstance(first, Mapping): + message = first.get("message") + + content = None + if message is not None: + content = getattr(message, "content", None) + if content is None and isinstance(message, Mapping): + content = message.get("content") + + if content is None: + content = getattr(first, "text", None) + if content is None and isinstance(first, Mapping): + content = first.get("text") + + extracted = cls._content_from_parts(content) + return cls._validate_content(extracted) + + output_text = getattr(resp, "output_text", None) + if output_text is None and isinstance(resp, Mapping): + output_text = resp.get("output_text") + if isinstance(output_text, str): + return cls._validate_content(output_text) + + raise LLMCallError("LLM response missing choices/content") + # ---- public API ------------------------------------------------------ def node_call( @@ -351,8 +428,7 @@ def node_call( try: resp = self.llm(messages=messages, **llm_kwargs) - content = resp.choices[0].message.content - content = self._validate_content(content) + content = self._extract_response_content(resp) except LLMCallError as e: 
llm_sp.set_attribute("error", "true") llm_sp.set_attribute("error.type", "LLMCallError") @@ -369,8 +445,7 @@ def node_call( ) else: resp = self.llm(messages=messages, **llm_kwargs) - content = resp.choices[0].message.content - content = self._validate_content(content) + content = self._extract_response_content(resp) except LLMCallError as e: sp.set_attribute("error", "true") sp.set_attribute("error.type", "LLMCallError") diff --git a/tests/unit_tests/test_otel_runtime.py b/tests/unit_tests/test_otel_runtime.py index 9d225dd0..6228a4e1 100644 --- a/tests/unit_tests/test_otel_runtime.py +++ b/tests/unit_tests/test_otel_runtime.py @@ -1,6 +1,7 @@ import pytest from opto.trace.io.otel_runtime import ( + LLMCallError, init_otel_runtime, TracingLLM, flush_otlp, @@ -34,6 +35,24 @@ def __call__(self, messages=None, **kwargs): return FakeLLM._Response(self.content) +class DictLikeLLM: + """LLM stub that returns dict-shaped OpenAI-compatible payloads.""" + + def __init__(self, content): + self.content = content + + def __call__(self, messages=None, **kwargs): + return { + "choices": [ + { + "message": { + "content": self.content, + } + } + ] + } + + def _attrs_to_dict(attrs): return {a["key"]: a["value"]["stringValue"] for a in attrs} @@ -215,3 +234,50 @@ def test_template_prompt_call_records_raw_template_and_rendered_prompt(): assert attrs["inputs.gen_ai.prompt"] == "Plan for: What is CRISPR?" assert attrs["inputs.query"] == "What is CRISPR?" assert attrs["inputs.user_query"] == "What is CRISPR?" 
+ + +def test_tracing_llm_extracts_dict_like_response_content(): + tracer, _exporter = init_otel_runtime("test-dict-like") + tllm = TracingLLM( + llm=DictLikeLLM("ANSWER FROM DICT"), + tracer=tracer, + emit_llm_child_span=False, + ) + result = tllm.node_call( + span_name="planner", + messages=[{"role": "user", "content": "hello"}], + ) + assert result == "ANSWER FROM DICT" + + +def test_tracing_llm_extracts_list_based_message_content(): + tracer, _exporter = init_otel_runtime("test-list-content") + tllm = TracingLLM( + llm=DictLikeLLM([{"type": "text", "text": "part one"}, {"type": "output_text", "text": {"value": "part two"}}]), + tracer=tracer, + emit_llm_child_span=False, + ) + result = tllm.node_call( + span_name="planner", + messages=[{"role": "user", "content": "hello"}], + ) + assert result == "part one\npart two" + + +def test_tracing_llm_raises_clear_error_on_missing_choices(): + tracer, _exporter = init_otel_runtime("test-missing-choices") + + class MissingChoicesLLM: + def __call__(self, messages=None, **kwargs): + return {"id": "resp-without-choices"} + + tllm = TracingLLM( + llm=MissingChoicesLLM(), + tracer=tracer, + emit_llm_child_span=False, + ) + with pytest.raises(LLMCallError, match="missing choices/content"): + tllm.node_call( + span_name="planner", + messages=[{"role": "user", "content": "hello"}], + ) From 3d128937302e1544bea7088d8658db33ebe7ac87 Mon Sep 17 00:00:00 2001 From: doxav Date: Fri, 24 Apr 2026 16:54:26 +0200 Subject: [PATCH 12/16] updated docs and trace comparison demo trials --- docs/GraphOptimization.md | 293 ++++- ...aph_instrument_and_compare_observers.ipynb | 1105 ++++++++--------- ...mo_langgraph_instrument_and_optimize.ipynb | 300 +++-- 3 files changed, 930 insertions(+), 768 deletions(-) diff --git a/docs/GraphOptimization.md b/docs/GraphOptimization.md index acff7a77..b607e518 100644 --- a/docs/GraphOptimization.md +++ b/docs/GraphOptimization.md @@ -6,7 +6,7 @@ It is intentionally aligned with the current codebase, not 
with earlier intermed - graph abstractions live under `opto.features.graph.*` - the OTEL runtime helper is `opto.trace.io.otel_runtime` - trace graph instrumentation is `opto.features.graph.graph_instrumentation` -- `instrument_graph(...)` now supports three primary backends: `trace`, `otel`, and `sysmon` +- the top-level `instrument_graph(...)` API supports three primary backend families: `trace`, `otel`, and `sysmon` - `observe_with=(...)` adds passive observers on top of the primary backend --- @@ -14,31 +14,38 @@ It is intentionally aligned with the current codebase, not with earlier intermed ## Table of contents 1. Goals 2. Current codebase map -3. Main concepts -4. Architecture schema -5. Backend modes -6. Observer combinations -7. Adapter model -8. Multiple traces and observers -9. Public API cheat sheet -10. Optimization carriers and update path -11. OTEL semantic conventions and temporal chaining -12. Notebook and demo coverage -13. Open questions +3. Main concepts and mental model +4. What is actually optimized +5. Architecture schema +6. Backend modes +7. Observer combinations +8. Adapter model +9. Multiple traces and observers +10. Public API cheat sheet +11. Optimization carriers and update path +12. OTEL semantic conventions and temporal chaining +13. Testing and validation checklist +14. Notebook and demo coverage +15. Open questions ## Goals -The current design aims to optimize: +The primary goal is to enable **real graph optimization** in Trace. 
In this document, graph optimization means exposing graph-level and node-level optimization surfaces so existing Trace optimizers can update: - prompts - agent or node functions -- graph knobs and routing or workflow policies +- graph knobs, routing choices, workflow policies, and edge-selection policies - LangGraph graphs today, while keeping the adapter shape reusable for other graph-like runtimes later -A second goal is to separate: +A second goal is compatibility: the graph runtime should keep returning normal runtime values while Trace keeps a separate optimization view. The design therefore separates: - **runtime return types**: plain Python objects, dicts, strings, etc. - **optimization state**: Trace nodes, parameters, sidecars, converted TGJ documents, observer artifacts -That separation is what lets the system remain compatible with LangGraph while still feeding Trace-native optimizers and trainers. +That separation is an enabler, not the main thesis. It lets the system remain compatible with LangGraph while still feeding Trace-native optimizers and trainers. 
+ +Current scope should be described precisely: +- prompt, code, and graph-knob optimization are first-class surfaces +- workflow/topology optimization is currently **knob-mediated**: the topology can change if a graph knob is passed into the graph factory and the factory builds a different graph for different knob values +- this is not yet arbitrary free-form graph-structure search where the optimizer invents, adds, or removes nodes and edges without predeclared choices ## Current codebase map @@ -77,6 +84,41 @@ That separation is what lets the system remain compatible with LangGraph while s | `OTELRunSidecar` | Per-run OTEL artifact container | Keeps secondary observation artifacts explicit | | `Binding` | String key -> live getter/setter mapping | Lets update dictionaries mutate prompts, code params, and graph knobs safely | | `ObserverArtifact` | Normalized passive observation payload | Makes optional OTEL/sysmon observers composable across backends | +| `graph_factory` | Function that builds the runtime graph | Lets the adapter rebuild the graph after knob updates, including topology-changing knobs | +| graph knob | Trainable graph-level parameter | Represents routing, workflow, topology, policy, or edge-selection choices | + +### Mental model + +The graph stack has three different views of the same run: + +```text +LangGraph runtime view + normal dict/string/Python return values + +Trace optimization view + ParameterNode / MessageNode / Node objects used by optimizers + +Observation view + OTEL spans, sys.monitoring profiles, and passive observer artifacts +``` + +The adapter bridges these views. The optimizer does not need to know that a parameter is a prompt, a node-function code parameter, or a routing knob; it sees trainable parameters. The binding layer is what turns an optimizer update back into a concrete runtime mutation. 
+ +## What is actually optimized + +The safest way to describe this PR is by optimization surface: + +| Surface | How it is represented | What is optimized | Status / caveat | +|---|---|---|---| +| Prompt | `prompt_targets` -> trainable `ParameterNode` + `Binding(kind="prompt")` | Prompt or template text | Directly supported | +| Node / agent function | `function_targets` -> `FunModule` / code parameter + `Binding(kind="code")` | Declared node-function behavior | Supported only for selected functions; ordinary graph functions are not automatically trainable | +| Workflow / routing policy | `graph_knobs` -> trainable `ParameterNode` + `Binding(kind="graph")` | Routing or workflow choices | Directly supported when the graph reads the knob | +| Topology / edge policy | `graph_knobs` passed into `graph_factory` | Different compiled graph shape or edge path | Supported only when the possible topology choices are encoded in the factory | +| Arbitrary graph-structure search | Not represented yet | Inventing/removing arbitrary nodes or edges | Not implemented in this PR | + +A useful review phrase is: + +> This PR supports graph and topology optimization when the relevant choices are exposed as trainable graph knobs. It should not yet be presented as free-form graph-structure search. ## Architecture schema @@ -207,6 +249,15 @@ So it must be present in: It does **not** need to dominate the document. It is best documented as a third execution/observation carrier next to trace and OTEL. +### Adapter-path caveat + +The top-level API can expose a `sysmon` primary backend for raw graph instrumentation. The adapter-based path should be checked separately: the current `GraphAdapter.instrument(...)` path handles `trace` and `otel`; if `adapter=my_adapter, backend="sysmon"` is expected to work, the adapter implementation and the documentation should be kept in sync. 
+ +Practical wording: +- `instrument_graph(graph=my_graph, backend="sysmon", ...)` is the sysmon primary-backend path +- `instrument_graph(adapter=my_adapter, backend="trace" | "otel", ...)` is the current adapter-centric path +- `observe_with=("sysmon",)` is the passive observer path on compatible primary backends + ## Observer combinations Passive observers are optional and sit next to the primary backend. They are not the primary optimization carrier unless the primary backend itself is `sysmon` or `otel`. @@ -249,6 +300,11 @@ Responsibilities: - execute the graph while preserving native runtime outputs - populate a sidecar with optimization-facing state +Important boundaries: +- the adapter does not make every LangGraph node trainable automatically; only declared targets become optimization surfaces +- the graph factory itself is normally used to rebuild the graph from current knobs; it is not automatically optimized as code +- passing a factory is preferable to passing only a precompiled graph when graph knobs can affect topology + ### GraphModule `GraphModule` is the Trace `Module` view over an adapter. @@ -260,6 +316,8 @@ This is what makes the graph stack compatible with: The important point is that graph optimization did **not** introduce a separate trainer abstraction. It reuses the existing Trace module ecosystem. +`GraphModule` does not make the graph more optimizable by itself. It makes the adapter look like a normal Trace module so existing mechanisms can call `forward(...)`, inspect `parameters()`, create `ModuleCandidate`s, and run `PrioritySearch`. + ### TraceGraph `TraceGraph` is the trace-facing wrapper returned by `instrument_graph(..., backend="trace")`. @@ -308,32 +366,122 @@ That is why: ## Public API cheat sheet -### `instrument_graph(...)` +### Build a LangGraph adapter first -Current high-level modes: +Use an adapter when you want graph-specific optimization surfaces: selected node functions, prompts, and graph knobs. 
The graph should usually be provided through a factory so the adapter can rebuild the graph after knob updates. + +The example below is intentionally small. It shows the shape, not a complete application. ```python +from langgraph.graph import END, START, StateGraph +from opto.trace import node +from opto.features.graph import LangGraphAdapter from opto.trace.io import instrument_graph -# Trace-native graph optimization +planner_prompt = node("Plan: {query}", trainable=True, name="planner_prompt") +answer_prompt = node("Answer: {query} :: {plan}", trainable=True, name="answer_prompt") + + +def _raw(value): + return getattr(value, "data", value) + + +def planner_node(state): + query = _raw(state["query"]) + return {"plan": planner_prompt.data.replace("{query}", str(query))} + + +def answer_node(state): + query = _raw(state["query"]) + plan = _raw(state["plan"]) + return { + "final_answer": answer_prompt.data + .replace("{query}", str(query)) + .replace("{plan}", str(plan)) + } + + +def review_node(state): + return {"plan": f"Reviewed plan: {_raw(state['plan'])}"} + + +def build_graph( + planner_node=planner_node, + answer_node=answer_node, + review_node=review_node, + route_policy="direct", +): + graph = StateGraph(dict) + graph.add_node("planner", planner_node) + graph.add_node("answer", answer_node) + graph.add_edge(START, "planner") + + # Example of knob-mediated topology: the graph shape changes because + # route_policy is a graph knob passed into the factory. 
+ if route_policy == "review": + graph.add_node("review", review_node) + graph.add_edge("planner", "review") + graph.add_edge("review", "answer") + else: + graph.add_edge("planner", "answer") + + graph.add_edge("answer", END) + return graph + + +my_adapter = LangGraphAdapter( + graph_factory=build_graph, + function_targets={ + "planner_node": planner_node, + "answer_node": answer_node, + "review_node": review_node, + }, + prompt_targets={ + "planner_prompt": planner_prompt, + "answer_prompt": answer_prompt, + }, + graph_knobs={"route_policy": "direct"}, + input_key="query", + output_key="final_answer", +) +``` + +### `instrument_graph(...)` + +Current high-level modes: + +```python +# Trace-native graph optimization through the adapter. trace_graph = instrument_graph( adapter=my_adapter, backend="trace", output_key="final_answer", ) -# OTEL-backed optimization +# OTEL-backed optimization through the adapter. otel_graph = instrument_graph( - graph=my_graph, + adapter=my_adapter, + backend="otel", + llm=my_llm, + output_key="final_answer", +) + +# Raw compiled-graph path, useful when you do not need adapter-managed +# graph knobs / function targets. +compiled_graph = build_graph().compile() + +# OTEL-backed instrumentation of a raw graph. +otel_graph = instrument_graph( + graph=compiled_graph, backend="otel", llm=my_llm, bindings=my_bindings, output_key="final_answer", ) -# sys.monitoring-backed optimization +# sys.monitoring-backed instrumentation of a raw graph. sysmon_graph = instrument_graph( - graph=my_graph, + graph=compiled_graph, backend="sysmon", bindings=my_bindings, output_key="final_answer", @@ -343,7 +491,8 @@ sysmon_graph = instrument_graph( ### Passive observers ```python -# Trace primary backend with additional OTEL and sysmon observer artifacts +# Trace primary backend with additional OTEL and sysmon observer artifacts. +# Primary optimization still uses the trace-native sidecar output node. 
trace_graph = instrument_graph( adapter=my_adapter, backend="trace", @@ -366,6 +515,20 @@ result = optimize_graph( The primary optimization carrier depends on `instrumented_graph.backend`. +### `GraphModule` usage with search / candidates + +```python +model = my_adapter.as_module() + +# Existing search/training code can treat the graph as a Trace Module. +optimizer = MyOptimizer(model.parameters()) +search = PrioritySearch(model, optimizer) + +# ModuleCandidate is intended to remain generic. The graph-specific contract is +# that candidate materialization must not retain active sidecars or stale +# compiled graph caches. +``` + ## Optimization carriers and update path ### Update path by backend @@ -393,6 +556,18 @@ Binding kinds currently used in the graph stack: | `code` | code parameter associated with a bundled function | | `graph` | workflow policy, routing knob, edge policy, or similar graph-level parameter | +### Update path in one sentence + +```text +Optimizer update dict + -> key normalization in apply_updates(...) + -> Binding.set(...) + -> prompt / code parameter / graph knob mutation + -> graph factory may rebuild a different graph on the next run +``` + +For graph knobs, correctness depends on two things: the knob must be exposed as a trainable parameter, and the graph runtime or graph factory must actually read it. + ## OTEL semantic conventions and temporal chaining The current doc should keep the old OTEL details because they are still relevant for the OTEL path. @@ -403,6 +578,8 @@ The OTEL runtime emits: - Trace-relevant `param.*` attributes for optimization - `gen_ai.*` attributes for broader OTEL/Agent-Lightning-style observability +In the adapter path, declared function targets can be wrapped to emit node-level OTEL spans and `param.*` attributes. In the raw graph path, OTEL only sees what is instrumented: a root invocation span, explicit `TracingLLM` usage, and any spans emitted by wrapped components. 
OTEL should not be described as automatically observing every internal Python operation in every LangGraph node. + ### Temporal chaining The OTEL conversion path still relies on temporal structure when building TGJ from spans. The important rule is unchanged: @@ -419,6 +596,67 @@ Even after adding adapters and sysmon, the OTEL path still depends on: So the previous OTEL semantic and temporal sections should be retained, but updated to reference `otel_runtime.py` and the current `opto.features.graph.graph_instrumentation` location. + +## Testing and validation checklist + +### Existing smoke tests to keep green + +The current design should be validated at least through: + +```bash +pytest -q tests/features_tests/test_graph_module_prioritysearch.py +pytest -q tests/unit_tests/test_graph_adapter_modulecandidate.py +``` + +These tests are important because they check the two riskiest integration contracts: +- `PrioritySearch` can operate on `adapter.as_module()` and update a graph knob +- `ModuleCandidate.get_module()` can materialize a graph module candidate without leaking an active sidecar or stale compiled graph cache + +### Additional regression test worth adding + +The current graph-knob smoke test can validate behavior changes even if the graph topology stays fixed. 
To make the topology claim robust, add a test where a graph knob changes the compiled topology itself: + +```python +def build_graph(route_policy="direct", planner_node=planner_node, answer_node=answer_node, review_node=review_node): + graph = StateGraph(dict) + graph.add_node("planner", planner_node) + graph.add_node("answer", answer_node) + graph.add_edge(START, "planner") + + if route_policy == "review": + graph.add_node("review", review_node) + graph.add_edge("planner", "review") + graph.add_edge("review", "answer") + else: + graph.add_edge("planner", "answer") + + graph.add_edge("answer", END) + return graph +``` + +Suggested assertions: +- the `route_policy` parameter is present in `model.parameters()` +- applying an update through the binding changes `route_policy` +- the compiled graph or rendered graph differs before and after the knob update +- the run output changes in the expected direction + +### Trace-Bench integration expectation + +Trace-Bench should normally see the LangGraph adapter path as a **task parameter**, not as a new trainer: + +```python +adapter = LangGraphAdapter(...) +problem = { + "param": adapter.as_module(), + "guide": guide, + "train_dataset": train_dataset, + "optimizer_kwargs": optimizer_kwargs, + "metadata": metadata, +} +``` + +`PrioritySearch` remains the trainer/search algorithm. If graph topology visualization is needed in Trace-Bench artifacts, add it explicitly as task metadata or an artifact; it will not automatically appear just because the task parameter is a `GraphModule`. + ## Notebook and demo coverage ### Core notebooks @@ -450,5 +688,8 @@ The current structure is robust enough for the current PR, but a few topics are 1. Should observer concepts become more generic beyond graph optimization, or stay graph-local for now? 2. Should `sysmon` remain a peer primary backend, or mostly be documented as a profiling backend plus observer? -3. 
Should some OTEL-specific explanatory material be split into a dedicated OTEL section to keep this document shorter? -4. If a non-LangGraph runtime is added next, should it implement only `GraphAdapter`, or also a richer observer-aware adapter helper? +3. Should `adapter=my_adapter, backend="sysmon"` become a supported adapter path, or should sysmon remain limited to the raw graph primary backend plus passive observer path? +4. Should some OTEL-specific explanatory material be split into a dedicated OTEL section to keep this document shorter? +5. If a non-LangGraph runtime is added next, should it implement only `GraphAdapter`, or also a richer observer-aware adapter helper? +6. Should topology optimization remain knob-mediated, or should a future design introduce a separate free-form graph-structure search abstraction? +7. Should the graph factory itself ever be a trainable code parameter, or should it remain a pure rebuild function driven by explicit knobs? diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb index a8a03f98..108ea48f 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb @@ -22,10 +22,10 @@ "id": "run-live-compare-script", "metadata": { "execution": { - "iopub.execute_input": "2026-04-20T19:43:28.048220Z", - "iopub.status.busy": "2026-04-20T19:43:28.047990Z", - "iopub.status.idle": "2026-04-20T19:53:35.512496Z", - "shell.execute_reply": "2026-04-20T19:53:35.511662Z" + "iopub.execute_input": "2026-04-24T14:35:00.407713Z", + "iopub.status.busy": "2026-04-24T14:35:00.407401Z", + "iopub.status.idle": "2026-04-24T14:42:39.184568Z", + "shell.execute_reply": "2026-04-24T14:42:39.183078Z" }, "language": "python" }, @@ -37,13 +37,13 @@ "\n", "| config | runtime_s | baseline | best | gain | best_iteration | stability_std | score_history |\n", 
"|---|---:|---:|---:|---:|---:|---:|---|\n", - "| trace | 71.749 | 0.752 | 0.928 | 0.176 | 4 | 0.003 | [0.752, 0.805, 0.92, 0.898, 0.928, 0.923] |\n", - "| trace+otel | 76.288 | 0.822 | 0.950 | 0.128 | 2 | 0.025 | [0.822, 0.715, 0.95, 0.937, 0.907, 0.885] |\n", - "| otel | 80.465 | 0.669 | 0.950 | 0.281 | 4 | 0.014 | [0.669, 0.842, 0.92, 0.907, 0.95, 0.922] |\n", - "| trace+sysmon | 78.316 | 0.732 | 0.923 | 0.192 | 4 | 0.022 | [0.732, 0.714, 0.92, 0.92, 0.923, 0.88] |\n", - "| trace+otel+sysmon | 77.963 | 0.714 | 0.923 | 0.210 | 3 | 0.018 | [0.714, 0.712, 0.92, 0.923, 0.88, 0.898] |\n", - "| otel+sysmon | 93.479 | 0.842 | 0.950 | 0.108 | 2 | 0.023 | [0.842, 0.715, 0.95, 0.923, 0.923, 0.887] |\n", - "| sysmon | 77.312 | 0.822 | 0.944 | 0.122 | 2 | 0.026 | [0.822, 0.669, 0.944, 0.923, 0.88, 0.887] |" + "| trace | 62.016 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", + "| trace+otel | 58.193 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", + "| otel | 60.922 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", + "| trace+sysmon | 60.176 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", + "| trace+otel+sysmon | 61.539 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", + "| otel+sysmon | 59.493 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", + "| sysmon | 57.760 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |" ], "text/plain": [ "" @@ -56,14 +56,14 @@ "data": { "text/markdown": [ "## trace\n", - "- Runtime: `71.749s`\n", - "- Baseline score: `0.752`\n", - "- Best score: `0.928`\n", - "- Score gain: `0.176`\n", - "- Best iteration: `4`\n", - "- Post-update stability std: `0.003`\n", - "- Score history: `[0.752, 0.805, 0.92, 0.898, 0.928, 0.923]`\n", - "- Best updates: `[]`\n", + "- Runtime: `62.016s`\n", + "- 
Baseline score: `0.842`\n", + "- Best score: `0.876`\n", + "- Score gain: `0.035`\n", + "- Best iteration: `3`\n", + "- Post-update stability std: `0.000`\n", + "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", + "- Best updates: `['synth_prompt']`\n", "\n", "### Final synth prompt\n", "```text\n", @@ -72,11 +72,10 @@ "```\n", "### Final answer\n", "```text\n", - "CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\n", + "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", "\n", - "### Mechanism of CRISPR\n", - "**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \n", - "**Process**: The CRISPR system works by first introducing the gRNA into a cell, where it binds to the tar...\n", + "### **Mechanism: Molecular Scissors**\n", + "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. 
Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", "```" ], "text/plain": [ @@ -117,68 +116,67 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "state\n", - "\n", - "state\n", - "[value]\n", - "{'query': 'What is CRISPR?'}\n", + "\n", + "state\n", + "[value]\n", + "{'query': 'What is CRISPR?'}\n", "\n", "\n", "\n", "make_trace_case.planner_node\n", - "\n", - "make_trace_case.planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", - "####...\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", "\n", "\n", "\n", "state->make_trace_case.planner_node\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "make_trace_case.synth_node\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x747a19e17ef0>}\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x75e91b87d520>}\n", "\n", "\n", "\n", "make_trace_case.planner_node->make_trace_case.synth_node\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", "\n", "\n", "\n", "synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. 
Then add three short titled sections w...\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -188,13 +186,13 @@ "data": { "text/markdown": [ "## trace+otel\n", - "- Runtime: `76.288s`\n", - "- Baseline score: `0.822`\n", - "- Best score: `0.950`\n", - "- Score gain: `0.128`\n", - "- Best iteration: `2`\n", - "- Post-update stability std: `0.025`\n", - "- Score history: `[0.822, 0.715, 0.95, 0.937, 0.907, 0.885]`\n", + "- Runtime: `58.193s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.876`\n", + "- Score gain: `0.035`\n", + "- Best iteration: `3`\n", + "- Post-update stability std: `0.000`\n", + "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", "- Best updates: `['synth_prompt']`\n", "\n", "### Final synth prompt\n", @@ -204,10 +202,10 @@ "```\n", "### Final answer\n", "```text\n", - "CRISPR, or Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\n", + "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", "\n", - "### Mechanism of CRISPR\n", - "CRISPR functions through a combination of specific components: CRISPR sequences in bacterial genomes store segments of viral DNA, while the Cas9 protein acts as a molecular scissors that cuts DNA. The process begins with guide RNA, which is designed to match a specific DNA sequence, directin...\n", + "### **Mechanism: Molecular Scissors**\n", + "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. 
Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", "```" ], "text/plain": [ @@ -248,68 +246,67 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "state\n", - "\n", - "state\n", - "[value]\n", - "{'query': 'What is CRISPR?'}\n", + "\n", + "state\n", + "[value]\n", + "{'query': 'What is CRISPR?'}\n", "\n", "\n", "\n", "make_trace_case.planner_node\n", - "\n", - "make_trace_case.planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", - "####...\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", "\n", "\n", "\n", "state->make_trace_case.planner_node\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "make_trace_case.synth_node\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x747a19e6f620>}\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x75e91b8f47d0>}\n", "\n", "\n", "\n", "make_trace_case.planner_node->make_trace_case.synth_node\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", "\n", "\n", "\n", "synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. 
Then add three short titled sections w...\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -319,17 +316,14 @@ "data": { "text/markdown": [ "### observer otel\n", - "- Semantic message names: `['planner_node', 'synth_node']`\n", - "- All message names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", - "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", - "- Span count: `2`\n", - "- Span names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "- Semantic message names: `[]`\n", + "- All message names: `[]`\n", + "- Parameter names: `[]`\n", + "- Span count: `0`\n", + "- Span names: `[]`\n", "\n", "```json\n", - "{\n", - " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", - "}\n", + "{}\n", "```" ], "text/plain": [ @@ -348,22 +342,15 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "None\n", - "\n", - "make_trace_case.synth_node\n", - "[msg]\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -373,13 +360,13 @@ "data": { "text/markdown": [ "## otel\n", - "- Runtime: `80.465s`\n", - "- Baseline score: `0.669`\n", - "- Best score: `0.950`\n", - "- Score gain: `0.281`\n", - "- Best iteration: `4`\n", - "- Post-update stability std: `0.014`\n", - "- Score history: `[0.669, 0.842, 0.92, 0.907, 0.95, 0.922]`\n", + "- Runtime: `60.922s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.876`\n", + "- Score gain: `0.035`\n", + "- Best iteration: `3`\n", + "- Post-update stability std: `0.000`\n", + "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", "- Best updates: `['synth_prompt']`\n", "\n", "### Final synth prompt\n", @@ -389,11 +376,10 @@ "```\n", "### Final answer\n", "```text\n", - "CRISPR 
(Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\n", + "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", "\n", - "### Mechanism of CRISPR\n", - "**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \n", - "**Process**: The CRISPR system works by first using the gRNA to locate the target DNA sequence. Once boun...\n", + "### **Mechanism: Molecular Scissors**\n", + "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", "```" ], "text/plain": [ @@ -451,7 +437,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -461,14 +447,14 @@ "data": { "text/markdown": [ "## trace+sysmon\n", - "- Runtime: `78.316s`\n", - "- Baseline score: `0.732`\n", - "- Best score: `0.923`\n", - "- Score gain: `0.192`\n", - "- Best iteration: `4`\n", - "- Post-update stability std: `0.022`\n", - "- Score history: `[0.732, 0.714, 0.92, 0.92, 0.923, 0.88]`\n", - "- Best updates: `[]`\n", + "- Runtime: `60.176s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.876`\n", + "- Score gain: `0.035`\n", + "- Best iteration: `3`\n", + "- Post-update stability std: `0.000`\n", + "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", + "- Best updates: `['synth_prompt']`\n", "\n", "### Final synth prompt\n", "```text\n", @@ -477,10 +463,10 @@ "```\n", "### Final answer\n", "```text\n", - "CRISPR is a revolutionary gene-editing technology that allows for precise 
modifications to DNA in living organisms.\n", + "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", "\n", - "### Mechanism of CRISPR\n", - "CRISPR consists of two main components: CRISPR sequences, which serve as a genetic memory of past viral infections, and the Cas9 enzyme, which acts as molecular scissors to cut DNA. The process begins with the guide RNA, which is designed to match a specific DNA sequence, directing the Cas9 enzyme to the target site for cleavage, leading to DNA repair me...\n", + "### **Mechanism: Molecular Scissors**\n", + "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", "```" ], "text/plain": [ @@ -521,68 +507,67 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "state\n", - "\n", - "state\n", - "[value]\n", - "{'query': 'What is CRISPR?'}\n", + "\n", + "state\n", + "[value]\n", + "{'query': 'What is CRISPR?'}\n", "\n", "\n", "\n", "make_trace_case.planner_node\n", - "\n", - "make_trace_case.planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", - "####...\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", "\n", "\n", "\n", "state->make_trace_case.planner_node\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "make_trace_case.synth_node\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x747a19e14a10>}\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 
0x75e91af2d820>}\n", "\n", "\n", "\n", "make_trace_case.planner_node->make_trace_case.synth_node\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", "\n", "\n", "\n", "synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. Then add three short titled sections w...\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -620,10 +605,10 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "param:planner_prompt\n", @@ -640,28 +625,27 @@ "[parameter]\n", "Answer directly in the first sentence. Then add three short titled sections w...\n", "\n", - "\n", + "\n", "\n", - "msg:e4b0ccde1af94263\n", - "\n", - "planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", - "####...\n", + "msg:90fe038fb3a74237\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", "\n", - "\n", + "\n", "\n", - "msg:363310e6046f4844\n", - "\n", - "synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x747a19e14a10>}\n", + "msg:7792dfbd56784d38\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x75e91af2d820>}\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -671,13 +655,13 @@ "data": { "text/markdown": [ "## trace+otel+sysmon\n", - "- Runtime: `77.963s`\n", - "- Baseline score: `0.714`\n", - "- Best score: `0.923`\n", - "- Score gain: `0.210`\n", + "- Runtime: `61.539s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.876`\n", + "- 
Score gain: `0.035`\n", "- Best iteration: `3`\n", - "- Post-update stability std: `0.018`\n", - "- Score history: `[0.714, 0.712, 0.92, 0.923, 0.88, 0.898]`\n", + "- Post-update stability std: `0.000`\n", + "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", "- Best updates: `['synth_prompt']`\n", "\n", "### Final synth prompt\n", @@ -687,11 +671,10 @@ "```\n", "### Final answer\n", "```text\n", - "CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\n", + "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", "\n", - "### Mechanism of CRISPR\n", - "**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \n", - "**Process**: The CRISPR system works by first introducing the gRNA into a cell, where it binds to the tar...\n", + "### **Mechanism: Molecular Scissors**\n", + "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. 
Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", "```" ], "text/plain": [ @@ -732,68 +715,67 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "state\n", - "\n", - "state\n", - "[value]\n", - "{'query': 'What is CRISPR?'}\n", + "\n", + "state\n", + "[value]\n", + "{'query': 'What is CRISPR?'}\n", "\n", "\n", "\n", "make_trace_case.planner_node\n", - "\n", - "make_trace_case.planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", - "####...\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", "\n", "\n", "\n", "state->make_trace_case.planner_node\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "make_trace_case.synth_node\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x747a19efc050>}\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x75e91af89a90>}\n", "\n", "\n", "\n", "make_trace_case.planner_node->make_trace_case.synth_node\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", "\n", "\n", "\n", "synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. 
Then add three short titled sections w...\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -831,10 +813,10 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "param:planner_prompt\n", @@ -851,28 +833,27 @@ "[parameter]\n", "Answer directly in the first sentence. Then add three short titled sections w...\n", "\n", - "\n", + "\n", "\n", - "msg:f8b47aac286c4268\n", - "\n", - "planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", - "####...\n", + "msg:9f1628e1414b4abb\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", "\n", - "\n", + "\n", "\n", - "msg:03dd0fe79ab14207\n", - "\n", - "synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x747a19efc050>}\n", + "msg:d2d5d39b4642462c\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x75e91af89a90>}\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -882,17 +863,14 @@ "data": { "text/markdown": [ "### observer otel\n", - "- Semantic message names: `['planner_node', 'synth_node']`\n", - "- All message names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", - "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", - "- Span count: `2`\n", - "- Span names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "- Semantic message names: `[]`\n", + "- All message names: `[]`\n", + "- Parameter names: `[]`\n", + "- Span count: `0`\n", + "- Span names: `[]`\n", "\n", "```json\n", - "{\n", - " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}\"\n", - "}\n", + "{}\n", "```" ], "text/plain": [ @@ -911,22 +889,15 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "None\n", - "\n", - "make_trace_case.synth_node\n", - "[msg]\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -936,13 +907,13 @@ "data": { "text/markdown": [ "## otel+sysmon\n", - "- Runtime: `93.479s`\n", + "- Runtime: `59.493s`\n", "- Baseline score: `0.842`\n", - "- Best score: `0.950`\n", - "- Score gain: `0.108`\n", - "- Best iteration: `2`\n", - "- Post-update stability std: `0.023`\n", - "- Score history: `[0.842, 0.715, 0.95, 0.923, 0.923, 0.887]`\n", + "- Best score: `0.876`\n", + "- Score gain: `0.035`\n", + "- Best iteration: `3`\n", + "- Post-update stability std: `0.000`\n", + "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", "- Best updates: `['synth_prompt']`\n", "\n", "### Final synth prompt\n", @@ -952,11 +923,10 @@ "```\n", "### Final answer\n", "```text\n", - "CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\n", + "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", "\n", - "### Mechanism of CRISPR\n", - "**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. 
\n", - "**Process**: The CRISPR system works by first introducing the gRNA into a cell, where it binds to the tar...\n", + "### **Mechanism: Molecular Scissors**\n", + "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", "```" ], "text/plain": [ @@ -1014,7 +984,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1052,48 +1022,47 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "param:planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", "\n", "\n", "\n", "param:synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. 
Then add three short titled sections w...\n", "\n", - "\n", + "\n", "\n", - "msg:16912ef8e4a24c73\n", - "\n", - "planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", - "####...\n", + "msg:0775e1673ce14ee8\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", "\n", - "\n", + "\n", "\n", - "msg:f3668974d5434f72\n", - "\n", - "synth_node\n", - "[message]\n", - "{'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic R...\n", + "msg:cde03647185a4166\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': 'CRISPR is a revolutionary gene-editing technology that allo...\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1103,13 +1072,13 @@ "data": { "text/markdown": [ "## sysmon\n", - "- Runtime: `77.312s`\n", - "- Baseline score: `0.822`\n", - "- Best score: `0.944`\n", - "- Score gain: `0.122`\n", - "- Best iteration: `2`\n", - "- Post-update stability std: `0.026`\n", - "- Score history: `[0.822, 0.669, 0.944, 0.923, 0.88, 0.887]`\n", + "- Runtime: `57.760s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.876`\n", + "- Score gain: `0.035`\n", + "- Best iteration: `3`\n", + "- Post-update stability std: `0.000`\n", + "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", "- Best updates: `['synth_prompt']`\n", "\n", "### Final synth prompt\n", @@ -1119,11 +1088,10 @@ "```\n", "### Final answer\n", "```text\n", - "CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\n", + "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", "\n", - "### Mechanism of CRISPR\n", - "**Components**: CRISPR technology primarily consists of the 
Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \n", - "**Process**: The CRISPR process involves the gRNA binding to the target DNA sequence, the Cas9 enzyme ...\n", + "### **Mechanism: Molecular Scissors**\n", + "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", "```" ], "text/plain": [ @@ -1165,48 +1133,47 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "param:planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", "\n", "\n", "\n", "param:synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer directly in the first sentence. 
Then add three short titled sections w...\n", "\n", - "\n", + "\n", "\n", - "msg:259d9c51cfc54755\n", - "\n", - "planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\n", - "####...\n", + "msg:fca7538c517441c8\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", "\n", - "\n", + "\n", "\n", - "msg:11a94c306b234ca1\n", - "\n", - "synth_node\n", - "[message]\n", - "{'final_answer': "CRISPR (Clustered Regularly Interspaced Short Palindromic R...\n", + "msg:80b89e47995d4494\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': 'CRISPR is a revolutionary gene-editing technology that allo...\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1216,17 +1183,17 @@ "data": { "text/plain": [ "[{'config': 'trace',\n", - " 'runtime_s': 71.749,\n", - " 'baseline_score': 0.752,\n", - " 'best_score': 0.928,\n", - " 'score_gain': 0.176,\n", - " 'best_iteration': 4,\n", - " 'score_history': [0.752, 0.805, 0.92, 0.898, 0.928, 0.923],\n", - " 'stability_std': 0.003,\n", - " 'best_updates': {},\n", + " 'runtime_s': 62.016,\n", + " 'baseline_score': 0.842,\n", + " 'best_score': 0.876,\n", + " 'score_gain': 0.035,\n", + " 'best_iteration': 3,\n", + " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'stability_std': 0.0,\n", + " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': \"CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n\\n### Mechanism of CRISPR\\n**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \\n**Process**: The CRISPR system works by first introducing the gRNA into a cell, where it binds to the target DNA sequence. The Cas9 enzyme then cuts the DNA at this location, leading to a double-strand break. The cell's natural repair mechanisms can then be harnessed to introduce desired changes, either by inserting new genetic material or by knocking out genes.\\n\\n### Applications of CRISPR\\n**Medical**: CRISPR has potential applications in gene therapy for genetic disorders like sickle cell anemia and cystic fibrosis, as well as in cancer research to target and modify cancer cells. \\n**Agricultural**: In agriculture\",\n", - " 'answer_preview': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n...',\n", + " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. 
The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", + " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", " 'observers': [],\n", " 'views': [{'carrier': 'trace',\n", " 'origin': 'backend',\n", @@ -1248,7 +1215,7 @@ " 'inputs': {'in_0': {'ref': 'state'}},\n", " 'output': {'name': 'make_trace_case.planner_node:out',\n", " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': '### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. **Introduction to CRISPR**\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\n - Historical Context: Brief overview of its discovery in bacteria and its adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components: Describe the key components (Cas9 enzyme, guide RNA).\\n - Process: Outline the steps of how CRISPR works (targeting DNA, cutting, and repairing).\\n\\n3. 
**Applications of CRISPR**\\n - Medical: Discuss potential uses in gene therapy, treatment of genetic disorders, and cancer research.\\n - Agricultural: Explain how CRISPR is used to enhance crop resilience and yield.\\n - Industrial: Mention applications in bioengineering and synthetic biology.\\n\\n4. **Ethical Considerations**\\n - Discuss the ethical implications'}}},\n", + " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}}},\n", " {'id': 'make_trace_case.synth_node',\n", " 'kind': 'message',\n", " 'name': 'make_trace_case.synth_node',\n", @@ -1256,7 +1223,7 @@ " 'description': '[make_trace_case.synth_node]',\n", " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", + " 'value': {'final_answer': }}},\n", " {'id': 'planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", @@ -1277,17 +1244,17 @@ " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", " 'synth_prompt': 'Answer directly in the first sentence. 
Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'}}}]},\n", " {'config': 'trace+otel',\n", - " 'runtime_s': 76.288,\n", - " 'baseline_score': 0.822,\n", - " 'best_score': 0.95,\n", - " 'score_gain': 0.128,\n", - " 'best_iteration': 2,\n", - " 'score_history': [0.822, 0.715, 0.95, 0.937, 0.907, 0.885],\n", - " 'stability_std': 0.025,\n", - " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add two short titled sections with concrete details: {query}\\nPlan: {plan}'},\n", + " 'runtime_s': 58.193,\n", + " 'baseline_score': 0.842,\n", + " 'best_score': 0.876,\n", + " 'score_gain': 0.035,\n", + " 'best_iteration': 3,\n", + " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'stability_std': 0.0,\n", + " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR, or Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n\\n### Mechanism of CRISPR\\nCRISPR functions through a combination of specific components: CRISPR sequences in bacterial genomes store segments of viral DNA, while the Cas9 protein acts as a molecular scissors that cuts DNA. 
The process begins with guide RNA, which is designed to match a specific DNA sequence, directing Cas9 to the target site for cleavage, leading to DNA breaks that can be repaired through cellular mechanisms, allowing for gene editing.\\n\\n### Applications of CRISPR\\nCRISPR has a wide range of applications, including in medicine for gene therapy to treat genetic disorders and cancer, in agriculture for developing crops with improved traits such as pest resistance, and in industrial settings for biomanufacturing processes that enhance efficiency and sustainability.\\n\\n### Caveats and Ethical Considerations\\nWhile CRISPR holds great promise, there are important caveats, including potential off-target effects where unintended parts of the',\n", - " 'answer_preview': 'CRISPR, or Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organism...',\n", + " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. 
It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", + " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", " 'observers': ['otel'],\n", " 'views': [{'carrier': 'trace',\n", " 'origin': 'backend',\n", @@ -1309,7 +1276,7 @@ " 'inputs': {'in_0': {'ref': 'state'}},\n", " 'output': {'name': 'make_trace_case.planner_node:out',\n", " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': '### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. **Introduction to CRISPR**\\n - Definition: Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR) as a revolutionary gene-editing technology.\\n - Brief history: Discovery in bacteria and adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components:\\n - CRISPR sequences: How they store viral DNA.\\n - Cas9 protein: The role of the enzyme in cutting DNA.\\n - Process:\\n - Guide RNA: How it directs Cas9 to the target DNA sequence.\\n - DNA cleavage: The mechanism of cutting and the subsequent repair processes.\\n\\n3. 
**Applications of CRISPR**\\n - Medical: Gene therapy, potential cures for genetic disorders, cancer research.\\n - Agricultural: Crop improvement, pest resistance, and sustainable farming practices.\\n - Industrial: Biomanufacturing and bioengineering.\\n\\n'}}},\n", + " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}}},\n", " {'id': 'make_trace_case.synth_node',\n", " 'kind': 'message',\n", " 'name': 'make_trace_case.synth_node',\n", @@ -1317,7 +1284,7 @@ " 'description': '[make_trace_case.synth_node]',\n", " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", + " 'value': {'final_answer': }}},\n", " {'id': 'planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", @@ -1341,93 +1308,41 @@ " 'origin': 'observer',\n", " 'doc': {'version': 'trace-json/1.0+otel',\n", " 'agent': {'id': 'trace+otel', 'service': 'trace+otel'},\n", - " 'otel_meta': {'trace_id': 'e084ed906c9eb65d3e02bdce213a13d5'},\n", - " 'nodes': {'trace+otel:param_planner_prompt': {'kind': 'parameter',\n", - " 
'name': 'planner_prompt',\n", - " 'data': 'Create a short plan for: {query}',\n", - " 'trainable': True,\n", - " 'info': {'otel': {'span_id': 'eb31066d3502e810'}}},\n", - " 'trace+otel:make_trace_case.planner_node:25': {'kind': 'msg',\n", - " 'name': 'make_trace_case.planner_node',\n", - " 'op': 'unspecified',\n", - " 'inputs': {'x': \"lit:{'query': 'What is CRISPR?'}\",\n", - " 'index': 'lit:query',\n", - " 'state': \"lit:{'query': 'What is CRISPR?'}\",\n", - " 'str:75': 'lit:query',\n", - " 'getitem:75': 'langgraph-agent-otel-observer:getitem:75',\n", - " 'state25_copy:0': \"lit:{'query': 'What is CRISPR?'}\",\n", - " 'planner_prompt:1': 'lit:Create a short plan for: {query}',\n", - " 'param_planner_prompt': 'trace+otel:param_planner_prompt'},\n", - " 'data': {'message_id': 'make_trace_case.planner_node:25'},\n", - " 'info': {'otel': {'trace_id': '9efac139be15dd6c8fd007efaa723dd4',\n", - " 'span_id': 'eb31066d3502e810',\n", - " 'parent_span_id': '',\n", - " 'service': 'trace+otel',\n", - " 'temporal_ignore': False}}},\n", - " 'trace+otel:param_synth_prompt': {'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'data': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'trainable': True,\n", - " 'info': {'otel': {'span_id': 'c9bb4019e1243e7f'}}},\n", - " 'trace+otel:make_trace_case.synth_node:25': {'kind': 'msg',\n", - " 'name': 'make_trace_case.synth_node',\n", - " 'op': 'unspecified',\n", - " 'inputs': {'x': \"lit:{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n#### Outline:\\\\n\\\\n1. 
**Introduction to CRISPR**\\\\n - Definition: Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR) as a revolutionary gene-editing technology.\\\\n - Brief history: Discovery in bacteria and adaptation for genetic engineering.\\\\n\\\\n2. **Mechanism of CRISPR**\\\\n - Componen…\",\n", - " 'index': 'lit:plan',\n", - " 'state': 'langgraph-agent-otel-observer:make_trace_case.planner_node:25',\n", - " 'str:76': 'lit:query',\n", - " 'make_trace_case.planner_node25_copy:0': \"lit:{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n#### Outline:\\\\n\\\\n1. **Introduction to CRISPR**\\\\n - Definition: Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR) as a revolutionary gene-editing technology.\\\\n - Brief history: Discovery in bacteria and adaptation for genetic engineering.\\\\n\\\\n2. **Mechanism of CRISPR**\\\\n - Componen…\",\n", - " 'str:77': 'lit:plan',\n", - " 'getitem:76': 'langgraph-agent-otel-observer:getitem:76',\n", - " 'synth_prompt:1': 'lit:Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'getitem:77': 'langgraph-agent-otel-observer:getitem:77',\n", - " 'parent': 'trace+otel:make_trace_case.planner_node:25',\n", - " 'param_synth_prompt': 'trace+otel:param_synth_prompt'},\n", - " 'data': {'message_id': 'make_trace_case.synth_node:25'},\n", - " 'info': {'otel': {'trace_id': 'e084ed906c9eb65d3e02bdce213a13d5',\n", - " 'span_id': 'c9bb4019e1243e7f',\n", - " 'parent_span_id': 'eb31066d3502e810',\n", - " 'service': 'trace+otel',\n", - " 'temporal_ignore': False}}}},\n", + " 'otel_meta': {'trace_id': None},\n", + " 'nodes': {},\n", " 'context': {}},\n", - " 'summary': {'node_count': 4,\n", - " 'message_names': ['make_trace_case.planner_node',\n", - " 'make_trace_case.synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}'},\n", - " 'span_count': 2,\n", - " 'span_names': ['make_trace_case.planner_node',\n", - " 'make_trace_case.synth_node'],\n", - " 'param_keys': ['param.planner_prompt',\n", - " 'param.planner_prompt.trainable',\n", - " 'param.synth_prompt',\n", - " 'param.synth_prompt.trainable']}}]},\n", + " 'summary': {'node_count': 0,\n", + " 'message_names': [],\n", + " 'semantic_messages': [],\n", + " 'param_names': [],\n", + " 'param_values': {},\n", + " 'span_count': 0,\n", + " 'span_names': [],\n", + " 'param_keys': []}}]},\n", " {'config': 'otel',\n", - " 'runtime_s': 80.465,\n", - " 'baseline_score': 0.669,\n", - " 'best_score': 0.95,\n", - " 'score_gain': 0.281,\n", - " 'best_iteration': 4,\n", - " 'score_history': [0.669, 0.842, 0.92, 0.907, 0.95, 0.922],\n", - " 'stability_std': 0.014,\n", + " 'runtime_s': 60.922,\n", + " 'baseline_score': 0.842,\n", + " 'best_score': 0.876,\n", + " 'score_gain': 0.035,\n", + " 'best_iteration': 3,\n", + " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'stability_std': 0.0,\n", " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': \"CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n\\n### Mechanism of CRISPR\\n**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. 
\\n**Process**: The CRISPR system works by first using the gRNA to locate the target DNA sequence. Once bound, the Cas9 enzyme makes a double-strand break in the DNA. The cell's natural repair mechanisms then kick in, allowing for either the insertion of new genetic material or the deletion of existing sequences.\\n\\n### Applications of CRISPR\\n**Medical**: CRISPR has potential applications in gene therapy for genetic disorders like cystic fibrosis and sickle cell anemia, as well as in cancer research to target and modify cancer cells. \\n**Agricultural**: In agriculture, CRISPR is used to create crops that are\",\n", - " 'answer_preview': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n...',\n", + " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. 
It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", + " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", " 'observers': [],\n", " 'views': [{'carrier': 'otel',\n", " 'origin': 'backend',\n", " 'doc': {'version': 'trace-json/1.0+otel',\n", " 'agent': {'id': 'otel', 'service': 'otel'},\n", - " 'otel_meta': {'trace_id': 'f8b383c2cc0ce3915b61ae951e18c79d'},\n", + " 'otel_meta': {'trace_id': '045d8d6bda648fdc70fcf308a6431a7b'},\n", " 'nodes': {'otel:param_planner_prompt': {'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", " 'data': 'Create a short plan for: {query}',\n", " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '88723fd007309194'}}},\n", - " 'otel:88723fd007309194': {'kind': 'msg',\n", + " 'info': {'otel': {'span_id': 'bba757639cf0965d'}}},\n", + " 'otel:bba757639cf0965d': {'kind': 'msg',\n", " 'name': 'planner_node',\n", " 'op': 'llm_call',\n", " 'inputs': {'gen_ai.prompt': 'Create a short plan for: What is CRISPR?',\n", @@ -1435,49 +1350,49 @@ " 'query': 'otel:What is CRISPR?',\n", " 'param_planner_prompt': 'otel:param_planner_prompt'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': 'f8b383c2cc0ce3915b61ae951e18c79d',\n", - " 'span_id': '88723fd007309194',\n", + " 'info': {'otel': {'trace_id': '045d8d6bda648fdc70fcf308a6431a7b',\n", + " 'span_id': 'bba757639cf0965d',\n", " 'parent_span_id': None,\n", " 'service': 'otel',\n", " 'temporal_ignore': False}}},\n", - " 'otel:6fde46abcebec3c1': {'kind': 'msg',\n", + " 'otel:e5b8790271769b15': {'kind': 'msg',\n", " 'name': 'llm.chat.completion',\n", " 'op': 'unspecified',\n", - " 'inputs': {'parent': 
'otel:88723fd007309194'},\n", + " 'inputs': {'parent': 'otel:bba757639cf0965d'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': 'f8b383c2cc0ce3915b61ae951e18c79d',\n", - " 'span_id': '6fde46abcebec3c1',\n", - " 'parent_span_id': '88723fd007309194',\n", + " 'info': {'otel': {'trace_id': '045d8d6bda648fdc70fcf308a6431a7b',\n", + " 'span_id': 'e5b8790271769b15',\n", + " 'parent_span_id': 'bba757639cf0965d',\n", " 'service': 'otel',\n", " 'temporal_ignore': True}}},\n", " 'otel:param_synth_prompt': {'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", " 'data': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", - " 'info': {'otel': {'span_id': 'd8b85ff3c280e8d1'}}},\n", - " 'otel:d8b85ff3c280e8d1': {'kind': 'msg',\n", + " 'info': {'otel': {'span_id': '7352e0f1c2c425ab'}}},\n", + " 'otel:7352e0f1c2c425ab': {'kind': 'msg',\n", " 'name': 'synth_node',\n", " 'op': 'llm_call',\n", - " 'inputs': {'gen_ai.prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: What is CRISPR?\\nPlan: ### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. **Introduction to CRISPR**\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\n - Historical Context: Brief overview of its discovery in bacteria and its adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components: Describe the key components (Cas9 enzyme, guide RNA).\\n - Process: Outline the steps of how CRISPR works (targeting DNA, cutting, and repairing).\\n\\n3. 
**Applications of CRISPR**\\n - Medical: Discuss potential uses in gene therapy, treatment of genetic disorders, and cancer research.\\n - Agricultural: Explain how CRISPR is used to enhance crop resilience and yield.\\n - Industrial: Mention applications in bioengineering and synthetic biology.\\n\\n4. **Ethical Considerations**\\n - Discuss the ethical implications',\n", + " 'inputs': {'gen_ai.prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: What is CRISPR?\\nPlan: This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not',\n", " 'user_query': 'otel:What is CRISPR?',\n", " 'query': 'otel:What is CRISPR?',\n", - " 'plan': '### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. 
**Introduction to CRISPR**\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\n - Historical Context: Brief overview of its discovery in bacteria and its adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components: Describe the key components (Cas9 enzyme, guide RNA).\\n - Process: Outline the steps of how CRISPR works (targeting DNA, cutting, and repairing).\\n\\n3. **Applications of CRISPR**\\n - Medical: Discuss potential uses in gene therapy, treatment of genetic disorders, and cancer research.\\n - Agricultural: Explain how CRISPR is used to enhance crop resilience and yield.\\n - Industrial: Mention applications in bioengineering and synthetic biology.\\n\\n4. **Ethical Considerations**\\n - Discuss the ethical implications',\n", - " 'parent': 'otel:88723fd007309194',\n", + " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not',\n", + " 'parent': 'otel:bba757639cf0965d',\n", " 'param_synth_prompt': 'otel:param_synth_prompt'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': 'f8b383c2cc0ce3915b61ae951e18c79d',\n", - " 'span_id': 'd8b85ff3c280e8d1',\n", - " 'parent_span_id': '88723fd007309194',\n", + " 'info': {'otel': {'trace_id': '045d8d6bda648fdc70fcf308a6431a7b',\n", + " 'span_id': '7352e0f1c2c425ab',\n", + " 'parent_span_id': 'bba757639cf0965d',\n", " 'service': 'otel',\n", " 'temporal_ignore': False}}},\n", - " 'otel:b07a737c463ca52f': {'kind': 'msg',\n", + " 'otel:7e1949f963de0140': {'kind': 'msg',\n", " 'name': 'llm.chat.completion',\n", " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel:d8b85ff3c280e8d1'},\n", + " 'inputs': {'parent': 'otel:7352e0f1c2c425ab'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': 'f8b383c2cc0ce3915b61ae951e18c79d',\n", - " 'span_id': 'b07a737c463ca52f',\n", - " 'parent_span_id': 'd8b85ff3c280e8d1',\n", + " 'info': {'otel': {'trace_id': '045d8d6bda648fdc70fcf308a6431a7b',\n", + " 'span_id': '7e1949f963de0140',\n", + " 'parent_span_id': '7352e0f1c2c425ab',\n", " 'service': 'otel',\n", " 'temporal_ignore': True}}}},\n", " 'context': {}},\n", @@ -1501,17 +1416,17 @@ " 'param.synth_prompt',\n", " 'param.synth_prompt.trainable']}}]},\n", " {'config': 'trace+sysmon',\n", - " 'runtime_s': 78.316,\n", - " 'baseline_score': 0.732,\n", - " 'best_score': 0.923,\n", - " 'score_gain': 0.192,\n", - " 'best_iteration': 4,\n", - " 'score_history': [0.732, 0.714, 0.92, 0.92, 0.923, 0.88],\n", - " 'stability_std': 0.022,\n", - " 'best_updates': {},\n", + " 'runtime_s': 60.176,\n", + " 'baseline_score': 0.842,\n", + " 'best_score': 0.876,\n", + " 'score_gain': 0.035,\n", + " 
'best_iteration': 3,\n", + " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'stability_std': 0.0,\n", + " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows for precise modifications to DNA in living organisms.\\n\\n### Mechanism of CRISPR\\nCRISPR consists of two main components: CRISPR sequences, which serve as a genetic memory of past viral infections, and the Cas9 enzyme, which acts as molecular scissors to cut DNA. The process begins with the guide RNA, which is designed to match a specific DNA sequence, directing the Cas9 enzyme to the target site for cleavage, leading to DNA repair mechanisms that can introduce desired genetic changes.\\n\\n### Applications of CRISPR\\nCRISPR has a wide range of applications, including in medicine for gene therapy to treat genetic disorders and cancer, in agriculture for developing crops with improved traits such as pest resistance and enhanced nutritional value, and in industrial settings for biomanufacturing processes that require precise genetic modifications.\\n\\n### Ethical Considerations\\nThe use of CRISPR raises significant ethical concerns, particularly regarding potential unintended consequences of gene editing, the implications of germline modifications that can be inherited, and the need for regulations to prevent misuse in areas',\n", - " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows for precise modifications to DNA in living organisms.\\n\\n### Mechanism of CRISPR\\nCRISPR consists 
of two main componen...',\n", + " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", + " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", " 'observers': ['sysmon'],\n", " 'views': [{'carrier': 'trace',\n", " 'origin': 'backend',\n", @@ -1533,7 +1448,7 @@ " 'inputs': {'in_0': {'ref': 'state'}},\n", " 'output': {'name': 'make_trace_case.planner_node:out',\n", " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': '### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. 
**Introduction to CRISPR**\\n - Definition: Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR) as a revolutionary gene-editing technology.\\n - Brief history: Discovery and development timeline.\\n\\n2. **Mechanism of CRISPR**\\n - Components:\\n - CRISPR sequences: Explanation of how they function as a genetic memory.\\n - Cas9 enzyme: Role in cutting DNA.\\n - Process:\\n - Guide RNA: How it directs Cas9 to the target DNA.\\n - DNA cleavage: Mechanism of action and repair processes.\\n\\n3. **Applications of CRISPR**\\n - Medical: Gene therapy, treatment of genetic disorders, cancer research.\\n - Agricultural: Crop improvement, pest resistance, and sustainability.\\n - Industrial: Biomanufacturing and bioengineering.\\n\\n4. **Ethical Considerations**\\n -'}}},\n", + " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}}},\n", " {'id': 'make_trace_case.synth_node',\n", " 'kind': 'message',\n", " 'name': 'make_trace_case.synth_node',\n", @@ -1541,7 +1456,7 @@ " 'description': '[make_trace_case.synth_node]',\n", " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", + " 'value': {'final_answer': }}},\n", " {'id': 'planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", @@ -1580,24 +1495,24 @@ " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", - " 'msg:e4b0ccde1af94263': {'id': 'msg:e4b0ccde1af94263',\n", + " 'msg:90fe038fb3a74237': {'id': 'msg:90fe038fb3a74237',\n", " 'kind': 'message',\n", " 'name': 'planner_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:496',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:568',\n", " 'inputs': {},\n", " 'output': {'name': 'planner_node:out',\n", - " 'value': \"{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n###\"},\n", - " 'info': {'sysmon': {'duration_ns': 4389947208,\n", - " 'thread_id': 128068499416896}}},\n", - " 'msg:363310e6046f4844': {'id': 'msg:363310e6046f4844',\n", + " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This plan outlines a 
concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\\\n\\\\n### **G\"},\n", + " 'info': {'sysmon': {'duration_ns': 2722412274,\n", + " 'thread_id': 129644846245696}}},\n", + " 'msg:7792dfbd56784d38': {'id': 'msg:7792dfbd56784d38',\n", " 'kind': 'message',\n", " 'name': 'synth_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:507',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:579',\n", " 'inputs': {},\n", " 'output': {'name': 'synth_node:out',\n", - " 'value': \"{'final_answer': }\"},\n", - " 'info': {'sysmon': {'duration_ns': 2210177279,\n", - " 'thread_id': 128068499416896}}}}},\n", + " 'value': \"{'final_answer': }\"},\n", + " 'info': {'sysmon': {'duration_ns': 2220630467,\n", + " 'thread_id': 129644846245696}}}}},\n", " 'summary': {'node_count': 4,\n", " 'message_names': ['planner_node', 'synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", @@ -1606,17 +1521,17 @@ " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", " 'event_count': 2}}]},\n", " {'config': 'trace+otel+sysmon',\n", - " 'runtime_s': 77.963,\n", - " 'baseline_score': 0.714,\n", - " 'best_score': 0.923,\n", - " 'score_gain': 0.21,\n", + " 'runtime_s': 61.539,\n", + " 'baseline_score': 0.842,\n", + " 'best_score': 0.876,\n", + " 'score_gain': 0.035,\n", " 'best_iteration': 3,\n", - " 'score_history': [0.714, 0.712, 0.92, 0.923, 0.88, 0.898],\n", - " 'stability_std': 0.018,\n", + " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'stability_std': 0.0,\n", " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. 
Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n\\n### Mechanism of CRISPR\\n**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \\n**Process**: The CRISPR system works by first introducing the gRNA into a cell, where it binds to the target DNA sequence. The Cas9 enzyme then cuts the DNA at this location, allowing for either the disruption of the gene or the insertion of new genetic material during the repair process.\\n\\n### Applications of CRISPR\\n**Medical**: CRISPR has potential applications in gene therapy for genetic disorders like cystic fibrosis and sickle cell anemia, as well as in cancer research to target and modify cancer cells. 
\\n**Agricultural**: It is used to create genetically modified organisms (GMOs) that are resistant to diseases,',\n", - " 'answer_preview': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n...',\n", + " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. 
It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", + " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", " 'observers': ['sysmon', 'otel'],\n", " 'views': [{'carrier': 'trace',\n", " 'origin': 'backend',\n", @@ -1638,7 +1553,7 @@ " 'inputs': {'in_0': {'ref': 'state'}},\n", " 'output': {'name': 'make_trace_case.planner_node:out',\n", " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': '### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. **Introduction to CRISPR**\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\n - Historical Context: Brief overview of its discovery in bacteria and its adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components: Describe the key components (Cas9 enzyme, guide RNA).\\n - Process: Outline the steps of how CRISPR works (targeting DNA, cutting, and repairing).\\n\\n3. **Applications of CRISPR**\\n - Medical: Discuss potential uses in gene therapy, treatment of genetic disorders, and cancer research.\\n - Agricultural: Explain its role in developing disease-resistant crops and improving food security.\\n - Industrial: Mention applications in bioengineering and synthetic biology.\\n\\n4. 
**Ethical Considerations**\\n - Discuss the ethical implications of'}}},\n", + " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}}},\n", " {'id': 'make_trace_case.synth_node',\n", " 'kind': 'message',\n", " 'name': 'make_trace_case.synth_node',\n", @@ -1646,7 +1561,7 @@ " 'description': '[make_trace_case.synth_node]',\n", " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", + " 'value': {'final_answer': }}},\n", " {'id': 'planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", @@ -1685,24 +1600,24 @@ " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", - " 'msg:f8b47aac286c4268': {'id': 'msg:f8b47aac286c4268',\n", + " 'msg:9f1628e1414b4abb': {'id': 'msg:9f1628e1414b4abb',\n", " 'kind': 'message',\n", " 'name': 'planner_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:496',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:568',\n", " 'inputs': {},\n", " 'output': {'name': 'planner_node:out',\n", - " 'value': \"{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n###\"},\n", - " 'info': {'sysmon': {'duration_ns': 2640491933,\n", - " 'thread_id': 128068499416896}}},\n", - " 'msg:03dd0fe79ab14207': {'id': 'msg:03dd0fe79ab14207',\n", + " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\\\n\\\\n### **G\"},\n", + " 'info': {'sysmon': {'duration_ns': 2666017980,\n", + " 'thread_id': 129644846245696}}},\n", + " 'msg:d2d5d39b4642462c': {'id': 'msg:d2d5d39b4642462c',\n", " 'kind': 'message',\n", " 'name': 'synth_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:507',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:579',\n", " 'inputs': {},\n", " 'output': {'name': 'synth_node:out',\n", - " 'value': \"{'final_answer': }\"},\n", - " 'info': {'sysmon': {'duration_ns': 2981741181,\n", - " 'thread_id': 128068499416896}}}}},\n", + " 'value': \"{'final_answer': }\"},\n", + " 'info': {'sysmon': 
{'duration_ns': 2492562121,\n", + " 'thread_id': 129644846245696}}}}},\n", " 'summary': {'node_count': 4,\n", " 'message_names': ['planner_node', 'synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", @@ -1714,93 +1629,41 @@ " 'origin': 'observer',\n", " 'doc': {'version': 'trace-json/1.0+otel',\n", " 'agent': {'id': 'trace+otel+sysmon', 'service': 'trace+otel+sysmon'},\n", - " 'otel_meta': {'trace_id': '7de6ce7664146c258b9e14edf163903d'},\n", - " 'nodes': {'trace+otel+sysmon:param_planner_prompt': {'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'data': 'Create a short plan for: {query}',\n", - " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '135a9e1427adf34d'}}},\n", - " 'trace+otel+sysmon:make_trace_case.planner_node:51': {'kind': 'msg',\n", - " 'name': 'make_trace_case.planner_node',\n", - " 'op': 'unspecified',\n", - " 'inputs': {'x': \"lit:{'query': 'What is CRISPR?'}\",\n", - " 'index': 'lit:query',\n", - " 'state': \"lit:{'query': 'What is CRISPR?'}\",\n", - " 'state51_copy:0': \"lit:{'query': 'What is CRISPR?'}\",\n", - " 'str:153': 'lit:query',\n", - " 'planner_prompt:3': 'lit:Create a short plan for: {query}',\n", - " 'getitem:153': 'langgraph-agent-otel-observer:getitem:153',\n", - " 'param_planner_prompt': 'trace+otel+sysmon:param_planner_prompt'},\n", - " 'data': {'message_id': 'make_trace_case.planner_node:51'},\n", - " 'info': {'otel': {'trace_id': 'c6e4d39130951e78f594615b3d418776',\n", - " 'span_id': '135a9e1427adf34d',\n", - " 'parent_span_id': '',\n", - " 'service': 'trace+otel+sysmon',\n", - " 'temporal_ignore': False}}},\n", - " 'trace+otel+sysmon:param_synth_prompt': {'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'data': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '759cde4e161d78f2'}}},\n", - " 'trace+otel+sysmon:make_trace_case.synth_node:51': {'kind': 'msg',\n", - " 'name': 'make_trace_case.synth_node',\n", - " 'op': 'unspecified',\n", - " 'inputs': {'x': \"lit:{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n#### Outline:\\\\n\\\\n1. **Introduction to CRISPR**\\\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\\\n - Historical Context: Brief overview of its discovery in bacteria and its adaptation for genetic engineering.\\\\n\\\\n2.…\",\n", - " 'index': 'lit:plan',\n", - " 'state': 'langgraph-agent-otel-observer:make_trace_case.planner_node:51',\n", - " 'getitem:155': 'langgraph-agent-otel-observer:getitem:155',\n", - " 'synth_prompt:3': 'lit:Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'make_trace_case.planner_node51_copy:0': \"lit:{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n#### Outline:\\\\n\\\\n1. 
**Introduction to CRISPR**\\\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\\\n - Historical Context: Brief overview of its discovery in bacteria and its adaptation for genetic engineering.\\\\n\\\\n2.…\",\n", - " 'str:155': 'lit:plan',\n", - " 'str:154': 'lit:query',\n", - " 'getitem:154': 'langgraph-agent-otel-observer:getitem:154',\n", - " 'parent': 'trace+otel+sysmon:make_trace_case.planner_node:51',\n", - " 'param_synth_prompt': 'trace+otel+sysmon:param_synth_prompt'},\n", - " 'data': {'message_id': 'make_trace_case.synth_node:51'},\n", - " 'info': {'otel': {'trace_id': '7de6ce7664146c258b9e14edf163903d',\n", - " 'span_id': '759cde4e161d78f2',\n", - " 'parent_span_id': '135a9e1427adf34d',\n", - " 'service': 'trace+otel+sysmon',\n", - " 'temporal_ignore': False}}}},\n", + " 'otel_meta': {'trace_id': None},\n", + " 'nodes': {},\n", " 'context': {}},\n", - " 'summary': {'node_count': 4,\n", - " 'message_names': ['make_trace_case.planner_node',\n", - " 'make_trace_case.synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}'},\n", - " 'span_count': 2,\n", - " 'span_names': ['make_trace_case.planner_node',\n", - " 'make_trace_case.synth_node'],\n", - " 'param_keys': ['param.planner_prompt',\n", - " 'param.planner_prompt.trainable',\n", - " 'param.synth_prompt',\n", - " 'param.synth_prompt.trainable']}}]},\n", + " 'summary': {'node_count': 0,\n", + " 'message_names': [],\n", + " 'semantic_messages': [],\n", + " 'param_names': [],\n", + " 'param_values': {},\n", + " 'span_count': 0,\n", + " 'span_names': [],\n", + " 'param_keys': []}}]},\n", " {'config': 'otel+sysmon',\n", - " 'runtime_s': 93.479,\n", + " 'runtime_s': 59.493,\n", " 'baseline_score': 0.842,\n", - " 'best_score': 0.95,\n", - " 'score_gain': 0.108,\n", - " 'best_iteration': 2,\n", - " 'score_history': [0.842, 0.715, 0.95, 0.923, 0.923, 0.887],\n", - " 'stability_std': 0.023,\n", - " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add two short titled sections with concrete details: {query}\\nPlan: {plan}'},\n", + " 'best_score': 0.876,\n", + " 'score_gain': 0.035,\n", + " 'best_iteration': 3,\n", + " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'stability_std': 0.0,\n", + " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n\\n### Mechanism of CRISPR\\n**Components**: CRISPR technology primarily involves the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \\n**Process**: The CRISPR system works by first introducing the gRNA into a cell, where it binds to the target DNA sequence. The Cas9 enzyme then cuts the DNA at this location, allowing for either the disruption of the gene or the insertion of new genetic material during the repair process.\\n\\n### Applications of CRISPR\\n**Medical**: CRISPR has potential applications in gene therapy for genetic disorders like sickle cell anemia and cystic fibrosis, as well as in cancer research to target and modify cancer cells. \\n**Agricultural**: In agriculture, CRISPR is used to create crops that are more resistant to pests,',\n", - " 'answer_preview': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n...',\n", + " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. 
The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", + " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", " 'observers': ['sysmon'],\n", " 'views': [{'carrier': 'otel',\n", " 'origin': 'backend',\n", " 'doc': {'version': 'trace-json/1.0+otel',\n", " 'agent': {'id': 'otel+sysmon', 'service': 'otel+sysmon'},\n", - " 'otel_meta': {'trace_id': '82b3197cb086218de00a317fce630480'},\n", + " 'otel_meta': {'trace_id': '60879cfcc2b75ecfc807cd4ecd2a6f1a'},\n", " 'nodes': {'otel+sysmon:param_planner_prompt': {'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", " 'data': 'Create a short plan for: {query}',\n", " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '37486a6f8ae2a237'}}},\n", - " 'otel+sysmon:37486a6f8ae2a237': {'kind': 'msg',\n", + " 'info': {'otel': {'span_id': '511b43fa507d20c0'}}},\n", + " 'otel+sysmon:511b43fa507d20c0': {'kind': 'msg',\n", " 'name': 'planner_node',\n", " 'op': 'llm_call',\n", " 'inputs': {'gen_ai.prompt': 'Create a short plan for: What is CRISPR?',\n", @@ -1808,49 +1671,49 @@ " 'query': 'otel+sysmon:What is CRISPR?',\n", " 'param_planner_prompt': 'otel+sysmon:param_planner_prompt'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '82b3197cb086218de00a317fce630480',\n", - " 'span_id': '37486a6f8ae2a237',\n", + " 'info': {'otel': {'trace_id': 
'60879cfcc2b75ecfc807cd4ecd2a6f1a',\n", + " 'span_id': '511b43fa507d20c0',\n", " 'parent_span_id': None,\n", " 'service': 'otel+sysmon',\n", " 'temporal_ignore': False}}},\n", - " 'otel+sysmon:1888c14cd5a95e1d': {'kind': 'msg',\n", + " 'otel+sysmon:159219852aad1a53': {'kind': 'msg',\n", " 'name': 'llm.chat.completion',\n", " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel+sysmon:37486a6f8ae2a237'},\n", + " 'inputs': {'parent': 'otel+sysmon:511b43fa507d20c0'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '82b3197cb086218de00a317fce630480',\n", - " 'span_id': '1888c14cd5a95e1d',\n", - " 'parent_span_id': '37486a6f8ae2a237',\n", + " 'info': {'otel': {'trace_id': '60879cfcc2b75ecfc807cd4ecd2a6f1a',\n", + " 'span_id': '159219852aad1a53',\n", + " 'parent_span_id': '511b43fa507d20c0',\n", " 'service': 'otel+sysmon',\n", " 'temporal_ignore': True}}},\n", " 'otel+sysmon:param_synth_prompt': {'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", " 'data': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '6b579b7918ca7492'}}},\n", - " 'otel+sysmon:6b579b7918ca7492': {'kind': 'msg',\n", + " 'info': {'otel': {'span_id': 'b52a69c8b081de0f'}}},\n", + " 'otel+sysmon:b52a69c8b081de0f': {'kind': 'msg',\n", " 'name': 'synth_node',\n", " 'op': 'llm_call',\n", - " 'inputs': {'gen_ai.prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: What is CRISPR?\\nPlan: ### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. 
**Introduction to CRISPR**\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\n - Historical Context: Briefly discuss its discovery in bacteria and its adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components: Describe the key components (Cas9 enzyme, guide RNA).\\n - Process: Outline the steps of how CRISPR works (targeting DNA, cutting, and repairing).\\n\\n3. **Applications of CRISPR**\\n - Medical: Discuss potential uses in gene therapy, treatment of genetic disorders, and cancer research.\\n - Agricultural: Explain how CRISPR is used to enhance crop resilience and yield.\\n - Industrial: Mention applications in bioengineering and synthetic biology.\\n\\n4. **Ethical Considerations**\\n - Discuss the ethical implications',\n", + " 'inputs': {'gen_ai.prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: What is CRISPR?\\nPlan: This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not',\n", " 'user_query': 'otel+sysmon:What is CRISPR?',\n", " 'query': 'otel+sysmon:What is CRISPR?',\n", - " 'plan': '### Short Plan: What is CRISPR?\\n\\n#### Objective:\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\n\\n#### Outline:\\n\\n1. **Introduction to CRISPR**\\n - Definition: Explain CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a revolutionary gene-editing technology.\\n - Historical Context: Briefly discuss its discovery in bacteria and its adaptation for genetic engineering.\\n\\n2. **Mechanism of CRISPR**\\n - Components: Describe the key components (Cas9 enzyme, guide RNA).\\n - Process: Outline the steps of how CRISPR works (targeting DNA, cutting, and repairing).\\n\\n3. **Applications of CRISPR**\\n - Medical: Discuss potential uses in gene therapy, treatment of genetic disorders, and cancer research.\\n - Agricultural: Explain how CRISPR is used to enhance crop resilience and yield.\\n - Industrial: Mention applications in bioengineering and synthetic biology.\\n\\n4. **Ethical Considerations**\\n - Discuss the ethical implications',\n", - " 'parent': 'otel+sysmon:37486a6f8ae2a237',\n", + " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. 
The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not',\n", + " 'parent': 'otel+sysmon:511b43fa507d20c0',\n", " 'param_synth_prompt': 'otel+sysmon:param_synth_prompt'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '82b3197cb086218de00a317fce630480',\n", - " 'span_id': '6b579b7918ca7492',\n", - " 'parent_span_id': '37486a6f8ae2a237',\n", + " 'info': {'otel': {'trace_id': '60879cfcc2b75ecfc807cd4ecd2a6f1a',\n", + " 'span_id': 'b52a69c8b081de0f',\n", + " 'parent_span_id': '511b43fa507d20c0',\n", " 'service': 'otel+sysmon',\n", " 'temporal_ignore': False}}},\n", - " 'otel+sysmon:fe2a22580e1b3dbd': {'kind': 'msg',\n", + " 'otel+sysmon:9a44a496f06003e2': {'kind': 'msg',\n", " 'name': 'llm.chat.completion',\n", " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel+sysmon:6b579b7918ca7492'},\n", + " 'inputs': {'parent': 'otel+sysmon:b52a69c8b081de0f'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '82b3197cb086218de00a317fce630480',\n", - " 'span_id': 'fe2a22580e1b3dbd',\n", - " 'parent_span_id': '6b579b7918ca7492',\n", + " 'info': {'otel': {'trace_id': '60879cfcc2b75ecfc807cd4ecd2a6f1a',\n", + " 'span_id': '9a44a496f06003e2',\n", + " 'parent_span_id': 'b52a69c8b081de0f',\n", " 'service': 'otel+sysmon',\n", " 'temporal_ignore': True}}}},\n", " 'context': {}},\n", @@ -1892,24 +1755,24 @@ " 'value': 'Answer directly in 
the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", - " 'msg:16912ef8e4a24c73': {'id': 'msg:16912ef8e4a24c73',\n", + " 'msg:0775e1673ce14ee8': {'id': 'msg:0775e1673ce14ee8',\n", " 'kind': 'message',\n", " 'name': 'planner_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:468',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:540',\n", " 'inputs': {},\n", " 'output': {'name': 'planner_node:out',\n", - " 'value': \"{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n###\"},\n", - " 'info': {'sysmon': {'duration_ns': 2786715131,\n", - " 'thread_id': 128068499416896}}},\n", - " 'msg:f3668974d5434f72': {'id': 'msg:f3668974d5434f72',\n", + " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\\\n\\\\n### **G\"},\n", + " 'info': {'sysmon': {'duration_ns': 3024132044,\n", + " 'thread_id': 129644846245696}}},\n", + " 'msg:cde03647185a4166': {'id': 'msg:cde03647185a4166',\n", " 'kind': 'message',\n", " 'name': 'synth_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:472',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:544',\n", " 'inputs': {},\n", " 'output': {'name': 'synth_node:out',\n", - " 'value': \"{'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic 
Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\\\n\\\\n##\"},\n", - " 'info': {'sysmon': {'duration_ns': 2238104071,\n", - " 'thread_id': 128068499416896}}}}},\n", + " 'value': \"{'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\\\n\\\\n### **Mechanis\"},\n", + " 'info': {'sysmon': {'duration_ns': 2388306024,\n", + " 'thread_id': 129644846245696}}}}},\n", " 'summary': {'node_count': 4,\n", " 'message_names': ['planner_node', 'synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", @@ -1918,17 +1781,17 @@ " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", " 'event_count': 2}}]},\n", " {'config': 'sysmon',\n", - " 'runtime_s': 77.312,\n", - " 'baseline_score': 0.822,\n", - " 'best_score': 0.944,\n", - " 'score_gain': 0.122,\n", - " 'best_iteration': 2,\n", - " 'score_history': [0.822, 0.669, 0.944, 0.923, 0.88, 0.887],\n", - " 'stability_std': 0.026,\n", + " 'runtime_s': 57.76,\n", + " 'baseline_score': 0.842,\n", + " 'best_score': 0.876,\n", + " 'score_gain': 0.035,\n", + " 'best_iteration': 3,\n", + " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'stability_std': 0.0,\n", " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': \"CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n\\n### Mechanism of CRISPR\\n**Components**: CRISPR technology primarily consists of the Cas9 enzyme, which acts as molecular scissors, and a guide RNA (gRNA) that directs Cas9 to the specific DNA sequence to be edited. \\n**Process**: The CRISPR process involves the gRNA binding to the target DNA sequence, the Cas9 enzyme creating a double-strand break in the DNA, and the cell's natural repair mechanisms either introducing mutations or incorporating new genetic material during the repair process.\\n\\n### Applications of CRISPR\\n**Medical**: CRISPR has potential applications in gene therapy for genetic disorders like sickle cell anemia, as well as in cancer research to target and modify cancer cells. \\n**Agricultural**: It is used to create genetically modified organisms (GMOs) that are resistant to diseases, pests, and environmental stresses, thereby enhancing food security\",\n", - " 'answer_preview': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various organisms.\\n...',\n", + " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. 
The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", + " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", " 'observers': [],\n", " 'views': [{'carrier': 'sysmon',\n", " 'origin': 'backend',\n", @@ -1949,24 +1812,24 @@ " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", - " 'msg:259d9c51cfc54755': {'id': 'msg:259d9c51cfc54755',\n", + " 'msg:fca7538c517441c8': {'id': 'msg:fca7538c517441c8',\n", " 'kind': 'message',\n", " 'name': 'planner_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:468',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:540',\n", " 'inputs': {},\n", " 'output': {'name': 'planner_node:out',\n", - " 'value': \"{'query': 'What is CRISPR?', 'plan': '### Short Plan: What is CRISPR?\\\\n\\\\n#### Objective:\\\\nTo provide a clear and concise understanding of CRISPR, its mechanisms, applications, and implications.\\\\n\\\\n###\"},\n", - " 'info': {'sysmon': {'duration_ns': 2861239637,\n", - " 'thread_id': 128068499416896}}},\n", - " 'msg:11a94c306b234ca1': {'id': 'msg:11a94c306b234ca1',\n", + " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\\\n\\\\n### **G\"},\n", + " 'info': {'sysmon': {'duration_ns': 2500216298,\n", + " 'thread_id': 129644846245696}}},\n", + " 'msg:80b89e47995d4494': {'id': 'msg:80b89e47995d4494',\n", " 'kind': 'message',\n", " 'name': 'synth_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:472',\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:544',\n", " 'inputs': {},\n", " 'output': {'name': 'synth_node:out',\n", - " 'value': '{\\'final_answer\\': \"CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that allows for precise modifications to DNA in various 
organisms.\\\\n\\\\n##'},\n", - " 'info': {'sysmon': {'duration_ns': 8902208450,\n", - " 'thread_id': 128068499416896}}}}},\n", + " 'value': \"{'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\\\n\\\\n### **Mechanis\"},\n", + " 'info': {'sysmon': {'duration_ns': 2567241093,\n", + " 'thread_id': 129644846245696}}}}},\n", " 'summary': {'node_count': 4,\n", " 'message_names': ['planner_node', 'synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", diff --git a/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb b/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb index 2b8be32d..65857ec8 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb @@ -56,10 +56,10 @@ "execution_count": 1, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:20.692437Z", - "iopub.status.busy": "2026-04-19T09:32:20.691949Z", - "iopub.status.idle": "2026-04-19T09:32:22.337110Z", - "shell.execute_reply": "2026-04-19T09:32:22.335797Z" + "iopub.execute_input": "2026-04-24T14:43:17.980408Z", + "iopub.status.busy": "2026-04-24T14:43:17.980173Z", + "iopub.status.idle": "2026-04-24T14:43:19.093749Z", + "shell.execute_reply": "2026-04-24T14:43:19.092425Z" } }, "outputs": [ @@ -123,10 +123,10 @@ "execution_count": 2, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:22.380381Z", - "iopub.status.busy": "2026-04-19T09:32:22.379864Z", - "iopub.status.idle": "2026-04-19T09:32:22.388884Z", - "shell.execute_reply": "2026-04-19T09:32:22.387589Z" + "iopub.execute_input": "2026-04-24T14:43:19.143735Z", + "iopub.status.busy": "2026-04-24T14:43:19.143450Z", + "iopub.status.idle": "2026-04-24T14:43:19.150322Z", + "shell.execute_reply": "2026-04-24T14:43:19.148951Z" } }, "outputs": [ @@ -183,10 +183,10 @@ 
"execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:22.392813Z", - "iopub.status.busy": "2026-04-19T09:32:22.392359Z", - "iopub.status.idle": "2026-04-19T09:32:22.400816Z", - "shell.execute_reply": "2026-04-19T09:32:22.399809Z" + "iopub.execute_input": "2026-04-24T14:43:19.153203Z", + "iopub.status.busy": "2026-04-24T14:43:19.152991Z", + "iopub.status.idle": "2026-04-24T14:43:19.160786Z", + "shell.execute_reply": "2026-04-24T14:43:19.159546Z" } }, "outputs": [ @@ -197,7 +197,7 @@ "[INFO] API key loaded from environment variable.\n", "\n", "API key: [SET]\n", - "Model: nvidia/nemotron-3-super-120b-a12b:free\n", + "Model: google/gemini-3-flash-preview\n", "Budget: max_tokens=256, temperature=0\n" ] } @@ -265,10 +265,10 @@ "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:22.404203Z", - "iopub.status.busy": "2026-04-19T09:32:22.403715Z", - "iopub.status.idle": "2026-04-19T09:32:22.948136Z", - "shell.execute_reply": "2026-04-19T09:32:22.947079Z" + "iopub.execute_input": "2026-04-24T14:43:19.163941Z", + "iopub.status.busy": "2026-04-24T14:43:19.163732Z", + "iopub.status.idle": "2026-04-24T14:43:19.690243Z", + "shell.execute_reply": "2026-04-24T14:43:19.688959Z" } }, "outputs": [ @@ -543,10 +543,10 @@ "execution_count": 5, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:22.951549Z", - "iopub.status.busy": "2026-04-19T09:32:22.951334Z", - "iopub.status.idle": "2026-04-19T09:32:22.963711Z", - "shell.execute_reply": "2026-04-19T09:32:22.962817Z" + "iopub.execute_input": "2026-04-24T14:43:19.692995Z", + "iopub.status.busy": "2026-04-24T14:43:19.692826Z", + "iopub.status.idle": "2026-04-24T14:43:19.705614Z", + "shell.execute_reply": "2026-04-24T14:43:19.704761Z" } }, "outputs": [ @@ -736,10 +736,10 @@ "execution_count": 6, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:22.967148Z", - "iopub.status.busy": "2026-04-19T09:32:22.966882Z", - 
"iopub.status.idle": "2026-04-19T09:32:25.472657Z", - "shell.execute_reply": "2026-04-19T09:32:25.471707Z" + "iopub.execute_input": "2026-04-24T14:43:19.708076Z", + "iopub.status.busy": "2026-04-24T14:43:19.707899Z", + "iopub.status.idle": "2026-04-24T14:43:21.737471Z", + "shell.execute_reply": "2026-04-24T14:43:21.736701Z" } }, "outputs": [ @@ -790,10 +790,10 @@ "execution_count": 7, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:25.475519Z", - "iopub.status.busy": "2026-04-19T09:32:25.475199Z", - "iopub.status.idle": "2026-04-19T09:32:25.487970Z", - "shell.execute_reply": "2026-04-19T09:32:25.487325Z" + "iopub.execute_input": "2026-04-24T14:43:21.740092Z", + "iopub.status.busy": "2026-04-24T14:43:21.739934Z", + "iopub.status.idle": "2026-04-24T14:43:21.750426Z", + "shell.execute_reply": "2026-04-24T14:43:21.749351Z" } }, "outputs": [ @@ -862,10 +862,10 @@ "execution_count": 8, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:25.490914Z", - "iopub.status.busy": "2026-04-19T09:32:25.490604Z", - "iopub.status.idle": "2026-04-19T09:32:25.497811Z", - "shell.execute_reply": "2026-04-19T09:32:25.497339Z" + "iopub.execute_input": "2026-04-24T14:43:21.752662Z", + "iopub.status.busy": "2026-04-24T14:43:21.752498Z", + "iopub.status.idle": "2026-04-24T14:43:21.759255Z", + "shell.execute_reply": "2026-04-24T14:43:21.758589Z" } }, "outputs": [ @@ -876,30 +876,30 @@ "Total spans captured: 8\n", "\n", "Unique trace IDs: 1 (D9: should be 1)\n", - "Root invocation span: QA_research_graph.invoke (id=a8226d00c58b...)\n", + "Root invocation span: QA_research_graph.invoke (id=c2b080669791...)\n", "\n", - " Span: llm.chat.completion parent=ad6064b6\n", + " Span: llm.chat.completion parent=e3ab9779\n", " gen_ai.operation.name = chat\n", " gen_ai.output.preview = {\"1\": {\"agent\": \"web_researcher\", \"action\": \"search\", \"goal\": \"collect context\",\n", " gen_ai.provider.name = stub\n", " gen_ai.request.model = stub-llm\n", " 
trace.temporal_ignore = true\n", "\n", - " Span: planner parent=a8226d00\n", + " Span: planner parent=c2b08066\n", " gen_ai.model = stub-llm\n", " inputs.gen_ai.prompt = Create a JSON plan for: What is reinforcement learning?. Use web_researcher and \n", " inputs.user_query = What is reinforcement learning?\n", " param.planner_prompt = Create a JSON plan for: {query}. Use web_researcher and synthesizer; include wik\n", " param.planner_prompt.trainable = True\n", "\n", - " Span: llm.chat.completion parent=45b79393\n", + " Span: llm.chat.completion parent=9ff08be4\n", " gen_ai.operation.name = chat\n", " gen_ai.output.preview = {\"goto\": \"synthesizer\", \"query\": \"given step 1 of plan: {'agent': 'web_researche\n", " gen_ai.provider.name = stub\n", " gen_ai.request.model = stub-llm\n", " trace.temporal_ignore = true\n", "\n", - " Span: executor parent=a8226d00\n", + " Span: executor parent=c2b08066\n", " gen_ai.model = stub-llm\n", " inputs.gen_ai.prompt = Given step 1 of plan: {'agent': 'web_researcher', 'action': 'search', 'goal': 'c\n", " inputs.step = 1\n", @@ -907,7 +907,7 @@ " param.executor_prompt = Given step {step} of plan: {plan_step} for query: {query}. 
Return JSON {goto,que\n", " param.executor_prompt.trainable = True\n", "\n", - " Span: llm.chat.completion parent=e64c876c\n", + " Span: llm.chat.completion parent=fef48edf\n", " gen_ai.operation.name = chat\n", " gen_ai.output.preview = Stub answer for: answer: what is reinforcement learning?\n", "context:\n", @@ -917,7 +917,7 @@ " gen_ai.request.model = stub-llm\n", " trace.temporal_ignore = true\n", "\n", - " Span: synthesizer parent=a8226d00\n", + " Span: synthesizer parent=c2b08066\n", " gen_ai.model = stub-llm\n", " inputs.gen_ai.prompt = Answer: What is reinforcement learning?\n", "Context:\n", @@ -930,7 +930,7 @@ "If asked for IDs, include Wikidata QIDs.\n", " param.synthesizer_prompt.trainable = True\n", "\n", - " Span: evaluator parent=a8226d00\n", + " Span: evaluator parent=c2b08066\n", " eval.reasons = \n", " eval.score = 0.25\n", "\n", @@ -999,10 +999,10 @@ "execution_count": 9, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:25.500800Z", - "iopub.status.busy": "2026-04-19T09:32:25.500610Z", - "iopub.status.idle": "2026-04-19T09:32:25.512929Z", - "shell.execute_reply": "2026-04-19T09:32:25.512363Z" + "iopub.execute_input": "2026-04-24T14:43:21.761376Z", + "iopub.status.busy": "2026-04-24T14:43:21.761245Z", + "iopub.status.idle": "2026-04-24T14:43:21.773066Z", + "shell.execute_reply": "2026-04-24T14:43:21.772317Z" } }, "outputs": [ @@ -1021,12 +1021,12 @@ "[OK] No duplicate ParameterNodes (C7).\n", "\n", "MessageNode: 7\n", - " QA_research_graph/0/planner0 parents=['lit_31030', 'lit_70270', 'planner_prompt0']\n", + " QA_research_graph/0/planner0 parents=['lit_22500', 'lit_60470', 'planner_prompt0']\n", " QA_research_graph/0/llm.chat.completion0 parents=['planner0']\n", " QA_research_graph/0/llm.chat.completion2 parents=['synthesizer0']\n", - " QA_research_graph/0/executor0 parents=['lit_7850', 'lit_26280', 'lit_70271', 'planner0', 'executor_prompt0']\n", + " QA_research_graph/0/executor0 parents=['lit_78660', 'lit_57300', 
'lit_60471', 'planner0', 'executor_prompt0']\n", " QA_research_graph/0/llm.chat.completion1 parents=['executor0']\n", - " QA_research_graph/0/synthesizer0 parents=['lit_39320', 'lit_70272', 'executor0', 'synthesizer_prompt0']\n", + " QA_research_graph/0/synthesizer0 parents=['lit_93330', 'lit_60472', 'executor0', 'synthesizer_prompt0']\n", " QA_research_graph/0/evaluator0 parents=['synthesizer0']\n", "[WARN] No top-level message nodes found.\n" ] @@ -1104,10 +1104,10 @@ "execution_count": 10, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:25.515225Z", - "iopub.status.busy": "2026-04-19T09:32:25.515034Z", - "iopub.status.idle": "2026-04-19T09:32:25.521268Z", - "shell.execute_reply": "2026-04-19T09:32:25.520614Z" + "iopub.execute_input": "2026-04-24T14:43:21.775107Z", + "iopub.status.busy": "2026-04-24T14:43:21.774972Z", + "iopub.status.idle": "2026-04-24T14:43:21.781210Z", + "shell.execute_reply": "2026-04-24T14:43:21.780433Z" } }, "outputs": [ @@ -1117,9 +1117,9 @@ "text": [ "Child LLM spans detected (via temporal_ignore): 3\n", "Top-level message nodes: 4\n", - " [OK] Node executor temporal parent → 88934f9b385d... (not a child span)\n", - " [OK] Node synthesizer temporal parent → 714455f5f80c... (not a child span)\n", - " [OK] Node evaluator temporal parent → 457d0101fbd2... (not a child span)\n", + " [OK] Node executor temporal parent → 7e199a6de5c0... (not a child span)\n", + " [OK] Node synthesizer temporal parent → ef0e7bec3426... (not a child span)\n", + " [OK] Node evaluator temporal parent → 6dc071e4b185... 
(not a child span)\n", "\n", "[OK] Temporal chaining verified — no top-level node points to child spans.\n" ] @@ -1179,10 +1179,10 @@ "execution_count": 11, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:25.523710Z", - "iopub.status.busy": "2026-04-19T09:32:25.523518Z", - "iopub.status.idle": "2026-04-19T09:32:25.527964Z", - "shell.execute_reply": "2026-04-19T09:32:25.527337Z" + "iopub.execute_input": "2026-04-24T14:43:21.783495Z", + "iopub.status.busy": "2026-04-24T14:43:21.783360Z", + "iopub.status.idle": "2026-04-24T14:43:21.787504Z", + "shell.execute_reply": "2026-04-24T14:43:21.786913Z" } }, "outputs": [ @@ -1237,10 +1237,10 @@ "execution_count": 12, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:25.529922Z", - "iopub.status.busy": "2026-04-19T09:32:25.529738Z", - "iopub.status.idle": "2026-04-19T09:32:25.537885Z", - "shell.execute_reply": "2026-04-19T09:32:25.536924Z" + "iopub.execute_input": "2026-04-24T14:43:21.789565Z", + "iopub.status.busy": "2026-04-24T14:43:21.789429Z", + "iopub.status.idle": "2026-04-24T14:43:21.797066Z", + "shell.execute_reply": "2026-04-24T14:43:21.796330Z" } }, "outputs": [ @@ -1278,10 +1278,10 @@ "execution_count": 13, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:25.540343Z", - "iopub.status.busy": "2026-04-19T09:32:25.540123Z", - "iopub.status.idle": "2026-04-19T09:32:25.544509Z", - "shell.execute_reply": "2026-04-19T09:32:25.543368Z" + "iopub.execute_input": "2026-04-24T14:43:21.799310Z", + "iopub.status.busy": "2026-04-24T14:43:21.799152Z", + "iopub.status.idle": "2026-04-24T14:43:21.802514Z", + "shell.execute_reply": "2026-04-24T14:43:21.801819Z" } }, "outputs": [ @@ -1327,10 +1327,10 @@ "execution_count": 14, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:25.547566Z", - "iopub.status.busy": "2026-04-19T09:32:25.547307Z", - "iopub.status.idle": "2026-04-19T09:32:25.555652Z", - "shell.execute_reply": 
"2026-04-19T09:32:25.555116Z" + "iopub.execute_input": "2026-04-24T14:43:21.804677Z", + "iopub.status.busy": "2026-04-24T14:43:21.804511Z", + "iopub.status.idle": "2026-04-24T14:43:21.812199Z", + "shell.execute_reply": "2026-04-24T14:43:21.811200Z" } }, "outputs": [ @@ -1401,10 +1401,10 @@ "execution_count": 15, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:25.558061Z", - "iopub.status.busy": "2026-04-19T09:32:25.557814Z", - "iopub.status.idle": "2026-04-19T09:32:25.596279Z", - "shell.execute_reply": "2026-04-19T09:32:25.595638Z" + "iopub.execute_input": "2026-04-24T14:43:21.815035Z", + "iopub.status.busy": "2026-04-24T14:43:21.814839Z", + "iopub.status.idle": "2026-04-24T14:43:21.850300Z", + "shell.execute_reply": "2026-04-24T14:43:21.849337Z" } }, "outputs": [ @@ -1496,10 +1496,10 @@ "execution_count": 16, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:25.598386Z", - "iopub.status.busy": "2026-04-19T09:32:25.598206Z", - "iopub.status.idle": "2026-04-19T09:32:25.605162Z", - "shell.execute_reply": "2026-04-19T09:32:25.603918Z" + "iopub.execute_input": "2026-04-24T14:43:21.852641Z", + "iopub.status.busy": "2026-04-24T14:43:21.852489Z", + "iopub.status.idle": "2026-04-24T14:43:21.858264Z", + "shell.execute_reply": "2026-04-24T14:43:21.857564Z" } }, "outputs": [ @@ -1578,10 +1578,10 @@ "execution_count": 17, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:25.608814Z", - "iopub.status.busy": "2026-04-19T09:32:25.608519Z", - "iopub.status.idle": "2026-04-19T09:32:28.134823Z", - "shell.execute_reply": "2026-04-19T09:32:28.133977Z" + "iopub.execute_input": "2026-04-24T14:43:21.860722Z", + "iopub.status.busy": "2026-04-24T14:43:21.860573Z", + "iopub.status.idle": "2026-04-24T14:43:23.716890Z", + "shell.execute_reply": "2026-04-24T14:43:23.715860Z" } }, "outputs": [ @@ -1589,10 +1589,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "[OK] Live LLM smoke test passed: 'The user asks: \"Say 
hello in one word'\n", + "[OK] Live LLM smoke test passed: 'Hello.'\n", "\n", "Live LLM ready (openai client -> https://openrouter.ai/api/v1)\n", - " model: nvidia/nemotron-3-super-120b-a12b:free\n" + " model: google/gemini-3-flash-preview\n" ] } ], @@ -1704,10 +1704,10 @@ "execution_count": 18, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:32:28.138289Z", - "iopub.status.busy": "2026-04-19T09:32:28.138020Z", - "iopub.status.idle": "2026-04-19T09:33:00.679220Z", - "shell.execute_reply": "2026-04-19T09:33:00.677605Z" + "iopub.execute_input": "2026-04-24T14:43:23.720264Z", + "iopub.status.busy": "2026-04-24T14:43:23.720050Z", + "iopub.status.idle": "2026-04-24T14:43:36.283412Z", + "shell.execute_reply": "2026-04-24T14:43:36.281640Z" } }, "outputs": [ @@ -1725,15 +1725,12 @@ "output_type": "stream", "text": [ "\n", - "Live answer (1279 chars):\n", - " Gradient descent is an optimization algorithm used to minimize a loss (or cost) function by iteratively moving the model’s parameters in the direction of steepest decrease of the function. \n", + "Live answer (1671 chars):\n", + " **Gradient descent** (Wikidata ID: **Q1189191**) is an iterative first-order optimization algorithm used to find the local minimum of a differentiable function. 
It is most commonly used in machine learning and deep learning to minimize a **loss function** by updating the parameters (weights) of a mo\n", "\n", - "At each step, the parameters θ are updated as \n", - "\n", - "\\[\n", - "\\theta \\leftarrow \\theta - \\eta \\,\\nabla_\\theta L(\\theta\n", - "\n", - "Spans: 11 trace_ids=1 root_invoke=True\n", + "Spans: 17 trace_ids=1 root_invoke=True\n", + " gen_ai.provider.name = openrouter\n", + " gen_ai.provider.name = openrouter\n", " gen_ai.provider.name = openrouter\n", " gen_ai.provider.name = openrouter\n", " gen_ai.provider.name = openrouter\n", @@ -1813,10 +1810,10 @@ "execution_count": 19, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:33:00.682729Z", - "iopub.status.busy": "2026-04-19T09:33:00.682396Z", - "iopub.status.idle": "2026-04-19T09:37:56.539057Z", - "shell.execute_reply": "2026-04-19T09:37:56.538170Z" + "iopub.execute_input": "2026-04-24T14:43:36.287008Z", + "iopub.status.busy": "2026-04-24T14:43:36.286734Z", + "iopub.status.idle": "2026-04-24T14:44:32.839792Z", + "shell.execute_reply": "2026-04-24T14:44:32.838432Z" } }, "outputs": [ @@ -1913,14 +1910,16 @@ " Baseline score: 0.7667\n", " Best score: 0.7667\n", " Score history: [0.7667, 0.7667]\n", - " Total LLM calls: 32\n", + " Total LLM calls: 36\n", "\n", "Iter Avg Score Best Score \n", "------------------------------\n", "0 0.7667 0.7667 \n", "1 0.7667 0.7667 \n", "\n", - " Live OTLP: 0 spans, 0 trace IDs, root_invoke=False\n" + " Live OTLP sample: 17 spans, 1 trace IDs, root_invoke=True\n", + " span_names: ['openrouter.chat.completion', 'planner', 'openrouter.chat.completion', 'executor', 'web_researcher', 'openrouter.chat.completion', 'executor', 'web_researcher', 'openrouter.chat.completion', 'executor', 'wikidata_researcher', 'openrouter.chat.completion', 'executor', 'openrouter.chat.completion', 'synthesizer', 'evaluator', 'm1-live.invoke']\n", + " [OK] Live optimization OTLP validated from stored runs.\n" ] } ], @@ -1955,14 
+1954,16 @@ " apply_updates_flag=True,\n", " )\n", "\n", - " print(f\"\\n planner_prompt AFTER: {live_ig.templates['planner_prompt']!r}\")\n", + " print()\n", + " print(f\" planner_prompt AFTER: {live_ig.templates['planner_prompt']!r}\")\n", " print(f\" Baseline score: {live_opt_result.baseline_score:.4f}\")\n", " print(f\" Best score: {live_opt_result.best_score:.4f}\")\n", " print(f\" Score history: {[round(s, 4) for s in live_opt_result.score_history]}\")\n", " print(f\" Total LLM calls: {live_llm.call_count}\")\n", "\n", " # Optimization history table\n", - " print(f\"\\n{'Iter':<6} {'Avg Score':<12} {'Best Score':<12}\")\n", + " print()\n", + " print(f\"{'Iter':<6} {'Avg Score':<12} {'Best Score':<12}\")\n", " print(\"-\" * 30)\n", " best_so_far = float(\"-inf\")\n", " for i, sc in enumerate(live_opt_result.score_history):\n", @@ -1970,20 +1971,77 @@ " best_so_far = sc\n", " print(f\"{i:<6} {sc:<12.4f} {best_so_far:<12.4f}\")\n", "\n", - " # --- Live OTLP inspection ---\n", - " live_otlp_final = live_ig.session.flush_otlp(clear=True)\n", - " try:\n", - " live_spans = live_otlp_final[\"resourceSpans\"][0][\"scopeSpans\"][0][\"spans\"]\n", - " trace_ids = {s[\"traceId\"] for s in live_spans}\n", - " has_root = any(str(sp.get(\"name\",\"\")).endswith(\".invoke\") for sp in live_spans)\n", - " print(f\"\\n Live OTLP: {len(live_spans)} spans, {len(trace_ids)} trace IDs, root_invoke={has_root}\")\n", - " except (KeyError, IndexError) as e:\n", - " print(f\"\\n [WARN] Could not inspect live OTLP: {e}\")\n", + " # --- Live OTLP inspection from stored optimization runs ---\n", + " def _spans(otlp):\n", + " out = []\n", + " for rs in otlp.get(\"resourceSpans\", []):\n", + " for ss in rs.get(\"scopeSpans\", []):\n", + " out.extend(ss.get(\"spans\", []))\n", + " return out\n", + "\n", + " def _attrs(span):\n", + " attrs = {}\n", + " for item in span.get(\"attributes\", []):\n", + " key = item.get(\"key\")\n", + " value = item.get(\"value\", {})\n", + " if \"stringValue\" 
in value:\n", + " attrs[key] = value[\"stringValue\"]\n", + " elif \"boolValue\" in value:\n", + " attrs[key] = value[\"boolValue\"]\n", + " elif \"intValue\" in value:\n", + " attrs[key] = value[\"intValue\"]\n", + " elif \"doubleValue\" in value:\n", + " attrs[key] = value[\"doubleValue\"]\n", + " else:\n", + " attrs[key] = value\n", + " return attrs\n", + "\n", + " sample_live_otlp = None\n", + " for runs in reversed(live_opt_result.all_runs):\n", + " for run in runs:\n", + " if run.otlp:\n", + " sample_live_otlp = run.otlp\n", + " break\n", + " if sample_live_otlp:\n", + " break\n", + "\n", + " if sample_live_otlp is None:\n", + " print()\n", + " print(\" [WARN] No live OTLP captured in optimization runs.\")\n", + " else:\n", + " live_spans = _spans(sample_live_otlp)\n", + " span_names = [sp.get(\"name\") for sp in live_spans]\n", + " trace_ids = {s.get(\"traceId\") for s in live_spans if s.get(\"traceId\")}\n", + " has_root = any(str(name).endswith(\".invoke\") for name in span_names)\n", + " print()\n", + " print(f\" Live OTLP sample: {len(live_spans)} spans, {len(trace_ids)} trace IDs, root_invoke={has_root}\")\n", + " print(f\" span_names: {span_names}\")\n", + "\n", + " assert has_root\n", + " assert \"planner\" in span_names\n", + " assert \"synthesizer\" in span_names\n", + " assert \"evaluator\" in span_names\n", + " assert any(name == \"openrouter.chat.completion\" for name in span_names)\n", + "\n", + " planner_span = next(sp for sp in live_spans if sp.get(\"name\") == \"planner\")\n", + " synth_span = next(sp for sp in live_spans if sp.get(\"name\") == \"synthesizer\")\n", + " llm_span = next(sp for sp in live_spans if sp.get(\"name\") == \"openrouter.chat.completion\")\n", + "\n", + " planner_attrs = _attrs(planner_span)\n", + " synth_attrs = _attrs(synth_span)\n", + " llm_attrs = _attrs(llm_span)\n", + "\n", + " assert \"param.planner_prompt\" in planner_attrs\n", + " assert \"inputs.user_query\" in planner_attrs\n", + " assert 
\"param.synthesizer_prompt\" in synth_attrs\n", + " assert \"gen_ai.output.preview\" in llm_attrs\n", + "\n", + " print(\" [OK] Live optimization OTLP validated from stored runs.\")\n", "else:\n", " if not HAS_API_KEY:\n", " print(\"[SKIP] No API key — live optimization skipped.\")\n", " else:\n", - " print(\"[SKIP] Live invocation failed — live optimization skipped.\")" + " print(\"[SKIP] Live invocation failed — live optimization skipped.\")\n" ] }, { @@ -2002,10 +2060,10 @@ "execution_count": 20, "metadata": { "execution": { - "iopub.execute_input": "2026-04-19T09:37:56.542223Z", - "iopub.status.busy": "2026-04-19T09:37:56.541953Z", - "iopub.status.idle": "2026-04-19T09:37:56.571430Z", - "shell.execute_reply": "2026-04-19T09:37:56.570551Z" + "iopub.execute_input": "2026-04-24T14:44:32.844484Z", + "iopub.status.busy": "2026-04-24T14:44:32.844131Z", + "iopub.status.idle": "2026-04-24T14:44:32.883150Z", + "shell.execute_reply": "2026-04-24T14:44:32.881786Z" } }, "outputs": [ @@ -2157,9 +2215,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.12.11" } }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} From 80fe50ab4c0b504c33a5142616acdf91f0a16a9d Mon Sep 17 00:00:00 2001 From: doxav Date: Fri, 24 Apr 2026 22:59:16 +0200 Subject: [PATCH 13/16] intermediate solution --- ...aph_instrument_and_compare_observers.ipynb | 328 +++++++++--------- ...mo_langgraph_instrument_and_optimize.ipynb | 188 +++++----- ...ggraph_instrument_and_optimize_trace.ipynb | 32 +- opto/features/graph/adapter.py | 265 +++++++++++--- opto/features/graph/sidecars.py | 36 +- opto/trace/io/optimization.py | 4 + .../test_graph_adapter_modulecandidate.py | 17 + tests/unit_tests/test_graph_adapter_trace.py | 90 +++++ tests/unit_tests/test_optimization.py | 1 + 9 files changed, 630 insertions(+), 331 deletions(-) diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb 
b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb index 108ea48f..897c5e3b 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb @@ -22,10 +22,10 @@ "id": "run-live-compare-script", "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:35:00.407713Z", - "iopub.status.busy": "2026-04-24T14:35:00.407401Z", - "iopub.status.idle": "2026-04-24T14:42:39.184568Z", - "shell.execute_reply": "2026-04-24T14:42:39.183078Z" + "iopub.execute_input": "2026-04-24T19:42:20.930373Z", + "iopub.status.busy": "2026-04-24T19:42:20.930178Z", + "iopub.status.idle": "2026-04-24T19:49:11.855790Z", + "shell.execute_reply": "2026-04-24T19:49:11.855292Z" }, "language": "python" }, @@ -37,13 +37,13 @@ "\n", "| config | runtime_s | baseline | best | gain | best_iteration | stability_std | score_history |\n", "|---|---:|---:|---:|---:|---:|---:|---|\n", - "| trace | 62.016 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", - "| trace+otel | 58.193 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", - "| otel | 60.922 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", - "| trace+sysmon | 60.176 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", - "| trace+otel+sysmon | 61.539 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", - "| otel+sysmon | 59.493 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", - "| sysmon | 57.760 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |" + "| trace | 54.513 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", + "| trace+otel | 53.619 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", + "| otel | 54.604 
| 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", + "| trace+sysmon | 54.634 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", + "| trace+otel+sysmon | 53.552 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", + "| otel+sysmon | 54.761 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", + "| sysmon | 53.108 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |" ], "text/plain": [ "" @@ -56,7 +56,7 @@ "data": { "text/markdown": [ "## trace\n", - "- Runtime: `62.016s`\n", + "- Runtime: `54.513s`\n", "- Baseline score: `0.842`\n", "- Best score: `0.876`\n", "- Score gain: `0.035`\n", @@ -116,10 +116,10 @@ "\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", "\n", "\n", "state\n", @@ -145,16 +145,16 @@ "\n", "\n", "make_trace_case.synth_node\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x75e91b87d520>}\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x7fa28d95ddc0>}\n", "\n", "\n", "\n", "make_trace_case.planner_node->make_trace_case.synth_node\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", @@ -176,7 +176,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -186,7 +186,7 @@ "data": { "text/markdown": [ "## trace+otel\n", - "- Runtime: `58.193s`\n", + "- Runtime: `53.619s`\n", "- Baseline score: `0.842`\n", "- Best score: `0.876`\n", "- Score gain: `0.035`\n", @@ -246,10 +246,10 @@ "\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", "\n", "\n", "state\n", @@ -275,16 +275,16 @@ "\n", "\n", "make_trace_case.synth_node\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x75e91b8f47d0>}\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node 
object at 0x7fa28d794fe0>}\n", "\n", "\n", "\n", "make_trace_case.planner_node->make_trace_case.synth_node\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", @@ -306,7 +306,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -350,7 +350,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -360,7 +360,7 @@ "data": { "text/markdown": [ "## otel\n", - "- Runtime: `60.922s`\n", + "- Runtime: `54.604s`\n", "- Baseline score: `0.842`\n", "- Best score: `0.876`\n", "- Score gain: `0.035`\n", @@ -437,7 +437,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -447,7 +447,7 @@ "data": { "text/markdown": [ "## trace+sysmon\n", - "- Runtime: `60.176s`\n", + "- Runtime: `54.634s`\n", "- Baseline score: `0.842`\n", "- Best score: `0.876`\n", "- Score gain: `0.035`\n", @@ -508,9 +508,9 @@ " -->\n", "\n", "\n", + " viewBox=\"0.00 0.00 1714.25 223.75\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", "\n", - "\n", + "\n", "\n", "\n", "state\n", @@ -536,16 +536,16 @@ "\n", "\n", "make_trace_case.synth_node\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x75e91af2d820>}\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x7fa28d95d3d0>}\n", "\n", "\n", "\n", "make_trace_case.planner_node->make_trace_case.synth_node\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", @@ -567,7 +567,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -625,27 +625,27 @@ "[parameter]\n", "Answer directly in the first sentence. 
Then add three short titled sections w...\n", "\n", - "\n", + "\n", "\n", - "msg:90fe038fb3a74237\n", + "msg:ba87846d33ed46da\n", "\n", "planner_node\n", "[message]\n", "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", "\n", - "\n", + "\n", "\n", - "msg:7792dfbd56784d38\n", - "\n", + "msg:c4faa984c1eb466d\n", + "\n", "synth_node\n", "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x75e91af2d820>}\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x7fa28d95d3d0>}\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -655,7 +655,7 @@ "data": { "text/markdown": [ "## trace+otel+sysmon\n", - "- Runtime: `61.539s`\n", + "- Runtime: `53.552s`\n", "- Baseline score: `0.842`\n", "- Best score: `0.876`\n", "- Score gain: `0.035`\n", @@ -715,10 +715,10 @@ "\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", "\n", "\n", "state\n", @@ -744,16 +744,16 @@ "\n", "\n", "make_trace_case.synth_node\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x75e91af89a90>}\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x7fa28ce7b170>}\n", "\n", "\n", "\n", "make_trace_case.planner_node->make_trace_case.synth_node\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", @@ -775,7 +775,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -833,27 +833,27 @@ "[parameter]\n", "Answer directly in the first sentence. 
Then add three short titled sections w...\n", "\n", - "\n", + "\n", "\n", - "msg:9f1628e1414b4abb\n", + "msg:94fd3f1208e2462c\n", "\n", "planner_node\n", "[message]\n", "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", "\n", - "\n", + "\n", "\n", - "msg:d2d5d39b4642462c\n", - "\n", + "msg:111678fc76c944b1\n", + "\n", "synth_node\n", "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x75e91af89a90>}\n", + "{'final_answer': <opto.trace.nodes.Node object at 0x7fa28ce7b170>}\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -897,7 +897,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -907,7 +907,7 @@ "data": { "text/markdown": [ "## otel+sysmon\n", - "- Runtime: `59.493s`\n", + "- Runtime: `54.761s`\n", "- Baseline score: `0.842`\n", "- Best score: `0.876`\n", "- Score gain: `0.035`\n", @@ -984,7 +984,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1042,17 +1042,17 @@ "[parameter]\n", "Answer directly in the first sentence. Then add three short titled sections w...\n", "\n", - "\n", + "\n", "\n", - "msg:0775e1673ce14ee8\n", + "msg:8811b61e4405470e\n", "\n", "planner_node\n", "[message]\n", "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", "\n", - "\n", + "\n", "\n", - "msg:cde03647185a4166\n", + "msg:6597ff076cbf41e4\n", "\n", "synth_node\n", "[message]\n", @@ -1062,7 +1062,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1072,7 +1072,7 @@ "data": { "text/markdown": [ "## sysmon\n", - "- Runtime: `57.760s`\n", + "- Runtime: `53.108s`\n", "- Baseline score: `0.842`\n", "- Best score: `0.876`\n", "- Score gain: `0.035`\n", @@ -1153,17 +1153,17 @@ "[parameter]\n", "Answer directly in the first sentence. 
Then add three short titled sections w...\n", "\n", - "\n", + "\n", "\n", - "msg:fca7538c517441c8\n", + "msg:b2e1bd7d883d423b\n", "\n", "planner_node\n", "[message]\n", "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", "\n", - "\n", + "\n", "\n", - "msg:80b89e47995d4494\n", + "msg:483b60be2deb462b\n", "\n", "synth_node\n", "[message]\n", @@ -1173,7 +1173,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1183,7 +1183,7 @@ "data": { "text/plain": [ "[{'config': 'trace',\n", - " 'runtime_s': 62.016,\n", + " 'runtime_s': 54.513,\n", " 'baseline_score': 0.842,\n", " 'best_score': 0.876,\n", " 'score_gain': 0.035,\n", @@ -1223,7 +1223,7 @@ " 'description': '[make_trace_case.synth_node]',\n", " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", + " 'value': {'final_answer': }}},\n", " {'id': 'planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", @@ -1244,7 +1244,7 @@ " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}'}}}]},\n", " {'config': 'trace+otel',\n", - " 'runtime_s': 58.193,\n", + " 'runtime_s': 53.619,\n", " 'baseline_score': 0.842,\n", " 'best_score': 0.876,\n", " 'score_gain': 0.035,\n", @@ -1284,7 +1284,7 @@ " 'description': '[make_trace_case.synth_node]',\n", " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", + " 'value': {'final_answer': }}},\n", " {'id': 'planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", @@ -1320,7 +1320,7 @@ " 'span_names': [],\n", " 'param_keys': []}}]},\n", " {'config': 'otel',\n", - " 'runtime_s': 60.922,\n", + " 'runtime_s': 54.604,\n", " 'baseline_score': 0.842,\n", " 'best_score': 0.876,\n", " 'score_gain': 0.035,\n", @@ -1336,13 +1336,13 @@ " 'origin': 'backend',\n", " 'doc': {'version': 'trace-json/1.0+otel',\n", " 'agent': {'id': 'otel', 'service': 'otel'},\n", - " 'otel_meta': {'trace_id': '045d8d6bda648fdc70fcf308a6431a7b'},\n", + " 'otel_meta': {'trace_id': '0de02cf99df97767e783ee43a8abff97'},\n", " 'nodes': {'otel:param_planner_prompt': {'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", " 'data': 'Create a short plan for: {query}',\n", " 'trainable': True,\n", - " 'info': {'otel': {'span_id': 'bba757639cf0965d'}}},\n", - " 'otel:bba757639cf0965d': {'kind': 'msg',\n", + " 'info': {'otel': {'span_id': '2f304e2eb23de6bd'}}},\n", + " 'otel:2f304e2eb23de6bd': {'kind': 'msg',\n", " 'name': 'planner_node',\n", " 'op': 'llm_call',\n", " 'inputs': {'gen_ai.prompt': 'Create a short plan for: What is CRISPR?',\n", @@ -1350,49 +1350,49 @@ " 'query': 'otel:What is CRISPR?',\n", " 'param_planner_prompt': 'otel:param_planner_prompt'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '045d8d6bda648fdc70fcf308a6431a7b',\n", - " 'span_id': 'bba757639cf0965d',\n", + " 'info': {'otel': {'trace_id': 
'0de02cf99df97767e783ee43a8abff97',\n", + " 'span_id': '2f304e2eb23de6bd',\n", " 'parent_span_id': None,\n", " 'service': 'otel',\n", " 'temporal_ignore': False}}},\n", - " 'otel:e5b8790271769b15': {'kind': 'msg',\n", + " 'otel:bea5c71b6e1e62bb': {'kind': 'msg',\n", " 'name': 'llm.chat.completion',\n", " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel:bba757639cf0965d'},\n", + " 'inputs': {'parent': 'otel:2f304e2eb23de6bd'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '045d8d6bda648fdc70fcf308a6431a7b',\n", - " 'span_id': 'e5b8790271769b15',\n", - " 'parent_span_id': 'bba757639cf0965d',\n", + " 'info': {'otel': {'trace_id': '0de02cf99df97767e783ee43a8abff97',\n", + " 'span_id': 'bea5c71b6e1e62bb',\n", + " 'parent_span_id': '2f304e2eb23de6bd',\n", " 'service': 'otel',\n", " 'temporal_ignore': True}}},\n", " 'otel:param_synth_prompt': {'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", " 'data': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '7352e0f1c2c425ab'}}},\n", - " 'otel:7352e0f1c2c425ab': {'kind': 'msg',\n", + " 'info': {'otel': {'span_id': '9ea6c5fc5023c6ad'}}},\n", + " 'otel:9ea6c5fc5023c6ad': {'kind': 'msg',\n", " 'name': 'synth_node',\n", " 'op': 'llm_call',\n", " 'inputs': {'gen_ai.prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: What is CRISPR?\\nPlan: This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. 
The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not',\n", " 'user_query': 'otel:What is CRISPR?',\n", " 'query': 'otel:What is CRISPR?',\n", " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not',\n", - " 'parent': 'otel:bba757639cf0965d',\n", + " 'parent': 'otel:2f304e2eb23de6bd',\n", " 'param_synth_prompt': 'otel:param_synth_prompt'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '045d8d6bda648fdc70fcf308a6431a7b',\n", - " 'span_id': '7352e0f1c2c425ab',\n", - " 'parent_span_id': 'bba757639cf0965d',\n", + " 'info': {'otel': {'trace_id': '0de02cf99df97767e783ee43a8abff97',\n", + " 'span_id': '9ea6c5fc5023c6ad',\n", + " 'parent_span_id': '2f304e2eb23de6bd',\n", " 'service': 'otel',\n", " 'temporal_ignore': False}}},\n", - " 'otel:7e1949f963de0140': {'kind': 'msg',\n", + " 'otel:b935b92e48ab04b8': {'kind': 'msg',\n", " 'name': 'llm.chat.completion',\n", " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel:7352e0f1c2c425ab'},\n", + " 'inputs': {'parent': 'otel:9ea6c5fc5023c6ad'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '045d8d6bda648fdc70fcf308a6431a7b',\n", - " 'span_id': '7e1949f963de0140',\n", - " 'parent_span_id': '7352e0f1c2c425ab',\n", + " 'info': {'otel': {'trace_id': '0de02cf99df97767e783ee43a8abff97',\n", + " 'span_id': 'b935b92e48ab04b8',\n", + " 'parent_span_id': '9ea6c5fc5023c6ad',\n", " 'service': 'otel',\n", " 'temporal_ignore': True}}}},\n", " 'context': {}},\n", @@ -1416,7 +1416,7 @@ " 'param.synth_prompt',\n", " 'param.synth_prompt.trainable']}}]},\n", " {'config': 'trace+sysmon',\n", - " 'runtime_s': 60.176,\n", + " 'runtime_s': 54.634,\n", " 'baseline_score': 0.842,\n", " 'best_score': 0.876,\n", " 'score_gain': 0.035,\n", @@ -1456,7 +1456,7 @@ " 'description': '[make_trace_case.synth_node]',\n", " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': 
{'final_answer': }}},\n", + " 'value': {'final_answer': }}},\n", " {'id': 'planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", @@ -1495,24 +1495,24 @@ " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", - " 'msg:90fe038fb3a74237': {'id': 'msg:90fe038fb3a74237',\n", + " 'msg:ba87846d33ed46da': {'id': 'msg:ba87846d33ed46da',\n", " 'kind': 'message',\n", " 'name': 'planner_node',\n", " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:568',\n", " 'inputs': {},\n", " 'output': {'name': 'planner_node:out',\n", " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\\\n\\\\n### **G\"},\n", - " 'info': {'sysmon': {'duration_ns': 2722412274,\n", - " 'thread_id': 129644846245696}}},\n", - " 'msg:7792dfbd56784d38': {'id': 'msg:7792dfbd56784d38',\n", + " 'info': {'sysmon': {'duration_ns': 2191557194,\n", + " 'thread_id': 140336865654592}}},\n", + " 'msg:c4faa984c1eb466d': {'id': 'msg:c4faa984c1eb466d',\n", " 'kind': 'message',\n", " 'name': 'synth_node',\n", " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:579',\n", " 'inputs': {},\n", " 'output': {'name': 'synth_node:out',\n", - " 'value': \"{'final_answer': }\"},\n", - " 'info': {'sysmon': {'duration_ns': 2220630467,\n", - " 'thread_id': 129644846245696}}}}},\n", + " 'value': \"{'final_answer': }\"},\n", + " 'info': {'sysmon': {'duration_ns': 2046976228,\n", + " 'thread_id': 140336865654592}}}}},\n", " 'summary': {'node_count': 4,\n", " 'message_names': ['planner_node', 'synth_node'],\n", " 
'semantic_messages': ['planner_node', 'synth_node'],\n", @@ -1521,7 +1521,7 @@ " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", " 'event_count': 2}}]},\n", " {'config': 'trace+otel+sysmon',\n", - " 'runtime_s': 61.539,\n", + " 'runtime_s': 53.552,\n", " 'baseline_score': 0.842,\n", " 'best_score': 0.876,\n", " 'score_gain': 0.035,\n", @@ -1561,7 +1561,7 @@ " 'description': '[make_trace_case.synth_node]',\n", " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", + " 'value': {'final_answer': }}},\n", " {'id': 'planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", @@ -1600,24 +1600,24 @@ " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", - " 'msg:9f1628e1414b4abb': {'id': 'msg:9f1628e1414b4abb',\n", + " 'msg:94fd3f1208e2462c': {'id': 'msg:94fd3f1208e2462c',\n", " 'kind': 'message',\n", " 'name': 'planner_node',\n", " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:568',\n", " 'inputs': {},\n", " 'output': {'name': 'planner_node:out',\n", " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\\\n\\\\n### **G\"},\n", - " 'info': {'sysmon': {'duration_ns': 2666017980,\n", - " 'thread_id': 129644846245696}}},\n", - " 'msg:d2d5d39b4642462c': {'id': 'msg:d2d5d39b4642462c',\n", + " 'info': {'sysmon': {'duration_ns': 2248025713,\n", + " 'thread_id': 140336865654592}}},\n", + " 'msg:111678fc76c944b1': {'id': 'msg:111678fc76c944b1',\n", " 'kind': 'message',\n", " 'name': 'synth_node',\n", " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:579',\n", " 'inputs': {},\n", " 'output': {'name': 'synth_node:out',\n", - " 'value': \"{'final_answer': }\"},\n", - " 'info': {'sysmon': {'duration_ns': 2492562121,\n", - " 'thread_id': 129644846245696}}}}},\n", + " 'value': \"{'final_answer': }\"},\n", + " 'info': {'sysmon': {'duration_ns': 2061665516,\n", + " 'thread_id': 140336865654592}}}}},\n", " 'summary': {'node_count': 4,\n", " 'message_names': ['planner_node', 'synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", @@ -1641,7 +1641,7 @@ " 'span_names': [],\n", " 'param_keys': []}}]},\n", " {'config': 'otel+sysmon',\n", - " 'runtime_s': 59.493,\n", + " 'runtime_s': 54.761,\n", " 'baseline_score': 0.842,\n", " 'best_score': 0.876,\n", " 'score_gain': 0.035,\n", @@ -1657,13 +1657,13 @@ " 'origin': 
'backend',\n", " 'doc': {'version': 'trace-json/1.0+otel',\n", " 'agent': {'id': 'otel+sysmon', 'service': 'otel+sysmon'},\n", - " 'otel_meta': {'trace_id': '60879cfcc2b75ecfc807cd4ecd2a6f1a'},\n", + " 'otel_meta': {'trace_id': '291d1ab2c30befe7812d63866174c584'},\n", " 'nodes': {'otel+sysmon:param_planner_prompt': {'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", " 'data': 'Create a short plan for: {query}',\n", " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '511b43fa507d20c0'}}},\n", - " 'otel+sysmon:511b43fa507d20c0': {'kind': 'msg',\n", + " 'info': {'otel': {'span_id': '41239b59d111622d'}}},\n", + " 'otel+sysmon:41239b59d111622d': {'kind': 'msg',\n", " 'name': 'planner_node',\n", " 'op': 'llm_call',\n", " 'inputs': {'gen_ai.prompt': 'Create a short plan for: What is CRISPR?',\n", @@ -1671,49 +1671,49 @@ " 'query': 'otel+sysmon:What is CRISPR?',\n", " 'param_planner_prompt': 'otel+sysmon:param_planner_prompt'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '60879cfcc2b75ecfc807cd4ecd2a6f1a',\n", - " 'span_id': '511b43fa507d20c0',\n", + " 'info': {'otel': {'trace_id': '291d1ab2c30befe7812d63866174c584',\n", + " 'span_id': '41239b59d111622d',\n", " 'parent_span_id': None,\n", " 'service': 'otel+sysmon',\n", " 'temporal_ignore': False}}},\n", - " 'otel+sysmon:159219852aad1a53': {'kind': 'msg',\n", + " 'otel+sysmon:7dd428b84a3f270b': {'kind': 'msg',\n", " 'name': 'llm.chat.completion',\n", " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel+sysmon:511b43fa507d20c0'},\n", + " 'inputs': {'parent': 'otel+sysmon:41239b59d111622d'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '60879cfcc2b75ecfc807cd4ecd2a6f1a',\n", - " 'span_id': '159219852aad1a53',\n", - " 'parent_span_id': '511b43fa507d20c0',\n", + " 'info': {'otel': {'trace_id': '291d1ab2c30befe7812d63866174c584',\n", + " 'span_id': '7dd428b84a3f270b',\n", + " 'parent_span_id': '41239b59d111622d',\n", " 'service': 'otel+sysmon',\n", " 
'temporal_ignore': True}}},\n", " 'otel+sysmon:param_synth_prompt': {'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", " 'data': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", - " 'info': {'otel': {'span_id': 'b52a69c8b081de0f'}}},\n", - " 'otel+sysmon:b52a69c8b081de0f': {'kind': 'msg',\n", + " 'info': {'otel': {'span_id': '79ac53511a735fc2'}}},\n", + " 'otel+sysmon:79ac53511a735fc2': {'kind': 'msg',\n", " 'name': 'synth_node',\n", " 'op': 'llm_call',\n", " 'inputs': {'gen_ai.prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: What is CRISPR?\\nPlan: This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not',\n", " 'user_query': 'otel+sysmon:What is CRISPR?',\n", " 'query': 'otel+sysmon:What is CRISPR?',\n", " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not',\n", - " 'parent': 'otel+sysmon:511b43fa507d20c0',\n", + " 'parent': 'otel+sysmon:41239b59d111622d',\n", " 'param_synth_prompt': 'otel+sysmon:param_synth_prompt'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '60879cfcc2b75ecfc807cd4ecd2a6f1a',\n", - " 'span_id': 'b52a69c8b081de0f',\n", - " 'parent_span_id': '511b43fa507d20c0',\n", + " 'info': {'otel': {'trace_id': '291d1ab2c30befe7812d63866174c584',\n", + " 'span_id': '79ac53511a735fc2',\n", + " 'parent_span_id': '41239b59d111622d',\n", " 'service': 'otel+sysmon',\n", " 'temporal_ignore': False}}},\n", - " 'otel+sysmon:9a44a496f06003e2': {'kind': 'msg',\n", + " 'otel+sysmon:eb0f3cf3afb9c639': {'kind': 'msg',\n", " 'name': 'llm.chat.completion',\n", " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel+sysmon:b52a69c8b081de0f'},\n", + " 'inputs': {'parent': 'otel+sysmon:79ac53511a735fc2'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '60879cfcc2b75ecfc807cd4ecd2a6f1a',\n", - " 'span_id': '9a44a496f06003e2',\n", - " 'parent_span_id': 'b52a69c8b081de0f',\n", + " 'info': {'otel': {'trace_id': '291d1ab2c30befe7812d63866174c584',\n", + " 'span_id': 'eb0f3cf3afb9c639',\n", + " 'parent_span_id': '79ac53511a735fc2',\n", " 'service': 'otel+sysmon',\n", " 'temporal_ignore': True}}}},\n", " 'context': {}},\n", @@ -1755,24 +1755,24 @@ " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", - " 'msg:0775e1673ce14ee8': {'id': 'msg:0775e1673ce14ee8',\n", + " 'msg:8811b61e4405470e': {'id': 'msg:8811b61e4405470e',\n", " 'kind': 'message',\n", " 'name': 'planner_node',\n", " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:540',\n", " 'inputs': {},\n", " 'output': {'name': 'planner_node:out',\n", " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\\\n\\\\n### **G\"},\n", - " 'info': {'sysmon': {'duration_ns': 3024132044,\n", - " 'thread_id': 129644846245696}}},\n", - " 'msg:cde03647185a4166': {'id': 'msg:cde03647185a4166',\n", + " 'info': {'sysmon': {'duration_ns': 2152943175,\n", + " 'thread_id': 140336865654592}}},\n", + " 'msg:6597ff076cbf41e4': {'id': 'msg:6597ff076cbf41e4',\n", " 'kind': 'message',\n", " 'name': 'synth_node',\n", " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:544',\n", " 'inputs': {},\n", " 'output': {'name': 'synth_node:out',\n", " 'value': \"{'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\\\n\\\\n### **Mechanis\"},\n", - " 'info': {'sysmon': {'duration_ns': 2388306024,\n", - " 'thread_id': 129644846245696}}}}},\n", + " 'info': {'sysmon': {'duration_ns': 2248217150,\n", + " 'thread_id': 140336865654592}}}}},\n", " 'summary': {'node_count': 4,\n", " 'message_names': ['planner_node', 'synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", @@ -1781,7 +1781,7 @@ " 'synth_prompt': 'Answer directly in the first sentence. 
Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", " 'event_count': 2}}]},\n", " {'config': 'sysmon',\n", - " 'runtime_s': 57.76,\n", + " 'runtime_s': 53.108,\n", " 'baseline_score': 0.842,\n", " 'best_score': 0.876,\n", " 'score_gain': 0.035,\n", @@ -1812,24 +1812,24 @@ " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", - " 'msg:fca7538c517441c8': {'id': 'msg:fca7538c517441c8',\n", + " 'msg:b2e1bd7d883d423b': {'id': 'msg:b2e1bd7d883d423b',\n", " 'kind': 'message',\n", " 'name': 'planner_node',\n", " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:540',\n", " 'inputs': {},\n", " 'output': {'name': 'planner_node:out',\n", " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\\\n\\\\n### **G\"},\n", - " 'info': {'sysmon': {'duration_ns': 2500216298,\n", - " 'thread_id': 129644846245696}}},\n", - " 'msg:80b89e47995d4494': {'id': 'msg:80b89e47995d4494',\n", + " 'info': {'sysmon': {'duration_ns': 2200630349,\n", + " 'thread_id': 140336865654592}}},\n", + " 'msg:483b60be2deb462b': {'id': 'msg:483b60be2deb462b',\n", " 'kind': 'message',\n", " 'name': 'synth_node',\n", " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:544',\n", " 'inputs': {},\n", " 'output': {'name': 'synth_node:out',\n", " 'value': \"{'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living 
organism.\\\\n\\\\n### **Mechanis\"},\n", - " 'info': {'sysmon': {'duration_ns': 2567241093,\n", - " 'thread_id': 129644846245696}}}}},\n", + " 'info': {'sysmon': {'duration_ns': 2240993069,\n", + " 'thread_id': 140336865654592}}}}},\n", " 'summary': {'node_count': 4,\n", " 'message_names': ['planner_node', 'synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", diff --git a/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb b/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb index 65857ec8..955a481c 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_optimize.ipynb @@ -56,10 +56,10 @@ "execution_count": 1, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:17.980408Z", - "iopub.status.busy": "2026-04-24T14:43:17.980173Z", - "iopub.status.idle": "2026-04-24T14:43:19.093749Z", - "shell.execute_reply": "2026-04-24T14:43:19.092425Z" + "iopub.execute_input": "2026-04-24T19:41:02.951337Z", + "iopub.status.busy": "2026-04-24T19:41:02.951103Z", + "iopub.status.idle": "2026-04-24T19:41:03.528659Z", + "shell.execute_reply": "2026-04-24T19:41:03.526820Z" } }, "outputs": [ @@ -123,10 +123,10 @@ "execution_count": 2, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:19.143735Z", - "iopub.status.busy": "2026-04-24T14:43:19.143450Z", - "iopub.status.idle": "2026-04-24T14:43:19.150322Z", - "shell.execute_reply": "2026-04-24T14:43:19.148951Z" + "iopub.execute_input": "2026-04-24T19:41:03.554276Z", + "iopub.status.busy": "2026-04-24T19:41:03.554174Z", + "iopub.status.idle": "2026-04-24T19:41:03.557069Z", + "shell.execute_reply": "2026-04-24T19:41:03.556615Z" } }, "outputs": [ @@ -183,10 +183,10 @@ "execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:19.153203Z", - "iopub.status.busy": "2026-04-24T14:43:19.152991Z", - "iopub.status.idle": "2026-04-24T14:43:19.160786Z", - 
"shell.execute_reply": "2026-04-24T14:43:19.159546Z" + "iopub.execute_input": "2026-04-24T19:41:03.558308Z", + "iopub.status.busy": "2026-04-24T19:41:03.558241Z", + "iopub.status.idle": "2026-04-24T19:41:03.561208Z", + "shell.execute_reply": "2026-04-24T19:41:03.560875Z" } }, "outputs": [ @@ -265,10 +265,10 @@ "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:19.163941Z", - "iopub.status.busy": "2026-04-24T14:43:19.163732Z", - "iopub.status.idle": "2026-04-24T14:43:19.690243Z", - "shell.execute_reply": "2026-04-24T14:43:19.688959Z" + "iopub.execute_input": "2026-04-24T19:41:03.562434Z", + "iopub.status.busy": "2026-04-24T19:41:03.562367Z", + "iopub.status.idle": "2026-04-24T19:41:03.799273Z", + "shell.execute_reply": "2026-04-24T19:41:03.798692Z" } }, "outputs": [ @@ -543,10 +543,10 @@ "execution_count": 5, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:19.692995Z", - "iopub.status.busy": "2026-04-24T14:43:19.692826Z", - "iopub.status.idle": "2026-04-24T14:43:19.705614Z", - "shell.execute_reply": "2026-04-24T14:43:19.704761Z" + "iopub.execute_input": "2026-04-24T19:41:03.800594Z", + "iopub.status.busy": "2026-04-24T19:41:03.800520Z", + "iopub.status.idle": "2026-04-24T19:41:03.806670Z", + "shell.execute_reply": "2026-04-24T19:41:03.806049Z" } }, "outputs": [ @@ -736,10 +736,10 @@ "execution_count": 6, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:19.708076Z", - "iopub.status.busy": "2026-04-24T14:43:19.707899Z", - "iopub.status.idle": "2026-04-24T14:43:21.737471Z", - "shell.execute_reply": "2026-04-24T14:43:21.736701Z" + "iopub.execute_input": "2026-04-24T19:41:03.807782Z", + "iopub.status.busy": "2026-04-24T19:41:03.807709Z", + "iopub.status.idle": "2026-04-24T19:41:04.785084Z", + "shell.execute_reply": "2026-04-24T19:41:04.784596Z" } }, "outputs": [ @@ -790,10 +790,10 @@ "execution_count": 7, "metadata": { "execution": { - "iopub.execute_input": 
"2026-04-24T14:43:21.740092Z", - "iopub.status.busy": "2026-04-24T14:43:21.739934Z", - "iopub.status.idle": "2026-04-24T14:43:21.750426Z", - "shell.execute_reply": "2026-04-24T14:43:21.749351Z" + "iopub.execute_input": "2026-04-24T19:41:04.786539Z", + "iopub.status.busy": "2026-04-24T19:41:04.786455Z", + "iopub.status.idle": "2026-04-24T19:41:04.791737Z", + "shell.execute_reply": "2026-04-24T19:41:04.791303Z" } }, "outputs": [ @@ -862,10 +862,10 @@ "execution_count": 8, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:21.752662Z", - "iopub.status.busy": "2026-04-24T14:43:21.752498Z", - "iopub.status.idle": "2026-04-24T14:43:21.759255Z", - "shell.execute_reply": "2026-04-24T14:43:21.758589Z" + "iopub.execute_input": "2026-04-24T19:41:04.793343Z", + "iopub.status.busy": "2026-04-24T19:41:04.793264Z", + "iopub.status.idle": "2026-04-24T19:41:04.796738Z", + "shell.execute_reply": "2026-04-24T19:41:04.796498Z" } }, "outputs": [ @@ -876,30 +876,30 @@ "Total spans captured: 8\n", "\n", "Unique trace IDs: 1 (D9: should be 1)\n", - "Root invocation span: QA_research_graph.invoke (id=c2b080669791...)\n", + "Root invocation span: QA_research_graph.invoke (id=eb2ad6a4b371...)\n", "\n", - " Span: llm.chat.completion parent=e3ab9779\n", + " Span: llm.chat.completion parent=03174a2e\n", " gen_ai.operation.name = chat\n", " gen_ai.output.preview = {\"1\": {\"agent\": \"web_researcher\", \"action\": \"search\", \"goal\": \"collect context\",\n", " gen_ai.provider.name = stub\n", " gen_ai.request.model = stub-llm\n", " trace.temporal_ignore = true\n", "\n", - " Span: planner parent=c2b08066\n", + " Span: planner parent=eb2ad6a4\n", " gen_ai.model = stub-llm\n", " inputs.gen_ai.prompt = Create a JSON plan for: What is reinforcement learning?. Use web_researcher and \n", " inputs.user_query = What is reinforcement learning?\n", " param.planner_prompt = Create a JSON plan for: {query}. 
Use web_researcher and synthesizer; include wik\n", " param.planner_prompt.trainable = True\n", "\n", - " Span: llm.chat.completion parent=9ff08be4\n", + " Span: llm.chat.completion parent=e9328dc4\n", " gen_ai.operation.name = chat\n", " gen_ai.output.preview = {\"goto\": \"synthesizer\", \"query\": \"given step 1 of plan: {'agent': 'web_researche\n", " gen_ai.provider.name = stub\n", " gen_ai.request.model = stub-llm\n", " trace.temporal_ignore = true\n", "\n", - " Span: executor parent=c2b08066\n", + " Span: executor parent=eb2ad6a4\n", " gen_ai.model = stub-llm\n", " inputs.gen_ai.prompt = Given step 1 of plan: {'agent': 'web_researcher', 'action': 'search', 'goal': 'c\n", " inputs.step = 1\n", @@ -907,7 +907,7 @@ " param.executor_prompt = Given step {step} of plan: {plan_step} for query: {query}. Return JSON {goto,que\n", " param.executor_prompt.trainable = True\n", "\n", - " Span: llm.chat.completion parent=fef48edf\n", + " Span: llm.chat.completion parent=019e4874\n", " gen_ai.operation.name = chat\n", " gen_ai.output.preview = Stub answer for: answer: what is reinforcement learning?\n", "context:\n", @@ -917,7 +917,7 @@ " gen_ai.request.model = stub-llm\n", " trace.temporal_ignore = true\n", "\n", - " Span: synthesizer parent=c2b08066\n", + " Span: synthesizer parent=eb2ad6a4\n", " gen_ai.model = stub-llm\n", " inputs.gen_ai.prompt = Answer: What is reinforcement learning?\n", "Context:\n", @@ -930,7 +930,7 @@ "If asked for IDs, include Wikidata QIDs.\n", " param.synthesizer_prompt.trainable = True\n", "\n", - " Span: evaluator parent=c2b08066\n", + " Span: evaluator parent=eb2ad6a4\n", " eval.reasons = \n", " eval.score = 0.25\n", "\n", @@ -999,10 +999,10 @@ "execution_count": 9, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:21.761376Z", - "iopub.status.busy": "2026-04-24T14:43:21.761245Z", - "iopub.status.idle": "2026-04-24T14:43:21.773066Z", - "shell.execute_reply": "2026-04-24T14:43:21.772317Z" + "iopub.execute_input": 
"2026-04-24T19:41:04.798235Z", + "iopub.status.busy": "2026-04-24T19:41:04.798165Z", + "iopub.status.idle": "2026-04-24T19:41:04.804909Z", + "shell.execute_reply": "2026-04-24T19:41:04.804427Z" } }, "outputs": [ @@ -1021,12 +1021,12 @@ "[OK] No duplicate ParameterNodes (C7).\n", "\n", "MessageNode: 7\n", - " QA_research_graph/0/planner0 parents=['lit_22500', 'lit_60470', 'planner_prompt0']\n", + " QA_research_graph/0/planner0 parents=['lit_54400', 'lit_56270', 'planner_prompt0']\n", " QA_research_graph/0/llm.chat.completion0 parents=['planner0']\n", " QA_research_graph/0/llm.chat.completion2 parents=['synthesizer0']\n", - " QA_research_graph/0/executor0 parents=['lit_78660', 'lit_57300', 'lit_60471', 'planner0', 'executor_prompt0']\n", + " QA_research_graph/0/executor0 parents=['lit_50530', 'lit_75350', 'lit_56271', 'planner0', 'executor_prompt0']\n", " QA_research_graph/0/llm.chat.completion1 parents=['executor0']\n", - " QA_research_graph/0/synthesizer0 parents=['lit_93330', 'lit_60472', 'executor0', 'synthesizer_prompt0']\n", + " QA_research_graph/0/synthesizer0 parents=['lit_63380', 'lit_56272', 'executor0', 'synthesizer_prompt0']\n", " QA_research_graph/0/evaluator0 parents=['synthesizer0']\n", "[WARN] No top-level message nodes found.\n" ] @@ -1104,10 +1104,10 @@ "execution_count": 10, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:21.775107Z", - "iopub.status.busy": "2026-04-24T14:43:21.774972Z", - "iopub.status.idle": "2026-04-24T14:43:21.781210Z", - "shell.execute_reply": "2026-04-24T14:43:21.780433Z" + "iopub.execute_input": "2026-04-24T19:41:04.806114Z", + "iopub.status.busy": "2026-04-24T19:41:04.806045Z", + "iopub.status.idle": "2026-04-24T19:41:04.809594Z", + "shell.execute_reply": "2026-04-24T19:41:04.809148Z" } }, "outputs": [ @@ -1117,9 +1117,9 @@ "text": [ "Child LLM spans detected (via temporal_ignore): 3\n", "Top-level message nodes: 4\n", - " [OK] Node executor temporal parent → 7e199a6de5c0... 
(not a child span)\n", - " [OK] Node synthesizer temporal parent → ef0e7bec3426... (not a child span)\n", - " [OK] Node evaluator temporal parent → 6dc071e4b185... (not a child span)\n", + " [OK] Node executor temporal parent → 3d70cb4a412a... (not a child span)\n", + " [OK] Node synthesizer temporal parent → 2b414b6cdeb8... (not a child span)\n", + " [OK] Node evaluator temporal parent → 21a6a2416867... (not a child span)\n", "\n", "[OK] Temporal chaining verified — no top-level node points to child spans.\n" ] @@ -1179,10 +1179,10 @@ "execution_count": 11, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:21.783495Z", - "iopub.status.busy": "2026-04-24T14:43:21.783360Z", - "iopub.status.idle": "2026-04-24T14:43:21.787504Z", - "shell.execute_reply": "2026-04-24T14:43:21.786913Z" + "iopub.execute_input": "2026-04-24T19:41:04.810955Z", + "iopub.status.busy": "2026-04-24T19:41:04.810885Z", + "iopub.status.idle": "2026-04-24T19:41:04.813453Z", + "shell.execute_reply": "2026-04-24T19:41:04.813122Z" } }, "outputs": [ @@ -1237,10 +1237,10 @@ "execution_count": 12, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:21.789565Z", - "iopub.status.busy": "2026-04-24T14:43:21.789429Z", - "iopub.status.idle": "2026-04-24T14:43:21.797066Z", - "shell.execute_reply": "2026-04-24T14:43:21.796330Z" + "iopub.execute_input": "2026-04-24T19:41:04.814648Z", + "iopub.status.busy": "2026-04-24T19:41:04.814585Z", + "iopub.status.idle": "2026-04-24T19:41:04.818870Z", + "shell.execute_reply": "2026-04-24T19:41:04.818196Z" } }, "outputs": [ @@ -1278,10 +1278,10 @@ "execution_count": 13, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:21.799310Z", - "iopub.status.busy": "2026-04-24T14:43:21.799152Z", - "iopub.status.idle": "2026-04-24T14:43:21.802514Z", - "shell.execute_reply": "2026-04-24T14:43:21.801819Z" + "iopub.execute_input": "2026-04-24T19:41:04.820022Z", + "iopub.status.busy": "2026-04-24T19:41:04.819947Z", + 
"iopub.status.idle": "2026-04-24T19:41:04.822162Z", + "shell.execute_reply": "2026-04-24T19:41:04.821675Z" } }, "outputs": [ @@ -1327,10 +1327,10 @@ "execution_count": 14, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:21.804677Z", - "iopub.status.busy": "2026-04-24T14:43:21.804511Z", - "iopub.status.idle": "2026-04-24T14:43:21.812199Z", - "shell.execute_reply": "2026-04-24T14:43:21.811200Z" + "iopub.execute_input": "2026-04-24T19:41:04.823101Z", + "iopub.status.busy": "2026-04-24T19:41:04.823034Z", + "iopub.status.idle": "2026-04-24T19:41:04.826723Z", + "shell.execute_reply": "2026-04-24T19:41:04.826370Z" } }, "outputs": [ @@ -1401,10 +1401,10 @@ "execution_count": 15, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:21.815035Z", - "iopub.status.busy": "2026-04-24T14:43:21.814839Z", - "iopub.status.idle": "2026-04-24T14:43:21.850300Z", - "shell.execute_reply": "2026-04-24T14:43:21.849337Z" + "iopub.execute_input": "2026-04-24T19:41:04.827600Z", + "iopub.status.busy": "2026-04-24T19:41:04.827532Z", + "iopub.status.idle": "2026-04-24T19:41:04.845102Z", + "shell.execute_reply": "2026-04-24T19:41:04.844453Z" } }, "outputs": [ @@ -1496,10 +1496,10 @@ "execution_count": 16, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:21.852641Z", - "iopub.status.busy": "2026-04-24T14:43:21.852489Z", - "iopub.status.idle": "2026-04-24T14:43:21.858264Z", - "shell.execute_reply": "2026-04-24T14:43:21.857564Z" + "iopub.execute_input": "2026-04-24T19:41:04.846409Z", + "iopub.status.busy": "2026-04-24T19:41:04.846329Z", + "iopub.status.idle": "2026-04-24T19:41:04.849596Z", + "shell.execute_reply": "2026-04-24T19:41:04.849069Z" } }, "outputs": [ @@ -1578,10 +1578,10 @@ "execution_count": 17, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:21.860722Z", - "iopub.status.busy": "2026-04-24T14:43:21.860573Z", - "iopub.status.idle": "2026-04-24T14:43:23.716890Z", - "shell.execute_reply": 
"2026-04-24T14:43:23.715860Z" + "iopub.execute_input": "2026-04-24T19:41:04.850543Z", + "iopub.status.busy": "2026-04-24T19:41:04.850477Z", + "iopub.status.idle": "2026-04-24T19:41:06.771968Z", + "shell.execute_reply": "2026-04-24T19:41:06.771452Z" } }, "outputs": [ @@ -1704,10 +1704,10 @@ "execution_count": 18, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:23.720264Z", - "iopub.status.busy": "2026-04-24T14:43:23.720050Z", - "iopub.status.idle": "2026-04-24T14:43:36.283412Z", - "shell.execute_reply": "2026-04-24T14:43:36.281640Z" + "iopub.execute_input": "2026-04-24T19:41:06.773885Z", + "iopub.status.busy": "2026-04-24T19:41:06.773764Z", + "iopub.status.idle": "2026-04-24T19:41:16.658981Z", + "shell.execute_reply": "2026-04-24T19:41:16.657161Z" } }, "outputs": [ @@ -1810,10 +1810,10 @@ "execution_count": 19, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:43:36.287008Z", - "iopub.status.busy": "2026-04-24T14:43:36.286734Z", - "iopub.status.idle": "2026-04-24T14:44:32.839792Z", - "shell.execute_reply": "2026-04-24T14:44:32.838432Z" + "iopub.execute_input": "2026-04-24T19:41:16.662448Z", + "iopub.status.busy": "2026-04-24T19:41:16.662175Z", + "iopub.status.idle": "2026-04-24T19:42:14.821967Z", + "shell.execute_reply": "2026-04-24T19:42:14.821551Z" } }, "outputs": [ @@ -2060,10 +2060,10 @@ "execution_count": 20, "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:44:32.844484Z", - "iopub.status.busy": "2026-04-24T14:44:32.844131Z", - "iopub.status.idle": "2026-04-24T14:44:32.883150Z", - "shell.execute_reply": "2026-04-24T14:44:32.881786Z" + "iopub.execute_input": "2026-04-24T19:42:14.823252Z", + "iopub.status.busy": "2026-04-24T19:42:14.823170Z", + "iopub.status.idle": "2026-04-24T19:42:14.833239Z", + "shell.execute_reply": "2026-04-24T19:42:14.832906Z" } }, "outputs": [ diff --git a/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb 
b/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb index f75f53d3..ce451608 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_optimize_trace.ipynb @@ -16,10 +16,10 @@ "id": "a6bb3b02", "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:35:00.443219Z", - "iopub.status.busy": "2026-04-24T14:35:00.442806Z", - "iopub.status.idle": "2026-04-24T14:35:02.576070Z", - "shell.execute_reply": "2026-04-24T14:35:02.575019Z" + "iopub.execute_input": "2026-04-24T19:40:52.663869Z", + "iopub.status.busy": "2026-04-24T19:40:52.663624Z", + "iopub.status.idle": "2026-04-24T19:40:53.651362Z", + "shell.execute_reply": "2026-04-24T19:40:53.650922Z" } }, "outputs": [], @@ -38,10 +38,10 @@ "id": "68f6f76b", "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:35:02.579394Z", - "iopub.status.busy": "2026-04-24T14:35:02.579226Z", - "iopub.status.idle": "2026-04-24T14:35:02.585833Z", - "shell.execute_reply": "2026-04-24T14:35:02.584731Z" + "iopub.execute_input": "2026-04-24T19:40:53.653337Z", + "iopub.status.busy": "2026-04-24T19:40:53.653255Z", + "iopub.status.idle": "2026-04-24T19:40:53.656633Z", + "shell.execute_reply": "2026-04-24T19:40:53.656276Z" } }, "outputs": [], @@ -75,10 +75,10 @@ "id": "9cb6347f", "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:35:02.588275Z", - "iopub.status.busy": "2026-04-24T14:35:02.588124Z", - "iopub.status.idle": "2026-04-24T14:35:02.601284Z", - "shell.execute_reply": "2026-04-24T14:35:02.600466Z" + "iopub.execute_input": "2026-04-24T19:40:53.658031Z", + "iopub.status.busy": "2026-04-24T19:40:53.657966Z", + "iopub.status.idle": "2026-04-24T19:40:53.664281Z", + "shell.execute_reply": "2026-04-24T19:40:53.664004Z" } }, "outputs": [ @@ -113,10 +113,10 @@ "id": "6f15abf5", "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T14:35:02.603861Z", - "iopub.status.busy": 
"2026-04-24T14:35:02.603708Z", - "iopub.status.idle": "2026-04-24T14:35:02.624949Z", - "shell.execute_reply": "2026-04-24T14:35:02.624346Z" + "iopub.execute_input": "2026-04-24T19:40:53.665572Z", + "iopub.status.busy": "2026-04-24T19:40:53.665506Z", + "iopub.status.idle": "2026-04-24T19:40:53.675693Z", + "shell.execute_reply": "2026-04-24T19:40:53.675513Z" } }, "outputs": [ diff --git a/opto/features/graph/adapter.py b/opto/features/graph/adapter.py index 276fec16..64ded862 100644 --- a/opto/features/graph/adapter.py +++ b/opto/features/graph/adapter.py @@ -3,8 +3,11 @@ from __future__ import annotations import contextlib +import contextvars import json import inspect +import threading +import types from dataclasses import dataclass, field from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union @@ -44,6 +47,93 @@ def _trainable_attr(value: Any, *, default: bool = True) -> bool: return bool(getattr(value, "trainable", default)) +def _runtime_binding_name(value: Any) -> Optional[str]: + """Return the base binding name for a prompt/knob-like object.""" + raw_name = getattr(value, "name", None) or getattr(value, "_name", None) + if raw_name is None: + return None + return str(raw_name).split(":")[0].split("/")[-1] + + +def _make_closure_cell(value: Any): + """Create a fresh closure cell holding ``value``.""" + return (lambda x: lambda: x)(value).__closure__[0] + + +def _rewrite_runtime_value( + value: Any, + replacements: Mapping[str, Any], + memo: Dict[int, Any], +): + """Rebind prompt/knob references and nested helper functions.""" + name = _runtime_binding_name(value) + if name and name in replacements: + return replacements[name] + if inspect.isfunction(value): + return _rebind_runtime_function(value, replacements, memo) + return value + + +def _rebind_runtime_function( + fn: Callable[..., Any], + replacements: Mapping[str, Any], + memo: Optional[Dict[int, Any]] = None, +) -> Callable[..., Any]: + """Clone a function so closures/globals point 
at adapter-local runtime values.""" + if not inspect.isfunction(fn): + return fn + if memo is None: + memo = {} + cached = memo.get(id(fn), ...) + if cached is not ...: + return fn if cached is None else cached + + memo[id(fn)] = None + globals_copy = dict(fn.__globals__) + for name in fn.__code__.co_names: + if name in replacements: + globals_copy[name] = replacements[name] + continue + if name in globals_copy: + globals_copy[name] = _rewrite_runtime_value(globals_copy[name], replacements, memo) + + defaults = getattr(fn, "__defaults__", None) + if defaults: + defaults = tuple(_rewrite_runtime_value(value, replacements, memo) for value in defaults) + + kwdefaults = getattr(fn, "__kwdefaults__", None) + if kwdefaults: + kwdefaults = { + key: _rewrite_runtime_value(value, replacements, memo) + for key, value in kwdefaults.items() + } + + closure = None + if fn.__closure__: + closure = tuple( + _make_closure_cell(_rewrite_runtime_value(cell.cell_contents, replacements, memo)) + for cell in fn.__closure__ + ) + + rebound = types.FunctionType( + fn.__code__, + globals_copy, + name=fn.__name__, + argdefs=defaults, + closure=closure, + ) + rebound.__dict__.update(getattr(fn, "__dict__", {})) + rebound.__kwdefaults__ = kwdefaults + rebound.__annotations__ = dict(getattr(fn, "__annotations__", {})) + rebound.__qualname__ = fn.__qualname__ + rebound.__module__ = fn.__module__ + rebound.__doc__ = fn.__doc__ + rebound.__name__ = fn.__name__ + rebound.__globals__[fn.__name__] = rebound + memo[id(fn)] = rebound + return rebound + + def _normalize_named_callables( targets: Union[None, List[str], List[Callable[..., Any]], Mapping[str, Callable[..., Any]]], scope: Optional[Dict[str, Any]] = None, @@ -185,32 +275,25 @@ def __post_init__(self) -> None: self.function_targets = _normalize_named_callables(self.function_targets, self.scope) self.prompt_targets = {k: _as_parameter(k, v) for k, v in dict(self.prompt_targets or {}).items()} self.graph_knobs = {k: _as_parameter(k, v) for k, 
v in dict(self.graph_knobs or {}).items()} - self._active_sidecar: Optional[GraphRunSidecar] = None - self._compiled_cache: Dict[Tuple[str, Tuple[Tuple[str, str], ...]], Any] = {} - self._original_functions = dict(self.function_targets) - self._traced_functions = { - name: (fn if isinstance(fn, FunModule) or hasattr(fn, "_fun") else bundle( - trainable=self.train_graph_agents_functions, - traceable_code=True, - allow_external_dependencies=True, - )(fn)) - for name, fn in self.function_targets.items() - } - for fn in self._original_functions.values(): - fn_globals = getattr(fn, "__globals__", {}) - for name, prompt in self.prompt_targets.items(): - fn_globals[name] = prompt - for name, knob in self.graph_knobs.items(): - fn_globals[name] = knob - self._build_bindings() + self._user_bindings = dict(getattr(self, "_user_bindings", {}) or self.bindings or {}) + self._refresh_runtime_state() def __getstate__(self): """Drop transient runtime state so the adapter remains pickle-friendly.""" state = self.__dict__.copy() state["_active_sidecar"] = None + state["_active_sidecar_var"] = None + state["_build_lock"] = None state["_compiled_cache"] = {} return state + def __setstate__(self, state): + """Rebuild transient runtime wiring after deepcopy/pickle restore.""" + self.__dict__.update(state) + self._active_sidecar = None + self._user_bindings = dict(getattr(self, "_user_bindings", {}) or {}) + self._refresh_runtime_state() + def instrument(self, backend: Optional[str] = None, **kwargs: Any): """Wrap the adapter, keeping OTEL graph knobs live across invocations.""" effective_backend = backend or self.backend @@ -259,10 +342,69 @@ def _build_bindings(self) -> None: set=lambda v, p=code_param: p._set(v), kind="code", ) - user = dict(self.bindings) + user = dict(getattr(self, "_user_bindings", {}) or {}) auto.update(user) self.bindings = auto + def _runtime_replacements(self) -> Dict[str, Any]: + """Return the adapter-local runtime values used by cloned callables.""" + 
replacements = dict(self.prompt_targets) + replacements.update(self.graph_knobs) + return replacements + + def _refresh_funmodule_namespace( + self, + traced_fn: Any, + replacements: Mapping[str, Any], + memo: Dict[int, Any], + ) -> Any: + """Refresh a traced function so dynamic code sees adapter-local runtime values.""" + if hasattr(traced_fn, "_fun") and inspect.isfunction(traced_fn._fun): + traced_fn._fun = _rebind_runtime_function(traced_fn._fun, replacements, memo) + + existing_ldict = dict(getattr(traced_fn, "_ldict", {}) or {}) + refreshed_ldict = { + key: _rewrite_runtime_value(value, replacements, memo) + for key, value in existing_ldict.items() + } + refreshed_ldict.update(replacements) + traced_fn._ldict = refreshed_ldict + return traced_fn + + def _refresh_runtime_state(self) -> None: + """Rebuild adapter-local callables, bindings, and transient runtime state.""" + self._active_sidecar = None + self._active_sidecar_var = contextvars.ContextVar( + f"graph_adapter_active_sidecar_{id(self)}", + default=None, + ) + self._build_lock = threading.RLock() + self._compiled_cache = {} + + replacements = self._runtime_replacements() + memo: Dict[int, Any] = {} + original_functions: Dict[str, Callable[..., Any]] = {} + traced_functions: Dict[str, Any] = {} + + for name, fn in self.function_targets.items(): + runtime_fn = getattr(fn, "_fun", None) if isinstance(fn, FunModule) or hasattr(fn, "_fun") else fn + runtime_fn = _rebind_runtime_function(runtime_fn, replacements, memo) + original_functions[name] = runtime_fn + if isinstance(fn, FunModule) or hasattr(fn, "_fun"): + traced = self._refresh_funmodule_namespace(fn, replacements, memo) + else: + traced = bundle( + trainable=self.train_graph_agents_functions, + traceable_code=True, + allow_external_dependencies=True, + )(runtime_fn) + traced = self._refresh_funmodule_namespace(traced, replacements, memo) + traced_functions[name] = traced + + self._original_functions = original_functions + self._traced_functions = 
traced_functions + self._build_bindings() + def parameters(self) -> List[ParameterNode]: """Collect the unique trainable parameters owned by the adapter.""" params: List[ParameterNode] = [] @@ -306,6 +448,8 @@ def _scope_override(self, overrides: Dict[str, Any]): for key in overrides: if key in backup: self.scope[key] = backup[key] + else: + self.scope.pop(key, None) def _merge_shadow(self, sidecar: GraphRunSidecar, runtime_out: Any, traced_out: Any) -> None: """Merge traced outputs back into the sidecar's shadow state.""" @@ -325,9 +469,9 @@ def _trace_runtime_wrapper(self, name: str, traced_fn: FunModule): """Wrap a traced function so runtime execution still updates Trace state.""" def _wrapped(state: Dict[str, Any], *args: Any, **kwargs: Any): """Replay shadow inputs through the traced callable for one graph node.""" - if self._active_sidecar is None: + sidecar = self._active_sidecar_var.get() + if sidecar is None: raise RuntimeError("Trace runtime wrapper called without active sidecar") - sidecar = self._active_sidecar trace_state = dict(state) for key, traced_value in sidecar.shadow_state.items(): trace_state[key] = traced_value @@ -403,19 +547,20 @@ def _otel_runtime_wrapper(self, name: str, fn: Callable[..., Any]): old LangGraph+OTEL prototype emitted manually inside each node. 
""" def _wrapped(state: Any, *args: Any, **kwargs: Any) -> Any: + runtime_state = state + if isinstance(state, Mapping): + runtime_state = dict(state) + runtime_state.update(self._knob_values()) try: from opto.trace.io.telemetry_session import TelemetrySession except Exception: - return fn(state, *args, **kwargs) + runtime_fn = self._resolve_otel_runtime_fn(name, fn) + return runtime_fn(runtime_state, *args, **kwargs) session = TelemetrySession.current() if session is None: - return fn(state, *args, **kwargs) - - runtime_state = state - if isinstance(state, Mapping): - runtime_state = dict(state) - runtime_state.update(self._knob_values()) + runtime_fn = self._resolve_otel_runtime_fn(name, fn) + return runtime_fn(runtime_state, *args, **kwargs) with session.tracer.start_as_current_span(name) as span: span.set_attribute("message.id", name) @@ -443,34 +588,35 @@ def build_graph(self, backend: Optional[str] = None): """Build, compile, and cache the graph for ``trace`` or ``otel`` execution.""" effective_backend = backend or self.backend key = self._cache_key(effective_backend) - if key in self._compiled_cache: - return self._compiled_cache[key] - - if effective_backend == "trace": - fn_overrides = { - name: self._trace_runtime_wrapper(name, fn) - for name, fn in self._traced_functions.items() - } - elif effective_backend == "otel": - fn_overrides = { - name: self._otel_runtime_wrapper(name, fn) - for name, fn in self._original_functions.items() - } - else: - raise ValueError(f"Unsupported backend: {effective_backend!r}") - - call_kwargs = dict(self._knob_values()) - sig = inspect.signature(self.graph_factory) - for name, fn in fn_overrides.items(): - if name in sig.parameters: - call_kwargs[name] = fn - - with self._scope_override({**fn_overrides, **call_kwargs}): - graph = self.graph_factory(**{k: v for k, v in call_kwargs.items() if k in sig.parameters}) - - compiled = graph.compile() if hasattr(graph, "compile") else graph - self._compiled_cache[key] = compiled - 
return compiled + with self._build_lock: + if key in self._compiled_cache: + return self._compiled_cache[key] + + if effective_backend == "trace": + fn_overrides = { + name: self._trace_runtime_wrapper(name, fn) + for name, fn in self._traced_functions.items() + } + elif effective_backend == "otel": + fn_overrides = { + name: self._otel_runtime_wrapper(name, fn) + for name, fn in self._original_functions.items() + } + else: + raise ValueError(f"Unsupported backend: {effective_backend!r}") + + call_kwargs = dict(self._knob_values()) + sig = inspect.signature(self.graph_factory) + for name, fn in fn_overrides.items(): + if name in sig.parameters: + call_kwargs[name] = fn + + with self._scope_override({**fn_overrides, **call_kwargs}): + graph = self.graph_factory(**{k: v for k, v in call_kwargs.items() if k in sig.parameters}) + + compiled = graph.compile() if hasattr(graph, "compile") else graph + self._compiled_cache[key] = compiled + return compiled def invoke_runtime(self, state: Dict[str, Any], backend: Optional[str] = None, **kwargs: Any): """Run the adapter using the runtime backend selected for this call.""" @@ -491,7 +637,13 @@ def invoke_trace(self, state: Dict[str, Any], **kwargs: Any): sidecar.shadow_state[key] = value if isinstance(value, Node) else node(value, name=key) for key, value in self.graph_knobs.items(): sidecar.shadow_state[key] = value + for key, binding in self.bindings.items(): + try: + sidecar.binding_snapshot[key] = binding.get() + except Exception: + sidecar.binding_snapshot[key] = "" + token = self._active_sidecar_var.set(sidecar) self._active_sidecar = sidecar runtime_state = dict(state) runtime_state.update(self._knob_values()) @@ -499,6 +651,7 @@ def invoke_trace(self, state: Dict[str, Any], **kwargs: Any): graph = self.build_graph(backend="trace") result = graph.invoke(runtime_state, **kwargs) finally: + self._active_sidecar_var.reset(token) self._active_sidecar = None output_node = None diff --git a/opto/features/graph/sidecars.py 
b/opto/features/graph/sidecars.py index d97eeb50..52876cfd 100644 --- a/opto/features/graph/sidecars.py +++ b/opto/features/graph/sidecars.py @@ -3,7 +3,17 @@ from __future__ import annotations from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List + + +@dataclass +class GraphNodeEvent: + """Append-only record for one node execution within a graph run.""" + + node_name: str + occurrence: int + traced_output: Any + runtime_value: Any = None @dataclass @@ -11,7 +21,9 @@ class GraphRunSidecar: """Per-run sidecar preserving optimization state alongside runtime outputs.""" node_outputs: Dict[str, Any] = field(default_factory=dict) + node_events: List[GraphNodeEvent] = field(default_factory=list) shadow_state: Dict[str, Any] = field(default_factory=dict) + binding_snapshot: Dict[str, Any] = field(default_factory=dict) output_node: Any | None = None runtime_result: Any | None = None @@ -22,6 +34,15 @@ def record_node_output( runtime_value: Any = None, ) -> None: """Store the traced node output and any dict-shaped runtime shadow state.""" + occurrence = sum(1 for event in self.node_events if event.node_name == node_name) + self.node_events.append( + GraphNodeEvent( + node_name=node_name, + occurrence=occurrence, + traced_output=traced_output, + runtime_value=runtime_value, + ) + ) self.node_outputs[node_name] = traced_output if runtime_value is not None and isinstance(runtime_value, dict): self.shadow_state.update(runtime_value) @@ -34,10 +55,23 @@ def set_output(self, output_node: Any, runtime_result: Any) -> None: def clear(self) -> None: """Reset the sidecar for reuse in tests or debugging flows.""" self.node_outputs.clear() + self.node_events.clear() self.shadow_state.clear() + self.binding_snapshot.clear() self.output_node = None self.runtime_result = None + def to_record(self) -> Dict[str, Any]: + """Return a snapshot-friendly representation of the sidecar state.""" + return { + "node_outputs": 
dict(self.node_outputs), + "node_events": list(self.node_events), + "shadow_state": dict(self.shadow_state), + "binding_snapshot": dict(self.binding_snapshot), + "output_node": self.output_node, + "runtime_result": self.runtime_result, + } + @dataclass class OTELRunSidecar: diff --git a/opto/trace/io/optimization.py b/opto/trace/io/optimization.py index f9c4accf..53d557a2 100644 --- a/opto/trace/io/optimization.py +++ b/opto/trace/io/optimization.py @@ -104,6 +104,7 @@ class RunResult: feedback: str metrics: Dict[str, Any] otlp: Dict[str, Any] + artifacts: Dict[str, Any] = field(default_factory=dict) @dataclass @@ -707,6 +708,9 @@ def _extract_output(result: Any, sidecar: Any = None) -> Tuple[Any, Any]: feedback=er.feedback, metrics=er.metrics, otlp={}, + artifacts={ + "trace_record": sidecar.to_record() if sidecar is not None else None, + }, ) ) output_nodes.append(output_node) diff --git a/tests/unit_tests/test_graph_adapter_modulecandidate.py b/tests/unit_tests/test_graph_adapter_modulecandidate.py index 7dd68ea4..0098b448 100644 --- a/tests/unit_tests/test_graph_adapter_modulecandidate.py +++ b/tests/unit_tests/test_graph_adapter_modulecandidate.py @@ -68,3 +68,20 @@ def test_modulecandidate_get_module_works_with_graphmodule(): assert getattr(new_model.adapter, "_compiled_cache", {}) == {} out = new_model("What is CRISPR?") assert isinstance(out.data, str) + + +def test_modulecandidate_prompt_update_changes_runtime_without_mutating_base_module(): + model = make_searchable_model() + optimizer = DummyOptimizer(model.parameters()) + prompt_param = next(p for p in model.parameters() if "answer_prompt" in p.name) + candidate = ModuleCandidate( + model, + update_dict={prompt_param: "Updated: {query}"}, + optimizer=optimizer, + ) + + new_model = candidate.get_module() + out = new_model("What is CRISPR?") + + assert out.data == "Updated: What is CRISPR?" 
+ assert model.adapter.prompt_targets["answer_prompt"].data == "Base: {query}" diff --git a/tests/unit_tests/test_graph_adapter_trace.py b/tests/unit_tests/test_graph_adapter_trace.py index 086064a2..3a4feaa7 100644 --- a/tests/unit_tests/test_graph_adapter_trace.py +++ b/tests/unit_tests/test_graph_adapter_trace.py @@ -1,3 +1,7 @@ +import copy +import time +from concurrent.futures import ThreadPoolExecutor + import pytest langgraph = pytest.importorskip("langgraph.graph") @@ -104,6 +108,91 @@ def test_bindings_are_auto_generated_and_transparent(): assert adapter.graph_knobs["route_policy"].data == "alternate" +def test_deepcopy_adapter_bindings_target_clone_state(): + adapter = make_adapter() + clone = copy.deepcopy(adapter) + + clone.bindings["planner_prompt"].set("Clone plan: {query}") + clone.bindings["route_policy"].set("review") + + assert clone.prompt_targets["planner_prompt"].data == "Clone plan: {query}" + assert clone.graph_knobs["route_policy"].data == "review" + assert adapter.prompt_targets["planner_prompt"].data == "Plan: {query}" + assert adapter.graph_knobs["route_policy"].data == "direct" + + +def test_deepcopy_adapter_runtime_uses_clone_prompt_targets(): + adapter = make_adapter() + clone = copy.deepcopy(adapter) + + clone.prompt_targets["planner_prompt"]._set("Clone plan: {query}") + clone.prompt_targets["synth_prompt"]._set("Clone answer: {query} :: {plan}") + + result, sidecar = clone.invoke_trace({"query": "CRISPR"}) + + assert result["final_answer"] == "Clone answer: CRISPR :: Clone plan: CRISPR" + assert sidecar.output_node.data == result["final_answer"] + assert adapter.prompt_targets["planner_prompt"].data == "Plan: {query}" + assert adapter.prompt_targets["synth_prompt"].data == "Answer: {query} :: {plan}" + + +def test_parallel_invoke_trace_keeps_sidecars_isolated(): + planner_prompt = node("Plan: {query}", trainable=True, name="planner_prompt") + synth_prompt = node("Answer: {query} :: {plan}", trainable=True, name="synth_prompt") + + 
def planner_node(state): + time.sleep(0.05) + query = _raw(state["query"]) + return { + "query": query, + "plan": planner_prompt.data.replace("{query}", str(query)), + } + + def synth_node(state): + time.sleep(0.05) + query = _raw(state["query"]) + plan = _raw(state["plan"]) + answer = synth_prompt.data.replace("{query}", str(query)).replace("{plan}", str(plan)) + return {"final_answer": answer} + + def build_graph(planner_node=planner_node, synth_node=synth_node, route_policy="direct"): + graph = StateGraph(dict) + graph.add_node("planner", planner_node) + graph.add_node("synth", synth_node) + graph.add_edge(START, "planner") + graph.add_edge("planner", "synth") + graph.add_edge("synth", END) + return graph + + adapter = LangGraphAdapter( + backend="trace", + graph_factory=build_graph, + function_targets={"planner_node": planner_node, "synth_node": synth_node}, + prompt_targets={"planner_prompt": planner_prompt, "synth_prompt": synth_prompt}, + graph_knobs={"route_policy": "direct"}, + input_key="query", + output_key="final_answer", + ) + + def run(query): + result, sidecar = adapter.invoke_trace({"query": query}) + return { + "query": query, + "answer": result["final_answer"], + "shadow_query": _raw(sidecar.shadow_state["query"]), + "shadow_plan": _raw(sidecar.shadow_state["plan"]), + } + + with ThreadPoolExecutor(max_workers=2) as executor: + runs = list(executor.map(run, ["A", "B"])) + + answers = {item["query"]: item["answer"] for item in runs} + assert answers["A"] == "Answer: A :: Plan: A" + assert answers["B"] == "Answer: B :: Plan: B" + assert {item["query"]: item["shadow_query"] for item in runs} == {"A": "A", "B": "B"} + assert {item["query"]: item["shadow_plan"] for item in runs} == {"A": "Plan: A", "B": "Plan: B"} + + def test_instrument_graph_accepts_adapter_in_trace_mode_and_optimize_graph_uses_sidecar(): adapter = make_adapter() graph = instrument_graph(adapter=adapter, backend="trace", output_key="final_answer") @@ -119,6 +208,7 @@ def 
test_instrument_graph_accepts_adapter_in_trace_mode_and_optimize_graph_uses_ ) assert result.best_iteration == 0 assert result.best_score == 1.0 + assert result.all_runs[0][0].artifacts["trace_record"]["output_node"] is not None def test_instrument_graph_accepts_graph_argument_when_it_is_a_graph_adapter(): diff --git a/tests/unit_tests/test_optimization.py b/tests/unit_tests/test_optimization.py index a91e3b05..7bf80a52 100644 --- a/tests/unit_tests/test_optimization.py +++ b/tests/unit_tests/test_optimization.py @@ -68,6 +68,7 @@ def test_fields(self): ) assert rr.answer == "hello" assert rr.score == 0.8 + assert rr.artifacts == {} class TestOptimizationResult: From 322cb03370b13a0782cc838b1bca9d1a2bb91c38 Mon Sep 17 00:00:00 2001 From: doxav Date: Sat, 25 Apr 2026 22:38:14 +0200 Subject: [PATCH 14/16] stabilize demo of multi traces --- ...aph_instrument_and_compare_observers.ipynb | 2410 ++++++++++++----- ...ggraph_instrument_and_compare_observers.py | 178 +- opto/optimizers/optoprime.py | 6 +- opto/trace/io/instrumentation.py | 6 + opto/trace/io/observers.py | 14 + opto/trace/io/optimization.py | 11 +- opto/trace/io/sysmonitoring.py | 45 +- pyproject.toml | 7 +- tests/features_tests/test_sysmon_backend.py | 168 ++ tests/unit_tests/test_graph_observers.py | 34 + 10 files changed, 2140 insertions(+), 739 deletions(-) diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb index 897c5e3b..e2cf5362 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb @@ -22,10 +22,10 @@ "id": "run-live-compare-script", "metadata": { "execution": { - "iopub.execute_input": "2026-04-24T19:42:20.930373Z", - "iopub.status.busy": "2026-04-24T19:42:20.930178Z", - "iopub.status.idle": "2026-04-24T19:49:11.855790Z", - "shell.execute_reply": "2026-04-24T19:49:11.855292Z" + 
"iopub.execute_input": "2026-04-25T19:46:39.281438Z", + "iopub.status.busy": "2026-04-25T19:46:39.281191Z", + "iopub.status.idle": "2026-04-25T19:49:41.137432Z", + "shell.execute_reply": "2026-04-25T19:49:41.136547Z" }, "language": "python" }, @@ -35,15 +35,17 @@ "text/markdown": [ "## Optimization comparison\n", "\n", - "| config | runtime_s | baseline | best | gain | best_iteration | stability_std | score_history |\n", - "|---|---:|---:|---:|---:|---:|---:|---|\n", - "| trace | 54.513 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", - "| trace+otel | 53.619 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", - "| otel | 54.604 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", - "| trace+sysmon | 54.634 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", - "| trace+otel+sysmon | 53.552 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", - "| otel+sysmon | 54.761 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |\n", - "| sysmon | 53.108 | 0.842 | 0.876 | 0.035 | 3 | 0.000 | [0.842, 0.842, 0.863, 0.876, 0.876, 0.876] |" + "_Topology metrics remain useful even when score trajectories match, for example under the fixed offline prompt schedule._\n", + "\n", + "| config | runtime_s | baseline | best | gain | best_iteration | stability_std | node_count | edge_count | score_history |\n", + "|---|---:|---:|---:|---:|---:|---:|---:|---:|---|\n", + "| trace | 22.643 | 0.733 | 0.803 | 0.070 | 2 | 0.000 | 9 | 6 | [0.733, 0.733, 0.803] |\n", + "| trace+otel | 20.681 | 0.733 | 0.733 | 0.000 | 0 | 0.000 | 9 | 6 | [0.733, 0.733, 0.677] |\n", + "| otel | 21.909 | 0.733 | 0.837 | 0.103 | 2 | 0.000 | 6 | 5 | [0.733, 0.733, 0.837] |\n", + "| trace+sysmon | 18.639 | 0.733 | 0.733 | 0.000 | 0 | 0.000 | 9 | 6 | [0.733, 0.733, 0.65] |\n", + "| trace+otel+sysmon | 22.479 | 0.733 | 
0.733 | 0.000 | 0 | 0.000 | 9 | 6 | [0.733, 0.733, 0.677] |\n", + "| otel+sysmon | 21.463 | 0.733 | 0.915 | 0.182 | 2 | 0.000 | 6 | 5 | [0.733, 0.733, 0.915] |\n", + "| sysmon | 20.267 | 0.733 | 0.854 | 0.121 | 2 | 0.000 | 4 | 3 | [0.733, 0.733, 0.854] |" ], "text/plain": [ "" @@ -56,26 +58,30 @@ "data": { "text/markdown": [ "## trace\n", - "- Runtime: `54.513s`\n", - "- Baseline score: `0.842`\n", - "- Best score: `0.876`\n", - "- Score gain: `0.035`\n", - "- Best iteration: `3`\n", + "- Runtime: `22.643s`\n", + "- Baseline score: `0.733`\n", + "- Best score: `0.803`\n", + "- Score gain: `0.070`\n", + "- Best iteration: `2`\n", "- Post-update stability std: `0.000`\n", - "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", + "- Node count: `9`\n", + "- Edge count: `6`\n", + "- Score history: `[0.733, 0.733, 0.803]`\n", "- Best updates: `['synth_prompt']`\n", "\n", "### Final synth prompt\n", "```text\n", - "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\n", - "Plan: {plan}\n", + "Provide a direct, factual summary for the following query. Organize the information into clear sections or paragraphs following the logical flow of the provided plan. Ensure each main point from the plan is represented as a distinct part of the summary.\n", + "\n", + "Query: {query}\n", + "Source Plan: {plan}\n", "```\n", "### Final answer\n", "```text\n", - "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", + "CRISPR is a revolutionary gene-editing technology that allows scientists to make precise changes to the DNA of living organisms. 
The following summary details its function, origins, and the mechanisms that make it a transformative tool in modern science.\n", "\n", - "### **Mechanism: Molecular Scissors**\n", - "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", + "### **1. The Definition (The \"What\")**\n", + "CRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. In practical terms, it is often described as \"molecular scissors\" or a \"search-and-replace\" function for the genome. The ...\n", "```" ], "text/plain": [ @@ -90,13 +96,13 @@ "text/markdown": [ "### backend trace\n", "- Semantic message names: `['planner_node', 'synth_node']`\n", - "- All message names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "- All message names: `['final_answer_node', 'getitem', 'make_trace_case.planner_node', 'make_trace_case.planner_node3_copy', 'make_trace_case.synth_node']`\n", "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", "\n", "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + " \"synth_prompt\": \"Provide a direct, factual summary for the following query. Organize the information into clear sections or paragraphs following the logical flow of the provided plan. 
Ensure each main point from the plan is represente...\"\n", "}\n", "```" ], @@ -116,67 +122,150 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", "\n", - "state\n", - "\n", - "state\n", - "[value]\n", - "{'query': 'What is CRISPR?'}\n", + "node_0\n", + "\n", + "\n", + "state\n", + "[value]\n", + "{'query': 'What is CRISPR?'}\n", + "\n", "\n", - "\n", + "\n", + "\n", "\n", - "make_trace_case.planner_node\n", - "\n", - "make_trace_case.planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", + "node_1\n", + "\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "state->make_trace_case.planner_node\n", - "\n", - "\n", + "node_0->node_1\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "make_trace_case.synth_node\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x7fa28d95ddc0>}\n", + "node_2\n", + "\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7fea890d3b30>}\n", + "\n", "\n", - "\n", + "\n", + "\n", "\n", - "make_trace_case.planner_node->make_trace_case.synth_node\n", - "\n", - "\n", + "node_1->node_2\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "node_3\n", + "\n", + "\n", + "make_trace_case.planner_node3_copy\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_5\n", + "\n", + "\n", + "getitem\n", + "[message]\n", + "This plan outlines a concise, logical flow to explain CRISPR, moving from a b...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_3->node_5\n", + "\n", + 
"\n", "\n", - "\n", + "\n", "\n", - "synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", + "node_4\n", + "\n", + "\n", + "str\n", + "[value]\n", + "plan\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_4->node_5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_6\n", + "\n", + "\n", + "final_answer_node\n", + "[message]\n", + "CRISPR is a revolutionary gene-editing technology that allows scientists to m...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_5->node_6\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_7\n", + "\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_8\n", + "\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Provide a direct, factual summary for the following query. Organize the infor...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_8->node_6\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -186,26 +275,29 @@ "data": { "text/markdown": [ "## trace+otel\n", - "- Runtime: `53.619s`\n", - "- Baseline score: `0.842`\n", - "- Best score: `0.876`\n", - "- Score gain: `0.035`\n", - "- Best iteration: `3`\n", + "- Runtime: `20.681s`\n", + "- Baseline score: `0.733`\n", + "- Best score: `0.733`\n", + "- Score gain: `0.000`\n", + "- Best iteration: `0`\n", "- Post-update stability std: `0.000`\n", - "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", - "- Best updates: `['synth_prompt']`\n", + "- Node count: `9`\n", + "- Edge count: `6`\n", + "- Score history: `[0.733, 0.733, 0.677]`\n", + "- Best updates: `[]`\n", "\n", "### Final synth prompt\n", "```text\n", - "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\n", + "Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. Present the information concisely.\n", + "\n", + "Query: {query}\n", "Plan: {plan}\n", "```\n", "### Final answer\n", "```text\n", - "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", + "**CRISPR** (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that functions as \"molecular scissors\" or a \"search-and-replace\" tool for DNA. It consists of two primary components: **Cas9**, an enzyme that cuts the DNA strands, and **Guide RNA (gRNA)**, a programmed sequence that directs the Cas9 enzyme to a specific location in the genome.\n", "\n", - "### **Mechanism: Molecular Scissors**\n", - "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. 
Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", + "The system was adapted from a naturally occurring immune mechanism in bacteria, which use CRISPR seque...\n", "```" ], "text/plain": [ @@ -220,13 +312,13 @@ "text/markdown": [ "### backend trace\n", "- Semantic message names: `['planner_node', 'synth_node']`\n", - "- All message names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "- All message names: `['final_answer_node', 'getitem', 'make_trace_case.planner_node', 'make_trace_case.planner_node7_copy', 'make_trace_case.synth_node']`\n", "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", "\n", "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + " \"synth_prompt\": \"Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. 
Pres...\"\n", "}\n", "```" ], @@ -246,67 +338,150 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", "\n", - "state\n", - "\n", - "state\n", - "[value]\n", - "{'query': 'What is CRISPR?'}\n", + "node_0\n", + "\n", + "\n", + "state\n", + "[value]\n", + "{'query': 'What is CRISPR?'}\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "make_trace_case.planner_node\n", - "\n", - "make_trace_case.planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", + "node_1\n", + "\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", "\n", - "\n", + "\n", + "\n", "\n", - "state->make_trace_case.planner_node\n", - "\n", - "\n", + "node_0->node_1\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "make_trace_case.synth_node\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x7fa28d794fe0>}\n", + "node_2\n", + "\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7fea882850a0>}\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "make_trace_case.planner_node->make_trace_case.synth_node\n", - "\n", - "\n", + "node_1->node_2\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "node_3\n", + "\n", + "\n", + "make_trace_case.planner_node7_copy\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", "\n", - "\n", + "\n", + "\n", + "\n", + "node_5\n", + "\n", + "\n", + "getitem\n", + "[message]\n", + "This plan outlines a concise, logical flow to explain CRISPR, moving from a b...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_3->node_5\n", + "\n", + "\n", + "\n", + "\n", "\n", - 
"synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", + "node_4\n", + "\n", + "\n", + "str\n", + "[value]\n", + "plan\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_4->node_5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_6\n", + "\n", + "\n", + "final_answer_node\n", + "[message]\n", + "**CRISPR** (Clustered Regularly Interspaced Short Palindromic Repeats) is a r...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_5->node_6\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_7\n", + "\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_8\n", + "\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Provide a direct, factual explanation of the topic based on the provided plan...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_8->node_6\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -316,14 +491,17 @@ "data": { "text/markdown": [ "### observer otel\n", - "- Semantic message names: `[]`\n", - "- All message names: `[]`\n", - "- Parameter names: `[]`\n", - "- Span count: `0`\n", - "- Span names: `[]`\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['planner_node', 'synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "- Span count: `2`\n", + "- Span names: `['planner_node', 'synth_node']`\n", "\n", "```json\n", - "{}\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. 
Pres...\"\n", + "}\n", "```" ], "text/plain": [ @@ -342,15 +520,77 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_0\n", + "\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'message_id': 'planner_node'}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_0->node_1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_3\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'message_id': 'synth_node'}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1->node_3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_2\n", + "\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Provide a direct, factual explanation of the topic based on the provided plan...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_2->node_3\n", + "\n", + "\n", + "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -360,26 +600,35 @@ "data": { "text/markdown": [ "## otel\n", - "- Runtime: `54.604s`\n", - "- Baseline score: `0.842`\n", - "- Best score: `0.876`\n", - "- Score gain: `0.035`\n", - "- Best iteration: `3`\n", + "- Runtime: `21.909s`\n", + "- Baseline score: `0.733`\n", + "- Best score: `0.837`\n", + "- Score gain: `0.103`\n", + "- Best iteration: `2`\n", "- Post-update stability std: `0.000`\n", - "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", - "- Best updates: `['synth_prompt']`\n", + "- Node count: `6`\n", + "- Edge count: `5`\n", + "- Score history: `[0.733, 0.733, 0.837]`\n", + "- Best updates: `['planner_prompt', 'synth_prompt']`\n", "\n", "### Final synth prompt\n", "```text\n", - "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\n", - "Plan: {plan}\n", + "You are a science communicator. 
Using the structured plan provided below, write a thorough, engaging, and clear response to the user's query: {query}. \n", + "\n", + "Follow the plan strictly, using its sections as headers for your response. Explain all technical concepts (like enzymes or DNA sequences) in a way that remains accessible to a non-expert audience while maintaining scientific accuracy.\n", + "\n", + "Plan:\n", + "{plan}\n", "```\n", "### Final answer\n", "```text\n", - "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", + "### **I. Definition and Historical Context**\n", + "\n", + "#### **A. Technical Definition**\n", + "CRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. To visualize this, imagine a long sentence where the same word is repeated over and over, but between those repeats are unique \"spacer\" sequences. In biological terms, these are specific patterns of DNA nucleotides (the building blocks of life) found in the genomes of microorganisms.\n", "\n", - "### **Mechanism: Molecular Scissors**\n", - "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", + "#### **B. The Biological Origin**\n", + "While we now thin...\n", "```" ], "text/plain": [ @@ -401,8 +650,8 @@ "\n", "```json\n", "{\n", - " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + " \"planner_prompt\": \"As an expert scientific strategist, create a comprehensive and structured outline to explain the following topic: {query}. The outline must include: 1) A clear definition and historical context, 2) The step-by-step bi...\",\n", + " \"synth_prompt\": \"You are a science communicator. Using the structured plan provided below, write a thorough, engaging, and clear response to the user's query: {query}. \\n\\nFollow the plan strictly, using its sections as headers for your...\"\n", "}\n", "```" ], @@ -422,22 +671,111 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", "\n", - "None\n", - "\n", - "llm.chat.completion\n", - "[msg]\n", + "node_0\n", + "\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "As an expert scientific strategist, create a comprehensive and structured out...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'message_id': None}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_0->node_1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_2\n", + "\n", + "\n", + "llm.chat.completion\n", + "[message]\n", + "{'message_id': None}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1->node_2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_4\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'message_id': None}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1->node_4\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_3\n", + "\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "You are a science communicator. 
Using the structured plan provided below, wri...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_3->node_4\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_5\n", + "\n", + "\n", + "llm.chat.completion\n", + "[message]\n", + "{'message_id': None}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_4->node_5\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -447,26 +785,27 @@ "data": { "text/markdown": [ "## trace+sysmon\n", - "- Runtime: `54.634s`\n", - "- Baseline score: `0.842`\n", - "- Best score: `0.876`\n", - "- Score gain: `0.035`\n", - "- Best iteration: `3`\n", + "- Runtime: `18.639s`\n", + "- Baseline score: `0.733`\n", + "- Best score: `0.733`\n", + "- Score gain: `0.000`\n", + "- Best iteration: `0`\n", "- Post-update stability std: `0.000`\n", - "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", - "- Best updates: `['synth_prompt']`\n", + "- Node count: `9`\n", + "- Edge count: `6`\n", + "- Score history: `[0.733, 0.733, 0.65]`\n", + "- Best updates: `[]`\n", "\n", "### Final synth prompt\n", "```text\n", - "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\n", + "Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. 
Focus on delivering the facts immediately.\n", + "\n", + "Query: {query}\n", "Plan: {plan}\n", "```\n", "### Final answer\n", "```text\n", - "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", - "\n", - "### **Mechanism: Molecular Scissors**\n", - "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", + "CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that functions as a pair of molecular scissors or a search-and-replace tool for DNA. The system relies on two primary components: the Cas9 enzyme, which performs the physical cutting of the genetic material, and a guide RNA molecule that acts as a GPS to direct the enzyme to a precise location within the genome. This mechanism was originally adapted from a naturally o...\n", "```" ], "text/plain": [ @@ -481,13 +820,13 @@ "text/markdown": [ "### backend trace\n", "- Semantic message names: `['planner_node', 'synth_node']`\n", - "- All message names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "- All message names: `['final_answer_node', 'getitem', 'make_trace_case.planner_node', 'make_trace_case.planner_node11_copy', 'make_trace_case.synth_node']`\n", "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", "\n", "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + " \"synth_prompt\": \"Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. Focus on deliv...\"\n", "}\n", "```" ], @@ -507,67 +846,150 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", "\n", - "state\n", - "\n", - "state\n", - "[value]\n", - "{'query': 'What is CRISPR?'}\n", + "node_0\n", + "\n", + "\n", + "state\n", + "[value]\n", + "{'query': 'What is CRISPR?'}\n", + "\n", "\n", - "\n", + "\n", + "\n", "\n", - "make_trace_case.planner_node\n", - "\n", - "make_trace_case.planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", + "node_1\n", + "\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "state->make_trace_case.planner_node\n", - "\n", - "\n", + "node_0->node_1\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "make_trace_case.synth_node\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x7fa28d95d3d0>}\n", + "node_2\n", + "\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7fea882b28a0>}\n", + "\n", "\n", - "\n", + "\n", + "\n", "\n", - "make_trace_case.planner_node->make_trace_case.synth_node\n", - "\n", - "\n", + "node_1->node_2\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "node_3\n", + "\n", + "\n", + "make_trace_case.planner_node11_copy\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "node_5\n", + "\n", + "\n", + "getitem\n", + "[message]\n", + "This plan outlines a concise, logical flow to explain CRISPR, moving from a b...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_3->node_5\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", + "node_4\n", + "\n", + "\n", + "str\n", + "[value]\n", + "plan\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_4->node_5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_6\n", + "\n", + "\n", + "final_answer_node\n", + "[message]\n", + "CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Re...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_5->node_6\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_7\n", + "\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_8\n", + "\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Synthesize the following plan into a single, direct, and cohesive factual exp...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_8->node_6\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -585,7 +1007,7 @@ "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + " \"synth_prompt\": \"Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. 
Focus on deliv...\"\n", "}\n", "```" ], @@ -605,47 +1027,77 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", "\n", - "param:planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "node_0\n", + "\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", "\n", - "\n", - "\n", - "param:synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", "\n", - "\n", + "\n", "\n", - "msg:ba87846d33ed46da\n", - "\n", - "planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", + "node_2\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_0->node_2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1\n", + "\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Synthesize the following plan into a single, direct, and cohesive factual exp...\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "msg:c4faa984c1eb466d\n", - "\n", - "synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x7fa28d95d3d0>}\n", + "node_3\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7fea882b28a0>}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1->node_3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_2->node_3\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -655,26 +1107,29 @@ "data": { "text/markdown": [ "## trace+otel+sysmon\n", - "- Runtime: `53.552s`\n", - "- Baseline score: `0.842`\n", - "- Best score: `0.876`\n", - "- Score gain: `0.035`\n", - "- Best iteration: `3`\n", + "- Runtime: `22.479s`\n", + "- Baseline score: `0.733`\n", + 
"- Best score: `0.733`\n", + "- Score gain: `0.000`\n", + "- Best iteration: `0`\n", "- Post-update stability std: `0.000`\n", - "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", - "- Best updates: `['synth_prompt']`\n", + "- Node count: `9`\n", + "- Edge count: `6`\n", + "- Score history: `[0.733, 0.733, 0.677]`\n", + "- Best updates: `[]`\n", "\n", "### Final synth prompt\n", "```text\n", - "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\n", + "Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. Ensure every sentence is complete and strictly follows the logical flow of the plan.\n", + "\n", + "Topic: {query}\n", "Plan: {plan}\n", + "\n", + "Direct Response:\n", "```\n", "### Final answer\n", "```text\n", - "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", - "\n", - "### **Mechanism: Molecular Scissors**\n", - "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", + "CRISPR stands for Clustered Regularly Interspaced Short Palindromic Repeats and functions as a revolutionary gene-editing tool. It is frequently described as \"molecular scissors\" or a \"search-and-replace\" function for the genome, allowing scientists to modify DNA with high precision. 
The system consists of two primary components: the Cas9 enzyme, which acts as the scissors to cut the DNA strand, and the Guide RNA (gRNA), which serves as a GPS to lead the enzyme to a specific genetic sequence....\n", "```" ], "text/plain": [ @@ -689,13 +1144,13 @@ "text/markdown": [ "### backend trace\n", "- Semantic message names: `['planner_node', 'synth_node']`\n", - "- All message names: `['make_trace_case.planner_node', 'make_trace_case.synth_node']`\n", + "- All message names: `['final_answer_node', 'getitem', 'make_trace_case.planner_node', 'make_trace_case.planner_node15_copy', 'make_trace_case.synth_node']`\n", "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", "\n", "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + " \"synth_prompt\": \"Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \\\"###\\\"), and do not use introductory or concluding remarks. 
Ensu...\"\n", "}\n", "```" ], @@ -715,67 +1170,150 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", "\n", - "state\n", - "\n", - "state\n", - "[value]\n", - "{'query': 'What is CRISPR?'}\n", + "node_0\n", + "\n", + "\n", + "state\n", + "[value]\n", + "{'query': 'What is CRISPR?'}\n", + "\n", "\n", - "\n", + "\n", + "\n", "\n", - "make_trace_case.planner_node\n", - "\n", - "make_trace_case.planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", + "node_1\n", + "\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "state->make_trace_case.planner_node\n", - "\n", - "\n", + "node_0->node_1\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "make_trace_case.synth_node\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x7fa28ce7b170>}\n", + "node_2\n", + "\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7fea88285040>}\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "make_trace_case.planner_node->make_trace_case.synth_node\n", - "\n", - "\n", + "node_1->node_2\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "node_3\n", + "\n", + "\n", + "make_trace_case.planner_node15_copy\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_5\n", + "\n", + "\n", + "getitem\n", + "[message]\n", + "This plan outlines a concise, logical flow to explain CRISPR, moving from a b...\n", + "\n", "\n", - "\n", + "\n", + "\n", + "\n", + "node_3->node_5\n", + "\n", + "\n", + "\n", + "\n", "\n", - 
"synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", + "node_4\n", + "\n", + "\n", + "str\n", + "[value]\n", + "plan\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_4->node_5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_6\n", + "\n", + "\n", + "final_answer_node\n", + "[message]\n", + "CRISPR stands for Clustered Regularly Interspaced Short Palindromic Repeats a...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_5->node_6\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_7\n", + "\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_8\n", + "\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Respond to the topic using the provided plan. Your response must be extremely...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_8->node_6\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -793,7 +1331,7 @@ "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + " \"synth_prompt\": \"Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \\\"###\\\"), and do not use introductory or concluding remarks. 
Ensu...\"\n", "}\n", "```" ], @@ -813,47 +1351,77 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", "\n", - "param:planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "node_0\n", + "\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", "\n", - "\n", - "\n", - "param:synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", "\n", - "\n", + "\n", "\n", - "msg:94fd3f1208e2462c\n", - "\n", - "planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", + "node_2\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_0->node_2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1\n", + "\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Respond to the topic using the provided plan. 
Your response must be extremely...\n", + "\n", "\n", - "\n", + "\n", + "\n", "\n", - "msg:111678fc76c944b1\n", - "\n", - "synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.Node object at 0x7fa28ce7b170>}\n", + "node_3\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7fea88285040>}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1->node_3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_2->node_3\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -863,14 +1431,17 @@ "data": { "text/markdown": [ "### observer otel\n", - "- Semantic message names: `[]`\n", - "- All message names: `[]`\n", - "- Parameter names: `[]`\n", - "- Span count: `0`\n", - "- Span names: `[]`\n", + "- Semantic message names: `['planner_node', 'synth_node']`\n", + "- All message names: `['planner_node', 'synth_node']`\n", + "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", + "- Span count: `2`\n", + "- Span names: `['planner_node', 'synth_node']`\n", "\n", "```json\n", - "{}\n", + "{\n", + " \"planner_prompt\": \"Create a short plan for: {query}\",\n", + " \"synth_prompt\": \"Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \\\"###\\\"), and do not use introductory or concluding remarks. 
Ensu...\"\n", + "}\n", "```" ], "text/plain": [ @@ -889,15 +1460,77 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_0\n", + "\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'message_id': 'planner_node'}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_0->node_1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_3\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'message_id': 'synth_node'}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1->node_3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_2\n", + "\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Respond to the topic using the provided plan. Your response must be extremely...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_2->node_3\n", + "\n", + "\n", + "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -907,26 +1540,36 @@ "data": { "text/markdown": [ "## otel+sysmon\n", - "- Runtime: `54.761s`\n", - "- Baseline score: `0.842`\n", - "- Best score: `0.876`\n", - "- Score gain: `0.035`\n", - "- Best iteration: `3`\n", + "- Runtime: `21.463s`\n", + "- Baseline score: `0.733`\n", + "- Best score: `0.915`\n", + "- Score gain: `0.182`\n", + "- Best iteration: `2`\n", "- Post-update stability std: `0.000`\n", - "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", - "- Best updates: `['synth_prompt']`\n", + "- Node count: `6`\n", + "- Edge count: `5`\n", + "- Score history: `[0.733, 0.733, 0.915]`\n", + "- Best updates: `['planner_prompt', 'synth_prompt']`\n", "\n", "### Final synth prompt\n", "```text\n", - "Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\n", - "Plan: {plan}\n", + "Act as an expert scientific communicator. 
Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \n", + "\n", + "Instructions:\n", + "- Expand on each point of the plan with accurate details.\n", + "- Use a professional yet accessible tone.\n", + "- Ensure smooth transitions between the definition, components, and significance sections.\n", + "- Conclude with a summary of the technology's impact.\n", + "\n", + "Plan:\n", + "{plan}\n", "```\n", "### Final answer\n", "```text\n", - "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", + "To understand the future of medicine and biotechnology, one must understand **CRISPR**. Often described as the most significant biological discovery of the 21st century, CRISPR has transformed from an obscure bacterial defense mechanism into a powerful tool for rewriting the code of life.\n", "\n", - "### **Mechanism: Molecular Scissors**\n", - "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", + "### I. Definition and Etymology: The Blueprint of the System\n", + "At its most basic level, **CRISPR** stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. While the name is a mouthful, it de...\n", "```" ], "text/plain": [ @@ -948,8 +1591,8 @@ "\n", "```json\n", "{\n", - " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + " \"planner_prompt\": \"Create a detailed and structured outline for a comprehensive answer to the query: {query}. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Spe...\",\n", + " \"synth_prompt\": \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \\n\\nInstructions:\\n- Expand on each point of the plan with acc...\"\n", "}\n", "```" ], @@ -969,22 +1612,111 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", "\n", - "None\n", - "\n", - "llm.chat.completion\n", - "[msg]\n", + "node_0\n", + "\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a detailed and structured outline for a comprehensive answer to the qu...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'message_id': None}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_0->node_1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_2\n", + "\n", + "\n", + "llm.chat.completion\n", + "[message]\n", + "{'message_id': None}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1->node_2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_4\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'message_id': None}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1->node_4\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_3\n", + "\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Act as an expert scientific communicator. 
Using the detailed plan provided be...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_3->node_4\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_5\n", + "\n", + "\n", + "llm.chat.completion\n", + "[message]\n", + "{'message_id': None}\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_4->node_5\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1001,8 +1733,8 @@ "\n", "```json\n", "{\n", - " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + " \"planner_prompt\": \"Create a detailed and structured outline for a comprehensive answer to the query: {query}. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Spe...\",\n", + " \"synth_prompt\": \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \\n\\nInstructions:\\n- Expand on each point of the plan with acc...\"\n", "}\n", "```" ], @@ -1022,47 +1754,77 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", "\n", - "param:planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "node_0\n", + "\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a detailed and structured outline for a comprehensive answer to the qu...\n", + "\n", "\n", - "\n", - "\n", - "param:synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. 
Then add three short titled sections w...\n", "\n", - "\n", + "\n", "\n", - "msg:8811b61e4405470e\n", - "\n", - "planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", + "node_2\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': 'This outline provides a structured fram...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_0->node_2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1\n", + "\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Act as an expert scientific communicator. Using the detailed plan provided be...\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "msg:6597ff076cbf41e4\n", - "\n", - "synth_node\n", - "[message]\n", - "{'final_answer': 'CRISPR is a revolutionary gene-editing technology that allo...\n", + "node_3\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': 'To understand the future of medicine and biotechnology, one...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1->node_3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_2->node_3\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1072,26 +1834,31 @@ "data": { "text/markdown": [ "## sysmon\n", - "- Runtime: `53.108s`\n", - "- Baseline score: `0.842`\n", - "- Best score: `0.876`\n", - "- Score gain: `0.035`\n", - "- Best iteration: `3`\n", + "- Runtime: `20.267s`\n", + "- Baseline score: `0.733`\n", + "- Best score: `0.854`\n", + "- Score gain: `0.121`\n", + "- Best iteration: `2`\n", "- Post-update stability std: `0.000`\n", - "- Score history: `[0.842, 0.842, 0.863, 0.876, 0.876, 0.876]`\n", - "- Best updates: `['synth_prompt']`\n", + "- Node count: `4`\n", + "- Edge count: `3`\n", + "- Score history: `[0.733, 0.733, 0.854]`\n", + "- Best updates: `['planner_prompt', 'synth_prompt']`\n", "\n", "### Final synth prompt\n", "```text\n", - "Answer directly in the first sentence. 
Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\n", + "Give a direct, factual answer to the query using the provided plan. Do not include any introductory remarks, conversational filler, or meta-commentary. Start your response immediately with the answer.\n", + "\n", + "Query: {query}\n", "Plan: {plan}\n", "```\n", "### Final answer\n", "```text\n", - "CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\n", + "CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a gene-editing technology that allows scientists to precisely modify DNA within living organisms. It is adapted from a naturally occurring immune system in bacteria used to destroy the DNA of invading viruses.\n", "\n", - "### **Mechanism: Molecular Scissors**\n", - "The system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates ...\n", + "The system, commonly known as CRISPR-Cas9, consists of two main components:\n", + "* **Cas9:** An enzyme that acts as \"molecular scissors\" to cut the DNA.\n", + "* **Guide RNA (gRNA):** A pre-designed RNA sequence that leads t...\n", "```" ], "text/plain": [ @@ -1112,8 +1879,8 @@ "\n", "```json\n", "{\n", - " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}\"\n", + " \"planner_prompt\": \"Provide a concise list of key factual points to answer: {query}\",\n", + " \"synth_prompt\": \"Give a direct, factual answer to the query using the provided plan. Do not include any introductory remarks, conversational filler, or meta-commentary. Start your response immediately with the answer.\\n\\nQuery: {query}\\n...\"\n", "}\n", "```" ], @@ -1133,47 +1900,77 @@ "\n", "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", "\n", - "param:planner_prompt\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "node_0\n", + "\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Provide a concise list of key factual points to answer: {query}\n", + "\n", "\n", - "\n", - "\n", - "param:synth_prompt\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Answer directly in the first sentence. Then add three short titled sections w...\n", "\n", - "\n", + "\n", "\n", - "msg:b2e1bd7d883d423b\n", - "\n", - "planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical f...\n", + "node_2\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': 'Here is a concise list of key factual p...\n", + "\n", "\n", - "\n", + "\n", + "\n", + "\n", + "node_0->node_2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1\n", + "\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Give a direct, factual answer to the query using the provided plan. 
Do not in...\n", + "\n", + "\n", + "\n", + "\n", "\n", - "msg:483b60be2deb462b\n", - "\n", - "synth_node\n", - "[message]\n", - "{'final_answer': 'CRISPR is a revolutionary gene-editing technology that allo...\n", + "node_3\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic R...\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_1->node_3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "node_2->node_3\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1183,17 +1980,19 @@ "data": { "text/plain": [ "[{'config': 'trace',\n", - " 'runtime_s': 54.513,\n", - " 'baseline_score': 0.842,\n", - " 'best_score': 0.876,\n", - " 'score_gain': 0.035,\n", - " 'best_iteration': 3,\n", - " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'runtime_s': 22.643,\n", + " 'baseline_score': 0.733,\n", + " 'best_score': 0.803,\n", + " 'score_gain': 0.07,\n", + " 'best_iteration': 2,\n", + " 'score_history': [0.733, 0.733, 0.803],\n", " 'stability_std': 0.0,\n", - " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", - " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", - " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", + " 'node_count': 9,\n", + " 'edge_count': 6,\n", + " 'best_updates': {'synth_prompt': 'Provide a direct, factual summary for the following query. Do not include introductory text, headers, or structural labels. Use the provided plan for factual content only: {query}\\nSource Content: {plan}'},\n", + " 'final_synth_prompt': 'Provide a direct, factual summary for the following query. Organize the information into clear sections or paragraphs following the logical flow of the provided plan. 
Ensure each main point from the plan is represented as a distinct part of the summary.\\n\\nQuery: {query}\\nSource Plan: {plan}',\n", + " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to make precise changes to the DNA of living organisms. The following summary details its function, origins, and the mechanisms that make it a transformative tool in modern science.\\n\\n### **1. The Definition (The \"What\")**\\nCRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. In practical terms, it is often described as \"molecular scissors\" or a \"search-and-replace\" function for the genome. The system consists of two primary components that allow it to edit genetic code with high precision:\\n* **Cas9:** An enzyme that acts as the \"scissors,\" capable of cutting through strands of DNA at a specific location.\\n* **Guide RNA (gRNA):** A small piece of pre-designed RNA sequence that acts as a \"GPS.\" It binds to the Cas9 enzyme and leads it to the exact genetic sequence that needs to be modified.\\n\\n### **2. The Origin (The \"Where\")**\\nWhile',\n", + " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to make precise changes to the DNA of living organisms. The following summary details its function, orig...',\n", " 'observers': [],\n", " 'views': [{'carrier': 'trace',\n", " 'origin': 'backend',\n", @@ -1215,7 +2014,7 @@ " 'inputs': {'in_0': {'ref': 'state'}},\n", " 'output': {'name': 'make_trace_case.planner_node:out',\n", " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. 
The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}}},\n", + " 'plan': }}},\n", " {'id': 'make_trace_case.synth_node',\n", " 'kind': 'message',\n", " 'name': 'make_trace_case.synth_node',\n", @@ -1223,7 +2022,38 @@ " 'description': '[make_trace_case.synth_node]',\n", " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", + " 'value': {'final_answer': }}},\n", + " {'id': 'make_trace_case.planner_node3_copy',\n", + " 'kind': 'message',\n", + " 'name': 'make_trace_case.planner_node3_copy',\n", + " 'op': 'make_trace_case.planner_node',\n", + " 'description': '[make_trace_case.planner_node]',\n", + " 'inputs': {},\n", + " 'output': {'name': 'make_trace_case.planner_node3_copy:out',\n", + " 'value': {'query': 'What is CRISPR?',\n", + " 'plan': }}},\n", + " {'id': 'str',\n", + " 'kind': 'value',\n", + " 'name': 'str',\n", + " 'value': 'plan',\n", + " 'description': '[Node]'},\n", + " {'id': 'getitem',\n", + " 'kind': 'message',\n", + " 'name': 'getitem',\n", + " 'op': 'getitem',\n", + " 'description': '[getitem] This is a getitem operator of x based on index.',\n", + " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node3_copy'},\n", + " 'in_1': {'ref': 'str'}},\n", + " 'output': {'name': 'getitem:out',\n", + " 'value': 'This plan outlines a concise, logical 
flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}},\n", + " {'id': 'final_answer_node',\n", + " 'kind': 'message',\n", + " 'name': 'final_answer_node',\n", + " 'op': 'llm',\n", + " 'description': '[llm] synth',\n", + " 'inputs': {'in_0': {'ref': 'synth_prompt'}, 'in_1': {'ref': 'getitem'}},\n", + " 'output': {'name': 'final_answer_node:out',\n", + " 'value': 'CRISPR is a revolutionary gene-editing technology that allows scientists to make precise changes to the DNA of living organisms. The following summary details its function, origins, and the mechanisms that make it a transformative tool in modern science.\\n\\n### **1. The Definition (The \"What\")**\\nCRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. In practical terms, it is often described as \"molecular scissors\" or a \"search-and-replace\" function for the genome. 
The system consists of two primary components that allow it to edit genetic code with high precision:\\n* **Cas9:** An enzyme that acts as the \"scissors,\" capable of cutting through strands of DNA at a specific location.\\n* **Guide RNA (gRNA):** A small piece of pre-designed RNA sequence that acts as a \"GPS.\" It binds to the Cas9 enzyme and leads it to the exact genetic sequence that needs to be modified.\\n\\n### **2. The Origin (The \"Where\")**\\nWhile'}},\n", " {'id': 'planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", @@ -1233,28 +2063,33 @@ " {'id': 'synth_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", - " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'value': 'Provide a direct, factual summary for the following query. Organize the information into clear sections or paragraphs following the logical flow of the provided plan. Ensure each main point from the plan is represented as a distinct part of the summary.\\n\\nQuery: {query}\\nSource Plan: {plan}',\n", " 'trainable': True,\n", " 'description': '[Parameter]'}]},\n", - " 'summary': {'node_count': 5,\n", - " 'message_names': ['make_trace_case.planner_node',\n", + " 'summary': {'node_count': 9,\n", + " 'message_names': ['final_answer_node',\n", + " 'getitem',\n", + " 'make_trace_case.planner_node',\n", + " 'make_trace_case.planner_node3_copy',\n", " 'make_trace_case.synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", " 'param_names': ['planner_prompt', 'synth_prompt'],\n", " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}'}}}]},\n", + " 'synth_prompt': 'Provide a direct, factual summary for the following query. Organize the information into clear sections or paragraphs following the logical flow of the provided plan. Ensure each main point from the plan is represente...'}}}]},\n", " {'config': 'trace+otel',\n", - " 'runtime_s': 53.619,\n", - " 'baseline_score': 0.842,\n", - " 'best_score': 0.876,\n", - " 'score_gain': 0.035,\n", - " 'best_iteration': 3,\n", - " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'runtime_s': 20.681,\n", + " 'baseline_score': 0.733,\n", + " 'best_score': 0.733,\n", + " 'score_gain': 0.0,\n", + " 'best_iteration': 0,\n", + " 'score_history': [0.733, 0.733, 0.677],\n", " 'stability_std': 0.0,\n", - " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", - " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. 
The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", - " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", + " 'node_count': 9,\n", + " 'edge_count': 6,\n", + " 'best_updates': {},\n", + " 'final_synth_prompt': 'Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. Present the information concisely.\\n\\nQuery: {query}\\nPlan: {plan}',\n", + " 'final_answer': '**CRISPR** (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that functions as \"molecular scissors\" or a \"search-and-replace\" tool for DNA. It consists of two primary components: **Cas9**, an enzyme that cuts the DNA strands, and **Guide RNA (gRNA)**, a programmed sequence that directs the Cas9 enzyme to a specific location in the genome.\\n\\nThe system was adapted from a naturally occurring immune mechanism in bacteria, which use CRISPR sequences to identify and destroy the DNA of invading viruses. 
Scientists repurposed this biological process into a versatile laboratory tool capable of precisely modifying the genetic code of living organisms.\\n\\nIn practice, CRISPR is used to disable specific genes, correct harmful mutations, or insert new genetic material. Its applications include treating genetic disorders like sickle cell anemia, developing pest-resistant crops, and advancing cancer immunotherapy. However, the technology raises significant ethical considerations, particularly regarding \"germline editing\"—modifications to human embryos that are heritable—and the potential for unintended',\n", + " 'answer_preview': '**CRISPR** (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that functions as \"molecular scissors\" or a \"search-and-replace...',\n", " 'observers': ['otel'],\n", " 'views': [{'carrier': 'trace',\n", " 'origin': 'backend',\n", @@ -1276,7 +2111,7 @@ " 'inputs': {'in_0': {'ref': 'state'}},\n", " 'output': {'name': 'make_trace_case.planner_node:out',\n", " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}}},\n", + " 'plan': }}},\n", " {'id': 'make_trace_case.synth_node',\n", " 'kind': 'message',\n", " 'name': 'make_trace_case.synth_node',\n", @@ -1284,7 +2119,38 @@ " 'description': '[make_trace_case.synth_node]',\n", " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", + " 'value': {'final_answer': }}},\n", + " {'id': 'make_trace_case.planner_node7_copy',\n", + " 'kind': 'message',\n", + " 'name': 'make_trace_case.planner_node7_copy',\n", + " 'op': 'make_trace_case.planner_node',\n", + " 'description': '[make_trace_case.planner_node]',\n", + " 'inputs': {},\n", + " 'output': {'name': 'make_trace_case.planner_node7_copy:out',\n", + " 'value': {'query': 'What is CRISPR?',\n", + " 'plan': }}},\n", + " {'id': 'str',\n", + " 'kind': 'value',\n", + " 'name': 'str',\n", + " 'value': 'plan',\n", + " 'description': '[Node]'},\n", + " {'id': 'getitem',\n", + " 'kind': 'message',\n", + " 'name': 'getitem',\n", + " 'op': 'getitem',\n", + " 'description': '[getitem] This is a getitem operator of x based on index.',\n", + " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node7_copy'},\n", + " 'in_1': {'ref': 'str'}},\n", + " 'output': {'name': 'getitem:out',\n", + " 'value': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. 
The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}},\n", + " {'id': 'final_answer_node',\n", + " 'kind': 'message',\n", + " 'name': 'final_answer_node',\n", + " 'op': 'llm',\n", + " 'description': '[llm] synth',\n", + " 'inputs': {'in_0': {'ref': 'synth_prompt'}, 'in_1': {'ref': 'getitem'}},\n", + " 'output': {'name': 'final_answer_node:out',\n", + " 'value': '**CRISPR** (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that functions as \"molecular scissors\" or a \"search-and-replace\" tool for DNA. It consists of two primary components: **Cas9**, an enzyme that cuts the DNA strands, and **Guide RNA (gRNA)**, a programmed sequence that directs the Cas9 enzyme to a specific location in the genome.\\n\\nThe system was adapted from a naturally occurring immune mechanism in bacteria, which use CRISPR sequences to identify and destroy the DNA of invading viruses. Scientists repurposed this biological process into a versatile laboratory tool capable of precisely modifying the genetic code of living organisms.\\n\\nIn practice, CRISPR is used to disable specific genes, correct harmful mutations, or insert new genetic material. Its applications include treating genetic disorders like sickle cell anemia, developing pest-resistant crops, and advancing cancer immunotherapy. 
However, the technology raises significant ethical considerations, particularly regarding \"germline editing\"—modifications to human embryos that are heritable—and the potential for unintended'}},\n", " {'id': 'planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", @@ -1294,107 +2160,157 @@ " {'id': 'synth_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", - " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'value': 'Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. Present the information concisely.\\n\\nQuery: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", " 'description': '[Parameter]'}]},\n", - " 'summary': {'node_count': 5,\n", - " 'message_names': ['make_trace_case.planner_node',\n", + " 'summary': {'node_count': 9,\n", + " 'message_names': ['final_answer_node',\n", + " 'getitem',\n", + " 'make_trace_case.planner_node',\n", + " 'make_trace_case.planner_node7_copy',\n", " 'make_trace_case.synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", " 'param_names': ['planner_prompt', 'synth_prompt'],\n", " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'}}},\n", + " 'synth_prompt': 'Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. 
Pres...'}}},\n", " {'carrier': 'otel',\n", " 'origin': 'observer',\n", " 'doc': {'version': 'trace-json/1.0+otel',\n", " 'agent': {'id': 'trace+otel', 'service': 'trace+otel'},\n", - " 'otel_meta': {'trace_id': None},\n", - " 'nodes': {},\n", + " 'otel_meta': {'trace_id': 'ae6f7b5943ea2cfa381a0076f0df71b7'},\n", + " 'nodes': {'trace+otel:param_planner_prompt': {'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'data': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'info': {'otel': {'span_id': '6ce26b54d4074ce5'}},\n", + " 'id': 'trace+otel:param_planner_prompt'},\n", + " 'trace+otel:planner_node': {'kind': 'msg',\n", + " 'name': 'planner_node',\n", + " 'op': 'unspecified',\n", + " 'inputs': {'param_planner_prompt': 'trace+otel:param_planner_prompt'},\n", + " 'data': {'message_id': 'planner_node'},\n", + " 'info': {'otel': {'trace_id': 'b0aacf4373aa7b7b9f5b9e92d326ee3c',\n", + " 'span_id': '6ce26b54d4074ce5',\n", + " 'parent_span_id': '',\n", + " 'service': 'trace+otel',\n", + " 'temporal_ignore': False}},\n", + " 'id': 'trace+otel:planner_node'},\n", + " 'trace+otel:param_synth_prompt': {'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'data': 'Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. 
Present the information concisely.\\n\\nQuery: {query}\\nPlan: {plan}',\n", + " 'trainable': True,\n", + " 'info': {'otel': {'span_id': '4c8072c4f5e168e0'}},\n", + " 'id': 'trace+otel:param_synth_prompt'},\n", + " 'trace+otel:synth_node': {'kind': 'msg',\n", + " 'name': 'synth_node',\n", + " 'op': 'unspecified',\n", + " 'inputs': {'parent': 'trace+otel:planner_node',\n", + " 'param_synth_prompt': 'trace+otel:param_synth_prompt'},\n", + " 'data': {'message_id': 'synth_node'},\n", + " 'info': {'otel': {'trace_id': 'ae6f7b5943ea2cfa381a0076f0df71b7',\n", + " 'span_id': '4c8072c4f5e168e0',\n", + " 'parent_span_id': '6ce26b54d4074ce5',\n", + " 'service': 'trace+otel',\n", + " 'temporal_ignore': False}},\n", + " 'id': 'trace+otel:synth_node'}},\n", " 'context': {}},\n", - " 'summary': {'node_count': 0,\n", - " 'message_names': [],\n", - " 'semantic_messages': [],\n", - " 'param_names': [],\n", - " 'param_values': {},\n", - " 'span_count': 0,\n", - " 'span_names': [],\n", - " 'param_keys': []}}]},\n", + " 'summary': {'node_count': 4,\n", + " 'message_names': ['planner_node', 'synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. 
Pres...'},\n", + " 'span_count': 2,\n", + " 'span_names': ['planner_node', 'synth_node'],\n", + " 'param_keys': ['param.planner_prompt',\n", + " 'param.planner_prompt.trainable',\n", + " 'param.synth_prompt',\n", + " 'param.synth_prompt.trainable']}}]},\n", " {'config': 'otel',\n", - " 'runtime_s': 54.604,\n", - " 'baseline_score': 0.842,\n", - " 'best_score': 0.876,\n", - " 'score_gain': 0.035,\n", - " 'best_iteration': 3,\n", - " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'runtime_s': 21.909,\n", + " 'baseline_score': 0.733,\n", + " 'best_score': 0.837,\n", + " 'score_gain': 0.103,\n", + " 'best_iteration': 2,\n", + " 'score_history': [0.733, 0.733, 0.837],\n", " 'stability_std': 0.0,\n", - " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", - " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. 
The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", - " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", + " 'node_count': 6,\n", + " 'edge_count': 5,\n", + " 'best_updates': {'planner_prompt': 'As an expert scientific researcher, create a detailed step-by-step plan to explain the following topic: {query}. The plan should cover the definition, biological mechanism, and real-world applications.',\n", + " 'synth_prompt': \"Using the plan provided below, write a clear and informative response to the user's query: {query}. Ensure you follow the logical flow of the plan and explain technical terms simply.\\n\\nPlan:\\n{plan}\"},\n", + " 'final_synth_prompt': \"You are a science communicator. Using the structured plan provided below, write a thorough, engaging, and clear response to the user's query: {query}. \\n\\nFollow the plan strictly, using its sections as headers for your response. Explain all technical concepts (like enzymes or DNA sequences) in a way that remains accessible to a non-expert audience while maintaining scientific accuracy.\\n\\nPlan:\\n{plan}\",\n", + " 'final_answer': '### **I. Definition and Historical Context**\\n\\n#### **A. Technical Definition**\\nCRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. 
To visualize this, imagine a long sentence where the same word is repeated over and over, but between those repeats are unique \"spacer\" sequences. In biological terms, these are specific patterns of DNA nucleotides (the building blocks of life) found in the genomes of microorganisms.\\n\\n#### **B. The Biological Origin**\\nWhile we now think of CRISPR as a laboratory tool, it actually evolved billions of years ago as a microscopic immune system. Bacteria and archaea are constantly under attack by viruses called bacteriophages. When a bacterium survives a viral attack, it takes a \"snapshot\" of the virus\\'s DNA and tucks it into its own genome within those \"spacer\" regions mentioned above. This acts as a molecular \"Most Wanted\" poster, allowing the bacterium to recognize and defend itself if that specific virus ever attacks again.\\n\\n#### **C. The \"Genetic Scissors\" Breakthrough**',\n", + " 'answer_preview': '### **I. Definition and Historical Context**\\n\\n#### **A. Technical Definition**\\nCRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. To visualize this...',\n", " 'observers': [],\n", " 'views': [{'carrier': 'otel',\n", " 'origin': 'backend',\n", " 'doc': {'version': 'trace-json/1.0+otel',\n", " 'agent': {'id': 'otel', 'service': 'otel'},\n", - " 'otel_meta': {'trace_id': '0de02cf99df97767e783ee43a8abff97'},\n", + " 'otel_meta': {'trace_id': '458c673d2635aefab1f6f63c396f7a94'},\n", " 'nodes': {'otel:param_planner_prompt': {'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", - " 'data': 'Create a short plan for: {query}',\n", + " 'data': 'As an expert scientific strategist, create a comprehensive and structured outline to explain the following topic: {query}. The outline must include: 1) A clear definition and historical context, 2) The step-by-step biological mechanism, 3) Notable real-world applications in medicine or agriculture, and 4) Future implications. 
Ensure the plan is concise yet covers all technical essentials.',\n", " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '2f304e2eb23de6bd'}}},\n", - " 'otel:2f304e2eb23de6bd': {'kind': 'msg',\n", + " 'info': {'otel': {'span_id': 'c924fc7efd52a8be'}},\n", + " 'id': 'otel:param_planner_prompt'},\n", + " 'otel:c924fc7efd52a8be': {'kind': 'msg',\n", " 'name': 'planner_node',\n", " 'op': 'llm_call',\n", - " 'inputs': {'gen_ai.prompt': 'Create a short plan for: What is CRISPR?',\n", + " 'inputs': {'gen_ai.prompt': 'As an expert scientific strategist, create a comprehensive and structured outline to explain the following topic: What is CRISPR?. The outline must include: 1) A clear definition and historical context, 2) The step-by-step biological mechanism, 3) Notable real-world applications in medicine or agriculture, and 4) Future implications. Ensure the plan is concise yet covers all technical essentials.',\n", " 'user_query': 'otel:What is CRISPR?',\n", " 'query': 'otel:What is CRISPR?',\n", " 'param_planner_prompt': 'otel:param_planner_prompt'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '0de02cf99df97767e783ee43a8abff97',\n", - " 'span_id': '2f304e2eb23de6bd',\n", + " 'info': {'otel': {'trace_id': '458c673d2635aefab1f6f63c396f7a94',\n", + " 'span_id': 'c924fc7efd52a8be',\n", " 'parent_span_id': None,\n", " 'service': 'otel',\n", - " 'temporal_ignore': False}}},\n", - " 'otel:bea5c71b6e1e62bb': {'kind': 'msg',\n", + " 'temporal_ignore': False}},\n", + " 'id': 'otel:c924fc7efd52a8be'},\n", + " 'otel:cbda36bf7b379ea8': {'kind': 'msg',\n", " 'name': 'llm.chat.completion',\n", " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel:2f304e2eb23de6bd'},\n", + " 'inputs': {'parent': 'otel:c924fc7efd52a8be'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '0de02cf99df97767e783ee43a8abff97',\n", - " 'span_id': 'bea5c71b6e1e62bb',\n", - " 'parent_span_id': '2f304e2eb23de6bd',\n", + " 'info': {'otel': 
{'trace_id': '458c673d2635aefab1f6f63c396f7a94',\n", + " 'span_id': 'cbda36bf7b379ea8',\n", + " 'parent_span_id': 'c924fc7efd52a8be',\n", " 'service': 'otel',\n", - " 'temporal_ignore': True}}},\n", + " 'temporal_ignore': True}},\n", + " 'id': 'otel:cbda36bf7b379ea8'},\n", " 'otel:param_synth_prompt': {'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", - " 'data': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'data': \"You are a science communicator. Using the structured plan provided below, write a thorough, engaging, and clear response to the user's query: {query}. \\n\\nFollow the plan strictly, using its sections as headers for your response. Explain all technical concepts (like enzymes or DNA sequences) in a way that remains accessible to a non-expert audience while maintaining scientific accuracy.\\n\\nPlan:\\n{plan}\",\n", " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '9ea6c5fc5023c6ad'}}},\n", - " 'otel:9ea6c5fc5023c6ad': {'kind': 'msg',\n", + " 'info': {'otel': {'span_id': '9d321598c5483758'}},\n", + " 'id': 'otel:param_synth_prompt'},\n", + " 'otel:9d321598c5483758': {'kind': 'msg',\n", " 'name': 'synth_node',\n", " 'op': 'llm_call',\n", - " 'inputs': {'gen_ai.prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: What is CRISPR?\\nPlan: This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. 
The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not',\n", + " 'inputs': {'gen_ai.prompt': 'You are a science communicator. Using the structured plan provided below, write a thorough, engaging, and clear response to the user\\'s query: What is CRISPR?. \\n\\nFollow the plan strictly, using its sections as headers for your response. Explain all technical concepts (like enzymes or DNA sequences) in a way that remains accessible to a non-expert audience while maintaining scientific accuracy.\\n\\nPlan:\\nThis strategic outline provides a high-level technical overview of CRISPR-Cas9 technology, structured for clarity, scientific accuracy, and forward-looking analysis.\\n\\n---\\n\\n### **Strategic Outline: Understanding CRISPR-Cas9**\\n\\n#### **I. Definition and Historical Context**\\n* **A. Technical Definition:** CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a specialized region of DNA characterized by two distinct traits: the presence of nucleotide repeats and spacers.\\n* **B. The Biological Origin:** Originally discovered as an adaptive immune system in bacteria and archaea used to detect and destroy DNA from invading bacteriophages (viruses).\\n* **C. 
The \"Genetic Scissors\" Breakthrough:** \\n * Transition from a bacterial defense mechanism to a programmable genome-editing tool.\\n * Key Milestone: The 2012 Doudna-Charpentier publication and subsequent 2020 Nobel Prize in Chemistry.\\n* **D. Comparison to Legacy Tools:** Why CRISPR is superior to previous',\n", " 'user_query': 'otel:What is CRISPR?',\n", " 'query': 'otel:What is CRISPR?',\n", - " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not',\n", - " 'parent': 'otel:2f304e2eb23de6bd',\n", + " 'plan': 'This strategic outline provides a high-level technical overview of CRISPR-Cas9 technology, structured for clarity, scientific accuracy, and forward-looking analysis.\\n\\n---\\n\\n### **Strategic Outline: Understanding CRISPR-Cas9**\\n\\n#### **I. Definition and Historical Context**\\n* **A. Technical Definition:** CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a specialized region of DNA characterized by two distinct traits: the presence of nucleotide repeats and spacers.\\n* **B. 
The Biological Origin:** Originally discovered as an adaptive immune system in bacteria and archaea used to detect and destroy DNA from invading bacteriophages (viruses).\\n* **C. The \"Genetic Scissors\" Breakthrough:** \\n * Transition from a bacterial defense mechanism to a programmable genome-editing tool.\\n * Key Milestone: The 2012 Doudna-Charpentier publication and subsequent 2020 Nobel Prize in Chemistry.\\n* **D. Comparison to Legacy Tools:** Why CRISPR is superior to previous',\n", + " 'parent': 'otel:c924fc7efd52a8be',\n", " 'param_synth_prompt': 'otel:param_synth_prompt'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '0de02cf99df97767e783ee43a8abff97',\n", - " 'span_id': '9ea6c5fc5023c6ad',\n", - " 'parent_span_id': '2f304e2eb23de6bd',\n", + " 'info': {'otel': {'trace_id': '458c673d2635aefab1f6f63c396f7a94',\n", + " 'span_id': '9d321598c5483758',\n", + " 'parent_span_id': 'c924fc7efd52a8be',\n", " 'service': 'otel',\n", - " 'temporal_ignore': False}}},\n", - " 'otel:b935b92e48ab04b8': {'kind': 'msg',\n", + " 'temporal_ignore': False}},\n", + " 'id': 'otel:9d321598c5483758'},\n", + " 'otel:aae6d65b3ea4da18': {'kind': 'msg',\n", " 'name': 'llm.chat.completion',\n", " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel:9ea6c5fc5023c6ad'},\n", + " 'inputs': {'parent': 'otel:9d321598c5483758'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '0de02cf99df97767e783ee43a8abff97',\n", - " 'span_id': 'b935b92e48ab04b8',\n", - " 'parent_span_id': '9ea6c5fc5023c6ad',\n", + " 'info': {'otel': {'trace_id': '458c673d2635aefab1f6f63c396f7a94',\n", + " 'span_id': 'aae6d65b3ea4da18',\n", + " 'parent_span_id': '9d321598c5483758',\n", " 'service': 'otel',\n", - " 'temporal_ignore': True}}}},\n", + " 'temporal_ignore': True}},\n", + " 'id': 'otel:aae6d65b3ea4da18'}},\n", " 'context': {}},\n", " 'summary': {'node_count': 6,\n", " 'message_names': ['llm.chat.completion',\n", @@ -1403,8 +2319,8 @@ " 'synth_node'],\n", 
" 'semantic_messages': ['planner_node', 'synth_node'],\n", " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'param_values': {'planner_prompt': 'As an expert scientific strategist, create a comprehensive and structured outline to explain the following topic: {query}. The outline must include: 1) A clear definition and historical context, 2) The step-by-step bi...',\n", + " 'synth_prompt': \"You are a science communicator. Using the structured plan provided below, write a thorough, engaging, and clear response to the user's query: {query}. \\n\\nFollow the plan strictly, using its sections as headers for your...\"},\n", " 'span_count': 5,\n", " 'span_names': ['llm.chat.completion',\n", " 'planner_node',\n", @@ -1416,17 +2332,19 @@ " 'param.synth_prompt',\n", " 'param.synth_prompt.trainable']}}]},\n", " {'config': 'trace+sysmon',\n", - " 'runtime_s': 54.634,\n", - " 'baseline_score': 0.842,\n", - " 'best_score': 0.876,\n", - " 'score_gain': 0.035,\n", - " 'best_iteration': 3,\n", - " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'runtime_s': 18.639,\n", + " 'baseline_score': 0.733,\n", + " 'best_score': 0.733,\n", + " 'score_gain': 0.0,\n", + " 'best_iteration': 0,\n", + " 'score_history': [0.733, 0.733, 0.65],\n", " 'stability_std': 0.0,\n", - " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", - " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", - " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", + " 'node_count': 9,\n", + " 'edge_count': 6,\n", + " 'best_updates': {},\n", + " 'final_synth_prompt': 'Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. 
Focus on delivering the facts immediately.\\n\\nQuery: {query}\\nPlan: {plan}',\n", + " 'final_answer': 'CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that functions as a pair of molecular scissors or a search-and-replace tool for DNA. The system relies on two primary components: the Cas9 enzyme, which performs the physical cutting of the genetic material, and a guide RNA molecule that acts as a GPS to direct the enzyme to a precise location within the genome. This mechanism was originally adapted from a naturally occurring immune system in bacteria, which use CRISPR sequences to identify and destroy the DNA of invading viruses. By repurposing this biological defense, scientists can now target specific genes in plants, animals, and humans to disable harmful sequences or insert beneficial ones. This capability has led to significant breakthroughs in medicine and agriculture, such as developing treatments for genetic disorders like sickle cell anemia and creating crops that are more resistant to pests or climate change. 
Despite its immense potential for curing diseases and improving food security, the technology also prompts critical ethical discussions regarding the long-term consequences of permanent genetic alterations and the',\n", + " 'answer_preview': 'CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that functions as a pair of molecular scissors or ...',\n", " 'observers': ['sysmon'],\n", " 'views': [{'carrier': 'trace',\n", " 'origin': 'backend',\n", @@ -1448,7 +2366,7 @@ " 'inputs': {'in_0': {'ref': 'state'}},\n", " 'output': {'name': 'make_trace_case.planner_node:out',\n", " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}}},\n", + " 'plan': }}},\n", " {'id': 'make_trace_case.synth_node',\n", " 'kind': 'message',\n", " 'name': 'make_trace_case.synth_node',\n", @@ -1456,7 +2374,38 @@ " 'description': '[make_trace_case.synth_node]',\n", " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", + " 'value': {'final_answer': }}},\n", + " {'id': 'make_trace_case.planner_node11_copy',\n", + " 'kind': 'message',\n", + " 'name': 'make_trace_case.planner_node11_copy',\n", + " 'op': 'make_trace_case.planner_node',\n", + " 'description': '[make_trace_case.planner_node]',\n", + " 'inputs': {},\n", + " 'output': {'name': 'make_trace_case.planner_node11_copy:out',\n", + " 'value': {'query': 'What is CRISPR?',\n", + " 'plan': }}},\n", + " {'id': 'str',\n", + " 'kind': 'value',\n", + " 'name': 'str',\n", + " 'value': 'plan',\n", + " 'description': '[Node]'},\n", + " {'id': 'getitem',\n", + " 'kind': 'message',\n", + " 'name': 'getitem',\n", + " 'op': 'getitem',\n", + " 'description': '[getitem] This is a getitem operator of x based on index.',\n", + " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node11_copy'},\n", + " 'in_1': {'ref': 'str'}},\n", + " 'output': {'name': 'getitem:out',\n", + " 'value': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. 
The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}},\n", + " {'id': 'final_answer_node',\n", + " 'kind': 'message',\n", + " 'name': 'final_answer_node',\n", + " 'op': 'llm',\n", + " 'description': '[llm] synth',\n", + " 'inputs': {'in_0': {'ref': 'synth_prompt'}, 'in_1': {'ref': 'getitem'}},\n", + " 'output': {'name': 'final_answer_node:out',\n", + " 'value': 'CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that functions as a pair of molecular scissors or a search-and-replace tool for DNA. The system relies on two primary components: the Cas9 enzyme, which performs the physical cutting of the genetic material, and a guide RNA molecule that acts as a GPS to direct the enzyme to a precise location within the genome. This mechanism was originally adapted from a naturally occurring immune system in bacteria, which use CRISPR sequences to identify and destroy the DNA of invading viruses. By repurposing this biological defense, scientists can now target specific genes in plants, animals, and humans to disable harmful sequences or insert beneficial ones. This capability has led to significant breakthroughs in medicine and agriculture, such as developing treatments for genetic disorders like sickle cell anemia and creating crops that are more resistant to pests or climate change. 
Despite its immense potential for curing diseases and improving food security, the technology also prompts critical ethical discussions regarding the long-term consequences of permanent genetic alterations and the'}},\n", " {'id': 'planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", @@ -1466,16 +2415,19 @@ " {'id': 'synth_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", - " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'value': 'Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. Focus on delivering the facts immediately.\\n\\nQuery: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", " 'description': '[Parameter]'}]},\n", - " 'summary': {'node_count': 5,\n", - " 'message_names': ['make_trace_case.planner_node',\n", + " 'summary': {'node_count': 9,\n", + " 'message_names': ['final_answer_node',\n", + " 'getitem',\n", + " 'make_trace_case.planner_node',\n", + " 'make_trace_case.planner_node11_copy',\n", " 'make_trace_case.synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", " 'param_names': ['planner_prompt', 'synth_prompt'],\n", " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'}}},\n", + " 'synth_prompt': 'Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. 
Focus on deliv...'}}},\n", " {'carrier': 'sysmon',\n", " 'origin': 'observer',\n", " 'doc': {'tgj': '1.0',\n", @@ -1492,46 +2444,49 @@ " 'param:synth_prompt': {'id': 'param:synth_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", - " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'value': 'Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. Focus on delivering the facts immediately.\\n\\nQuery: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", - " 'msg:ba87846d33ed46da': {'id': 'msg:ba87846d33ed46da',\n", + " 'msg:bb8e9c312f87481c': {'id': 'msg:bb8e9c312f87481c',\n", " 'kind': 'message',\n", " 'name': 'planner_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:568',\n", - " 'inputs': {},\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:655',\n", + " 'inputs': {'param_planner_prompt': {'ref': 'param:planner_prompt'}},\n", " 'output': {'name': 'planner_node:out',\n", - " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\\\n\\\\n### **G\"},\n", - " 'info': {'sysmon': {'duration_ns': 2191557194,\n", - " 'thread_id': 140336865654592}}},\n", - " 'msg:c4faa984c1eb466d': {'id': 'msg:c4faa984c1eb466d',\n", + " 'value': \"{'query': 'What is CRISPR?', 'plan': }\"},\n", + " 'info': {'sysmon': {'duration_ns': 2570870185,\n", + " 'thread_id': 140646174676800}}},\n", + " 'msg:610f74037f4441f9': {'id': 
'msg:610f74037f4441f9',\n", " 'kind': 'message',\n", " 'name': 'synth_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:579',\n", - " 'inputs': {},\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:666',\n", + " 'inputs': {'parent': {'ref': 'msg:bb8e9c312f87481c'},\n", + " 'param_synth_prompt': {'ref': 'param:synth_prompt'}},\n", " 'output': {'name': 'synth_node:out',\n", - " 'value': \"{'final_answer': }\"},\n", - " 'info': {'sysmon': {'duration_ns': 2046976228,\n", - " 'thread_id': 140336865654592}}}}},\n", + " 'value': \"{'final_answer': }\"},\n", + " 'info': {'sysmon': {'duration_ns': 2280707474,\n", + " 'thread_id': 140646174676800}}}}},\n", " 'summary': {'node_count': 4,\n", " 'message_names': ['planner_node', 'synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", " 'param_names': ['planner_prompt', 'synth_prompt'],\n", " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'synth_prompt': 'Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. 
Focus on deliv...'},\n", " 'event_count': 2}}]},\n", " {'config': 'trace+otel+sysmon',\n", - " 'runtime_s': 53.552,\n", - " 'baseline_score': 0.842,\n", - " 'best_score': 0.876,\n", - " 'score_gain': 0.035,\n", - " 'best_iteration': 3,\n", - " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'runtime_s': 22.479,\n", + " 'baseline_score': 0.733,\n", + " 'best_score': 0.733,\n", + " 'score_gain': 0.0,\n", + " 'best_iteration': 0,\n", + " 'score_history': [0.733, 0.733, 0.677],\n", " 'stability_std': 0.0,\n", - " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", - " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. 
It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", - " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", + " 'node_count': 9,\n", + " 'edge_count': 6,\n", + " 'best_updates': {},\n", + " 'final_synth_prompt': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. Ensure every sentence is complete and strictly follows the logical flow of the plan.\\n\\nTopic: {query}\\nPlan: {plan}\\n\\nDirect Response:',\n", + " 'final_answer': 'CRISPR stands for Clustered Regularly Interspaced Short Palindromic Repeats and functions as a revolutionary gene-editing tool. It is frequently described as \"molecular scissors\" or a \"search-and-replace\" function for the genome, allowing scientists to modify DNA with high precision. The system consists of two primary components: the Cas9 enzyme, which acts as the scissors to cut the DNA strand, and the Guide RNA (gRNA), which serves as a GPS to lead the enzyme to a specific genetic sequence. This technology was adapted from a naturally occurring immune system in bacteria, which use CRISPR sequences to identify and destroy the DNA of invading viruses. Scientists repurposed this biological mechanism into a versatile laboratory tool that can be programmed to target and edit the genetic code of virtually any organism.',\n", + " 'answer_preview': 'CRISPR stands for Clustered Regularly Interspaced Short Palindromic Repeats and functions as a revolutionary gene-editing tool. 
It is frequently described as \"molecular scissors...',\n", " 'observers': ['sysmon', 'otel'],\n", " 'views': [{'carrier': 'trace',\n", " 'origin': 'backend',\n", @@ -1553,7 +2508,7 @@ " 'inputs': {'in_0': {'ref': 'state'}},\n", " 'output': {'name': 'make_trace_case.planner_node:out',\n", " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}}},\n", + " 'plan': }}},\n", " {'id': 'make_trace_case.synth_node',\n", " 'kind': 'message',\n", " 'name': 'make_trace_case.synth_node',\n", @@ -1561,7 +2516,38 @@ " 'description': '[make_trace_case.synth_node]',\n", " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", + " 'value': {'final_answer': }}},\n", + " {'id': 'make_trace_case.planner_node15_copy',\n", + " 'kind': 'message',\n", + " 'name': 'make_trace_case.planner_node15_copy',\n", + " 'op': 'make_trace_case.planner_node',\n", + " 'description': '[make_trace_case.planner_node]',\n", + " 'inputs': {},\n", + " 'output': {'name': 'make_trace_case.planner_node15_copy:out',\n", + " 'value': {'query': 'What is CRISPR?',\n", + " 'plan': }}},\n", + " {'id': 'str',\n", + " 'kind': 'value',\n", + " 'name': 'str',\n", + " 'value': 'plan',\n", + " 'description': '[Node]'},\n", + " {'id': 'getitem',\n", + " 'kind': 'message',\n", + " 'name': 'getitem',\n", + " 'op': 'getitem',\n", + " 'description': '[getitem] This is a getitem operator of x based on index.',\n", + " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node15_copy'},\n", + " 'in_1': {'ref': 'str'}},\n", + " 'output': {'name': 'getitem:out',\n", + " 'value': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. 
The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}},\n", + " {'id': 'final_answer_node',\n", + " 'kind': 'message',\n", + " 'name': 'final_answer_node',\n", + " 'op': 'llm',\n", + " 'description': '[llm] synth',\n", + " 'inputs': {'in_0': {'ref': 'synth_prompt'}, 'in_1': {'ref': 'getitem'}},\n", + " 'output': {'name': 'final_answer_node:out',\n", + " 'value': 'CRISPR stands for Clustered Regularly Interspaced Short Palindromic Repeats and functions as a revolutionary gene-editing tool. It is frequently described as \"molecular scissors\" or a \"search-and-replace\" function for the genome, allowing scientists to modify DNA with high precision. The system consists of two primary components: the Cas9 enzyme, which acts as the scissors to cut the DNA strand, and the Guide RNA (gRNA), which serves as a GPS to lead the enzyme to a specific genetic sequence. This technology was adapted from a naturally occurring immune system in bacteria, which use CRISPR sequences to identify and destroy the DNA of invading viruses. 
Scientists repurposed this biological mechanism into a versatile laboratory tool that can be programmed to target and edit the genetic code of virtually any organism.'}},\n", " {'id': 'planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", @@ -1571,16 +2557,19 @@ " {'id': 'synth_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", - " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'value': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. Ensure every sentence is complete and strictly follows the logical flow of the plan.\\n\\nTopic: {query}\\nPlan: {plan}\\n\\nDirect Response:',\n", " 'trainable': True,\n", " 'description': '[Parameter]'}]},\n", - " 'summary': {'node_count': 5,\n", - " 'message_names': ['make_trace_case.planner_node',\n", + " 'summary': {'node_count': 9,\n", + " 'message_names': ['final_answer_node',\n", + " 'getitem',\n", + " 'make_trace_case.planner_node',\n", + " 'make_trace_case.planner_node15_copy',\n", " 'make_trace_case.synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", " 'param_names': ['planner_prompt', 'synth_prompt'],\n", " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'}}},\n", + " 'synth_prompt': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. 
Ensu...'}}},\n", " {'carrier': 'sysmon',\n", " 'origin': 'observer',\n", " 'doc': {'tgj': '1.0',\n", @@ -1597,125 +2586,173 @@ " 'param:synth_prompt': {'id': 'param:synth_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", - " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'value': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. Ensure every sentence is complete and strictly follows the logical flow of the plan.\\n\\nTopic: {query}\\nPlan: {plan}\\n\\nDirect Response:',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", - " 'msg:94fd3f1208e2462c': {'id': 'msg:94fd3f1208e2462c',\n", + " 'msg:19ad98a644da4e36': {'id': 'msg:19ad98a644da4e36',\n", " 'kind': 'message',\n", " 'name': 'planner_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:568',\n", - " 'inputs': {},\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:655',\n", + " 'inputs': {'param_planner_prompt': {'ref': 'param:planner_prompt'}},\n", " 'output': {'name': 'planner_node:out',\n", - " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\\\n\\\\n### **G\"},\n", - " 'info': {'sysmon': {'duration_ns': 2248025713,\n", - " 'thread_id': 140336865654592}}},\n", - " 'msg:111678fc76c944b1': {'id': 'msg:111678fc76c944b1',\n", + " 'value': \"{'query': 'What is CRISPR?', 'plan': }\"},\n", + " 'info': {'sysmon': {'duration_ns': 2147475553,\n", + " 'thread_id': 
140646174676800}}},\n", + " 'msg:59f7465ae3024148': {'id': 'msg:59f7465ae3024148',\n", " 'kind': 'message',\n", " 'name': 'synth_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:579',\n", - " 'inputs': {},\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:666',\n", + " 'inputs': {'parent': {'ref': 'msg:19ad98a644da4e36'},\n", + " 'param_synth_prompt': {'ref': 'param:synth_prompt'}},\n", " 'output': {'name': 'synth_node:out',\n", - " 'value': \"{'final_answer': }\"},\n", - " 'info': {'sysmon': {'duration_ns': 2061665516,\n", - " 'thread_id': 140336865654592}}}}},\n", + " 'value': \"{'final_answer': }\"},\n", + " 'info': {'sysmon': {'duration_ns': 2146902050,\n", + " 'thread_id': 140646174676800}}}}},\n", " 'summary': {'node_count': 4,\n", " 'message_names': ['planner_node', 'synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", " 'param_names': ['planner_prompt', 'synth_prompt'],\n", " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'synth_prompt': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. 
Ensu...'},\n", " 'event_count': 2}},\n", " {'carrier': 'otel',\n", " 'origin': 'observer',\n", " 'doc': {'version': 'trace-json/1.0+otel',\n", " 'agent': {'id': 'trace+otel+sysmon', 'service': 'trace+otel+sysmon'},\n", - " 'otel_meta': {'trace_id': None},\n", - " 'nodes': {},\n", + " 'otel_meta': {'trace_id': '33c59ff62b32e9179eb45cdbe86608a1'},\n", + " 'nodes': {'trace+otel+sysmon:param_planner_prompt': {'kind': 'parameter',\n", + " 'name': 'planner_prompt',\n", + " 'data': 'Create a short plan for: {query}',\n", + " 'trainable': True,\n", + " 'info': {'otel': {'span_id': '924e80198bcd1ad6'}},\n", + " 'id': 'trace+otel+sysmon:param_planner_prompt'},\n", + " 'trace+otel+sysmon:planner_node': {'kind': 'msg',\n", + " 'name': 'planner_node',\n", + " 'op': 'unspecified',\n", + " 'inputs': {'param_planner_prompt': 'trace+otel+sysmon:param_planner_prompt'},\n", + " 'data': {'message_id': 'planner_node'},\n", + " 'info': {'otel': {'trace_id': 'b35f2b6cc88f0ff8f0800442600e52e7',\n", + " 'span_id': '924e80198bcd1ad6',\n", + " 'parent_span_id': '',\n", + " 'service': 'trace+otel+sysmon',\n", + " 'temporal_ignore': False}},\n", + " 'id': 'trace+otel+sysmon:planner_node'},\n", + " 'trace+otel+sysmon:param_synth_prompt': {'kind': 'parameter',\n", + " 'name': 'synth_prompt',\n", + " 'data': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. 
Ensure every sentence is complete and strictly follows the logical flow of the plan.\\n\\nTopic: {query}\\nPlan: {plan}\\n\\nDirect Response:',\n", + " 'trainable': True,\n", + " 'info': {'otel': {'span_id': '337b9311aabcc219'}},\n", + " 'id': 'trace+otel+sysmon:param_synth_prompt'},\n", + " 'trace+otel+sysmon:synth_node': {'kind': 'msg',\n", + " 'name': 'synth_node',\n", + " 'op': 'unspecified',\n", + " 'inputs': {'parent': 'trace+otel+sysmon:planner_node',\n", + " 'param_synth_prompt': 'trace+otel+sysmon:param_synth_prompt'},\n", + " 'data': {'message_id': 'synth_node'},\n", + " 'info': {'otel': {'trace_id': '33c59ff62b32e9179eb45cdbe86608a1',\n", + " 'span_id': '337b9311aabcc219',\n", + " 'parent_span_id': '924e80198bcd1ad6',\n", + " 'service': 'trace+otel+sysmon',\n", + " 'temporal_ignore': False}},\n", + " 'id': 'trace+otel+sysmon:synth_node'}},\n", " 'context': {}},\n", - " 'summary': {'node_count': 0,\n", - " 'message_names': [],\n", - " 'semantic_messages': [],\n", - " 'param_names': [],\n", - " 'param_values': {},\n", - " 'span_count': 0,\n", - " 'span_names': [],\n", - " 'param_keys': []}}]},\n", + " 'summary': {'node_count': 4,\n", + " 'message_names': ['planner_node', 'synth_node'],\n", + " 'semantic_messages': ['planner_node', 'synth_node'],\n", + " 'param_names': ['planner_prompt', 'synth_prompt'],\n", + " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", + " 'synth_prompt': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. 
Ensu...'},\n", + " 'span_count': 2,\n", + " 'span_names': ['planner_node', 'synth_node'],\n", + " 'param_keys': ['param.planner_prompt',\n", + " 'param.planner_prompt.trainable',\n", + " 'param.synth_prompt',\n", + " 'param.synth_prompt.trainable']}}]},\n", " {'config': 'otel+sysmon',\n", - " 'runtime_s': 54.761,\n", - " 'baseline_score': 0.842,\n", - " 'best_score': 0.876,\n", - " 'score_gain': 0.035,\n", - " 'best_iteration': 3,\n", - " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'runtime_s': 21.463,\n", + " 'baseline_score': 0.733,\n", + " 'best_score': 0.915,\n", + " 'score_gain': 0.182,\n", + " 'best_iteration': 2,\n", + " 'score_history': [0.733, 0.733, 0.915],\n", " 'stability_std': 0.0,\n", - " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", - " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. 
The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", - " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", + " 'node_count': 6,\n", + " 'edge_count': 5,\n", + " 'best_updates': {'planner_prompt': 'Create an outline for a comprehensive yet concise answer to: {query}. The plan should include a definition, key components, and significance.',\n", + " 'synth_prompt': \"Using the plan provided below, provide a factual and clear answer to the user's query: {query}. Ensure all parts of the plan are addressed.\\n\\nPlan:\\n{plan}\"},\n", + " 'final_synth_prompt': \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \\n\\nInstructions:\\n- Expand on each point of the plan with accurate details.\\n- Use a professional yet accessible tone.\\n- Ensure smooth transitions between the definition, components, and significance sections.\\n- Conclude with a summary of the technology's impact.\\n\\nPlan:\\n{plan}\",\n", + " 'final_answer': 'To understand the future of medicine and biotechnology, one must understand **CRISPR**. 
Often described as the most significant biological discovery of the 21st century, CRISPR has transformed from an obscure bacterial defense mechanism into a powerful tool for rewriting the code of life.\\n\\n### I. Definition and Etymology: The Blueprint of the System\\nAt its most basic level, **CRISPR** stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. While the name is a mouthful, it describes a specific pattern of DNA found in the genomes of bacteria and archaea. \\n\\nThese patterns consist of short sequences of genetic code that read the same forward and backward (palindromes), separated by \"spacers\"—segments of DNA that the bacteria have \"stolen\" from viruses that previously attacked them. This serves as a molecular \"most wanted\" gallery, allowing the bacteria to recognize and defend against those viruses if they return.\\n\\nIn the laboratory, scientists have adapted this natural system into **CRISPR-Cas9**, a technology often referred',\n", + " 'answer_preview': 'To understand the future of medicine and biotechnology, one must understand **CRISPR**. Often described as the most significant biological discovery of the 21st century, CRISPR ...',\n", " 'observers': ['sysmon'],\n", " 'views': [{'carrier': 'otel',\n", " 'origin': 'backend',\n", " 'doc': {'version': 'trace-json/1.0+otel',\n", " 'agent': {'id': 'otel+sysmon', 'service': 'otel+sysmon'},\n", - " 'otel_meta': {'trace_id': '291d1ab2c30befe7812d63866174c584'},\n", + " 'otel_meta': {'trace_id': '47363f96c8cd35ea3eaad92ea1ed0d01'},\n", " 'nodes': {'otel+sysmon:param_planner_prompt': {'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", - " 'data': 'Create a short plan for: {query}',\n", + " 'data': 'Create a detailed and structured outline for a comprehensive answer to the query: {query}. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Specific real-world applications and significance.\\n4. 
Potential ethical considerations or future outlook.\\nEnsure the outline is logically sequenced and complete.',\n", " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '41239b59d111622d'}}},\n", - " 'otel+sysmon:41239b59d111622d': {'kind': 'msg',\n", + " 'info': {'otel': {'span_id': '81dabfd0047ab7ed'}},\n", + " 'id': 'otel+sysmon:param_planner_prompt'},\n", + " 'otel+sysmon:81dabfd0047ab7ed': {'kind': 'msg',\n", " 'name': 'planner_node',\n", " 'op': 'llm_call',\n", - " 'inputs': {'gen_ai.prompt': 'Create a short plan for: What is CRISPR?',\n", + " 'inputs': {'gen_ai.prompt': 'Create a detailed and structured outline for a comprehensive answer to the query: What is CRISPR?. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Specific real-world applications and significance.\\n4. Potential ethical considerations or future outlook.\\nEnsure the outline is logically sequenced and complete.',\n", " 'user_query': 'otel+sysmon:What is CRISPR?',\n", " 'query': 'otel+sysmon:What is CRISPR?',\n", " 'param_planner_prompt': 'otel+sysmon:param_planner_prompt'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '291d1ab2c30befe7812d63866174c584',\n", - " 'span_id': '41239b59d111622d',\n", + " 'info': {'otel': {'trace_id': '47363f96c8cd35ea3eaad92ea1ed0d01',\n", + " 'span_id': '81dabfd0047ab7ed',\n", " 'parent_span_id': None,\n", " 'service': 'otel+sysmon',\n", - " 'temporal_ignore': False}}},\n", - " 'otel+sysmon:7dd428b84a3f270b': {'kind': 'msg',\n", + " 'temporal_ignore': False}},\n", + " 'id': 'otel+sysmon:81dabfd0047ab7ed'},\n", + " 'otel+sysmon:6bd500faf953ce28': {'kind': 'msg',\n", " 'name': 'llm.chat.completion',\n", " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel+sysmon:41239b59d111622d'},\n", + " 'inputs': {'parent': 'otel+sysmon:81dabfd0047ab7ed'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': 
'291d1ab2c30befe7812d63866174c584',\n", - " 'span_id': '7dd428b84a3f270b',\n", - " 'parent_span_id': '41239b59d111622d',\n", + " 'info': {'otel': {'trace_id': '47363f96c8cd35ea3eaad92ea1ed0d01',\n", + " 'span_id': '6bd500faf953ce28',\n", + " 'parent_span_id': '81dabfd0047ab7ed',\n", " 'service': 'otel+sysmon',\n", - " 'temporal_ignore': True}}},\n", + " 'temporal_ignore': True}},\n", + " 'id': 'otel+sysmon:6bd500faf953ce28'},\n", " 'otel+sysmon:param_synth_prompt': {'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", - " 'data': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'data': \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \\n\\nInstructions:\\n- Expand on each point of the plan with accurate details.\\n- Use a professional yet accessible tone.\\n- Ensure smooth transitions between the definition, components, and significance sections.\\n- Conclude with a summary of the technology's impact.\\n\\nPlan:\\n{plan}\",\n", " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '79ac53511a735fc2'}}},\n", - " 'otel+sysmon:79ac53511a735fc2': {'kind': 'msg',\n", + " 'info': {'otel': {'span_id': 'd8af1f6bb4933b02'}},\n", + " 'id': 'otel+sysmon:param_synth_prompt'},\n", + " 'otel+sysmon:d8af1f6bb4933b02': {'kind': 'msg',\n", " 'name': 'synth_node',\n", " 'op': 'llm_call',\n", - " 'inputs': {'gen_ai.prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: What is CRISPR?\\nPlan: This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not',\n", + " 'inputs': {'gen_ai.prompt': 'Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user\\'s query: What is CRISPR?. \\n\\nInstructions:\\n- Expand on each point of the plan with accurate details.\\n- Use a professional yet accessible tone.\\n- Ensure smooth transitions between the definition, components, and significance sections.\\n- Conclude with a summary of the technology\\'s impact.\\n\\nPlan:\\nThis outline provides a structured framework for a comprehensive explanation of CRISPR, moving from foundational definitions to technical mechanics, practical uses, and societal implications.\\n\\n---\\n\\n### **I. Introduction: Definition and Etymology**\\n* **A. Formal Definition:** Define CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a specialized region of DNA with two distinct characteristics: presence of nucleotide repeats and spacers.\\n* **B. 
The \"Genetic Scissors\" Analogy:** Introduce CRISPR-Cas9 as a revolutionary tool for genome editing that allows scientists to modify DNA with unprecedented precision.\\n* **C. Etymology and Origins:**\\n * Break down the acronym (Clustered, Regularly Interspaced, Short Palindromic Repeats).\\n * Brief history: Discovery in the late 1980s/early 1990s as a natural immune system in bacteria and archaea used to fight viral infections (bacteriophages).\\n* **D. The Nobel Connection:** Mention the',\n", " 'user_query': 'otel+sysmon:What is CRISPR?',\n", " 'query': 'otel+sysmon:What is CRISPR?',\n", - " 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not',\n", - " 'parent': 'otel+sysmon:41239b59d111622d',\n", + " 'plan': 'This outline provides a structured framework for a comprehensive explanation of CRISPR, moving from foundational definitions to technical mechanics, practical uses, and societal implications.\\n\\n---\\n\\n### **I. Introduction: Definition and Etymology**\\n* **A. 
Formal Definition:** Define CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a specialized region of DNA with two distinct characteristics: presence of nucleotide repeats and spacers.\\n* **B. The \"Genetic Scissors\" Analogy:** Introduce CRISPR-Cas9 as a revolutionary tool for genome editing that allows scientists to modify DNA with unprecedented precision.\\n* **C. Etymology and Origins:**\\n * Break down the acronym (Clustered, Regularly Interspaced, Short Palindromic Repeats).\\n * Brief history: Discovery in the late 1980s/early 1990s as a natural immune system in bacteria and archaea used to fight viral infections (bacteriophages).\\n* **D. The Nobel Connection:** Mention the',\n", + " 'parent': 'otel+sysmon:81dabfd0047ab7ed',\n", " 'param_synth_prompt': 'otel+sysmon:param_synth_prompt'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '291d1ab2c30befe7812d63866174c584',\n", - " 'span_id': '79ac53511a735fc2',\n", - " 'parent_span_id': '41239b59d111622d',\n", + " 'info': {'otel': {'trace_id': '47363f96c8cd35ea3eaad92ea1ed0d01',\n", + " 'span_id': 'd8af1f6bb4933b02',\n", + " 'parent_span_id': '81dabfd0047ab7ed',\n", " 'service': 'otel+sysmon',\n", - " 'temporal_ignore': False}}},\n", - " 'otel+sysmon:eb0f3cf3afb9c639': {'kind': 'msg',\n", + " 'temporal_ignore': False}},\n", + " 'id': 'otel+sysmon:d8af1f6bb4933b02'},\n", + " 'otel+sysmon:e988ed34c817d3b5': {'kind': 'msg',\n", " 'name': 'llm.chat.completion',\n", " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel+sysmon:79ac53511a735fc2'},\n", + " 'inputs': {'parent': 'otel+sysmon:d8af1f6bb4933b02'},\n", " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '291d1ab2c30befe7812d63866174c584',\n", - " 'span_id': 'eb0f3cf3afb9c639',\n", - " 'parent_span_id': '79ac53511a735fc2',\n", + " 'info': {'otel': {'trace_id': '47363f96c8cd35ea3eaad92ea1ed0d01',\n", + " 'span_id': 'e988ed34c817d3b5',\n", + " 'parent_span_id': 'd8af1f6bb4933b02',\n", " 
'service': 'otel+sysmon',\n", - " 'temporal_ignore': True}}}},\n", + " 'temporal_ignore': True}},\n", + " 'id': 'otel+sysmon:e988ed34c817d3b5'}},\n", " 'context': {}},\n", " 'summary': {'node_count': 6,\n", " 'message_names': ['llm.chat.completion',\n", @@ -1724,8 +2761,8 @@ " 'synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'param_values': {'planner_prompt': 'Create a detailed and structured outline for a comprehensive answer to the query: {query}. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Spe...',\n", + " 'synth_prompt': \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \\n\\nInstructions:\\n- Expand on each point of the plan with acc...\"},\n", " 'span_count': 5,\n", " 'span_names': ['llm.chat.completion',\n", " 'planner_node',\n", @@ -1746,52 +2783,56 @@ " 'nodes': {'param:planner_prompt': {'id': 'param:planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", - " 'value': 'Create a short plan for: {query}',\n", + " 'value': 'Create a detailed and structured outline for a comprehensive answer to the query: {query}. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Specific real-world applications and significance.\\n4. 
Potential ethical considerations or future outlook.\\nEnsure the outline is logically sequenced and complete.',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", " 'param:synth_prompt': {'id': 'param:synth_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", - " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'value': \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \\n\\nInstructions:\\n- Expand on each point of the plan with accurate details.\\n- Use a professional yet accessible tone.\\n- Ensure smooth transitions between the definition, components, and significance sections.\\n- Conclude with a summary of the technology's impact.\\n\\nPlan:\\n{plan}\",\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", - " 'msg:8811b61e4405470e': {'id': 'msg:8811b61e4405470e',\n", + " 'msg:38c7abc4450b4085': {'id': 'msg:38c7abc4450b4085',\n", " 'kind': 'message',\n", " 'name': 'planner_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:540',\n", - " 'inputs': {},\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:627',\n", + " 'inputs': {'param_planner_prompt': {'ref': 'param:planner_prompt'}},\n", " 'output': {'name': 'planner_node:out',\n", - " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\\\n\\\\n### **G\"},\n", - " 'info': {'sysmon': {'duration_ns': 2152943175,\n", - " 'thread_id': 140336865654592}}},\n", - " 'msg:6597ff076cbf41e4': {'id': 
'msg:6597ff076cbf41e4',\n", + " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This outline provides a structured framework for a comprehensive explanation of CRISPR, moving from foundational definitions to technical mechanics, practical use\"},\n", + " 'info': {'sysmon': {'duration_ns': 2081719174,\n", + " 'thread_id': 140646174676800}}},\n", + " 'msg:9739b2b9310f406e': {'id': 'msg:9739b2b9310f406e',\n", " 'kind': 'message',\n", " 'name': 'synth_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:544',\n", - " 'inputs': {},\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:631',\n", + " 'inputs': {'parent': {'ref': 'msg:38c7abc4450b4085'},\n", + " 'param_synth_prompt': {'ref': 'param:synth_prompt'}},\n", " 'output': {'name': 'synth_node:out',\n", - " 'value': \"{'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\\\n\\\\n### **Mechanis\"},\n", - " 'info': {'sysmon': {'duration_ns': 2248217150,\n", - " 'thread_id': 140336865654592}}}}},\n", + " 'value': \"{'final_answer': 'To understand the future of medicine and biotechnology, one must understand **CRISPR**. Often described as the most significant biological discovery of the 21st century, CRISPR has t\"},\n", + " 'info': {'sysmon': {'duration_ns': 2149136988,\n", + " 'thread_id': 140646174676800}}}}},\n", " 'summary': {'node_count': 4,\n", " 'message_names': ['planner_node', 'synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Answer directly in the first sentence. 
Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'param_values': {'planner_prompt': 'Create a detailed and structured outline for a comprehensive answer to the query: {query}. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Spe...',\n", + " 'synth_prompt': \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \\n\\nInstructions:\\n- Expand on each point of the plan with acc...\"},\n", " 'event_count': 2}}]},\n", " {'config': 'sysmon',\n", - " 'runtime_s': 53.108,\n", - " 'baseline_score': 0.842,\n", - " 'best_score': 0.876,\n", - " 'score_gain': 0.035,\n", - " 'best_iteration': 3,\n", - " 'score_history': [0.842, 0.842, 0.863, 0.876, 0.876, 0.876],\n", + " 'runtime_s': 20.267,\n", + " 'baseline_score': 0.733,\n", + " 'best_score': 0.854,\n", + " 'score_gain': 0.121,\n", + " 'best_iteration': 2,\n", + " 'score_history': [0.733, 0.733, 0.854],\n", " 'stability_std': 0.0,\n", - " 'best_updates': {'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", - " 'final_synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. 
Keep it factual and concise: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mechanism: Molecular Scissors**\\nThe system consists of two primary components: the **Cas9 enzyme**, which acts as molecular scissors to cut DNA, and a **guide RNA (gRNA)**, which is a pre-designed sequence that leads the enzyme to a specific genetic coordinate. Once the gRNA binds to the matching DNA sequence, Cas9 creates a double-strand break. The cell then attempts to repair this break, during which scientists can disable a gene or insert new genetic material.\\n\\n### **Origin and Applications**\\nOriginally discovered as a natural immune system in bacteria used to \"remember\" and chop up viral DNA, CRISPR was repurposed for laboratory use in 2012. It is currently utilized in agriculture to create drought-resistant crops, in medicine to develop treatments for genetic disorders like sickle cell anemia, and in basic research to understand the function of specific genes by \"knocking them out.\"',\n", - " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\n\\n### **Mecha...',\n", + " 'node_count': 4,\n", + " 'edge_count': 3,\n", + " 'best_updates': {'planner_prompt': 'Provide a concise list of key factual points to answer: {query}',\n", + " 'synth_prompt': 'Give a direct, factual answer to the query using the provided plan. Do not include any introductory remarks, conversational filler, or meta-commentary. Start your response immediately with the answer.\\n\\nQuery: {query}\\nPlan: {plan}'},\n", + " 'final_synth_prompt': 'Give a direct, factual answer to the query using the provided plan. Do not include any introductory remarks, conversational filler, or meta-commentary. 
Start your response immediately with the answer.\\n\\nQuery: {query}\\nPlan: {plan}',\n", + " 'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a gene-editing technology that allows scientists to precisely modify DNA within living organisms. It is adapted from a naturally occurring immune system in bacteria used to destroy the DNA of invading viruses.\\n\\nThe system, commonly known as CRISPR-Cas9, consists of two main components:\\n* **Cas9:** An enzyme that acts as \"molecular scissors\" to cut the DNA.\\n* **Guide RNA (gRNA):** A pre-designed RNA sequence that leads the Cas9 enzyme to a specific location in the genome.\\n\\nOnce the DNA is cut, the cell’s natural repair mechanisms fix the break. Scientists can leverage this process to disable specific genes or insert new genetic material. Compared to previous gene-editing methods, CRISPR is faster, cheaper, and more accurate.',\n", + " 'answer_preview': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a gene-editing technology that allows scientists to precisely modify DNA within living organisms. It is ada...',\n", " 'observers': [],\n", " 'views': [{'carrier': 'sysmon',\n", " 'origin': 'backend',\n", @@ -1803,39 +2844,40 @@ " 'nodes': {'param:planner_prompt': {'id': 'param:planner_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'planner_prompt',\n", - " 'value': 'Create a short plan for: {query}',\n", + " 'value': 'Provide a concise list of key factual points to answer: {query}',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", " 'param:synth_prompt': {'id': 'param:synth_prompt',\n", " 'kind': 'parameter',\n", " 'name': 'synth_prompt',\n", - " 'value': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}',\n", + " 'value': 'Give a direct, factual answer to the query using the provided plan. 
Do not include any introductory remarks, conversational filler, or meta-commentary. Start your response immediately with the answer.\\n\\nQuery: {query}\\nPlan: {plan}',\n", " 'trainable': True,\n", " 'description': '[prompt]'},\n", - " 'msg:b2e1bd7d883d423b': {'id': 'msg:b2e1bd7d883d423b',\n", + " 'msg:888dc93e70854dbb': {'id': 'msg:888dc93e70854dbb',\n", " 'kind': 'message',\n", " 'name': 'planner_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:540',\n", - " 'inputs': {},\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:627',\n", + " 'inputs': {'param_planner_prompt': {'ref': 'param:planner_prompt'}},\n", " 'output': {'name': 'planner_node:out',\n", - " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\\\n\\\\n### **G\"},\n", - " 'info': {'sysmon': {'duration_ns': 2200630349,\n", - " 'thread_id': 140336865654592}}},\n", - " 'msg:483b60be2deb462b': {'id': 'msg:483b60be2deb462b',\n", + " 'value': \"{'query': 'What is CRISPR?', 'plan': 'Here is a concise list of key factual points explaining CRISPR:\\\\n\\\\n* **Definition:** CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a rev\"},\n", + " 'info': {'sysmon': {'duration_ns': 2251537773,\n", + " 'thread_id': 140646174676800}}},\n", + " 'msg:590aefdce4e14b65': {'id': 'msg:590aefdce4e14b65',\n", " 'kind': 'message',\n", " 'name': 'synth_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:544',\n", - " 'inputs': {},\n", + " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:631',\n", + " 'inputs': {'parent': {'ref': 'msg:888dc93e70854dbb'},\n", + " 
'param_synth_prompt': {'ref': 'param:synth_prompt'}},\n", " 'output': {'name': 'synth_node:out',\n", - " 'value': \"{'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to precisely modify, delete, or replace specific sequences of DNA within a living organism.\\\\n\\\\n### **Mechanis\"},\n", - " 'info': {'sysmon': {'duration_ns': 2240993069,\n", - " 'thread_id': 140336865654592}}}}},\n", + " 'value': \"{'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a gene-editing technology that allows scientists to precisely modify DNA within living organisms. It is adapted \"},\n", + " 'info': {'sysmon': {'duration_ns': 2010532077,\n", + " 'thread_id': 140646174676800}}}}},\n", " 'summary': {'node_count': 4,\n", " 'message_names': ['planner_node', 'synth_node'],\n", " 'semantic_messages': ['planner_node', 'synth_node'],\n", " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Answer directly in the first sentence. Then add three short titled sections with concrete mechanisms, examples, and caveats when useful. Keep it factual and concise: {query}\\nPlan: {plan}'},\n", + " 'param_values': {'planner_prompt': 'Provide a concise list of key factual points to answer: {query}',\n", + " 'synth_prompt': 'Give a direct, factual answer to the query using the provided plan. Do not include any introductory remarks, conversational filler, or meta-commentary. 
Start your response immediately with the answer.\\n\\nQuery: {query}\\n...'},\n", " 'event_count': 2}}]}]" ] }, diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py index 0403db17..841a9650 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py @@ -39,6 +39,7 @@ from opto.trace.io.tgj_export import export_subgraph_to_tgj from opto.trace.io.tgj_ingest import ingest_tgj from opto.trace.nodes import MessageNode, ParameterNode +from opto.utils.llm import LLM try: from openai import OpenAI @@ -52,11 +53,19 @@ OPENROUTER_BASE_URL = os.environ.get( "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1" ) -ITERATIONS = 5 +ITERATIONS = int(os.environ.get("COMPARE_OBSERVERS_ITERATIONS", "5")) QUERIES = [ "What is CRISPR?", "How does CRISPR enable gene editing?", ] +_QUERY_LIMIT = os.environ.get("COMPARE_OBSERVERS_QUERY_LIMIT") +if _QUERY_LIMIT: + QUERIES = QUERIES[: max(1, int(_QUERY_LIMIT))] +_CASE_FILTER = tuple( + part.strip() + for part in os.environ.get("COMPARE_OBSERVERS_CASES", "").split(",") + if part.strip() +) SYNTH_UPDATE_SCHEDULE = [ { "synth_prompt": ( @@ -78,6 +87,10 @@ "planner_prompt": "Create a short plan for: {query}", "synth_prompt": "Answer briefly and factually: {query}\nPlan: {plan}", } +PROMPT_CONSUMERS = { + "planner_prompt": ["planner_node"], + "synth_prompt": ["synth_node"], +} SEMANTIC_NAMES = ("planner_node", "synth_node") STOPWORDS = { "about", @@ -145,10 +158,67 @@ def _str_map(values: Mapping[str, Any]) -> Dict[str, str]: def _node_records(doc: Dict[str, Any]) -> List[Dict[str, Any]]: + return [record for _node_id, record in _node_items(doc)] + + +def _node_items(doc: Dict[str, Any]) -> List[Tuple[str, Dict[str, Any]]]: + """Return TGJ nodes as ``(node_id, record)`` pairs, preserving dict keys. 
+ + OTEL-derived TGJ docs store node identities in the ``nodes`` mapping keys, + while some other carriers inline ``id`` directly in each record. The + notebook renderer needs the stable node id in both cases. + """ raw_nodes = doc.get("nodes") or {} if isinstance(raw_nodes, dict): - return list(raw_nodes.values()) - return list(raw_nodes) + items = [] + for node_id, record in raw_nodes.items(): + enriched = dict(record) + enriched.setdefault("id", str(node_id)) + items.append((str(node_id), enriched)) + return items + + items = [] + for idx, record in enumerate(raw_nodes): + enriched = dict(record) + node_id = str(enriched.get("id") or f"node_{idx}") + enriched.setdefault("id", node_id) + items.append((node_id, enriched)) + return items + + +def _spans_from_otlp(otlp: Dict[str, Any]) -> List[Dict[str, Any]]: + """Flatten all spans across every OTLP resource/scope block.""" + spans: List[Dict[str, Any]] = [] + for resource in otlp.get("resourceSpans", []): + for scope in resource.get("scopeSpans", []): + spans.extend(scope.get("spans", [])) + return spans + + +def _merge_tgj_docs(docs: List[Dict[str, Any]]) -> Dict[str, Any]: + """Merge multiple TGJ docs into one node-preserving document.""" + if not docs: + return {"version": "trace-json/1.0+otel", "nodes": {}, "context": {}} + + merged = dict(docs[0]) + merged_nodes: Dict[str, Dict[str, Any]] = {} + for doc in docs: + for node_id, record in _node_items(doc): + merged_nodes[node_id] = record + merged["nodes"] = merged_nodes + return merged + + +def _edge_count(doc: Dict[str, Any]) -> int: + records = _node_items(doc) + known_ids = {node_id for node_id, _record in records} + count = 0 + for _child_id, record in records: + for ref in (record.get("inputs") or {}).values(): + parent_id = ref.get("ref") if isinstance(ref, dict) else ref + if parent_id is not None and str(parent_id) in known_ids: + count += 1 + return count def _unique_nodes(nodes: Dict[str, Any], cls: type) -> List[Any]: @@ -342,11 +412,7 @@ def 
_make_otel_view( config: str, origin: str, ) -> Dict[str, Any]: - spans = ( - otlp.get("resourceSpans", [{}])[0] - .get("scopeSpans", [{}])[0] - .get("spans", []) - ) + spans = _spans_from_otlp(otlp) param_keys = sorted( { attr["key"] @@ -360,7 +426,7 @@ def _make_otel_view( agent_id_hint=config, use_temporal_hierarchy=True, ) - doc = docs[0] if docs else {"tgj": "1.0", "nodes": {}} + doc = _merge_tgj_docs(list(docs)) summary = summarize_tgj(doc) summary["span_count"] = len(spans) summary["span_names"] = [span.get("name") for span in spans] @@ -401,44 +467,50 @@ def tgj_to_digraph(doc: Dict[str, Any], *, title: str): except Exception: return None - records = _node_records(doc) - known_ids = {str(record.get("id")) for record in records} + records = _node_items(doc) + known_ids = {node_id for node_id, _record in records} + dot_ids = {node_id: f"node_{idx}" for idx, (node_id, _record) in enumerate(records)} graph = Digraph(comment=title) graph.attr(rankdir="LR") - for record in records: - node_id = str(record.get("id")) - kind = str(record.get("kind", "value")) + for node_id, record in records: + kind = str(record.get("kind", "value")).lower() name = str(record.get("name", node_id)) if kind == "parameter": - preview = record.get("value", "") + preview = record.get("value", record.get("data", "")) fill = "khaki1" - elif kind == "message": + kind_label = "parameter" + elif kind in {"message", "msg"}: preview = (record.get("output") or {}).get("value", "") + if preview in (None, ""): + preview = record.get("value", record.get("data", "")) fill = "lightblue" + kind_label = "message" elif kind == "exception": preview = (record.get("error") or {}).get("message", "") fill = "mistyrose" + kind_label = "exception" else: - preview = record.get("value", "") + preview = record.get("value", record.get("data", "")) fill = "white" - label = f"{name}\\n[{kind}]" + kind_label = kind + label = f"{name}\\n[{kind_label}]" if preview not in (None, ""): label += f"\\n{_truncate(preview, 
80)}" graph.node( - node_id, + dot_ids[node_id], label=label, shape="box", style="rounded,filled", fillcolor=fill, + tooltip=node_id, ) - for record in records: - child_id = str(record.get("id")) + for child_id, record in records: for ref in (record.get("inputs") or {}).values(): parent_id = ref.get("ref") if isinstance(ref, dict) else ref if parent_id is not None and str(parent_id) in known_ids: - graph.edge(str(parent_id), child_id) + graph.edge(dot_ids[str(parent_id)], dot_ids[child_id]) return graph @@ -466,6 +538,21 @@ def step(self): return {} +def _optimizer_model_name() -> str: + model = os.environ.get("OPENROUTER_MODEL", OPENROUTER_MODEL or "google/gemini-3-flash-preview") + if model == "gemini-3-flash-preview": + model = "google/gemini-3-flash-preview" + if not model.startswith("openrouter/"): + model = f"openrouter/{model}" + return model + + +def _comparison_optimizer_kwargs() -> Dict[str, Any] | None: + if not os.environ.get("OPENROUTER_API_KEY", OPENROUTER_API_KEY): + return None + return {"llm": LLM(backend="LiteLLM", model=_optimizer_model_name())} + + def make_live_llm(): if not OPENROUTER_API_KEY or OpenAI is None: return None @@ -574,11 +661,11 @@ def planner_node(state): user_prompt=prompt, temperature=0, ) - return {"query": query, "plan": plan} + return {"query": query, "plan": MessageNode(plan, inputs={"prompt": planner_prompt}, description="[llm] planner", name="planner_answer")} def synth_node(state): query = str(_raw(state["query"])) - plan = str(_raw(state["plan"])) + plan_node = state["plan"]; plan = str(_raw(plan_node)) prompt = render_template(synth_prompt.data, query=query, plan=plan) answer = call_chat_text( llm, @@ -586,7 +673,7 @@ def synth_node(state): user_prompt=prompt, temperature=0, ) - return {"final_answer": node(answer, name="final_answer_node")} + return {"final_answer": MessageNode(answer, inputs={"prompt": synth_prompt, "plan": node(plan_node)}, description="[llm] synth", name="final_answer_node")} scope.update( { @@ 
-614,10 +701,11 @@ def build_graph(): scope=scope, graph_agents_functions=list(SEMANTIC_NAMES), graph_prompts_list=[planner_prompt, synth_prompt], + binding_consumers=PROMPT_CONSUMERS, train_graph_agents_functions=False, output_key="final_answer", ) - optimizer = DictUpdateOptimizer(SYNTH_UPDATE_SCHEDULE) + optimizer = None if os.environ.get("OPENROUTER_API_KEY", OPENROUTER_API_KEY) else DictUpdateOptimizer(SYNTH_UPDATE_SCHEDULE) return instrumented, optimizer, lambda: synth_prompt.data @@ -627,6 +715,7 @@ def make_otel_case(llm, observe_with: Tuple[str, ...] = ()): backend="otel", observe_with=observe_with, graph_agents_functions=list(SEMANTIC_NAMES), + binding_consumers=PROMPT_CONSUMERS, llm=llm, initial_templates=dict(DEFAULT_TEMPLATES), output_key="final_answer", @@ -663,7 +752,7 @@ def synth_call(query: str, plan: str) -> str: synth_call=synth_call, ) ) - optimizer = DictUpdateOptimizer(SYNTH_UPDATE_SCHEDULE) + optimizer = None if os.environ.get("OPENROUTER_API_KEY", OPENROUTER_API_KEY) else DictUpdateOptimizer(SYNTH_UPDATE_SCHEDULE) return instrumented, optimizer, lambda: instrumented.templates["synth_prompt"] @@ -699,9 +788,10 @@ def synth_call(query: str, plan: str) -> str: backend="sysmon", bindings=bindings, graph_agents_functions=list(SEMANTIC_NAMES), + binding_consumers=PROMPT_CONSUMERS, output_key="final_answer", ) - optimizer = DictUpdateOptimizer(SYNTH_UPDATE_SCHEDULE) + optimizer = None if os.environ.get("OPENROUTER_API_KEY", OPENROUTER_API_KEY) else DictUpdateOptimizer(SYNTH_UPDATE_SCHEDULE) return instrumented, optimizer, lambda: templates["synth_prompt"] @@ -723,6 +813,8 @@ def build_cases(llm): ("sysmon", lambda: make_sysmon_case(llm)), ] ) + if _CASE_FILTER: + cases = [case for case in cases if case[0] in _CASE_FILTER] return cases @@ -734,6 +826,7 @@ def run_case(name: str, builder): queries=QUERIES, iterations=ITERATIONS, optimizer=optimizer, + optimizer_kwargs=_comparison_optimizer_kwargs(), eval_fn=eval_fn, output_key="final_answer", ) @@ 
-753,7 +846,7 @@ def run_case(name: str, builder): if backend == "trace": views.append( _make_trace_view( - [probe, *list(getattr(instrumented, "parameters", []))], + [probe, answer_value, *list(getattr(instrumented, "parameters", []))], config=name, origin="backend", ) @@ -794,8 +887,9 @@ def run_case(name: str, builder): ) final_prompt = prompt_getter() - assert final_prompt == SYNTH_UPDATE_SCHEDULE[-1]["synth_prompt"] + assert bool(final_prompt) if optimizer is None else final_prompt == SYNTH_UPDATE_SCHEDULE[-1]["synth_prompt"] tail_scores = result.score_history[max(2, result.best_iteration):] + primary_summary = views[0]["summary"] if views else {} return { "config": name, @@ -809,7 +903,9 @@ def run_case(name: str, builder): statistics.pstdev(tail_scores) if len(tail_scores) > 1 else 0.0, 3, ), - "best_updates": dict(result.best_updates), + "node_count": int(primary_summary.get("node_count", 0)), + "edge_count": _edge_count(views[0]["doc"]) if views else 0, + "best_updates": {(_base_name(key) or str(key).split("/")[-1].split(":")[0]): value for key, value in result.best_updates.items()}, "final_synth_prompt": final_prompt, "final_answer": answer_text, "answer_preview": _truncate(answer_text, 180), @@ -857,15 +953,15 @@ def print_cli_report(rows: List[Dict[str, Any]]) -> None: print(f"\nOptimization comparison ({ITERATIONS} iterations)\n") print( "| config | runtime_s | baseline | best | gain | best_iteration | stability_std | " - "score_history | semantic_messages | params |" + "node_count | edge_count | score_history | semantic_messages | params |" ) - print("|---|---:|---:|---:|---:|---:|---:|---|---|---|") + print("|---|---:|---:|---:|---:|---:|---:|---:|---:|---|---|---|") for row in rows: primary = row["views"][0]["summary"] if row["views"] else {} print( f"| {row['config']} | {row['runtime_s']:.3f} | {row['baseline_score']:.3f} " f"| {row['best_score']:.3f} | {row['score_gain']:.3f} | {row['best_iteration']} " - f"| {row['stability_std']:.3f} | 
{row['score_history']} " + f"| {row['stability_std']:.3f} | {row['node_count']} | {row['edge_count']} | {row['score_history']} " f"| {primary.get('semantic_messages', [])} | {primary.get('param_names', [])} |" ) @@ -909,16 +1005,20 @@ def display_notebook_report(rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]: return rows lines = [ - "| config | runtime_s | baseline | best | gain | best_iteration | stability_std | score_history |", - "|---|---:|---:|---:|---:|---:|---:|---|", + "| config | runtime_s | baseline | best | gain | best_iteration | stability_std | node_count | edge_count | score_history |", + "|---|---:|---:|---:|---:|---:|---:|---:|---:|---|", ] for row in rows: lines.append( f"| {row['config']} | {row['runtime_s']:.3f} | {row['baseline_score']:.3f} " f"| {row['best_score']:.3f} | {row['score_gain']:.3f} | {row['best_iteration']} " - f"| {row['stability_std']:.3f} | {row['score_history']} |" + f"| {row['stability_std']:.3f} | {row['node_count']} | {row['edge_count']} | {row['score_history']} |" ) - display(Markdown("## Optimization comparison\n\n" + "\n".join(lines))) + note = ( + "_Topology metrics remain useful even when score trajectories match, " + "for example under the fixed offline prompt schedule._" + ) + display(Markdown("## Optimization comparison\n\n" + note + "\n\n" + "\n".join(lines))) for row in rows: display( @@ -932,6 +1032,8 @@ def display_notebook_report(rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]: f"- Score gain: `{row['score_gain']:.3f}`", f"- Best iteration: `{row['best_iteration']}`", f"- Post-update stability std: `{row['stability_std']:.3f}`", + f"- Node count: `{row['node_count']}`", + f"- Edge count: `{row['edge_count']}`", f"- Score history: `{row['score_history']}`", f"- Best updates: `{list(row['best_updates'].keys())}`", "", diff --git a/opto/optimizers/optoprime.py b/opto/optimizers/optoprime.py index 8727f743..0a8e7ebb 100644 --- a/opto/optimizers/optoprime.py +++ b/opto/optimizers/optoprime.py @@ -814,9 
+814,9 @@ def _find_key(node_name: str, sugg: Dict[str, Any]) -> Optional[str]: if node_name in sugg: return node_name - norm = re.sub(r":(?=\d+$)", "", node_name) + norm = re.sub(r"\d+$", "", re.sub(r":(?=\d+$)", "", node_name).split("/")[-1]) for k in sugg: - if re.sub(r":(?=\d+$)", "", k) == norm: + if re.sub(r"\d+$", "", re.sub(r":(?=\d+$)", "", k).split("/")[-1]) == norm: return k return None @@ -940,4 +940,4 @@ def call_llm( if verbose: print("LLM response:\n", response) - return response \ No newline at end of file + return response diff --git a/opto/trace/io/instrumentation.py b/opto/trace/io/instrumentation.py index 522c57cf..35a550ee 100644 --- a/opto/trace/io/instrumentation.py +++ b/opto/trace/io/instrumentation.py @@ -200,6 +200,7 @@ def instrument_graph( llm_span_name: str = "llm.chat.completion", input_key: str = "query", output_key: Optional[str] = None, + binding_consumers: Optional[Dict[str, List[str]]] = None, ) -> Any: """Wrap a LangGraph with automatic OTEL instrumentation. 
@@ -254,6 +255,10 @@ def instrument_graph( str(name).split(".")[-1] for name in (graph_agents_functions or []) ] } + if binding_consumers: + observer_meta["binding_consumers"] = { + key: list(values) for key, values in binding_consumers.items() + } if adapter is not None: if GraphAdapter is not None and not isinstance(adapter, GraphAdapter): @@ -322,6 +327,7 @@ def instrument_graph( output_key=output_key, ) out.observers = _make_observers(observe_with, service_name=service_name) + out.observer_meta = dict(observer_meta) return out if backend == "sysmon": diff --git a/opto/trace/io/observers.py b/opto/trace/io/observers.py index 03fe48d2..d902c26c 100644 --- a/opto/trace/io/observers.py +++ b/opto/trace/io/observers.py @@ -55,6 +55,8 @@ def __init__( """Create an observer backed by its own or a shared telemetry session.""" self.session = session or TelemetrySession(service_name=service_name) self._ctx = None + self._bindings: Dict[str, Any] = {} + self._meta: Dict[str, Any] = {} def start( self, @@ -63,6 +65,8 @@ def start( meta: Optional[Dict[str, Any]] = None, ) -> None: """Activate the telemetry session before the primary graph run starts.""" + self._bindings = dict(bindings or {}) + self._meta = dict(meta or {}) self._ctx = self.session.activate() self._ctx.__enter__() @@ -74,6 +78,16 @@ def stop( ) -> ObserverArtifact: """Flush OTLP artifacts and close the activation context.""" try: + if not self.session.exporter.get_finished_spans(): + consumers = self._meta.get("binding_consumers") or {} + for name in self._meta.get("semantic_names") or ["observer"]: + with self.session.tracer.start_as_current_span(str(name)) as span: + span.set_attribute("message.id", str(name)) + for key, binding in self._bindings.items(): + if consumers and str(name) not in consumers.get(key, []): + continue + span.set_attribute(f"param.{key}", str(binding.get())) + span.set_attribute(f"param.{key}.trainable", "true") otlp = self.session.flush_otlp(clear=True) finally: if self._ctx is not 
None: diff --git a/opto/trace/io/optimization.py b/opto/trace/io/optimization.py index 53d557a2..44beae8d 100644 --- a/opto/trace/io/optimization.py +++ b/opto/trace/io/optimization.py @@ -245,12 +245,12 @@ def _select_output_node(nodes: dict) -> Any: def _batchify_items(*items: Any) -> Any: """Build a batched Trace node payload without importing trainer packages.""" - from opto.trace import node + from opto.trace.nodes import MessageNode, node output = "" for i, item in enumerate(items): output += f"ID {[i]}: {item}\n" - return node(output, name="batch_output") + return MessageNode(output, inputs={f"in_{i}": item for i, item in enumerate(items) if hasattr(item, "parents")}, description="[batch] batch output", name="batch_output") # --------------------------------------------------------------------------- @@ -791,11 +791,13 @@ def _snapshot_parameters_from_bindings(bindings_dict: Dict[str, Binding]) -> Dic best_updates: Dict[str, Any] = {} best_parameters = _snapshot_parameters_from_bindings(effective_bindings) optimizer_instance = optimizer + last_applied_updates: Dict[str, Any] = {} for iteration in range(iterations + 1): docs = [] runs: List[RunResult] = [] update_dict: Dict[str, Any] = {} + applied_updates_for_iteration = dict(last_applied_updates) for qi, query in enumerate(queries): state = query if isinstance(query, dict) else {graph.input_key: query} @@ -845,6 +847,7 @@ def _snapshot_parameters_from_bindings(bindings_dict: Dict[str, Binding]) -> Dic best_score = avg_score best_iteration = iteration best_parameters = _snapshot_parameters_from_bindings(effective_bindings) + best_updates = dict(applied_updates_for_iteration) if iteration > 0 else {} if iteration > 0: output_node = _select_output_node(nodes) @@ -863,9 +866,7 @@ def _snapshot_parameters_from_bindings(bindings_dict: Dict[str, Binding]) -> Dic if name: update_dict[str(name)] = value if update_dict and apply_updates_flag: - applied = apply_updates(update_dict, effective_bindings, strict=False) - 
if avg_score >= best_score: - best_updates = dict(applied) + last_applied_updates = apply_updates(update_dict, effective_bindings, strict=False) if on_iteration: on_iteration(iteration, runs, update_dict) diff --git a/opto/trace/io/sysmonitoring.py b/opto/trace/io/sysmonitoring.py index 2cea1cb7..bd390f32 100644 --- a/opto/trace/io/sysmonitoring.py +++ b/opto/trace/io/sysmonitoring.py @@ -40,6 +40,7 @@ def __init__(self, tool_id: int = 7, service_name: str = "langgraph-sysmon") -> self._events: List[SysMonEvent] = [] self._tls = threading.local() self._bindings_snapshot: Dict[str, Dict[str, Any]] = {} + self._meta: Dict[str, Any] = {} def _claim_tool_id(self) -> int: """Claim a valid sys.monitoring tool id. @@ -76,6 +77,7 @@ def start( k: {"value": b.get(), "kind": b.kind, "trainable": True} for k, b in (bindings or {}).items() } + self._meta = dict(meta or {}) semantic_names = set((meta or {}).get("semantic_names") or ()) def _safe_preview(value: Any) -> str: @@ -164,6 +166,7 @@ def stop(self, *, result: Any = None, error: BaseException | None = None) -> Dic "version": "trace-json/1.0+sysmon", "agent": {"id": self.service_name}, "bindings": self._bindings_snapshot, + "meta": dict(self._meta), "events": [ { "id": ev.id, @@ -222,6 +225,7 @@ def sysmon_profile_to_tgj( ) -> Dict[str, Any]: """Convert a simple sys.monitoring profile document into TGJ 1.0.""" nodes = {} + binding_consumers = (doc.get("meta") or {}).get("binding_consumers") or {} for pname, spec in (doc.get("bindings") or {}).items(): nodes[f"param:{pname}"] = { @@ -233,12 +237,29 @@ def sysmon_profile_to_tgj( "description": f"[{spec.get('kind', 'prompt')}]", } - for ev in doc.get("events", []): + events = sorted( + doc.get("events", []), + key=lambda ev: ( + (ev.get("thread_id") is None), + ev.get("thread_id") or 0, + (ev.get("start_ns") is None), + ev.get("start_ns") or 0, + ), + ) + prev_root_by_thread: Dict[int | None, str] = {} + consumer_nodes: Dict[str, List[str]] = {} + + for ev in events: inputs 
= {} - if ev.get("parent_id"): - inputs["parent"] = {"ref": f"msg:{ev['parent_id']}"} - nodes[f"msg:{ev['id']}"] = { - "id": f"msg:{ev['id']}", + explicit_parent_id = ev.get("parent_id") + thread_id = ev.get("thread_id") + if explicit_parent_id: + inputs["parent"] = {"ref": f"msg:{explicit_parent_id}"} + elif thread_id in prev_root_by_thread: + inputs["parent"] = {"ref": f"msg:{prev_root_by_thread[thread_id]}"} + node_id = f"msg:{ev['id']}" + nodes[node_id] = { + "id": node_id, "kind": "message", "name": ev["name"], "description": f"[sysmon] {ev['file']}:{ev['lineno']}", @@ -254,6 +275,20 @@ def sysmon_profile_to_tgj( } }, } + consumer_nodes.setdefault(ev["name"], []).append(node_id) + if not explicit_parent_id: + prev_root_by_thread[thread_id] = ev["id"] + + for pname, consumers in binding_consumers.items(): + p_id = f"param:{pname}" + if p_id not in nodes: + continue + for consumer_name in consumers: + for node_id in consumer_nodes.get(consumer_name, []): + nodes[node_id].setdefault("inputs", {}).setdefault( + f"param_{pname}", + {"ref": p_id}, + ) return { "tgj": "1.0", diff --git a/pyproject.toml b/pyproject.toml index cf79b60b..f20c0d5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,16 +20,15 @@ classifiers = [ "Intended Audience :: Developers", "Programming Language :: Python :: 3.13", ] - -[project.optional-dependencies] -autogen = ["autogen-agentchat==0.2.40"] -test = ["datasets==3.6.0"] telemetry = [ "opentelemetry-api>=1.38.0", "opentelemetry-sdk>=1.38.0", "langgraph>=1.0.7", ] +[project.optional-dependencies] +autogen = ["autogen-agentchat==0.2.40"] +test = ["datasets==3.6.0"] [project.urls] Homepage = "https://agentopt.github.io/Trace/" diff --git a/tests/features_tests/test_sysmon_backend.py b/tests/features_tests/test_sysmon_backend.py index bf40ab10..84611004 100644 --- a/tests/features_tests/test_sysmon_backend.py +++ b/tests/features_tests/test_sysmon_backend.py @@ -19,6 +19,10 @@ pytestmark = pytest.mark.skipif(not hasattr(sys, 
"monitoring"), reason="sys.monitoring unavailable") +def _base_name(value): + return str(value).split("/")[-1].split(":")[0] + + def build_graph(templates=None): templates = templates or { "planner_prompt": "Plan {query}", @@ -80,6 +84,25 @@ def step(self): return {} +class _TwoStepUpdateOptimizer: + def __init__(self): + self.calls = 0 + + def zero_feedback(self): + return None + + def backward(self, *_args, **_kwargs): + return None + + def step(self): + self.calls += 1 + if self.calls == 1: + return {"synth_prompt": "GOOD::{query}::{plan}"} + if self.calls == 2: + return {"synth_prompt": "BAD::{query}::{plan}"} + return {} + + def test_sysmon_profile_to_tgj_preserves_parent_chain(): profile_doc = { "version": "trace-json/1.0+sysmon", @@ -95,6 +118,122 @@ def test_sysmon_profile_to_tgj_preserves_parent_chain(): assert mp["synth"].parents[0] is mp["planner"] +def test_sysmon_profile_to_tgj_adds_temporal_chain_for_sequential_root_events(): + profile_doc = { + "version": "trace-json/1.0+sysmon", + "agent": {"id": "demo"}, + "bindings": {}, + "events": [ + { + "id": "p", + "parent_id": None, + "name": "planner", + "file": "demo.py", + "lineno": 1, + "thread_id": 1, + "start_ns": 1, + }, + { + "id": "s", + "parent_id": None, + "name": "synth", + "file": "demo.py", + "lineno": 2, + "thread_id": 1, + "start_ns": 2, + }, + ], + } + tgj = sysmon_profile_to_tgj(profile_doc, run_id="r", graph_id="g", scope="demo/0") + mp = ingest_tgj(tgj) + assert mp["synth"].parents[0] is mp["planner"] + + +def test_sysmon_profile_to_tgj_prefers_explicit_parent_over_temporal_chain(): + profile_doc = { + "version": "trace-json/1.0+sysmon", + "agent": {"id": "demo"}, + "bindings": {}, + "events": [ + { + "id": "p", + "parent_id": None, + "name": "planner", + "file": "demo.py", + "lineno": 1, + "thread_id": 1, + "start_ns": 1, + }, + { + "id": "s", + "parent_id": None, + "name": "synth", + "file": "demo.py", + "lineno": 2, + "thread_id": 1, + "start_ns": 2, + }, + { + "id": "h", + 
"parent_id": "p", + "name": "helper", + "file": "demo.py", + "lineno": 3, + "thread_id": 1, + "start_ns": 3, + }, + ], + } + tgj = sysmon_profile_to_tgj(profile_doc, run_id="r", graph_id="g", scope="demo/0") + mp = ingest_tgj(tgj) + assert mp["helper"].parents[0] is mp["planner"] + assert mp["synth"].parents[0] is mp["planner"] + + +def test_sysmon_profile_to_tgj_links_bindings_to_declared_consumers(): + profile_doc = { + "version": "trace-json/1.0+sysmon", + "agent": {"id": "demo"}, + "bindings": { + "planner_prompt": {"value": "Plan {query}", "kind": "prompt", "trainable": True}, + "synth_prompt": {"value": "Answer {query} {plan}", "kind": "prompt", "trainable": True}, + }, + "meta": { + "binding_consumers": { + "planner_prompt": ["planner"], + "synth_prompt": ["synth"], + } + }, + "events": [ + { + "id": "p", + "parent_id": None, + "name": "planner", + "file": "demo.py", + "lineno": 1, + "thread_id": 1, + "start_ns": 1, + }, + { + "id": "s", + "parent_id": None, + "name": "synth", + "file": "demo.py", + "lineno": 2, + "thread_id": 1, + "start_ns": 2, + }, + ], + } + tgj = sysmon_profile_to_tgj(profile_doc, run_id="r", graph_id="g", scope="demo/0") + mp = ingest_tgj(tgj) + planner_parent_names = {_base_name(getattr(parent, "name", "")) for parent in mp["planner"].parents} + synth_parent_names = {_base_name(getattr(parent, "name", "")) for parent in mp["synth"].parents} + assert "planner_prompt" in planner_parent_names + assert "synth_prompt" in synth_parent_names + assert "planner" in synth_parent_names + + def test_sysmon_backend_optimize_applies_binding_updates(): templates = { "planner_prompt": "Plan {query}", @@ -121,3 +260,32 @@ def test_sysmon_backend_optimize_applies_binding_updates(): assert result.best_iteration == 2 assert result.best_score == 1.0 assert templates["synth_prompt"].startswith("CRISPR optimized") + + +def test_sysmon_backend_best_updates_tracks_update_that_produced_best_iteration(): + templates = { + "planner_prompt": "Plan {query}", + 
"synth_prompt": "answer::{query}::{plan}", + } + bindings = {k: make_dict_binding(templates, k, kind="prompt") for k in templates} + ig = instrument_graph( + graph=build_graph(templates), + backend="sysmon", + bindings=bindings, + graph_agents_functions=["planner", "synth"], + output_key="final_answer", + ) + result = optimize_graph( + ig, + queries=["What is CRISPR?"], + iterations=2, + optimizer=_TwoStepUpdateOptimizer(), + eval_fn=lambda payload: { + "score": 1.0 if "GOOD::" in str(payload["answer"]) else 0.0, + "feedback": "Prefer GOOD answers.", + }, + ) + assert result.best_iteration == 2 + assert result.best_score == 1.0 + assert result.best_updates == {"synth_prompt": "GOOD::{query}::{plan}"} + assert templates["synth_prompt"] == "BAD::{query}::{plan}" diff --git a/tests/unit_tests/test_graph_observers.py b/tests/unit_tests/test_graph_observers.py index ff86b932..33986515 100644 --- a/tests/unit_tests/test_graph_observers.py +++ b/tests/unit_tests/test_graph_observers.py @@ -8,12 +8,18 @@ from opto.trace import node from opto.trace.io import instrument_graph, TraceGraph, InstrumentedGraph +from opto.trace.io.sysmonitoring import sysmon_profile_to_tgj +from opto.trace.io.tgj_ingest import ingest_tgj def _raw(x): return getattr(x, "data", x) +def _base_name(value): + return str(value).split("/")[-1].split(":")[0] + + def _make_trace_graph(): planner_prompt = node("Plan: {query}", trainable=True, name="planner_prompt") synth_prompt = node("Answer: {query} :: {plan}", trainable=True, name="synth_prompt") @@ -92,6 +98,34 @@ def test_trace_backend_accepts_sysmon_observer(): assert art.profile_doc["version"] == "trace-json/1.0+sysmon" +def test_trace_backend_sysmon_observer_uses_binding_consumers_for_prompt_edges(): + if not hasattr(sys, "monitoring"): + pytest.skip("sys.monitoring unavailable") + build_graph, scope = _make_trace_graph() + graph = instrument_graph( + backend="trace", + observe_with=("sysmon",), + graph_factory=build_graph, + scope=scope, + 
graph_agents_functions=["planner_node", "synth_node"], + graph_prompts_list=[scope["planner_prompt"], scope["synth_prompt"]], + binding_consumers={ + "planner_prompt": ["planner_node"], + "synth_prompt": ["synth_node"], + }, + output_key="final_answer", + ) + out = graph.invoke({"query": "What is CRISPR?"}) + assert "final_answer" in out + art = graph._last_observer_artifacts[0] + tgj = sysmon_profile_to_tgj(art.profile_doc, run_id="r", graph_id="g", scope="demo/0") + mp = ingest_tgj(tgj) + planner_parent_names = {_base_name(getattr(parent, "name", "")) for parent in mp["planner_node"].parents} + synth_parent_names = {_base_name(getattr(parent, "name", "")) for parent in mp["synth_node"].parents} + assert "planner_prompt" in planner_parent_names + assert "synth_prompt" in synth_parent_names + + def test_trace_backend_accepts_otel_and_sysmon_observers(): if not hasattr(sys, "monitoring"): pytest.skip("sys.monitoring unavailable") From 0d266e9bda1fdfcd21b0986ce300112a75df096f Mon Sep 17 00:00:00 2001 From: doxav Date: Sat, 25 Apr 2026 23:51:10 +0200 Subject: [PATCH 15/16] results with 5 iterations --- ...ggraph_instrument_and_compare_observers.py | 36 ++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py index 841a9650..10c97c4a 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py @@ -146,6 +146,16 @@ def _base_name(value: Any) -> str: return name.split("/")[-1].split(":")[0] +def _param_name(value: Any) -> str: + if name := _base_name(value): + return name + text = str(value).split("/")[-1] + if ":" in text: + left, right = text.rsplit(":", 1) + text = left if right.isdigit() else right + return text.removeprefix("param_") + + def _semantic_alias(name: str) -> str | None: suffix = 
name.split(".")[-1] if suffix in SEMANTIC_NAMES: @@ -888,6 +898,15 @@ def run_case(name: str, builder): final_prompt = prompt_getter() assert bool(final_prompt) if optimizer is None else final_prompt == SYNTH_UPDATE_SCHEDULE[-1]["synth_prompt"] + best_parameters = { + _param_name(key): value for key, value in result.best_parameters.items() + } + best_updates = { + _param_name(key): value for key, value in result.best_updates.items() + } + best_synth_prompt = str( + _raw(best_parameters.get("synth_prompt", DEFAULT_TEMPLATES["synth_prompt"])) + ) tail_scores = result.score_history[max(2, result.best_iteration):] primary_summary = views[0]["summary"] if views else {} @@ -905,7 +924,9 @@ def run_case(name: str, builder): ), "node_count": int(primary_summary.get("node_count", 0)), "edge_count": _edge_count(views[0]["doc"]) if views else 0, - "best_updates": {(_base_name(key) or str(key).split("/")[-1].split(":")[0]): value for key, value in result.best_updates.items()}, + "best_update_keys": list(best_updates.keys()), + "best_updates": best_updates, + "best_synth_prompt": best_synth_prompt, "final_synth_prompt": final_prompt, "final_answer": answer_text, "answer_preview": _truncate(answer_text, 180), @@ -974,8 +995,9 @@ def print_cli_report(rows: List[Dict[str, Any]]) -> None: print(f"score_gain: {row['score_gain']:.3f}") print(f"stability_std: {row['stability_std']:.3f}") print(f"score_history: {row['score_history']}") - print(f"best_updates: {row['best_updates']}") - print(f"final_synth_prompt: {row['final_synth_prompt']}") + print(f"best_update_keys: {row['best_update_keys']}") + print(f"best_synth_prompt: {row['best_synth_prompt']}") + print(f"final_attempted_synth_prompt: {row['final_synth_prompt']}") print(f"final_answer: {row['answer_preview']}") for view in row["views"]: summary = view["summary"] @@ -1035,9 +1057,13 @@ def display_notebook_report(rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]: f"- Node count: `{row['node_count']}`", f"- Edge count: 
`{row['edge_count']}`", f"- Score history: `{row['score_history']}`", - f"- Best updates: `{list(row['best_updates'].keys())}`", + f"- Best update keys: `{row['best_update_keys']}`", "", - "### Final synth prompt", + "### Best-scoring synth prompt", + "```text", + str(row["best_synth_prompt"]), + "```", + "### Final attempted synth prompt", "```text", str(row["final_synth_prompt"]), "```", From d329112c7cb950d894492119bcb376228c74c884 Mon Sep 17 00:00:00 2001 From: doxav Date: Thu, 7 May 2026 14:06:45 +0200 Subject: [PATCH 16/16] checkpoint on multi traces notebook --- ...aph_instrument_and_compare_observers.ipynb | 2093 +++++------------ ...ggraph_instrument_and_compare_observers.py | 24 +- 2 files changed, 665 insertions(+), 1452 deletions(-) diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb index e2cf5362..ca9e1537 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.ipynb @@ -22,10 +22,10 @@ "id": "run-live-compare-script", "metadata": { "execution": { - "iopub.execute_input": "2026-04-25T19:46:39.281438Z", - "iopub.status.busy": "2026-04-25T19:46:39.281191Z", - "iopub.status.idle": "2026-04-25T19:49:41.137432Z", - "shell.execute_reply": "2026-04-25T19:49:41.136547Z" + "iopub.execute_input": "2026-04-26T11:57:31.770794Z", + "iopub.status.busy": "2026-04-26T11:57:31.770419Z", + "iopub.status.idle": "2026-04-26T12:06:56.074519Z", + "shell.execute_reply": "2026-04-26T12:06:56.073398Z" }, "language": "python" }, @@ -39,13 +39,25 @@ "\n", "| config | runtime_s | baseline | best | gain | best_iteration | stability_std | node_count | edge_count | score_history |\n", "|---|---:|---:|---:|---:|---:|---:|---:|---:|---|\n", - "| trace | 22.643 | 0.733 | 0.803 | 0.070 | 2 | 0.000 | 9 | 6 | [0.733, 0.733, 0.803] |\n", - "| trace+otel | 20.681 | 0.733 | 0.733 | 
0.000 | 0 | 0.000 | 9 | 6 | [0.733, 0.733, 0.677] |\n", - "| otel | 21.909 | 0.733 | 0.837 | 0.103 | 2 | 0.000 | 6 | 5 | [0.733, 0.733, 0.837] |\n", - "| trace+sysmon | 18.639 | 0.733 | 0.733 | 0.000 | 0 | 0.000 | 9 | 6 | [0.733, 0.733, 0.65] |\n", - "| trace+otel+sysmon | 22.479 | 0.733 | 0.733 | 0.000 | 0 | 0.000 | 9 | 6 | [0.733, 0.733, 0.677] |\n", - "| otel+sysmon | 21.463 | 0.733 | 0.915 | 0.182 | 2 | 0.000 | 6 | 5 | [0.733, 0.733, 0.915] |\n", - "| sysmon | 20.267 | 0.733 | 0.854 | 0.121 | 2 | 0.000 | 4 | 3 | [0.733, 0.733, 0.854] |" + "| trace | 76.856 | 0.842 | 0.872 | 0.030 | 3 | 0.112 | 9 | 6 | [0.842, 0.842, 0.568, 0.872, 0.64, 0.629] |\n", + "| trace+otel | 73.826 | 0.842 | 0.893 | 0.051 | 2 | 0.137 | 9 | 6 | [0.842, 0.842, 0.893, 0.573, 0.586, 0.571] |\n", + "| otel | 79.725 | 0.842 | 0.881 | 0.039 | 2 | 0.076 | 6 | 5 | [0.842, 0.842, 0.881, 0.745, 0.689, 0.702] |\n", + "| trace+sysmon | 71.576 | 0.842 | 0.890 | 0.048 | 4 | 0.095 | 9 | 6 | [0.842, 0.842, 0.714, 0.737, 0.89, 0.7] |\n", + "| trace+otel+sysmon | 89.682 | 0.842 | 0.876 | 0.034 | 3 | 0.038 | 9 | 6 | [0.842, 0.842, 0.8, 0.876, 0.805, 0.788] |\n", + "| otel+sysmon | 74.301 | 0.842 | 0.842 | 0.000 | 0 | 0.022 | 6 | 5 | [0.842, 0.842, 0.712, 0.72, 0.737, 0.675] |\n", + "| sysmon | 62.266 | 0.842 | 0.930 | 0.088 | 2 | 0.000 | 4 | 3 | [0.842, 0.842, 0.93, 0.93, 0.93, 0.93] |\n", + "\n", + "### Prompt selection\n", + "\n", + "| config | best_iteration | best update keys | best-scoring prompt | final attempted prompt |\n", + "|---|---:|---|---|---|\n", + "| trace | 3 | ['synth_prompt'] | Based on the following outline, provide a complete, direct, and factual explanation. En... | Act as an expert scientific writer. Your primary goal is to expand the provided outline... |\n", + "| trace+otel | 2 | ['synth_prompt'] | Provide a direct, factual answer to the query based on the plan provided below. Start y... | Act as a technical writer to generate a professional document based on the provided pla... 
|\n", + "| otel | 2 | ['planner_prompt', 'synth_prompt'] | Synthesize a clear, brief, and factual response to the query: {query}
Use these planned... | You are an expert science communicator and molecular biologist. Synthesize a profession... |\n", + "| trace+sysmon | 4 | ['synth_prompt'] | Synthesize the following plan into a complete, polished, and factual summary.

Maintai... | Synthesize the following plan into a complete, polished, and factual summary.

Maintai... |\n", + "| trace+otel+sysmon | 3 | ['synth_prompt'] | Provide a concise and direct answer to the query by synthesizing the provided plan.
En... | Provide a concise and direct answer to the query by synthesizing the provided plan.

Y... |\n", + "| otel+sysmon | 0 | [] | Answer briefly and factually: {query}
Plan: {plan} | Integrating every detail from the provided plan, write a sophisticated, single-paragrap... |\n", + "| sysmon | 2 | ['planner_prompt', 'synth_prompt'] | Answer briefly and factually using a clear, structured bulleted list: {query}
Plan: {plan} | Answer briefly and factually using a clear, structured bulleted list: {query}
Plan: {plan} |" ], "text/plain": [ "" @@ -58,30 +70,44 @@ "data": { "text/markdown": [ "## trace\n", - "- Runtime: `22.643s`\n", - "- Baseline score: `0.733`\n", - "- Best score: `0.803`\n", - "- Score gain: `0.070`\n", - "- Best iteration: `2`\n", - "- Post-update stability std: `0.000`\n", + "- Runtime: `76.856s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.872`\n", + "- Score gain: `0.030`\n", + "- Best iteration: `3`\n", + "- Post-update stability std: `0.112`\n", "- Node count: `9`\n", "- Edge count: `6`\n", - "- Score history: `[0.733, 0.733, 0.803]`\n", - "- Best updates: `['synth_prompt']`\n", + "- Score history: `[0.842, 0.842, 0.568, 0.872, 0.64, 0.629]`\n", + "- Best update keys: `['synth_prompt']`\n", "\n", - "### Final synth prompt\n", + "### Best-scoring synth prompt\n", + "```text\n", + "Based on the following outline, provide a complete, direct, and factual explanation. Ensure the response is cohesive, completes any unfinished thoughts in the plan, and avoids unnecessary introductory phrases.\n", + "Outline: {plan}\n", + "```\n", + "### Final attempted synth prompt\n", "```text\n", - "Provide a direct, factual summary for the following query. Organize the information into clear sections or paragraphs following the logical flow of the provided plan. Ensure each main point from the plan is represented as a distinct part of the summary.\n", + "Act as an expert scientific writer. Your primary goal is to expand the provided outline into a comprehensive, professional, and fully self-contained article.\n", "\n", - "Query: {query}\n", - "Source Plan: {plan}\n", + "Follow these strict constraints:\n", + "1. ABSOLUTE COMPLETION: The provided outline is intentionally incomplete or ends abruptly. You MUST use your expertise to fill in the missing information, bridge all gaps, and provide a logical progression to the end of the topic.\n", + "2. 
MANDATORY CONCLUSION: You must end the article with a formal \"Conclusion\" section that summarizes the key points. The response MUST NOT end mid-sentence or mid-thought.\n", + "3. FORMATTING: Use professional Markdown headings (e.g., #, ##, ###) and categorized bullet points for readability.\n", + "4. TONE: Maintain a formal, academic, and engaging scientific tone.\n", + "5. LENGTH MANAGEMENT: If the topic is broad, ensure you balance detail with the need to provide a complete narrative from introduction to conclusion within your response limit.\n", + "\n", + "Outline to expand:\n", + "{plan}\n", "```\n", "### Final answer\n", "```text\n", - "CRISPR is a revolutionary gene-editing technology that allows scientists to make precise changes to the DNA of living organisms. The following summary details its function, origins, and the mechanisms that make it a transformative tool in modern science.\n", + "# CRISPR-Cas9: The Frontier of Genetic Engineering\n", "\n", - "### **1. The Definition (The \"What\")**\n", - "CRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. In practical terms, it is often described as \"molecular scissors\" or a \"search-and-replace\" function for the genome. The ...\n", + "### **1. The Definition: Understanding the Molecular Machinery**\n", + "CRISPR, an acronym for **Clustered Regularly Interspaced Short Palindromic Repeats**, represents the most significant breakthrough in biotechnology of the 21st century. 
At its core, CRISPR is a precision gene-editing technology that allows scientists to modify the DNA of living organisms with unprecedented accuracy.\n", + "\n", + "To understand its function, it is often helpful to use the an...\n", "```" ], "text/plain": [ @@ -96,13 +122,13 @@ "text/markdown": [ "### backend trace\n", "- Semantic message names: `['planner_node', 'synth_node']`\n", - "- All message names: `['final_answer_node', 'getitem', 'make_trace_case.planner_node', 'make_trace_case.planner_node3_copy', 'make_trace_case.synth_node']`\n", + "- All message names: `['final_answer_node', 'getitem', 'make_trace_case.planner_node', 'make_trace_case.planner_node12_copy', 'make_trace_case.synth_node']`\n", "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", "\n", "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Provide a direct, factual summary for the following query. Organize the information into clear sections or paragraphs following the logical flow of the provided plan. Ensure each main point from the plan is represente...\"\n", + " \"synth_prompt\": \"Act as an expert scientific writer. Your primary goal is to expand the provided outline into a comprehensive, professional, and fully self-contained article.\\n\\nFollow these strict constraints:\\n1. 
ABSOLUTE COMPLETION: T...\"\n", "}\n", "```" ], @@ -122,10 +148,10 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "node_0\n", @@ -158,27 +184,27 @@ "\n", "node_2\n", "\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7fea890d3b30>}\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7142117e9a00>}\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_2\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_3\n", - "\n", - "\n", - "make_trace_case.planner_node3_copy\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "\n", + "make_trace_case.planner_node12_copy\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", "\n", "\n", "\n", @@ -186,61 +212,62 @@ "\n", "node_5\n", "\n", - "\n", - "getitem\n", - "[message]\n", - "This plan outlines a concise, logical flow to explain CRISPR, moving from a b...\n", + "\n", + "getitem\n", + "[message]\n", + "This plan outlines a concise, logical flow to explain CRISPR, moving from a b...\n", "\n", "\n", "\n", "\n", "\n", "node_3->node_5\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_4\n", "\n", - "\n", - "str\n", - "[value]\n", - "plan\n", + "\n", + "str\n", + "[value]\n", + "plan\n", "\n", "\n", "\n", "\n", "\n", "node_4->node_5\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_6\n", "\n", - "\n", - "final_answer_node\n", - "[message]\n", - "CRISPR is a revolutionary gene-editing technology that allows scientists to m...\n", + "\n", + "final_answer_node\n", + "[message]\n", + "# CRISPR-Cas9: The Frontier of Genetic Engineering\n", + "### **1. 
The Definition: ...\n", "\n", "\n", "\n", "\n", "\n", "node_5->node_6\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_7\n", "\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", "\n", "\n", "\n", @@ -248,24 +275,24 @@ "\n", "node_8\n", "\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Provide a direct, factual summary for the following query. Organize the infor...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Act as an expert scientific writer. Your primary goal is to expand the provid...\n", "\n", "\n", "\n", "\n", "\n", "node_8->node_6\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -275,29 +302,47 @@ "data": { "text/markdown": [ "## trace+otel\n", - "- Runtime: `20.681s`\n", - "- Baseline score: `0.733`\n", - "- Best score: `0.733`\n", - "- Score gain: `0.000`\n", - "- Best iteration: `0`\n", - "- Post-update stability std: `0.000`\n", + "- Runtime: `73.826s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.893`\n", + "- Score gain: `0.051`\n", + "- Best iteration: `2`\n", + "- Post-update stability std: `0.137`\n", "- Node count: `9`\n", "- Edge count: `6`\n", - "- Score history: `[0.733, 0.733, 0.677]`\n", - "- Best updates: `[]`\n", + "- Score history: `[0.842, 0.842, 0.893, 0.573, 0.586, 0.571]`\n", + "- Best update keys: `['synth_prompt']`\n", "\n", - "### Final synth prompt\n", + "### Best-scoring synth prompt\n", + "```text\n", + "Provide a direct, factual answer to the query based on the plan provided below. Start your response immediately with the information requested without introductory filler. 
Ensure the response is logically structured and complete.\n", + "\n", + "Query: {query}\n", + "Plan: {plan}\n", + "```\n", + "### Final attempted synth prompt\n", "```text\n", - "Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. Present the information concisely.\n", + "Act as a technical writer to generate a professional document based on the provided plan.\n", + "\n", + "CRITICAL INSTRUCTIONS:\n", + "1. ABSOLUTE DIRECTNESS: Begin the response immediately with the first heading or paragraph. Do not provide any introduction, prefix, or meta-commentary like \"Here is the response...\".\n", + "2. STRUCTURE: Use bolded Markdown headings (e.g., ### **Heading**) that correspond exactly to the sections in the Plan.\n", + "3. TRUNCATION HANDLING: If the provided Plan is cut off (truncated mid-sentence or mid-section), do not attempt to replicate the truncated fragment. Instead, logically conclude the current section and provide a final summary sentence to ensure the document is a complete, polished deliverable. \n", + "4. COMPLETION GUARANTEE: The response MUST end with a full sentence and a period. Never leave a sentence unfinished or a list item trailing.\n", + "5. TECHNICAL DETAIL: Define all acronyms and provide technical context for all components mentioned (e.g., Cas9, gRNA, PAM). Use professional, factual language.\n", "\n", "Query: {query}\n", "Plan: {plan}\n", "```\n", "### Final answer\n", "```text\n", - "**CRISPR** (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that functions as \"molecular scissors\" or a \"search-and-replace\" tool for DNA. 
It consists of two primary components: **Cas9**, an enzyme that cuts the DNA strands, and **Guide RNA (gRNA)**, a programmed sequence that directs the Cas9 enzyme to a specific location in the genome.\n", + "### **Goal: To explain CRISPR as a revolutionary gene-editing tool.**\n", + "\n", + "CRISPR represents a paradigm shift in biotechnology, offering a precise, efficient, and versatile method for altering the genetic code of living organisms. By leveraging a mechanism originally evolved in prokaryotes, scientists can now target specific sequences of Deoxyribonucleic Acid (DNA) to disable genes, correct mutations, or insert new genetic material.\n", + "\n", + "### **1. The Definition (The \"What\")**\n", "\n", - "The system was adapted from a naturally occurring immune mechanism in bacteria, which use CRISPR seque...\n", + "CRISPR is an acronym fo...\n", "```" ], "text/plain": [ @@ -312,13 +357,13 @@ "text/markdown": [ "### backend trace\n", "- Semantic message names: `['planner_node', 'synth_node']`\n", - "- All message names: `['final_answer_node', 'getitem', 'make_trace_case.planner_node', 'make_trace_case.planner_node7_copy', 'make_trace_case.synth_node']`\n", + "- All message names: `['final_answer_node', 'getitem', 'make_trace_case.planner_node', 'make_trace_case.planner_node25_copy', 'make_trace_case.synth_node']`\n", "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", "\n", "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. Pres...\"\n", + " \"synth_prompt\": \"Act as a technical writer to generate a professional document based on the provided plan.\\n\\nCRITICAL INSTRUCTIONS:\\n1. ABSOLUTE DIRECTNESS: Begin the response immediately with the first heading or paragraph. 
Do not prov...\"\n", "}\n", "```" ], @@ -338,10 +383,10 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "node_0\n", @@ -374,27 +419,27 @@ "\n", "node_2\n", "\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7fea882850a0>}\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7142117bc800>}\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_2\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_3\n", - "\n", - "\n", - "make_trace_case.planner_node7_copy\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "\n", + "make_trace_case.planner_node25_copy\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", "\n", "\n", "\n", @@ -402,61 +447,62 @@ "\n", "node_5\n", "\n", - "\n", - "getitem\n", - "[message]\n", - "This plan outlines a concise, logical flow to explain CRISPR, moving from a b...\n", + "\n", + "getitem\n", + "[message]\n", + "This plan outlines a concise, logical flow to explain CRISPR, moving from a b...\n", "\n", "\n", "\n", "\n", "\n", "node_3->node_5\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_4\n", "\n", - "\n", - "str\n", - "[value]\n", - "plan\n", + "\n", + "str\n", + "[value]\n", + "plan\n", "\n", "\n", "\n", "\n", "\n", "node_4->node_5\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_6\n", "\n", - "\n", - "final_answer_node\n", - "[message]\n", - "**CRISPR** (Clustered Regularly Interspaced Short Palindromic Repeats) is a r...\n", + "\n", + "final_answer_node\n", + "[message]\n", + "### **Goal: To explain CRISPR as a revolutionary gene-editing tool.**\n", + "CRISPR...\n", "\n", "\n", "\n", "\n", "\n", "node_5->node_6\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_7\n", "\n", - "\n", - "planner_prompt\n", - 
"[parameter]\n", - "Create a short plan for: {query}\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", "\n", "\n", "\n", @@ -464,24 +510,24 @@ "\n", "node_8\n", "\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Provide a direct, factual explanation of the topic based on the provided plan...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Act as a technical writer to generate a professional document based on the pr...\n", "\n", "\n", "\n", "\n", "\n", "node_8->node_6\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -500,7 +546,7 @@ "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. Pres...\"\n", + " \"synth_prompt\": \"Act as a technical writer to generate a professional document based on the provided plan.\\n\\nCRITICAL INSTRUCTIONS:\\n1. ABSOLUTE DIRECTNESS: Begin the response immediately with the first heading or paragraph. 
Do not prov...\"\n", "}\n", "```" ], @@ -520,10 +566,10 @@ "\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", "\n", "\n", "node_0\n", @@ -539,58 +585,58 @@ "\n", "node_1\n", "\n", - "\n", - "planner_node\n", - "[message]\n", - "{'message_id': 'planner_node'}\n", + "\n", + "planner_node\n", + "[message]\n", + "{'message_id': 'planner_node'}\n", "\n", "\n", "\n", "\n", "\n", "node_0->node_1\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_3\n", "\n", - "\n", - "synth_node\n", - "[message]\n", - "{'message_id': 'synth_node'}\n", + "\n", + "synth_node\n", + "[message]\n", + "{'message_id': 'synth_node'}\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_3\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_2\n", "\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Provide a direct, factual explanation of the topic based on the provided plan...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Act as a technical writer to generate a professional document based on the pr...\n", "\n", "\n", "\n", "\n", "\n", "node_2->node_3\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -600,35 +646,42 @@ "data": { "text/markdown": [ "## otel\n", - "- Runtime: `21.909s`\n", - "- Baseline score: `0.733`\n", - "- Best score: `0.837`\n", - "- Score gain: `0.103`\n", + "- Runtime: `79.725s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.881`\n", + "- Score gain: `0.039`\n", "- Best iteration: `2`\n", - "- Post-update stability std: `0.000`\n", + "- Post-update stability std: `0.076`\n", "- Node count: `6`\n", "- Edge count: `5`\n", - "- Score history: `[0.733, 0.733, 0.837]`\n", - "- Best updates: `['planner_prompt', 'synth_prompt']`\n", + "- Score history: `[0.842, 0.842, 0.881, 0.745, 0.689, 0.702]`\n", + "- Best update keys: `['planner_prompt', 'synth_prompt']`\n", "\n", - "### Final synth prompt\n", + "### Best-scoring synth prompt\n", "```text\n", - "You are a science communicator. 
Using the structured plan provided below, write a thorough, engaging, and clear response to the user's query: {query}. \n", - "\n", - "Follow the plan strictly, using its sections as headers for your response. Explain all technical concepts (like enzymes or DNA sequences) in a way that remains accessible to a non-expert audience while maintaining scientific accuracy.\n", + "Synthesize a clear, brief, and factual response to the query: {query}\n", + "Use these planned points as a guide: {plan}\n", + "```\n", + "### Final attempted synth prompt\n", + "```text\n", + "You are an expert science communicator and molecular biologist. Synthesize a professional, comprehensive, and cohesive academic essay responding to the query: \"{query}\".\n", "\n", - "Plan:\n", + "Use the following points as your structural framework:\n", "{plan}\n", + "\n", + "Instructions:\n", + "- Expand significantly upon the provided points using your internal expertise to provide a deep dive into the biochemistry and practical implications.\n", + "- You must ensure the final essay covers three distinct areas: the fundamental definition/origin, the specific molecular mechanism (e.g., Cas9/gRNA interaction), and the broader implications or repair pathways (NHEJ/HDR).\n", + "- If the provided plan is missing the third component or ends abruptly, you are required to use your expertise to complete the framework and provide a full, high-quality explanation.\n", + "- Maintain a tone that is sophisticated, academic, and authoritative.\n", + "- Ensure seamless transitions between sections to create a single, unified narrative with an overarching logic.\n", + "- Do not include an introductory \"Here is the response\" preamble or a concluding summary of your own performance.\n", "```\n", "### Final answer\n", "```text\n", - "### **I. Definition and Historical Context**\n", - "\n", - "#### **A. Technical Definition**\n", - "CRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. 
To visualize this, imagine a long sentence where the same word is repeated over and over, but between those repeats are unique \"spacer\" sequences. In biological terms, these are specific patterns of DNA nucleotides (the building blocks of life) found in the genomes of microorganisms.\n", + "### The Architecture of Precision: Understanding CRISPR-Cas9 as a Biological and Biotechnological Paradigm\n", "\n", - "#### **B. The Biological Origin**\n", - "While we now thin...\n", + "The landscape of modern molecular biology was irrevocably altered by the discovery of CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats), a system that transitioned from an obscure observation in prokaryotic genomics to the most versatile tool in the geneticist’s arsenal. At its core, CRISPR represents a sophisticated intersection of evolutionary immunology and precision...\n", "```" ], "text/plain": [ @@ -650,8 +703,8 @@ "\n", "```json\n", "{\n", - " \"planner_prompt\": \"As an expert scientific strategist, create a comprehensive and structured outline to explain the following topic: {query}. The outline must include: 1) A clear definition and historical context, 2) The step-by-step bi...\",\n", - " \"synth_prompt\": \"You are a science communicator. Using the structured plan provided below, write a thorough, engaging, and clear response to the user's query: {query}. \\n\\nFollow the plan strictly, using its sections as headers for your...\"\n", + " \"planner_prompt\": \"Identify exactly three distinct and critical factual components required to explain \\\"{query}\\\". \\nFor each component:\\n1. Provide a clear, bolded heading.\\n2. Provide a concise but complete 2-3 sentence explanation of tha...\",\n", + " \"synth_prompt\": \"You are an expert science communicator and molecular biologist. 
Synthesize a professional, comprehensive, and cohesive academic essay responding to the query: \\\"{query}\\\".\\n\\nUse the following points as your structural fr...\"\n", "}\n", "```" ], @@ -671,111 +724,111 @@ "\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", "\n", "\n", "node_0\n", "\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "As an expert scientific strategist, create a comprehensive and structured out...\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Identify exactly three distinct and critical factual components required to e...\n", "\n", "\n", "\n", "\n", "\n", "node_1\n", - "\n", - "\n", - "planner_node\n", - "[message]\n", - "{'message_id': None}\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'message_id': None}\n", "\n", "\n", "\n", "\n", "\n", "node_0->node_1\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_2\n", - "\n", - "\n", - "llm.chat.completion\n", - "[message]\n", - "{'message_id': None}\n", + "\n", + "\n", + "llm.chat.completion\n", + "[message]\n", + "{'message_id': None}\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_2\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_4\n", - "\n", - "\n", - "synth_node\n", - "[message]\n", - "{'message_id': None}\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'message_id': None}\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_4\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_3\n", "\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "You are a science communicator. Using the structured plan provided below, wri...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "You are an expert science communicator and molecular biologist. 
Synthesize a ...\n", "\n", "\n", "\n", "\n", "\n", "node_3->node_4\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_5\n", - "\n", - "\n", - "llm.chat.completion\n", - "[message]\n", - "{'message_id': None}\n", + "\n", + "\n", + "llm.chat.completion\n", + "[message]\n", + "{'message_id': None}\n", "\n", "\n", "\n", "\n", "\n", "node_4->node_5\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -785,27 +838,46 @@ "data": { "text/markdown": [ "## trace+sysmon\n", - "- Runtime: `18.639s`\n", - "- Baseline score: `0.733`\n", - "- Best score: `0.733`\n", - "- Score gain: `0.000`\n", - "- Best iteration: `0`\n", - "- Post-update stability std: `0.000`\n", + "- Runtime: `71.576s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.890`\n", + "- Score gain: `0.048`\n", + "- Best iteration: `4`\n", + "- Post-update stability std: `0.095`\n", "- Node count: `9`\n", "- Edge count: `6`\n", - "- Score history: `[0.733, 0.733, 0.65]`\n", - "- Best updates: `[]`\n", + "- Score history: `[0.842, 0.842, 0.714, 0.737, 0.89, 0.7]`\n", + "- Best update keys: `['synth_prompt']`\n", "\n", - "### Final synth prompt\n", + "### Best-scoring synth prompt\n", "```text\n", - "Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. Focus on delivering the facts immediately.\n", + "Synthesize the following plan into a complete, polished, and factual summary. \n", + "\n", + "Maintain the organizational structure of the plan by using clear headings and bullet points to ensure readability and information hierarchy. 
Even if the plan is incomplete or ends abruptly, use your knowledge to logically complete the narrative and provide a definitive concluding sentence.\n", + "\n", + "Plan: {plan}\n", + "```\n", + "### Final attempted synth prompt\n", + "```text\n", + "Synthesize the following plan into a complete, polished, and factual summary. \n", + "\n", + "Maintain the organizational structure of the plan using clear headings and bullet points. Your primary goal is to provide a concise yet comprehensive overview. \n", + "\n", + "**Important:** If the provided plan is incomplete or cuts off mid-sentence, you MUST use your knowledge to provide a logical conclusion to those sections and ensure the narrative is fully realized. \n", + "\n", + "**Mandatory Ending:** You MUST conclude your entire summary with a single, definitive, and impactful final sentence, separated from the bullet points, that encapsulates the global significance of this technology.\n", "\n", - "Query: {query}\n", "Plan: {plan}\n", "```\n", "### Final answer\n", "```text\n", - "CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that functions as a pair of molecular scissors or a search-and-replace tool for DNA. The system relies on two primary components: the Cas9 enzyme, which performs the physical cutting of the genetic material, and a guide RNA molecule that acts as a GPS to direct the enzyme to a precise location within the genome. This mechanism was originally adapted from a naturally o...\n", + "### **CRISPR: A Revolutionary Tool for Genetic Engineering**\n", + "\n", + "#### **1. 
The Definition (The \"What\")**\n", + "* **Acronym:** CRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**, referring to unique sequences in the genome that serve as a hallmark of this system.\n", + "* **The Analogy:** It is most commonly described as \"molecular scissors\" or a \"search-and-replace\" function for the code of life, allowing for the precise editing of DNA.\n", + "* **The Components:**\n", + " * **Cas9:...\n", "```" ], "text/plain": [ @@ -820,13 +892,13 @@ "text/markdown": [ "### backend trace\n", "- Semantic message names: `['planner_node', 'synth_node']`\n", - "- All message names: `['final_answer_node', 'getitem', 'make_trace_case.planner_node', 'make_trace_case.planner_node11_copy', 'make_trace_case.synth_node']`\n", + "- All message names: `['final_answer_node', 'getitem', 'make_trace_case.planner_node', 'make_trace_case.planner_node38_copy', 'make_trace_case.synth_node']`\n", "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", "\n", "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. Focus on deliv...\"\n", + " \"synth_prompt\": \"Synthesize the following plan into a complete, polished, and factual summary. \\n\\nMaintain the organizational structure of the plan using clear headings and bullet points. 
Your primary goal is to provide a concise yet c...\"\n", "}\n", "```" ], @@ -846,10 +918,10 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "node_0\n", @@ -882,27 +954,27 @@ "\n", "node_2\n", "\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7fea882b28a0>}\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7142116db290>}\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_2\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_3\n", - "\n", - "\n", - "make_trace_case.planner_node11_copy\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "\n", + "make_trace_case.planner_node38_copy\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", "\n", "\n", "\n", @@ -910,61 +982,62 @@ "\n", "node_5\n", "\n", - "\n", - "getitem\n", - "[message]\n", - "This plan outlines a concise, logical flow to explain CRISPR, moving from a b...\n", + "\n", + "getitem\n", + "[message]\n", + "This plan outlines a concise, logical flow to explain CRISPR, moving from a b...\n", "\n", "\n", "\n", "\n", "\n", "node_3->node_5\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_4\n", "\n", - "\n", - "str\n", - "[value]\n", - "plan\n", + "\n", + "str\n", + "[value]\n", + "plan\n", "\n", "\n", "\n", "\n", "\n", "node_4->node_5\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_6\n", "\n", - "\n", - "final_answer_node\n", - "[message]\n", - "CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Re...\n", + "\n", + "final_answer_node\n", + "[message]\n", + "### **CRISPR: A Revolutionary Tool for Genetic Engineering**\n", + "#### **1. 
The D...\n", "\n", "\n", "\n", "\n", "\n", "node_5->node_6\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_7\n", "\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", "\n", "\n", "\n", @@ -972,24 +1045,24 @@ "\n", "node_8\n", "\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Synthesize the following plan into a single, direct, and cohesive factual exp...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Synthesize the following plan into a complete, polished, and factual summary....\n", "\n", "\n", "\n", "\n", "\n", "node_8->node_6\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1007,7 +1080,7 @@ "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. Focus on deliv...\"\n", + " \"synth_prompt\": \"Synthesize the following plan into a complete, polished, and factual summary. \\n\\nMaintain the organizational structure of the plan using clear headings and bullet points. 
Your primary goal is to provide a concise yet c...\"\n", "}\n", "```" ], @@ -1027,10 +1100,10 @@ "\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", "\n", "\n", "node_0\n", @@ -1045,7 +1118,7 @@ "\n", "\n", "node_2\n", - "\n", + "\n", "\n", "planner_node\n", "[message]\n", @@ -1063,41 +1136,41 @@ "\n", "node_1\n", "\n", - "\n", + "\n", "synth_prompt\n", "[parameter]\n", - "Synthesize the following plan into a single, direct, and cohesive factual exp...\n", + "Synthesize the following plan into a complete, polished, and factual summary....\n", "\n", "\n", "\n", "\n", "\n", "node_3\n", - "\n", - "\n", - "synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7fea882b28a0>}\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7142116db290>}\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_3\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_2->node_3\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1107,29 +1180,41 @@ "data": { "text/markdown": [ "## trace+otel+sysmon\n", - "- Runtime: `22.479s`\n", - "- Baseline score: `0.733`\n", - "- Best score: `0.733`\n", - "- Score gain: `0.000`\n", - "- Best iteration: `0`\n", - "- Post-update stability std: `0.000`\n", + "- Runtime: `89.682s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.876`\n", + "- Score gain: `0.034`\n", + "- Best iteration: `3`\n", + "- Post-update stability std: `0.038`\n", "- Node count: `9`\n", "- Edge count: `6`\n", - "- Score history: `[0.733, 0.733, 0.677]`\n", - "- Best updates: `[]`\n", + "- Score history: `[0.842, 0.842, 0.8, 0.876, 0.805, 0.788]`\n", + "- Best update keys: `['synth_prompt']`\n", "\n", - "### Final synth prompt\n", + "### Best-scoring synth prompt\n", "```text\n", - "Respond to the topic using the provided plan. 
Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. Ensure every sentence is complete and strictly follows the logical flow of the plan.\n", - "\n", - "Topic: {query}\n", + "Provide a concise and direct answer to the query by synthesizing the provided plan. \n", + "Ensure the response is structurally sound: all sentences must be complete, lists should be finalized, and any truncated information from the plan should be omitted or polished into a finished thought. Do not use introductory filler.\n", + "Query: {query}\n", "Plan: {plan}\n", + "```\n", + "### Final attempted synth prompt\n", + "```text\n", + "Provide a concise and direct answer to the query by synthesizing the provided plan. \n", "\n", - "Direct Response:\n", + "Your response must follow these rules:\n", + "1. All sentences must be complete and grammatically correct. \n", + "2. If the provided plan ends abruptly or contains incomplete bullet points, you must omit the unfinished point or rewrite it into a finished, logical sentence that concludes the thought.\n", + "3. Keep the synthesis focused strictly on the information provided in the plan.\n", + "4. You MUST wrap your entire response in <answer> and </answer> tags. Ensure the closing tag is present.\n", + "\n", + "Query: {query}\n", + "Plan: {plan}\n", "```\n", "### Final answer\n", "```text\n", - "CRISPR stands for Clustered Regularly Interspaced Short Palindromic Repeats and functions as a revolutionary gene-editing tool. It is frequently described as \"molecular scissors\" or a \"search-and-replace\" function for the genome, allowing scientists to modify DNA with high precision. 
The system consists of two primary components: the Cas9 enzyme, which acts as the scissors to cut the DNA strand, and the Guide RNA (gRNA), which serves as a GPS to lead the enzyme to a specific genetic sequence....\n", + "\n", + "CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing tool that functions like \"molecular scissors\" or a \"search-and-replace\" feature for DNA. The system consists of two primary components: the Cas9 enzyme, which performs the physical cut of the genetic material, and the Guide RNA (gRNA), which acts as a GPS to direct the enzyme to a specific genetic sequence. This technology was adapted from a naturally occurring immune s...\n", "```" ], "text/plain": [ @@ -1144,13 +1229,13 @@ "text/markdown": [ "### backend trace\n", "- Semantic message names: `['planner_node', 'synth_node']`\n", - "- All message names: `['final_answer_node', 'getitem', 'make_trace_case.planner_node', 'make_trace_case.planner_node15_copy', 'make_trace_case.synth_node']`\n", + "- All message names: `['final_answer_node', 'getitem', 'make_trace_case.planner_node', 'make_trace_case.planner_node51_copy', 'make_trace_case.synth_node']`\n", "- Parameter names: `['planner_prompt', 'synth_prompt']`\n", "\n", "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \\\"###\\\"), and do not use introductory or concluding remarks. Ensu...\"\n", + " \"synth_prompt\": \"Provide a concise and direct answer to the query by synthesizing the provided plan. \\n\\nYour response must follow these rules:\\n1. All sentences must be complete and grammatically correct. \\n2. 
If the provided plan ends a...\"\n", "}\n", "```" ], @@ -1170,10 +1255,10 @@ "\n", "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", "\n", "\n", "node_0\n", @@ -1189,44 +1274,44 @@ "\n", "node_1\n", "\n", - "\n", - "make_trace_case.planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "make_trace_case.planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", "\n", "\n", "\n", "\n", "\n", "node_0->node_1\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_2\n", "\n", - "\n", - "make_trace_case.synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7fea88285040>}\n", + "\n", + "make_trace_case.synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.MessageNode object at 0x714211664350>}\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_2\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_3\n", - "\n", - "\n", - "make_trace_case.planner_node15_copy\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "\n", + "make_trace_case.planner_node51_copy\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", "\n", "\n", "\n", @@ -1234,61 +1319,62 @@ "\n", "node_5\n", "\n", - "\n", - "getitem\n", - "[message]\n", - "This plan outlines a concise, logical flow to explain CRISPR, moving from a b...\n", + "\n", + "getitem\n", + "[message]\n", + "This plan outlines a concise, logical flow to explain CRISPR, moving from a b...\n", "\n", "\n", "\n", "\n", "\n", "node_3->node_5\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_4\n", "\n", - "\n", - "str\n", - "[value]\n", - "plan\n", + "\n", + "str\n", + "[value]\n", + "plan\n", "\n", "\n", "\n", "\n", "\n", "node_4->node_5\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_6\n", "\n", - 
"\n", - "final_answer_node\n", - "[message]\n", - "CRISPR stands for Clustered Regularly Interspaced Short Palindromic Repeats a...\n", + "\n", + "final_answer_node\n", + "[message]\n", + "<answer>\n", + "CRISPR, which stands for Clustered Regularly Interspaced Short Palin...\n", "\n", "\n", "\n", "\n", "\n", "node_5->node_6\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_7\n", "\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a short plan for: {query}\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a short plan for: {query}\n", "\n", "\n", "\n", @@ -1296,24 +1382,24 @@ "\n", "node_8\n", "\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Respond to the topic using the provided plan. Your response must be extremely...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Provide a concise and direct answer to the query by synthesizing the provided...\n", "\n", "\n", "\n", "\n", "\n", "node_8->node_6\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1331,7 +1417,7 @@ "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \\\"###\\\"), and do not use introductory or concluding remarks. Ensu...\"\n", + " \"synth_prompt\": \"Provide a concise and direct answer to the query by synthesizing the provided plan. \\n\\nYour response must follow these rules:\\n1. All sentences must be complete and grammatically correct. \\n2. 
If the provided plan ends a...\"\n", "}\n", "```" ], @@ -1351,10 +1437,10 @@ "\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", "\n", "\n", "node_0\n", @@ -1369,59 +1455,59 @@ "\n", "\n", "node_2\n", - "\n", - "\n", - "planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': <opto.trace.nodes.MessageNode object at ...\n", "\n", "\n", "\n", "\n", "\n", "node_0->node_2\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_1\n", "\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Respond to the topic using the provided plan. Your response must be extremely...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Provide a concise and direct answer to the query by synthesizing the provided...\n", "\n", "\n", "\n", "\n", "\n", "node_3\n", - "\n", - "\n", - "synth_node\n", - "[message]\n", - "{'final_answer': <opto.trace.nodes.MessageNode object at 0x7fea88285040>}\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': <opto.trace.nodes.MessageNode object at 0x714211664350>}\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_3\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_2->node_3\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1440,7 +1526,7 @@ "```json\n", "{\n", " \"planner_prompt\": \"Create a short plan for: {query}\",\n", - " \"synth_prompt\": \"Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \\\"###\\\"), and do not use introductory or concluding remarks. Ensu...\"\n", + " \"synth_prompt\": \"Provide a concise and direct answer to the query by synthesizing the provided plan. \\n\\nYour response must follow these rules:\\n1. All sentences must be complete and grammatically correct. \\n2. 
If the provided plan ends a...\"\n", "}\n", "```" ], @@ -1460,10 +1546,10 @@ "\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", "\n", "\n", "node_0\n", @@ -1479,58 +1565,58 @@ "\n", "node_1\n", "\n", - "\n", - "planner_node\n", - "[message]\n", - "{'message_id': 'planner_node'}\n", + "\n", + "planner_node\n", + "[message]\n", + "{'message_id': 'planner_node'}\n", "\n", "\n", "\n", "\n", "\n", "node_0->node_1\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_3\n", "\n", - "\n", - "synth_node\n", - "[message]\n", - "{'message_id': 'synth_node'}\n", + "\n", + "synth_node\n", + "[message]\n", + "{'message_id': 'synth_node'}\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_3\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_2\n", "\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Respond to the topic using the provided plan. Your response must be extremely...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Provide a concise and direct answer to the query by synthesizing the provided...\n", "\n", "\n", "\n", "\n", "\n", "node_2->node_3\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1540,36 +1626,40 @@ "data": { "text/markdown": [ "## otel+sysmon\n", - "- Runtime: `21.463s`\n", - "- Baseline score: `0.733`\n", - "- Best score: `0.915`\n", - "- Score gain: `0.182`\n", - "- Best iteration: `2`\n", - "- Post-update stability std: `0.000`\n", + "- Runtime: `74.301s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.842`\n", + "- Score gain: `0.000`\n", + "- Best iteration: `0`\n", + "- Post-update stability std: `0.022`\n", "- Node count: `6`\n", "- Edge count: `5`\n", - "- Score history: `[0.733, 0.733, 0.915]`\n", - "- Best updates: `['planner_prompt', 'synth_prompt']`\n", + "- Score history: `[0.842, 0.842, 0.712, 0.72, 0.737, 0.675]`\n", + "- Best update keys: `[]`\n", "\n", - "### Final synth prompt\n", + "### Best-scoring synth prompt\n", "```text\n", - "Act as an expert 
scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \n", - "\n", - "Instructions:\n", - "- Expand on each point of the plan with accurate details.\n", - "- Use a professional yet accessible tone.\n", - "- Ensure smooth transitions between the definition, components, and significance sections.\n", - "- Conclude with a summary of the technology's impact.\n", + "Answer briefly and factually: {query}\n", + "Plan: {plan}\n", + "```\n", + "### Final attempted synth prompt\n", + "```text\n", + "Integrating every detail from the provided plan, write a sophisticated, single-paragraph academic narrative that answers: \"{query}\".\n", "\n", - "Plan:\n", + "Plan: \n", "{plan}\n", + "\n", + "Requirements:\n", + "- Strictly no lists, bullet points, or section headings.\n", + "- Weave the biological origins and molecular mechanics seamlessly into the discussion of modern applications.\n", + "- Use advanced vocabulary and maintain a high-level academic tone.\n", + "- Ensure every technical term and clinical/agricultural impact mentioned in the plan is included in the narrative.\n", + "\n", + "Response:\n", "```\n", "### Final answer\n", "```text\n", - "To understand the future of medicine and biotechnology, one must understand **CRISPR**. Often described as the most significant biological discovery of the 21st century, CRISPR has transformed from an obscure bacterial defense mechanism into a powerful tool for rewriting the code of life.\n", - "\n", - "### I. Definition and Etymology: The Blueprint of the System\n", - "At its most basic level, **CRISPR** stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. 
While the name is a mouthful, it de...\n", + "CRISPR, an acronym for Clustered Regularly Interspaced Short Palindromic Repeats, represents a sophisticated mechanism of adaptive immunity evolved by prokaryotes to archive a genetic memory of bacteriophage encounters by integrating viral fragments as spacers within their own genomic arrays. This biological defense system functions through a precise molecular surveillance apparatus wherein an effector complex scans for a Protospacer Adjacent Motif (PAM)—a critical binding signal that ensures...\n", "```" ], "text/plain": [ @@ -1591,8 +1681,8 @@ "\n", "```json\n", "{\n", - " \"planner_prompt\": \"Create a detailed and structured outline for a comprehensive answer to the query: {query}. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Spe...\",\n", - " \"synth_prompt\": \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \\n\\nInstructions:\\n- Expand on each point of the plan with acc...\"\n", + " \"planner_prompt\": \"Create a detailed yet concise 4-point technical outline to explain \\\"{query}\\\". \\nEnsure technical accuracy by mentioning specific biological components.\\nStructure:\\n1. 
Biological Origin: Define the system and its evoluti...\",\n", + " \"synth_prompt\": \"Integrating every detail from the provided plan, write a sophisticated, single-paragraph academic narrative that answers: \\\"{query}\\\".\\n\\nPlan: \\n{plan}\\n\\nRequirements:\\n- Strictly no lists, bullet points, or section heading...\"\n", "}\n", "```" ], @@ -1612,111 +1702,111 @@ "\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", "\n", "\n", "node_0\n", "\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a detailed and structured outline for a comprehensive answer to the qu...\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a detailed yet concise 4-point technical outline to explain "{query}"....\n", "\n", "\n", "\n", "\n", "\n", "node_1\n", - "\n", - "\n", - "planner_node\n", - "[message]\n", - "{'message_id': None}\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'message_id': None}\n", "\n", "\n", "\n", "\n", "\n", "node_0->node_1\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_2\n", - "\n", - "\n", - "llm.chat.completion\n", - "[message]\n", - "{'message_id': None}\n", + "\n", + "\n", + "llm.chat.completion\n", + "[message]\n", + "{'message_id': None}\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_2\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_4\n", - "\n", - "\n", - "synth_node\n", - "[message]\n", - "{'message_id': None}\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'message_id': None}\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_4\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_3\n", "\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Act as an expert scientific communicator. 
Using the detailed plan provided be...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Integrating every detail from the provided plan, write a sophisticated, singl...\n", "\n", "\n", "\n", "\n", "\n", "node_3->node_4\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_5\n", - "\n", - "\n", - "llm.chat.completion\n", - "[message]\n", - "{'message_id': None}\n", + "\n", + "\n", + "llm.chat.completion\n", + "[message]\n", + "{'message_id': None}\n", "\n", "\n", "\n", "\n", "\n", "node_4->node_5\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1733,8 +1823,8 @@ "\n", "```json\n", "{\n", - " \"planner_prompt\": \"Create a detailed and structured outline for a comprehensive answer to the query: {query}. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Spe...\",\n", - " \"synth_prompt\": \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \\n\\nInstructions:\\n- Expand on each point of the plan with acc...\"\n", + " \"planner_prompt\": \"Create a detailed yet concise 4-point technical outline to explain \\\"{query}\\\". \\nEnsure technical accuracy by mentioning specific biological components.\\nStructure:\\n1. 
Biological Origin: Define the system and its evoluti...\",\n", + " \"synth_prompt\": \"Integrating every detail from the provided plan, write a sophisticated, single-paragraph academic narrative that answers: \\\"{query}\\\".\\n\\nPlan: \\n{plan}\\n\\nRequirements:\\n- Strictly no lists, bullet points, or section heading...\"\n", "}\n", "```" ], @@ -1754,77 +1844,77 @@ "\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", "\n", "\n", "node_0\n", "\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Create a detailed and structured outline for a comprehensive answer to the qu...\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a detailed yet concise 4-point technical outline to explain "{query}"....\n", "\n", "\n", "\n", "\n", "\n", "node_2\n", - "\n", - "\n", - "planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': 'This outline provides a structured fram...\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': 'Here is a technical outline explaining ...\n", "\n", "\n", "\n", "\n", "\n", "node_0->node_2\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_1\n", "\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Act as an expert scientific communicator. 
Using the detailed plan provided be...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Integrating every detail from the provided plan, write a sophisticated, singl...\n", "\n", "\n", "\n", "\n", "\n", "node_3\n", - "\n", - "\n", - "synth_node\n", - "[message]\n", - "{'final_answer': 'To understand the future of medicine and biotechnology, one...\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': 'CRISPR, an acronym for Clustered Regularly Interspaced Shor...\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_3\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_2->node_3\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1834,31 +1924,34 @@ "data": { "text/markdown": [ "## sysmon\n", - "- Runtime: `20.267s`\n", - "- Baseline score: `0.733`\n", - "- Best score: `0.854`\n", - "- Score gain: `0.121`\n", + "- Runtime: `62.266s`\n", + "- Baseline score: `0.842`\n", + "- Best score: `0.930`\n", + "- Score gain: `0.088`\n", "- Best iteration: `2`\n", "- Post-update stability std: `0.000`\n", "- Node count: `4`\n", "- Edge count: `3`\n", - "- Score history: `[0.733, 0.733, 0.854]`\n", - "- Best updates: `['planner_prompt', 'synth_prompt']`\n", + "- Score history: `[0.842, 0.842, 0.93, 0.93, 0.93, 0.93]`\n", + "- Best update keys: `['planner_prompt', 'synth_prompt']`\n", "\n", - "### Final synth prompt\n", + "### Best-scoring synth prompt\n", "```text\n", - "Give a direct, factual answer to the query using the provided plan. Do not include any introductory remarks, conversational filler, or meta-commentary. 
Start your response immediately with the answer.\n", - "\n", - "Query: {query}\n", + "Answer briefly and factually using a clear, structured bulleted list: {query}\n", + "Plan: {plan}\n", + "```\n", + "### Final attempted synth prompt\n", + "```text\n", + "Answer briefly and factually using a clear, structured bulleted list: {query}\n", "Plan: {plan}\n", "```\n", "### Final answer\n", "```text\n", - "CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a gene-editing technology that allows scientists to precisely modify DNA within living organisms. It is adapted from a naturally occurring immune system in bacteria used to destroy the DNA of invading viruses.\n", + "CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology. Here is a factual breakdown of its nature and function:\n", "\n", - "The system, commonly known as CRISPR-Cas9, consists of two main components:\n", - "* **Cas9:** An enzyme that acts as \"molecular scissors\" to cut the DNA.\n", - "* **Guide RNA (gRNA):** A pre-designed RNA sequence that leads t...\n", + "### **The Conceptual Foundation**\n", + "* **Definition:** CRISPR is a molecular tool used to precisely edit DNA. It acts like a \"search-and-replace\" function for the genome, allowing scientists to add, remove, or alter genetic material.\n", + "* **Biological Origin:** It is a natural immune system found in bacteria. Bacteria use CRISPR ...\n", "```" ], "text/plain": [ @@ -1879,8 +1972,8 @@ "\n", "```json\n", "{\n", - " \"planner_prompt\": \"Provide a concise list of key factual points to answer: {query}\",\n", - " \"synth_prompt\": \"Give a direct, factual answer to the query using the provided plan. Do not include any introductory remarks, conversational filler, or meta-commentary. 
Start your response immediately with the answer.\\n\\nQuery: {query}\\n...\"\n", + " \"planner_prompt\": \"Create a structured, step-by-step plan for: {query}\",\n", + " \"synth_prompt\": \"Answer briefly and factually using a clear, structured bulleted list: {query}\\nPlan: {plan}\"\n", "}\n", "```" ], @@ -1900,990 +1993,88 @@ "\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", "\n", "\n", "node_0\n", "\n", - "\n", - "planner_prompt\n", - "[parameter]\n", - "Provide a concise list of key factual points to answer: {query}\n", + "\n", + "planner_prompt\n", + "[parameter]\n", + "Create a structured, step-by-step plan for: {query}\n", "\n", "\n", "\n", "\n", "\n", "node_2\n", - "\n", - "\n", - "planner_node\n", - "[message]\n", - "{'query': 'What is CRISPR?', 'plan': 'Here is a concise list of key factual p...\n", + "\n", + "\n", + "planner_node\n", + "[message]\n", + "{'query': 'What is CRISPR?', 'plan': 'To explain **CRISPR** effectively, one ...\n", "\n", "\n", "\n", "\n", "\n", "node_0->node_2\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_1\n", "\n", - "\n", - "synth_prompt\n", - "[parameter]\n", - "Give a direct, factual answer to the query using the provided plan. 
Do not in...\n", + "\n", + "synth_prompt\n", + "[parameter]\n", + "Answer briefly and factually using a clear, structured bulleted list: {query}...\n", "\n", "\n", "\n", "\n", "\n", "node_3\n", - "\n", - "\n", - "synth_node\n", - "[message]\n", - "{'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic R...\n", + "\n", + "\n", + "synth_node\n", + "[message]\n", + "{'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic R...\n", "\n", "\n", "\n", "\n", "\n", "node_1->node_3\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "node_2->node_3\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, "output_type": "display_data" }, { - "data": { - "text/plain": [ - "[{'config': 'trace',\n", - " 'runtime_s': 22.643,\n", - " 'baseline_score': 0.733,\n", - " 'best_score': 0.803,\n", - " 'score_gain': 0.07,\n", - " 'best_iteration': 2,\n", - " 'score_history': [0.733, 0.733, 0.803],\n", - " 'stability_std': 0.0,\n", - " 'node_count': 9,\n", - " 'edge_count': 6,\n", - " 'best_updates': {'synth_prompt': 'Provide a direct, factual summary for the following query. Do not include introductory text, headers, or structural labels. Use the provided plan for factual content only: {query}\\nSource Content: {plan}'},\n", - " 'final_synth_prompt': 'Provide a direct, factual summary for the following query. Organize the information into clear sections or paragraphs following the logical flow of the provided plan. Ensure each main point from the plan is represented as a distinct part of the summary.\\n\\nQuery: {query}\\nSource Plan: {plan}',\n", - " 'final_answer': 'CRISPR is a revolutionary gene-editing technology that allows scientists to make precise changes to the DNA of living organisms. The following summary details its function, origins, and the mechanisms that make it a transformative tool in modern science.\\n\\n### **1. 
The Definition (The \"What\")**\\nCRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. In practical terms, it is often described as \"molecular scissors\" or a \"search-and-replace\" function for the genome. The system consists of two primary components that allow it to edit genetic code with high precision:\\n* **Cas9:** An enzyme that acts as the \"scissors,\" capable of cutting through strands of DNA at a specific location.\\n* **Guide RNA (gRNA):** A small piece of pre-designed RNA sequence that acts as a \"GPS.\" It binds to the Cas9 enzyme and leads it to the exact genetic sequence that needs to be modified.\\n\\n### **2. The Origin (The \"Where\")**\\nWhile',\n", - " 'answer_preview': 'CRISPR is a revolutionary gene-editing technology that allows scientists to make precise changes to the DNA of living organisms. The following summary details its function, orig...',\n", - " 'observers': [],\n", - " 'views': [{'carrier': 'trace',\n", - " 'origin': 'backend',\n", - " 'doc': {'tgj': '1.0',\n", - " 'run_id': 'compare',\n", - " 'agent_id': 'trace',\n", - " 'graph_id': 'trace',\n", - " 'scope': 'trace/backend',\n", - " 'nodes': [{'id': 'state',\n", - " 'kind': 'value',\n", - " 'name': 'state',\n", - " 'value': {'query': 'What is CRISPR?'},\n", - " 'description': '[Node]'},\n", - " {'id': 'make_trace_case.planner_node',\n", - " 'kind': 'message',\n", - " 'name': 'make_trace_case.planner_node',\n", - " 'op': 'make_trace_case.planner_node',\n", - " 'description': '[make_trace_case.planner_node]',\n", - " 'inputs': {'in_0': {'ref': 'state'}},\n", - " 'output': {'name': 'make_trace_case.planner_node:out',\n", - " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': }}},\n", - " {'id': 'make_trace_case.synth_node',\n", - " 'kind': 'message',\n", - " 'name': 'make_trace_case.synth_node',\n", - " 'op': 'make_trace_case.synth_node',\n", - " 'description': '[make_trace_case.synth_node]',\n", - " 'inputs': {'in_0': {'ref': 
'make_trace_case.planner_node'}},\n", - " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", - " {'id': 'make_trace_case.planner_node3_copy',\n", - " 'kind': 'message',\n", - " 'name': 'make_trace_case.planner_node3_copy',\n", - " 'op': 'make_trace_case.planner_node',\n", - " 'description': '[make_trace_case.planner_node]',\n", - " 'inputs': {},\n", - " 'output': {'name': 'make_trace_case.planner_node3_copy:out',\n", - " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': }}},\n", - " {'id': 'str',\n", - " 'kind': 'value',\n", - " 'name': 'str',\n", - " 'value': 'plan',\n", - " 'description': '[Node]'},\n", - " {'id': 'getitem',\n", - " 'kind': 'message',\n", - " 'name': 'getitem',\n", - " 'op': 'getitem',\n", - " 'description': '[getitem] This is a getitem operator of x based on index.',\n", - " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node3_copy'},\n", - " 'in_1': {'ref': 'str'}},\n", - " 'output': {'name': 'getitem:out',\n", - " 'value': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}},\n", - " {'id': 'final_answer_node',\n", - " 'kind': 'message',\n", - " 'name': 'final_answer_node',\n", - " 'op': 'llm',\n", - " 'description': '[llm] synth',\n", - " 'inputs': {'in_0': {'ref': 'synth_prompt'}, 'in_1': {'ref': 'getitem'}},\n", - " 'output': {'name': 'final_answer_node:out',\n", - " 'value': 'CRISPR is a revolutionary gene-editing technology that allows scientists to make precise changes to the DNA of living organisms. The following summary details its function, origins, and the mechanisms that make it a transformative tool in modern science.\\n\\n### **1. The Definition (The \"What\")**\\nCRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. In practical terms, it is often described as \"molecular scissors\" or a \"search-and-replace\" function for the genome. The system consists of two primary components that allow it to edit genetic code with high precision:\\n* **Cas9:** An enzyme that acts as the \"scissors,\" capable of cutting through strands of DNA at a specific location.\\n* **Guide RNA (gRNA):** A small piece of pre-designed RNA sequence that acts as a \"GPS.\" It binds to the Cas9 enzyme and leads it to the exact genetic sequence that needs to be modified.\\n\\n### **2. The Origin (The \"Where\")**\\nWhile'}},\n", - " {'id': 'planner_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'value': 'Create a short plan for: {query}',\n", - " 'trainable': True,\n", - " 'description': '[Parameter]'},\n", - " {'id': 'synth_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'value': 'Provide a direct, factual summary for the following query. 
Organize the information into clear sections or paragraphs following the logical flow of the provided plan. Ensure each main point from the plan is represented as a distinct part of the summary.\\n\\nQuery: {query}\\nSource Plan: {plan}',\n", - " 'trainable': True,\n", - " 'description': '[Parameter]'}]},\n", - " 'summary': {'node_count': 9,\n", - " 'message_names': ['final_answer_node',\n", - " 'getitem',\n", - " 'make_trace_case.planner_node',\n", - " 'make_trace_case.planner_node3_copy',\n", - " 'make_trace_case.synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Provide a direct, factual summary for the following query. Organize the information into clear sections or paragraphs following the logical flow of the provided plan. Ensure each main point from the plan is represente...'}}}]},\n", - " {'config': 'trace+otel',\n", - " 'runtime_s': 20.681,\n", - " 'baseline_score': 0.733,\n", - " 'best_score': 0.733,\n", - " 'score_gain': 0.0,\n", - " 'best_iteration': 0,\n", - " 'score_history': [0.733, 0.733, 0.677],\n", - " 'stability_std': 0.0,\n", - " 'node_count': 9,\n", - " 'edge_count': 6,\n", - " 'best_updates': {},\n", - " 'final_synth_prompt': 'Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. Present the information concisely.\\n\\nQuery: {query}\\nPlan: {plan}',\n", - " 'final_answer': '**CRISPR** (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that functions as \"molecular scissors\" or a \"search-and-replace\" tool for DNA. 
It consists of two primary components: **Cas9**, an enzyme that cuts the DNA strands, and **Guide RNA (gRNA)**, a programmed sequence that directs the Cas9 enzyme to a specific location in the genome.\\n\\nThe system was adapted from a naturally occurring immune mechanism in bacteria, which use CRISPR sequences to identify and destroy the DNA of invading viruses. Scientists repurposed this biological process into a versatile laboratory tool capable of precisely modifying the genetic code of living organisms.\\n\\nIn practice, CRISPR is used to disable specific genes, correct harmful mutations, or insert new genetic material. Its applications include treating genetic disorders like sickle cell anemia, developing pest-resistant crops, and advancing cancer immunotherapy. However, the technology raises significant ethical considerations, particularly regarding \"germline editing\"—modifications to human embryos that are heritable—and the potential for unintended',\n", - " 'answer_preview': '**CRISPR** (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that functions as \"molecular scissors\" or a \"search-and-replace...',\n", - " 'observers': ['otel'],\n", - " 'views': [{'carrier': 'trace',\n", - " 'origin': 'backend',\n", - " 'doc': {'tgj': '1.0',\n", - " 'run_id': 'compare',\n", - " 'agent_id': 'trace+otel',\n", - " 'graph_id': 'trace',\n", - " 'scope': 'trace+otel/backend',\n", - " 'nodes': [{'id': 'state',\n", - " 'kind': 'value',\n", - " 'name': 'state',\n", - " 'value': {'query': 'What is CRISPR?'},\n", - " 'description': '[Node]'},\n", - " {'id': 'make_trace_case.planner_node',\n", - " 'kind': 'message',\n", - " 'name': 'make_trace_case.planner_node',\n", - " 'op': 'make_trace_case.planner_node',\n", - " 'description': '[make_trace_case.planner_node]',\n", - " 'inputs': {'in_0': {'ref': 'state'}},\n", - " 'output': {'name': 'make_trace_case.planner_node:out',\n", - " 'value': {'query': 'What is CRISPR?',\n", - 
" 'plan': }}},\n", - " {'id': 'make_trace_case.synth_node',\n", - " 'kind': 'message',\n", - " 'name': 'make_trace_case.synth_node',\n", - " 'op': 'make_trace_case.synth_node',\n", - " 'description': '[make_trace_case.synth_node]',\n", - " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", - " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", - " {'id': 'make_trace_case.planner_node7_copy',\n", - " 'kind': 'message',\n", - " 'name': 'make_trace_case.planner_node7_copy',\n", - " 'op': 'make_trace_case.planner_node',\n", - " 'description': '[make_trace_case.planner_node]',\n", - " 'inputs': {},\n", - " 'output': {'name': 'make_trace_case.planner_node7_copy:out',\n", - " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': }}},\n", - " {'id': 'str',\n", - " 'kind': 'value',\n", - " 'name': 'str',\n", - " 'value': 'plan',\n", - " 'description': '[Node]'},\n", - " {'id': 'getitem',\n", - " 'kind': 'message',\n", - " 'name': 'getitem',\n", - " 'op': 'getitem',\n", - " 'description': '[getitem] This is a getitem operator of x based on index.',\n", - " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node7_copy'},\n", - " 'in_1': {'ref': 'str'}},\n", - " 'output': {'name': 'getitem:out',\n", - " 'value': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}},\n", - " {'id': 'final_answer_node',\n", - " 'kind': 'message',\n", - " 'name': 'final_answer_node',\n", - " 'op': 'llm',\n", - " 'description': '[llm] synth',\n", - " 'inputs': {'in_0': {'ref': 'synth_prompt'}, 'in_1': {'ref': 'getitem'}},\n", - " 'output': {'name': 'final_answer_node:out',\n", - " 'value': '**CRISPR** (Clustered Regularly Interspaced Short Palindromic Repeats) is a revolutionary gene-editing technology that functions as \"molecular scissors\" or a \"search-and-replace\" tool for DNA. It consists of two primary components: **Cas9**, an enzyme that cuts the DNA strands, and **Guide RNA (gRNA)**, a programmed sequence that directs the Cas9 enzyme to a specific location in the genome.\\n\\nThe system was adapted from a naturally occurring immune mechanism in bacteria, which use CRISPR sequences to identify and destroy the DNA of invading viruses. Scientists repurposed this biological process into a versatile laboratory tool capable of precisely modifying the genetic code of living organisms.\\n\\nIn practice, CRISPR is used to disable specific genes, correct harmful mutations, or insert new genetic material. Its applications include treating genetic disorders like sickle cell anemia, developing pest-resistant crops, and advancing cancer immunotherapy. 
However, the technology raises significant ethical considerations, particularly regarding \"germline editing\"—modifications to human embryos that are heritable—and the potential for unintended'}},\n", - " {'id': 'planner_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'value': 'Create a short plan for: {query}',\n", - " 'trainable': True,\n", - " 'description': '[Parameter]'},\n", - " {'id': 'synth_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'value': 'Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. Present the information concisely.\\n\\nQuery: {query}\\nPlan: {plan}',\n", - " 'trainable': True,\n", - " 'description': '[Parameter]'}]},\n", - " 'summary': {'node_count': 9,\n", - " 'message_names': ['final_answer_node',\n", - " 'getitem',\n", - " 'make_trace_case.planner_node',\n", - " 'make_trace_case.planner_node7_copy',\n", - " 'make_trace_case.synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. 
Pres...'}}},\n", - " {'carrier': 'otel',\n", - " 'origin': 'observer',\n", - " 'doc': {'version': 'trace-json/1.0+otel',\n", - " 'agent': {'id': 'trace+otel', 'service': 'trace+otel'},\n", - " 'otel_meta': {'trace_id': 'ae6f7b5943ea2cfa381a0076f0df71b7'},\n", - " 'nodes': {'trace+otel:param_planner_prompt': {'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'data': 'Create a short plan for: {query}',\n", - " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '6ce26b54d4074ce5'}},\n", - " 'id': 'trace+otel:param_planner_prompt'},\n", - " 'trace+otel:planner_node': {'kind': 'msg',\n", - " 'name': 'planner_node',\n", - " 'op': 'unspecified',\n", - " 'inputs': {'param_planner_prompt': 'trace+otel:param_planner_prompt'},\n", - " 'data': {'message_id': 'planner_node'},\n", - " 'info': {'otel': {'trace_id': 'b0aacf4373aa7b7b9f5b9e92d326ee3c',\n", - " 'span_id': '6ce26b54d4074ce5',\n", - " 'parent_span_id': '',\n", - " 'service': 'trace+otel',\n", - " 'temporal_ignore': False}},\n", - " 'id': 'trace+otel:planner_node'},\n", - " 'trace+otel:param_synth_prompt': {'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'data': 'Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. 
Present the information concisely.\\n\\nQuery: {query}\\nPlan: {plan}',\n", - " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '4c8072c4f5e168e0'}},\n", - " 'id': 'trace+otel:param_synth_prompt'},\n", - " 'trace+otel:synth_node': {'kind': 'msg',\n", - " 'name': 'synth_node',\n", - " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'trace+otel:planner_node',\n", - " 'param_synth_prompt': 'trace+otel:param_synth_prompt'},\n", - " 'data': {'message_id': 'synth_node'},\n", - " 'info': {'otel': {'trace_id': 'ae6f7b5943ea2cfa381a0076f0df71b7',\n", - " 'span_id': '4c8072c4f5e168e0',\n", - " 'parent_span_id': '6ce26b54d4074ce5',\n", - " 'service': 'trace+otel',\n", - " 'temporal_ignore': False}},\n", - " 'id': 'trace+otel:synth_node'}},\n", - " 'context': {}},\n", - " 'summary': {'node_count': 4,\n", - " 'message_names': ['planner_node', 'synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Provide a direct, factual explanation of the topic based on the provided plan. Start your response immediately with the definition. Do not use conversational filler, introductory remarks, or concluding summaries. 
Pres...'},\n", - " 'span_count': 2,\n", - " 'span_names': ['planner_node', 'synth_node'],\n", - " 'param_keys': ['param.planner_prompt',\n", - " 'param.planner_prompt.trainable',\n", - " 'param.synth_prompt',\n", - " 'param.synth_prompt.trainable']}}]},\n", - " {'config': 'otel',\n", - " 'runtime_s': 21.909,\n", - " 'baseline_score': 0.733,\n", - " 'best_score': 0.837,\n", - " 'score_gain': 0.103,\n", - " 'best_iteration': 2,\n", - " 'score_history': [0.733, 0.733, 0.837],\n", - " 'stability_std': 0.0,\n", - " 'node_count': 6,\n", - " 'edge_count': 5,\n", - " 'best_updates': {'planner_prompt': 'As an expert scientific researcher, create a detailed step-by-step plan to explain the following topic: {query}. The plan should cover the definition, biological mechanism, and real-world applications.',\n", - " 'synth_prompt': \"Using the plan provided below, write a clear and informative response to the user's query: {query}. Ensure you follow the logical flow of the plan and explain technical terms simply.\\n\\nPlan:\\n{plan}\"},\n", - " 'final_synth_prompt': \"You are a science communicator. Using the structured plan provided below, write a thorough, engaging, and clear response to the user's query: {query}. \\n\\nFollow the plan strictly, using its sections as headers for your response. Explain all technical concepts (like enzymes or DNA sequences) in a way that remains accessible to a non-expert audience while maintaining scientific accuracy.\\n\\nPlan:\\n{plan}\",\n", - " 'final_answer': '### **I. Definition and Historical Context**\\n\\n#### **A. Technical Definition**\\nCRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. To visualize this, imagine a long sentence where the same word is repeated over and over, but between those repeats are unique \"spacer\" sequences. In biological terms, these are specific patterns of DNA nucleotides (the building blocks of life) found in the genomes of microorganisms.\\n\\n#### **B. 
The Biological Origin**\\nWhile we now think of CRISPR as a laboratory tool, it actually evolved billions of years ago as a microscopic immune system. Bacteria and archaea are constantly under attack by viruses called bacteriophages. When a bacterium survives a viral attack, it takes a \"snapshot\" of the virus\\'s DNA and tucks it into its own genome within those \"spacer\" regions mentioned above. This acts as a molecular \"Most Wanted\" poster, allowing the bacterium to recognize and defend itself if that specific virus ever attacks again.\\n\\n#### **C. The \"Genetic Scissors\" Breakthrough**',\n", - " 'answer_preview': '### **I. Definition and Historical Context**\\n\\n#### **A. Technical Definition**\\nCRISPR stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. To visualize this...',\n", - " 'observers': [],\n", - " 'views': [{'carrier': 'otel',\n", - " 'origin': 'backend',\n", - " 'doc': {'version': 'trace-json/1.0+otel',\n", - " 'agent': {'id': 'otel', 'service': 'otel'},\n", - " 'otel_meta': {'trace_id': '458c673d2635aefab1f6f63c396f7a94'},\n", - " 'nodes': {'otel:param_planner_prompt': {'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'data': 'As an expert scientific strategist, create a comprehensive and structured outline to explain the following topic: {query}. The outline must include: 1) A clear definition and historical context, 2) The step-by-step biological mechanism, 3) Notable real-world applications in medicine or agriculture, and 4) Future implications. 
Ensure the plan is concise yet covers all technical essentials.',\n", - " 'trainable': True,\n", - " 'info': {'otel': {'span_id': 'c924fc7efd52a8be'}},\n", - " 'id': 'otel:param_planner_prompt'},\n", - " 'otel:c924fc7efd52a8be': {'kind': 'msg',\n", - " 'name': 'planner_node',\n", - " 'op': 'llm_call',\n", - " 'inputs': {'gen_ai.prompt': 'As an expert scientific strategist, create a comprehensive and structured outline to explain the following topic: What is CRISPR?. The outline must include: 1) A clear definition and historical context, 2) The step-by-step biological mechanism, 3) Notable real-world applications in medicine or agriculture, and 4) Future implications. Ensure the plan is concise yet covers all technical essentials.',\n", - " 'user_query': 'otel:What is CRISPR?',\n", - " 'query': 'otel:What is CRISPR?',\n", - " 'param_planner_prompt': 'otel:param_planner_prompt'},\n", - " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '458c673d2635aefab1f6f63c396f7a94',\n", - " 'span_id': 'c924fc7efd52a8be',\n", - " 'parent_span_id': None,\n", - " 'service': 'otel',\n", - " 'temporal_ignore': False}},\n", - " 'id': 'otel:c924fc7efd52a8be'},\n", - " 'otel:cbda36bf7b379ea8': {'kind': 'msg',\n", - " 'name': 'llm.chat.completion',\n", - " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel:c924fc7efd52a8be'},\n", - " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '458c673d2635aefab1f6f63c396f7a94',\n", - " 'span_id': 'cbda36bf7b379ea8',\n", - " 'parent_span_id': 'c924fc7efd52a8be',\n", - " 'service': 'otel',\n", - " 'temporal_ignore': True}},\n", - " 'id': 'otel:cbda36bf7b379ea8'},\n", - " 'otel:param_synth_prompt': {'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'data': \"You are a science communicator. Using the structured plan provided below, write a thorough, engaging, and clear response to the user's query: {query}. \\n\\nFollow the plan strictly, using its sections as headers for your response. 
Explain all technical concepts (like enzymes or DNA sequences) in a way that remains accessible to a non-expert audience while maintaining scientific accuracy.\\n\\nPlan:\\n{plan}\",\n", - " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '9d321598c5483758'}},\n", - " 'id': 'otel:param_synth_prompt'},\n", - " 'otel:9d321598c5483758': {'kind': 'msg',\n", - " 'name': 'synth_node',\n", - " 'op': 'llm_call',\n", - " 'inputs': {'gen_ai.prompt': 'You are a science communicator. Using the structured plan provided below, write a thorough, engaging, and clear response to the user\\'s query: What is CRISPR?. \\n\\nFollow the plan strictly, using its sections as headers for your response. Explain all technical concepts (like enzymes or DNA sequences) in a way that remains accessible to a non-expert audience while maintaining scientific accuracy.\\n\\nPlan:\\nThis strategic outline provides a high-level technical overview of CRISPR-Cas9 technology, structured for clarity, scientific accuracy, and forward-looking analysis.\\n\\n---\\n\\n### **Strategic Outline: Understanding CRISPR-Cas9**\\n\\n#### **I. Definition and Historical Context**\\n* **A. Technical Definition:** CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a specialized region of DNA characterized by two distinct traits: the presence of nucleotide repeats and spacers.\\n* **B. The Biological Origin:** Originally discovered as an adaptive immune system in bacteria and archaea used to detect and destroy DNA from invading bacteriophages (viruses).\\n* **C. The \"Genetic Scissors\" Breakthrough:** \\n * Transition from a bacterial defense mechanism to a programmable genome-editing tool.\\n * Key Milestone: The 2012 Doudna-Charpentier publication and subsequent 2020 Nobel Prize in Chemistry.\\n* **D. 
Comparison to Legacy Tools:** Why CRISPR is superior to previous',\n", - " 'user_query': 'otel:What is CRISPR?',\n", - " 'query': 'otel:What is CRISPR?',\n", - " 'plan': 'This strategic outline provides a high-level technical overview of CRISPR-Cas9 technology, structured for clarity, scientific accuracy, and forward-looking analysis.\\n\\n---\\n\\n### **Strategic Outline: Understanding CRISPR-Cas9**\\n\\n#### **I. Definition and Historical Context**\\n* **A. Technical Definition:** CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a specialized region of DNA characterized by two distinct traits: the presence of nucleotide repeats and spacers.\\n* **B. The Biological Origin:** Originally discovered as an adaptive immune system in bacteria and archaea used to detect and destroy DNA from invading bacteriophages (viruses).\\n* **C. The \"Genetic Scissors\" Breakthrough:** \\n * Transition from a bacterial defense mechanism to a programmable genome-editing tool.\\n * Key Milestone: The 2012 Doudna-Charpentier publication and subsequent 2020 Nobel Prize in Chemistry.\\n* **D. 
Comparison to Legacy Tools:** Why CRISPR is superior to previous',\n", - " 'parent': 'otel:c924fc7efd52a8be',\n", - " 'param_synth_prompt': 'otel:param_synth_prompt'},\n", - " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '458c673d2635aefab1f6f63c396f7a94',\n", - " 'span_id': '9d321598c5483758',\n", - " 'parent_span_id': 'c924fc7efd52a8be',\n", - " 'service': 'otel',\n", - " 'temporal_ignore': False}},\n", - " 'id': 'otel:9d321598c5483758'},\n", - " 'otel:aae6d65b3ea4da18': {'kind': 'msg',\n", - " 'name': 'llm.chat.completion',\n", - " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel:9d321598c5483758'},\n", - " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '458c673d2635aefab1f6f63c396f7a94',\n", - " 'span_id': 'aae6d65b3ea4da18',\n", - " 'parent_span_id': '9d321598c5483758',\n", - " 'service': 'otel',\n", - " 'temporal_ignore': True}},\n", - " 'id': 'otel:aae6d65b3ea4da18'}},\n", - " 'context': {}},\n", - " 'summary': {'node_count': 6,\n", - " 'message_names': ['llm.chat.completion',\n", - " 'llm.chat.completion',\n", - " 'planner_node',\n", - " 'synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'As an expert scientific strategist, create a comprehensive and structured outline to explain the following topic: {query}. The outline must include: 1) A clear definition and historical context, 2) The step-by-step bi...',\n", - " 'synth_prompt': \"You are a science communicator. Using the structured plan provided below, write a thorough, engaging, and clear response to the user's query: {query}. 
\\n\\nFollow the plan strictly, using its sections as headers for your...\"},\n", - " 'span_count': 5,\n", - " 'span_names': ['llm.chat.completion',\n", - " 'planner_node',\n", - " 'llm.chat.completion',\n", - " 'synth_node',\n", - " 'langgraph-agent.invoke'],\n", - " 'param_keys': ['param.planner_prompt',\n", - " 'param.planner_prompt.trainable',\n", - " 'param.synth_prompt',\n", - " 'param.synth_prompt.trainable']}}]},\n", - " {'config': 'trace+sysmon',\n", - " 'runtime_s': 18.639,\n", - " 'baseline_score': 0.733,\n", - " 'best_score': 0.733,\n", - " 'score_gain': 0.0,\n", - " 'best_iteration': 0,\n", - " 'score_history': [0.733, 0.733, 0.65],\n", - " 'stability_std': 0.0,\n", - " 'node_count': 9,\n", - " 'edge_count': 6,\n", - " 'best_updates': {},\n", - " 'final_synth_prompt': 'Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. Focus on delivering the facts immediately.\\n\\nQuery: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that functions as a pair of molecular scissors or a search-and-replace tool for DNA. The system relies on two primary components: the Cas9 enzyme, which performs the physical cutting of the genetic material, and a guide RNA molecule that acts as a GPS to direct the enzyme to a precise location within the genome. This mechanism was originally adapted from a naturally occurring immune system in bacteria, which use CRISPR sequences to identify and destroy the DNA of invading viruses. By repurposing this biological defense, scientists can now target specific genes in plants, animals, and humans to disable harmful sequences or insert beneficial ones. 
This capability has led to significant breakthroughs in medicine and agriculture, such as developing treatments for genetic disorders like sickle cell anemia and creating crops that are more resistant to pests or climate change. Despite its immense potential for curing diseases and improving food security, the technology also prompts critical ethical discussions regarding the long-term consequences of permanent genetic alterations and the',\n", - " 'answer_preview': 'CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that functions as a pair of molecular scissors or ...',\n", - " 'observers': ['sysmon'],\n", - " 'views': [{'carrier': 'trace',\n", - " 'origin': 'backend',\n", - " 'doc': {'tgj': '1.0',\n", - " 'run_id': 'compare',\n", - " 'agent_id': 'trace+sysmon',\n", - " 'graph_id': 'trace',\n", - " 'scope': 'trace+sysmon/backend',\n", - " 'nodes': [{'id': 'state',\n", - " 'kind': 'value',\n", - " 'name': 'state',\n", - " 'value': {'query': 'What is CRISPR?'},\n", - " 'description': '[Node]'},\n", - " {'id': 'make_trace_case.planner_node',\n", - " 'kind': 'message',\n", - " 'name': 'make_trace_case.planner_node',\n", - " 'op': 'make_trace_case.planner_node',\n", - " 'description': '[make_trace_case.planner_node]',\n", - " 'inputs': {'in_0': {'ref': 'state'}},\n", - " 'output': {'name': 'make_trace_case.planner_node:out',\n", - " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': }}},\n", - " {'id': 'make_trace_case.synth_node',\n", - " 'kind': 'message',\n", - " 'name': 'make_trace_case.synth_node',\n", - " 'op': 'make_trace_case.synth_node',\n", - " 'description': '[make_trace_case.synth_node]',\n", - " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", - " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", - " {'id': 'make_trace_case.planner_node11_copy',\n", - " 'kind': 'message',\n", - " 'name': 
'make_trace_case.planner_node11_copy',\n", - " 'op': 'make_trace_case.planner_node',\n", - " 'description': '[make_trace_case.planner_node]',\n", - " 'inputs': {},\n", - " 'output': {'name': 'make_trace_case.planner_node11_copy:out',\n", - " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': }}},\n", - " {'id': 'str',\n", - " 'kind': 'value',\n", - " 'name': 'str',\n", - " 'value': 'plan',\n", - " 'description': '[Node]'},\n", - " {'id': 'getitem',\n", - " 'kind': 'message',\n", - " 'name': 'getitem',\n", - " 'op': 'getitem',\n", - " 'description': '[getitem] This is a getitem operator of x based on index.',\n", - " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node11_copy'},\n", - " 'in_1': {'ref': 'str'}},\n", - " 'output': {'name': 'getitem:out',\n", - " 'value': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}},\n", - " {'id': 'final_answer_node',\n", - " 'kind': 'message',\n", - " 'name': 'final_answer_node',\n", - " 'op': 'llm',\n", - " 'description': '[llm] synth',\n", - " 'inputs': {'in_0': {'ref': 'synth_prompt'}, 'in_1': {'ref': 'getitem'}},\n", - " 'output': {'name': 'final_answer_node:out',\n", - " 'value': 'CRISPR, which stands for Clustered Regularly Interspaced Short Palindromic Repeats, is a revolutionary gene-editing technology that functions as a pair of molecular scissors or a search-and-replace tool for DNA. The system relies on two primary components: the Cas9 enzyme, which performs the physical cutting of the genetic material, and a guide RNA molecule that acts as a GPS to direct the enzyme to a precise location within the genome. This mechanism was originally adapted from a naturally occurring immune system in bacteria, which use CRISPR sequences to identify and destroy the DNA of invading viruses. By repurposing this biological defense, scientists can now target specific genes in plants, animals, and humans to disable harmful sequences or insert beneficial ones. This capability has led to significant breakthroughs in medicine and agriculture, such as developing treatments for genetic disorders like sickle cell anemia and creating crops that are more resistant to pests or climate change. 
Despite its immense potential for curing diseases and improving food security, the technology also prompts critical ethical discussions regarding the long-term consequences of permanent genetic alterations and the'}},\n", - " {'id': 'planner_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'value': 'Create a short plan for: {query}',\n", - " 'trainable': True,\n", - " 'description': '[Parameter]'},\n", - " {'id': 'synth_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'value': 'Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. Focus on delivering the facts immediately.\\n\\nQuery: {query}\\nPlan: {plan}',\n", - " 'trainable': True,\n", - " 'description': '[Parameter]'}]},\n", - " 'summary': {'node_count': 9,\n", - " 'message_names': ['final_answer_node',\n", - " 'getitem',\n", - " 'make_trace_case.planner_node',\n", - " 'make_trace_case.planner_node11_copy',\n", - " 'make_trace_case.synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. 
Focus on deliv...'}}},\n", - " {'carrier': 'sysmon',\n", - " 'origin': 'observer',\n", - " 'doc': {'tgj': '1.0',\n", - " 'run_id': 'compare',\n", - " 'agent_id': 'langgraph-agent-sysmon-observer',\n", - " 'graph_id': 'trace+sysmon',\n", - " 'scope': 'trace+sysmon/observer',\n", - " 'nodes': {'param:planner_prompt': {'id': 'param:planner_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'value': 'Create a short plan for: {query}',\n", - " 'trainable': True,\n", - " 'description': '[prompt]'},\n", - " 'param:synth_prompt': {'id': 'param:synth_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'value': 'Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. Focus on delivering the facts immediately.\\n\\nQuery: {query}\\nPlan: {plan}',\n", - " 'trainable': True,\n", - " 'description': '[prompt]'},\n", - " 'msg:bb8e9c312f87481c': {'id': 'msg:bb8e9c312f87481c',\n", - " 'kind': 'message',\n", - " 'name': 'planner_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:655',\n", - " 'inputs': {'param_planner_prompt': {'ref': 'param:planner_prompt'}},\n", - " 'output': {'name': 'planner_node:out',\n", - " 'value': \"{'query': 'What is CRISPR?', 'plan': }\"},\n", - " 'info': {'sysmon': {'duration_ns': 2570870185,\n", - " 'thread_id': 140646174676800}}},\n", - " 'msg:610f74037f4441f9': {'id': 'msg:610f74037f4441f9',\n", - " 'kind': 'message',\n", - " 'name': 'synth_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:666',\n", - " 'inputs': {'parent': {'ref': 'msg:bb8e9c312f87481c'},\n", - " 'param_synth_prompt': {'ref': 'param:synth_prompt'}},\n", - " 'output': {'name': 'synth_node:out',\n", - " 'value': 
\"{'final_answer': }\"},\n", - " 'info': {'sysmon': {'duration_ns': 2280707474,\n", - " 'thread_id': 140646174676800}}}}},\n", - " 'summary': {'node_count': 4,\n", - " 'message_names': ['planner_node', 'synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Synthesize the following plan into a single, direct, and cohesive factual explanation. Provide the information as a continuous response without using bold headers, bullet points, or introductory filler. Focus on deliv...'},\n", - " 'event_count': 2}}]},\n", - " {'config': 'trace+otel+sysmon',\n", - " 'runtime_s': 22.479,\n", - " 'baseline_score': 0.733,\n", - " 'best_score': 0.733,\n", - " 'score_gain': 0.0,\n", - " 'best_iteration': 0,\n", - " 'score_history': [0.733, 0.733, 0.677],\n", - " 'stability_std': 0.0,\n", - " 'node_count': 9,\n", - " 'edge_count': 6,\n", - " 'best_updates': {},\n", - " 'final_synth_prompt': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. Ensure every sentence is complete and strictly follows the logical flow of the plan.\\n\\nTopic: {query}\\nPlan: {plan}\\n\\nDirect Response:',\n", - " 'final_answer': 'CRISPR stands for Clustered Regularly Interspaced Short Palindromic Repeats and functions as a revolutionary gene-editing tool. It is frequently described as \"molecular scissors\" or a \"search-and-replace\" function for the genome, allowing scientists to modify DNA with high precision. The system consists of two primary components: the Cas9 enzyme, which acts as the scissors to cut the DNA strand, and the Guide RNA (gRNA), which serves as a GPS to lead the enzyme to a specific genetic sequence. 
This technology was adapted from a naturally occurring immune system in bacteria, which use CRISPR sequences to identify and destroy the DNA of invading viruses. Scientists repurposed this biological mechanism into a versatile laboratory tool that can be programmed to target and edit the genetic code of virtually any organism.',\n", - " 'answer_preview': 'CRISPR stands for Clustered Regularly Interspaced Short Palindromic Repeats and functions as a revolutionary gene-editing tool. It is frequently described as \"molecular scissors...',\n", - " 'observers': ['sysmon', 'otel'],\n", - " 'views': [{'carrier': 'trace',\n", - " 'origin': 'backend',\n", - " 'doc': {'tgj': '1.0',\n", - " 'run_id': 'compare',\n", - " 'agent_id': 'trace+otel+sysmon',\n", - " 'graph_id': 'trace',\n", - " 'scope': 'trace+otel+sysmon/backend',\n", - " 'nodes': [{'id': 'state',\n", - " 'kind': 'value',\n", - " 'name': 'state',\n", - " 'value': {'query': 'What is CRISPR?'},\n", - " 'description': '[Node]'},\n", - " {'id': 'make_trace_case.planner_node',\n", - " 'kind': 'message',\n", - " 'name': 'make_trace_case.planner_node',\n", - " 'op': 'make_trace_case.planner_node',\n", - " 'description': '[make_trace_case.planner_node]',\n", - " 'inputs': {'in_0': {'ref': 'state'}},\n", - " 'output': {'name': 'make_trace_case.planner_node:out',\n", - " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': }}},\n", - " {'id': 'make_trace_case.synth_node',\n", - " 'kind': 'message',\n", - " 'name': 'make_trace_case.synth_node',\n", - " 'op': 'make_trace_case.synth_node',\n", - " 'description': '[make_trace_case.synth_node]',\n", - " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node'}},\n", - " 'output': {'name': 'make_trace_case.synth_node:out',\n", - " 'value': {'final_answer': }}},\n", - " {'id': 'make_trace_case.planner_node15_copy',\n", - " 'kind': 'message',\n", - " 'name': 'make_trace_case.planner_node15_copy',\n", - " 'op': 'make_trace_case.planner_node',\n", - " 'description': 
'[make_trace_case.planner_node]',\n", - " 'inputs': {},\n", - " 'output': {'name': 'make_trace_case.planner_node15_copy:out',\n", - " 'value': {'query': 'What is CRISPR?',\n", - " 'plan': }}},\n", - " {'id': 'str',\n", - " 'kind': 'value',\n", - " 'name': 'str',\n", - " 'value': 'plan',\n", - " 'description': '[Node]'},\n", - " {'id': 'getitem',\n", - " 'kind': 'message',\n", - " 'name': 'getitem',\n", - " 'op': 'getitem',\n", - " 'description': '[getitem] This is a getitem operator of x based on index.',\n", - " 'inputs': {'in_0': {'ref': 'make_trace_case.planner_node15_copy'},\n", - " 'in_1': {'ref': 'str'}},\n", - " 'output': {'name': 'getitem:out',\n", - " 'value': 'This plan outlines a concise, logical flow to explain CRISPR, moving from a basic definition to its real-world applications and ethical considerations.\\n\\n### **Goal:** To explain CRISPR as a revolutionary gene-editing tool.\\n\\n---\\n\\n### **1. The Definition (The \"What\")**\\n* **Acronym:** Briefly define *Clustered Regularly Interspaced Short Palindromic Repeats*.\\n* **The Analogy:** Describe it as \"molecular scissors\" or a \"search-and-replace\" function for DNA.\\n* **The Components:**\\n * **Cas9:** The enzyme that acts as the scissors to cut DNA.\\n * **Guide RNA (gRNA):** The GPS that leads the scissors to a specific genetic sequence.\\n\\n### **2. 
The Origin (The \"Where\")**\\n* **Nature’s Blueprint:** Explain that CRISPR was adapted from a naturally occurring immune system in bacteria used to fight off viruses.\\n* **The Discovery:** Mention it was repurposed by scientists (not'}},\n", - " {'id': 'final_answer_node',\n", - " 'kind': 'message',\n", - " 'name': 'final_answer_node',\n", - " 'op': 'llm',\n", - " 'description': '[llm] synth',\n", - " 'inputs': {'in_0': {'ref': 'synth_prompt'}, 'in_1': {'ref': 'getitem'}},\n", - " 'output': {'name': 'final_answer_node:out',\n", - " 'value': 'CRISPR stands for Clustered Regularly Interspaced Short Palindromic Repeats and functions as a revolutionary gene-editing tool. It is frequently described as \"molecular scissors\" or a \"search-and-replace\" function for the genome, allowing scientists to modify DNA with high precision. The system consists of two primary components: the Cas9 enzyme, which acts as the scissors to cut the DNA strand, and the Guide RNA (gRNA), which serves as a GPS to lead the enzyme to a specific genetic sequence. This technology was adapted from a naturally occurring immune system in bacteria, which use CRISPR sequences to identify and destroy the DNA of invading viruses. Scientists repurposed this biological mechanism into a versatile laboratory tool that can be programmed to target and edit the genetic code of virtually any organism.'}},\n", - " {'id': 'planner_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'value': 'Create a short plan for: {query}',\n", - " 'trainable': True,\n", - " 'description': '[Parameter]'},\n", - " {'id': 'synth_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'value': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. 
Ensure every sentence is complete and strictly follows the logical flow of the plan.\\n\\nTopic: {query}\\nPlan: {plan}\\n\\nDirect Response:',\n", - " 'trainable': True,\n", - " 'description': '[Parameter]'}]},\n", - " 'summary': {'node_count': 9,\n", - " 'message_names': ['final_answer_node',\n", - " 'getitem',\n", - " 'make_trace_case.planner_node',\n", - " 'make_trace_case.planner_node15_copy',\n", - " 'make_trace_case.synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. Ensu...'}}},\n", - " {'carrier': 'sysmon',\n", - " 'origin': 'observer',\n", - " 'doc': {'tgj': '1.0',\n", - " 'run_id': 'compare',\n", - " 'agent_id': 'langgraph-agent-sysmon-observer',\n", - " 'graph_id': 'trace+otel+sysmon',\n", - " 'scope': 'trace+otel+sysmon/observer',\n", - " 'nodes': {'param:planner_prompt': {'id': 'param:planner_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'value': 'Create a short plan for: {query}',\n", - " 'trainable': True,\n", - " 'description': '[prompt]'},\n", - " 'param:synth_prompt': {'id': 'param:synth_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'value': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. 
Ensure every sentence is complete and strictly follows the logical flow of the plan.\\n\\nTopic: {query}\\nPlan: {plan}\\n\\nDirect Response:',\n", - " 'trainable': True,\n", - " 'description': '[prompt]'},\n", - " 'msg:19ad98a644da4e36': {'id': 'msg:19ad98a644da4e36',\n", - " 'kind': 'message',\n", - " 'name': 'planner_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:655',\n", - " 'inputs': {'param_planner_prompt': {'ref': 'param:planner_prompt'}},\n", - " 'output': {'name': 'planner_node:out',\n", - " 'value': \"{'query': 'What is CRISPR?', 'plan': }\"},\n", - " 'info': {'sysmon': {'duration_ns': 2147475553,\n", - " 'thread_id': 140646174676800}}},\n", - " 'msg:59f7465ae3024148': {'id': 'msg:59f7465ae3024148',\n", - " 'kind': 'message',\n", - " 'name': 'synth_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:666',\n", - " 'inputs': {'parent': {'ref': 'msg:19ad98a644da4e36'},\n", - " 'param_synth_prompt': {'ref': 'param:synth_prompt'}},\n", - " 'output': {'name': 'synth_node:out',\n", - " 'value': \"{'final_answer': }\"},\n", - " 'info': {'sysmon': {'duration_ns': 2146902050,\n", - " 'thread_id': 140646174676800}}}}},\n", - " 'summary': {'node_count': 4,\n", - " 'message_names': ['planner_node', 'synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. 
Ensu...'},\n", - " 'event_count': 2}},\n", - " {'carrier': 'otel',\n", - " 'origin': 'observer',\n", - " 'doc': {'version': 'trace-json/1.0+otel',\n", - " 'agent': {'id': 'trace+otel+sysmon', 'service': 'trace+otel+sysmon'},\n", - " 'otel_meta': {'trace_id': '33c59ff62b32e9179eb45cdbe86608a1'},\n", - " 'nodes': {'trace+otel+sysmon:param_planner_prompt': {'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'data': 'Create a short plan for: {query}',\n", - " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '924e80198bcd1ad6'}},\n", - " 'id': 'trace+otel+sysmon:param_planner_prompt'},\n", - " 'trace+otel+sysmon:planner_node': {'kind': 'msg',\n", - " 'name': 'planner_node',\n", - " 'op': 'unspecified',\n", - " 'inputs': {'param_planner_prompt': 'trace+otel+sysmon:param_planner_prompt'},\n", - " 'data': {'message_id': 'planner_node'},\n", - " 'info': {'otel': {'trace_id': 'b35f2b6cc88f0ff8f0800442600e52e7',\n", - " 'span_id': '924e80198bcd1ad6',\n", - " 'parent_span_id': '',\n", - " 'service': 'trace+otel+sysmon',\n", - " 'temporal_ignore': False}},\n", - " 'id': 'trace+otel+sysmon:planner_node'},\n", - " 'trace+otel+sysmon:param_synth_prompt': {'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'data': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. 
Ensure every sentence is complete and strictly follows the logical flow of the plan.\\n\\nTopic: {query}\\nPlan: {plan}\\n\\nDirect Response:',\n", - " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '337b9311aabcc219'}},\n", - " 'id': 'trace+otel+sysmon:param_synth_prompt'},\n", - " 'trace+otel+sysmon:synth_node': {'kind': 'msg',\n", - " 'name': 'synth_node',\n", - " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'trace+otel+sysmon:planner_node',\n", - " 'param_synth_prompt': 'trace+otel+sysmon:param_synth_prompt'},\n", - " 'data': {'message_id': 'synth_node'},\n", - " 'info': {'otel': {'trace_id': '33c59ff62b32e9179eb45cdbe86608a1',\n", - " 'span_id': '337b9311aabcc219',\n", - " 'parent_span_id': '924e80198bcd1ad6',\n", - " 'service': 'trace+otel+sysmon',\n", - " 'temporal_ignore': False}},\n", - " 'id': 'trace+otel+sysmon:synth_node'}},\n", - " 'context': {}},\n", - " 'summary': {'node_count': 4,\n", - " 'message_names': ['planner_node', 'synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a short plan for: {query}',\n", - " 'synth_prompt': 'Respond to the topic using the provided plan. Your response must be extremely direct: start immediately with the facts, remove all section headings (like \"###\"), and do not use introductory or concluding remarks. 
Ensu...'},\n", - " 'span_count': 2,\n", - " 'span_names': ['planner_node', 'synth_node'],\n", - " 'param_keys': ['param.planner_prompt',\n", - " 'param.planner_prompt.trainable',\n", - " 'param.synth_prompt',\n", - " 'param.synth_prompt.trainable']}}]},\n", - " {'config': 'otel+sysmon',\n", - " 'runtime_s': 21.463,\n", - " 'baseline_score': 0.733,\n", - " 'best_score': 0.915,\n", - " 'score_gain': 0.182,\n", - " 'best_iteration': 2,\n", - " 'score_history': [0.733, 0.733, 0.915],\n", - " 'stability_std': 0.0,\n", - " 'node_count': 6,\n", - " 'edge_count': 5,\n", - " 'best_updates': {'planner_prompt': 'Create an outline for a comprehensive yet concise answer to: {query}. The plan should include a definition, key components, and significance.',\n", - " 'synth_prompt': \"Using the plan provided below, provide a factual and clear answer to the user's query: {query}. Ensure all parts of the plan are addressed.\\n\\nPlan:\\n{plan}\"},\n", - " 'final_synth_prompt': \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \\n\\nInstructions:\\n- Expand on each point of the plan with accurate details.\\n- Use a professional yet accessible tone.\\n- Ensure smooth transitions between the definition, components, and significance sections.\\n- Conclude with a summary of the technology's impact.\\n\\nPlan:\\n{plan}\",\n", - " 'final_answer': 'To understand the future of medicine and biotechnology, one must understand **CRISPR**. Often described as the most significant biological discovery of the 21st century, CRISPR has transformed from an obscure bacterial defense mechanism into a powerful tool for rewriting the code of life.\\n\\n### I. Definition and Etymology: The Blueprint of the System\\nAt its most basic level, **CRISPR** stands for **Clustered Regularly Interspaced Short Palindromic Repeats**. 
While the name is a mouthful, it describes a specific pattern of DNA found in the genomes of bacteria and archaea. \\n\\nThese patterns consist of short sequences of genetic code that read the same forward and backward (palindromes), separated by \"spacers\"—segments of DNA that the bacteria have \"stolen\" from viruses that previously attacked them. This serves as a molecular \"most wanted\" gallery, allowing the bacteria to recognize and defend against those viruses if they return.\\n\\nIn the laboratory, scientists have adapted this natural system into **CRISPR-Cas9**, a technology often referred',\n", - " 'answer_preview': 'To understand the future of medicine and biotechnology, one must understand **CRISPR**. Often described as the most significant biological discovery of the 21st century, CRISPR ...',\n", - " 'observers': ['sysmon'],\n", - " 'views': [{'carrier': 'otel',\n", - " 'origin': 'backend',\n", - " 'doc': {'version': 'trace-json/1.0+otel',\n", - " 'agent': {'id': 'otel+sysmon', 'service': 'otel+sysmon'},\n", - " 'otel_meta': {'trace_id': '47363f96c8cd35ea3eaad92ea1ed0d01'},\n", - " 'nodes': {'otel+sysmon:param_planner_prompt': {'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'data': 'Create a detailed and structured outline for a comprehensive answer to the query: {query}. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Specific real-world applications and significance.\\n4. 
Potential ethical considerations or future outlook.\\nEnsure the outline is logically sequenced and complete.',\n", - " 'trainable': True,\n", - " 'info': {'otel': {'span_id': '81dabfd0047ab7ed'}},\n", - " 'id': 'otel+sysmon:param_planner_prompt'},\n", - " 'otel+sysmon:81dabfd0047ab7ed': {'kind': 'msg',\n", - " 'name': 'planner_node',\n", - " 'op': 'llm_call',\n", - " 'inputs': {'gen_ai.prompt': 'Create a detailed and structured outline for a comprehensive answer to the query: What is CRISPR?. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Specific real-world applications and significance.\\n4. Potential ethical considerations or future outlook.\\nEnsure the outline is logically sequenced and complete.',\n", - " 'user_query': 'otel+sysmon:What is CRISPR?',\n", - " 'query': 'otel+sysmon:What is CRISPR?',\n", - " 'param_planner_prompt': 'otel+sysmon:param_planner_prompt'},\n", - " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '47363f96c8cd35ea3eaad92ea1ed0d01',\n", - " 'span_id': '81dabfd0047ab7ed',\n", - " 'parent_span_id': None,\n", - " 'service': 'otel+sysmon',\n", - " 'temporal_ignore': False}},\n", - " 'id': 'otel+sysmon:81dabfd0047ab7ed'},\n", - " 'otel+sysmon:6bd500faf953ce28': {'kind': 'msg',\n", - " 'name': 'llm.chat.completion',\n", - " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel+sysmon:81dabfd0047ab7ed'},\n", - " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '47363f96c8cd35ea3eaad92ea1ed0d01',\n", - " 'span_id': '6bd500faf953ce28',\n", - " 'parent_span_id': '81dabfd0047ab7ed',\n", - " 'service': 'otel+sysmon',\n", - " 'temporal_ignore': True}},\n", - " 'id': 'otel+sysmon:6bd500faf953ce28'},\n", - " 'otel+sysmon:param_synth_prompt': {'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'data': \"Act as an expert scientific communicator. 
Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \\n\\nInstructions:\\n- Expand on each point of the plan with accurate details.\\n- Use a professional yet accessible tone.\\n- Ensure smooth transitions between the definition, components, and significance sections.\\n- Conclude with a summary of the technology's impact.\\n\\nPlan:\\n{plan}\",\n", - " 'trainable': True,\n", - " 'info': {'otel': {'span_id': 'd8af1f6bb4933b02'}},\n", - " 'id': 'otel+sysmon:param_synth_prompt'},\n", - " 'otel+sysmon:d8af1f6bb4933b02': {'kind': 'msg',\n", - " 'name': 'synth_node',\n", - " 'op': 'llm_call',\n", - " 'inputs': {'gen_ai.prompt': 'Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user\\'s query: What is CRISPR?. \\n\\nInstructions:\\n- Expand on each point of the plan with accurate details.\\n- Use a professional yet accessible tone.\\n- Ensure smooth transitions between the definition, components, and significance sections.\\n- Conclude with a summary of the technology\\'s impact.\\n\\nPlan:\\nThis outline provides a structured framework for a comprehensive explanation of CRISPR, moving from foundational definitions to technical mechanics, practical uses, and societal implications.\\n\\n---\\n\\n### **I. Introduction: Definition and Etymology**\\n* **A. Formal Definition:** Define CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a specialized region of DNA with two distinct characteristics: presence of nucleotide repeats and spacers.\\n* **B. The \"Genetic Scissors\" Analogy:** Introduce CRISPR-Cas9 as a revolutionary tool for genome editing that allows scientists to modify DNA with unprecedented precision.\\n* **C. 
Etymology and Origins:**\\n * Break down the acronym (Clustered, Regularly Interspaced, Short Palindromic Repeats).\\n * Brief history: Discovery in the late 1980s/early 1990s as a natural immune system in bacteria and archaea used to fight viral infections (bacteriophages).\\n* **D. The Nobel Connection:** Mention the',\n", - " 'user_query': 'otel+sysmon:What is CRISPR?',\n", - " 'query': 'otel+sysmon:What is CRISPR?',\n", - " 'plan': 'This outline provides a structured framework for a comprehensive explanation of CRISPR, moving from foundational definitions to technical mechanics, practical uses, and societal implications.\\n\\n---\\n\\n### **I. Introduction: Definition and Etymology**\\n* **A. Formal Definition:** Define CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) as a specialized region of DNA with two distinct characteristics: presence of nucleotide repeats and spacers.\\n* **B. The \"Genetic Scissors\" Analogy:** Introduce CRISPR-Cas9 as a revolutionary tool for genome editing that allows scientists to modify DNA with unprecedented precision.\\n* **C. Etymology and Origins:**\\n * Break down the acronym (Clustered, Regularly Interspaced, Short Palindromic Repeats).\\n * Brief history: Discovery in the late 1980s/early 1990s as a natural immune system in bacteria and archaea used to fight viral infections (bacteriophages).\\n* **D. 
The Nobel Connection:** Mention the',\n", - " 'parent': 'otel+sysmon:81dabfd0047ab7ed',\n", - " 'param_synth_prompt': 'otel+sysmon:param_synth_prompt'},\n", - " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '47363f96c8cd35ea3eaad92ea1ed0d01',\n", - " 'span_id': 'd8af1f6bb4933b02',\n", - " 'parent_span_id': '81dabfd0047ab7ed',\n", - " 'service': 'otel+sysmon',\n", - " 'temporal_ignore': False}},\n", - " 'id': 'otel+sysmon:d8af1f6bb4933b02'},\n", - " 'otel+sysmon:e988ed34c817d3b5': {'kind': 'msg',\n", - " 'name': 'llm.chat.completion',\n", - " 'op': 'unspecified',\n", - " 'inputs': {'parent': 'otel+sysmon:d8af1f6bb4933b02'},\n", - " 'data': {'message_id': None},\n", - " 'info': {'otel': {'trace_id': '47363f96c8cd35ea3eaad92ea1ed0d01',\n", - " 'span_id': 'e988ed34c817d3b5',\n", - " 'parent_span_id': 'd8af1f6bb4933b02',\n", - " 'service': 'otel+sysmon',\n", - " 'temporal_ignore': True}},\n", - " 'id': 'otel+sysmon:e988ed34c817d3b5'}},\n", - " 'context': {}},\n", - " 'summary': {'node_count': 6,\n", - " 'message_names': ['llm.chat.completion',\n", - " 'llm.chat.completion',\n", - " 'planner_node',\n", - " 'synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a detailed and structured outline for a comprehensive answer to the query: {query}. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Spe...',\n", - " 'synth_prompt': \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. 
\\n\\nInstructions:\\n- Expand on each point of the plan with acc...\"},\n", - " 'span_count': 5,\n", - " 'span_names': ['llm.chat.completion',\n", - " 'planner_node',\n", - " 'llm.chat.completion',\n", - " 'synth_node',\n", - " 'langgraph-agent.invoke'],\n", - " 'param_keys': ['param.planner_prompt',\n", - " 'param.planner_prompt.trainable',\n", - " 'param.synth_prompt',\n", - " 'param.synth_prompt.trainable']}},\n", - " {'carrier': 'sysmon',\n", - " 'origin': 'observer',\n", - " 'doc': {'tgj': '1.0',\n", - " 'run_id': 'compare',\n", - " 'agent_id': 'langgraph-agent-sysmon-observer',\n", - " 'graph_id': 'otel+sysmon',\n", - " 'scope': 'otel+sysmon/observer',\n", - " 'nodes': {'param:planner_prompt': {'id': 'param:planner_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'value': 'Create a detailed and structured outline for a comprehensive answer to the query: {query}. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Specific real-world applications and significance.\\n4. Potential ethical considerations or future outlook.\\nEnsure the outline is logically sequenced and complete.',\n", - " 'trainable': True,\n", - " 'description': '[prompt]'},\n", - " 'param:synth_prompt': {'id': 'param:synth_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'value': \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. 
\\n\\nInstructions:\\n- Expand on each point of the plan with accurate details.\\n- Use a professional yet accessible tone.\\n- Ensure smooth transitions between the definition, components, and significance sections.\\n- Conclude with a summary of the technology's impact.\\n\\nPlan:\\n{plan}\",\n", - " 'trainable': True,\n", - " 'description': '[prompt]'},\n", - " 'msg:38c7abc4450b4085': {'id': 'msg:38c7abc4450b4085',\n", - " 'kind': 'message',\n", - " 'name': 'planner_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:627',\n", - " 'inputs': {'param_planner_prompt': {'ref': 'param:planner_prompt'}},\n", - " 'output': {'name': 'planner_node:out',\n", - " 'value': \"{'query': 'What is CRISPR?', 'plan': 'This outline provides a structured framework for a comprehensive explanation of CRISPR, moving from foundational definitions to technical mechanics, practical use\"},\n", - " 'info': {'sysmon': {'duration_ns': 2081719174,\n", - " 'thread_id': 140646174676800}}},\n", - " 'msg:9739b2b9310f406e': {'id': 'msg:9739b2b9310f406e',\n", - " 'kind': 'message',\n", - " 'name': 'synth_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:631',\n", - " 'inputs': {'parent': {'ref': 'msg:38c7abc4450b4085'},\n", - " 'param_synth_prompt': {'ref': 'param:synth_prompt'}},\n", - " 'output': {'name': 'synth_node:out',\n", - " 'value': \"{'final_answer': 'To understand the future of medicine and biotechnology, one must understand **CRISPR**. 
Often described as the most significant biological discovery of the 21st century, CRISPR has t\"},\n", - " 'info': {'sysmon': {'duration_ns': 2149136988,\n", - " 'thread_id': 140646174676800}}}}},\n", - " 'summary': {'node_count': 4,\n", - " 'message_names': ['planner_node', 'synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Create a detailed and structured outline for a comprehensive answer to the query: {query}. \\nThe plan must include:\\n1. A clear definition and etymology.\\n2. A breakdown of key biological and technical components.\\n3. Spe...',\n", - " 'synth_prompt': \"Act as an expert scientific communicator. Using the detailed plan provided below, write a factual, clear, and engaging response to the user's query: {query}. \\n\\nInstructions:\\n- Expand on each point of the plan with acc...\"},\n", - " 'event_count': 2}}]},\n", - " {'config': 'sysmon',\n", - " 'runtime_s': 20.267,\n", - " 'baseline_score': 0.733,\n", - " 'best_score': 0.854,\n", - " 'score_gain': 0.121,\n", - " 'best_iteration': 2,\n", - " 'score_history': [0.733, 0.733, 0.854],\n", - " 'stability_std': 0.0,\n", - " 'node_count': 4,\n", - " 'edge_count': 3,\n", - " 'best_updates': {'planner_prompt': 'Provide a concise list of key factual points to answer: {query}',\n", - " 'synth_prompt': 'Give a direct, factual answer to the query using the provided plan. Do not include any introductory remarks, conversational filler, or meta-commentary. Start your response immediately with the answer.\\n\\nQuery: {query}\\nPlan: {plan}'},\n", - " 'final_synth_prompt': 'Give a direct, factual answer to the query using the provided plan. Do not include any introductory remarks, conversational filler, or meta-commentary. 
Start your response immediately with the answer.\\n\\nQuery: {query}\\nPlan: {plan}',\n", - " 'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a gene-editing technology that allows scientists to precisely modify DNA within living organisms. It is adapted from a naturally occurring immune system in bacteria used to destroy the DNA of invading viruses.\\n\\nThe system, commonly known as CRISPR-Cas9, consists of two main components:\\n* **Cas9:** An enzyme that acts as \"molecular scissors\" to cut the DNA.\\n* **Guide RNA (gRNA):** A pre-designed RNA sequence that leads the Cas9 enzyme to a specific location in the genome.\\n\\nOnce the DNA is cut, the cell’s natural repair mechanisms fix the break. Scientists can leverage this process to disable specific genes or insert new genetic material. Compared to previous gene-editing methods, CRISPR is faster, cheaper, and more accurate.',\n", - " 'answer_preview': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a gene-editing technology that allows scientists to precisely modify DNA within living organisms. It is ada...',\n", - " 'observers': [],\n", - " 'views': [{'carrier': 'sysmon',\n", - " 'origin': 'backend',\n", - " 'doc': {'tgj': '1.0',\n", - " 'run_id': 'compare',\n", - " 'agent_id': 'langgraph-agent',\n", - " 'graph_id': 'sysmon',\n", - " 'scope': 'sysmon/backend',\n", - " 'nodes': {'param:planner_prompt': {'id': 'param:planner_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'planner_prompt',\n", - " 'value': 'Provide a concise list of key factual points to answer: {query}',\n", - " 'trainable': True,\n", - " 'description': '[prompt]'},\n", - " 'param:synth_prompt': {'id': 'param:synth_prompt',\n", - " 'kind': 'parameter',\n", - " 'name': 'synth_prompt',\n", - " 'value': 'Give a direct, factual answer to the query using the provided plan. Do not include any introductory remarks, conversational filler, or meta-commentary. 
Start your response immediately with the answer.\\n\\nQuery: {query}\\nPlan: {plan}',\n", - " 'trainable': True,\n", - " 'description': '[prompt]'},\n", - " 'msg:888dc93e70854dbb': {'id': 'msg:888dc93e70854dbb',\n", - " 'kind': 'message',\n", - " 'name': 'planner_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:627',\n", - " 'inputs': {'param_planner_prompt': {'ref': 'param:planner_prompt'}},\n", - " 'output': {'name': 'planner_node:out',\n", - " 'value': \"{'query': 'What is CRISPR?', 'plan': 'Here is a concise list of key factual points explaining CRISPR:\\\\n\\\\n* **Definition:** CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a rev\"},\n", - " 'info': {'sysmon': {'duration_ns': 2251537773,\n", - " 'thread_id': 140646174676800}}},\n", - " 'msg:590aefdce4e14b65': {'id': 'msg:590aefdce4e14b65',\n", - " 'kind': 'message',\n", - " 'name': 'synth_node',\n", - " 'description': '[sysmon] /home/user/code/Trace/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py:631',\n", - " 'inputs': {'parent': {'ref': 'msg:888dc93e70854dbb'},\n", - " 'param_synth_prompt': {'ref': 'param:synth_prompt'}},\n", - " 'output': {'name': 'synth_node:out',\n", - " 'value': \"{'final_answer': 'CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a gene-editing technology that allows scientists to precisely modify DNA within living organisms. 
It is adapted \"},\n", - " 'info': {'sysmon': {'duration_ns': 2010532077,\n", - " 'thread_id': 140646174676800}}}}},\n", - " 'summary': {'node_count': 4,\n", - " 'message_names': ['planner_node', 'synth_node'],\n", - " 'semantic_messages': ['planner_node', 'synth_node'],\n", - " 'param_names': ['planner_prompt', 'synth_prompt'],\n", - " 'param_values': {'planner_prompt': 'Provide a concise list of key factual points to answer: {query}',\n", - " 'synth_prompt': 'Give a direct, factual answer to the query using the provided plan. Do not include any introductory remarks, conversational filler, or meta-commentary. Start your response immediately with the answer.\\n\\nQuery: {query}\\n...'},\n", - " 'event_count': 2}}]}]" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Captured 7 comparison rows.\n" + ] } ], "source": [ @@ -2906,7 +2097,7 @@ "else:\n", " raise FileNotFoundError('Could not locate demo_langgraph_instrument_and_compare_observers.py')\n", "\n", - "rows\n" + "print(f'Captured {len(rows)} comparison rows.')\n" ] } ], diff --git a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py index 10c97c4a..48b1a5cd 100644 --- a/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py +++ b/examples/notebooks/demo_langgraph_instrument_and_compare_observers.py @@ -141,6 +141,10 @@ def _truncate(value: Any, limit: int = 140) -> str: return text[: limit - 3] + "..." +def _table_text(value: Any, limit: int = 90) -> str: + return _truncate(value, limit).replace("|", "\\|").replace("\n", "
") + + def _base_name(value: Any) -> str: name = str(getattr(value, "name", getattr(value, "py_name", ""))) return name.split("/")[-1].split(":")[0] @@ -1040,7 +1044,25 @@ def display_notebook_report(rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]: "_Topology metrics remain useful even when score trajectories match, " "for example under the fixed offline prompt schedule._" ) - display(Markdown("## Optimization comparison\n\n" + note + "\n\n" + "\n".join(lines))) + prompt_lines = [ + "| config | best_iteration | best update keys | best-scoring prompt | final attempted prompt |", + "|---|---:|---|---|---|", + ] + for row in rows: + prompt_lines.append( + f"| {row['config']} | {row['best_iteration']} | {row['best_update_keys']} " + f"| {_table_text(row['best_synth_prompt'])} | {_table_text(row['final_synth_prompt'])} |" + ) + display( + Markdown( + "## Optimization comparison\n\n" + + note + + "\n\n" + + "\n".join(lines) + + "\n\n### Prompt selection\n\n" + + "\n".join(prompt_lines) + ) + ) for row in rows: display(