diff --git a/docs/multi_objective_scores.md b/docs/multi_objective_scores.md index 3128f14f..2d4b3d5e 100644 --- a/docs/multi_objective_scores.md +++ b/docs/multi_objective_scores.md @@ -106,7 +106,7 @@ config = ObjectiveConfig( ### Step 3 — Pass it to the trainer ```python -from opto.trainer.algorithms.basic_algorithms import BasicSearchAlgorithm +from examples.trainers.basic_algorithms import BasicSearchAlgorithm trainer = BasicSearchAlgorithm(agent, optimizer) trainer.train( diff --git a/docs/tutorials/minibatch.ipynb b/docs/tutorials/minibatch.ipynb index 890d13f1..39a1002b 100644 --- a/docs/tutorials/minibatch.ipynb +++ b/docs/tutorials/minibatch.ipynb @@ -550,7 +550,19 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "from opto import trace\nfrom opto.utils.llm import LLM\nfrom opto.optimizers import OptoPrime\nfrom opto.trainer.algorithms.basic_algorithms import MinibatchAlgorithm\nfrom opto.trainer.loggers import TensorboardLogger\nfrom opto.trainer.guide import LLMJudge\nfrom opto.features.predefined_agents import BasicLearner\nfrom typing import Any\n\n# Use the predefined BasicLearner instead of defining our own\nLearner = BasicLearner" + "source": [ + "from opto import trace\n", + "from opto.utils.llm import LLM\n", + "from opto.optimizers import OptoPrime\n", + "from examples.trainers.basic_algorithms import MinibatchAlgorithm\n", + "from opto.trainer.loggers import TensorboardLogger\n", + "from opto.trainer.guide import LLMJudge\n", + "from opto.features.predefined_agents import BasicLearner\n", + "from typing import Any\n", + "\n", + "# Use the predefined BasicLearner instead of defining our own\n", + "Learner = BasicLearner" + ] }, { "cell_type": "markdown", @@ -799,15 +811,45 @@ }, { "cell_type": "markdown", - "source": "## Simplified Training with `trainer.train()`\n\nInstead of manually setting up the algorithm, optimizer, guide, and logger, you can use the simplified `trainer.train()` function that handles all the setup for you. This is the recommended approach for most use cases.", - "metadata": {} + "metadata": {}, + "source": [ + "## Simplified Training with `trainer.train()`\n", + "\n", + "Instead of manually setting up the algorithm, optimizer, guide, and logger, you can use the simplified `trainer.train()` function that handles all the setup for you. This is the recommended approach for most use cases." + ] }, { "cell_type": "code", - "source": "# Using the simplified trainer.train approach\nfrom opto import trainer\n\n# Create a fresh agent for simplified training\nsimple_agent = Learner(\n system_prompt=\"You're a helpful agent answering math problems.\",\n llm=LLM()\n)\n\nprint(\"STARTING SIMPLIFIED TRAINING\")\nmetrics, final_score = trainer.train(\n model=simple_agent,\n train_dataset=train_dataset,\n algorithm='MinibatchAlgorithm',\n guide=LLMJudge(llm=LLM()),\n # trainer kwargs\n num_epochs=num_epochs,\n batch_size=batch_size,\n eval_frequency=eval_frequency,\n test_dataset=test_dataset,\n num_threads=num_threads,\n verbose='output',\n)\nprint(\"FINISHED SIMPLIFIED TRAINING\")\nprint(f\"Final score: {final_score}\")", - "metadata": {}, "execution_count": null, - "outputs": [] + "metadata": {}, + "outputs": [], + "source": [ + "# Using the simplified trainer.train approach\n", + "from opto import trainer\n", + "\n", + "# Create a fresh agent for simplified training\n", + "simple_agent = Learner(\n", + " system_prompt=\"You're a helpful agent answering math problems.\",\n", + " llm=LLM()\n", + ")\n", + "\n", + "print(\"STARTING SIMPLIFIED TRAINING\")\n", + "metrics, final_score = trainer.train(\n", + " model=simple_agent,\n", + " train_dataset=train_dataset,\n", + " algorithm='MinibatchAlgorithm',\n", + " guide=LLMJudge(llm=LLM()),\n", + " # trainer kwargs\n", + " num_epochs=num_epochs,\n", + " batch_size=batch_size,\n", + " eval_frequency=eval_frequency,\n", + " test_dataset=test_dataset,\n", + " num_threads=num_threads,\n", + " verbose='output',\n", + ")\n", + "print(\"FINISHED SIMPLIFIED TRAINING\")\n", + "print(f\"Final score: {final_score}\")" + ] } ], "metadata": { @@ -831,4 +873,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/tutorials/trainers.ipynb b/docs/tutorials/trainers.ipynb index 97b662c6..30c8d677 100644 --- a/docs/tutorials/trainers.ipynb +++ b/docs/tutorials/trainers.ipynb @@ -323,9 +323,9 @@ "from opto import trace, trainer\n", "from opto.optimizers import OptoPrime\n", "from opto.optimizers.utils import print_color\n", - "from opto.trainer.algorithms.basic_algorithms import MinibatchAlgorithm, BasicSearchAlgorithm\n", - "from opto.trainer.algorithms.beamsearch_algorithm import BeamsearchAlgorithm, BeamsearchHistoryAlgorithm\n", - "from opto.trainer.algorithms.UCBsearch import UCBSearchAlgorithm\n", + "from examples.trainers.basic_algorithms import MinibatchAlgorithm, BasicSearchAlgorithm\n", + "from examples.trainers.beamsearch_algorithm import BeamsearchAlgorithm, BeamsearchHistoryAlgorithm\n", + "from examples.trainers.UCBsearch import UCBSearchAlgorithm\n", "from opto.features.predefined_agents import BasicLearner\n", "\n", "# Create alias for backward compatibility in this tutorial\n", diff --git a/examples/bbeh/bbeh_trace.py b/examples/bbeh/bbeh_trace.py index 58d672b4..2b9b554b 100644 --- a/examples/bbeh/bbeh_trace.py +++ b/examples/bbeh/bbeh_trace.py @@ -19,7 +19,7 @@ import opto.trace.operators as trace_ops import numpy as np from tqdm import tqdm -from opto.trainer.algorithms.basic_algorithms import MinibatchAlgorithm, evaluate +from examples.trainers.basic_algorithms import MinibatchAlgorithm, evaluate from opto.trainer.guide import Guide diff --git a/examples/minibatch_bbh_aynsc/run_bigbench_trace_async.py b/examples/minibatch_bbh_aynsc/run_bigbench_trace_async.py index c5689a97..388f2d5b 100644 --- a/examples/minibatch_bbh_aynsc/run_bigbench_trace_async.py +++ b/examples/minibatch_bbh_aynsc/run_bigbench_trace_async.py @@ -10,7 +10,7 @@ import autogen import pickle import os -from opto.trainer.algorithms.basic_algorithms import MinibatchAlgorithm, evaluate +from examples.trainers.basic_algorithms import MinibatchAlgorithm, evaluate from opto.trainer.guide import Guide diff --git a/examples/multi_objective_convex_fn.py b/examples/multi_objective_convex_fn.py index 50f11428..7883126d 100644 --- a/examples/multi_objective_convex_fn.py +++ b/examples/multi_objective_convex_fn.py @@ -572,7 +572,7 @@ def _init_certificate(self) -> None: from opto.trainer.loggers import TensorboardLogger from opto import trainer from opto.trainer.objectives import ObjectiveConfig -from opto.trainer.algorithms.basic_algorithms import BasicSearchAlgorithm as SearchAlgorithm +from examples.trainers.basic_algorithms import BasicSearchAlgorithm as SearchAlgorithm from typing import Tuple from copy import copy diff --git a/examples/notebooks/multiobjective_bbeh_langgraph.ipynb b/examples/notebooks/multiobjective_bbeh_langgraph.ipynb index ec5e48dc..4ee74cfa 100644 --- a/examples/notebooks/multiobjective_bbeh_langgraph.ipynb +++ b/examples/notebooks/multiobjective_bbeh_langgraph.ipynb @@ -4,7 +4,27 @@ "cell_type": "markdown", "id": "cell-title", "metadata": {}, - "source": "# T6 M2 — BBEH Boolean Expressions with Multi-Objective Instrumentation\n\n[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AgentOpt/OpenTrace/blob/experimental/examples/notebooks/multiobjective_bbeh_langgraph.ipynb)\n\n**Milestone 2 Deliverable** — Multi-objective scoring on a real LLM task\n\nThis notebook demonstrates multi-objective optimization on the **BBEH boolean_expressions** benchmark\nusing the **PAL (Program-Aided Language model)** strategy from Xavier's original experiment.\n\nTwo objectives are tracked:\n- **accuracy** (binary: 1.0 = correct, 0.0 = wrong)\n- **execution_time_s** (end-to-end wall-clock seconds per example: LLM call + code execution)\n\nThe `LangGraphGuide.get_score_dict()` method returns both metrics per example,\nenabling the M2 multi-objective infrastructure to track and visualize tradeoffs.\n\n**Requires a real LLM API key** (OpenRouter recommended, default model: `openai/gpt-5-nano`).\n\n---" + "source": [ + "# T6 M2 — BBEH Boolean Expressions with Multi-Objective Instrumentation\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AgentOpt/OpenTrace/blob/experimental/examples/notebooks/multiobjective_bbeh_langgraph.ipynb)\n", + "\n", + "**Milestone 2 Deliverable** — Multi-objective scoring on a real LLM task\n", + "\n", + "This notebook demonstrates multi-objective optimization on the **BBEH boolean_expressions** benchmark\n", + "using the **PAL (Program-Aided Language model)** strategy from Xavier's original experiment.\n", + "\n", + "Two objectives are tracked:\n", + "- **accuracy** (binary: 1.0 = correct, 0.0 = wrong)\n", + "- **execution_time_s** (end-to-end wall-clock seconds per example: LLM call + code execution)\n", + "\n", + "The `LangGraphGuide.get_score_dict()` method returns both metrics per example,\n", + "enabling the M2 multi-objective infrastructure to track and visualize tradeoffs.\n", + "\n", + "**Requires a real LLM API key** (OpenRouter recommended, default model: `openai/gpt-5-nano`).\n", + "\n", + "---" + ] }, { "cell_type": "code", @@ -12,7 +32,67 @@ "id": "cell-config", "metadata": {}, "outputs": [], - "source": "import os\n\n# -----------------------\n# Load .env file (if present) so API keys are available via os.getenv()\n# -----------------------\ntry:\n from dotenv import load_dotenv\n # Walk up from notebook dir to find .env (works locally and in Colab)\n _env_candidates = [\".env\", \"../.env\", \"../../.env\", \"../../../.env\"]\n for _ep in _env_candidates:\n if os.path.exists(_ep):\n load_dotenv(_ep, override=False)\n print(f\"Loaded .env from: {os.path.abspath(_ep)}\")\n break\n else:\n print(\"No .env file found (will use existing env vars).\")\nexcept ImportError:\n print(\"python-dotenv not installed (pip install python-dotenv). Using existing env vars.\")\n\n# -----------------------\n# Core defaults (edit me)\n# -----------------------\nBBEH_TASK_NAME = os.getenv(\"BBEH_TASK_NAME\", \"bbeh_boolean_expressions\")\n\n# Data split\nN_TRAIN = int(os.getenv(\"N_TRAIN\", \"20\"))\nN_VAL = int(os.getenv(\"N_VAL\", \"10\"))\nSEED = int(os.getenv(\"SEED\", \"0\"))\n\n# CurriculumBuffer Mode B\nVALIDATE_ON_LAST_N = int(os.getenv(\"VALIDATE_ON_LAST_N\", \"2\"))\nACCUMULATION_STEPS = int(os.getenv(\"ACCUMULATION_STEPS\", \"2\"))\n\n# Optimization loop controls\nLEARNING_RETRY = int(os.getenv(\"LEARNING_RETRY\", \"20\"))\nMAX_ATTEMPTS = int(os.getenv(\"MAX_ATTEMPTS\", \"10\"))\n\nSKIP_OPTIMIZATION = os.getenv(\"SKIP_OPTIMIZATION\", \"0\") == \"1\"\n\n# Output\nOUTPUT_FOLDER = os.getenv(\"OUTPUT_FOLDER\", \"./trace_runs\")\n\n# Optional verbosity toggles\nSHOW_MERMAID_GRAPH = os.getenv(\"SHOW_MERMAID_GRAPH\", \"0\") == \"1\"\nSHOW_OPT_TRACE = os.getenv(\"SHOW_OPT_TRACE\", \"0\") == \"1\"\n\ntry:\n import google.colab\n IN_COLAB = True\nexcept ImportError:\n IN_COLAB = False\n\nprint(\"Config:\")\nprint(f\" {BBEH_TASK_NAME=}\")\nprint(f\" {N_TRAIN=}, {N_VAL=}, {SEED=}\")\nprint(f\" {VALIDATE_ON_LAST_N=}, {ACCUMULATION_STEPS=}\")\nprint(f\" {LEARNING_RETRY=}, {MAX_ATTEMPTS=}\")\nprint(f\" {SKIP_OPTIMIZATION=}\")\nprint(f\" {OUTPUT_FOLDER=}\")" + "source": [ + "import os\n", + "\n", + "# -----------------------\n", + "# Load .env file (if present) so API keys are available via os.getenv()\n", + "# -----------------------\n", + "try:\n", + " from dotenv import load_dotenv\n", + " # Walk up from notebook dir to find .env (works locally and in Colab)\n", + " _env_candidates = [\".env\", \"../.env\", \"../../.env\", \"../../../.env\"]\n", + " for _ep in _env_candidates:\n", + " if os.path.exists(_ep):\n", + " load_dotenv(_ep, override=False)\n", + " print(f\"Loaded .env from: {os.path.abspath(_ep)}\")\n", + " break\n", + " else:\n", + " print(\"No .env file found (will use existing env vars).\")\n", + "except ImportError:\n", + " print(\"python-dotenv not installed (pip install python-dotenv). Using existing env vars.\")\n", + "\n", + "# -----------------------\n", + "# Core defaults (edit me)\n", + "# -----------------------\n", + "BBEH_TASK_NAME = os.getenv(\"BBEH_TASK_NAME\", \"bbeh_boolean_expressions\")\n", + "\n", + "# Data split\n", + "N_TRAIN = int(os.getenv(\"N_TRAIN\", \"20\"))\n", + "N_VAL = int(os.getenv(\"N_VAL\", \"10\"))\n", + "SEED = int(os.getenv(\"SEED\", \"0\"))\n", + "\n", + "# CurriculumBuffer Mode B\n", + "VALIDATE_ON_LAST_N = int(os.getenv(\"VALIDATE_ON_LAST_N\", \"2\"))\n", + "ACCUMULATION_STEPS = int(os.getenv(\"ACCUMULATION_STEPS\", \"2\"))\n", + "\n", + "# Optimization loop controls\n", + "LEARNING_RETRY = int(os.getenv(\"LEARNING_RETRY\", \"20\"))\n", + "MAX_ATTEMPTS = int(os.getenv(\"MAX_ATTEMPTS\", \"10\"))\n", + "\n", + "SKIP_OPTIMIZATION = os.getenv(\"SKIP_OPTIMIZATION\", \"0\") == \"1\"\n", + "\n", + "# Output\n", + "OUTPUT_FOLDER = os.getenv(\"OUTPUT_FOLDER\", \"./trace_runs\")\n", + "\n", + "# Optional verbosity toggles\n", + "SHOW_MERMAID_GRAPH = os.getenv(\"SHOW_MERMAID_GRAPH\", \"0\") == \"1\"\n", + "SHOW_OPT_TRACE = os.getenv(\"SHOW_OPT_TRACE\", \"0\") == \"1\"\n", + "\n", + "try:\n", + " import google.colab\n", + " IN_COLAB = True\n", + "except ImportError:\n", + " IN_COLAB = False\n", + "\n", + "print(\"Config:\")\n", + "print(f\" {BBEH_TASK_NAME=}\")\n", + "print(f\" {N_TRAIN=}, {N_VAL=}, {SEED=}\")\n", + "print(f\" {VALIDATE_ON_LAST_N=}, {ACCUMULATION_STEPS=}\")\n", + "print(f\" {LEARNING_RETRY=}, {MAX_ATTEMPTS=}\")\n", + "print(f\" {SKIP_OPTIMIZATION=}\")\n", + "print(f\" {OUTPUT_FOLDER=}\")" + ] }, { "cell_type": "code", @@ -20,7 +100,40 @@ "id": "cell-setup", "metadata": {}, "outputs": [], - "source": "import os, sys, subprocess\n\nif IN_COLAB:\n if not os.path.exists('/content/Trace'):\n print(\"Setting up Trace...\")\n !pip install langgraph langchain langchain_openai datasets tqdm langchain_community litellm dspy black matplotlib pandas\n !git clone https://github.com/AgentOpt/OpenTrace.git /content/Trace\n %cd /content/Trace\n !git pull origin experimental && git checkout experimental\n !pip install -e .\n sys.path.append('/content/Trace')\nelse:\n # Local: add repo root to sys.path\n _nb_dir = os.path.dirname(os.path.abspath(\"__file__\"))\n _repo_root = os.path.abspath(os.path.join(_nb_dir, \"..\", \"..\"))\n if _repo_root not in sys.path:\n sys.path.insert(0, _repo_root)\n\n# Clone BBEH benchmark tasks\nif not os.path.exists('bbeh'):\n !git clone https://github.com/google-deepmind/bbeh.git\nelse:\n print(\"bbeh/ already exists, skipping clone.\")\n\n# Soft-import display\ntry:\n from IPython.display import display\nexcept Exception:\n def display(*args, **kwargs):\n return None\n\nprint(f\"{IN_COLAB=}\")" + "source": [ + "import os, sys, subprocess\n", + "\n", + "if IN_COLAB:\n", + " if not os.path.exists('/content/Trace'):\n", + " print(\"Setting up Trace...\")\n", + " !pip install langgraph langchain langchain_openai datasets tqdm langchain_community litellm dspy black matplotlib pandas\n", + " !git clone https://github.com/AgentOpt/OpenTrace.git /content/Trace\n", + " %cd /content/Trace\n", + " !git pull origin experimental && git checkout experimental\n", + " !pip install -e .\n", + " sys.path.append('/content/Trace')\n", + "else:\n", + " # Local: add repo root to sys.path\n", + " _nb_dir = os.path.dirname(os.path.abspath(\"__file__\"))\n", + " _repo_root = os.path.abspath(os.path.join(_nb_dir, \"..\", \"..\"))\n", + " if _repo_root not in sys.path:\n", + " sys.path.insert(0, _repo_root)\n", + "\n", + "# Clone BBEH benchmark tasks\n", + "if not os.path.exists('bbeh'):\n", + " !git clone https://github.com/google-deepmind/bbeh.git\n", + "else:\n", + " print(\"bbeh/ already exists, skipping clone.\")\n", + "\n", + "# Soft-import display\n", + "try:\n", + " from IPython.display import display\n", + "except Exception:\n", + " def display(*args, **kwargs):\n", + " return None\n", + "\n", + "print(f\"{IN_COLAB=}\")" + ] }, { "cell_type": "code", @@ -28,7 +141,85 @@ "id": "cell-llm-config", "metadata": {}, "outputs": [], - "source": "import os\nfrom langchain_core.messages import HumanMessage, SystemMessage\nfrom langchain_openai import ChatOpenAI\n\n# -----------------------\n# LLM config — auto-detect from available API keys\n# -----------------------\n# Priority: LLM_SERVICE env var (explicit override) > OPENAI_API_KEY > OPENROUTER_API_KEY > CUSTOMLLM_API_KEY\n# When OPENAI_API_KEY is available, uses gpt-5-nano directly via OpenAI (no OpenRouter prefix).\n\ndef _get_secret(name: str) -> str | None:\n try:\n from google.colab import userdata\n v = userdata.get(name)\n if v:\n return v\n except Exception:\n pass\n return os.getenv(name)\n\nOPENAI_API_KEY = _get_secret(\"OPENAI_API_KEY\")\nOPENROUTER_API_KEY = _get_secret(\"OPENROUTER_API_KEY\")\nCUSTOMLLM_API_KEY = _get_secret(\"CUSTOMLLM_API_KEY\")\nCUSTOMLLM_URL = os.getenv(\"CUSTOMLLM_URL\", \"http://localhost:4000/\")\n\n# Auto-detect service if not explicitly set\n_explicit_service = os.getenv(\"LLM_SERVICE\")\nif _explicit_service:\n LLM_SERVICE = _explicit_service\nelif OPENAI_API_KEY:\n LLM_SERVICE = \"openai\"\nelif OPENROUTER_API_KEY:\n LLM_SERVICE = \"openrouter\"\nelif CUSTOMLLM_API_KEY:\n LLM_SERVICE = \"customllm\"\nelse:\n raise ValueError(\n \"No API key found. Set OPENAI_API_KEY, OPENROUTER_API_KEY, or CUSTOMLLM_API_KEY \"\n \"(via env var, .env file, or Colab secret).\"\n )\n\n# Model name: OpenRouter uses \"openai/gpt-5-nano\" prefix, OpenAI uses \"gpt-5-nano\" directly\n_default_model = os.getenv(\"LLM_GENERAL_MODEL\")\nif _default_model:\n LLM_GENERAL_MODEL = _default_model\nelif LLM_SERVICE == \"openai\":\n LLM_GENERAL_MODEL = \"gpt-5-nano\"\nelse:\n LLM_GENERAL_MODEL = \"openai/gpt-5-nano\"\n\nif LLM_SERVICE == \"openai\":\n if not OPENAI_API_KEY:\n raise ValueError(\"OPENAI_API_KEY missing (set env var, .env file, or Colab secret).\")\n os.environ[\"OPENAI_BASE_URL\"] = \"https://api.openai.com/v1\"\n os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY\nelif LLM_SERVICE == \"openrouter\":\n if not OPENROUTER_API_KEY:\n raise ValueError(\"OPENROUTER_API_KEY missing (set env var, .env file, or Colab secret).\")\n os.environ[\"OPENAI_BASE_URL\"] = \"https://openrouter.ai/api/v1\"\n os.environ[\"OPENAI_API_KEY\"] = OPENROUTER_API_KEY\nelif LLM_SERVICE == \"customllm\":\n if not CUSTOMLLM_API_KEY:\n raise ValueError(\"CUSTOMLLM_API_KEY missing (set env var, .env file, or Colab secret).\")\n os.environ[\"OPENAI_BASE_URL\"] = CUSTOMLLM_URL\n os.environ[\"OPENAI_API_KEY\"] = CUSTOMLLM_API_KEY\nelse:\n raise ValueError(f\"Unknown LLM_SERVICE: {LLM_SERVICE!r}\")\n\nllm = ChatOpenAI(model_name=LLM_GENERAL_MODEL, temperature=0)\n\ndef llm_call(prompt: str, system_instructions: str = \"\") -> str:\n msgs = [HumanMessage(content=prompt)]\n if system_instructions:\n msgs.insert(0, SystemMessage(content=system_instructions))\n return llm.invoke(msgs).content\n\nprint(\"LLM ready:\", {\"service\": LLM_SERVICE, \"model\": LLM_GENERAL_MODEL})" + "source": [ + "import os\n", + "from langchain_core.messages import HumanMessage, SystemMessage\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "# -----------------------\n", + "# LLM config — auto-detect from available API keys\n", + "# -----------------------\n", + "# Priority: LLM_SERVICE env var (explicit override) > OPENAI_API_KEY > OPENROUTER_API_KEY > CUSTOMLLM_API_KEY\n", + "# When OPENAI_API_KEY is available, uses gpt-5-nano directly via OpenAI (no OpenRouter prefix).\n", + "\n", + "def _get_secret(name: str) -> str | None:\n", + " try:\n", + " from google.colab import userdata\n", + " v = userdata.get(name)\n", + " if v:\n", + " return v\n", + " except Exception:\n", + " pass\n", + " return os.getenv(name)\n", + "\n", + "OPENAI_API_KEY = _get_secret(\"OPENAI_API_KEY\")\n", + "OPENROUTER_API_KEY = _get_secret(\"OPENROUTER_API_KEY\")\n", + "CUSTOMLLM_API_KEY = _get_secret(\"CUSTOMLLM_API_KEY\")\n", + "CUSTOMLLM_URL = os.getenv(\"CUSTOMLLM_URL\", \"http://localhost:4000/\")\n", + "\n", + "# Auto-detect service if not explicitly set\n", + "_explicit_service = os.getenv(\"LLM_SERVICE\")\n", + "if _explicit_service:\n", + " LLM_SERVICE = _explicit_service\n", + "elif OPENAI_API_KEY:\n", + " LLM_SERVICE = \"openai\"\n", + "elif OPENROUTER_API_KEY:\n", + " LLM_SERVICE = \"openrouter\"\n", + "elif CUSTOMLLM_API_KEY:\n", + " LLM_SERVICE = \"customllm\"\n", + "else:\n", + " raise ValueError(\n", + " \"No API key found. Set OPENAI_API_KEY, OPENROUTER_API_KEY, or CUSTOMLLM_API_KEY \"\n", + " \"(via env var, .env file, or Colab secret).\"\n", + " )\n", + "\n", + "# Model name: OpenRouter uses \"openai/gpt-5-nano\" prefix, OpenAI uses \"gpt-5-nano\" directly\n", + "_default_model = os.getenv(\"LLM_GENERAL_MODEL\")\n", + "if _default_model:\n", + " LLM_GENERAL_MODEL = _default_model\n", + "elif LLM_SERVICE == \"openai\":\n", + " LLM_GENERAL_MODEL = \"gpt-5-nano\"\n", + "else:\n", + " LLM_GENERAL_MODEL = \"openai/gpt-5-nano\"\n", + "\n", + "if LLM_SERVICE == \"openai\":\n", + " if not OPENAI_API_KEY:\n", + " raise ValueError(\"OPENAI_API_KEY missing (set env var, .env file, or Colab secret).\")\n", + " os.environ[\"OPENAI_BASE_URL\"] = \"https://api.openai.com/v1\"\n", + " os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY\n", + "elif LLM_SERVICE == \"openrouter\":\n", + " if not OPENROUTER_API_KEY:\n", + " raise ValueError(\"OPENROUTER_API_KEY missing (set env var, .env file, or Colab secret).\")\n", + " os.environ[\"OPENAI_BASE_URL\"] = \"https://openrouter.ai/api/v1\"\n", + " os.environ[\"OPENAI_API_KEY\"] = OPENROUTER_API_KEY\n", + "elif LLM_SERVICE == \"customllm\":\n", + " if not CUSTOMLLM_API_KEY:\n", + " raise ValueError(\"CUSTOMLLM_API_KEY missing (set env var, .env file, or Colab secret).\")\n", + " os.environ[\"OPENAI_BASE_URL\"] = CUSTOMLLM_URL\n", + " os.environ[\"OPENAI_API_KEY\"] = CUSTOMLLM_API_KEY\n", + "else:\n", + " raise ValueError(f\"Unknown LLM_SERVICE: {LLM_SERVICE!r}\")\n", + "\n", + "llm = ChatOpenAI(model_name=LLM_GENERAL_MODEL, temperature=0)\n", + "\n", + "def llm_call(prompt: str, system_instructions: str = \"\") -> str:\n", + " msgs = [HumanMessage(content=prompt)]\n", + " if system_instructions:\n", + " msgs.insert(0, SystemMessage(content=system_instructions))\n", + " return llm.invoke(msgs).content\n", + "\n", + "print(\"LLM ready:\", {\"service\": LLM_SERVICE, \"model\": LLM_GENERAL_MODEL})" + ] }, { "cell_type": "code", @@ -47,7 +238,7 @@ " from opto.trace.bundle import FunModule\n", " from opto.optimizers.optoprime_v2 import OptoPrimeV2 as OptoPrime\n", " from opto.trainer.guide import Guide as _TraceGuide\n", - " from opto.trainer.algorithms.basic_algorithms import Minibatch as _TraceMinibatch\n", + " from examples.trainers.basic_algorithms import Minibatch as _TraceMinibatch\n", "except Exception as e:\n", " raise ImportError(\n", " \"Could not import OpenTrace (opto.*). \"\n", diff --git a/examples/notebooks/multiobjective_quickstart.ipynb b/examples/notebooks/multiobjective_quickstart.ipynb index 07732b73..bd1d9aef 100644 --- a/examples/notebooks/multiobjective_quickstart.ipynb +++ b/examples/notebooks/multiobjective_quickstart.ipynb @@ -6,7 +6,31 @@ "id": "a0000001", "metadata": {}, "outputs": [], - "source": "import os, sys\n\n# In Colab: clone and install from GitHub\n# Locally: add repo root to sys.path so opto is importable\ntry:\n import google.colab\n IN_COLAB = True\nexcept ImportError:\n IN_COLAB = False\n\nif IN_COLAB:\n !git clone https://github.com/AgentOpt/OpenTrace.git Trace\n %cd Trace\n !git checkout experimental\n !pip install -e .\nelse:\n # Local: ensure repo root is on sys.path\n _nb_dir = os.path.dirname(os.path.abspath(\"__file__\"))\n _repo_root = os.path.abspath(os.path.join(_nb_dir, \"..\", \"..\"))\n if _repo_root not in sys.path:\n sys.path.insert(0, _repo_root)\n import opto\n print(f\"Using local opto from: {os.path.dirname(opto.__file__)}\")" + "source": [ + "import os, sys\n", + "\n", + "# In Colab: clone and install from GitHub\n", + "# Locally: add repo root to sys.path so opto is importable\n", + "try:\n", + " import google.colab\n", + " IN_COLAB = True\n", + "except ImportError:\n", + " IN_COLAB = False\n", + "\n", + "if IN_COLAB:\n", + " !git clone https://github.com/AgentOpt/OpenTrace.git Trace\n", + " %cd Trace\n", + " !git checkout experimental\n", + " !pip install -e .\n", + "else:\n", + " # Local: ensure repo root is on sys.path\n", + " _nb_dir = os.path.dirname(os.path.abspath(\"__file__\"))\n", + " _repo_root = os.path.abspath(os.path.join(_nb_dir, \"..\", \"..\"))\n", + " if _repo_root not in sys.path:\n", + " sys.path.insert(0, _repo_root)\n", + " import opto\n", + " print(f\"Using local opto from: {os.path.dirname(opto.__file__)}\")" + ] }, { "cell_type": "markdown", @@ -56,7 +80,14 @@ "id": "a0000004", "metadata": {}, "outputs": [], - "source": "import numpy as np\nfrom typing import Dict, Tuple, Optional\n\nprint(\"=\" * 70)\nprint(\"T6 M1 \\u2014 Multi-Objective Vector Scores\")\nprint(\"=\" * 70)" + "source": [ + "import numpy as np\n", + "from typing import Dict, Tuple, Optional\n", + "\n", + "print(\"=\" * 70)\n", + "print(\"T6 M1 \\u2014 Multi-Objective Vector Scores\")\n", + "print(\"=\" * 70)" + ] }, { "cell_type": "markdown", @@ -75,7 +106,61 @@ "id": "a0000006", "metadata": {}, "outputs": [], - "source": "from opto.trainer.objectives import (\n ObjectiveConfig, to_score_dict, apply_minimize,\n weighted_scalarize, dominates, pareto_rank, select_best, select_top_k,\n)\n\nprint(\"--- ObjectiveConfig defaults ---\")\nconfig_default = ObjectiveConfig()\nprint(f\" mode={config_default.mode}, weights={config_default.weights}, \"\n f\"minimize={config_default.minimize}\")\n\nprint(\"\\n--- ObjectiveConfig: weighted mode ---\")\nconfig_weighted = ObjectiveConfig(\n mode=\"weighted\",\n weights={\"accuracy\": 0.8, \"latency_s\": 0.2},\n minimize=frozenset({\"latency_s\"}),\n)\nprint(f\" mode={config_weighted.mode}\")\nprint(f\" weights={config_weighted.weights}\")\nprint(f\" minimize={config_weighted.minimize}\")\n\nprint(\"\\n--- ObjectiveConfig: Pareto mode ---\")\nconfig_pareto = ObjectiveConfig(\n mode=\"pareto\",\n weights={\"accuracy\": 0.5, \"latency_s\": 0.5},\n minimize=frozenset({\"latency_s\"}),\n tie_break=\"weighted\",\n seed=42,\n)\nprint(f\" mode={config_pareto.mode}, tie_break={config_pareto.tie_break}, seed={config_pareto.seed}\")\n\nprint(\"\\n--- ObjectiveConfig: set auto-converts to frozenset ---\")\nconfig_set = ObjectiveConfig(minimize={\"lat\"})\nprint(f\" type(minimize)={type(config_set.minimize).__name__} (auto-converted from set)\")\n\nprint(\"\\n--- Validation: negative weight ---\")\ntry:\n ObjectiveConfig(weights={\"a\": -0.5})\nexcept ValueError as e:\n print(f\" Caught: {e}\")\n\nprint(\"\\n--- Validation: bad mode ---\")\ntry:\n ObjectiveConfig(mode=\"unknown\")\nexcept ValueError as e:\n print(f\" Caught: {e}\")\n\nprint(\"\\n--- Frozen (immutable) ---\")\ntry:\n config_default.mode = \"weighted\"\nexcept AttributeError as e:\n print(f\" Caught: {e}\")\n\nprint(\"\\nObjectiveConfig validation: all checks passed.\")" + "source": [ + "from opto.trainer.objectives import (\n", + " ObjectiveConfig, to_score_dict, apply_minimize,\n", + " weighted_scalarize, dominates, pareto_rank, select_best, select_top_k,\n", + ")\n", + "\n", + "print(\"--- ObjectiveConfig defaults ---\")\n", + "config_default = ObjectiveConfig()\n", + "print(f\" mode={config_default.mode}, weights={config_default.weights}, \"\n", + " f\"minimize={config_default.minimize}\")\n", + "\n", + "print(\"\\n--- ObjectiveConfig: weighted mode ---\")\n", + "config_weighted = ObjectiveConfig(\n", + " mode=\"weighted\",\n", + " weights={\"accuracy\": 0.8, \"latency_s\": 0.2},\n", + " minimize=frozenset({\"latency_s\"}),\n", + ")\n", + "print(f\" mode={config_weighted.mode}\")\n", + "print(f\" weights={config_weighted.weights}\")\n", + "print(f\" minimize={config_weighted.minimize}\")\n", + "\n", + "print(\"\\n--- ObjectiveConfig: Pareto mode ---\")\n", + "config_pareto = ObjectiveConfig(\n", + " mode=\"pareto\",\n", + " weights={\"accuracy\": 0.5, \"latency_s\": 0.5},\n", + " minimize=frozenset({\"latency_s\"}),\n", + " tie_break=\"weighted\",\n", + " seed=42,\n", + ")\n", + "print(f\" mode={config_pareto.mode}, tie_break={config_pareto.tie_break}, seed={config_pareto.seed}\")\n", + "\n", + "print(\"\\n--- ObjectiveConfig: set auto-converts to frozenset ---\")\n", + "config_set = ObjectiveConfig(minimize={\"lat\"})\n", + "print(f\" type(minimize)={type(config_set.minimize).__name__} (auto-converted from set)\")\n", + "\n", + "print(\"\\n--- Validation: negative weight ---\")\n", + "try:\n", + " ObjectiveConfig(weights={\"a\": -0.5})\n", + "except ValueError as e:\n", + " print(f\" Caught: {e}\")\n", + "\n", + "print(\"\\n--- Validation: bad mode ---\")\n", + "try:\n", + " ObjectiveConfig(mode=\"unknown\")\n", + "except ValueError as e:\n", + " print(f\" Caught: {e}\")\n", + "\n", + "print(\"\\n--- Frozen (immutable) ---\")\n", + "try:\n", + " config_default.mode = \"weighted\"\n", + "except AttributeError as e:\n", + " print(f\" Caught: {e}\")\n", + "\n", + "print(\"\\nObjectiveConfig validation: all checks passed.\")" + ] }, { "cell_type": "markdown", @@ -258,21 +343,136 @@ "id": "a0000012", "metadata": {}, "outputs": [], - "source": "# Candidates: (score_dict, payload) tuples\ncandidates = [\n ({\"accuracy\": 0.95, \"latency_s\": 0.200}, \"prompt_A\"),\n ({\"accuracy\": 0.70, \"latency_s\": 0.030}, \"prompt_B\"),\n ({\"accuracy\": 0.88, \"latency_s\": 0.080}, \"prompt_C\"),\n ({\"accuracy\": 0.60, \"latency_s\": 0.020}, \"prompt_D\"),\n]\n\nprint(\"Candidates:\")\nfor s, name in candidates:\n print(f\" {name}: {s}\")\n\n# Scalar mode with explicit config (dict scores require ObjectiveConfig)\nprint(\"\\n--- select_best(scalar, scalarize_dict='mean') ---\")\nconfig_scalar = ObjectiveConfig(mode=\"scalar\", scalarize_dict=\"mean\")\nidx = select_best(candidates, config_scalar)\nprint(f\" Winner: {candidates[idx][1]} (index {idx})\")\nprint(\" (Uses mean of dict values as scalar — explicit via scalarize_dict='mean')\")\n\n# Weighted: accuracy-heavy\nprint(\"\\n--- select_best(weighted, accuracy=0.8) ---\")\nconfig_acc = ObjectiveConfig(\n mode=\"weighted\",\n weights={\"accuracy\": 0.8, \"latency_s\": 0.2},\n minimize=frozenset({\"latency_s\"}),\n)\nidx = select_best(candidates, config_acc)\nprint(f\" Winner: {candidates[idx][1]} (index {idx})\")\n\n# Weighted: latency-heavy\nprint(\"\\n--- select_best(weighted, latency_s=0.8) ---\")\nconfig_lat = ObjectiveConfig(\n mode=\"weighted\",\n weights={\"accuracy\": 0.2, \"latency_s\": 0.8},\n minimize=frozenset({\"latency_s\"}),\n)\nidx = select_best(candidates, config_lat)\nprint(f\" Winner: {candidates[idx][1]} (index {idx})\")\n\n# Pareto mode\nprint(\"\\n--- select_best(pareto, tie_break=weighted) ---\")\nconfig_par = ObjectiveConfig(\n mode=\"pareto\",\n weights={\"accuracy\": 0.5, \"latency_s\": 0.5},\n minimize=frozenset({\"latency_s\"}),\n tie_break=\"weighted\",\n)\nscore_dicts_norm = [apply_minimize(to_score_dict(s), config_par.minimize) for s, _ in candidates]\nranks = pareto_rank(score_dicts_norm)\nprint(f\" Pareto ranks: {ranks}\")\nprint(f\" Front (rank 0): {[candidates[i][1] for i, r in enumerate(ranks) if r == 0]}\")\nidx = select_best(candidates, config_par)\nprint(f\" Winner (after tie-break): {candidates[idx][1]} (index {idx})\")\n\n# Deterministic check\nprint(\"\\n--- Determinism: 10 runs with same config ---\")\nresults = [select_best(candidates, config_par) for _ in range(10)]\nprint(f\" Results: {results}\")\nprint(f\" All identical: {len(set(results)) == 1}\")\n\n# Top-k\nprint(\"\\n--- select_top_k(pareto, k=2) ---\")\ntop2 = select_top_k(candidates, config_par, k=2)\nprint(f\" Top 2: {[candidates[i][1] for i in top2]}\")\n\n# Dict scores + config=None raises ValueError (no hidden reduction)\nprint(\"\\n--- Dict scores + config=None raises ValueError ---\")\ntry:\n select_best(candidates, None)\nexcept ValueError as e:\n print(f\" Caught: {e}\")\n print(\" (Pass explicit ObjectiveConfig to define dict→scalar reduction)\")" + "source": [ + "# Candidates: (score_dict, payload) tuples\n", + "candidates = [\n", + " ({\"accuracy\": 0.95, \"latency_s\": 0.200}, \"prompt_A\"),\n", + " ({\"accuracy\": 0.70, \"latency_s\": 0.030}, \"prompt_B\"),\n", + " ({\"accuracy\": 0.88, \"latency_s\": 0.080}, \"prompt_C\"),\n", + " ({\"accuracy\": 0.60, \"latency_s\": 0.020}, \"prompt_D\"),\n", + "]\n", + "\n", + "print(\"Candidates:\")\n", + "for s, name in candidates:\n", + " print(f\" {name}: {s}\")\n", + "\n", + "# Scalar mode with explicit config (dict scores require ObjectiveConfig)\n", + "print(\"\\n--- select_best(scalar, scalarize_dict='mean') ---\")\n", + "config_scalar = ObjectiveConfig(mode=\"scalar\", scalarize_dict=\"mean\")\n", + "idx = select_best(candidates, config_scalar)\n", + "print(f\" Winner: {candidates[idx][1]} (index {idx})\")\n", + "print(\" (Uses mean of dict values as scalar — explicit via scalarize_dict='mean')\")\n", + "\n", + "# Weighted: accuracy-heavy\n", + "print(\"\\n--- select_best(weighted, accuracy=0.8) ---\")\n", + "config_acc = ObjectiveConfig(\n", + " mode=\"weighted\",\n", + " weights={\"accuracy\": 0.8, \"latency_s\": 0.2},\n", + " minimize=frozenset({\"latency_s\"}),\n", + ")\n", + "idx = select_best(candidates, config_acc)\n", + "print(f\" Winner: {candidates[idx][1]} (index {idx})\")\n", + "\n", + "# Weighted: latency-heavy\n", + "print(\"\\n--- select_best(weighted, latency_s=0.8) ---\")\n", + "config_lat = ObjectiveConfig(\n", + " mode=\"weighted\",\n", + " weights={\"accuracy\": 0.2, \"latency_s\": 0.8},\n", + " minimize=frozenset({\"latency_s\"}),\n", + ")\n", + "idx = select_best(candidates, config_lat)\n", + "print(f\" Winner: {candidates[idx][1]} (index {idx})\")\n", + "\n", + "# Pareto mode\n", + "print(\"\\n--- select_best(pareto, tie_break=weighted) ---\")\n", + "config_par = ObjectiveConfig(\n", + " mode=\"pareto\",\n", + " weights={\"accuracy\": 0.5, \"latency_s\": 0.5},\n", + " minimize=frozenset({\"latency_s\"}),\n", + " tie_break=\"weighted\",\n", + ")\n", + "score_dicts_norm = [apply_minimize(to_score_dict(s), config_par.minimize) for s, _ in candidates]\n", + "ranks = pareto_rank(score_dicts_norm)\n", + "print(f\" Pareto ranks: {ranks}\")\n", + "print(f\" Front (rank 0): {[candidates[i][1] for i, r in enumerate(ranks) if r == 0]}\")\n", + "idx = select_best(candidates, config_par)\n", + "print(f\" Winner (after tie-break): {candidates[idx][1]} (index {idx})\")\n", + "\n", + "# Deterministic check\n", + "print(\"\\n--- Determinism: 10 runs with same config ---\")\n", + "results = [select_best(candidates, config_par) for _ in range(10)]\n", + "print(f\" Results: {results}\")\n", + "print(f\" All identical: {len(set(results)) == 1}\")\n", + "\n", + "# Top-k\n", + "print(\"\\n--- select_top_k(pareto, k=2) ---\")\n", + "top2 = select_top_k(candidates, config_par, k=2)\n", + "print(f\" Top 2: {[candidates[i][1] for i in top2]}\")\n", + "\n", + "# Dict scores + config=None raises ValueError (no hidden reduction)\n", + "print(\"\\n--- Dict scores + config=None raises ValueError ---\")\n", + "try:\n", + " select_best(candidates, None)\n", + "except ValueError as e:\n", + " print(f\" Caught: {e}\")\n", + " print(\" (Pass explicit ObjectiveConfig to define dict→scalar reduction)\")" + ] }, { "cell_type": "markdown", "id": "h7x96u4z4dn", - "source": "### A.4b Weight Sensitivity Demonstration\n\nTwo candidates with a genuine tradeoff: A has higher accuracy, B has higher brevity.\nChanging the weights should flip the winner.", - "metadata": {} + "metadata": {}, + "source": [ + "### A.4b Weight Sensitivity Demonstration\n", + "\n", + "Two candidates with a genuine tradeoff: A has higher accuracy, B has higher brevity.\n", + "Changing the weights should flip the winner." + ] }, { "cell_type": "code", + "execution_count": null, "id": "w0zvqaxrl98", - "source": "# Weight sensitivity: changing weights flips the winner\nfrom opto.trainer.objectives import ObjectiveConfig, select_best, weighted_scalarize\n\ncandidates = [\n ({\"accuracy\": 0.95, \"brevity\": 0.3}, \"candidate_A\"), # high accuracy, low brevity\n ({\"accuracy\": 0.70, \"brevity\": 0.9}, \"candidate_B\"), # low accuracy, high brevity\n]\n\nprint(\"Candidates:\")\nfor score, name in candidates:\n print(f\" {name}: {score}\")\n\n# Accuracy-heavy weights\nconfig_acc = ObjectiveConfig(mode=\"weighted\", weights={\"accuracy\": 0.9, \"brevity\": 0.1})\nwinner_acc = select_best(candidates, config_acc)\nscore_A_acc = weighted_scalarize(candidates[0][0], config_acc.weights)\nscore_B_acc = weighted_scalarize(candidates[1][0], config_acc.weights)\nprint(f\"\\n--- Accuracy-heavy (accuracy=0.9, brevity=0.1) ---\")\nprint(f\" A: 0.9*0.95 + 0.1*0.3 = {score_A_acc:.3f}\")\nprint(f\" B: 0.9*0.70 + 0.1*0.9 = {score_B_acc:.3f}\")\nprint(f\" Winner: {candidates[winner_acc][1]}\")\n\n# Brevity-heavy weights\nconfig_brev = ObjectiveConfig(mode=\"weighted\", weights={\"accuracy\": 0.1, \"brevity\": 0.9})\nwinner_brev = select_best(candidates, config_brev)\nscore_A_brev = weighted_scalarize(candidates[0][0], config_brev.weights)\nscore_B_brev = weighted_scalarize(candidates[1][0], config_brev.weights)\nprint(f\"\\n--- Brevity-heavy (accuracy=0.1, brevity=0.9) ---\")\nprint(f\" A: 0.1*0.95 + 0.9*0.3 = {score_A_brev:.3f}\")\nprint(f\" B: 0.1*0.70 + 0.9*0.9 = {score_B_brev:.3f}\")\nprint(f\" Winner: {candidates[winner_brev][1]}\")\n\n# Verify the flip\nassert winner_acc == 0, \"Accuracy-heavy should pick candidate_A\"\nassert winner_brev == 1, \"Brevity-heavy should pick candidate_B\"\nprint(f\"\\n✓ Weight sensitivity confirmed: accuracy-heavy → A, brevity-heavy → B\")", "metadata": {}, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Weight sensitivity: changing weights flips the winner\n", + "from opto.trainer.objectives import ObjectiveConfig, select_best, weighted_scalarize\n", + "\n", + "candidates = [\n", + " ({\"accuracy\": 0.95, \"brevity\": 0.3}, \"candidate_A\"), # high accuracy, low brevity\n", + " ({\"accuracy\": 0.70, \"brevity\": 0.9}, \"candidate_B\"), # low accuracy, high brevity\n", + "]\n", + "\n", + "print(\"Candidates:\")\n", + "for score, name in candidates:\n", + " print(f\" {name}: {score}\")\n", + "\n", + "# Accuracy-heavy weights\n", + "config_acc = ObjectiveConfig(mode=\"weighted\", weights={\"accuracy\": 0.9, \"brevity\": 0.1})\n", + "winner_acc = select_best(candidates, config_acc)\n", + "score_A_acc = weighted_scalarize(candidates[0][0], config_acc.weights)\n", + "score_B_acc = weighted_scalarize(candidates[1][0], config_acc.weights)\n", + "print(f\"\\n--- Accuracy-heavy (accuracy=0.9, brevity=0.1) ---\")\n", + "print(f\" A: 0.9*0.95 + 0.1*0.3 = {score_A_acc:.3f}\")\n", + "print(f\" B: 0.9*0.70 + 0.1*0.9 = {score_B_acc:.3f}\")\n", + "print(f\" Winner: {candidates[winner_acc][1]}\")\n", + "\n", + "# Brevity-heavy weights\n", + "config_brev = ObjectiveConfig(mode=\"weighted\", weights={\"accuracy\": 0.1, \"brevity\": 0.9})\n", + "winner_brev = select_best(candidates, config_brev)\n", + "score_A_brev = weighted_scalarize(candidates[0][0], config_brev.weights)\n", + "score_B_brev = weighted_scalarize(candidates[1][0], config_brev.weights)\n", + "print(f\"\\n--- Brevity-heavy (accuracy=0.1, brevity=0.9) ---\")\n", + "print(f\" A: 0.1*0.95 + 0.9*0.3 = {score_A_brev:.3f}\")\n", + "print(f\" B: 0.1*0.70 + 0.9*0.9 = {score_B_brev:.3f}\")\n", + "print(f\" Winner: {candidates[winner_brev][1]}\")\n", + "\n", + "# Verify the flip\n", + "assert winner_acc == 0, \"Accuracy-heavy should pick candidate_A\"\n", + "assert winner_brev == 1, \"Brevity-heavy should pick candidate_B\"\n", + "print(f\"\\n✓ Weight sensitivity confirmed: accuracy-heavy → A, brevity-heavy → B\")" + ] }, { "cell_type": "markdown", @@ -284,7 +484,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "a0000014", "metadata": {}, "outputs": [ @@ -336,7 +536,7 @@ "source": [ "from opto.utils.llm import DummyLLM\n", "from opto.optimizers import OptoPrimeV2\n", - "from opto.trainer.algorithms.basic_algorithms import BasicSearchAlgorithm\n", + "from examples.trainers.basic_algorithms import BasicSearchAlgorithm\n", "\n", "# --- Dataset: simple Q&A ---\n", "dataset = dict(\n", diff --git a/examples/notebooks/multiobjective_trainers.ipynb b/examples/notebooks/multiobjective_trainers.ipynb index 5c97209b..abfde246 100644 --- a/examples/notebooks/multiobjective_trainers.ipynb +++ b/examples/notebooks/multiobjective_trainers.ipynb @@ -6,7 +6,44 @@ "id": "cell-setup", "metadata": {}, "outputs": [], - "source": "import os, sys\n\n# In Colab: clone and install from GitHub\n# Locally: add repo root to sys.path so opto is importable\ntry:\n import google.colab\n IN_COLAB = True\nexcept ImportError:\n IN_COLAB = False\n\nif IN_COLAB:\n %cd /content\n !rm -rf Trace # clean slate\n !git clone https://github.com/AgentOpt/OpenTrace.git Trace\n %cd Trace\n !git checkout experimental\n !pip install -e .\n !pip install cvxpy matplotlib pandas\n _repo_root = os.getcwd() # /content/Trace after %cd\nelse:\n # Local: ensure repo root is on sys.path\n _nb_dir = os.path.dirname(os.path.abspath(\"__file__\"))\n _repo_root = os.path.abspath(os.path.join(_nb_dir, \"..\", \"..\"))\n if _repo_root not in sys.path:\n sys.path.insert(0, _repo_root)\n import opto\n print(f\"Using local opto from: {os.path.dirname(opto.__file__)}\")\n\nprint(f\"Repo root: {_repo_root}\")\n\n# Verify cvxpy is available (required for SixHumpCamel SOS certificate)\ntry:\n import cvxpy\n print(f\"cvxpy {cvxpy.__version__} available\")\nexcept ImportError:\n raise ImportError(\"cvxpy is required: pip install cvxpy\")" + "source": [ + "import os, sys\n", + "\n", + "# In Colab: clone and install from GitHub\n", + "# Locally: add repo root to sys.path so opto is importable\n", + "try:\n", + " import google.colab\n", + " IN_COLAB = True\n", + "except ImportError:\n", + " IN_COLAB = False\n", + "\n", + "if IN_COLAB:\n", + " %cd /content\n", + " !rm -rf Trace # clean slate\n", + " !git clone https://github.com/AgentOpt/OpenTrace.git Trace\n", + " %cd Trace\n", + " !git checkout experimental\n", + " !pip install -e .\n", + " !pip install cvxpy matplotlib pandas\n", + " _repo_root = os.getcwd() # /content/Trace after %cd\n", + "else:\n", + " # Local: ensure repo root is on sys.path\n", + " _nb_dir = os.path.dirname(os.path.abspath(\"__file__\"))\n", + " _repo_root = os.path.abspath(os.path.join(_nb_dir, \"..\", \"..\"))\n", + " if _repo_root not in sys.path:\n", + " sys.path.insert(0, _repo_root)\n", + " import opto\n", + " print(f\"Using local opto from: {os.path.dirname(opto.__file__)}\")\n", + "\n", + "print(f\"Repo root: {_repo_root}\")\n", + "\n", + "# Verify cvxpy is available (required for SixHumpCamel SOS certificate)\n", + "try:\n", + " import cvxpy\n", + " print(f\"cvxpy {cvxpy.__version__} available\")\n", + "except ImportError:\n", + " raise ImportError(\"cvxpy is required: pip install cvxpy\")" + ] }, { "cell_type": "markdown", @@ -53,7 +90,30 @@ "id": "cell-imports", "metadata": {}, "outputs": [], - "source": "import re\nimport copy\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\nfrom typing import Tuple, Dict\n\nfrom opto import trace\nfrom opto.trainer.guide import Guide\nfrom opto.trainer.objectives import ObjectiveConfig\nfrom opto.utils.llm import DummyLLM\nfrom opto.optimizers import OptoPrimeV2\nfrom opto.trainer.algorithms.basic_algorithms import BasicSearchAlgorithm\nfrom opto.trainer.algorithms.beamsearch_algorithm import BeamsearchAlgorithm\nfrom opto.features.priority_search.priority_search import PrioritySearch\n\n# Single-item dataset used by all algorithms (SixHumpCamel ignores inputs/infos)\nDATASET = dict(inputs=[None], infos=[None])\n\nprint(\"=\" * 70)\nprint(\"T6 M2 — BeamsearchAlgorithm & PrioritySearch Multi-Objective\")\nprint(\"=\" * 70)" + "source": [ + "import re\n", + "import copy\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "from typing import Tuple, Dict\n", + "\n", + "from opto import trace\n", + "from opto.trainer.guide import Guide\n", + "from opto.trainer.objectives import ObjectiveConfig\n", + "from opto.utils.llm import DummyLLM\n", + "from opto.optimizers import OptoPrimeV2\n", + "from examples.trainers.basic_algorithms import BasicSearchAlgorithm\n", + "from examples.trainers.beamsearch_algorithm import BeamsearchAlgorithm\n", + "from opto.trainer.algorithms.priority_search import PrioritySearch\n", + "\n", + "# Single-item dataset used by all algorithms (SixHumpCamel ignores inputs/infos)\n", + "DATASET = dict(inputs=[None], infos=[None])\n", + "\n", + "print(\"=\" * 70)\n", + "print(\"T6 M2 — BeamsearchAlgorithm & PrioritySearch Multi-Objective\")\n", + "print(\"=\" * 70)" + ] }, { "cell_type": "markdown", @@ -79,7 +139,168 @@ "id": "cell-setup-env", "metadata": {}, "outputs": [], - "source": "# Import SixHumpCamel from Allen's example file\n_examples_dir = os.path.join(_repo_root, 'examples')\nif _examples_dir not in sys.path:\n sys.path.insert(0, _examples_dir)\nfrom multi_objective_convex_fn import SixHumpCamel\n\n\n# --- RewardGuide (defined here with correct copy import) ---\nclass RewardGuide(Guide):\n \"\"\"Multi-objective guide for convex function environments.\n\n Both get_feedback() and get_score_dict() evaluate on a deepcopy of\n the environment so that candidate scoring never advances the real env.\n The SixHumpCamel loss computation is stateless (only depends on x),\n so deepcopy is safe and avoids burning through the env's horizon\n during multi-candidate validation.\n \"\"\"\n\n def __init__(self, env):\n self.env = env\n\n def _score_on_copy(self, response):\n \"\"\"Evaluate response on a deepcopy — env state stays frozen.\"\"\"\n env_copy = copy.deepcopy(self.env)\n obs, reward, done, info = env_copy.step(str(response))\n return obs, reward, done, info\n\n def get_feedback(self, query, response, reference=None, **kwargs) -> Tuple[float, str]:\n obs, reward, done, info = self._score_on_copy(response)\n feedback = ((obs + \"\\n\\n\") if obs else \"\") + info.get(\"feedback\", \"\")\n return float(reward), feedback\n\n def get_score_dict(self, query, response, reference=None, **kwargs) -> Dict[str, float]:\n obs, reward, done, info = self._score_on_copy(response)\n base_loss = info.get(\"base_loss\")\n reg_loss = info.get(\"reg_loss\")\n if base_loss is None or reg_loss is None:\n base_loss = float(\"inf\")\n reg_loss = float(\"inf\")\n return {\"base_loss\": float(base_loss), \"reg_loss\": float(reg_loss)}\n\n\n# --- Agent: wraps a trace node that holds the x = [x1, x2] string ---\n@trace.model\nclass ConvexAgent:\n def __init__(self, initial_value):\n self.param = trace.node(\n initial_value, trainable=True,\n description=\"Input x into the hidden function to minimize y. Format: x = [x1, x2]\"\n )\n\n def forward(self, x):\n return self.param\n\n\n# --- DummyLLM callable: proposes x = [float, float] values ---\nclass ConvexLLMCallable:\n \"\"\"Returns cycling proposals spanning the SixHumpCamel landscape.\"\"\"\n\n PROPOSALS = [\n \"x = [0.09, -0.71]\", # very close to optimum 1\n \"x = [-0.09, 0.71]\", # very close to optimum 2\n \"x = [0.1, -0.7]\", # near optimum 1\n \"x = [-0.1, 0.7]\", # near optimum 2\n \"x = [0.5, -0.3]\", # moderate region\n \"x = [-0.5, 0.3]\", # moderate symmetric\n \"x = [0.2, -0.5]\", # exploring\n \"x = [-0.3, 0.6]\", # exploring\n \"x = [1.0, -1.0]\", # far from optima (high reg)\n \"x = [0.0, 0.0]\", # origin (zero loss)\n ]\n\n def __init__(self):\n self.idx = 0\n\n def __call__(self, messages, **kwargs):\n problem = messages[1][\"content\"]\n name = re.findall(r'', problem)\n name = name[0] if name else \"unknown\"\n value = self.PROPOSALS[self.idx % len(self.PROPOSALS)]\n self.idx += 1\n return (\n f\" Exploring the loss landscape. \\n\"\n f\"\\n\"\n f\" {name} \\n\"\n f\" {value} \\n\"\n f\"\"\n )\n\n\n# --- Post-training evaluation: get actual losses from the final parameter ---\ndef evaluate_final_losses(param_value):\n \"\"\"Evaluate a parameter string on a fresh SixHumpCamel env.\n\n Returns dict with base_loss, reg_loss, total_loss (all actual values).\n \"\"\"\n env = SixHumpCamel(horizon=200, norm_coef=1.0, seed=42)\n env.reset(seed=42)\n x, stop = env.text_extract(str(param_value))\n if x is None:\n return {\"base_loss\": float(\"nan\"), \"reg_loss\": float(\"nan\"), \"total_loss\": float(\"nan\")}\n base, reg, total = env._eval_losses(x)\n return {\"base_loss\": float(base), \"reg_loss\": float(reg), \"total_loss\": float(total)}\n\n\n# --- Factory: create fresh env + agent + optimizer + guide per run ---\n\ndef make_basicsearch_run():\n env = SixHumpCamel(horizon=200, norm_coef=1.0, seed=42)\n env.reset(seed=42)\n guide = RewardGuide(env)\n agent = ConvexAgent(\"x = [0.0, 0.0]\")\n llm = DummyLLM(ConvexLLMCallable())\n optimizer = OptoPrimeV2(agent.parameters(), llm=llm)\n algo = BasicSearchAlgorithm(agent, optimizer)\n return algo, guide, agent\n\n\ndef make_beamsearch_run():\n env = SixHumpCamel(horizon=200, norm_coef=1.0, seed=42)\n env.reset(seed=42)\n guide = RewardGuide(env)\n agent = ConvexAgent(\"x = [0.0, 0.0]\")\n llm = DummyLLM(ConvexLLMCallable())\n optimizer = OptoPrimeV2(agent.parameters(), llm=llm)\n algo = BeamsearchAlgorithm(agent, optimizer)\n return algo, guide, agent\n\n\ndef make_priority_search_run():\n env = SixHumpCamel(horizon=200, norm_coef=1.0, seed=42)\n env.reset(seed=42)\n guide = RewardGuide(env)\n agent = ConvexAgent(\"x = [0.0, 0.0]\")\n llm = DummyLLM(ConvexLLMCallable())\n optimizer = OptoPrimeV2(agent.parameters(), llm=llm)\n algo = PrioritySearch(agent, optimizer)\n return algo, guide, agent\n\n\n# Objective configs\nCONFIG_WEIGHTED = ObjectiveConfig(\n mode=\"weighted\",\n weights={\"base_loss\": 1.0, \"reg_loss\": 1.0},\n minimize=frozenset({\"base_loss\", \"reg_loss\"}),\n seed=0,\n)\n\nCONFIG_PARETO = ObjectiveConfig(\n mode=\"pareto\",\n weights={\"base_loss\": 0.7, \"reg_loss\": 0.3},\n minimize=frozenset({\"base_loss\", \"reg_loss\"}),\n tie_break=\"weighted\",\n seed=42,\n)\n\n# Results collector\nresults = {}\nprint(\"Setup complete. SixHumpCamel environment + DummyLLM ready.\")\nprint(f\"DummyLLM has {len(ConvexLLMCallable.PROPOSALS)} diverse proposals.\")" + "source": [ + "# Import SixHumpCamel from Allen's example file\n", + "_examples_dir = os.path.join(_repo_root, 'examples')\n", + "if _examples_dir not in sys.path:\n", + " sys.path.insert(0, _examples_dir)\n", + "from multi_objective_convex_fn import SixHumpCamel\n", + "\n", + "\n", + "# --- RewardGuide (defined here with correct copy import) ---\n", + "class RewardGuide(Guide):\n", + " \"\"\"Multi-objective guide for convex function environments.\n", + "\n", + " Both get_feedback() and get_score_dict() evaluate on a deepcopy of\n", + " the environment so that candidate scoring never advances the real env.\n", + " The SixHumpCamel loss computation is stateless (only depends on x),\n", + " so deepcopy is safe and avoids burning through the env's horizon\n", + " during multi-candidate validation.\n", + " \"\"\"\n", + "\n", + " def __init__(self, env):\n", + " self.env = env\n", + "\n", + " def _score_on_copy(self, response):\n", + " \"\"\"Evaluate response on a deepcopy — env state stays frozen.\"\"\"\n", + " env_copy = copy.deepcopy(self.env)\n", + " obs, reward, done, info = env_copy.step(str(response))\n", + " return obs, reward, done, info\n", + "\n", + " def get_feedback(self, query, response, reference=None, **kwargs) -> Tuple[float, str]:\n", + " obs, reward, done, info = self._score_on_copy(response)\n", + " feedback = ((obs + \"\\n\\n\") if obs else \"\") + info.get(\"feedback\", \"\")\n", + " return float(reward), feedback\n", + "\n", + " def get_score_dict(self, query, response, reference=None, **kwargs) -> Dict[str, float]:\n", + " obs, reward, done, info = self._score_on_copy(response)\n", + " base_loss = info.get(\"base_loss\")\n", + " reg_loss = info.get(\"reg_loss\")\n", + " if base_loss is None or reg_loss is None:\n", + " base_loss = float(\"inf\")\n", + " reg_loss = float(\"inf\")\n", + " return {\"base_loss\": float(base_loss), \"reg_loss\": float(reg_loss)}\n", + "\n", + "\n", + "# --- Agent: wraps a trace node that holds the x = [x1, x2] string ---\n", + "@trace.model\n", + "class ConvexAgent:\n", + " def __init__(self, initial_value):\n", + " self.param = trace.node(\n", + " initial_value, trainable=True,\n", + " description=\"Input x into the hidden function to minimize y. Format: x = [x1, x2]\"\n", + " )\n", + "\n", + " def forward(self, x):\n", + " return self.param\n", + "\n", + "\n", + "# --- DummyLLM callable: proposes x = [float, float] values ---\n", + "class ConvexLLMCallable:\n", + " \"\"\"Returns cycling proposals spanning the SixHumpCamel landscape.\"\"\"\n", + "\n", + " PROPOSALS = [\n", + " \"x = [0.09, -0.71]\", # very close to optimum 1\n", + " \"x = [-0.09, 0.71]\", # very close to optimum 2\n", + " \"x = [0.1, -0.7]\", # near optimum 1\n", + " \"x = [-0.1, 0.7]\", # near optimum 2\n", + " \"x = [0.5, -0.3]\", # moderate region\n", + " \"x = [-0.5, 0.3]\", # moderate symmetric\n", + " \"x = [0.2, -0.5]\", # exploring\n", + " \"x = [-0.3, 0.6]\", # exploring\n", + " \"x = [1.0, -1.0]\", # far from optima (high reg)\n", + " \"x = [0.0, 0.0]\", # origin (zero loss)\n", + " ]\n", + "\n", + " def __init__(self):\n", + " self.idx = 0\n", + "\n", + " def __call__(self, messages, **kwargs):\n", + " problem = messages[1][\"content\"]\n", + " name = re.findall(r'', problem)\n", + " name = name[0] if name else \"unknown\"\n", + " value = self.PROPOSALS[self.idx % len(self.PROPOSALS)]\n", + " self.idx += 1\n", + " return (\n", + " f\" Exploring the loss landscape. \\n\"\n", + " f\"\\n\"\n", + " f\" {name} \\n\"\n", + " f\" {value} \\n\"\n", + " f\"\"\n", + " )\n", + "\n", + "\n", + "# --- Post-training evaluation: get actual losses from the final parameter ---\n", + "def evaluate_final_losses(param_value):\n", + " \"\"\"Evaluate a parameter string on a fresh SixHumpCamel env.\n", + "\n", + " Returns dict with base_loss, reg_loss, total_loss (all actual values).\n", + " \"\"\"\n", + " env = SixHumpCamel(horizon=200, norm_coef=1.0, seed=42)\n", + " env.reset(seed=42)\n", + " x, stop = env.text_extract(str(param_value))\n", + " if x is None:\n", + " return {\"base_loss\": float(\"nan\"), \"reg_loss\": float(\"nan\"), \"total_loss\": float(\"nan\")}\n", + " base, reg, total = env._eval_losses(x)\n", + " return {\"base_loss\": float(base), \"reg_loss\": float(reg), \"total_loss\": float(total)}\n", + "\n", + "\n", + "# --- Factory: create fresh env + agent + optimizer + guide per run ---\n", + "\n", + "def make_basicsearch_run():\n", + " env = SixHumpCamel(horizon=200, norm_coef=1.0, seed=42)\n", + " env.reset(seed=42)\n", + " guide = RewardGuide(env)\n", + " agent = ConvexAgent(\"x = [0.0, 0.0]\")\n", + " llm = DummyLLM(ConvexLLMCallable())\n", + " optimizer = OptoPrimeV2(agent.parameters(), llm=llm)\n", + " algo = BasicSearchAlgorithm(agent, optimizer)\n", + " return algo, guide, agent\n", + "\n", + "\n", + "def make_beamsearch_run():\n", + " env = SixHumpCamel(horizon=200, norm_coef=1.0, seed=42)\n", + " env.reset(seed=42)\n", + " guide = RewardGuide(env)\n", + " agent = ConvexAgent(\"x = [0.0, 0.0]\")\n", + " llm = DummyLLM(ConvexLLMCallable())\n", + " optimizer = OptoPrimeV2(agent.parameters(), llm=llm)\n", + " algo = BeamsearchAlgorithm(agent, optimizer)\n", + " return algo, guide, agent\n", + "\n", + "\n", + "def make_priority_search_run():\n", + " env = SixHumpCamel(horizon=200, norm_coef=1.0, seed=42)\n", + " env.reset(seed=42)\n", + " guide = RewardGuide(env)\n", + " agent = ConvexAgent(\"x = [0.0, 0.0]\")\n", + " llm = DummyLLM(ConvexLLMCallable())\n", + " optimizer = OptoPrimeV2(agent.parameters(), llm=llm)\n", + " algo = PrioritySearch(agent, optimizer)\n", + " return algo, guide, agent\n", + "\n", + "\n", + "# Objective configs\n", + "CONFIG_WEIGHTED = ObjectiveConfig(\n", + " mode=\"weighted\",\n", + " weights={\"base_loss\": 1.0, \"reg_loss\": 1.0},\n", + " minimize=frozenset({\"base_loss\", \"reg_loss\"}),\n", + " seed=0,\n", + ")\n", + "\n", + "CONFIG_PARETO = ObjectiveConfig(\n", + " mode=\"pareto\",\n", + " weights={\"base_loss\": 0.7, \"reg_loss\": 0.3},\n", + " minimize=frozenset({\"base_loss\", \"reg_loss\"}),\n", + " tie_break=\"weighted\",\n", + " seed=42,\n", + ")\n", + "\n", + "# Results collector\n", + "results = {}\n", + "print(\"Setup complete. SixHumpCamel environment + DummyLLM ready.\")\n", + "print(f\"DummyLLM has {len(ConvexLLMCallable.PROPOSALS)} diverse proposals.\")" + ] }, { "cell_type": "code", @@ -87,7 +308,76 @@ "id": "cell-basicsearch-runs", "metadata": {}, "outputs": [], - "source": "# =====================================================================\n# BasicSearch: scalar, weighted, pareto (M1 baseline for comparison)\n# num_epochs=5 gives 5 training steps (1-item dataset, batch_size=1)\n# =====================================================================\nBASIC_KWARGS = dict(\n train_dataset=DATASET,\n num_proposals=4,\n num_epochs=5,\n batch_size=1,\n num_threads=1,\n)\n\n# --- Scalar ---\nprint(\"=\" * 60)\nprint(\"BasicSearch: SCALAR mode (5 epochs)\")\nprint(\"=\" * 60)\nalgo_bs_scalar, guide_bs_scalar, agent_bs_scalar = make_basicsearch_run()\nscores_bs_scalar, test_bs_scalar = algo_bs_scalar.train(\n guide=guide_bs_scalar, objective_config=None, **BASIC_KWARGS\n)\neval_scalar = evaluate_final_losses(agent_bs_scalar.param.data)\nresults['BasicSearch/scalar'] = {\n 'val_scores': scores_bs_scalar,\n 'final_score': test_bs_scalar,\n 'eval_losses': eval_scalar,\n 'final_param': str(agent_bs_scalar.param.data),\n}\nprint(f\"\\nFinal param: {agent_bs_scalar.param.data}\")\nprint(f\"Validation scores ({len(scores_bs_scalar)} steps): {scores_bs_scalar}\")\nprint(f\"Evaluated losses: {eval_scalar}\")\n\n# --- Weighted ---\nprint(\"\\n\" + \"=\" * 60)\nprint(\"BasicSearch: WEIGHTED mode (5 epochs)\")\nprint(\"=\" * 60)\nalgo_bs_weighted, guide_bs_weighted, agent_bs_weighted = make_basicsearch_run()\nscores_bs_weighted, test_bs_weighted = algo_bs_weighted.train(\n guide=guide_bs_weighted, objective_config=CONFIG_WEIGHTED, **BASIC_KWARGS\n)\neval_weighted = evaluate_final_losses(agent_bs_weighted.param.data)\nresults['BasicSearch/weighted'] = {\n 'val_scores': scores_bs_weighted,\n 'final_score': test_bs_weighted,\n 'eval_losses': eval_weighted,\n 'final_param': str(agent_bs_weighted.param.data),\n}\nprint(f\"\\nFinal param: {agent_bs_weighted.param.data}\")\nprint(f\"Validation scores ({len(scores_bs_weighted)} steps): {scores_bs_weighted}\")\nprint(f\"Evaluated losses: {eval_weighted}\")\n\n# --- Pareto ---\nprint(\"\\n\" + \"=\" * 60)\nprint(\"BasicSearch: PARETO mode (5 epochs)\")\nprint(\"=\" * 60)\nalgo_bs_pareto, guide_bs_pareto, agent_bs_pareto = make_basicsearch_run()\nscores_bs_pareto, test_bs_pareto = algo_bs_pareto.train(\n guide=guide_bs_pareto, objective_config=CONFIG_PARETO, **BASIC_KWARGS\n)\neval_pareto = evaluate_final_losses(agent_bs_pareto.param.data)\nresults['BasicSearch/pareto'] = {\n 'val_scores': scores_bs_pareto,\n 'final_score': test_bs_pareto,\n 'eval_losses': eval_pareto,\n 'final_param': str(agent_bs_pareto.param.data),\n}\nprint(f\"\\nFinal param: {agent_bs_pareto.param.data}\")\nprint(f\"Validation scores ({len(scores_bs_pareto)} steps): {scores_bs_pareto}\")\nprint(f\"Evaluated losses: {eval_pareto}\")" + "source": [ + "# =====================================================================\n", + "# BasicSearch: scalar, weighted, pareto (M1 baseline for comparison)\n", + "# num_epochs=5 gives 5 training steps (1-item dataset, batch_size=1)\n", + "# =====================================================================\n", + "BASIC_KWARGS = dict(\n", + " train_dataset=DATASET,\n", + " num_proposals=4,\n", + " num_epochs=5,\n", + " batch_size=1,\n", + " num_threads=1,\n", + ")\n", + "\n", + "# --- Scalar ---\n", + "print(\"=\" * 60)\n", + "print(\"BasicSearch: SCALAR mode (5 epochs)\")\n", + "print(\"=\" * 60)\n", + "algo_bs_scalar, guide_bs_scalar, agent_bs_scalar = make_basicsearch_run()\n", + "scores_bs_scalar, test_bs_scalar = algo_bs_scalar.train(\n", + " guide=guide_bs_scalar, objective_config=None, **BASIC_KWARGS\n", + ")\n", + "eval_scalar = evaluate_final_losses(agent_bs_scalar.param.data)\n", + "results['BasicSearch/scalar'] = {\n", + " 'val_scores': scores_bs_scalar,\n", + " 'final_score': test_bs_scalar,\n", + " 'eval_losses': eval_scalar,\n", + " 'final_param': str(agent_bs_scalar.param.data),\n", + "}\n", + "print(f\"\\nFinal param: {agent_bs_scalar.param.data}\")\n", + "print(f\"Validation scores ({len(scores_bs_scalar)} steps): {scores_bs_scalar}\")\n", + "print(f\"Evaluated losses: {eval_scalar}\")\n", + "\n", + "# --- Weighted ---\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"BasicSearch: WEIGHTED mode (5 epochs)\")\n", + "print(\"=\" * 60)\n", + "algo_bs_weighted, guide_bs_weighted, agent_bs_weighted = make_basicsearch_run()\n", + "scores_bs_weighted, test_bs_weighted = algo_bs_weighted.train(\n", + " guide=guide_bs_weighted, objective_config=CONFIG_WEIGHTED, **BASIC_KWARGS\n", + ")\n", + "eval_weighted = evaluate_final_losses(agent_bs_weighted.param.data)\n", + "results['BasicSearch/weighted'] = {\n", + " 'val_scores': scores_bs_weighted,\n", + " 'final_score': test_bs_weighted,\n", + " 'eval_losses': eval_weighted,\n", + " 'final_param': str(agent_bs_weighted.param.data),\n", + "}\n", + "print(f\"\\nFinal param: {agent_bs_weighted.param.data}\")\n", + "print(f\"Validation scores ({len(scores_bs_weighted)} steps): {scores_bs_weighted}\")\n", + "print(f\"Evaluated losses: {eval_weighted}\")\n", + "\n", + "# --- Pareto ---\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"BasicSearch: PARETO mode (5 epochs)\")\n", + "print(\"=\" * 60)\n", + "algo_bs_pareto, guide_bs_pareto, agent_bs_pareto = make_basicsearch_run()\n", + "scores_bs_pareto, test_bs_pareto = algo_bs_pareto.train(\n", + " guide=guide_bs_pareto, objective_config=CONFIG_PARETO, **BASIC_KWARGS\n", + ")\n", + "eval_pareto = evaluate_final_losses(agent_bs_pareto.param.data)\n", + "results['BasicSearch/pareto'] = {\n", + " 'val_scores': scores_bs_pareto,\n", + " 'final_score': test_bs_pareto,\n", + " 'eval_losses': eval_pareto,\n", + " 'final_param': str(agent_bs_pareto.param.data),\n", + "}\n", + "print(f\"\\nFinal param: {agent_bs_pareto.param.data}\")\n", + "print(f\"Validation scores ({len(scores_bs_pareto)} steps): {scores_bs_pareto}\")\n", + "print(f\"Evaluated losses: {eval_pareto}\")" + ] }, { "cell_type": "code", @@ -95,7 +385,37 @@ "id": "cell-score-progression", "metadata": {}, "outputs": [], - "source": "# =====================================================================\n# Graph 1: Score Progression — BasicSearch validation scores across modes\n# With 5 epochs, each mode produces 5 data points showing optimization\n# =====================================================================\nfig, ax = plt.subplots(1, 1, figsize=(9, 5))\n\nfor label, marker, color in [\n ('BasicSearch/scalar', 'o-', '#1f77b4'),\n ('BasicSearch/weighted', 's-', '#ff7f0e'),\n ('BasicSearch/pareto', '^-', '#2ca02c'),\n]:\n data = results.get(label, {})\n val_scores = data.get('val_scores', [])\n if val_scores:\n steps = list(range(len(val_scores)))\n ax.plot(steps, val_scores, marker, color=color, label=label,\n linewidth=2, markersize=7)\n\nax.set_xlabel('Training Step', fontsize=12)\nax.set_ylabel('Validation Score (reward)', fontsize=12)\nax.set_title('BasicSearch Score Progression — SixHumpCamel (5 epochs)', fontsize=14)\nax.legend(fontsize=10)\nax.grid(True, alpha=0.3)\nplt.tight_layout()\nplt.show()\n\nprint(\"Score progression across 5 training epochs.\")\nprint(\"Higher reward = lower total loss (reward = -total_loss).\")\nprint(\"Different objective modes may select different proposals, producing different curves.\")" + "source": [ + "# =====================================================================\n", + "# Graph 1: Score Progression — BasicSearch validation scores across modes\n", + "# With 5 epochs, each mode produces 5 data points showing optimization\n", + "# =====================================================================\n", + "fig, ax = plt.subplots(1, 1, figsize=(9, 5))\n", + "\n", + "for label, marker, color in [\n", + " ('BasicSearch/scalar', 'o-', '#1f77b4'),\n", + " ('BasicSearch/weighted', 's-', '#ff7f0e'),\n", + " ('BasicSearch/pareto', '^-', '#2ca02c'),\n", + "]:\n", + " data = results.get(label, {})\n", + " val_scores = data.get('val_scores', [])\n", + " if val_scores:\n", + " steps = list(range(len(val_scores)))\n", + " ax.plot(steps, val_scores, marker, color=color, label=label,\n", + " linewidth=2, markersize=7)\n", + "\n", + "ax.set_xlabel('Training Step', fontsize=12)\n", + "ax.set_ylabel('Validation Score (reward)', fontsize=12)\n", + "ax.set_title('BasicSearch Score Progression — SixHumpCamel (5 epochs)', fontsize=14)\n", + "ax.legend(fontsize=10)\n", + "ax.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "print(\"Score progression across 5 training epochs.\")\n", + "print(\"Higher reward = lower total loss (reward = -total_loss).\")\n", + "print(\"Different objective modes may select different proposals, producing different curves.\")" + ] }, { "cell_type": "markdown", @@ -118,7 +438,46 @@ "id": "cell-beamsearch-run", "metadata": {}, "outputs": [], - "source": "# =====================================================================\n# BeamsearchAlgorithm: weighted mode\n# max_depth=2 gives 2 beam search levels with validation at each\n# =====================================================================\nprint(\"=\" * 60)\nprint(\"Beamsearch: WEIGHTED mode (depth=2, width=3)\")\nprint(\"=\" * 60)\n\nalgo_beam_w, guide_beam_w, agent_beam_w = make_beamsearch_run()\nmetrics_beam_w, final_beam_w = algo_beam_w.train(\n guide=guide_beam_w,\n train_dataset=DATASET,\n objective_config=CONFIG_WEIGHTED,\n beam_width=3,\n num_proposals=3,\n max_depth=2,\n batch_size=1,\n num_threads=1,\n)\n\n# Post-training evaluation on fresh env\neval_beam_w = evaluate_final_losses(agent_beam_w.param.data)\n\n# Extract score_dicts from selection\nbeam_score_dicts = getattr(algo_beam_w, '_last_selected_score_dicts', None)\nbeam_best_sd = beam_score_dicts[0] if beam_score_dicts else None\n\nresults['Beamsearch/weighted'] = {\n 'val_scores': metrics_beam_w.get('best_validation_scores', []),\n 'final_score': final_beam_w,\n 'eval_losses': eval_beam_w,\n 'final_param': str(agent_beam_w.param.data),\n}\n\nprint(f\"\\nFinal param: {agent_beam_w.param.data}\")\nprint(f\"Validation scores by depth: {metrics_beam_w.get('best_validation_scores', [])}\")\nprint(f\"_last_selected_score_dicts: {beam_score_dicts}\")\nprint(f\"Evaluated losses: {eval_beam_w}\")" + "source": [ + "# =====================================================================\n", + "# BeamsearchAlgorithm: weighted mode\n", + "# max_depth=2 gives 2 beam search levels with validation at each\n", + "# =====================================================================\n", + "print(\"=\" * 60)\n", + "print(\"Beamsearch: WEIGHTED mode (depth=2, width=3)\")\n", + "print(\"=\" * 60)\n", + "\n", + "algo_beam_w, guide_beam_w, agent_beam_w = make_beamsearch_run()\n", + "metrics_beam_w, final_beam_w = algo_beam_w.train(\n", + " guide=guide_beam_w,\n", + " train_dataset=DATASET,\n", + " objective_config=CONFIG_WEIGHTED,\n", + " beam_width=3,\n", + " num_proposals=3,\n", + " max_depth=2,\n", + " batch_size=1,\n", + " num_threads=1,\n", + ")\n", + "\n", + "# Post-training evaluation on fresh env\n", + "eval_beam_w = evaluate_final_losses(agent_beam_w.param.data)\n", + "\n", + "# Extract score_dicts from selection\n", + "beam_score_dicts = getattr(algo_beam_w, '_last_selected_score_dicts', None)\n", + "beam_best_sd = beam_score_dicts[0] if beam_score_dicts else None\n", + "\n", + "results['Beamsearch/weighted'] = {\n", + " 'val_scores': metrics_beam_w.get('best_validation_scores', []),\n", + " 'final_score': final_beam_w,\n", + " 'eval_losses': eval_beam_w,\n", + " 'final_param': str(agent_beam_w.param.data),\n", + "}\n", + "\n", + "print(f\"\\nFinal param: {agent_beam_w.param.data}\")\n", + "print(f\"Validation scores by depth: {metrics_beam_w.get('best_validation_scores', [])}\")\n", + "print(f\"_last_selected_score_dicts: {beam_score_dicts}\")\n", + "print(f\"Evaluated losses: {eval_beam_w}\")" + ] }, { "cell_type": "markdown", @@ -142,7 +501,58 @@ "id": "cell-priority-weighted", "metadata": {}, "outputs": [], - "source": "# =====================================================================\n# PrioritySearch: weighted mode (more epochs for real exploration)\n# =====================================================================\nprint(\"=\" * 60)\nprint(\"PrioritySearch: WEIGHTED mode (2 epochs, 2 batches)\")\nprint(\"=\" * 60)\n\nalgo_ps_w, guide_ps_w, agent_ps_w = make_priority_search_run()\nalgo_ps_w.train(\n guide=guide_ps_w,\n train_dataset=DATASET,\n objective_config=CONFIG_WEIGHTED,\n batch_size=1,\n num_batches=2,\n num_epochs=2,\n num_candidates=3,\n num_proposals=2,\n num_threads=1,\n long_term_memory_size=10,\n memory_update_frequency=0,\n verbose=False,\n)\n\n# Post-training evaluation on fresh env\neval_ps_w = evaluate_final_losses(agent_ps_w.param.data)\n\n# Extract best candidate from memory\nps_w_sd = None\nps_w_final = None\nif hasattr(algo_ps_w, 'long_term_memory') and algo_ps_w.long_term_memory:\n best_neg, best_cand = min(algo_ps_w.long_term_memory, key=lambda x: x[0])\n ps_w_sd = best_cand.mean_score_dict()\n ps_w_final = float(-best_neg)\n print(f\"\\nBest candidate priority: {ps_w_final:.4f}\")\n print(f\"Best candidate mean_score_dict: {ps_w_sd}\")\n has_sd = any('score_dict' in r and r['score_dict'] is not None\n for r in best_cand.rollouts)\n print(f\"Rollouts contain score_dict: {has_sd}\")\nelse:\n print(\"No candidates in long_term_memory\")\n\nprint(f\"Final param: {agent_ps_w.param.data}\")\nprint(f\"Evaluated losses: {eval_ps_w}\")\n\nresults['PrioritySearch/weighted'] = {\n 'val_scores': [],\n 'final_score': ps_w_final,\n 'eval_losses': eval_ps_w,\n 'final_param': str(agent_ps_w.param.data),\n}" + "source": [ + "# =====================================================================\n", + "# PrioritySearch: weighted mode (more epochs for real exploration)\n", + "# =====================================================================\n", + "print(\"=\" * 60)\n", + "print(\"PrioritySearch: WEIGHTED mode (2 epochs, 2 batches)\")\n", + "print(\"=\" * 60)\n", + "\n", + "algo_ps_w, guide_ps_w, agent_ps_w = make_priority_search_run()\n", + "algo_ps_w.train(\n", + " guide=guide_ps_w,\n", + " train_dataset=DATASET,\n", + " objective_config=CONFIG_WEIGHTED,\n", + " batch_size=1,\n", + " num_batches=2,\n", + " num_epochs=2,\n", + " num_candidates=3,\n", + " num_proposals=2,\n", + " num_threads=1,\n", + " long_term_memory_size=10,\n", + " memory_update_frequency=0,\n", + " verbose=False,\n", + ")\n", + "\n", + "# Post-training evaluation on fresh env\n", + "eval_ps_w = evaluate_final_losses(agent_ps_w.param.data)\n", + "\n", + "# Extract best candidate from memory\n", + "ps_w_sd = None\n", + "ps_w_final = None\n", + "if hasattr(algo_ps_w, 'long_term_memory') and algo_ps_w.long_term_memory:\n", + " best_neg, best_cand = min(algo_ps_w.long_term_memory, key=lambda x: x[0])\n", + " ps_w_sd = best_cand.mean_score_dict()\n", + " ps_w_final = float(-best_neg)\n", + " print(f\"\\nBest candidate priority: {ps_w_final:.4f}\")\n", + " print(f\"Best candidate mean_score_dict: {ps_w_sd}\")\n", + " has_sd = any('score_dict' in r and r['score_dict'] is not None\n", + " for r in best_cand.rollouts)\n", + " print(f\"Rollouts contain score_dict: {has_sd}\")\n", + "else:\n", + " print(\"No candidates in long_term_memory\")\n", + "\n", + "print(f\"Final param: {agent_ps_w.param.data}\")\n", + "print(f\"Evaluated losses: {eval_ps_w}\")\n", + "\n", + "results['PrioritySearch/weighted'] = {\n", + " 'val_scores': [],\n", + " 'final_score': ps_w_final,\n", + " 'eval_losses': eval_ps_w,\n", + " 'final_param': str(agent_ps_w.param.data),\n", + "}" + ] }, { "cell_type": "code", @@ -150,7 +560,66 @@ "id": "cell-priority-pareto", "metadata": {}, "outputs": [], - "source": "# =====================================================================\n# PrioritySearch: Pareto mode (uses ParetoHeapMemory)\n# =====================================================================\nprint(\"=\" * 60)\nprint(\"PrioritySearch: PARETO mode (2 epochs, 2 batches)\")\nprint(\"=\" * 60)\n\nalgo_ps_p, guide_ps_p, agent_ps_p = make_priority_search_run()\nalgo_ps_p.train(\n guide=guide_ps_p,\n train_dataset=DATASET,\n objective_config=CONFIG_PARETO,\n batch_size=1,\n num_batches=2,\n num_epochs=2,\n num_candidates=3,\n num_proposals=2,\n num_threads=1,\n long_term_memory_size=10,\n memory_update_frequency=0,\n verbose=False,\n)\n\n# Post-training evaluation on fresh env\neval_ps_p = evaluate_final_losses(agent_ps_p.param.data)\n\n# Extract best candidate from memory\nps_p_sd = None\nps_p_final = None\nif hasattr(algo_ps_p, 'long_term_memory') and algo_ps_p.long_term_memory:\n mem_type = type(algo_ps_p.long_term_memory).__name__\n print(f\"Memory type: {mem_type}\")\n\n best_neg_p, best_cand_p = min(algo_ps_p.long_term_memory, key=lambda x: x[0])\n ps_p_sd = best_cand_p.mean_score_dict()\n ps_p_final = float(-best_neg_p)\n print(f\"\\nBest candidate priority: {ps_p_final:.4f}\")\n print(f\"Best candidate mean_score_dict: {ps_p_sd}\")\n has_sd_p = any('score_dict' in r and r['score_dict'] is not None\n for r in best_cand_p.rollouts)\n print(f\"Rollouts contain score_dict: {has_sd_p}\")\n\n print(f\"\\nAll candidates in memory ({len(algo_ps_p.long_term_memory)}):\")\n for neg_p, cand in sorted(algo_ps_p.long_term_memory, key=lambda x: x[0]):\n sd = cand.mean_score_dict()\n print(f\" priority={-neg_p:.4f}, score_dict={sd}\")\nelse:\n print(\"No candidates in long_term_memory\")\n\nprint(f\"Final param: {agent_ps_p.param.data}\")\nprint(f\"Evaluated losses: {eval_ps_p}\")\n\nresults['PrioritySearch/pareto'] = {\n 'val_scores': [],\n 'final_score': ps_p_final,\n 'eval_losses': eval_ps_p,\n 'final_param': str(agent_ps_p.param.data),\n}" + "source": [ + "# =====================================================================\n", + "# PrioritySearch: Pareto mode (uses ParetoHeapMemory)\n", + "# =====================================================================\n", + "print(\"=\" * 60)\n", + "print(\"PrioritySearch: PARETO mode (2 epochs, 2 batches)\")\n", + "print(\"=\" * 60)\n", + "\n", + "algo_ps_p, guide_ps_p, agent_ps_p = make_priority_search_run()\n", + "algo_ps_p.train(\n", + " guide=guide_ps_p,\n", + " train_dataset=DATASET,\n", + " objective_config=CONFIG_PARETO,\n", + " batch_size=1,\n", + " num_batches=2,\n", + " num_epochs=2,\n", + " num_candidates=3,\n", + " num_proposals=2,\n", + " num_threads=1,\n", + " long_term_memory_size=10,\n", + " memory_update_frequency=0,\n", + " verbose=False,\n", + ")\n", + "\n", + "# Post-training evaluation on fresh env\n", + "eval_ps_p = evaluate_final_losses(agent_ps_p.param.data)\n", + "\n", + "# Extract best candidate from memory\n", + "ps_p_sd = None\n", + "ps_p_final = None\n", + "if hasattr(algo_ps_p, 'long_term_memory') and algo_ps_p.long_term_memory:\n", + " mem_type = type(algo_ps_p.long_term_memory).__name__\n", + " print(f\"Memory type: {mem_type}\")\n", + "\n", + " best_neg_p, best_cand_p = min(algo_ps_p.long_term_memory, key=lambda x: x[0])\n", + " ps_p_sd = best_cand_p.mean_score_dict()\n", + " ps_p_final = float(-best_neg_p)\n", + " print(f\"\\nBest candidate priority: {ps_p_final:.4f}\")\n", + " print(f\"Best candidate mean_score_dict: {ps_p_sd}\")\n", + " has_sd_p = any('score_dict' in r and r['score_dict'] is not None\n", + " for r in best_cand_p.rollouts)\n", + " print(f\"Rollouts contain score_dict: {has_sd_p}\")\n", + "\n", + " print(f\"\\nAll candidates in memory ({len(algo_ps_p.long_term_memory)}):\")\n", + " for neg_p, cand in sorted(algo_ps_p.long_term_memory, key=lambda x: x[0]):\n", + " sd = cand.mean_score_dict()\n", + " print(f\" priority={-neg_p:.4f}, score_dict={sd}\")\n", + "else:\n", + " print(\"No candidates in long_term_memory\")\n", + "\n", + "print(f\"Final param: {agent_ps_p.param.data}\")\n", + "print(f\"Evaluated losses: {eval_ps_p}\")\n", + "\n", + "results['PrioritySearch/pareto'] = {\n", + " 'val_scores': [],\n", + " 'final_score': ps_p_final,\n", + " 'eval_losses': eval_ps_p,\n", + " 'final_param': str(agent_ps_p.param.data),\n", + "}" + ] }, { "cell_type": "code", @@ -158,7 +627,51 @@ "id": "cell-scatter", "metadata": {}, "outputs": [], - "source": "# =====================================================================\n# Graph 2: Comparison Scatter — base_loss vs reg_loss\n# Each point = final parameter evaluated on a fresh SixHumpCamel env\n# =====================================================================\nfig, ax = plt.subplots(1, 1, figsize=(8, 6))\n\nmarkers = {\n 'BasicSearch': 'o',\n 'Beamsearch': 's',\n 'PrioritySearch': '^',\n}\ncolors = {\n 'scalar': '#1f77b4',\n 'weighted': '#ff7f0e',\n 'pareto': '#2ca02c',\n}\n\nfor run_name, run_data in results.items():\n el = run_data.get('eval_losses')\n if el is None or 'base_loss' not in el or 'reg_loss' not in el:\n continue\n # Skip NaN entries (e.g. if text_extract failed)\n if np.isnan(el['base_loss']) or np.isnan(el['reg_loss']):\n continue\n algo_name, mode_name = run_name.split('/')\n ax.scatter(\n el['base_loss'], el['reg_loss'],\n marker=markers.get(algo_name, 'x'),\n color=colors.get(mode_name, 'gray'),\n s=120, edgecolors='black', linewidths=0.8,\n label=run_name, zorder=5,\n )\n\nax.set_xlabel('base_loss (lower is better)', fontsize=12)\nax.set_ylabel('reg_loss (lower is better)', fontsize=12)\nax.set_title('Multi-Objective Comparison — base_loss vs reg_loss', fontsize=14)\nax.legend(fontsize=9, loc='upper right')\nax.grid(True, alpha=0.3)\nplt.tight_layout()\nplt.show()\n\nprint(\"Graph 2: Each point represents the final parameter evaluated on a fresh SixHumpCamel env.\")\nprint(\"Ideal candidates are in the bottom-left (low base_loss AND low reg_loss).\")" + "source": [ + "# =====================================================================\n", + "# Graph 2: Comparison Scatter — base_loss vs reg_loss\n", + "# Each point = final parameter evaluated on a fresh SixHumpCamel env\n", + "# =====================================================================\n", + "fig, ax = plt.subplots(1, 1, figsize=(8, 6))\n", + "\n", + "markers = {\n", + " 'BasicSearch': 'o',\n", + " 'Beamsearch': 's',\n", + " 'PrioritySearch': '^',\n", + "}\n", + "colors = {\n", + " 'scalar': '#1f77b4',\n", + " 'weighted': '#ff7f0e',\n", + " 'pareto': '#2ca02c',\n", + "}\n", + "\n", + "for run_name, run_data in results.items():\n", + " el = run_data.get('eval_losses')\n", + " if el is None or 'base_loss' not in el or 'reg_loss' not in el:\n", + " continue\n", + " # Skip NaN entries (e.g. if text_extract failed)\n", + " if np.isnan(el['base_loss']) or np.isnan(el['reg_loss']):\n", + " continue\n", + " algo_name, mode_name = run_name.split('/')\n", + " ax.scatter(\n", + " el['base_loss'], el['reg_loss'],\n", + " marker=markers.get(algo_name, 'x'),\n", + " color=colors.get(mode_name, 'gray'),\n", + " s=120, edgecolors='black', linewidths=0.8,\n", + " label=run_name, zorder=5,\n", + " )\n", + "\n", + "ax.set_xlabel('base_loss (lower is better)', fontsize=12)\n", + "ax.set_ylabel('reg_loss (lower is better)', fontsize=12)\n", + "ax.set_title('Multi-Objective Comparison — base_loss vs reg_loss', fontsize=14)\n", + "ax.legend(fontsize=9, loc='upper right')\n", + "ax.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "print(\"Graph 2: Each point represents the final parameter evaluated on a fresh SixHumpCamel env.\")\n", + "print(\"Ideal candidates are in the bottom-left (low base_loss AND low reg_loss).\")" + ] }, { "cell_type": "code", @@ -166,7 +679,53 @@ "id": "cell-summary", "metadata": {}, "outputs": [], - "source": "# =====================================================================\n# Summary Table\n# =====================================================================\nrows = []\nfor run_name, run_data in results.items():\n algo_name, mode_name = run_name.split('/')\n el = run_data.get('eval_losses')\n rows.append({\n 'Algorithm': algo_name,\n 'Mode': mode_name,\n 'Final Scalar Score': f\"{run_data.get('final_score', 'N/A')}\",\n 'base_loss': f\"{el['base_loss']:.4f}\" if el and 'base_loss' in el and not np.isnan(el['base_loss']) else 'N/A',\n 'reg_loss': f\"{el['reg_loss']:.4f}\" if el and 'reg_loss' in el and not np.isnan(el['reg_loss']) else 'N/A',\n 'total_loss': f\"{el['total_loss']:.4f}\" if el and 'total_loss' in el and not np.isnan(el['total_loss']) else 'N/A',\n 'Final Param': run_data.get('final_param', 'N/A'),\n })\n\ndf = pd.DataFrame(rows)\nprint(\"\\n\" + \"=\" * 70)\nprint(\"SUMMARY: Multi-Objective Training Results\")\nprint(\"=\" * 70)\nprint(df.to_string(index=False))\n\nprint(\"\\n\" + \"=\" * 70)\nprint(\"M2 NOTEBOOK COMPLETE\")\nprint(\"=\" * 70)\nprint(\"\"\"\nDeliverables verified:\n Part A (BasicSearch): scalar, weighted, Pareto modes on SixHumpCamel\n - Backward compatible (objective_config=None)\n - Weighted mode populates current_score_dict\n - Pareto mode selects from non-dominated front\n\n Part B (BeamsearchAlgorithm): weighted mode with vector select()\n - evaluate_vector() computes per-metric scores for beam candidates\n - select_top_k() ranks candidates via ObjectiveConfig\n - _last_selected_score_dicts populated for per-metric logging\n\n Part C (PrioritySearch): weighted + Pareto modes\n - validate() populates score_dict in rollouts\n - mean_score_dict() aggregates per-metric means\n - compute_exploration_priority() uses weighted scalarization\n - ParetoHeapMemory used in Pareto mode\n - Rollouts contain score_dict entries\n\"\"\")" + "source": [ + "# =====================================================================\n", + "# Summary Table\n", + "# =====================================================================\n", + "rows = []\n", + "for run_name, run_data in results.items():\n", + " algo_name, mode_name = run_name.split('/')\n", + " el = run_data.get('eval_losses')\n", + " rows.append({\n", + " 'Algorithm': algo_name,\n", + " 'Mode': mode_name,\n", + " 'Final Scalar Score': f\"{run_data.get('final_score', 'N/A')}\",\n", + " 'base_loss': f\"{el['base_loss']:.4f}\" if el and 'base_loss' in el and not np.isnan(el['base_loss']) else 'N/A',\n", + " 'reg_loss': f\"{el['reg_loss']:.4f}\" if el and 'reg_loss' in el and not np.isnan(el['reg_loss']) else 'N/A',\n", + " 'total_loss': f\"{el['total_loss']:.4f}\" if el and 'total_loss' in el and not np.isnan(el['total_loss']) else 'N/A',\n", + " 'Final Param': run_data.get('final_param', 'N/A'),\n", + " })\n", + "\n", + "df = pd.DataFrame(rows)\n", + "print(\"\\n\" + \"=\" * 70)\n", + "print(\"SUMMARY: Multi-Objective Training Results\")\n", + "print(\"=\" * 70)\n", + "print(df.to_string(index=False))\n", + "\n", + "print(\"\\n\" + \"=\" * 70)\n", + "print(\"M2 NOTEBOOK COMPLETE\")\n", + "print(\"=\" * 70)\n", + "print(\"\"\"\n", + "Deliverables verified:\n", + " Part A (BasicSearch): scalar, weighted, Pareto modes on SixHumpCamel\n", + " - Backward compatible (objective_config=None)\n", + " - Weighted mode populates current_score_dict\n", + " - Pareto mode selects from non-dominated front\n", + "\n", + " Part B (BeamsearchAlgorithm): weighted mode with vector select()\n", + " - evaluate_vector() computes per-metric scores for beam candidates\n", + " - select_top_k() ranks candidates via ObjectiveConfig\n", + " - _last_selected_score_dicts populated for per-metric logging\n", + "\n", + " Part C (PrioritySearch): weighted + Pareto modes\n", + " - validate() populates score_dict in rollouts\n", + " - mean_score_dict() aggregates per-metric means\n", + " - compute_exploration_priority() uses weighted scalarization\n", + " - ParetoHeapMemory used in Pareto mode\n", + " - Rollouts contain score_dict entries\n", + "\"\"\")" + ] } ], "metadata": { diff --git a/examples/priority_search_example.py b/examples/priority_search_example.py index b63e04a7..dfc6363a 100644 --- a/examples/priority_search_example.py +++ b/examples/priority_search_example.py @@ -4,7 +4,7 @@ from opto.utils.llm import LLM from opto.features.predefined_agents import BasicLearner from opto.optimizers import OptoPrimeV2 as OptoPrime -from opto.features.priority_search import PrioritySearch as SearchAlgorithm +from opto.trainer.algorithms import PrioritySearch as SearchAlgorithm from opto.trainer.loggers import TensorboardLogger from opto.trainer.guide import LLMJudge diff --git a/examples/priority_search_on_convex_fn.py b/examples/priority_search_on_convex_fn.py index 8122dde7..b207e676 100644 --- a/examples/priority_search_on_convex_fn.py +++ b/examples/priority_search_on_convex_fn.py @@ -212,7 +212,7 @@ def __init__(self, feedback=0, seed=None, horizon=10): from opto import trace from opto.utils.llm import LLM, LiteLLM from opto.optimizers import OptoPrimeV2 as OptoPrime -from opto.features.priority_search import PrioritySearch as SearchAlgorithm +from opto.trainer.algorithms import PrioritySearch as SearchAlgorithm from opto.trainer.guide import Guide from opto.trainer.loggers import TensorboardLogger from opto.trainer.guide import LLMJudge diff --git a/examples/priority_search_on_convex_fn_BENCH.py b/examples/priority_search_on_convex_fn_BENCH.py index 8f1a974e..1cd0ef3a 100644 --- a/examples/priority_search_on_convex_fn_BENCH.py +++ b/examples/priority_search_on_convex_fn_BENCH.py @@ -9,8 +9,8 @@ # ============ TESTING code ============= import numpy as np from opto import trace -from opto.features.priority_search import PrioritySearch as SearchAlgorithm -from opto.features.gepa.gepa_algorithms import GEPAAlgorithmBase, GEPAUCBSearch, GEPABeamPareto +from opto.trainer.algorithms import PrioritySearch as SearchAlgorithm +from examples.trainers.gepa_algorithms import GEPAAlgorithmBase, GEPAUCBSearch, GEPABeamPareto from typing import Any from opto import trainer from typing import Tuple diff --git a/examples/search_algo_example.py b/examples/search_algo_example.py index 5b04d11c..ea919c95 100644 --- a/examples/search_algo_example.py +++ b/examples/search_algo_example.py @@ -13,9 +13,9 @@ from opto.optimizers import OptoPrime from opto.optimizers.utils import print_color from opto.trace.modules import Module -from opto.trainer.algorithms.basic_algorithms import MinibatchAlgorithm, BasicSearchAlgorithm -from opto.trainer.algorithms.beamsearch_algorithm import BeamsearchAlgorithm, BeamsearchHistoryAlgorithm -from opto.trainer.algorithms.UCBsearch import UCBSearchAlgorithm +from examples.trainers.basic_algorithms import MinibatchAlgorithm, BasicSearchAlgorithm +from examples.trainers.beamsearch_algorithm import BeamsearchAlgorithm, BeamsearchHistoryAlgorithm +from examples.trainers.UCBsearch import UCBSearchAlgorithm from opto.trainer.guide import Guide from opto.trainer.loggers import DefaultLogger from opto.utils.llm import LLM diff --git a/opto/trainer/algorithms/UCBsearch.py b/examples/trainers/UCBsearch.py similarity index 99% rename from opto/trainer/algorithms/UCBsearch.py rename to examples/trainers/UCBsearch.py index 51e2b1a9..570d17a9 100644 --- a/opto/trainer/algorithms/UCBsearch.py +++ b/examples/trainers/UCBsearch.py @@ -6,7 +6,7 @@ from opto import trace from opto.trainer.utils import async_run # Assuming print_color is in utils from opto.optimizers.utils import print_color -from opto.trainer.algorithms.basic_algorithms import MinibatchAlgorithm, evaluate, batchify # evaluate and batchify might be useful +from examples.trainers.basic_algorithms import MinibatchAlgorithm, evaluate, batchify # evaluate and batchify might be useful class UCBSearchAlgorithm(MinibatchAlgorithm): """ diff --git a/examples/trainers/__init__.py b/examples/trainers/__init__.py new file mode 100644 index 00000000..72268d41 --- /dev/null +++ b/examples/trainers/__init__.py @@ -0,0 +1,4 @@ +from examples.trainers.basic_algorithms import Minibatch, MinibatchAlgorithm, BasicSearchAlgorithm +from examples.trainers.beamsearch_algorithm import BeamsearchAlgorithm, BeamsearchHistoryAlgorithm +from examples.trainers.UCBsearch import UCBSearchAlgorithm +from examples.trainers.gepa_algorithms import GEPAAlgorithmBase, GEPAUCBSearch, GEPABeamPareto diff --git a/opto/trainer/algorithms/basic_algorithms.py b/examples/trainers/basic_algorithms.py similarity index 97% rename from opto/trainer/algorithms/basic_algorithms.py rename to examples/trainers/basic_algorithms.py index 5edb3a3e..b57d0db4 100644 --- a/opto/trainer/algorithms/basic_algorithms.py +++ b/examples/trainers/basic_algorithms.py @@ -4,7 +4,7 @@ from opto import trace from opto.trainer.algorithms.algorithm import Trainer from opto.trainer.loader import DataLoader -from opto.trainer.utils import batch_run, async_run +from opto.trainer.utils import batch_run, async_run, batchify from opto.optimizers.utils import print_color from opto.trainer.evaluators import evaluate, evaluate_vector, aggregate_vector_scores from opto.trainer.objectives import ObjectiveConfig, select_best, apply_minimize, weighted_scalarize @@ -429,31 +429,6 @@ def update(self, outputs, verbose=False, num_threads=None, **kwargs): -@trace.bundle() -def batchify(*items): - """Concatenate multiple items into a formatted batch string. - - Parameters - ---------- - *items : Any - Variable number of items to concatenate into a batch. - - Returns - ------- - str - Formatted string with each item labeled by ID. - - Notes - ----- - This function is decorated with @trace.bundle() and creates a formatted - string where each item is prefixed with 'ID [i]:' for identification. - """ - output = '' - for i, item in enumerate(items): - output += f'ID {[i]}: {item}\n' - return output - - class MinibatchAlgorithm(Minibatch): """Standard minibatch algorithm that aggregates outputs for batch feedback. diff --git a/opto/trainer/algorithms/beamsearch_algorithm.py b/examples/trainers/beamsearch_algorithm.py similarity index 99% rename from opto/trainer/algorithms/beamsearch_algorithm.py rename to examples/trainers/beamsearch_algorithm.py index 43655f0c..52d5ea0f 100644 --- a/opto/trainer/algorithms/beamsearch_algorithm.py +++ b/examples/trainers/beamsearch_algorithm.py @@ -3,7 +3,7 @@ from typing import Union, List, Tuple, Dict, Any, Optional from opto.trainer.utils import async_run, batch_run from opto.optimizers.utils import print_color -from opto.trainer.algorithms.basic_algorithms import MinibatchAlgorithm, evaluate, batchify, _objective_scalar +from examples.trainers.basic_algorithms import MinibatchAlgorithm, evaluate, batchify, _objective_scalar from opto.trainer.evaluators import evaluate_vector, aggregate_vector_scores from opto.trainer.objectives import ObjectiveConfig, select_top_k, apply_minimize, weighted_scalarize diff --git a/opto/features/gepa/gepa_algorithms.py b/examples/trainers/gepa_algorithms.py similarity index 99% rename from opto/features/gepa/gepa_algorithms.py rename to examples/trainers/gepa_algorithms.py index 7494c0ca..a5d84a9f 100644 --- a/opto/features/gepa/gepa_algorithms.py +++ b/examples/trainers/gepa_algorithms.py @@ -17,10 +17,10 @@ from opto.optimizers.optoprime_v2 import OptoPrimeV2 from opto.trace.nodes import ParameterNode -from opto.trainer.algorithms.UCBsearch import UCBSearchAlgorithm -from opto.trainer.algorithms.beamsearch_algorithm import BeamsearchAlgorithm +from examples.trainers.UCBsearch import UCBSearchAlgorithm +from examples.trainers.beamsearch_algorithm import BeamsearchAlgorithm from opto.trainer.algorithms.algorithm import Trainer as AlgorithmBase -from opto.trainer.algorithms.basic_algorithms import ( +from examples.trainers.basic_algorithms import ( evaluate, batchify, standard_optimization_step, diff --git a/opto/features/gepa/__init__.py b/opto/features/gepa/__init__.py deleted file mode 100644 index dd92a13c..00000000 --- a/opto/features/gepa/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""GEPA (Genetic Enhancement via Population Algorithm) implementations. - -This module contains experimental GEPA algorithms that extend basic optimization -with population-based genetic enhancement techniques. -""" - -from .gepa_algorithms import (GEPAAlgorithmBase, GEPAUCBSearch, GEPABeamPareto) - -__all__ = ['GEPAAlgorithmBase', 'GEPAUCBSearch', 'GEPABeamPareto'] \ No newline at end of file diff --git a/opto/features/priority_search/__init__.py b/opto/features/priority_search/__init__.py deleted file mode 100644 index 5ec28705..00000000 --- a/opto/features/priority_search/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from opto.features.priority_search.priority_search import PrioritySearch -from opto.features.priority_search.examples import SequentialUpdate, SequentialSearch, BeamSearch \ No newline at end of file diff --git a/opto/features/priority_search/module_regressor.py b/opto/features/priority_search/module_regressor.py deleted file mode 100644 index 818df096..00000000 --- a/opto/features/priority_search/module_regressor.py +++ /dev/null @@ -1,367 +0,0 @@ -import numpy as np -from opto.trainer.utils import batch_run, async_run -from opto.optimizers.utils import print_color -from typing import Union, List, Tuple, Dict, Any, Optional -from opto.utils.auto_retry import retry_with_exponential_backoff -import litellm -import time -from opto.features.priority_search.priority_search import ModuleCandidate - - -def embed_text(model, text): - """Call the embedding API for a given model and text string. - - This is a standalone function so users can easily replace it with a custom - embedding provider (e.g. local model, different API) without subclassing. - Must return a litellm-compatible response with response.data[0].embedding. - """ - return litellm.embedding(model=model, input=text) - - -class RegressorTemplate: - """Base class template for regression-based predictors for ModuleCandidate objects. - - Provides common functionality for embedding generation and candidate processing. - Subclasses should implement update() and predict_scores() methods. - - Regressors can be built on this template by implementing the update() and predict_scores() methods. - This class itself is enough for getting embeddings for candidates. - """ - - def __init__(self, embedding_model="gemini/gemini-embedding-001", num_threads=None, regularization_strength=1, linear_dim=None, rich_text=True,verbose: bool = False, max_candidates_to_predict=500,original_embedding_dim=768): - ''' - Args: - embedding_model: The embedding model to use. - num_threads: The number of threads to use for the embedding generation. - regularization_strength: The regularization strength for the logistic regression. - linear_dim: The dimension of the linear space. - rich_text: Whether to use rich text for the parameter text. - verbose: Whether to print the verbose output. - max_candidates_to_predict: The maximum number of candidates to predict. - original_embedding_dim: The original dimension of the embedding. - ''' - def _get_parameter_text(self, candidate): - """Get the parameter text for a ModuleCandidate.""" - if not hasattr(candidate, 'update_dict'): - print(candidate) - assert hasattr(candidate, 'update_dict'), "ModuleCandidate must have an update_dict" - # Convert parameter nodes to readable names for deterministic embedding - params_with_names = {k.py_name: v for k, v in candidate.update_dict.items()} - return str(params_with_names) - - - def _get_embedding(self, candidate,max_retries=10,base_delay=1.0): - """Get the embedding for a ModuleCandidate.""" - parameter_text = self._get_parameter_text(candidate) - - try: - response = retry_with_exponential_backoff( - lambda: embed_text(self.embedding_model, parameter_text), - max_retries=max_retries, - base_delay=base_delay, - operation_name="Embedding API call" - ) - embedding = response.data[0].embedding - if self.random_projector is not None: - embedding_array = np.array(embedding).reshape(1, -1) - projected = self.random_projector.transform(embedding_array) - embedding = projected.flatten().tolist() - return embedding - except Exception as e: - print_color(f"ERROR: Embedding API call failed after retries: {e}", "red") - return None - - def add_embeddings_to_candidates(self, candidates: List[ModuleCandidate]): - """Add embeddings to a list of candidates. This function could be used outside.""" - self._update_memory_embeddings_for_batch(candidates) - - def _update_memory_embeddings_for_batch(self, batch,max_workers=50,max_retries=10,base_delay=1.0): - """Update the embeddings for a batch of candidates.""" - # Separate candidates that need embeddings from those that already have them - candidates_needing_embeddings = [] - for candidate in batch: - if not hasattr(candidate, "embedding"): - candidates_needing_embeddings.append(candidate) - - # Generate embeddings in parallel for candidates that need them - if candidates_needing_embeddings: - def get_embedding_for_candidate(candidate): - return self._get_embedding(candidate) - - # Create function list for async_run - embedding_functions = [lambda c=candidate: get_embedding_for_candidate(c) - for candidate in candidates_needing_embeddings] - - # Run embedding generation in parallel - new_embeddings = async_run( - embedding_functions, - max_workers=max_workers, - description=f"Generating embeddings for {len(candidates_needing_embeddings)} candidates" - ) - - # Assign embeddings back to candidates - for candidate, embedding in zip(candidates_needing_embeddings, new_embeddings): - candidate.embedding = embedding - - def update(self, memory: List[Tuple[float, ModuleCandidate]]): - """Update the regression model parameters. Should be implemented by subclasses.""" - raise NotImplementedError("Subclasses must implement the update method") - - def predict_scores(self, memory: List[Tuple[float, ModuleCandidate]]): - """Predict scores for candidates. Should be implemented by subclasses.""" - raise NotImplementedError("Subclasses must implement the predict_scores method") - -class ModuleCandidateRegressor: - """ - Predict scores using embedding logistic regression for ModuleCandidate objects. - Should have two key methods: predict_scores and predict_scores_for_batch. - predict_scores has no parameters, it could return predicted scores for all candidates in the memory. - predict_scores_for_batch has one parameter, a batch of candidates, it could return predicted scores for the batch of candidates.""" - - def __init__(self, memory=None, embedding_model="gemini/text-embedding-004", num_threads=None, learning_rate=0.2, regularization_strength=1e-4, max_iterations=20000, tolerance=5e-3, max_candidates_to_predict=500, original_embedding_dim=768,patience=20,lr_decay_factor=0.8): - self.max_candidates_to_predict = max_candidates_to_predict - self.memory = memory - self.embedding_model = embedding_model - self.num_threads = num_threads - self.learning_rate = learning_rate - self.initial_learning_rate = learning_rate - self.regularization_strength = regularization_strength # L2 regularization strength (lambda) - self.max_iterations = max_iterations - self.tolerance = tolerance - self.patience = patience # Early stopping patience - self.lr_decay_factor = lr_decay_factor # Learning rate decay factor - self.linear_dim = original_embedding_dim - # Initialize weights with larger values for more aggressive learning - self.weights = np.random.normal(0, 0.1, self.linear_dim) - self.bias = 0.0 - - def _sigmoid(self, z): - """Sigmoid activation function for logistic regression.""" - return 1.0 / (1.0 + np.exp(-z)) - - def _get_parameter_text(self, candidate): - """Get the parameter text for a ModuleCandidate.""" - if not hasattr(candidate, 'update_dict'): - print(candidate) - assert hasattr(candidate, 'update_dict'), "ModuleCandidate must have an update_dict" - # Convert parameter nodes to readable names for deterministic embedding - params_with_names = {k.py_name: v for k, v in candidate.update_dict.items()} - return str(params_with_names) - - def _get_embedding(self, candidate,max_retries=10,base_delay=1.0): - """Get the embedding for a ModuleCandidate.""" - parameter_text = self._get_parameter_text(candidate) - - try: - response = retry_with_exponential_backoff( - lambda: embed_text(self.embedding_model, parameter_text), - max_retries=max_retries, - base_delay=base_delay, - operation_name="Embedding API call" - ) - embedding = response.data[0].embedding - return embedding - except Exception as e: - print_color(f"ERROR: Embedding API call failed after retries: {e}", "red") - print_color("Using random embedding as fallback", "yellow") - fallback_embedding = np.random.normal(0, 0.01, self.linear_dim) - return fallback_embedding / np.linalg.norm(fallback_embedding) - - def _update_memory_embeddings_for_batch(self, batch,max_workers=1000,max_retries=10,base_delay=1.0): - """Update the embeddings for a batch of candidates.""" - # Separate candidates that need embeddings from those that already have them - candidates_needing_embeddings = [] - for candidate in batch: - if not hasattr(candidate, "embedding"): - candidates_needing_embeddings.append(candidate) - - # Generate embeddings in parallel for candidates that need them - if candidates_needing_embeddings: - def get_embedding_for_candidate(candidate): - return self._get_embedding(candidate) - - # Create function list for async_run - embedding_functions = [lambda c=candidate: get_embedding_for_candidate(c) - for candidate in candidates_needing_embeddings] - - # Run embedding generation in parallel - new_embeddings = async_run( - embedding_functions, - max_workers=max_workers, - description=f"Generating embeddings for {len(candidates_needing_embeddings)} candidates" - ) - - # Assign embeddings back to candidates - for candidate, embedding in zip(candidates_needing_embeddings, new_embeddings): - candidate.embedding = embedding - - def update(self): - """Update the regression model parameters using the current memory with logistic regression.""" - start_time = time.time() - if self.verbose: - print_color("Updating regression model using the current memory with logistic regression...", "blue") - # Extract candidates from memory (memory contains (neg_score, candidate) tuples) - batch = [candidate for _, candidate in self.memory] - # Ensure all candidates have embeddings - self._update_memory_embeddings_for_batch(batch) - - # Get training data from memory (only candidates with rollout data) - training_candidates = [candidate for neg_score, candidate in self.memory if candidate.num_rollouts > 0 and candidate.mean_score() is not None] - - if len(training_candidates) == 0: - if self.verbose: - print_color("Warning: No training data available for regression model.", "yellow") - end_time = time.time() - elapsed_time = end_time - start_time - if self.verbose: - print_color(f"Regressor update completed in {elapsed_time:.4f} seconds (no training data)", "cyan") - return - - # Extract raw binary training data from each candidate - X_list = [] - y_list = [] - - for candidate in training_candidates: - embedding = candidate.embedding - eval_count = candidate.num_rollouts - mean_score = candidate.mean_score() - - if mean_score is None: - continue - - # Calculate score_sum from mean_score and eval_count - # Assuming scores are binary (0 or 1), score_sum = mean_score * eval_count - score_sum = mean_score * eval_count - - # score_sum directly represents the number of successes - num_successes = int(round(score_sum)) - num_failures = eval_count - num_successes - - # Ensure non-negative values - num_successes = max(0, num_successes) - num_failures = max(0, num_failures) - - # Create binary training samples: 1 for success, 0 for failure - for _ in range(num_successes): - X_list.append(embedding) - y_list.append(1.0) - - for _ in range(num_failures): - X_list.append(embedding) - y_list.append(0.0) - - if len(X_list) == 0: - print_color("Warning: No binary training samples generated.", "yellow") - end_time = time.time() - elapsed_time = end_time - start_time - if self.verbose: - print_color(f"Regressor update completed in {elapsed_time:.4f} seconds (no binary samples)", "cyan") - return - - # Convert to numpy arrays - X = np.array(X_list) - y = np.array(y_list) - - # Ensure X has the right dimensions - if X.shape[1] != self.linear_dim: - self.linear_dim = X.shape[1] - # Initialize weights with larger values for more aggressive learning - self.weights = np.random.normal(0, 0.1, self.linear_dim) - - # Convergence-based regularized logistic regression training using all raw binary data - m = len(X_list) - # Training loop until convergence with adaptive learning rate and early stopping - prev_cost = float('inf') - best_cost = float('inf') - converged = False - iteration = 0 - patience_counter = 0 - - # Reset learning rate - self.learning_rate = self.initial_learning_rate - - for iteration in range(self.max_iterations): - # Forward pass - z = X.dot(self.weights) + self.bias - predictions = self._sigmoid(z) - - # Compute cost with L2 regularization - epsilon = 1e-15 # Small value to prevent log(0) - predictions_clipped = np.clip(predictions, epsilon, 1 - epsilon) - log_likelihood = -np.mean(y * np.log(predictions_clipped) + (1 - y) * np.log(1 - predictions_clipped)) - l2_penalty = self.regularization_strength * np.sum(self.weights ** 2) - total_cost = log_likelihood + l2_penalty - - # Check for improvement and early stopping - cost_change = abs(prev_cost - total_cost) - if total_cost < best_cost: - best_cost = total_cost - patience_counter = 0 - else: - patience_counter += 1 - - # Backward pass (compute gradients with L2 regularization) - dw = (1/m) * X.T.dot(predictions - y) + 2 * self.regularization_strength * self.weights - db = (1/m) * np.sum(predictions - y) - gradient_norm = np.linalg.norm(dw) - - # Check convergence criteria (stricter) - if cost_change < self.tolerance and gradient_norm < self.tolerance: - converged = True - print_color(f"Converged at iteration {iteration + 1}: cost change {cost_change:.10f}, gradient norm {gradient_norm:.10f}", "green") - break - - # Early stopping if no improvement - if patience_counter >= self.patience: - print_color(f"Early stopping at iteration {iteration + 1}: no improvement for {self.patience} iterations", "yellow") - break - - # Adaptive learning rate: decay if no improvement for several iterations - if patience_counter > 0 and patience_counter % 10 == 0: - self.learning_rate *= self.lr_decay_factor - print_color(f"Reducing learning rate to {self.learning_rate:.6f}", "yellow") - - # Update parameters - self.weights -= self.learning_rate * dw - self.bias -= self.learning_rate * db - - prev_cost = total_cost - - # Final status - if converged: - print_color(f"Logistic regression converged after {iteration + 1} iterations. Final cost: {total_cost:.6f} (Log-likelihood: {log_likelihood:.6f}, L2 penalty: {l2_penalty:.6f}), bias: {self.bias:.6f}", "green") - else: - print_color(f"Logistic regression reached max iterations ({self.max_iterations}). Final cost: {total_cost:.6f} (Log-likelihood: {log_likelihood:.6f}, L2 penalty: {l2_penalty:.6f}), bias: {self.bias:.6f}", "yellow") - - # Print timing information - end_time = time.time() - elapsed_time = end_time - start_time - print_color(f"Regressor update completed in {elapsed_time:.4f} seconds", "cyan") - - def predict_scores(self,memory = None): - """Predict scores for all candidates in the memory.""" - # Extract all candidates from memory (memory is a list of (neg_score, candidate) tuples) - if memory is None: - memory = self.memory - batch = [candidate for _, candidate in memory] - - # Ensure all candidates have embeddings - self._update_memory_embeddings_for_batch(batch) - - # Collect all embeddings in order - embeddings = [] - for candidate in batch: - embeddings.append(candidate.embedding) - - - # Batch prediction using vectorized operations - X_batch = np.array(embeddings) - z = X_batch.dot(self.weights) + self.bias - predicted_scores = self._sigmoid(z) - - # Update each candidate with predicted score as attribute - for candidate, predicted_score in zip(batch, predicted_scores): - candidate.predicted_score = predicted_score - - return predicted_scores - \ No newline at end of file diff --git a/opto/features/priority_search/priority_search_with_regressor.py b/opto/features/priority_search/priority_search_with_regressor.py deleted file mode 100644 index 21574ed5..00000000 --- a/opto/features/priority_search/priority_search_with_regressor.py +++ /dev/null @@ -1,212 +0,0 @@ -import numpy as np -import copy -from typing import Union, List, Tuple, Dict, Any, Optional -from opto.features.priority_search.search_template import Samples, SearchTemplate, BatchRollout -from opto.features.priority_search.module_regressor import ModuleCandidateRegressor -from opto.features.priority_search.priority_search import PrioritySearch, ModuleCandidate, HeapMemory -import heapq - -class PrioritySearch_with_Regressor(PrioritySearch): - """ - A subclass of PrioritySearch that uses a regressor to predict the scores of the candidates. - """ - - def train(self, - guide, # guide to provide feedback - train_dataset, # dataset of (x, info) pairs to train the agent - *, - # validation - validate_dataset = None, # same format as train_dataset; if None, use the current batch. - validate_guide = None, # to provide scores for the validation set - # training loop - batch_size = 1, # batch size for updating the agent - num_batches = 1, # number of batches to use from the dataset in each iteration - score_range = None, # range of (min_score, max_score) to clip the scores; if None, no clipping is applied - num_epochs = 1, # number of training epochs (int or None) - num_steps = None, # number of training steps (int or None) - num_threads = None, # maximum number of threads to use - verbose = False, # whether to print the output of the agent - # evaluation - test_dataset = None, # dataset of (x, info) pairs to evaluate the agent - test_frequency: Union[int, None] = 1, # frequency of evaluation (set it to be negative to skip the first evaluation) - num_test_samples: int = 1, # number of times to evaluate each input; when greater than 1, the scores are averaged. - # logging - log_frequency = None, # frequency of logging - save_frequency: Union[int, None] = None, # frequency of saving the agent - save_path: str = "checkpoints/agent.pkl", # path to save the agent - # Priority Search specific parameters - num_candidates: int = 10, # number of candidates to propose for exploration - num_proposals: int = 1, # number of proposals to generate per optimizer - validate_exploration_candidates: bool = True, # whether to validate the proposed parameters for exploration - use_best_candidate_to_explore: bool = True, # whether to use the best candidate as part of the exploration candidates - memory_size: Optional[int] = None, # size of the long-term heap memory to store the candidates; if None, no limit is set - short_term_memory_size: Optional[int] = None, # size of the short-term memory to store the most recent candidates; if None, no limit is set - memory_update_frequency: Optional[int] = 0, # number of iterations to keep the candidates in the short-term memory before merging them into the long-term memory. 0 means only long-term memory is used. - score_function: str = 'mean', # function to compute the score for the candidates; 'mean' or 'ucb' - ucb_exploration_constant: float = 1.0, # exploration constant for UCB score function - # Regressor specific parameters - regressor_embedding_model: str = "gemini/text-embedding-004", # embedding model for the regressor - regressor_learning_rate: float = 0.2, # learning rate for the regressor - regressor_regularization_strength: float = 1e-4, # L2 regularization strength for the regressor - regressor_max_iterations: int = 20000, # maximum iterations for regressor training - regressor_tolerance: float = 5e-3, # convergence tolerance for the regressor - # Additional keyword arguments - **kwargs - ): - """ Train the agent using the Priority Search algorithm with regressor. - - This extends the parent PrioritySearch by adding a regressor that predicts - candidate scores based on the long-term memory. - - Args: - All parameters from the parent PrioritySearch.train() method, plus: - regressor_embedding_model (str, optional): Embedding model for the regressor. Defaults to "gemini/text-embedding-004". - regressor_learning_rate (float, optional): Learning rate for the regressor. Defaults to 0.2. - regressor_regularization_strength (float, optional): L2 regularization strength for the regressor. Defaults to 1e-4. - regressor_max_iterations (int, optional): Maximum iterations for regressor training. Defaults to 20000. - regressor_tolerance (float, optional): Convergence tolerance for the regressor. Defaults to 5e-3. - """ - - # Initialize the search parameters and memory - self._initialize_search_parameters( - num_candidates=num_candidates, - num_proposals=num_proposals, - validate_exploration_candidates=validate_exploration_candidates, - use_best_candidate_to_explore=use_best_candidate_to_explore, - score_function=score_function, - score_range=score_range, - ucb_exploration_constant=ucb_exploration_constant, - memory_size=memory_size, - short_term_memory_size=short_term_memory_size, - memory_update_frequency=memory_update_frequency - ) - - # Initialize the regressor with the long-term memory and custom parameters - this is the only difference from parent class - self.regressor = ModuleCandidateRegressor( - memory=self.long_term_memory, - embedding_model=regressor_embedding_model, - num_threads=num_threads, - learning_rate=regressor_learning_rate, - regularization_strength=regressor_regularization_strength, - max_iterations=regressor_max_iterations, - tolerance=regressor_tolerance - ) - - SearchTemplate.train(self, guide=guide, - train_dataset=train_dataset, - validate_dataset=validate_dataset, - validate_guide=validate_guide, - batch_size=batch_size, - num_batches=num_batches, - score_range=score_range, - num_epochs=num_epochs, - num_steps=num_steps, - num_threads=num_threads, - verbose=verbose, - test_dataset=test_dataset, - test_frequency=test_frequency, - num_test_samples=num_test_samples, - log_frequency=log_frequency, - save_frequency=save_frequency, - save_path=save_path, - **kwargs) - - def update(self, - samples: Union[Samples, None] = None, - verbose: bool = False, - **kwargs): #-> Tuple[Dict[ParameterNode, Any], List[trace.Module], Dict[str, Any]]: - """ Update the agent using the collected samples. - """ - - # samples is None in the first iteration - if samples is not None: - # 1. Propose new parameters based on running LLM optimizers on the collected samples - candidates = self.propose(samples, verbose=verbose, **kwargs) # List of ModuleCandidates - # 2. Validate the proposed parameters - validate_results = self.validate(candidates, samples, verbose=verbose, **kwargs) # this updates the priority queue - # 3. Update the priority queue with the validation results - self.update_memory(validate_results, verbose=verbose, **kwargs) # samples are provided here in case candidates do not capture full information - else: # The first iteration. - max_mem_size = self.memory.size if self.memory.size is not None else float('inf') - while len(self.memory) < min(max_mem_size, self.num_candidates): - self.memory.push(self.max_score, ModuleCandidate(self.agent, optimizer=self.optimizer)) # Push the base agent as the first candidate (This gives the initialization of the priority queue) - - - self.update_memory_with_regressor(verbose=verbose, **kwargs) - - # TODO Log information about the update - info_log = { - 'n_iters': self.n_iters, # number of iterations - 'short_term_memory_size': len(self.short_term_memory), # size of the short-term memory - 'long_term_memory_size': len(self.long_term_memory), # size of the long-term memory - 'using_short_term_memory': self.memory is self.short_term_memory, # whether the current memory is the short-term memory - 'using_long_term_memory': self.memory is self.long_term_memory, # whether the current memory is the long-term memory - } - # If using long-term memory, log the total number of samples in the long-term memory - if self.memory is self.long_term_memory: - total_samples = sum([candidate.num_rollouts for _, candidate in self.memory]) - info_log.update({'total_samples': total_samples}) - # 4. Explore and exploit the priority queue - self._best_candidate, self._best_candidate_priority, info_exploit = self.exploit(verbose=verbose, **kwargs) # get the best candidate (ModuleCandidate) from the priority queue - self._exploration_candidates, self._exploration_candidates_priority, info_explore = self.explore(verbose=verbose, **kwargs) # List of ModuleCandidates - info_log.update(info_exploit) # add the info from the exploit step - info_log.update(info_explore) # add the info from the explore step - return self._best_candidate.update_dict, [c.get_module() for c in self._exploration_candidates], info_log - - def validate(self, - candidates: List[ModuleCandidate], - samples: Samples, - verbose: bool = False, - **kwargs): - """ Override the validate method. In this version we only use training data to update arm statistics. No validation is performed. - """ - print("--- Validating candidates...") if verbose else None - assert isinstance(samples, Samples), "samples must be an instance of Samples." - exploration_candidates = self._exploration_candidates # exploration candidates from the previous iteration - assert self._exploration_candidates is not None, "exploration_candidates must be set before calling validate." - - # The current batch of samples can be used to validate the exploration candidates - validate_samples = copy.copy(samples) - matched_candidates_and_samples = self.match_candidates_and_samples(exploration_candidates, validate_samples.samples) - # Append new candidates with out rollouts to matched_candidates_and_samples - matched_candidates_and_samples.update({c: [] for c in candidates }) - results = {} # dict of ModuleCandidate id: (ModuleCandidate, list of rollouts) - for c, rollouts in matched_candidates_and_samples.items(): # rollouts is a list of BatchRollouts - results[c] = [ r for rr in rollouts for r in rr.to_list()] # we only need the list of dicts - - return results - - def update_memory(self, validate_results, verbose: bool = False, **kwargs): - """ Override the update_memory method. In this subclass, we update the priority of all candidates together. Cannot use the parent class's update_memory method, because now some candidates may not have predicted scores. - """ - print("--- Updating memory with validation results...") if verbose else None - for candidate, rollouts in validate_results.items(): - candidate.add_rollouts(rollouts) # add the rollouts to the - placeholder_priority = self.max_score - self.memory.push(placeholder_priority, candidate) - - def update_memory_with_regressor(self, verbose: bool = False, **kwargs): - """ Update the priority queue with the regressor results. - """ - print("--- Updating memory with regressor results...") if verbose else None - if self.memory is self.long_term_memory: # Only update the regressor if we are using the long-term memory - self.regressor.update() - self.regressor.predict_scores(self.memory) # The only difference from the parent class - # Reorder the memory according to the predicted scores - # Extract candidates from memory tuples and reorder by predicted scores - candidates_with_scores = [(-candidate.predicted_score, candidate) for _, candidate in self.memory] - self.memory.memory = candidates_with_scores # Update the internal list of HeapMemory - heapq.heapify(self.memory.memory) # Heapify the internal list - - def print_memory_stats(self): - # For debugging, print all candidates: number, mean_score(), num_rollouts, predicted_score. It is better to see an increasing trend in the predicted scores. - for i, (neg_predicted_score, candidate) in enumerate(self.memory): - print(f"Candidate {i}, Mean Score: {candidate.mean_score()}, Num Rollouts: {candidate.num_rollouts}, Predicted Score: {-neg_predicted_score}") - - # TODO refactor below to reuse scoring - def compute_exploitation_priority(self, candidate) -> float: - """ Compute the priority for the candidate based on the predicted score. """ - if not isinstance(candidate, ModuleCandidate): - raise TypeError("candidate must be an instance of ModuleCandidate.") - # By default, we compute the mean score of the rollouts - return candidate.predicted_score diff --git a/opto/features/priority_search/utils.py b/opto/features/priority_search/utils.py deleted file mode 100644 index c61c81c2..00000000 --- a/opto/features/priority_search/utils.py +++ /dev/null @@ -1,102 +0,0 @@ -import numpy as np -import copy -import heapq -from dataclasses import dataclass -from typing import Union, List, Tuple, Dict, Any, Optional -from opto import trace -from opto.trace.nodes import ParameterNode -from opto.trainer.utils import async_run, batch_run -from opto.optimizers.utils import print_color -from opto.trainer.algorithms.basic_algorithms import Minibatch, Trainer, batchify -from opto.trainer.loader import DataLoader - -# Some helper functions to convert between trace.Module and update_dict - -def get_original_name(node): - """Extract the original name from a node, removing all _copy suffixes.""" - py_name = node.py_name # This removes colons: "param:0" -> "param0" - - # Find the first occurrence of "_copy" and remove it and everything after - copy_index = py_name.find('_copy') - if copy_index != -1: - return py_name[:copy_index] - else: - return py_name - -def is_node_copy(a, b): - """Check if two nodes are copies of each other by comparing their original names. - - This function has transitivity: if A is a copy of B and B is a copy of C, - then A is also considered a copy of C. - """ - return get_original_name(a) == get_original_name(b) - -def is_module_copy(a, b): - """ Check if a and b (trace.Modules) are copies of each other. """ - parameters_a = a.parameters() # list of ParameterNode - parameters_b = b.parameters() # list of ParameterNode - # Check if all parameters of a are copies of b or vice versa - # This might over count - # need to check 1:1 correspondence - matched = [] - for p_a in parameters_a: - _matched = [] - for p_b in parameters_b: - _matched.append(is_node_copy(p_a, p_b)) - matched.append(_matched) - matched = np.array(matched) - if np.all(np.sum(matched, axis=1) == 1) and np.all(np.sum(matched, axis=0) == 1): - return True - return False - -def remap_update_dict(base_module, update_dict): - """ Remap the update dict to the agent's parameters. update_dict might have keys which are copies of the base_module's parameters or visa versa. - This function remaps the keys in update_dict to the original parameters of the base_module. - - The return dict is empty if no keys in update_dict matched any parameters of the base_module. This condition can be used to check if the update_dict contains non-trivial updates. - """ - parameters = base_module.parameters() # get the parameters of the base agent - remapped_update_dict = {} - for k, v in update_dict.items(): - for p in parameters: - if is_node_copy(k, p): # Check if k is a copy of p or p is a copy of k - remapped_update_dict[p] = v - break # stop checking once we've found a match - return remapped_update_dict - -def set_module_parameters(agent, update_dict): - """ Set the parameters of the agent based on the update_dict. - The update_dict is a dictionary of ParameterNode: value pairs. - The agent's parameters will be updated with the values from the update_dict. - """ - remapped_update_dict = remap_update_dict(agent, update_dict) # remap the update dict to the agent's parameters - for k, v in remapped_update_dict.items(): - k._data = v # set the parameter's data to the value in the update_dict - -def create_module_from_update_dict(agent, update_dict): - """ Create a new agent from the update_dict. - The update_dict is a dictionary of ParameterNode: value pairs. - A new agent will be created with the parameters set to the values from the update_dict. - """ - # new_agent = copy.deepcopy(agent) #.copy() # create a copy of the agent - new_agent = deepcopy_module(agent) # create a copy of the agent - set_module_parameters(new_agent, update_dict) # set the parameters of the new agent - return new_agent # return the new agent - - -def deepcopy_module(agent): - """ Create a deep copy of the agent, but reset the parameter names to remove the _copy suffixes. - - This is useful when we want to create a new agent for a new rollout, - but we want to keep the parameter names consistent with the original agent - so that the optimizer can recognize them across different rollouts. - - NOTE: This breaks the GRAPH's assumption on uniqueness of node names. Use with caution. - """ - new_agent = copy.deepcopy(agent) - for p_n in new_agent.parameters(): - for p_o in agent.parameters(): - if is_node_copy(p_n, p_o): - p_n._name = p_o._name # directly set the name to the original parameter's name - break - return new_agent diff --git a/opto/trainer/algorithms/__init__.py b/opto/trainer/algorithms/__init__.py index 09333a7f..3b3e3022 100644 --- a/opto/trainer/algorithms/__init__.py +++ b/opto/trainer/algorithms/__init__.py @@ -1,4 +1,5 @@ from opto.trainer.algorithms.algorithm import Trainer -from opto.trainer.algorithms.basic_algorithms import Minibatch, MinibatchAlgorithm, BasicSearchAlgorithm -from opto.trainer.algorithms.beamsearch_algorithm import BeamsearchAlgorithm, BeamsearchHistoryAlgorithm -from opto.trainer.algorithms.UCBsearch import UCBSearchAlgorithm +from opto.trainer.algorithms.priority_search import PrioritySearch +from opto.trainer.algorithms.polca import POLCA +from opto.trainer.algorithms.streaming_priority_search import StreamingPrioritySearch +from opto.trainer.algorithms.classical_algorithms import SequentialUpdate, SequentialSearch, BeamSearch, ParetobasedPS diff --git a/opto/trainer/algorithms/aggregator.py b/opto/trainer/algorithms/aggregator.py deleted file mode 100644 index 4f54cdd4..00000000 --- a/opto/trainer/algorithms/aggregator.py +++ /dev/null @@ -1,420 +0,0 @@ -import re -import copy -import json -import warnings -import numpy as np -from textwrap import dedent -from typing import Dict, List, Any, Union -from opto import trace -from opto.trace.nodes import ParameterNode -from opto.optimizers.utils import print_color -from opto.trainer.algorithms import Minibatch -from opto.trainer.algorithms.basic_algorithms import standard_optimization_step -from opto.utils.llm import LLM, AbstractModel - - -class AggregatedUpdate(Minibatch): - """Algorithm that applies optimizer to propose updates independently for minibatch instances. - - The updates are then aggregated using an LLM and applied to the agent. This approach - allows for intelligent consolidation of multiple parameter suggestions based on - confidence scores and common patterns. - - Parameters - ---------- - agent : trace.Module - The agent module to be trained and optimized. - optimizer : Optimizer - The optimizer instance used to generate parameter updates. - use_asyncio : bool, optional - Whether to use asyncio for parallel agent evaluation, by default True. - logger : Logger, optional - Logger instance for tracking training metrics, by default None. - llm : AbstractModel, optional - Language model instance for aggregating updates, by default None. - max_tokens : int, optional - Maximum tokens for aggregator LLM responses, by default 4096. - *args - Additional positional arguments passed to parent class. - **kwargs - Additional keyword arguments passed to parent class. - - Attributes - ---------- - llm : AbstractModel - Language model used for parameter update aggregation. - max_tokens : int - Token limit for aggregator responses. - stepsize : float - Step size for parameter updates, set during training. - aggregator_system_prompt : str - System prompt template for the aggregator LLM. - - Notes - ----- - The aggregation process uses confidence scores to weight suggestions, with the - current parameter values receiving a confidence of (1 - stepsize) and new - suggestions receiving a confidence of stepsize. - """ - - aggregator_system_prompt = f"""You are an expert in aggregating suggestions. You will see a list of suggestions of parameters from different people (denoted as #SuggestedValue_i). A parameter is represented as a dict, where the key is the name of a parameter component, and the value is the component value. - - Your task is to aggregate the suggestions and provide a new value for the parameter. Please consider the following: - 1. Make sure the new values in the dict is in the same format as the values in the dict of the suggested parameters. - 2. Provide a new value to consolidate the suggestions considering on their confidence scores. The suggestions can be wrong (especially the ones with low confidence). - 3. When aggregating, try to find the common ground between the suggestions. - - - Output_format: Your output should be in the following json format, satisfying the json syntax: - - {{ - "reasoning_": , - "reasoning_": , - "suggestion": {{ - : , - : , - }} - }} - - In "reasoning", explain the problem your thought process and how you arrive at the new value. - - In "suggestion", write down the suggested values. For each key in #CurrentValue, you should write the new value in the format of python code without syntax errors. If you don't want to change a variable, just write down its current value. - - If no changes or answer are needed, just output TERMINATE. - """ - - def __init__(self, - agent, - optimizer, - use_asyncio: bool = True, # whether to use asyncio to evaluate the agent - logger = None, - llm: AbstractModel = None, - max_tokens: int = 4096, - *args, - **kwargs, - ): - """Initialize the AggregatedUpdate algorithm. - - Parameters - ---------- - agent : trace.Module - The agent module to be trained. - optimizer : Optimizer - The optimizer for generating parameter updates. - use_asyncio : bool, optional - Whether to use asyncio for agent evaluation, by default True. - logger : Logger, optional - Logger for tracking metrics, by default None. - llm : AbstractModel, optional - Language model for aggregation, by default None (uses LLM()). - max_tokens : int, optional - Maximum tokens for aggregator responses, by default 4096. - *args - Additional positional arguments. - **kwargs - Additional keyword arguments. - """ - super().__init__(agent, optimizer, logger=logger, use_asyncio=use_asyncio, *args, **kwargs) - self.llm = llm or LLM() # for the aggregator - self.max_tokens = max_tokens # for the aggregator - - - def train(self, - guide, - train_dataset, - *, - stepsize = 0.5, # the stepsize for the update (used by the aggregator) - num_epochs: int = 1, # number of training epochs - batch_size: int = 1, # batch size for updating the agent - test_dataset = None, # dataset of (x, info) pairs to evaluate the agent - test_frequency: int = 1, # frequency of evaluation - log_frequency: Union[int, None] = None, # frequency of logging - min_score: Union[int, None] = None, # minimum score to update the agent - verbose: Union[bool, str] = False, # whether to print the output of the agent - **kwargs - ): - """Train the agent using aggregated parameter updates. - - Parameters - ---------- - guide : Guide - Guide function to provide feedback for training. - train_dataset : dict - Training dataset containing 'inputs' and 'infos' keys. - stepsize : float, optional - Step size for parameter updates (0-1), by default 0.5. - num_epochs : int, optional - Number of training epochs, by default 1. - batch_size : int, optional - Batch size for parameter updates, by default 1. - test_dataset : dict, optional - Test dataset for evaluation, by default None. - eval_frequency : int, optional - Frequency of evaluation, by default 1. - log_frequency : int, optional - Frequency of logging, by default None. - min_score : int, optional - Minimum score threshold for updates, by default None. - verbose : bool or str, optional - Verbosity level for output, by default False. - **kwargs - Additional training arguments. - - Raises - ------ - AssertionError - If stepsize is not between 0 and 1. - - Notes - ----- - The stepsize parameter controls the balance between current parameters - (confidence 1-stepsize) and new suggestions (confidence stepsize). - """ - - assert stepsize >= 0 and stepsize <= 1 - self.stepsize = stepsize # used in self.aggregate - - super().train(guide, train_dataset, num_epochs=num_epochs, batch_size=batch_size, - test_dataset=test_dataset, test_frequency=test_frequency, - log_frequency=log_frequency, min_score=min_score, - verbose=verbose, **kwargs) - - - def forward(self, agent, x, guide, info, verbose=False): - """Run agent forward pass and generate parameter updates for minibatch instance. - - Parameters - ---------- - agent : trace.Module - The agent module to run forward pass on. - x : Any - Input data for the agent. - guide : Guide - Guide function for generating feedback. - info : Any - Additional information for the guide. - verbose : bool, optional - Whether to print verbose output, by default False. - - Returns - ------- - tuple[dict, float] - Parameter update dictionary and score for the instance. - - Notes - ----- - This method runs a standard optimization step and generates parameter - updates using the optimizer's backward and step methods. - """ - target, score, feedback = standard_optimization_step(self.agent, x, guide, info, min_score=None) - self.optimizer.zero_feedback() - self.optimizer.backward(target, feedback) - update_dict = self.optimizer.step(verbose=verbose, bypassing=True) - return self.to_param_dict(update_dict), score - - def to_param_dict(self, update_dict): - """Convert parameter update dictionary from ParameterNode keys to string keys. - - Parameters - ---------- - update_dict : dict[ParameterNode, Any] - Update dictionary with ParameterNode keys. - - Returns - ------- - dict[str, Any] - Update dictionary with string keys (py_name of ParameterNode). - """ - return {k.py_name: v for k, v in update_dict.items()} - - def update(self, outputs, verbose=False): - """Aggregate parameter update suggestions using LLM and apply to agent. - - Parameters - ---------- - outputs : list[tuple[dict, float]] - List of (parameter_updates, score) tuples from minibatch forward passes. - verbose : bool, optional - Whether to print verbose aggregation output, by default False. - - Returns - ------- - float or None - Average score across the minibatch instances, or None if no valid scores. - - Notes - ----- - This method constructs a prompt with current parameters and suggested updates, - asks the LLM aggregator to consolidate them, and applies the aggregated update - to the agent parameters. - """ - - # Prepare the new parameters and scores - new_parameters = [] - scores = [] - for update_dict, score in outputs: - new_parameters.append(update_dict) - scores.append(score) - - average_score = np.mean(scores) if all([s is not None for s in scores]) else None - - # Construct user prompt - p0 = {n.py_name: n.data for n in self.optimizer.parameters} # the current parameters - user_prompt = f'#SuggestedValue_0 (confidence {1-self.stepsize}):\n{p0}\n\n' - for i, p in enumerate(new_parameters): - # Fill in the missing keys - for k, v in p0.items(): - if k not in p: - p[k] = v - user_prompt += f"#SuggestedValue_{i+1} (confidence {self.stepsize}):\n{p}\n\n\n" - - messages = [ - {"role": "system", "content": self.aggregator_system_prompt}, - {"role": "user", "content": user_prompt}, - ] - - response = self.llm( - messages=messages, - response_format={"type": "json_object"}, - max_tokens=self.max_tokens, - ) - response = response.choices[0].message.content - - if verbose: - if verbose is True: - print("Aggregator User Prompt:") - print(user_prompt) - print("Aggregator Response:") - print_color(response, 'blue') - - update_dict = construct_update_dict(self.optimizer.parameters, extract_llm_suggestion(response)) - self.optimizer.update(update_dict) - - return average_score - - - -# These two helper functions are extracted from OptoPrime -def construct_update_dict( - parameters: List[ParameterNode], suggestion: Dict[str, Any], ignore_extraction_error: bool = True - ) -> Dict[ParameterNode, Any]: - """Convert LLM suggestion dictionary into typed parameter update dictionary. - - Parameters - ---------- - parameters : List[ParameterNode] - List of trainable parameter nodes in the agent. - suggestion : Dict[str, Any] - Dictionary of suggested parameter values from LLM. - ignore_extraction_error : bool, optional - Whether to ignore type conversion errors, by default True. - - Returns - ------- - Dict[ParameterNode, Any] - Dictionary mapping parameter nodes to their suggested values. - - Raises - ------ - ValueError - If type conversion fails and ignore_extraction_error is False. - KeyError - If parameter key is missing and ignore_extraction_error is False. - - Notes - ----- - This function attempts to convert string suggestions to the appropriate - data types based on the current parameter values. Type conversion errors - are either ignored (with warning) or raised based on the flag. - """ - # TODO: might need some automatic type conversion - update_dict = {} - for node in parameters: - if node.trainable and node.py_name in suggestion: - try: - update_dict[node] = type(node.data)(suggestion[node.py_name]) - except (ValueError, KeyError) as e: - # catch error due to suggestion missing the key or wrong data type - if ignore_extraction_error: - warnings.warn( - f"Cannot convert the suggestion '{suggestion[node.py_name]}' for {node.py_name} to the right data type" - ) - else: - raise e - return update_dict - - -def extract_llm_suggestion(response: str, ignore_extraction_error: bool = True) -> Dict[str, Any]: - """Extract parameter suggestions from LLM response text. - - Parameters - ---------- - response : str - Raw response text from the LLM aggregator. - ignore_extraction_error : bool, optional - Whether to ignore JSON parsing and extraction errors, by default True. - - Returns - ------- - Dict[str, Any] - Dictionary of extracted parameter suggestions. - - Notes - ----- - This function attempts multiple parsing strategies: - 1. JSON parsing of the full response - 2. Regex extraction of JSON content within braces - 3. Manual key-value pair extraction using regex patterns - - Empty code suggestions (parameters ending with "__code") are automatically - removed from the final result. - """ - suggestion = {} - attempt_n = 0 - while attempt_n < 2: - try: - suggestion = json.loads(response)["suggestion"] - break - except json.JSONDecodeError: - # Remove things outside the brackets - response = re.findall(r"{.*}", response, re.DOTALL) - if len(response) > 0: - response = response[0] - attempt_n += 1 - except Exception: - attempt_n += 1 - - if not isinstance(suggestion, dict): - suggestion = {} - - if len(suggestion) == 0: - # we try to extract key/value separately and return it as a dictionary - pattern = r'"suggestion"\s*:\s*\{(.*?)\}' - suggestion_match = re.search(pattern, str(response), re.DOTALL) - if suggestion_match: - suggestion = {} - # Extract the entire content of the suggestion dictionary - suggestion_content = suggestion_match.group(1) - # Regex to extract each key-value pair; - # This scheme assumes double quotes but is robust to missing commas at the end of the line - pair_pattern = r'"([a-zA-Z0-9_]+)"\s*:\s*"(.*)"' - # Find all matches of key-value pairs - pairs = re.findall(pair_pattern, suggestion_content, re.DOTALL) - for key, value in pairs: - suggestion[key] = value - - if len(suggestion) == 0: - if not ignore_extraction_error: - print("Cannot extract suggestion from LLM's response:") - print(response) - - # if the suggested value is a code, and the entire code body is empty (i.e., not even function signature is present) - # then we remove such suggestion - keys_to_remove = [] - for key, value in suggestion.items(): - if "__code" in key and value == "": - keys_to_remove.append(key) - for key in keys_to_remove: - del suggestion[key] - - return suggestion \ No newline at end of file diff --git a/opto/features/priority_search/examples.py b/opto/trainer/algorithms/classical_algorithms.py similarity index 61% rename from opto/features/priority_search/examples.py rename to opto/trainer/algorithms/classical_algorithms.py index 281b85aa..0816bba3 100644 --- a/opto/features/priority_search/examples.py +++ b/opto/trainer/algorithms/classical_algorithms.py @@ -1,7 +1,12 @@ -from opto.features.priority_search import PrioritySearch +import copy +import heapq from typing import Union, Optional +from opto.optimizers.utils import print_color +from opto.trainer.algorithms.priority_search import PrioritySearch, ModuleCandidate +from opto.trainer.utils import safe_mean + # Below we define several algorithms that use the PrioritySearch class. @@ -212,3 +217,159 @@ def train(self, default_score=default_score, validate_proposals=validate_proposals, memory_size=memory_size, **kwargs) + +class ParetobasedPS(PrioritySearch): + """GEPA-style Pareto-based exploration on top of the PrioritySearch pipeline. + + Instead of popping the top candidates by a scalar priority (the default + PrioritySearch behavior), this algorithm selects exploration candidates + via the Pareto frontier of per-task scores: + + 1. For every training input x, find the candidate(s) with the highest + empirical mean score on x (the "best set" for x). + 2. Collect all candidates that appear in at least one such best set. + 3. Remove strictly dominated candidates: candidate ``a`` strictly + dominates ``b`` iff the set of tasks on which ``a`` is best is a + proper superset of the set of tasks on which ``b`` is best. + 4. Return the remaining (Pareto-optimal) candidates, truncated to + ``num_candidates`` (sorted by overall mean score as a tie-breaker). + + Notes + ----- + * To compute per-task scores we need the original ``x`` of each rollout, + so ``compress_candidate_memory`` is overridden to keep ``x`` and + ``score`` (instead of only ``score`` / ``score_dict`` as in the base + class). + * Scalar priorities are still pushed into the priority queue by + ``update_memory`` (via ``compute_exploration_priority``), so exploit + still works. Only ``explore`` is replaced with the Pareto selection. + """ + + def compress_candidate_memory(self, candidate: ModuleCandidate) -> ModuleCandidate: + """Keep ``x`` and ``score`` per rollout. Needed because Pareto selection groups rollouts by task ``x``; the parent class would drop it.""" + def _process_rollout(rollout): + for k in rollout: + if k not in ['x', 'score']: + rollout[k] = None + candidate = copy.copy(candidate) + candidate.rollouts = copy.deepcopy(candidate.rollouts) + for rollout in candidate.rollouts: + _process_rollout(rollout) + return candidate + + def compute_score_for_task_x(self, candidate: ModuleCandidate, x) -> float: + """Empirical mean score of ``candidate`` on task ``x`` (0 if unseen).""" + scores = [r['score'] for r in candidate.rollouts if r.get('x') == x] + return safe_mean(scores, missing_value=0) + + def get_best_candidates_for_x(self, x, candidates): + """Return the subset of ``candidates`` with the max score on task ``x``.""" + if not candidates: + return [] + scores = [self.compute_score_for_task_x(c, x) for c in candidates] + highest = max(scores) + return [c for c, s in zip(candidates, scores) if s == highest] + + def explore(self, verbose: bool = False, **kwargs): + """Select exploration candidates from the Pareto frontier. + + Returns + ------- + top_candidates : list[ModuleCandidate] + The Pareto-frontier candidates, truncated to ``self.num_candidates``. + priorities : list[float] + Priorities associated with the selected candidates (as stored in + the heap memory, for logging). + info_dict : dict + Logging info analogous to ``PrioritySearch.explore``. + """ + print_color("Using Pareto-based exploration to explore the parameter space...", "green") + + # Gather all candidates currently in memory. + all_candidates = [c for _, c in self.memory.memory] + if not all_candidates: + return [], [], { + 'num_exploration_candidates': 0, + 'exploration_candidates_mean_priority': None, + 'exploration_candidates_mean_score': None, + 'exploration_candidates_average_num_rollouts': None, + } + + # Training inputs (deduplicated in a stable way). + raw_xs = list(self.train_sampler.dataset['inputs']) + seen, xs = set(), [] + for x in raw_xs: + try: + key = x + if key in seen: + continue + seen.add(key) + except TypeError: + # unhashable x: keep all occurrences; semantics unchanged + pass + xs.append(x) + + # Best candidates per task. + best_for_x = {i: self.get_best_candidates_for_x(x, all_candidates) + for i, x in enumerate(xs)} + + # Candidates that are best on at least one task. + frontier_pool = list({id(c): c for cs in best_for_x.values() for c in cs}.values()) + + # Strict Pareto dominance on task-index sets. + def tasks_where_best(c): + return frozenset(i for i, cs in best_for_x.items() if c in cs) + + tasks_of = {id(c): tasks_where_best(c) for c in frontier_pool} + + non_dominated = [] + for b in frontier_pool: + tb = tasks_of[id(b)] + dominated = False + for a in frontier_pool: + if a is b: + continue + ta = tasks_of[id(a)] + # Strict superset: ta ⊋ tb (a is best everywhere b is best, and strictly more) + if tb.issubset(ta) and ta != tb: + dominated = True + break + if not dominated: + non_dominated.append(b) + + self.logger.log('Update/num_pareto_candidates', + len(non_dominated), self.n_iters, color='green') + print_color( + f"Pareto frontier size: {len(non_dominated)} / {len(frontier_pool)} " + f"(taking up to {self.num_candidates} for exploration).", "green") + + # Truncate to num_candidates (break ties by mean score, descending). + non_dominated.sort( + key=lambda c: c.mean_score() if c.mean_score() is not None else 0.0, + reverse=True, + ) + top_candidates = non_dominated[:self.num_candidates] + + # Remove selected candidates from the heap memory and re-heapify. + selected_ids = {id(c) for c in top_candidates} + priorities = [] + items_to_remove = [] + for neg_priority, candidate in self.memory.memory: + if id(candidate) in selected_ids: + priorities.append(-neg_priority) + items_to_remove.append((neg_priority, candidate)) + for item in items_to_remove: + self.memory.memory.remove(item) + heapq.heapify(self.memory.memory) + + mean_scores = [c.mean_score() for c in top_candidates] + mean_scores = [s for s in mean_scores if s is not None] + info_dict = { + 'num_exploration_candidates': len(top_candidates), + 'exploration_candidates_mean_priority': safe_mean(priorities), + 'exploration_candidates_mean_score': safe_mean(mean_scores), + 'exploration_candidates_average_num_rollouts': + safe_mean([c.num_rollouts for c in top_candidates]), + } + return top_candidates, priorities, info_dict + diff --git a/opto/features/priority_search/polca.py b/opto/trainer/algorithms/polca.py similarity index 75% rename from opto/features/priority_search/polca.py rename to opto/trainer/algorithms/polca.py index 2850a2a6..229ac30c 100644 --- a/opto/features/priority_search/polca.py +++ b/opto/trainer/algorithms/polca.py @@ -1,10 +1,43 @@ -from opto.features.priority_search.priority_search import PrioritySearch, ModuleCandidate -from opto.features.priority_search.module_regressor import RegressorTemplate -from opto.features.priority_search.summarizer import Summarizer +from opto.trainer.algorithms.priority_search import PrioritySearch, ModuleCandidate +from opto.trainer.summarizer import Summarizer from typing import Union, List, Tuple, Dict, Any, Optional, Callable from opto.optimizers.utils import print_color +from opto.trainer.utils import async_run +from opto.utils.llm import embed import numpy as np -from opto.features.priority_search.search_template import Samples +from opto.trainer.search_template import Samples + + +class _CandidateEmbedder: + """Adds an `embedding` attribute to ModuleCandidate objects via an embedding API.""" + + def __init__(self, embedding_model: str = "gemini/gemini-embedding-001"): + self.embedding_model = embedding_model + + def _get_parameter_text(self, candidate): + assert hasattr(candidate, 'update_dict'), "ModuleCandidate must have an update_dict" + params_with_names = {k.py_name: v for k, v in candidate.update_dict.items()} + return str(params_with_names) + + def _get_embedding(self, candidate, max_retries: int = 10, base_delay: float = 1.0): + parameter_text = self._get_parameter_text(candidate) + return embed(self.embedding_model, parameter_text, + max_retries=max_retries, base_delay=base_delay) + + def add_embeddings_to_candidates(self, candidates: List[ModuleCandidate], max_workers: int = 50): + """Attach embeddings to each candidate that doesn't already have one.""" + candidates_needing_embeddings = [c for c in candidates if not hasattr(c, "embedding")] + if not candidates_needing_embeddings: + return + embedding_functions = [lambda c=candidate: self._get_embedding(c) + for candidate in candidates_needing_embeddings] + new_embeddings = async_run( + embedding_functions, + max_workers=max_workers, + description=f"Generating embeddings for {len(candidates_needing_embeddings)} candidates", + ) + for candidate, embedding in zip(candidates_needing_embeddings, new_embeddings): + candidate.embedding = embedding def calculate_distance_to_memory(memory, new_candidate): @@ -29,6 +62,7 @@ class POLCA(PrioritySearch): Args: epsilon: The epsilon value for the epsilon-net. 0 means no filtering, the same as vanilla PrioritySearch. use_summarizer: Whether to use a summarizer to summarize the memory and the exploration candidates. + embedding_model: The embedding model used to compute candidate embeddings for the epsilon-net. summarizer_model_name: The model name for the summarizer. *args: Additional arguments for the parent class. **kwargs: Additional keyword arguments for the parent class. @@ -37,12 +71,13 @@ def __init__(self, epsilon: float = 0.1, use_summarizer: bool = False, context: str = "Concrete recommendations for generating better agent parameters based on successful patterns observed in the trajectories: ", + embedding_model: str = "gemini/gemini-embedding-001", *args, **kwargs): super().__init__(*args, **kwargs) self.epsilon = epsilon self.use_summarizer = use_summarizer - self.regressor = RegressorTemplate() + self.regressor = _CandidateEmbedder(embedding_model=embedding_model) self.summarizer = Summarizer() self.context = context diff --git a/opto/features/priority_search/priority_search.py b/opto/trainer/algorithms/priority_search.py similarity index 99% rename from opto/features/priority_search/priority_search.py rename to opto/trainer/algorithms/priority_search.py index d35b114f..eb89f86c 100644 --- a/opto/features/priority_search/priority_search.py +++ b/opto/trainer/algorithms/priority_search.py @@ -6,10 +6,12 @@ from opto import trace from opto.trace.nodes import ParameterNode from opto.optimizers.optimizer import Optimizer -from opto.trainer.utils import async_run, safe_mean -from opto.trainer.algorithms.basic_algorithms import batchify -from opto.features.priority_search.search_template import SearchTemplate, Samples, BatchRollout, save_train_config -from opto.features.priority_search.utils import set_module_parameters, remap_update_dict, create_module_from_update_dict, is_module_copy, deepcopy_module +from opto.trainer.utils import ( + async_run, safe_mean, batchify, + set_module_parameters, remap_update_dict, create_module_from_update_dict, + is_module_copy, deepcopy_module, +) +from opto.trainer.search_template import SearchTemplate, Samples, BatchRollout, save_train_config from opto.trainer.objectives import ( ObjectiveConfig, to_score_dict, apply_minimize, weighted_scalarize, pareto_rank, aggregate_score_dicts diff --git a/opto/features/priority_search/streaming_priority_search.py b/opto/trainer/algorithms/streaming_priority_search.py similarity index 97% rename from opto/features/priority_search/streaming_priority_search.py rename to opto/trainer/algorithms/streaming_priority_search.py index 5b7db8cd..124b01ee 100644 --- a/opto/features/priority_search/streaming_priority_search.py +++ b/opto/trainer/algorithms/streaming_priority_search.py @@ -1,8 +1,8 @@ import numpy as np from typing import List -from opto.features.priority_search.search_template import Samples, BatchRollout, save_train_config -from opto.features.priority_search.priority_search import PrioritySearch, ModuleCandidate +from opto.trainer.search_template import Samples, BatchRollout, save_train_config +from opto.trainer.algorithms.priority_search import PrioritySearch, ModuleCandidate diff --git a/opto/features/priority_search/sampler.py b/opto/trainer/sampler.py similarity index 99% rename from opto/features/priority_search/sampler.py rename to opto/trainer/sampler.py index 6c09cf57..05f6ff6c 100644 --- a/opto/features/priority_search/sampler.py +++ b/opto/trainer/sampler.py @@ -5,7 +5,7 @@ from opto import trace from opto.trainer.utils import batch_run from opto.trainer.guide import Guide -from opto.features.priority_search.utils import deepcopy_module +from opto.trainer.utils import deepcopy_module @dataclass class Rollout: diff --git a/opto/features/priority_search/search_template.py b/opto/trainer/search_template.py similarity index 99% rename from opto/features/priority_search/search_template.py rename to opto/trainer/search_template.py index 616dd1ff..d53df685 100644 --- a/opto/features/priority_search/search_template.py +++ b/opto/trainer/search_template.py @@ -3,9 +3,9 @@ from opto import trace from opto.optimizers.optimizer import Optimizer from opto.trainer.loggers import BaseLogger -from opto.trainer.algorithms.basic_algorithms import Trainer +from opto.trainer.algorithms.algorithm import Trainer from opto.trainer.loader import DataLoader -from opto.features.priority_search.sampler import Sampler, BatchRollout +from opto.trainer.sampler import Sampler, BatchRollout from opto.trainer.evaluators import evaluate # TODO update evaluate implementation from opto.trainer.utils import safe_mean from dataclasses import dataclass diff --git a/opto/features/priority_search/summarizer.py b/opto/trainer/summarizer.py similarity index 99% rename from opto/features/priority_search/summarizer.py rename to opto/trainer/summarizer.py index 4530ae09..882c60b0 100644 --- a/opto/features/priority_search/summarizer.py +++ b/opto/trainer/summarizer.py @@ -192,4 +192,4 @@ def summarize(self, memory) -> str: self.current_summary = summary_match.group(1).strip() - return self.current_summary \ No newline at end of file + return self.current_summary diff --git a/opto/trainer/train.py b/opto/trainer/train.py index ab33862c..00cc6414 100644 --- a/opto/trainer/train.py +++ b/opto/trainer/train.py @@ -247,13 +247,13 @@ def load_logger(logger: Union[BaseLogger, str], **kwargs) -> BaseLogger: def load_trainer_class(trainer: Union[Trainer, str]) -> Trainer: if isinstance(trainer, str): - if trainer.lower() == 'PrioritySearch'.lower(): - print('Warning: You are using PrioritySearch trainer, which is an experimental feature. Please report any issues you encounter.') - trainers_module = importlib.import_module("opto.features.priority_search") - trainer_class = getattr(trainers_module, trainer) - else: + # Try main algorithms first, then fall back to examples for legacy names + try: trainers_module = importlib.import_module("opto.trainer.algorithms") trainer_class = getattr(trainers_module, trainer) + except AttributeError: + trainers_module = importlib.import_module("examples.trainers") + trainer_class = getattr(trainers_module, trainer) elif issubclass(trainer, Trainer): trainer_class = trainer else: diff --git a/opto/trainer/utils.py b/opto/trainer/utils.py index 0406b05d..62c3ecbc 100644 --- a/opto/trainer/utils.py +++ b/opto/trainer/utils.py @@ -1,10 +1,12 @@ from typing import List, Optional import asyncio +import copy import functools import warnings import numpy as np from concurrent.futures import ThreadPoolExecutor from tqdm.asyncio import tqdm_asyncio +from opto import trace from opto.trace.bundle import ALLOW_EXTERNAL_DEPENDENCIES from opto.trace.modules import Module from opto.trainer.guide import Guide @@ -160,6 +162,134 @@ def _fun(*args, **kwargs): return decorator + +# --------------------------------------------------------------------------- +# Minibatch helpers +# --------------------------------------------------------------------------- + +@trace.bundle() +def batchify(*items): + """Concatenate multiple items into a formatted batch string. + + Parameters + ---------- + *items : Any + Variable number of items to concatenate into a batch. + + Returns + ------- + str + Formatted string with each item labeled by ID. + + Notes + ----- + This function is decorated with @trace.bundle() and creates a formatted + string where each item is prefixed with 'ID [i]:' for identification. + """ + output = '' + for i, item in enumerate(items): + output += f'ID {[i]}: {item}\n' + return output + + +# --------------------------------------------------------------------------- +# trace.Module graph helpers +# --------------------------------------------------------------------------- + +def get_original_name(node): + """Extract the original name from a node, removing all _copy suffixes.""" + py_name = node.py_name # This removes colons: "param:0" -> "param0" + + # Find the first occurrence of "_copy" and remove it and everything after + copy_index = py_name.find('_copy') + if copy_index != -1: + return py_name[:copy_index] + else: + return py_name + + +def is_node_copy(a, b): + """Check if two nodes are copies of each other by comparing their original names. + + This function has transitivity: if A is a copy of B and B is a copy of C, + then A is also considered a copy of C. + """ + return get_original_name(a) == get_original_name(b) + + +def is_module_copy(a, b): + """ Check if a and b (trace.Modules) are copies of each other. """ + parameters_a = a.parameters() # list of ParameterNode + parameters_b = b.parameters() # list of ParameterNode + # Check if all parameters of a are copies of b or vice versa + # This might over count + # need to check 1:1 correspondence + matched = [] + for p_a in parameters_a: + _matched = [] + for p_b in parameters_b: + _matched.append(is_node_copy(p_a, p_b)) + matched.append(_matched) + matched = np.array(matched) + if np.all(np.sum(matched, axis=1) == 1) and np.all(np.sum(matched, axis=0) == 1): + return True + return False + + +def remap_update_dict(base_module, update_dict): + """ Remap the update dict to the agent's parameters. update_dict might have keys which are copies of the base_module's parameters or visa versa. + This function remaps the keys in update_dict to the original parameters of the base_module. + + The return dict is empty if no keys in update_dict matched any parameters of the base_module. This condition can be used to check if the update_dict contains non-trivial updates. + """ + parameters = base_module.parameters() # get the parameters of the base agent + remapped_update_dict = {} + for k, v in update_dict.items(): + for p in parameters: + if is_node_copy(k, p): # Check if k is a copy of p or p is a copy of k + remapped_update_dict[p] = v + break # stop checking once we've found a match + return remapped_update_dict + + +def set_module_parameters(agent, update_dict): + """ Set the parameters of the agent based on the update_dict. + The update_dict is a dictionary of ParameterNode: value pairs. + The agent's parameters will be updated with the values from the update_dict. + """ + remapped_update_dict = remap_update_dict(agent, update_dict) # remap the update dict to the agent's parameters + for k, v in remapped_update_dict.items(): + k._data = v # set the parameter's data to the value in the update_dict + + +def create_module_from_update_dict(agent, update_dict): + """ Create a new agent from the update_dict. + The update_dict is a dictionary of ParameterNode: value pairs. + A new agent will be created with the parameters set to the values from the update_dict. + """ + new_agent = deepcopy_module(agent) # create a copy of the agent + set_module_parameters(new_agent, update_dict) # set the parameters of the new agent + return new_agent # return the new agent + + +def deepcopy_module(agent): + """ Create a deep copy of the agent, but reset the parameter names to remove the _copy suffixes. + + This is useful when we want to create a new agent for a new rollout, + but we want to keep the parameter names consistent with the original agent + so that the optimizer can recognize them across different rollouts. + + NOTE: This breaks the GRAPH's assumption on uniqueness of node names. Use with caution. + """ + new_agent = copy.deepcopy(agent) + for p_n in new_agent.parameters(): + for p_o in agent.parameters(): + if is_node_copy(p_n, p_o): + p_n._name = p_o._name # directly set the name to the original parameter's name + break + return new_agent + + if __name__ == "__main__": def tester(t): # regular time-consuming function diff --git a/opto/utils/llm.py b/opto/utils/llm.py index b6fbd4fe..97377c16 100644 --- a/opto/utils/llm.py +++ b/opto/utils/llm.py @@ -215,6 +215,33 @@ def create(self, **config: Any): return self._model.create(**config) +def embed(model: str, text: str, max_retries: int = 10, base_delay: float = 1.0): + """Call an embedding API with automatic retry on failure. + + Uses litellm under the hood. Returns the embedding vector (list of floats), + or None if all retries are exhausted. + + Args: + model: Embedding model identifier accepted by litellm + (e.g. "gemini/gemini-embedding-001", "openai/text-embedding-3-small"). + text: Text to embed. + max_retries: Maximum number of retry attempts on failure. + base_delay: Initial delay (in seconds) for exponential backoff. + """ + import litellm + try: + response = retry_with_exponential_backoff( + lambda: litellm.embedding(model=model, input=text), + max_retries=max_retries, + base_delay=base_delay, + operation_name="Embedding API call", + ) + return response.data[0].embedding + except Exception as e: + warnings.warn(f"Embedding API call failed after retries: {e}") + return None + + def auto_construct_oai_config_list_from_env() -> List: """ Collect various API keys saved in the environment and return a format like: diff --git a/tests/llm_optimizers_tests/test_gepa_benchmark.py b/tests/llm_optimizers_tests/test_gepa_benchmark.py index 2811d4ec..145a3e32 100644 --- a/tests/llm_optimizers_tests/test_gepa_benchmark.py +++ b/tests/llm_optimizers_tests/test_gepa_benchmark.py @@ -4,8 +4,8 @@ from opto import trace from opto.optimizers.optoprime_v2 import OptoPrimeV2 -from opto.features.gepa.gepa_algorithms import GEPAAlgorithmBase, GEPAUCBSearch, GEPABeamPareto -from opto.trainer.algorithms.basic_algorithms import BasicSearchAlgorithm +from examples.trainers.gepa_algorithms import GEPAAlgorithmBase, GEPAUCBSearch, GEPABeamPareto +from examples.trainers.basic_algorithms import BasicSearchAlgorithm from opto.trainer.guide import LLMJudge from opto.utils.llm import LLM diff --git a/tests/unit_tests/test_priority_search.py b/tests/unit_tests/test_priority_search.py index a7ff24d3..76618197 100644 --- a/tests/unit_tests/test_priority_search.py +++ b/tests/unit_tests/test_priority_search.py @@ -1,8 +1,8 @@ from opto import trace, trainer from opto.trainer.loader import DataLoader -from opto.features.priority_search.sampler import Sampler -from opto.features.priority_search.priority_search import PrioritySearch as _PrioritySearch -from opto.features.priority_search.priority_search import ModuleCandidate +from opto.trainer.sampler import Sampler +from opto.trainer.algorithms.priority_search import PrioritySearch as _PrioritySearch +from opto.trainer.algorithms.priority_search import ModuleCandidate from opto.optimizers import OptoPrimeV2 from opto.trainer.guide import Guide from opto.utils.llm import DummyLLM diff --git a/tests/unit_tests/test_sampler.py b/tests/unit_tests/test_sampler.py index a2457b35..b3239d20 100644 --- a/tests/unit_tests/test_sampler.py +++ b/tests/unit_tests/test_sampler.py @@ -1,8 +1,8 @@ from opto import trace -from opto.features.priority_search.sampler import Sampler +from opto.trainer.sampler import Sampler from opto.trainer.loader import DataLoader from opto.trainer.guide import Guide -from opto.features.priority_search.utils import is_node_copy +from opto.trainer.utils import is_node_copy class Guide(Guide): diff --git a/tests/unit_tests/test_saving_loading.py b/tests/unit_tests/test_saving_loading.py index f9a5835c..571711d3 100644 --- a/tests/unit_tests/test_saving_loading.py +++ b/tests/unit_tests/test_saving_loading.py @@ -1,8 +1,13 @@ - +# Ensure the repo root is importable so `examples.trainers` resolves in CI, +# where `examples/` is not an installed package and pytest's default prepend +# import mode only adds the test file's parent directory to sys.path. +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).resolve().parents[2])) from opto import trace from opto.trainer.loader import DataLoader -from opto.trainer.algorithms import BasicSearchAlgorithm +from examples.trainers import BasicSearchAlgorithm from opto.optimizers import OptoPrimeV2 from opto.trainer.guide import Guide as _Guide from opto.utils.llm import DummyLLM diff --git a/tests/unit_tests/test_trainers_multiobjective.py b/tests/unit_tests/test_trainers_multiobjective.py index 012a00bd..39b611ae 100644 --- a/tests/unit_tests/test_trainers_multiobjective.py +++ b/tests/unit_tests/test_trainers_multiobjective.py @@ -2,6 +2,13 @@ Uses DummyLLM and deterministic guides — no API keys required. """ +# Ensure the repo root is importable so `examples.trainers` resolves in CI, +# where `examples/` is not an installed package and pytest's default prepend +# import mode only adds the test file's parent directory to sys.path. +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + import pytest import re import numpy as np @@ -10,11 +17,11 @@ from opto import trace from opto.trainer.guide import Guide from opto.trainer.objectives import ObjectiveConfig -from opto.trainer.algorithms.beamsearch_algorithm import ( +from examples.trainers.beamsearch_algorithm import ( BeamsearchAlgorithm, BeamsearchHistoryAlgorithm, ) -from opto.features.priority_search.priority_search import ( +from opto.trainer.algorithms.priority_search import ( PrioritySearch, ModuleCandidate, HeapMemory,