google · dandye · Jun 2, 2026
diff --git a/.env.example b/.env.example
@@ -0,0 +1,34 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Gemini API Key (used for various Gemini operations/agents)
+GEMINI_API_KEY=your_gemini_api_key_here
+
+# VirusTotal API Key (used by VirusTotal / GTI tools)
+VT_APIKEY=your_virustotal_api_key_here
+
+# Google SecOps SOAR API Keys & Credentials
+SOAR_URL=https://your-soar-instance-url.com
+SOAR_APP_KEY=your_soar_app_key_here
+
+# Google Cloud Platform / Chronicle SIEM Credentials (used by Optimizer scripts and SIEM tools)
+# Path to your Google Cloud service account JSON file
+GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account.json
+
+# Vertex AI Model Garden Configuration
+# GCP Project ID for Vertex AI API execution (e.g., secops-demo-env)
+VERTEX_PROJECT=your_gcp_project_id_here
+
+# GCP Location/Region for Vertex AI execution (e.g., us-central1)
+VERTEX_LOCATION=us-central1
diff --git a/.gitmodules b/.gitmodules
diff --git a/docs/development_guide.md b/docs/development_guide.md
@@ -111,6 +111,39 @@ When documenting tools:
 3. Explain the return values and any side effects
 4. Provide examples of how to use the tool
 
+## MCP Tool Optimization (GEPA)
+
+This repository integrates **GEPA (Genetic-Pareto)**, a reflective prompt-evolution optimizer, to automatically optimize MCP tool docstrings and descriptions. Because LLMs decide which tool to call based on its description, optimizing these docstrings can significantly improve tool-routing accuracy.
+
+### Running Optimization
+
+MCP servers supporting GEPA optimization include an optimization package and script under `gepa_opt/`:
+- `server/secops/gepa_opt/optimize_secops_mcp.py`
+- `server/secops-soar/gepa_opt/optimize_soar_mcp.py`
+- `server/gti/gepa_opt/optimize_gti_mcp.py`
+
+#### Prerequisites
+
+The optimizer runs against Google Cloud Vertex AI and requires the following environment variables (which can be configured in a `.env` file at the root of the project):
+- `GOOGLE_APPLICATION_CREDENTIALS`: Path to your GCP Service Account credentials JSON file.
+- `VERTEX_PROJECT`: The GCP project ID for Vertex AI execution.
+- `VERTEX_LOCATION`: The GCP location/region for Vertex AI execution (e.g., `us-central1`).
+
+If any of these are missing, the optimizer scripts will fail fast with a `ValueError`.
+
+#### Execution
+
+Run the optimizer script for the target server:
+```bash
+python server/secops-soar/gepa_opt/optimize_soar_mcp.py
+```
+
+### How it Works
+
+1. **Dataset**: A curated set of user queries, each paired with its expected tool call, is defined in `mcp_dataset.json` within each `gepa_opt/` directory.
+2. **Routing Evaluation**: GEPA executes mock queries against the tools using Vertex AI models, calculating a baseline routing accuracy.
+3. **Iterative Optimization**: GEPA generates candidate docstrings, evaluates them on the dataset, and updates the Python source files of the tools with the best-performing docstring formulations.
+
 ## Building Documentation
 
 The documentation uses Sphinx with MyST Markdown. To build the docs:

diff --git a/server/gti/gepa_opt/gepa_optimization_results.json b/server/gti/gepa_opt/gepa_optimization_results.json
@@ -0,0 +1,23 @@
+{
+  "best_score": 0.9166666666666666,
+  "optimized_tool_descriptions": {
+    "tool_description_search_threats": "Search threats in the Google Threat Intelligence platform. Threats are modeled as collections. Once you get collections from this tool, you can use get_collection_report to fetch the full reports and their relationships.",
+    "tool_description_search_campaigns": "Search threat campaigns in the Google Threat Intelligence platform. Campaigns are modeled as collections.",
+    "tool_description_search_threat_actors": "Search threat actors in the Google Threat Intelligence platform. Threat actors are modeled as collections.",
+    "tool_description_search_malware_families": "Search malware families in the Google Threat Intelligence platform. Malware families are modeled as collections.",
+    "tool_description_search_software_toolkits": "Search software toolkits (or just tools) in the Google Threat Intelligence platform. Software toolkits are modeled as collections.",
+    "tool_description_search_threat_reports": "Search threat reports in the Google Threat Intelligence platform. Threat reports are modeled as collections.",
+    "tool_description_search_vulnerabilities": "Search vulnerabilities (CVEs) in the Google Threat Intelligence platform. Vulnerabilities are modeled as collections.",
+    "tool_description_get_entities_related_to_a_domain": "Retrieve entities related to the given domain. Available relationships: associations, caa_records, campaigns, cname_records, collections, comments, communicating_files, downloaded_files, graphs, historical_ssl_certificates, historical_whois, immediate_parent, malware_families, memory_pattern_parents, mx_records, ns_records, parent, referrer_files, related_comments, related_reports, related_threat_actors, reports, resolutions, siblings, soa_records, software_toolkits, subdomains, urls, user_votes, votes, vulnerabilities.",
+    "tool_description_get_entities_related_to_an_url": "Retrieve entities related to the given URL. Available relationships: analyses, associations, campaigns, collections, comments, communicating_files, contacted_domains, contacted_ips, downloaded_files, embedded_js_files, last_serving_ip_address, malware_families, parent_resource_urls, redirects_to, referrer_files, referrer_urls.",
+    "tool_description_get_entities_related_to_an_ip_address": "Retrieve entities related to the given IP address. Available relationships: associations, campaigns, collections, comments, communicating_files, downloaded_files, graphs, historical_ssl_certificates, historical_whois, malware_families, memory_pattern_parents, referrer_files, related_comments, related_reports, related_threat_actors, reports, resolutions, software_toolkits, urls, user_votes, votes, vulnerabilities.",
+    "tool_description_get_entities_related_to_a_file": "Retrieve entities related to the given file hash. Available relationships: analyses, behaviors, carbonblack_children, carbonblack_parents, compressed_parents, contacted_domains, contacted_ips, contacted_urls, dropped_files, execution_parents, itw_domains, itw_urls, metadata, memory_pattern_domains, memory_pattern_ips, mutexes_created, mutexes_opened, overlay_children, overlay_parents, pcap_parents, pe_resource_children, pe_resource_parents, popular_threat_category, suggested_threat_label, yara_rules.",
+    "tool_description_get_domain_report": "Get a comprehensive domain analysis report from Google Threat Intelligence. Provides attributes, threat classification, and historical metadata for a domain.",
+    "tool_description_get_ip_address_report": "Get a comprehensive IP Address analysis report from Google Threat Intelligence. Provides geolocation, autonomous system details, and threat reputation data.",
+    "tool_description_get_url_report": "Get a comprehensive URL analysis report from Google Threat Intelligence. Provides security analysis, categorizations, and threat category classifications.",
+    "tool_description_get_file_report": "Get a comprehensive file analysis report using its hash (MD5/SHA-1/SHA-256). Provides detection stats, threat classifications, and static metadata.",
+    "tool_description_get_file_behavior_summary": "Retrieve a summary of all sandbox execution reports and dynamic analysis details for a file.",
+    "tool_description_get_file_behavior_report": "Retrieve a full, detailed sandbox behavior report using a behavior ID formatted as {hash}_{sandbox}.",
+    "tool_description_search_iocs": "Search Indicators of Compromise (IOC) in the Google Threat Intelligence platform using VirusTotal query search modifiers."
+  }
+}
diff --git a/server/gti/gepa_opt/mcp_dataset.json b/server/gti/gepa_opt/mcp_dataset.json
@@ -0,0 +1,104 @@
+[
+  {
+    "user_query": "Search for threat actor profiles related to APT28",
+    "tool_arguments": {
+      "query": "APT28"
+    },
+    "reference_answer": "search_threat_actors",
+    "additional_context": {}
+  },
+  {
+    "user_query": "Find campaigns associated with Sandworm",
+    "tool_arguments": {
+      "query": "Sandworm"
+    },
+    "reference_answer": "search_campaigns",
+    "additional_context": {}
+  },
+  {
+    "user_query": "Show me reports and profiles for the malware family known as Emotet",
+    "tool_arguments": {
+      "query": "Emotet"
+    },
+    "reference_answer": "search_malware_families",
+    "additional_context": {}
+  },
+  {
+    "user_query": "Look up details for the Apache Log4j vulnerability CVE-2021-44228",
+    "tool_arguments": {
+      "query": "CVE-2021-44228"
+    },
+    "reference_answer": "search_vulnerabilities",
+    "additional_context": {}
+  },
+  {
+    "user_query": "Get a comprehensive analysis report for the domain badsite.com",
+    "tool_arguments": {
+      "domain": "badsite.com"
+    },
+    "reference_answer": "get_domain_report",
+    "additional_context": {}
+  },
+  {
+    "user_query": "Check file reputation for the SHA256 hash 275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f",
+    "tool_arguments": {
+      "hash": "275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f"
+    },
+    "reference_answer": "get_file_report",
+    "additional_context": {}
+  },
+  {
+    "user_query": "Retrieve sandbox behavior summary for file 275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f",
+    "tool_arguments": {
+      "hash": "275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f"
+    },
+    "reference_answer": "get_file_behavior_summary",
+    "additional_context": {}
+  },
+  {
+    "user_query": "Show me the sandbox behavior report for id 275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f_Jujubox",
+    "tool_arguments": {
+      "file_behaviour_id": "275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f_Jujubox"
+    },
+    "reference_answer": "get_file_behavior_report",
+    "additional_context": {}
+  },
+  {
+    "user_query": "Search for indicators of compromise related to port 4444 connection in files",
+    "tool_arguments": {
+      "query": "entity:file behaviour_network:4444"
+    },
+    "reference_answer": "search_iocs",
+    "additional_context": {}
+  },
+  {
+    "user_query": "What IP addresses does the domain malicious.xyz resolve to?",
+    "tool_arguments": {
+      "domain": "malicious.xyz",
+      "relationship_name": "resolutions",
+      "descriptors_only": true
+    },
+    "reference_answer": "get_entities_related_to_a_domain",
+    "additional_context": {}
+  },
+  {
+    "user_query": "Find files that communicate with the URL http://malicious-link.com/download",
+    "tool_arguments": {
+      "url": "http://malicious-link.com/download",
+      "relationship_name": "communicating_files",
+      "descriptors_only": true
+    },
+    "reference_answer": "get_entities_related_to_an_url",
+    "additional_context": {}
+  },
+  {
+    "user_query": "List comments posted on the IP 8.8.8.8",
+    "tool_arguments": {
+      "ip_address": "8.8.8.8",
+      "relationship_name": "comments",
+      "descriptors_only": false
+    },
+    "reference_answer": "get_entities_related_to_an_ip_address",
+    "additional_context": {}
+  }
+]