diff --git a/yourbench/utils/convert_to_atlas_module.py b/yourbench/utils/convert_to_atlas_module.py index c32bda4a..0dbc1d50 100644 --- a/yourbench/utils/convert_to_atlas_module.py +++ b/yourbench/utils/convert_to_atlas_module.py @@ -13,7 +13,7 @@ def _scorer_yaml(name: str) -> str: type: llm_judge options: regex_pattern: '' - judge_model: google_gemini-2.5-flash-preview-05-20 + judge_model: google_gemini-2.5-flash judge_prompt: |- Your job is to look at a question, a gold target, and a predicted answer, and then assign a grade of either ["CORRECT", "INCORRECT", "NOT_ATTEMPTED"]. First, I will give examples of each grade, and then you will grade a new example.