ggml-org · Anuj-Attri · Jun 5, 2026
@@ -259,6 +259,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
              }
         }
     }
+    if (!grmr && !grammar_str.empty()) {
+        throw std::runtime_error("failed to parse grammar");
+    }
 
     // Compute prefill tokens from the generation prompt
     std::vector<llama_token> prefill_tokens;

@@ -307,6 +307,20 @@ def test_completion_with_grammar(jinja: bool, grammar: str, n_predicted: int, re
     assert match_regex(re_content, choice["message"]["content"]), choice["message"]["content"]
 
 
+def test_completion_with_invalid_grammar():
+    global server
+    server.start()
+    res = server.make_request("POST", "/chat/completions", data={
+        "max_tokens": 8,
+        "messages": [
+            {"role": "user", "content": "Does not matter what I say, does it?"},
+        ],
+        "grammar": "root ::= this is (not valid GBNF",
+    })
+    assert res.status_code == 400, res.body
+    assert "error" in res.body
+
+
 @pytest.mark.parametrize("messages", [
     None,
     "string",