From 6814a5749989cbc2bc440602e7f95a9dee057646 Mon Sep 17 00:00:00 2001
From: Anuj Attri <anujattri01@gmail.com>
Date: Thu, 4 Jun 2026 21:25:53 -0400
Subject: [PATCH] server : return HTTP 400 on invalid grammar (#24144)

Throw on grammar parse failure so the server returns HTTP 400
instead of silently dropping the constraint.
Add a regression test for the invalid-grammar response.

Fixes #24144
---
 common/sampling.cpp                             |  3 +++
 tools/server/tests/unit/test_chat_completion.py | 14 ++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/common/sampling.cpp b/common/sampling.cpp
index 85f8ed50b35..3345c53e139 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -259,6 +259,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
              }
         }
     }
+    if (!grmr && !grammar_str.empty()) {
+        throw std::runtime_error("failed to parse grammar");
+    }
 
     // Compute prefill tokens from the generation prompt
     std::vector<llama_token> prefill_tokens;
diff --git a/tools/server/tests/unit/test_chat_completion.py b/tools/server/tests/unit/test_chat_completion.py
index f80e46133c7..2c18868f771 100644
--- a/tools/server/tests/unit/test_chat_completion.py
+++ b/tools/server/tests/unit/test_chat_completion.py
@@ -307,6 +307,20 @@ def test_completion_with_grammar(jinja: bool, grammar: str, n_predicted: int, re
     assert match_regex(re_content, choice["message"]["content"]), choice["message"]["content"]
 
 
+def test_completion_with_invalid_grammar():  # regression test for #24144: a grammar that fails to parse must yield HTTP 400
+    global server
+    server.start()
+    res = server.make_request("POST", "/chat/completions", data={
+        "max_tokens": 8,
+        "messages": [
+            {"role": "user", "content": "Does not matter what I say, does it?"},
+        ],
+        "grammar": "root ::= this is (not valid GBNF",  # deliberately malformed: the "(" is never closed
+    })
+    assert res.status_code == 400, res.body  # res.body doubles as the failure message for easier debugging
+    assert "error" in res.body  # the rejection must be reported in the response body
+
+
 @pytest.mark.parametrize("messages", [
     None,
     "string",