From 6814a5749989cbc2bc440602e7f95a9dee057646 Mon Sep 17 00:00:00 2001
From: Anuj Attri
Date: Thu, 4 Jun 2026 21:25:53 -0400
Subject: [PATCH] server : return HTTP 400 on invalid grammar (#24144)

Throw on grammar parse failure so the server returns HTTP 400 instead of
silently dropping the constraint. Add a regression test for the
invalid-grammar response.

Fixes #24144
---
 common/sampling.cpp                             |  3 +++
 tools/server/tests/unit/test_chat_completion.py | 14 ++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/common/sampling.cpp b/common/sampling.cpp
index 85f8ed50b35..3345c53e139 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -259,6 +259,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
             }
         }
     }
+    if (!grmr && !grammar_str.empty()) {
+        throw std::runtime_error("failed to parse grammar");
+    }
 
     // Compute prefill tokens from the generation prompt
     std::vector<llama_token> prefill_tokens;
diff --git a/tools/server/tests/unit/test_chat_completion.py b/tools/server/tests/unit/test_chat_completion.py
index f80e46133c7..2c18868f771 100644
--- a/tools/server/tests/unit/test_chat_completion.py
+++ b/tools/server/tests/unit/test_chat_completion.py
@@ -307,6 +307,20 @@ def test_completion_with_grammar(jinja: bool, grammar: str, n_predicted: int, re
     assert match_regex(re_content, choice["message"]["content"]), choice["message"]["content"]
 
 
+def test_completion_with_invalid_grammar():
+    global server
+    server.start()
+    res = server.make_request("POST", "/chat/completions", data={
+        "max_tokens": 8,
+        "messages": [
+            {"role": "user", "content": "Does not matter what I say, does it?"},
+        ],
+        "grammar": "root ::= this is (not valid GBNF",
+    })
+    assert res.status_code == 400, res.body
+    assert "error" in res.body
+
+
 @pytest.mark.parametrize("messages", [
     None,
     "string",