From 2f817aa55570650218694adbd5b95da4f3022c6f Mon Sep 17 00:00:00 2001 From: Brad Hilton Date: Tue, 19 May 2026 12:13:48 -0600 Subject: [PATCH 1/2] Preserve thinking content in Tinker server chat templates Co-authored-by: Cursor --- src/art/tinker/server.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/art/tinker/server.py b/src/art/tinker/server.py index 328d9a976..3221a91fe 100644 --- a/src/art/tinker/server.py +++ b/src/art/tinker/server.py @@ -556,19 +556,18 @@ async def prompt_tokens( ) -> list[int]: normalized_messages = _normalize_qwen3_dot_messages(base_model, messages) tokenizer = self._get_renderer(base_model).tokenizer - if _chat_template_disables_thinking(base_model): - encoding = tokenizer.apply_chat_template( - cast(Any, normalized_messages), - tools=cast(Any, tools), - add_generation_prompt=True, - enable_thinking=False, - ) - else: - encoding = tokenizer.apply_chat_template( - cast(Any, normalized_messages), - tools=cast(Any, tools), - add_generation_prompt=True, - ) + chat_template_kwargs = {} + if isinstance(tokenizer.chat_template, str): + if "enable_thinking" in tokenizer.chat_template: + chat_template_kwargs["enable_thinking"] = False + if "preserve_thinking" in tokenizer.chat_template: + chat_template_kwargs["preserve_thinking"] = True + encoding = tokenizer.apply_chat_template( + cast(Any, normalized_messages), + tools=cast(Any, tools), + add_generation_prompt=True, + **chat_template_kwargs, + ) if isinstance(encoding, BatchEncoding): return encoding.input_ids else: From f43a55f97ba686579548ca1fd77859b9780982b0 Mon Sep 17 00:00:00 2001 From: Brad Hilton Date: Tue, 19 May 2026 12:14:21 -0600 Subject: [PATCH 2/2] Remove unused function for chat template thinking disable check in Tinker server --- src/art/tinker/server.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/art/tinker/server.py b/src/art/tinker/server.py index 3221a91fe..9f9341895 100644 --- a/src/art/tinker/server.py +++ b/src/art/tinker/server.py @@ -113,10 +113,6 @@ def _normalize_qwen3_dot_messages( return normalized_messages -def _chat_template_disables_thinking(base_model: str) -> bool: - return is_qwen3_dot_family_model(base_model) - - @dataclass class OpenAICompatibleTinkerServer: host: str | None = None