From 219c32018b574b483603ece7c31df35d6a99fce0 Mon Sep 17 00:00:00 2001
From: Cyril Guilleminot <bridgecommerceholding@gmail.com>
Date: Mon, 13 Apr 2026 19:32:13 +0300
Subject: [PATCH] fix: UTF-8 decoding corrupts multi-byte characters in
 streaming
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SSE reader called resp.read(1) and decoded each byte on its own
as UTF-8 with errors="replace". Multi-byte characters (é, à, 中, emoji,
etc.) span 2–4 bytes, so every byte of such a character was replaced
by U+FFFD, producing garbled output for any non-ASCII text.

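Fixed by reading 4 KiB chunks and feeding them through an incremental
UTF-8 decoder, which correctly handles multi-byte sequences that span
chunk boundaries. At end of stream the decoder is flushed with
final=True so any incomplete trailing sequence is appended to the
buffer rather than silently dropped.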
---
 agent.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/agent.py b/agent.py
index 0a97816..cb8a2c0 100755
--- a/agent.py
+++ b/agent.py
@@ -3,7 +3,7 @@
 mac code — claude code for your Mac
 """
 
-import json, sys, os, time, subprocess, re, threading, queue
+import json, sys, os, time, subprocess, re, threading, queue, codecs
 import urllib.request, random
 from datetime import datetime
 from pathlib import Path
@@ -543,11 +543,13 @@ def stream_llm(messages):
 
     with urllib.request.urlopen(req, timeout=300) as resp:
         buf = ""
+        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
         while True:
-            ch = resp.read(1)
+            ch = resp.read(4096)
             if not ch:
+                buf += decoder.decode(b"", final=True)
                 break
-            buf += ch.decode("utf-8", errors="replace")
+            buf += decoder.decode(ch)
             while "\n" in buf:
                 line, buf = buf.split("\n", 1)
                 line = line.strip()