HKUDS · Mcy0618 · May 17, 2026 · May 17, 2026 · May 23, 2026 · May 23, 2026
diff --git a/.catpaw/rules/python-launcher.md b/.catpaw/rules/python-launcher.md
@@ -0,0 +1,23 @@
+---
+ruleType: Manual
+description: 本地环境优先使用 py 命令运行 Python 脚本
+globs: 
+---
+rule编写规则: https://catpaw.meituan.com/guides/settings/rules
+
+# 本地 Python 命令约定
+
+在 Windows 本地环境中，优先使用 `py` 而非 `python` 来运行 Python 脚本。
+
+## 原因
+
+- **`py`** 是 Windows 上的 Python Launcher（`py.exe`），安装时写入 `C:\Windows\`，始终在 PATH 中。
+- **`python`** 的可执行目录可能未加入 PATH（安装时未勾选 "Add Python to PATH"），导致命令不可用。
+- `py` 支持多版本选择（如 `py -3.11`、`py -3.12`），更灵活可靠。
+
+## 执行规则
+
+- 运行 Python 脚本时使用 `py script.py` 而非 `python script.py`
+- 安装包时使用 `py -m pip install <package>` 而非 `python -m pip install <package>`
+- 指定版本时使用 `py -3.x` 格式
+- 运行模块时使用 `py -m <module>` 格式
diff --git a/TODO.md b/TODO.md
@@ -0,0 +1,12 @@
+# TODO
+- [ ] [context.py] 在 build_runtime_system_prompt() 中新增 PLAN 模式感知段落（类似 fast_mode），告知 LLM 当前处于只读分析模式
+- [ ] [runtime.py] 在 refresh_runtime_client() 中调用 build_runtime_system_prompt() 重建 system prompt，使模式切换后 LLM 立即感知新模式
+- [ ] [runtime.py] 验证 refresh_runtime_client() 中 bundle 对象的必要属性（cwd、extra_skill_dirs、extra_plugin_roots、include_project_memory）在调用点可用
+- [ ] [测试] 编写/验证测试用例：进入 plan 模式后 system prompt 包含 plan mode 提示，退出后不再包含
+- [x] [context.py] 在 build_runtime_system_prompt() 中新增 PLAN 模式感知段落 — 已完成
+- [x] [runtime.py] 在 refresh_runtime_client() 中重建 system prompt — 已完成
+- [x] [runtime.py] 可用性验证 — RuntimeBundle 所有必要属性均已存在 — 已完成
+- [x] [测试] 编写/验证测试用例 — 所有 prompt 相关测试通过，权限模式切换逻辑验证通过
+- [x] 创建 clipboard_screenshot_tool.py 工具实现
+- [x] 在 __init__.py 中注册新工具
+- [x] 创建单元测试 test_clipboard_screenshot_tool.py
diff --git a/frontend/terminal/src/components/TranscriptPane.tsx b/frontend/terminal/src/components/TranscriptPane.tsx
@@ -32,6 +32,8 @@ function labelFor(role: TranscriptItem['role']): string {
 			return 'tool>';
 		case 'tool_result':
 			return 'tool_result>';
+		case 'thinking':
+			return 'Think:';
 		default:
 			return `${role}>`;
 	}
@@ -41,6 +43,9 @@ function roleColor(role: TranscriptItem['role']): string | undefined {
 	if (role === 'assistant') {
 		return 'green';
 	}
+	if (role === 'thinking') {
+		return 'gray';
+	}
 	if (role === 'tool') {
 		return 'cyan';
 	}

diff --git a/frontend/terminal/src/hooks/useBackendSession.ts b/frontend/terminal/src/hooks/useBackendSession.ts
@@ -52,6 +52,9 @@ export function useBackendSession(config: FrontendConfig, onExit: (code?: number
 	const assistantFlushTimerRef = useRef<NodeJS.Timeout | null>(null);
 	const pendingTranscriptItemsRef = useRef<TranscriptItem[]>([]);
 	const transcriptFlushTimerRef = useRef<NodeJS.Timeout | null>(null);
+	// Thinking content buffer to accumulate chunks into single item
+	const thinkingBufferRef = useRef('');
+	const thinkingActiveRef = useRef(false);
 
 	const flushAssistantDelta = (): void => {
 		const pending = pendingAssistantDeltaRef.current;
@@ -294,11 +297,34 @@ export function useBackendSession(config: FrontendConfig, onExit: (code?: number
 			}
 			return;
 		}
+		if (event.type === 'thinking_delta') {
+			const delta = event.message ?? '';
+			if (!delta) {
+				return;
+			}
+			// Accumulate thinking content instead of creating separate items
+			if (!thinkingActiveRef.current) {
+				thinkingActiveRef.current = true;
+				thinkingBufferRef.current = delta;
+			} else {
+				thinkingBufferRef.current += delta;
+			}
+			return;
+		}
 		if (event.type === 'assistant_delta') {
 			const delta = event.message ?? '';
 			if (!delta) {
 				return;
 			}
+			// Flush accumulated thinking content before first assistant delta
+			if (thinkingActiveRef.current) {
+				const thinkingContent = thinkingBufferRef.current.trim();
+				if (thinkingContent) {
+					queueTranscriptItem({role: 'thinking', text: thinkingContent});
+				}
+				thinkingActiveRef.current = false;
+				thinkingBufferRef.current = '';
+			}
 			const isCodexStyle = String(statusRef.current.output_style ?? 'default') === 'codex';
 			if (isCodexStyle) {
 				// Keep collecting text for assistant_complete fallback, but avoid
@@ -325,6 +351,15 @@ export function useBackendSession(config: FrontendConfig, onExit: (code?: number
 				assistantFlushTimerRef.current = null;
 			}
 			flushTranscriptItems();
+			// Flush any remaining thinking content before assistant_complete
+			if (thinkingActiveRef.current) {
+				const thinkingContent = thinkingBufferRef.current.trim();
+				if (thinkingContent) {
+					pendingTranscriptItemsRef.current.push({role: 'thinking', text: thinkingContent});
+				}
+				thinkingActiveRef.current = false;
+				thinkingBufferRef.current = '';
+			}
 			const isCodexStyle = String(statusRef.current.output_style ?? 'default') === 'codex';
 			if (isCodexStyle) {
 				if (pendingAssistantDeltaRef.current) {

diff --git a/frontend/terminal/src/types.ts b/frontend/terminal/src/types.ts
@@ -4,7 +4,7 @@ export type FrontendConfig = {
 };
 
 export type TranscriptItem = {
-	role: 'system' | 'user' | 'assistant' | 'tool' | 'tool_result' | 'log' | 'status';
+	role: 'system' | 'user' | 'assistant' | 'thinking' | 'tool' | 'tool_result' | 'log' | 'status';
 	text: string;
 	tool_name?: string;
 	tool_input?: Record<string, unknown>;

diff --git a/pyproject.toml b/pyproject.toml
@@ -43,6 +43,7 @@ dev = [
     "pytest-cov>=5.0.0",
     "ruff>=0.5.0",
     "mypy>=1.10.0",
+    "Pillow>=10.0.0",
 ]
 
 [project.scripts]

diff --git a/src/openharness/api/client.py b/src/openharness/api/client.py
@@ -46,6 +46,7 @@ class ApiMessageRequest:
     max_tokens: int = 4096
     tools: list[dict[str, Any]] = field(default_factory=list)
     effort: str | None = None
+    show_thinking: bool = False
 
 
 @dataclass(frozen=True)
@@ -74,7 +75,14 @@ class ApiRetryEvent:
     delay_seconds: float
 
 
-ApiStreamEvent = ApiTextDeltaEvent | ApiMessageCompleteEvent | ApiRetryEvent
+@dataclass(frozen=True)
+class ApiThinkingDeltaEvent:
+    """Stream event carrying one incremental chunk of the model's thinking/reasoning text."""
+
+    text: str  # The thinking text carried by this chunk.
+
+
+ApiStreamEvent = ApiTextDeltaEvent | ApiThinkingDeltaEvent | ApiMessageCompleteEvent | ApiRetryEvent
 
 
 class SupportsStreamingMessages(Protocol):

diff --git a/src/openharness/api/copilot_client.py b/src/openharness/api/copilot_client.py
@@ -125,6 +125,7 @@ async def stream_message(self, request: ApiMessageRequest) -> AsyncIterator[ApiS
             system_prompt=request.system_prompt,
             max_tokens=request.max_tokens,
             tools=request.tools,
+            show_thinking=request.show_thinking,
         )
         async for event in self._inner.stream_message(patched):
             yield event
diff --git a/src/openharness/api/openai_client.py b/src/openharness/api/openai_client.py
@@ -18,6 +18,7 @@
     ApiRetryEvent,
     ApiStreamEvent,
     ApiTextDeltaEvent,
+    ApiThinkingDeltaEvent,
 )
 from openharness.api.errors import (
     AuthenticationFailure,
@@ -334,7 +335,7 @@ async def _stream_once(self, request: ApiMessageRequest) -> AsyncIterator[ApiStr
         collected_tool_calls: dict[int, dict[str, Any]] = {}
         finish_reason: str | None = None
         usage_data: dict[str, int] = {}
-        # Buffer to strip inline <think>…</think> blocks across streaming chunks.
+        # Buffer to strip (or reclassify) inline <think>…</think> blocks across streaming chunks.
         _think_buf = ""
 
         stream = await self._client.chat.completions.create(**params)
@@ -354,18 +355,32 @@ async def _stream_once(self, request: ApiMessageRequest) -> AsyncIterator[ApiStr
             if chunk_finish:
                 finish_reason = chunk_finish
 
-            # Accumulate reasoning_content from thinking models (not shown to user)
+            # Accumulate reasoning_content from thinking models
             reasoning_piece = getattr(delta, "reasoning_content", None) or ""
             if reasoning_piece:
                 collected_reasoning += reasoning_piece
+                if request.show_thinking:
+                    yield ApiThinkingDeltaEvent(text=reasoning_piece)
 
-            # Stream text content to user, stripping inline <think> blocks
+            # Stream text content to user
             if delta.content:
                 _think_buf += delta.content
-                visible, _think_buf = _strip_think_blocks(_think_buf)
-                if visible:
-                    collected_content += visible
-                    yield ApiTextDeltaEvent(text=visible)
+                if request.show_thinking:
+                    # Convert inline <think>…</think> blocks into classified segments
+                    segments, _think_buf = _convert_think_blocks_display(_think_buf)
+                    for text, is_thinking in segments:
+                        if not text:
+                            continue
+                        if is_thinking:
+                            yield ApiThinkingDeltaEvent(text=text)
+                        else:
+                            collected_content += text
+                            yield ApiTextDeltaEvent(text=text)
+                else:
+                    visible, _think_buf = _strip_think_blocks(_think_buf)
+                    if visible:
+                        collected_content += visible
+                        yield ApiTextDeltaEvent(text=visible)
 
             # Accumulate tool calls
             if delta.tool_calls:
@@ -449,32 +464,79 @@ def _translate_error(exc: Exception) -> OpenHarnessApiError:
         return RequestFailure(msg)
 
 
-# Matches complete <think>…</think> blocks (DOTALL so newlines are included).
-_THINK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)
+# Matches complete <think>…</think> blocks, capturing the inner text (DOTALL so newlines are included).
+_THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)
 _THINK_OPEN_TAG = "<think>"
+_THINK_CLOSE_TAG = "</think>"
 
 
 def _strip_think_blocks(buf: str) -> tuple[str, str]:
-    """Strip complete ``<think>…</think>`` blocks and return ``(visible_text, leftover)``.
+    """Strip complete ``<think>…</think>`` blocks and return ``(visible_text, leftover)``.
 
-    Complete pairs are removed via regex.  An unclosed ``<think>`` is held in
+    Complete pairs are removed via regex.  An unclosed ``<think>`` is held in
     *leftover* so it can be re-evaluated once the closing tag arrives in the
     next streaming chunk.
     """
     # Remove fully-closed blocks.
     cleaned = _THINK_RE.sub("", buf)
 
-    # Hold back any unclosed <think> for the next chunk.
+    # Hold back any unclosed <think> for the next chunk.
     open_idx = cleaned.find(_THINK_OPEN_TAG)
     if open_idx != -1:
         return cleaned[:open_idx], cleaned[open_idx:]
 
     # Streaming providers may split the opening tag itself across chunk
     # boundaries (e.g. ``"<thi"`` then ``"nk>..."``). Hold back the longest
-    # suffix that could still become ``<think>`` on the next chunk.
+    # suffix that could still become ``<think>`` on the next chunk.
     max_prefix = min(len(cleaned), len(_THINK_OPEN_TAG) - 1)
     for prefix_len in range(max_prefix, 0, -1):
         if _THINK_OPEN_TAG.startswith(cleaned[-prefix_len:]):
             return cleaned[:-prefix_len], cleaned[-prefix_len:]
 
     return cleaned, ""
+
+
+def _convert_think_blocks_display(buf: str) -> tuple[list[tuple[str, bool]], str]:
+    """Convert ``<think>…</think>`` blocks into classified segments.
+
+    Instead of stripping thinking content, this extracts it and classifies
+    each segment as thinking or normal text so the caller can emit the
+    appropriate event type.
+    Returns ``(segments, leftover)`` where each segment is ``(text, is_thinking)``
+    and *leftover* holds an unclosed ``<think>`` block, or a trailing partial
+    opening tag, to be re-evaluated with the next streaming chunk.
+    """
+    segments: list[tuple[str, bool]] = []
+    pos = 0
+    while True:
+        open_idx = buf.find(_THINK_OPEN_TAG, pos)
+        if open_idx == -1:
+            # No more opening tags; flush remaining text
+            remaining = buf[pos:]
+            # Check if the tail could be a partial opening tag (longest candidate first)
+            max_prefix = min(len(remaining), len(_THINK_OPEN_TAG) - 1)
+            for prefix_len in range(max_prefix, 0, -1):
+                if _THINK_OPEN_TAG.startswith(remaining[-prefix_len:]):
+                    if remaining[:-prefix_len]:
+                        segments.append((remaining[:-prefix_len], False))
+                    return segments, remaining[-prefix_len:]
+            if remaining:
+                segments.append((remaining, False))
+            return segments, ""
+
+        # Text before the opening tag is emitted as normal (non-thinking) text
+        if open_idx > pos:
+            segments.append((buf[pos:open_idx], False))
+
+        close_idx = buf.find(_THINK_CLOSE_TAG, open_idx + len(_THINK_OPEN_TAG))
+        if close_idx == -1:
+            # Unclosed block — hold back everything from the opening tag onward
+            return segments, buf[open_idx:]
+
+        # Extract thinking content (surrounding whitespace is trimmed; empty blocks are dropped)
+        think_content = buf[open_idx + len(_THINK_OPEN_TAG):close_idx].strip()
+        if think_content:
+            segments.append((think_content, True))
+        pos = close_idx + len(_THINK_CLOSE_TAG)
+
+    return segments, ""  # NOTE(review): unreachable — the ``while True`` loop only exits via return.
diff --git a/src/openharness/autopilot/service.py b/src/openharness/autopilot/service.py
@@ -36,7 +36,7 @@
     get_project_repo_journal_path,
     get_project_verification_policy_path,
 )
-from openharness.engine.stream_events import AssistantTextDelta, AssistantTurnComplete, ErrorEvent
+from openharness.engine.stream_events import AssistantTextDelta, AssistantThinkingDelta, AssistantTurnComplete, ErrorEvent
 from openharness.swarm.worktree import WorktreeManager
 from openharness.utils.fs import atomic_write_text
 
@@ -2067,7 +2067,9 @@ async def _ask(_question: str) -> str:
         collected: list[str] = []
         try:
             async for event in bundle.engine.submit_message(prompt):
-                if isinstance(event, AssistantTextDelta):
+                if isinstance(event, AssistantThinkingDelta):
+                    pass
+                elif isinstance(event, AssistantTextDelta):
                     collected.append(event.text)
                 elif isinstance(event, AssistantTurnComplete):
                     text = event.message.text.strip()

diff --git a/src/openharness/channels/adapter.py b/src/openharness/channels/adapter.py
@@ -18,7 +18,7 @@
 
 from openharness.channels.bus.events import InboundMessage, OutboundMessage
 from openharness.channels.bus.queue import MessageBus
-from openharness.engine.stream_events import AssistantTextDelta, AssistantTurnComplete
+from openharness.engine.stream_events import AssistantTextDelta, AssistantThinkingDelta, AssistantTurnComplete
 
 if TYPE_CHECKING:
     from openharness.engine.query_engine import QueryEngine
@@ -98,7 +98,10 @@ async def _handle(self, msg: InboundMessage) -> None:
         reply_parts: list[str] = []
         try:
             async for event in self._engine.submit_message(msg.content):
-                if isinstance(event, AssistantTextDelta):
+                if isinstance(event, AssistantThinkingDelta):
+                    # Thinking content is omitted from channel replies
+                    pass
+                elif isinstance(event, AssistantTextDelta):
                     reply_parts.append(event.text)
                 elif isinstance(event, AssistantTurnComplete):
                     # Turn is done; we'll send the accumulated text below

diff --git a/src/openharness/cli.py b/src/openharness/cli.py
@@ -2162,6 +2162,12 @@ def main(
         help="Override verbose mode setting from config",
         rich_help_panel="Model & Effort",
     ),
+    show_thinking: bool = typer.Option(
+        False,
+        "--show-thinking",
+        help="Show model thinking/reasoning process in the output",
+        rich_help_panel="Model & Effort",
+    ),
     max_turns: int | None = typer.Option(
         None,
         "--max-turns",
@@ -2446,6 +2452,7 @@ def main(
                 permission_mode=permission_mode,
                 max_turns=max_turns,
                 effort=effort,
+                show_thinking=show_thinking or None,
             )
         )
         return
@@ -2479,5 +2486,6 @@ def main(
             api_format=api_format,
             permission_mode=permission_mode,
             effort=effort,
+            show_thinking=show_thinking or None,
         )
     )