Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .catpaw/rules/python-launcher.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
ruleType: Manual
description: 本地环境优先使用 py 命令运行 Python 脚本
globs:
---
rule编写规则: https://catpaw.meituan.com/guides/settings/rules

# 本地 Python 命令约定

在 Windows 本地环境中,优先使用 `py` 而非 `python` 来运行 Python 脚本。

## 原因

- **`py`** 是 Windows 上的 Python Launcher(`py.exe`),安装时写入 `C:\Windows\`,始终在 PATH 中。
- **`python`** 的可执行目录可能未加入 PATH(安装时未勾选 "Add Python to PATH"),导致命令不可用。
- `py` 支持多版本选择(如 `py -3.11`、`py -3.12`),更灵活可靠。

## 执行规则

- 运行 Python 脚本时使用 `py script.py` 而非 `python script.py`
- 安装包时使用 `py -m pip install <package>` 而非 `python -m pip install <package>`
- 指定版本时使用 `py -3.x` 格式
- 运行模块时使用 `py -m <module>` 格式
12 changes: 12 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# TODO
- [ ] [context.py] 在 build_runtime_system_prompt() 中新增 PLAN 模式感知段落(类似 fast_mode),告知 LLM 当前处于只读分析模式
- [ ] [runtime.py] 在 refresh_runtime_client() 中调用 build_runtime_system_prompt() 重建 system prompt,使模式切换后 LLM 立即感知新模式
- [ ] [runtime.py] 验证 refresh_runtime_client() 中 bundle 对象的必要属性(cwd、extra_skill_dirs、extra_plugin_roots、include_project_memory)在调用点可用
- [ ] [测试] 编写/验证测试用例:进入 plan 模式后 system prompt 包含 plan mode 提示,退出后不再包含
- [x] [context.py] 在 build_runtime_system_prompt() 中新增 PLAN 模式感知段落 — 已完成
- [x] [runtime.py] 在 refresh_runtime_client() 中重建 system prompt — 已完成
- [x] [runtime.py] 可用性验证 — RuntimeBundle 所有必要属性均已存在 — 已完成
- [x] [测试] 编写/验证测试用例 — 所有 prompt 相关测试通过,权限模式切换逻辑验证通过
- [x] 创建 clipboard_screenshot_tool.py 工具实现
- [x] 在 __init__.py 中注册新工具
- [x] 创建单元测试 test_clipboard_screenshot_tool.py
5 changes: 5 additions & 0 deletions frontend/terminal/src/components/TranscriptPane.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ function labelFor(role: TranscriptItem['role']): string {
return 'tool>';
case 'tool_result':
return 'tool_result>';
case 'thinking':
return 'Think:';
default:
return `${role}>`;
}
Expand All @@ -41,6 +43,9 @@ function roleColor(role: TranscriptItem['role']): string | undefined {
if (role === 'assistant') {
return 'green';
}
if (role === 'thinking') {
return 'gray';
}
if (role === 'tool') {
return 'cyan';
}
Expand Down
35 changes: 35 additions & 0 deletions frontend/terminal/src/hooks/useBackendSession.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ export function useBackendSession(config: FrontendConfig, onExit: (code?: number
const assistantFlushTimerRef = useRef<NodeJS.Timeout | null>(null);
const pendingTranscriptItemsRef = useRef<TranscriptItem[]>([]);
const transcriptFlushTimerRef = useRef<NodeJS.Timeout | null>(null);
// Thinking content buffer to accumulate chunks into single item
const thinkingBufferRef = useRef('');
const thinkingActiveRef = useRef(false);

const flushAssistantDelta = (): void => {
const pending = pendingAssistantDeltaRef.current;
Expand Down Expand Up @@ -294,11 +297,34 @@ export function useBackendSession(config: FrontendConfig, onExit: (code?: number
}
return;
}
if (event.type === 'thinking_delta') {
const delta = event.message ?? '';
if (!delta) {
return;
}
// Accumulate thinking content instead of creating separate items
if (!thinkingActiveRef.current) {
thinkingActiveRef.current = true;
thinkingBufferRef.current = delta;
} else {
thinkingBufferRef.current += delta;
}
return;
}
if (event.type === 'assistant_delta') {
const delta = event.message ?? '';
if (!delta) {
return;
}
// Flush accumulated thinking content before first assistant delta
if (thinkingActiveRef.current) {
const thinkingContent = thinkingBufferRef.current.trim();
if (thinkingContent) {
queueTranscriptItem({role: 'thinking', text: thinkingContent});
}
thinkingActiveRef.current = false;
thinkingBufferRef.current = '';
}
const isCodexStyle = String(statusRef.current.output_style ?? 'default') === 'codex';
if (isCodexStyle) {
// Keep collecting text for assistant_complete fallback, but avoid
Expand All @@ -325,6 +351,15 @@ export function useBackendSession(config: FrontendConfig, onExit: (code?: number
assistantFlushTimerRef.current = null;
}
flushTranscriptItems();
// Flush any remaining thinking content before assistant_complete
if (thinkingActiveRef.current) {
const thinkingContent = thinkingBufferRef.current.trim();
if (thinkingContent) {
pendingTranscriptItemsRef.current.push({role: 'thinking', text: thinkingContent});
}
thinkingActiveRef.current = false;
thinkingBufferRef.current = '';
}
const isCodexStyle = String(statusRef.current.output_style ?? 'default') === 'codex';
if (isCodexStyle) {
if (pendingAssistantDeltaRef.current) {
Expand Down
2 changes: 1 addition & 1 deletion frontend/terminal/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export type FrontendConfig = {
};

export type TranscriptItem = {
role: 'system' | 'user' | 'assistant' | 'tool' | 'tool_result' | 'log' | 'status';
role: 'system' | 'user' | 'assistant' | 'thinking' | 'tool' | 'tool_result' | 'log' | 'status';
text: string;
tool_name?: string;
tool_input?: Record<string, unknown>;
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ dev = [
"pytest-cov>=5.0.0",
"ruff>=0.5.0",
"mypy>=1.10.0",
"Pillow>=10.0.0",
]

[project.scripts]
Expand Down
10 changes: 9 additions & 1 deletion src/openharness/api/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class ApiMessageRequest:
max_tokens: int = 4096
tools: list[dict[str, Any]] = field(default_factory=list)
effort: str | None = None
show_thinking: bool = False


@dataclass(frozen=True)
Expand Down Expand Up @@ -74,7 +75,14 @@ class ApiRetryEvent:
delay_seconds: float


ApiStreamEvent = ApiTextDeltaEvent | ApiMessageCompleteEvent | ApiRetryEvent
@dataclass(frozen=True)
class ApiThinkingDeltaEvent:
    """Incremental thinking/reasoning content from the model.

    Carries one chunk of reasoning text, kept separate from the
    user-visible answer text. The dataclass is frozen, so an instance
    cannot be modified after construction.
    """

    # One streamed fragment of reasoning text; fragments are expected to be
    # concatenated in arrival order by whoever consumes the stream.
    text: str


ApiStreamEvent = ApiTextDeltaEvent | ApiThinkingDeltaEvent | ApiMessageCompleteEvent | ApiRetryEvent


class SupportsStreamingMessages(Protocol):
Expand Down
1 change: 1 addition & 0 deletions src/openharness/api/copilot_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ async def stream_message(self, request: ApiMessageRequest) -> AsyncIterator[ApiS
system_prompt=request.system_prompt,
max_tokens=request.max_tokens,
tools=request.tools,
show_thinking=request.show_thinking,
)
async for event in self._inner.stream_message(patched):
yield event
88 changes: 75 additions & 13 deletions src/openharness/api/openai_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
ApiRetryEvent,
ApiStreamEvent,
ApiTextDeltaEvent,
ApiThinkingDeltaEvent,
)
from openharness.api.errors import (
AuthenticationFailure,
Expand Down Expand Up @@ -334,7 +335,7 @@ async def _stream_once(self, request: ApiMessageRequest) -> AsyncIterator[ApiStr
collected_tool_calls: dict[int, dict[str, Any]] = {}
finish_reason: str | None = None
usage_data: dict[str, int] = {}
# Buffer to strip inline <think>…</think> blocks across streaming chunks.
# Buffer for handling inline <think>…</think> blocks that span streaming chunks.
_think_buf = ""

stream = await self._client.chat.completions.create(**params)
Expand All @@ -354,18 +355,32 @@ async def _stream_once(self, request: ApiMessageRequest) -> AsyncIterator[ApiStr
if chunk_finish:
finish_reason = chunk_finish

# Accumulate reasoning_content from thinking models (not shown to user)
# Accumulate reasoning_content from thinking models
reasoning_piece = getattr(delta, "reasoning_content", None) or ""
if reasoning_piece:
collected_reasoning += reasoning_piece
if request.show_thinking:
yield ApiThinkingDeltaEvent(text=reasoning_piece)

# Stream text content to user, stripping inline <think> blocks
# Stream text content to user
if delta.content:
_think_buf += delta.content
visible, _think_buf = _strip_think_blocks(_think_buf)
if visible:
collected_content += visible
yield ApiTextDeltaEvent(text=visible)
if request.show_thinking:
# Convert inline <think>…</think> blocks into classified segments
segments, _think_buf = _convert_think_blocks_display(_think_buf)
for text, is_thinking in segments:
if not text:
continue
if is_thinking:
yield ApiThinkingDeltaEvent(text=text)
else:
collected_content += text
yield ApiTextDeltaEvent(text=text)
else:
visible, _think_buf = _strip_think_blocks(_think_buf)
if visible:
collected_content += visible
yield ApiTextDeltaEvent(text=visible)

# Accumulate tool calls
if delta.tool_calls:
Expand Down Expand Up @@ -449,32 +464,79 @@ def _translate_error(exc: Exception) -> OpenHarnessApiError:
return RequestFailure(msg)


# Matches complete <think>…</think> blocks (DOTALL so newlines are included).
_THINK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)
# Matches complete blocks (DOTALL so newlines are included).
_THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)
Comment on lines +467 to +468
_THINK_OPEN_TAG = "<think>"
_THINK_CLOSE_TAG = "</think>"


def _strip_think_blocks(buf: str) -> tuple[str, str]:
"""Strip complete ``<think>…</think>`` blocks and return ``(visible_text, leftover)``.
"""Strip complete ``...`` blocks and return ``(visible_text, leftover)``.

Complete pairs are removed via regex. An unclosed ``<think>`` is held in
Complete pairs are removed via regex. An unclosed ```` is held in
Comment on lines 473 to +476
*leftover* so it can be re-evaluated once the closing tag arrives in the
next streaming chunk.
"""
# Remove fully-closed blocks.
cleaned = _THINK_RE.sub("", buf)

# Hold back any unclosed <think> for the next chunk.
# Hold back any unclosed for the next chunk.
open_idx = cleaned.find(_THINK_OPEN_TAG)
if open_idx != -1:
return cleaned[:open_idx], cleaned[open_idx:]

# Streaming providers may split the opening tag itself across chunk
# boundaries (e.g. ``"<thi"`` then ``"nk>..."``). Hold back the longest
# suffix that could still become ``<think>`` on the next chunk.
# suffix that could still become ```` on the next chunk.
max_prefix = min(len(cleaned), len(_THINK_OPEN_TAG) - 1)
for prefix_len in range(max_prefix, 0, -1):
if _THINK_OPEN_TAG.startswith(cleaned[-prefix_len:]):
return cleaned[:-prefix_len], cleaned[-prefix_len:]

return cleaned, ""


def _convert_think_blocks_display(buf: str) -> tuple[list[tuple[str, bool]], str]:
"""Convert ``...`` blocks into classified segments.

Instead of stripping thinking content, this extracts it and classifies
each segment as thinking or normal text so the caller can emit the
appropriate event type.
Returns ``(segments, leftover)`` where each segment is
``(text, is_thinking)`` and *leftover* holds an unclosed ``
`` for the next streaming chunk.
"""
segments: list[tuple[str, bool]] = []
pos = 0
while True:
open_idx = buf.find(_THINK_OPEN_TAG, pos)
if open_idx == -1:
# No more opening tags; flush remaining text
remaining = buf[pos:]
# Check if the tail could be a partial opening tag
max_prefix = min(len(remaining), len(_THINK_OPEN_TAG) - 1)
for prefix_len in range(max_prefix, 0, -1):
if _THINK_OPEN_TAG.startswith(remaining[-prefix_len:]):
if remaining[:-prefix_len]:
segments.append((remaining[:-prefix_len], False))
return segments, remaining[-prefix_len:]
if remaining:
segments.append((remaining, False))
return segments, ""

# Text before the opening tag
if open_idx > pos:
segments.append((buf[pos:open_idx], False))

close_idx = buf.find(_THINK_CLOSE_TAG, open_idx + len(_THINK_OPEN_TAG))
if close_idx == -1:
# Unclosed block — hold back from the opening tag
return segments, buf[open_idx:]

# Extract thinking content
think_content = buf[open_idx + len(_THINK_OPEN_TAG):close_idx].strip()
if think_content:
segments.append((think_content, True))
pos = close_idx + len(_THINK_CLOSE_TAG)

return segments, ""
6 changes: 4 additions & 2 deletions src/openharness/autopilot/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
get_project_repo_journal_path,
get_project_verification_policy_path,
)
from openharness.engine.stream_events import AssistantTextDelta, AssistantTurnComplete, ErrorEvent
from openharness.engine.stream_events import AssistantTextDelta, AssistantThinkingDelta, AssistantTurnComplete, ErrorEvent
from openharness.swarm.worktree import WorktreeManager
from openharness.utils.fs import atomic_write_text

Expand Down Expand Up @@ -2067,7 +2067,9 @@ async def _ask(_question: str) -> str:
collected: list[str] = []
try:
async for event in bundle.engine.submit_message(prompt):
if isinstance(event, AssistantTextDelta):
if isinstance(event, AssistantThinkingDelta):
pass
elif isinstance(event, AssistantTextDelta):
collected.append(event.text)
elif isinstance(event, AssistantTurnComplete):
text = event.message.text.strip()
Expand Down
7 changes: 5 additions & 2 deletions src/openharness/channels/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from openharness.channels.bus.events import InboundMessage, OutboundMessage
from openharness.channels.bus.queue import MessageBus
from openharness.engine.stream_events import AssistantTextDelta, AssistantTurnComplete
from openharness.engine.stream_events import AssistantTextDelta, AssistantThinkingDelta, AssistantTurnComplete

if TYPE_CHECKING:
from openharness.engine.query_engine import QueryEngine
Expand Down Expand Up @@ -98,7 +98,10 @@ async def _handle(self, msg: InboundMessage) -> None:
reply_parts: list[str] = []
try:
async for event in self._engine.submit_message(msg.content):
if isinstance(event, AssistantTextDelta):
if isinstance(event, AssistantThinkingDelta):
# Thinking content is omitted from channel replies
pass
elif isinstance(event, AssistantTextDelta):
reply_parts.append(event.text)
elif isinstance(event, AssistantTurnComplete):
# Turn is done; we'll send the accumulated text below
Expand Down
8 changes: 8 additions & 0 deletions src/openharness/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2162,6 +2162,12 @@ def main(
help="Override verbose mode setting from config",
rich_help_panel="Model & Effort",
),
show_thinking: bool = typer.Option(
False,
"--show-thinking",
help="Show model thinking/reasoning process in the output",
rich_help_panel="Model & Effort",
),
max_turns: int | None = typer.Option(
None,
"--max-turns",
Expand Down Expand Up @@ -2446,6 +2452,7 @@ def main(
permission_mode=permission_mode,
max_turns=max_turns,
effort=effort,
show_thinking=show_thinking or None,
)
)
return
Expand Down Expand Up @@ -2479,5 +2486,6 @@ def main(
api_format=api_format,
permission_mode=permission_mode,
effort=effort,
show_thinking=show_thinking or None,
)
)
Loading
Loading