diff --git a/agent.py b/agent.py index 0a97816..a48bed7 100755 --- a/agent.py +++ b/agent.py @@ -24,6 +24,7 @@ LOGS_DIR = Path.home() / ".mac-code" / "logs" LOGS_DIR.mkdir(parents=True, exist_ok=True) + def log_interaction(query, intent, response, speed, grade=None, error=None): """Log every interaction for self-improvement training data.""" entry = { @@ -34,12 +35,13 @@ def log_interaction(query, intent, response, speed, grade=None, error=None): "speed": speed, "grade": grade, # "good", "bad", or None (ungraded) "error": error, - "model": get_current_model() if 'get_current_model' in dir() else "unknown", + "model": get_current_model() if "get_current_model" in dir() else "unknown", } log_file = LOGS_DIR / f"interactions-{datetime.now().strftime('%Y-%m-%d')}.jsonl" with open(log_file, "a") as f: f.write(json.dumps(entry) + "\n") + def get_failure_stats(): """Show stats from logged interactions.""" total = 0 @@ -62,6 +64,8 @@ def get_failure_stats(): pass return {"total": total, "graded": graded, "intents": intents, "errors": errors} + + PICOCLAW = os.path.expanduser("~/Desktop/qwen/picoclaw/build/picoclaw-darwin-arm64") console = Console() @@ -87,44 +91,105 @@ def get_failure_stats(): # ── smart routing ───────────────────────────────── TOOL_KEYWORDS = [ - "search", "find", "look up", "google", "what time", "when do", - "when is", "when does", "when are", "who do", "who is playing", - "who plays", "who won", "what happened", "what is the score", - "weather", "news", "latest", "schedule", "score", "tonight", - "today", "tomorrow", "yesterday", "this week", "next game", - "play next", "playing next", "results", "standings", - "price", "stock", "market", "crypto", "bitcoin", - "fetch", "download", "read file", "write file", - "create file", "run", "execute", "list files", "show me", - "open", "browse", "url", "http", "website", - "how much", "where is", "directions", "recipe", - "explore", "repo", "repository", "github", "tell me more", - "more about", "what else", "continue", "go deeper", + "search", + "find", + "look up", + "google", + "what time", + "when do", + "when is", + "when does", + "when are", + "who do", + "who is playing", + "who plays", + "who won", + "what happened", + "what is the score", + "weather", + "news", + "latest", + "schedule", + "score", + "tonight", + "today", + "tomorrow", + "yesterday", + "this week", + "next game", + "play next", + "playing next", + "results", + "standings", + "price", + "stock", + "market", + "crypto", + "bitcoin", + "fetch", + "download", + "read file", + "write file", + "create file", + "run", + "execute", + "list files", + "show me", + "open", + "browse", + "url", + "http", + "website", + "how much", + "where is", + "directions", + "recipe", + "explore", + "repo", + "repository", + "github", + "tell me more", + "more about", + "what else", + "continue", + "go deeper", ] + def classify_intent(message): """Ask LLM to classify: 'search', 'shell', or 'chat'. One fast call (~1s).""" try: - result, _ = llm_call([ - {"role": "system", "content": """Classify the user's request into exactly one category. Reply with ONLY the category word, nothing else. + result, _ = llm_call( + [ + { + "role": "system", + "content": """Classify the user's request into exactly one category. Reply with ONLY the category word, nothing else. Categories: - search: needs web search (news, scores, weather, prices, current events, looking up info online) - shell: needs filesystem or command execution (find files, list directories, read/write files, run commands, look at desktop, explore folders, check disk space, anything involving the local computer) - chat: general conversation, reasoning, math, coding questions, explanations (no tools needed) -Reply with ONLY one word: search, shell, or chat"""}, - {"role": "user", "content": message}, - ], max_tokens=5, temperature=0.0) +Reply with ONLY one word: search, shell, or chat""", + }, + {"role": "user", "content": message}, + ], + max_tokens=5, + temperature=0.0, + ) return result.strip().lower().split()[0] except Exception: return "chat" + def generate_shell_command(query, work_dir="."): """Ask LLM to generate the right shell command for a file/system task.""" home = os.path.expanduser("~") - result, _ = llm_call([ - {"role": "system", "content": f"""You are a macOS shell command generator. The user's home directory is {home}. Current working directory is {work_dir}. + result, _ = llm_call( + [ + { + "role": "system", + "content": f"""You are a macOS shell command generator. The user's home directory is {home}. Current working directory is {work_dir}. Generate a single shell command that accomplishes the user's request. Output ONLY the command, nothing else. No explanation, no markdown, no backticks. @@ -135,10 +200,15 @@ def generate_shell_command(query, work_dir="."): - "show me python files in this project" → find . -name "*.py" -type f - "read the readme" → cat README.md - "what's running on port 8000" → lsof -i :8000 -- "count lines of code" → find . -name "*.py" -exec wc -l {{}} +"""}, - {"role": "user", "content": query}, - ], max_tokens=100, temperature=0.0) - return result.strip().strip('`').strip() +- "count lines of code" → find . -name "*.py" -exec wc -l {{}} +""", + }, + {"role": "user", "content": query}, + ], + max_tokens=100, + temperature=0.0, + ) + return result.strip().strip("`").strip() + def run_smart_tool(query, work_dir="."): """Execute a shell command generated by the LLM, feed results back.""" @@ -150,8 +220,9 @@ def run_smart_tool(query, work_dir="."): # Step 2: Execute it try: - result = sp.run(cmd, shell=True, capture_output=True, text=True, - timeout=30, cwd=work_dir) + result = sp.run( + cmd, shell=True, capture_output=True, text=True, timeout=30, cwd=work_dir + ) output = result.stdout[:8000] if result.stderr: output += f"\n{result.stderr[:2000]}" @@ -162,13 +233,23 @@ def run_smart_tool(query, work_dir="."): # Step 3: LLM summarizes results (~2-3s) today = datetime.now().strftime("%A, %B %d, %Y") - content, timings = llm_call([ - {"role": "system", "content": f"Today is {today}. You ran a shell command and got results. Present the results clearly to the user. If it's a file listing, format it nicely. If it's code, use formatting. Be helpful and concise."}, - {"role": "user", "content": f"Command: {cmd}\nOutput:\n{output}\n\nOriginal question: {query}"}, - ], max_tokens=1000) + content, timings = llm_call( + [ + { + "role": "system", + "content": f"Today is {today}. You ran a shell command and got results. Present the results clearly to the user. If it's a file listing, format it nicely. If it's code, use formatting. Be helpful and concise.", + }, + { + "role": "user", + "content": f"Command: {cmd}\nOutput:\n{output}\n\nOriginal question: {query}", + }, + ], + max_tokens=1000, + ) return content, timings.get("predicted_per_second", 0), cmd + def run_file_tool(query, work_dir="."): """Execute file/exec operations directly in Python, feed results to LLM.""" import subprocess as sp @@ -193,10 +274,13 @@ def run_file_tool(query, work_dir="."): tool_name = f"list_dir({path})" tool_output = "\n".join(entries[:50]) if len(entries) > 50: - tool_output += f"\n... and {len(entries)-50} more" + tool_output += f"\n... and {len(entries) - 50} more" # Read file - elif any(kw in lower for kw in ["read file", "show me", "look at", "cat ", "what's in"]): + elif any( + kw in lower + for kw in ["read file", "show me", "look at", "cat ", "what's in"] + ): # Find file path in the query path = None for token in query.split(): @@ -219,13 +303,30 @@ def run_file_tool(query, work_dir="."): tool_name = "read_file(not found)" # Write file - elif any(kw in lower for kw in ["write file", "write a file", "create file", "create a file", - "create a new", "save file", "save to", "save this"]): + elif any( + kw in lower + for kw in [ + "write file", + "write a file", + "create file", + "create a file", + "create a new", + "save file", + "save to", + "save this", + ] + ): # LLM decides what to write - content, _ = llm_call([ - {"role": "system", "content": "The user wants to create/write a file. Generate ONLY the file content. No explanations."}, - {"role": "user", "content": query}, - ], max_tokens=2000) + content, _ = llm_call( + [ + { + "role": "system", + "content": "The user wants to create/write a file. Generate ONLY the file content. No explanations.", + }, + {"role": "user", "content": query}, + ], + max_tokens=2000, + ) # Extract filename from query filename = None @@ -248,11 +349,17 @@ def run_file_tool(query, work_dir="."): cmd = query for prefix in ["execute ", "run "]: if lower.startswith(prefix): - cmd = query[len(prefix):] + cmd = query[len(prefix) :] break - result = sp.run(cmd, shell=True, capture_output=True, text=True, - timeout=30, cwd=work_dir) + result = sp.run( + cmd, + shell=True, + capture_output=True, + text=True, + timeout=30, + cwd=work_dir, + ) tool_name = f"exec({cmd.strip()[:40]})" tool_output = result.stdout[:5000] if result.stderr: @@ -267,21 +374,33 @@ def run_file_tool(query, work_dir="."): # Feed tool output to LLM for final answer today = datetime.now().strftime("%A, %B %d, %Y") - content, timings = llm_call([ - {"role": "system", "content": f"Today is {today}. You executed a tool and got results. Summarize the results clearly for the user. If it's code, format it nicely."}, - {"role": "user", "content": f"Tool: {tool_name}\nResult:\n{tool_output}\n\nOriginal question: {query}"}, - ], max_tokens=1000) + content, timings = llm_call( + [ + { + "role": "system", + "content": f"Today is {today}. You executed a tool and got results. Summarize the results clearly for the user. If it's code, format it nicely.", + }, + { + "role": "user", + "content": f"Tool: {tool_name}\nResult:\n{tool_output}\n\nOriginal question: {query}", + }, + ], + max_tokens=1000, + ) return content, timings.get("predicted_per_second", 0), tool_name + def llm_call(messages, max_tokens=300, temperature=0.1): """Single LLM call, returns content + timings.""" - payload = json.dumps({ - "model": "local", - "messages": messages, - "max_tokens": max_tokens, - "temperature": temperature, - }).encode() + payload = json.dumps( + { + "model": "local", + "messages": messages, + "max_tokens": max_tokens, + "temperature": temperature, + } + ).encode() req = urllib.request.Request( f"{SERVER}/v1/chat/completions", data=payload, @@ -290,6 +409,7 @@ def llm_call(messages, max_tokens=300, temperature=0.1): d = json.loads(urllib.request.urlopen(req, timeout=60).read()) return d["choices"][0]["message"]["content"], d.get("timings", {}) + def quick_search(query): """LLM rewrites query → DuckDuckGo search → LLM answers. ~5-8s total.""" try: @@ -301,15 +421,23 @@ def quick_search(query): return None from datetime import datetime + today = datetime.now().strftime("%A, %B %d, %Y") # Step 1: LLM rewrites query into optimal search terms (~1s) try: - search_query, _ = llm_call([ - {"role": "system", "content": f"Today is {today}. Rewrite the user's question into an optimal web search query that will find current, specific data (not articles about announcements). Include 'today' or 'tonight' and the full date for time-sensitive queries. Add words like 'scores', 'results', 'live', or 'now' when looking for current data. Output ONLY the search query string, nothing else."}, - {"role": "user", "content": query}, - ], max_tokens=30, temperature=0.0) - search_query = search_query.strip().strip('"\'') + search_query, _ = llm_call( + [ + { + "role": "system", + "content": f"Today is {today}. Rewrite the user's question into an optimal web search query that will find current, specific data (not articles about announcements). Include 'today' or 'tonight' and the full date for time-sensitive queries. Add words like 'scores', 'results', 'live', or 'now' when looking for current data. Output ONLY the search query string, nothing else.", + }, + {"role": "user", "content": query}, + ], + max_tokens=30, + temperature=0.0, + ) + search_query = search_query.strip().strip("\"'") except Exception: search_query = query @@ -333,17 +461,23 @@ def quick_search(query): return None # Combine all snippets - snippets = "\n".join([f"- {r.get('title','')}: {r.get('body','')}" for r in all_results]) + snippets = "\n".join( + [f"- {r.get('title', '')}: {r.get('body', '')}" for r in all_results] + ) # Check if snippets actually contain useful data or just meta descriptions # If total snippet text is mostly generic, fetch the best page import re as _re + page_content = "" snippet_words = len(snippets.split()) # Heuristic: check if snippets have actual specific data # Numbers with context (times, scores, prices) count. Generic "live scores available" doesn't. - specific_patterns = _re.findall(r'\d{1,2}:\d{2}\s*(?:p\.m\.|a\.m\.|ET|PT)|\$[\d,.]+|\d+-\d+(?:\s*(?:win|loss|final))', snippets.lower()) + specific_patterns = _re.findall( + r"\d{1,2}:\d{2}\s*(?:p\.m\.|a\.m\.|ET|PT)|\$[\d,.]+|\d+-\d+(?:\s*(?:win|loss|final))", + snippets.lower(), + ) has_specifics = len(specific_patterns) >= 2 # need at least 2 specific data points if not has_specifics and all_results: @@ -355,10 +489,13 @@ def quick_search(query): continue try: jina_url = f"https://r.jina.ai/{url}" - req = urllib.request.Request(jina_url, headers={ - "User-Agent": "Mozilla/5.0", - "Accept": "text/plain", - }) + req = urllib.request.Request( + jina_url, + headers={ + "User-Agent": "Mozilla/5.0", + "Accept": "text/plain", + }, + ) with urllib.request.urlopen(req, timeout=10) as resp: text = resp.read(6000).decode("utf-8", errors="ignore") if len(text) > 200: @@ -372,13 +509,23 @@ def quick_search(query): context += f"\n\nDetailed content from top result:\n{page_content}" # Step 3: LLM answers using results (~2-3s) - content, timings = llm_call([ - {"role": "system", "content": f"Today is {today}. Answer the user's question using the search results below. Be specific, direct, and detailed. Extract dates, times, scores, names, numbers, prices, and facts. Present them clearly."}, - {"role": "user", "content": f"Search results:\n\n{context}\n\nQuestion: {query}"}, - ], max_tokens=1000) + content, timings = llm_call( + [ + { + "role": "system", + "content": f"Today is {today}. Answer the user's question using the search results below. Be specific, direct, and detailed. Extract dates, times, scores, names, numbers, prices, and facts. Present them clearly.", + }, + { + "role": "user", + "content": f"Search results:\n\n{context}\n\nQuestion: {query}", + }, + ], + max_tokens=1000, + ) return content, timings.get("predicted_per_second", 0) + def get_current_model(): """Check which model the running server has loaded.""" try: @@ -394,6 +541,7 @@ def get_current_model(): pass return None + def swap_model(target_key): """Stop current server and start a new one with the target model.""" cfg = MODELS[target_key] @@ -407,10 +555,14 @@ def swap_model(target_key): # Start new server cmd_list = [ "llama-server", - "--model", cfg["path"], - "--port", "8000", - "--host", "127.0.0.1", - "--ctx-size", str(cfg["ctx"]), + "--model", + cfg["path"], + "--port", + "8000", + "--host", + "127.0.0.1", + "--ctx-size", + str(cfg["ctx"]), ] + cfg["flags"].split() subprocess.Popen(cmd_list, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) @@ -428,14 +580,19 @@ def swap_model(target_key): return False, "Server failed to start" + # ── ANSI strip ───────────────────────────────────── -ANSI_RE = re.compile(r'\x1b\[[0-9;]*m|\r') +ANSI_RE = re.compile(r"\x1b\[[0-9;]*m|\r") + + def strip_ansi(text): - return ANSI_RE.sub('', text) + return ANSI_RE.sub("", text) + # ── live working display ────────────────────────── DOTS = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"] + class WorkingDisplay: def __init__(self): self.events = [] @@ -480,7 +637,18 @@ def add_log(self, line): self.events.append((time.time() - self.start_time, new_phase, detail)) # Keep last few interesting log lines - if any(k in lower for k in ["llm_request", "tool_call", "tool_result", "turn_end", "web_search", "fetch", "exec"]): + if any( + k in lower + for k in [ + "llm_request", + "tool_call", + "tool_result", + "turn_end", + "web_search", + "fetch", + "exec", + ] + ): short = clean if ">" in short: short = short.split(">", 1)[-1].strip() @@ -506,6 +674,7 @@ def render(self): return t + # ── detect model ─────────────────────────────────── def detect_model(): try: @@ -521,15 +690,18 @@ def detect_model(): except Exception: return "offline", "" + # ── streaming chat (raw mode) ───────────────────── def stream_llm(messages): - payload = json.dumps({ - "model": "local", - "messages": messages, - "max_tokens": 4096, - "temperature": 0.7, - "stream": True, - }).encode() + payload = json.dumps( + { + "model": "local", + "messages": messages, + "max_tokens": 4096, + "temperature": 0.7, + "stream": True, + } + ).encode() req = urllib.request.Request( f"{SERVER}/v1/chat/completions", @@ -544,10 +716,10 @@ def stream_llm(messages): with urllib.request.urlopen(req, timeout=300) as resp: buf = "" while True: - ch = resp.read(1) - if not ch: + chunk = resp.read(4096) + if not chunk: break - buf += ch.decode("utf-8", errors="replace") + buf += chunk.decode("utf-8", errors="replace") while "\n" in buf: line, buf = buf.split("\n", 1) line = line.strip() @@ -569,6 +741,7 @@ def stream_llm(messages): return full, tokens, time.time() - start + # ── picoclaw agent call with LIVE log streaming ─── def picoclaw_call_live(message, session="mac-code"): """Run picoclaw with real-time log streaming into animated display.""" @@ -578,8 +751,7 @@ def picoclaw_call_live(message, session="mac-code"): # Launch with Popen — picoclaw writes everything to stdout proc = subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - text=True, bufsize=1 + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1 ) # Read stdout line-by-line in a thread for real-time updates @@ -595,7 +767,9 @@ def read_output(): reader.start() # Animate while process runs - with Live(display.render(), console=console, refresh_per_second=8, transient=True) as live: + with Live( + display.render(), console=console, refresh_per_second=8, transient=True + ) as live: while proc.poll() is None: live.update(display.render()) time.sleep(0.12) @@ -633,6 +807,7 @@ def read_output(): return response, display.events + # ── banner ───────────────────────────────────────── def print_banner(model_name, model_detail): console.print() @@ -663,7 +838,10 @@ def print_banner(model_name, model_detail): console.print() console.print(Rule(style="dim")) - console.print(" [dim]type [bold bright_cyan]/[/bold bright_cyan] to see all commands[/]\n") + console.print( + " [dim]type [bold bright_cyan]/[/bold bright_cyan] to see all commands[/]\n" + ) + # ── render helpers ───────────────────────────────── def render_response(response): @@ -674,6 +852,7 @@ def render_response(response): for line in response.split("\n"): console.print(f" {line}") + def render_speed(tokens, elapsed): if elapsed <= 0 or tokens <= 0: return @@ -684,6 +863,7 @@ def render_speed(tokens, elapsed): s.append(f" · {tokens} tokens · {elapsed:.1f}s", style="dim") console.print(s) + def render_timeline(events): """Show a compact summary of what the agent did.""" if not events: @@ -706,33 +886,35 @@ def render_timeline(events): t.append(" → ", style="dim") console.print(t) + # ── commands ─────────────────────────────────────── COMMANDS = [ - ("/agent", "Switch to agent mode (tools + web search)"), - ("/raw", "Switch to raw mode (direct streaming, no tools)"), - ("/btw", "Ask a side question without adding to conversation history"), - ("/loop", "Run a prompt on a recurring interval — /loop 5m "), - ("/branch", "Save conversation checkpoint you can restore later"), - ("/restore", "Restore last saved conversation checkpoint"), - ("/add-dir", "Set working directory — /add-dir "), - ("/save", "Save conversation to a file — /save "), - ("/search", "Quick web search — /search "), - ("/bench", "Run a quick speed benchmark"), - ("/clear", "Clear conversation and start fresh"), - ("/stats", "Show session statistics"), - ("/model", "Show or switch model — /model 9b or /model 35b"), - ("/auto", "Toggle smart auto-routing between 9B and 35B"), - ("/tools", "List available agent tools"), - ("/system", "Set system prompt — /system "), - ("/compact", "Toggle compact output (no markdown rendering)"), - ("/stop", "Stop a running /loop"), - ("/cost", "Show estimated cost savings vs cloud APIs"), - ("/good", "Grade last response as good (for self-improvement)"), - ("/bad", "Grade last response as bad (for self-improvement)"), - ("/improve", "Show self-improvement stats from logged interactions"), - ("/quit", "Exit mac code"), + ("/agent", "Switch to agent mode (tools + web search)"), + ("/raw", "Switch to raw mode (direct streaming, no tools)"), + ("/btw", "Ask a side question without adding to conversation history"), + ("/loop", "Run a prompt on a recurring interval — /loop 5m "), + ("/branch", "Save conversation checkpoint you can restore later"), + ("/restore", "Restore last saved conversation checkpoint"), + ("/add-dir", "Set working directory — /add-dir "), + ("/save", "Save conversation to a file — /save "), + ("/search", "Quick web search — /search "), + ("/bench", "Run a quick speed benchmark"), + ("/clear", "Clear conversation and start fresh"), + ("/stats", "Show session statistics"), + ("/model", "Show or switch model — /model 9b or /model 35b"), + ("/auto", "Toggle smart auto-routing between 9B and 35B"), + ("/tools", "List available agent tools"), + ("/system", "Set system prompt — /system "), + ("/compact", "Toggle compact output (no markdown rendering)"), + ("/stop", "Stop a running /loop"), + ("/cost", "Show estimated cost savings vs cloud APIs"), + ("/good", "Grade last response as good (for self-improvement)"), + ("/bad", "Grade last response as bad (for self-improvement)"), + ("/improve", "Show self-improvement stats from logged interactions"), + ("/quit", "Exit mac code"), ] + def show_slash_menu(filter_text=""): """Print slash commands inline — like Claude Code.""" matches = COMMANDS @@ -747,6 +929,7 @@ def show_slash_menu(filter_text=""): line.append(desc, style="dim") console.print(line) + # ── main ─────────────────────────────────────────── def main(): model_name, model_detail = detect_model() @@ -819,10 +1002,17 @@ def main(): if len(parts) >= 2: target = parts[1].lower().replace("b", "b") if target in MODELS: - console.print(f" [dim]swapping to {MODELS[target]['name']}...[/]") + console.print( + f" [dim]swapping to {MODELS[target]['name']}...[/]" + ) display = WorkingDisplay() display.phase = f"loading {MODELS[target]['name']}" - with Live(display.render(), console=console, refresh_per_second=8, transient=True) as live: + with Live( + display.render(), + console=console, + refresh_per_second=8, + transient=True, + ) as live: ok, msg = swap_model(target) while not ok and display.frame < 100: display.frame += 1 @@ -839,8 +1029,12 @@ def main(): else: cur = get_current_model() model_name, model_detail = detect_model() - console.print(f" [bold white]{model_name}[/] [dim]{model_detail}[/]") - console.print(f" [dim]auto-routing: {'on' if auto_route else 'off'}[/]") + console.print( + f" [bold white]{model_name}[/] [dim]{model_detail}[/]" + ) + console.print( + f" [dim]auto-routing: {'on' if auto_route else 'off'}[/]" + ) console.print(f" [dim]switch: /model 9b or /model 35b[/]\n") continue @@ -855,10 +1049,14 @@ def main(): continue elif exact == "/tools": for name, desc in [ - ("web_search", "DuckDuckGo"), ("web_fetch", "read URLs"), - ("exec", "shell commands"), ("read_file", "local files"), - ("write_file", "create files"), ("edit_file", "modify files"), - ("list_dir", "browse dirs"), ("subagent", "spawn tasks"), + ("web_search", "DuckDuckGo"), + ("web_fetch", "read URLs"), + ("exec", "shell commands"), + ("read_file", "local files"), + ("write_file", "create files"), + ("edit_file", "modify files"), + ("list_dir", "browse dirs"), + ("subagent", "spawn tasks"), ]: t = Text() t.append(" ▸ ", style="bright_cyan") @@ -883,13 +1081,17 @@ def main(): elif exact == "/branch": branch_save = [m.copy() for m in messages] - console.print(f" [dim]conversation saved ({len(messages)} messages). use /restore to go back.[/]\n") + console.print( + f" [dim]conversation saved ({len(messages)} messages). use /restore to go back.[/]\n" + ) continue elif exact == "/restore": if branch_save is not None: messages = [m.copy() for m in branch_save] - console.print(f" [dim]restored to checkpoint ({len(messages)} messages)[/]\n") + console.print( + f" [dim]restored to checkpoint ({len(messages)} messages)[/]\n" + ) else: console.print(" [dim]no checkpoint saved. use /branch first.[/]\n") continue @@ -897,13 +1099,22 @@ def main(): elif exact == "/bench": console.print(" [dim]running speed benchmark...[/]") try: - payload = json.dumps({ - "model": "local", - "messages": [{"role": "user", "content": "Count from 1 to 50, one number per line."}], - "max_tokens": 300, "temperature": 0.1, - }).encode() + payload = json.dumps( + { + "model": "local", + "messages": [ + { + "role": "user", + "content": "Count from 1 to 50, one number per line.", + } + ], + "max_tokens": 300, + "temperature": 0.1, + } + ).encode() req = urllib.request.Request( - f"{SERVER}/v1/chat/completions", data=payload, + f"{SERVER}/v1/chat/completions", + data=payload, headers={"Content-Type": "application/json"}, ) bstart = time.time() @@ -915,8 +1126,12 @@ def main(): gen_speed = t.get("predicted_per_second", 0) prompt_speed = t.get("prompt_per_second", 0) tokens = u.get("completion_tokens", 0) - console.print(f" [bold bright_green]{gen_speed:.1f} tok/s[/] generation") - console.print(f" [bold bright_green]{prompt_speed:.1f} tok/s[/] prompt processing") + console.print( + f" [bold bright_green]{gen_speed:.1f} tok/s[/] generation" + ) + console.print( + f" [bold bright_green]{prompt_speed:.1f} tok/s[/] prompt processing" + ) console.print(f" [dim]{tokens} tokens in {belapsed:.1f}s[/]\n") except Exception as e: console.print(f" [bold red]benchmark failed: {e}[/]\n") @@ -925,10 +1140,14 @@ def main(): elif exact == "/cost": cloud_rate = 0.34 # $/hr RunPod equivalent hours = session_time / 3600 if session_time > 0 else 0 - saved = cloud_rate * max(hours, 1/60) + saved = cloud_rate * max(hours, 1 / 60) console.print(f" [bold bright_green]$0.00[/] spent locally") - console.print(f" [dim]~${saved:.4f} would have cost on cloud GPU (${cloud_rate}/hr)[/]") - console.print(f" [dim]session: {session_time:.0f}s · {session_tokens:,} tokens[/]\n") + console.print( + f" [dim]~${saved:.4f} would have cost on cloud GPU (${cloud_rate}/hr)[/]" + ) + console.print( + f" [dim]session: {session_time:.0f}s · {session_tokens:,} tokens[/]\n" + ) continue elif exact == "/good": @@ -946,7 +1165,9 @@ def main(): if last_interaction: last_interaction["grade"] = "bad" log_interaction(**last_interaction) - console.print(" [bright_red]marked bad — logged for improvement[/]\n") + console.print( + " [bright_red]marked bad — logged for improvement[/]\n" + ) else: console.print(" [dim]no response to grade[/]\n") continue @@ -996,7 +1217,9 @@ def main(): if use_agent: start = time.time() # Use a separate session so it doesn't pollute main conversation - response, events = picoclaw_call_live(side_q, session=f"btw-{int(time.time())}") + response, events = picoclaw_call_live( + side_q, session=f"btw-{int(time.time())}" + ) elapsed = time.time() - start if response: console.print(f" [dim italic](side answer)[/]") @@ -1009,12 +1232,17 @@ def main(): else: side_msgs = [{"role": "user", "content": side_q}] try: - payload = json.dumps({ - "model": "local", "messages": side_msgs, - "max_tokens": 2000, "temperature": 0.7, - }).encode() + payload = json.dumps( + { + "model": "local", + "messages": side_msgs, + "max_tokens": 2000, + "temperature": 0.7, + } + ).encode() req = urllib.request.Request( - f"{SERVER}/v1/chat/completions", data=payload, + f"{SERVER}/v1/chat/completions", + data=payload, headers={"Content-Type": "application/json"}, ) with urllib.request.urlopen(req, timeout=120) as resp: @@ -1045,13 +1273,17 @@ def main(): try: save_path = os.path.join(work_dir, filename) with open(save_path, "w") as f: - json.dump({ - "messages": messages, - "session_id": session_id, - "tokens": session_tokens, - "time": session_time, - "turns": session_turns, - }, f, indent=2) + json.dump( + { + "messages": messages, + "session_id": session_id, + "tokens": session_tokens, + "time": session_time, + "turns": session_turns, + }, + f, + indent=2, + ) console.print(f" [dim]saved to {save_path}[/]\n") except Exception as e: console.print(f" [bold red]{e}[/]\n") @@ -1066,7 +1298,7 @@ def main(): start = time.time() response, events = picoclaw_call_live( f"Search the web for: {query}. Give a brief summary of the top results.", - session=f"search-{int(time.time())}" + session=f"search-{int(time.time())}", ) elapsed = time.time() - start if response: @@ -1086,7 +1318,9 @@ def main(): # Parse: /loop 5m parts = cmd[6:].strip().split(None, 1) if len(parts) < 2: - console.print(" [dim]/loop — e.g. /loop 5m check server status[/]\n") + console.print( + " [dim]/loop — e.g. /loop 5m check server status[/]\n" + ) continue interval_str, loop_prompt = parts @@ -1119,7 +1353,9 @@ def run_loop(prompt, interval, sid): time.sleep(interval) if not loop_running: break - console.print(f"\n [dim italic]loop: running '{prompt[:40]}...'[/]") + console.print( + f"\n [dim italic]loop: running '{prompt[:40]}...'[/]" + ) resp, _ = picoclaw_call_live(prompt, session=sid) if resp: for line in resp.split("\n"): @@ -1129,7 +1365,7 @@ def run_loop(prompt, interval, sid): loop_thread = threading.Thread( target=run_loop, args=(loop_prompt, interval_sec, f"loop-{session_id}"), - daemon=True + daemon=True, ) loop_thread.start() continue @@ -1159,7 +1395,9 @@ def do_classify(): cls_thread = threading.Thread(target=do_classify, daemon=True) cls_thread.start() - with Live(display.render(), console=console, refresh_per_second=8, transient=True) as live: + with Live( + display.render(), console=console, refresh_per_second=8, transient=True + ) as live: while cls_thread.is_alive(): display.frame += 1 live.update(display.render()) @@ -1184,7 +1422,12 @@ def do_tool(): tool_thread = threading.Thread(target=do_tool, daemon=True) tool_thread.start() - with Live(display.render(), console=console, refresh_per_second=8, transient=True) as live: + with Live( + display.render(), + console=console, + refresh_per_second=8, + transient=True, + ) as live: while tool_thread.is_alive(): display.frame += 1 t = time.time() - start @@ -1217,7 +1460,12 @@ def do_tool(): session_tokens += len(response.split()) session_time += elapsed session_turns += 1 - last_interaction = {"query": user_input, "intent": "shell", "response": response, "speed": speed} + last_interaction = { + "query": user_input, + "intent": "shell", + "response": response, + "speed": speed, + } messages.append({"role": "user", "content": user_input}) messages.append({"role": "assistant", "content": response}) else: @@ -1238,7 +1486,12 @@ def do_search(): search_thread = threading.Thread(target=do_search, daemon=True) search_thread.start() - with Live(display.render(), console=console, refresh_per_second=8, transient=True) as live: + with Live( + display.render(), + console=console, + refresh_per_second=8, + transient=True, + ) as live: while search_thread.is_alive(): display.frame += 1 t = time.time() - start @@ -1271,7 +1524,12 @@ def do_search(): session_turns += 1 messages.append({"role": "user", "content": user_input}) messages.append({"role": "assistant", "content": response}) - last_interaction = {"query": user_input, "intent": "search", "response": response, "speed": speed} + last_interaction = { + "query": user_input, + "intent": "search", + "response": response, + "speed": speed, + } else: # Search failed, fall back to direct LLM console.print(" [dim]search failed, asking model directly...[/]") @@ -1280,7 +1538,12 @@ def do_search(): tokens = 0 first_token = True display.phase = "thinking" - with Live(display.render(), console=console, refresh_per_second=8, transient=True) as live: + with Live( + display.render(), + console=console, + refresh_per_second=8, + transient=True, + ) as live: gen = stream_llm(messages) for chunk in gen: if isinstance(chunk, str): @@ -1307,7 +1570,12 @@ def do_search(): display = WorkingDisplay() display.phase = "thinking" try: - with Live(display.render(), console=console, refresh_per_second=8, transient=True) as live: + with Live( + display.render(), + console=console, + refresh_per_second=8, + transient=True, + ) as live: gen = stream_llm(messages) for chunk in gen: if isinstance(chunk, str): @@ -1342,7 +1610,12 @@ def do_search(): display.phase = "thinking" first_token = True - with Live(display.render(), console=console, refresh_per_second=8, transient=True) as live: + with Live( + display.render(), + console=console, + refresh_per_second=8, + transient=True, + ) as live: gen = stream_llm(messages) for chunk in gen: if isinstance(chunk, str): @@ -1355,7 +1628,9 @@ def do_search(): tokens += 1 elapsed = time.time() - start - if not compact_mode and any(c in full for c in ["##", "**", "```", "- ", "1. "]): + if not compact_mode and any( + c in full for c in ["##", "**", "```", "- ", "1. "] + ): console.print("\n") console.print(Padding(Markdown(full), (0, 2))) else: @@ -1383,5 +1658,6 @@ def do_search(): ) console.print() + if __name__ == "__main__": main()