From 43479bff3ce562fdac47e4910e4f61d38ac373ca Mon Sep 17 00:00:00 2001 From: FunctionFreak Date: Sat, 23 May 2026 21:18:49 +0530 Subject: [PATCH 1/6] restructuring codebase --- Auto_Use/macOS_use/agent/__init__.py | 4 ++-- Auto_Use/macOS_use/agent/{cli => coder}/__init__.py | 0 Auto_Use/macOS_use/agent/{cli => coder}/__main__.py | 8 ++++---- Auto_Use/macOS_use/agent/{cli => coder}/service.py | 0 .../macOS_use/agent/{cli => coder}/system_prompt.md | 0 Auto_Use/macOS_use/agent/{cli => coder}/view.py | 0 Auto_Use/macOS_use/agent/main_driver/__init__.py | 5 +++++ .../macOS_use/agent/{ => main_driver}/service.py | 12 ++++++------ .../agent/{ => main_driver}/system_prompt.md | 0 Auto_Use/macOS_use/agent/{ => main_driver}/view.py | 0 .../cli => macOS_use/agent}/minions/__init__.py | 4 ++-- .../macOS_use/agent/{cli => }/minions/__main__.py | 8 ++++---- .../macOS_use/agent/{cli => }/minions/service.py | 4 ++-- .../agent/{cli => }/minions/system_prompt.md | 0 Auto_Use/macOS_use/agent/{cli => }/minions/view.py | 0 .../agent/{domain_knowledge => skills}/__init__.py | 0 .../agent/{domain_knowledge => skills}/browser.md | 0 .../{domain_knowledge => skills}/google_colab.md | 0 .../{domain_knowledge => skills}/google_services.md | 0 .../{domain_knowledge => skills}/libreoffice_calc.md | 0 .../microsoft_services.md | 0 .../agent/{domain_knowledge => skills}/service.py | 8 ++++---- .../domain_knowledge.json => skills/skills.json} | 0 .../agent/{domain_knowledge => skills}/skyscanner.md | 0 .../agent/{domain_knowledge => skills}/wikipedia.md | 0 Auto_Use/macOS_use/controller/view.py | 4 ++-- .../macOS_use/remote_connection/telegram/service.py | 2 +- Auto_Use/windows_use/agent/__init__.py | 4 ++-- .../windows_use/agent/{cli => coder}/__init__.py | 0 .../windows_use/agent/{cli => coder}/__main__.py | 8 ++++---- Auto_Use/windows_use/agent/{cli => coder}/service.py | 0 .../agent/{cli => coder}/system_prompt.md | 0 Auto_Use/windows_use/agent/{cli => coder}/view.py | 0 
Auto_Use/windows_use/agent/main_driver/__init__.py | 5 +++++ .../windows_use/agent/{ => main_driver}/service.py | 12 ++++++------ .../agent/{ => main_driver}/system_prompt.md | 0 Auto_Use/windows_use/agent/{ => main_driver}/view.py | 0 .../cli => windows_use/agent}/minions/__init__.py | 4 ++-- .../windows_use/agent/{cli => }/minions/__main__.py | 8 ++++---- .../windows_use/agent/{cli => }/minions/service.py | 4 ++-- .../agent/{cli => }/minions/system_prompt.md | 2 +- Auto_Use/windows_use/agent/{cli => }/minions/view.py | 0 .../agent/{domain_knowledge => skills}/__init__.py | 0 .../agent/{domain_knowledge => skills}/browser.md | 0 .../{domain_knowledge => skills}/google_colab.md | 0 .../{domain_knowledge => skills}/google_services.md | 0 .../{domain_knowledge => skills}/libreoffice_calc.md | 0 .../microsoft_services.md | 0 .../agent/{domain_knowledge => skills}/service.py | 8 ++++---- .../domain_knowledge.json => skills/skills.json} | 0 .../agent/{domain_knowledge => skills}/skyscanner.md | 0 .../agent/{domain_knowledge => skills}/wikipedia.md | 0 Auto_Use/windows_use/controller/view.py | 4 ++-- .../remote_connection/telegram/service.py | 2 +- README.md | 4 ++-- app.py | 6 +++--- cli.py | 4 ++-- main.py | 4 ++-- 58 files changed, 74 insertions(+), 64 deletions(-) rename Auto_Use/macOS_use/agent/{cli => coder}/__init__.py (100%) rename Auto_Use/macOS_use/agent/{cli => coder}/__main__.py (93%) rename Auto_Use/macOS_use/agent/{cli => coder}/service.py (100%) rename Auto_Use/macOS_use/agent/{cli => coder}/system_prompt.md (100%) rename Auto_Use/macOS_use/agent/{cli => coder}/view.py (100%) create mode 100644 Auto_Use/macOS_use/agent/main_driver/__init__.py rename Auto_Use/macOS_use/agent/{ => main_driver}/service.py (99%) rename Auto_Use/macOS_use/agent/{ => main_driver}/system_prompt.md (100%) rename Auto_Use/macOS_use/agent/{ => main_driver}/view.py (100%) rename Auto_Use/{windows_use/agent/cli => macOS_use/agent}/minions/__init__.py (96%) rename 
Auto_Use/macOS_use/agent/{cli => }/minions/__main__.py (91%) rename Auto_Use/macOS_use/agent/{cli => }/minions/service.py (99%) rename Auto_Use/macOS_use/agent/{cli => }/minions/system_prompt.md (100%) rename Auto_Use/macOS_use/agent/{cli => }/minions/view.py (100%) rename Auto_Use/macOS_use/agent/{domain_knowledge => skills}/__init__.py (100%) rename Auto_Use/macOS_use/agent/{domain_knowledge => skills}/browser.md (100%) rename Auto_Use/macOS_use/agent/{domain_knowledge => skills}/google_colab.md (100%) rename Auto_Use/macOS_use/agent/{domain_knowledge => skills}/google_services.md (100%) rename Auto_Use/macOS_use/agent/{domain_knowledge => skills}/libreoffice_calc.md (100%) rename Auto_Use/macOS_use/agent/{domain_knowledge => skills}/microsoft_services.md (100%) rename Auto_Use/macOS_use/agent/{domain_knowledge => skills}/service.py (96%) rename Auto_Use/macOS_use/agent/{domain_knowledge/domain_knowledge.json => skills/skills.json} (100%) rename Auto_Use/macOS_use/agent/{domain_knowledge => skills}/skyscanner.md (100%) rename Auto_Use/macOS_use/agent/{domain_knowledge => skills}/wikipedia.md (100%) rename Auto_Use/windows_use/agent/{cli => coder}/__init__.py (100%) rename Auto_Use/windows_use/agent/{cli => coder}/__main__.py (93%) rename Auto_Use/windows_use/agent/{cli => coder}/service.py (100%) rename Auto_Use/windows_use/agent/{cli => coder}/system_prompt.md (100%) rename Auto_Use/windows_use/agent/{cli => coder}/view.py (100%) create mode 100644 Auto_Use/windows_use/agent/main_driver/__init__.py rename Auto_Use/windows_use/agent/{ => main_driver}/service.py (99%) rename Auto_Use/windows_use/agent/{ => main_driver}/system_prompt.md (100%) rename Auto_Use/windows_use/agent/{ => main_driver}/view.py (100%) rename Auto_Use/{macOS_use/agent/cli => windows_use/agent}/minions/__init__.py (96%) rename Auto_Use/windows_use/agent/{cli => }/minions/__main__.py (91%) rename Auto_Use/windows_use/agent/{cli => }/minions/service.py (99%) rename 
Auto_Use/windows_use/agent/{cli => }/minions/system_prompt.md (99%) rename Auto_Use/windows_use/agent/{cli => }/minions/view.py (100%) rename Auto_Use/windows_use/agent/{domain_knowledge => skills}/__init__.py (100%) rename Auto_Use/windows_use/agent/{domain_knowledge => skills}/browser.md (100%) rename Auto_Use/windows_use/agent/{domain_knowledge => skills}/google_colab.md (100%) rename Auto_Use/windows_use/agent/{domain_knowledge => skills}/google_services.md (100%) rename Auto_Use/windows_use/agent/{domain_knowledge => skills}/libreoffice_calc.md (100%) rename Auto_Use/windows_use/agent/{domain_knowledge => skills}/microsoft_services.md (100%) rename Auto_Use/windows_use/agent/{domain_knowledge => skills}/service.py (96%) rename Auto_Use/windows_use/agent/{domain_knowledge/domain_knowledge.json => skills/skills.json} (100%) rename Auto_Use/windows_use/agent/{domain_knowledge => skills}/skyscanner.md (100%) rename Auto_Use/windows_use/agent/{domain_knowledge => skills}/wikipedia.md (100%) diff --git a/Auto_Use/macOS_use/agent/__init__.py b/Auto_Use/macOS_use/agent/__init__.py index 4ae0514..1d5ddd6 100644 --- a/Auto_Use/macOS_use/agent/__init__.py +++ b/Auto_Use/macOS_use/agent/__init__.py @@ -18,7 +18,7 @@ # community — thank you for contributing. 
# Auto_Use/macOS_use/agent/__init__.py -from .service import AgentService -from .view import AgentResponseFormatter +from .main_driver.service import AgentService +from .main_driver.view import AgentResponseFormatter __all__ = ['AgentService', 'AgentResponseFormatter'] \ No newline at end of file diff --git a/Auto_Use/macOS_use/agent/cli/__init__.py b/Auto_Use/macOS_use/agent/coder/__init__.py similarity index 100% rename from Auto_Use/macOS_use/agent/cli/__init__.py rename to Auto_Use/macOS_use/agent/coder/__init__.py diff --git a/Auto_Use/macOS_use/agent/cli/__main__.py b/Auto_Use/macOS_use/agent/coder/__main__.py similarity index 93% rename from Auto_Use/macOS_use/agent/cli/__main__.py rename to Auto_Use/macOS_use/agent/coder/__main__.py index 1b8e1aa..8b09fac 100644 --- a/Auto_Use/macOS_use/agent/cli/__main__.py +++ b/Auto_Use/macOS_use/agent/coder/__main__.py @@ -23,7 +23,7 @@ This module allows the CLI agent to be run as a subprocess. Usage: - python -m Auto_Use.macOS_use.agent.cli --task "your task here" + python -m Auto_Use.macOS_use.agent.coder --task "your task here" Options: --task : Required. 
The task for CLI agent to execute @@ -37,7 +37,7 @@ - Result is written to --result file when done When called directly for testing: - - Run: python -m Auto_Use.macOS_use.agent.cli --task "test task" + - Run: python -m Auto_Use.macOS_use.agent.coder --task "test task" - Or use cli.py at project root """ @@ -61,8 +61,8 @@ def main(): formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - python -m Auto_Use.macOS_use.agent.cli --task "fix the bug in test.py" - python -m Auto_Use.macOS_use.agent.cli --task "create hello world" --provider openrouter --model gemini-3.5-flash + python -m Auto_Use.macOS_use.agent.coder --task "fix the bug in test.py" + python -m Auto_Use.macOS_use.agent.coder --task "create hello world" --provider openrouter --model gemini-3.5-flash """ ) diff --git a/Auto_Use/macOS_use/agent/cli/service.py b/Auto_Use/macOS_use/agent/coder/service.py similarity index 100% rename from Auto_Use/macOS_use/agent/cli/service.py rename to Auto_Use/macOS_use/agent/coder/service.py diff --git a/Auto_Use/macOS_use/agent/cli/system_prompt.md b/Auto_Use/macOS_use/agent/coder/system_prompt.md similarity index 100% rename from Auto_Use/macOS_use/agent/cli/system_prompt.md rename to Auto_Use/macOS_use/agent/coder/system_prompt.md diff --git a/Auto_Use/macOS_use/agent/cli/view.py b/Auto_Use/macOS_use/agent/coder/view.py similarity index 100% rename from Auto_Use/macOS_use/agent/cli/view.py rename to Auto_Use/macOS_use/agent/coder/view.py diff --git a/Auto_Use/macOS_use/agent/main_driver/__init__.py b/Auto_Use/macOS_use/agent/main_driver/__init__.py new file mode 100644 index 0000000..d58e70c --- /dev/null +++ b/Auto_Use/macOS_use/agent/main_driver/__init__.py @@ -0,0 +1,5 @@ +# Auto_Use/macOS_use/agent/main_driver/__init__.py +from .service import AgentService +from .view import AgentResponseFormatter + +__all__ = ['AgentService', 'AgentResponseFormatter'] diff --git a/Auto_Use/macOS_use/agent/service.py 
b/Auto_Use/macOS_use/agent/main_driver/service.py similarity index 99% rename from Auto_Use/macOS_use/agent/service.py rename to Auto_Use/macOS_use/agent/main_driver/service.py index 92ff338..832ed7b 100644 --- a/Auto_Use/macOS_use/agent/service.py +++ b/Auto_Use/macOS_use/agent/main_driver/service.py @@ -26,11 +26,11 @@ from pathlib import Path from datetime import datetime from typing import Optional -from ..llm_provider.llm_manager import LLMManager +from ...llm_provider.llm_manager import LLMManager from .view import AgentResponseFormatter -from ..tree.element import UIElementScanner, ELEMENT_CONFIG -from ..controller import ControllerView -from .domain_knowledge import DomainKnowledgeService +from ...tree.element import UIElementScanner, ELEMENT_CONFIG +from ...controller import ControllerView +from ..skills import DomainKnowledgeService from PIL import Image from io import BytesIO @@ -111,7 +111,7 @@ def __init__(self, provider: str, model: str, save_conversation: bool = False, t self.controller = ControllerView(provider=provider, model=self.llm_manager.get_model_name(), web_callback=web_callback, shell_callback=shell_callback, cli_callback=cli_callback, api_key=api_key, stop_event=stop_event, external_terminal=external_terminal) # Initialize Domain Knowledge Service - self.domain_knowledge = DomainKnowledgeService() + self.skills = DomainKnowledgeService() # Save conversation flag self.save_conversation = save_conversation @@ -384,7 +384,7 @@ def process_request(self, task: str) -> str: formatted_element_tree = f"\n{element_tree_text}\n" # Fetch domain-specific knowledge if available - domain_block = self.domain_knowledge.get_knowledge( + domain_block = self.skills.get_knowledge( self.scanner.application_name, element_tree_text ) diff --git a/Auto_Use/macOS_use/agent/system_prompt.md b/Auto_Use/macOS_use/agent/main_driver/system_prompt.md similarity index 100% rename from Auto_Use/macOS_use/agent/system_prompt.md rename to 
Auto_Use/macOS_use/agent/main_driver/system_prompt.md diff --git a/Auto_Use/macOS_use/agent/view.py b/Auto_Use/macOS_use/agent/main_driver/view.py similarity index 100% rename from Auto_Use/macOS_use/agent/view.py rename to Auto_Use/macOS_use/agent/main_driver/view.py diff --git a/Auto_Use/windows_use/agent/cli/minions/__init__.py b/Auto_Use/macOS_use/agent/minions/__init__.py similarity index 96% rename from Auto_Use/windows_use/agent/cli/minions/__init__.py rename to Auto_Use/macOS_use/agent/minions/__init__.py index 95a5143..14d85eb 100644 --- a/Auto_Use/windows_use/agent/cli/minions/__init__.py +++ b/Auto_Use/macOS_use/agent/minions/__init__.py @@ -18,10 +18,10 @@ # community — thank you for contributing. # Minion sub-agent package. -# Mirrors the cli/ package structure: +# Mirrors the coder/ package structure: # - service.py : full agent loop (read-only scout variant) # - view.py : MinionResponseFormatter (next_goal-shape JSON validator) -# - __main__.py : subprocess entry — `python -m ...agent.cli.minions` +# - __main__.py : subprocess entry — `python -m ...agent.minions` # - system_prompt.md : read-only scout system prompt from .service import AgentService diff --git a/Auto_Use/macOS_use/agent/cli/minions/__main__.py b/Auto_Use/macOS_use/agent/minions/__main__.py similarity index 91% rename from Auto_Use/macOS_use/agent/cli/minions/__main__.py rename to Auto_Use/macOS_use/agent/minions/__main__.py index e763eca..c130515 100644 --- a/Auto_Use/macOS_use/agent/cli/minions/__main__.py +++ b/Auto_Use/macOS_use/agent/minions/__main__.py @@ -23,7 +23,7 @@ Subprocess entry for the read-only scout minion. Usage: - python -m Auto_Use.macOS_use.agent.cli.minions --task "your question here" + python -m Auto_Use.macOS_use.agent.minions --task "your question here" Options: --task : Required. The question/objective for the minion to answer. @@ -38,7 +38,7 @@ parent CLI agent as a tool response. 
When called directly for testing: - python -m Auto_Use.macOS_use.agent.cli.minions --task "where is X defined?" + python -m Auto_Use.macOS_use.agent.minions --task "where is X defined?" """ import argparse @@ -60,8 +60,8 @@ def main(): formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - python -m Auto_Use.macOS_use.agent.cli.minions --task "where is _read_scratchpad_from_file defined and who calls it?" - python -m Auto_Use.macOS_use.agent.cli.minions --task "list every file under src/ that imports requests" + python -m Auto_Use.macOS_use.agent.minions --task "where is _read_scratchpad_from_file defined and who calls it?" + python -m Auto_Use.macOS_use.agent.minions --task "list every file under src/ that imports requests" """ ) diff --git a/Auto_Use/macOS_use/agent/cli/minions/service.py b/Auto_Use/macOS_use/agent/minions/service.py similarity index 99% rename from Auto_Use/macOS_use/agent/cli/minions/service.py rename to Auto_Use/macOS_use/agent/minions/service.py index 9945b74..50ea67e 100644 --- a/Auto_Use/macOS_use/agent/cli/minions/service.py +++ b/Auto_Use/macOS_use/agent/minions/service.py @@ -44,8 +44,8 @@ from typing import Optional import threading -from ....llm_provider.llm_manager import LLMManager -from ....controller.view import ControllerView +from ...llm_provider.llm_manager import LLMManager +from ...controller.view import ControllerView from .view import MinionResponseFormatter try: diff --git a/Auto_Use/macOS_use/agent/cli/minions/system_prompt.md b/Auto_Use/macOS_use/agent/minions/system_prompt.md similarity index 100% rename from Auto_Use/macOS_use/agent/cli/minions/system_prompt.md rename to Auto_Use/macOS_use/agent/minions/system_prompt.md diff --git a/Auto_Use/macOS_use/agent/cli/minions/view.py b/Auto_Use/macOS_use/agent/minions/view.py similarity index 100% rename from Auto_Use/macOS_use/agent/cli/minions/view.py rename to Auto_Use/macOS_use/agent/minions/view.py diff --git 
a/Auto_Use/macOS_use/agent/domain_knowledge/__init__.py b/Auto_Use/macOS_use/agent/skills/__init__.py similarity index 100% rename from Auto_Use/macOS_use/agent/domain_knowledge/__init__.py rename to Auto_Use/macOS_use/agent/skills/__init__.py diff --git a/Auto_Use/macOS_use/agent/domain_knowledge/browser.md b/Auto_Use/macOS_use/agent/skills/browser.md similarity index 100% rename from Auto_Use/macOS_use/agent/domain_knowledge/browser.md rename to Auto_Use/macOS_use/agent/skills/browser.md diff --git a/Auto_Use/macOS_use/agent/domain_knowledge/google_colab.md b/Auto_Use/macOS_use/agent/skills/google_colab.md similarity index 100% rename from Auto_Use/macOS_use/agent/domain_knowledge/google_colab.md rename to Auto_Use/macOS_use/agent/skills/google_colab.md diff --git a/Auto_Use/macOS_use/agent/domain_knowledge/google_services.md b/Auto_Use/macOS_use/agent/skills/google_services.md similarity index 100% rename from Auto_Use/macOS_use/agent/domain_knowledge/google_services.md rename to Auto_Use/macOS_use/agent/skills/google_services.md diff --git a/Auto_Use/macOS_use/agent/domain_knowledge/libreoffice_calc.md b/Auto_Use/macOS_use/agent/skills/libreoffice_calc.md similarity index 100% rename from Auto_Use/macOS_use/agent/domain_knowledge/libreoffice_calc.md rename to Auto_Use/macOS_use/agent/skills/libreoffice_calc.md diff --git a/Auto_Use/macOS_use/agent/domain_knowledge/microsoft_services.md b/Auto_Use/macOS_use/agent/skills/microsoft_services.md similarity index 100% rename from Auto_Use/macOS_use/agent/domain_knowledge/microsoft_services.md rename to Auto_Use/macOS_use/agent/skills/microsoft_services.md diff --git a/Auto_Use/macOS_use/agent/domain_knowledge/service.py b/Auto_Use/macOS_use/agent/skills/service.py similarity index 96% rename from Auto_Use/macOS_use/agent/domain_knowledge/service.py rename to Auto_Use/macOS_use/agent/skills/service.py index 9fa2535..f3a6357 100644 --- a/Auto_Use/macOS_use/agent/domain_knowledge/service.py +++ 
b/Auto_Use/macOS_use/agent/skills/service.py @@ -36,17 +36,17 @@ def __init__(self): self.browser_keywords = ["chrome", "firefox", "edge", "opera", "brave", "safari", "vivaldi", "browser"] def _load_mappings(self) -> dict: - """Load domain_knowledge.json mapping file""" + """Load skills.json mapping file""" try: - json_path = os.path.join(self.current_dir, "domain_knowledge.json") + json_path = os.path.join(self.current_dir, "skills.json") if os.path.exists(json_path): with open(json_path, 'r', encoding='utf-8') as f: return json.load(f) else: - logger.warning("domain_knowledge.json not found") + logger.warning("skills.json not found") return {"browser": {}, "os": {}} except Exception as e: - logger.error(f"Error loading domain_knowledge.json: {str(e)}") + logger.error(f"Error loading skills.json: {str(e)}") return {"browser": {}, "os": {}} def _is_browser(self, application_name: str) -> bool: diff --git a/Auto_Use/macOS_use/agent/domain_knowledge/domain_knowledge.json b/Auto_Use/macOS_use/agent/skills/skills.json similarity index 100% rename from Auto_Use/macOS_use/agent/domain_knowledge/domain_knowledge.json rename to Auto_Use/macOS_use/agent/skills/skills.json diff --git a/Auto_Use/macOS_use/agent/domain_knowledge/skyscanner.md b/Auto_Use/macOS_use/agent/skills/skyscanner.md similarity index 100% rename from Auto_Use/macOS_use/agent/domain_knowledge/skyscanner.md rename to Auto_Use/macOS_use/agent/skills/skyscanner.md diff --git a/Auto_Use/macOS_use/agent/domain_knowledge/wikipedia.md b/Auto_Use/macOS_use/agent/skills/wikipedia.md similarity index 100% rename from Auto_Use/macOS_use/agent/domain_knowledge/wikipedia.md rename to Auto_Use/macOS_use/agent/skills/wikipedia.md diff --git a/Auto_Use/macOS_use/controller/view.py b/Auto_Use/macOS_use/controller/view.py index b527e9d..7d269e6 100644 --- a/Auto_Use/macOS_use/controller/view.py +++ b/Auto_Use/macOS_use/controller/view.py @@ -627,7 +627,7 @@ def route_action(self, action_data): ] else: cli_cmd = [ - 
sys.executable, "-m", "Auto_Use.macOS_use.agent.cli", + sys.executable, "-m", "Auto_Use.macOS_use.agent.coder", "--task", task_description, "--provider", self.provider, "--model", self.model, @@ -749,7 +749,7 @@ def watch_cli_result(rf=result_file): ] else: cli_cmd = [ - sys.executable, "-m", "Auto_Use.macOS_use.agent.cli.minions", + sys.executable, "-m", "Auto_Use.macOS_use.agent.minions", "--task", minion_query, "--provider", self.provider, "--model", self.model, diff --git a/Auto_Use/macOS_use/remote_connection/telegram/service.py b/Auto_Use/macOS_use/remote_connection/telegram/service.py index d6e539b..645a396 100644 --- a/Auto_Use/macOS_use/remote_connection/telegram/service.py +++ b/Auto_Use/macOS_use/remote_connection/telegram/service.py @@ -677,7 +677,7 @@ def _run_agent(task, provider, model, chat_id, bot, loop): try: # Imported lazily — pulls in tree/element → skimage etc., which we # don't want to load until a task actually runs. - from Auto_Use.macOS_use.agent.service import AgentService + from Auto_Use.macOS_use.agent.main_driver.service import AgentService # Look up the runtime API key for the chosen provider so LLMManager # doesn't fall back to an os.getenv() the user never set. Telegram diff --git a/Auto_Use/windows_use/agent/__init__.py b/Auto_Use/windows_use/agent/__init__.py index 81cb3f4..f9e067a 100644 --- a/Auto_Use/windows_use/agent/__init__.py +++ b/Auto_Use/windows_use/agent/__init__.py @@ -17,7 +17,7 @@ # A small attribution goes a long way toward a healthy open-source # community — thank you for contributing. 
-from .service import AgentService -from .view import AgentResponseFormatter +from .main_driver.service import AgentService +from .main_driver.view import AgentResponseFormatter __all__ = ['AgentService', 'AgentResponseFormatter'] \ No newline at end of file diff --git a/Auto_Use/windows_use/agent/cli/__init__.py b/Auto_Use/windows_use/agent/coder/__init__.py similarity index 100% rename from Auto_Use/windows_use/agent/cli/__init__.py rename to Auto_Use/windows_use/agent/coder/__init__.py diff --git a/Auto_Use/windows_use/agent/cli/__main__.py b/Auto_Use/windows_use/agent/coder/__main__.py similarity index 93% rename from Auto_Use/windows_use/agent/cli/__main__.py rename to Auto_Use/windows_use/agent/coder/__main__.py index 11425ac..f90d7ea 100644 --- a/Auto_Use/windows_use/agent/cli/__main__.py +++ b/Auto_Use/windows_use/agent/coder/__main__.py @@ -23,7 +23,7 @@ This module allows the CLI agent to be run as a subprocess. Usage: - python -m Auto_Use.windows_use.agent.cli --task "your task here" + python -m Auto_Use.windows_use.agent.coder --task "your task here" Options: --task : Required. 
The task for CLI agent to execute @@ -37,7 +37,7 @@ - Result is written to --result file when done When called directly for testing: - - Run: python -m Auto_Use.windows_use.agent.cli --task "test task" + - Run: python -m Auto_Use.windows_use.agent.coder --task "test task" - Or use cli.py at project root """ @@ -62,8 +62,8 @@ def main(): formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - python -m Auto_Use.windows_use.agent.cli --task "fix the bug in test.py" - python -m Auto_Use.windows_use.agent.cli --task "create hello world" --provider openrouter --model gemini-3.5-flash + python -m Auto_Use.windows_use.agent.coder --task "fix the bug in test.py" + python -m Auto_Use.windows_use.agent.coder --task "create hello world" --provider openrouter --model gemini-3.5-flash """ ) diff --git a/Auto_Use/windows_use/agent/cli/service.py b/Auto_Use/windows_use/agent/coder/service.py similarity index 100% rename from Auto_Use/windows_use/agent/cli/service.py rename to Auto_Use/windows_use/agent/coder/service.py diff --git a/Auto_Use/windows_use/agent/cli/system_prompt.md b/Auto_Use/windows_use/agent/coder/system_prompt.md similarity index 100% rename from Auto_Use/windows_use/agent/cli/system_prompt.md rename to Auto_Use/windows_use/agent/coder/system_prompt.md diff --git a/Auto_Use/windows_use/agent/cli/view.py b/Auto_Use/windows_use/agent/coder/view.py similarity index 100% rename from Auto_Use/windows_use/agent/cli/view.py rename to Auto_Use/windows_use/agent/coder/view.py diff --git a/Auto_Use/windows_use/agent/main_driver/__init__.py b/Auto_Use/windows_use/agent/main_driver/__init__.py new file mode 100644 index 0000000..e487fc4 --- /dev/null +++ b/Auto_Use/windows_use/agent/main_driver/__init__.py @@ -0,0 +1,5 @@ +# Auto_Use/windows_use/agent/main_driver/__init__.py +from .service import AgentService +from .view import AgentResponseFormatter + +__all__ = ['AgentService', 'AgentResponseFormatter'] diff --git 
a/Auto_Use/windows_use/agent/service.py b/Auto_Use/windows_use/agent/main_driver/service.py similarity index 99% rename from Auto_Use/windows_use/agent/service.py rename to Auto_Use/windows_use/agent/main_driver/service.py index 5ee4a8b..14417a4 100644 --- a/Auto_Use/windows_use/agent/service.py +++ b/Auto_Use/windows_use/agent/main_driver/service.py @@ -26,11 +26,11 @@ from pathlib import Path from datetime import datetime from typing import Optional -from ..llm_provider.llm_manager import LLMManager +from ...llm_provider.llm_manager import LLMManager from .view import AgentResponseFormatter -from ..tree.element import UIElementScanner, ELEMENT_CONFIG -from ..controller import ControllerView -from .domain_knowledge import DomainKnowledgeService +from ...tree.element import UIElementScanner, ELEMENT_CONFIG +from ...controller import ControllerView +from ..skills import DomainKnowledgeService from PIL import Image from io import BytesIO @@ -106,7 +106,7 @@ def __init__(self, provider: str, model: str, save_conversation: bool = False, t self.controller = ControllerView(provider=provider, model=self.llm_manager.get_model_name(), web_callback=web_callback, shell_callback=shell_callback, cli_callback=cli_callback, api_key=api_key, stop_event=stop_event, external_terminal=external_terminal) # Initialize Domain Knowledge Service - self.domain_knowledge = DomainKnowledgeService() + self.skills = DomainKnowledgeService() # Save conversation flag self.save_conversation = save_conversation @@ -379,7 +379,7 @@ def process_request(self, task: str) -> str: formatted_element_tree = f"\n{element_tree_text}\n" # Fetch domain-specific knowledge if available - domain_block = self.domain_knowledge.get_knowledge( + domain_block = self.skills.get_knowledge( self.scanner.application_name, element_tree_text ) diff --git a/Auto_Use/windows_use/agent/system_prompt.md b/Auto_Use/windows_use/agent/main_driver/system_prompt.md similarity index 100% rename from 
Auto_Use/windows_use/agent/system_prompt.md rename to Auto_Use/windows_use/agent/main_driver/system_prompt.md diff --git a/Auto_Use/windows_use/agent/view.py b/Auto_Use/windows_use/agent/main_driver/view.py similarity index 100% rename from Auto_Use/windows_use/agent/view.py rename to Auto_Use/windows_use/agent/main_driver/view.py diff --git a/Auto_Use/macOS_use/agent/cli/minions/__init__.py b/Auto_Use/windows_use/agent/minions/__init__.py similarity index 96% rename from Auto_Use/macOS_use/agent/cli/minions/__init__.py rename to Auto_Use/windows_use/agent/minions/__init__.py index 95a5143..14d85eb 100644 --- a/Auto_Use/macOS_use/agent/cli/minions/__init__.py +++ b/Auto_Use/windows_use/agent/minions/__init__.py @@ -18,10 +18,10 @@ # community — thank you for contributing. # Minion sub-agent package. -# Mirrors the cli/ package structure: +# Mirrors the coder/ package structure: # - service.py : full agent loop (read-only scout variant) # - view.py : MinionResponseFormatter (next_goal-shape JSON validator) -# - __main__.py : subprocess entry — `python -m ...agent.cli.minions` +# - __main__.py : subprocess entry — `python -m ...agent.minions` # - system_prompt.md : read-only scout system prompt from .service import AgentService diff --git a/Auto_Use/windows_use/agent/cli/minions/__main__.py b/Auto_Use/windows_use/agent/minions/__main__.py similarity index 91% rename from Auto_Use/windows_use/agent/cli/minions/__main__.py rename to Auto_Use/windows_use/agent/minions/__main__.py index 01d4b78..647e8ad 100644 --- a/Auto_Use/windows_use/agent/cli/minions/__main__.py +++ b/Auto_Use/windows_use/agent/minions/__main__.py @@ -23,7 +23,7 @@ Subprocess entry for the read-only scout minion. Usage: - python -m Auto_Use.windows_use.agent.cli.minions --task "your question here" + python -m Auto_Use.windows_use.agent.minions --task "your question here" Options: --task : Required. The question/objective for the minion to answer. @@ -38,7 +38,7 @@ parent CLI agent as a tool response. 
When called directly for testing: - python -m Auto_Use.windows_use.agent.cli.minions --task "where is X defined?" + python -m Auto_Use.windows_use.agent.minions --task "where is X defined?" """ import argparse @@ -61,8 +61,8 @@ def main(): formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - python -m Auto_Use.windows_use.agent.cli.minions --task "where is _read_scratchpad_from_file defined and who calls it?" - python -m Auto_Use.windows_use.agent.cli.minions --task "list every file under src/ that imports requests" + python -m Auto_Use.windows_use.agent.minions --task "where is _read_scratchpad_from_file defined and who calls it?" + python -m Auto_Use.windows_use.agent.minions --task "list every file under src/ that imports requests" """ ) diff --git a/Auto_Use/windows_use/agent/cli/minions/service.py b/Auto_Use/windows_use/agent/minions/service.py similarity index 99% rename from Auto_Use/windows_use/agent/cli/minions/service.py rename to Auto_Use/windows_use/agent/minions/service.py index c08111f..7e9d52d 100644 --- a/Auto_Use/windows_use/agent/cli/minions/service.py +++ b/Auto_Use/windows_use/agent/minions/service.py @@ -44,8 +44,8 @@ from typing import Optional import threading -from ....llm_provider.llm_manager import LLMManager -from ....controller.view import ControllerView +from ...llm_provider.llm_manager import LLMManager +from ...controller.view import ControllerView from .view import MinionResponseFormatter try: diff --git a/Auto_Use/windows_use/agent/cli/minions/system_prompt.md b/Auto_Use/windows_use/agent/minions/system_prompt.md similarity index 99% rename from Auto_Use/windows_use/agent/cli/minions/system_prompt.md rename to Auto_Use/windows_use/agent/minions/system_prompt.md index e8e71a0..d29b916 100644 --- a/Auto_Use/windows_use/agent/cli/minions/system_prompt.md +++ b/Auto_Use/windows_use/agent/minions/system_prompt.md @@ -28,7 +28,7 @@ You exist so the parent CLI agent's context stays small. 
The parent does the edi **OS: Windows PowerShell. You are READ-ONLY.** 1. You MUST NEVER modify the filesystem. No editing, creating, deleting, moving, or renaming files. No `Set-Content`, `Add-Content`, `Out-File`, `New-Item`, `Remove-Item`, `Move-Item`, `Rename-Item`, redirection (`>`, `>>`), or any side-effecting shell command. You have NO `write` tool and NO `replace` tool. If you find yourself wanting to edit, instead record the exact location in your final report so the parent agent can apply the change. 2. Drill-down workflow: start broad (`glob`/`grep` to find candidates), then narrow (`view` exact ranges) — never dump whole large files into context. Standard pair: `grep` (locate the line) → `view` (read a 20-50 line range around the hit). -3. Always anchor findings to `path:line_no` (e.g. `Auto_Use\windows_use\agent\service.py:418`). Vague references like "somewhere in service.py" are never acceptable. +3. Always anchor findings to `path:line_no` (e.g. `Auto_Use\windows_use\agent\main_driver\service.py:418`). Vague references like "somewhere in service.py" are never acceptable. 4. For change requests: trace every connection — definition site, every caller, every place that reads/writes the affected state, related tests, related prompts. Report ALL of them, not just the obvious one. Missing one place = parent agent ships a broken change. 5. Keep running notes in `` after every confirmed finding so they survive across iterations and assemble into the final report. 6. When `view` shows `[line_number] text`, those numbers are the file's real line numbers — quote them exactly in your report. 
diff --git a/Auto_Use/windows_use/agent/cli/minions/view.py b/Auto_Use/windows_use/agent/minions/view.py similarity index 100% rename from Auto_Use/windows_use/agent/cli/minions/view.py rename to Auto_Use/windows_use/agent/minions/view.py diff --git a/Auto_Use/windows_use/agent/domain_knowledge/__init__.py b/Auto_Use/windows_use/agent/skills/__init__.py similarity index 100% rename from Auto_Use/windows_use/agent/domain_knowledge/__init__.py rename to Auto_Use/windows_use/agent/skills/__init__.py diff --git a/Auto_Use/windows_use/agent/domain_knowledge/browser.md b/Auto_Use/windows_use/agent/skills/browser.md similarity index 100% rename from Auto_Use/windows_use/agent/domain_knowledge/browser.md rename to Auto_Use/windows_use/agent/skills/browser.md diff --git a/Auto_Use/windows_use/agent/domain_knowledge/google_colab.md b/Auto_Use/windows_use/agent/skills/google_colab.md similarity index 100% rename from Auto_Use/windows_use/agent/domain_knowledge/google_colab.md rename to Auto_Use/windows_use/agent/skills/google_colab.md diff --git a/Auto_Use/windows_use/agent/domain_knowledge/google_services.md b/Auto_Use/windows_use/agent/skills/google_services.md similarity index 100% rename from Auto_Use/windows_use/agent/domain_knowledge/google_services.md rename to Auto_Use/windows_use/agent/skills/google_services.md diff --git a/Auto_Use/windows_use/agent/domain_knowledge/libreoffice_calc.md b/Auto_Use/windows_use/agent/skills/libreoffice_calc.md similarity index 100% rename from Auto_Use/windows_use/agent/domain_knowledge/libreoffice_calc.md rename to Auto_Use/windows_use/agent/skills/libreoffice_calc.md diff --git a/Auto_Use/windows_use/agent/domain_knowledge/microsoft_services.md b/Auto_Use/windows_use/agent/skills/microsoft_services.md similarity index 100% rename from Auto_Use/windows_use/agent/domain_knowledge/microsoft_services.md rename to Auto_Use/windows_use/agent/skills/microsoft_services.md diff --git a/Auto_Use/windows_use/agent/domain_knowledge/service.py 
b/Auto_Use/windows_use/agent/skills/service.py similarity index 96% rename from Auto_Use/windows_use/agent/domain_knowledge/service.py rename to Auto_Use/windows_use/agent/skills/service.py index 9fa2535..f3a6357 100644 --- a/Auto_Use/windows_use/agent/domain_knowledge/service.py +++ b/Auto_Use/windows_use/agent/skills/service.py @@ -36,17 +36,17 @@ def __init__(self): self.browser_keywords = ["chrome", "firefox", "edge", "opera", "brave", "safari", "vivaldi", "browser"] def _load_mappings(self) -> dict: - """Load domain_knowledge.json mapping file""" + """Load skills.json mapping file""" try: - json_path = os.path.join(self.current_dir, "domain_knowledge.json") + json_path = os.path.join(self.current_dir, "skills.json") if os.path.exists(json_path): with open(json_path, 'r', encoding='utf-8') as f: return json.load(f) else: - logger.warning("domain_knowledge.json not found") + logger.warning("skills.json not found") return {"browser": {}, "os": {}} except Exception as e: - logger.error(f"Error loading domain_knowledge.json: {str(e)}") + logger.error(f"Error loading skills.json: {str(e)}") return {"browser": {}, "os": {}} def _is_browser(self, application_name: str) -> bool: diff --git a/Auto_Use/windows_use/agent/domain_knowledge/domain_knowledge.json b/Auto_Use/windows_use/agent/skills/skills.json similarity index 100% rename from Auto_Use/windows_use/agent/domain_knowledge/domain_knowledge.json rename to Auto_Use/windows_use/agent/skills/skills.json diff --git a/Auto_Use/windows_use/agent/domain_knowledge/skyscanner.md b/Auto_Use/windows_use/agent/skills/skyscanner.md similarity index 100% rename from Auto_Use/windows_use/agent/domain_knowledge/skyscanner.md rename to Auto_Use/windows_use/agent/skills/skyscanner.md diff --git a/Auto_Use/windows_use/agent/domain_knowledge/wikipedia.md b/Auto_Use/windows_use/agent/skills/wikipedia.md similarity index 100% rename from Auto_Use/windows_use/agent/domain_knowledge/wikipedia.md rename to 
Auto_Use/windows_use/agent/skills/wikipedia.md diff --git a/Auto_Use/windows_use/controller/view.py b/Auto_Use/windows_use/controller/view.py index 42a0144..98c1ea0 100644 --- a/Auto_Use/windows_use/controller/view.py +++ b/Auto_Use/windows_use/controller/view.py @@ -569,7 +569,7 @@ def route_action(self, action_data): ] else: cli_cmd = [ - sys.executable, "-m", "Auto_Use.windows_use.agent.cli", + sys.executable, "-m", "Auto_Use.windows_use.agent.coder", "--task", task_description, "--provider", self.provider, "--model", self.model, @@ -706,7 +706,7 @@ def watch_cli_result(rf=result_file): ] else: cli_cmd = [ - sys.executable, "-m", "Auto_Use.windows_use.agent.cli.minions", + sys.executable, "-m", "Auto_Use.windows_use.agent.minions", "--task", minion_query, "--provider", self.provider, "--model", self.model, diff --git a/Auto_Use/windows_use/remote_connection/telegram/service.py b/Auto_Use/windows_use/remote_connection/telegram/service.py index 55e2a5c..613b8ae 100644 --- a/Auto_Use/windows_use/remote_connection/telegram/service.py +++ b/Auto_Use/windows_use/remote_connection/telegram/service.py @@ -678,7 +678,7 @@ def _run_agent(task, provider, model, chat_id, bot, loop): try: # Imported lazily — pulls in tree/element → skimage etc., which we # don't want to load until a task actually runs. 
- from Auto_Use.windows_use.agent.service import AgentService + from Auto_Use.windows_use.agent.main_driver.service import AgentService # Look up the runtime API key for the chosen provider so # LLMManager doesn't fall back to an os.getenv() that the user diff --git a/README.md b/README.md index d97dd1a..10be53a 100644 --- a/README.md +++ b/README.md @@ -81,10 +81,10 @@ Each Minion runs in its own session-isolated scratchpad at `cli_minion/{session_ python cli.py # CLI Agent with a specific task -python -m Auto_Use.macOS_use.agent.cli --task "refactor the auth module" +python -m Auto_Use.macOS_use.agent.coder --task "refactor the auth module" # Single Minion for a quick read-only question -python -m Auto_Use.macOS_use.agent.cli.minions --task "where is _validate_token defined and who calls it?" +python -m Auto_Use.macOS_use.agent.minions --task "where is _validate_token defined and who calls it?" ```
diff --git a/app.py b/app.py index 0aea3c8..bd69fdc 100644 --- a/app.py +++ b/app.py @@ -875,7 +875,7 @@ def monitor_milestones(): try: AgentService = importlib.import_module( - f"Auto_Use.{PLATFORM_PKG}.agent.service" + f"Auto_Use.{PLATFORM_PKG}.agent.main_driver.service" ).AgentService agent = AgentService( @@ -1027,7 +1027,7 @@ def main(): sys.argv.remove("--cli-mode") try: cli_main = importlib.import_module( - f"Auto_Use.{PLATFORM_PKG}.agent.cli.__main__" + f"Auto_Use.{PLATFORM_PKG}.agent.coder.__main__" ).main cli_main() except Exception: @@ -1041,7 +1041,7 @@ def main(): sys.argv.remove("--minion-mode") try: minion_main = importlib.import_module( - f"Auto_Use.{PLATFORM_PKG}.agent.cli.minions.__main__" + f"Auto_Use.{PLATFORM_PKG}.agent.minions.__main__" ).main minion_main() except Exception: diff --git a/cli.py b/cli.py index 71d6790..bc4fa86 100644 --- a/cli.py +++ b/cli.py @@ -22,9 +22,9 @@ import platform if platform.system() == "Darwin": - from Auto_Use.macOS_use.agent.cli import AgentService + from Auto_Use.macOS_use.agent.coder import AgentService elif platform.system() == "Windows": - from Auto_Use.windows_use.agent.cli import AgentService + from Auto_Use.windows_use.agent.coder import AgentService else: raise RuntimeError(f"Unsupported OS: {platform.system()}") diff --git a/main.py b/main.py index 68a814d..2167668 100644 --- a/main.py +++ b/main.py @@ -21,9 +21,9 @@ import platform if platform.system() == "Darwin": - from Auto_Use.macOS_use.agent.service import AgentService + from Auto_Use.macOS_use.agent.main_driver.service import AgentService elif platform.system() == "Windows": - from Auto_Use.windows_use.agent.service import AgentService + from Auto_Use.windows_use.agent.main_driver.service import AgentService else: raise RuntimeError(f"Unsupported OS: {platform.system()}") From 952421527e26d41f271b43a1d3cd6972fb655860 Mon Sep 17 00:00:00 2001 From: FunctionFreak Date: Sat, 23 May 2026 22:29:41 +0530 Subject: [PATCH 2/6] reducing word in the system 
prompt --- .../agent/main_driver/system_prompt.md | 22 +++++++++---------- .../agent/main_driver/system_prompt.md | 22 +++++++++---------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/Auto_Use/macOS_use/agent/main_driver/system_prompt.md b/Auto_Use/macOS_use/agent/main_driver/system_prompt.md index a4ad8d6..1668c3d 100644 --- a/Auto_Use/macOS_use/agent/main_driver/system_prompt.md +++ b/Auto_Use/macOS_use/agent/main_driver/system_prompt.md @@ -151,9 +151,8 @@ Each step includes: 3. [ID] is displayed at the top-left corner of the element it belongs to. -1. Each output must contain the following blocks. -2. These blocks build on one another as progress is made. -3. Output blocks: `thinking`, `verdict_last_action`, `decision`, `memory`, `current_goal`, and `action`. +1. Each output builds on the last; produce every block in order. +2. Blocks: `thinking`, `verdict_last_action`, `decision`, `memory`, `current_goal`, `action`. 1. You have thinking capability before jumping to any conclusion. You must follow the at each step. 2. Max 150 words. Keep to 3-5 sentences max. No repeating, no second-guessing. @@ -194,15 +193,14 @@ Each step includes: 2. Negative: `"verdict_last_action": "Based on : still on Home after clicking Downloads; id 100 path shows Home. : PASS, but left_click did not register. Verdict: FAIL."` -*The final synthesis of your thinking — bridge between reasoning and action.* -1. After reasoning through the screenshot and element_tree in your thinking block, distill your conclusion here in 2–3 concise lines. -2. Line 1: Focused app/window and its current state. -3. Line 2: Finalized actions (with IDs or tools). -4. Line 3: Why — the reasoning behind this decision and any recovery if applicable. -5. Format: "decision": "; .\nFinalized: .\nReason: ." -6. Examples: - 1.
"decision": "Safari - Gmail Compose; To/Subject/Body fields loaded.\nFinalized: input id 12 (To), input id 15 (Subject), input id 20 (Body).\nReason: All compose fields visible and aligned, filling in sequence to complete email draft." - 2. "decision": "Finder; Downloads folder open with target file visible.\nFinalized: left_click 2 times on id 33.\nReason: File is fully visible and aligned, opening it to verify contents before marking todo complete." +*Commit step: lock the exact surface, ids/tools, and rationale before emitting `action`.* +1. Line 1: Active app/window + its current state. +2. Line 2: Exact ids/tools you will act on (each must exist in ). +3. Line 3: Why this is correct; if last verdict was FAIL, state the recovery. +4. Format: "decision": "; .\nFinalized: .\nReason: ." +5. Examples: + 1. "decision": "Safari - Gmail Compose; To/Subject/Body fields loaded.\nFinalized: input id 12 (To), input id 15 (Subject), input id 20 (Body).\nReason: Fields visible and aligned, filling in sequence to complete the draft." + 2. "decision": "Finder; still on Home, last Downloads click did not register.\nFinalized: left_click id 18 (Downloads, sidebar).\nReason: Verdict FAIL on toolbar item; retrying via the stable sidebar target id 18." # Rule: align with the top pending ToDo item. diff --git a/Auto_Use/windows_use/agent/main_driver/system_prompt.md b/Auto_Use/windows_use/agent/main_driver/system_prompt.md index ec5de5a..0af08b4 100644 --- a/Auto_Use/windows_use/agent/main_driver/system_prompt.md +++ b/Auto_Use/windows_use/agent/main_driver/system_prompt.md @@ -137,9 +137,8 @@ Each step includes: 3. [ID] is displayed at the top-left corner of the element it belongs to. -1. Each output must contain the following blocks. -2. These blocks build on one another as progress is made. -3. Output blocks: `thinking`, `verdict_last_action`, `decision`, `memory`, `current_goal`, and `action`. +1. Each output builds on the last; produce every block in order. +2. 
Blocks: `thinking`, `verdict_last_action`, `decision`, `memory`, `current_goal`, `action`. 1. You have thinking capability before jumping to any conclusion. You must follow the at each step. 2. Max 150 words. Keep to 3-5 sentences max. No repeating, no second-guessing. @@ -180,15 +179,14 @@ Each step includes: 2. Negative: `"verdict_last_action": "Based on : still on Home after clicking Downloads; id 100 path shows Home. : PASS, but left_click did not register. Verdict: FAIL."` -*The final synthesis of your thinking — bridge between reasoning and action.* -1. After reasoning through the screenshot and element_tree in your thinking block, distill your conclusion here in 2–3 concise lines. -2. Line 1: Focused app/window and its current state. -3. Line 2: Finalized actions (with IDs or tools). -4. Line 3: Why — the reasoning behind this decision and any recovery if applicable. -5. Format: "decision": "; .\nFinalized: .\nReason: ." -6. Examples: - 1. "decision": "MS Edge - Gmail Compose; To/Subject/Body fields loaded.\nFinalized: input id 12 (To), input id 15 (Subject), input id 20 (Body).\nReason: All compose fields visible and aligned, filling in sequence to complete email draft." - 2. "decision": "File Explorer; Downloads folder open with target file visible.\nFinalized: left_click 2 times on id 33.\nReason: File is fully visible and aligned, opening it to verify contents before marking todo complete." +*Commit step: lock the exact surface, ids/tools, and rationale before emitting `action`.* +1. Line 1: Active app/window + its current state. +2. Line 2: Exact ids/tools you will act on (each must exist in ). +3. Line 3: Why this is correct; if last verdict was FAIL, state the recovery. +4. Format: "decision": "; .\nFinalized: .\nReason: ." +5. Examples: + 1.
"decision": "MS Edge - Gmail Compose; To/Subject/Body fields loaded.\nFinalized: input id 12 (To), input id 15 (Subject), input id 20 (Body).\nReason: Fields visible and aligned, filling in sequence to complete the draft." + 2. "decision": "File Explorer; still on Home, last Downloads click did not register.\nFinalized: left_click id 18 (Downloads, sidebar).\nReason: Verdict FAIL on toolbar item; retrying via the stable sidebar target id 18." # Rule: align with the top pending ToDo item. From 941e61accb241504a188888f019aa7d8251562d4 Mon Sep 17 00:00:00 2001 From: FunctionFreak Date: Mon, 25 May 2026 16:00:59 +0530 Subject: [PATCH 3/6] more model compatibility. --- .../macOS_use/llm_provider/llm_manager.py | 97 +++++++++++-------- .../macOS_use/llm_provider/openrouter/view.py | 38 ++++++++ .../windows_use/llm_provider/llm_manager.py | 97 +++++++++++-------- .../llm_provider/openrouter/view.py | 38 ++++++++ main.py | 2 +- model_list.txt | 8 +- 6 files changed, 192 insertions(+), 88 deletions(-) diff --git a/Auto_Use/macOS_use/llm_provider/llm_manager.py b/Auto_Use/macOS_use/llm_provider/llm_manager.py index 2d9a766..b3beac0 100644 --- a/Auto_Use/macOS_use/llm_provider/llm_manager.py +++ b/Auto_Use/macOS_use/llm_provider/llm_manager.py @@ -17,6 +17,7 @@ # A small attribution goes a long way toward a healthy open-source # community — thank you for contributing. +import copy import os import time from typing import Optional @@ -439,55 +440,65 @@ def _initialize_provider(self): raise ValueError(f"Unsupported provider: {self.provider}") def send_request(self, messages: list, annotated_screenshot_base64: Optional[str] = None): - """Send request to the selected provider""" - # Retry up to 3 times with 1 second delay + """Send request to the selected provider with idempotent retries.""" + last_error = None for attempt in range(3): + # Providers may mutate messages in-place (e.g.
wrapping the last user + # message into multimodal content blocks); deep-copy per attempt so + # those mutations cannot compound across retries. + attempt_messages = copy.deepcopy(messages) try: - response = self.provider_instance.send_request(messages, self.model, annotated_screenshot_base64) - - # Extract the assistant's response + response = self.provider_instance.send_request( + attempt_messages, self.model, annotated_screenshot_base64 + ) return response['choices'][0]['message']['content'] except Exception as e: - if attempt < 2: # If not the last attempt - print(f"⚠️ API request failed (attempt {attempt + 1}/3), retrying in 1 second...") + last_error = e + if attempt < 2: + print(f"⚠️ API request failed (attempt {attempt + 1}/3): {e}") + print(" Retrying in 1 second with a fresh message copy...") time.sleep(1) continue - else: - # CLI agent: seamless fallback to secondary model (never die) - if self.cli_agent and hasattr(self, '_cli_fallback_model') and self._cli_fallback_model: - print(f"⚠️ CLI Agent: {self.display_name} failed after 3 attempts. 
Switching to fallback...") - # Resolve fallback model info (same provider, different model) - if self.provider == "openrouter": - model_info = get_openrouter_model_info(self._cli_fallback_model) - elif self.provider == "groq": - model_info = get_groq_model_info(self._cli_fallback_model) - elif self.provider == "openai": - model_info = get_openai_model_info(self._cli_fallback_model) - elif self.provider == "anthropic": - model_info = get_anthropic_model_info(self._cli_fallback_model) - elif self.provider == "google": - model_info = get_google_model_info(self._cli_fallback_model) - elif self.provider == "perplexity": - model_info = get_perplexity_model_info(self._cli_fallback_model) - else: - raise e - # Hot-swap model (provider stays the same, no re-init needed) - self.model = model_info["api_name"] - self.has_vision = model_info["vision"] - self.display_name = model_info["display_name"] - self.model_info = model_info - # Clear fallback so we don't loop forever - self._cli_fallback_model = None - print(f"✅ CLI Agent: Now using {self.display_name}") - # Retry with fallback (same messages, full history intact) - try: - response = self.provider_instance.send_request(messages, self.model, annotated_screenshot_base64) - return response['choices'][0]['message']['content'] - except Exception as fallback_e: - print(f"❌ CLI Agent: Fallback {self.display_name} also failed: {fallback_e}") - raise fallback_e - else: - raise e + print(f"❌ API request failed after 3 attempts: {e}") + break + + # All 3 attempts failed. CLI agent: seamless fallback to secondary model (never die) + if self.cli_agent and hasattr(self, '_cli_fallback_model') and self._cli_fallback_model: + print(f"⚠️ CLI Agent: {self.display_name} failed after 3 attempts. 
Switching to fallback...") + # Resolve fallback model info (same provider, different model) + if self.provider == "openrouter": + model_info = get_openrouter_model_info(self._cli_fallback_model) + elif self.provider == "groq": + model_info = get_groq_model_info(self._cli_fallback_model) + elif self.provider == "openai": + model_info = get_openai_model_info(self._cli_fallback_model) + elif self.provider == "anthropic": + model_info = get_anthropic_model_info(self._cli_fallback_model) + elif self.provider == "google": + model_info = get_google_model_info(self._cli_fallback_model) + elif self.provider == "perplexity": + model_info = get_perplexity_model_info(self._cli_fallback_model) + else: + raise last_error + # Hot-swap model (provider stays the same, no re-init needed) + self.model = model_info["api_name"] + self.has_vision = model_info["vision"] + self.display_name = model_info["display_name"] + self.model_info = model_info + # Clear fallback so we don't loop forever + self._cli_fallback_model = None + print(f"✅ CLI Agent: Now using {self.display_name}") + # Retry with fallback (fresh copy, full history intact) + try: + response = self.provider_instance.send_request( + copy.deepcopy(messages), self.model, annotated_screenshot_base64 + ) + return response['choices'][0]['message']['content'] + except Exception as fallback_e: + print(f"❌ CLI Agent: Fallback {self.display_name} also failed: {fallback_e}") + raise fallback_e + else: + raise last_error def get_model_name(self) -> str: """Get the current model short name (preserves vertex suffix for downstream routing)""" diff --git a/Auto_Use/macOS_use/llm_provider/openrouter/view.py b/Auto_Use/macOS_use/llm_provider/openrouter/view.py index c4992a0..e8a0ab3 100644 --- a/Auto_Use/macOS_use/llm_provider/openrouter/view.py +++ b/Auto_Use/macOS_use/llm_provider/openrouter/view.py @@ -47,6 +47,18 @@ "display_name": "GPT-5.4 Pro", "reasoning_support": False }, + "gpt-5.5": { + "api_name": "openai/gpt-5.5", + "vision": True, 
+ "display_name": "GPT-5.5", + "reasoning_support": True + }, + "gpt-5.5-pro": { + "api_name": "openai/gpt-5.5-pro", + "vision": True, + "display_name": "GPT-5.5 Pro", + "reasoning_support": True + }, "claude-opus-4.7": { "api_name": "anthropic/claude-opus-4.7", "vision": True, @@ -75,11 +87,37 @@ "reasoning_support": True, "reasoning_effort": "low" }, + "grok-4.3": { + "api_name": "x-ai/grok-4.3", + "vision": True, + "display_name": "Grok 4.3", + "reasoning_support": True, + "reasoning_effort": "low" + }, "kimi-k2.6": { "api_name": "moonshotai/kimi-k2.6", "vision": True, "display_name": "Kimi K2.6", "reasoning_support": False + }, + "claude-opus-4.7-fast": { + "api_name": "anthropic/claude-opus-4.7-fast", + "vision": True, + "display_name": "Claude Opus 4.7 Fast", + "reasoning_support": True, + "reasoning_effort": "low" + }, + "mistral-medium-3.5": { + "api_name": "mistralai/mistral-medium-3-5", + "vision": True, + "display_name": "Mistral Medium 3.5", + "reasoning_support": False + }, + "qwen-3.6-plus": { + "api_name": "qwen/qwen3.6-plus", + "vision": True, + "display_name": "Qwen 3.6 Plus", + "reasoning_support": False } } diff --git a/Auto_Use/windows_use/llm_provider/llm_manager.py b/Auto_Use/windows_use/llm_provider/llm_manager.py index 0a49704..72e7682 100644 --- a/Auto_Use/windows_use/llm_provider/llm_manager.py +++ b/Auto_Use/windows_use/llm_provider/llm_manager.py @@ -17,6 +17,7 @@ # A small attribution goes a long way toward a healthy open-source # community — thank you for contributing. 
+import copy import os import time from typing import Optional @@ -438,55 +439,65 @@ def _initialize_provider(self): raise ValueError(f"Unsupported provider: {self.provider}") def send_request(self, messages: list, annotated_screenshot_base64: Optional[str] = None): - """Send request to the selected provider""" - # Retry up to 3 times with 1 second delay + """Send request to the selected provider with idempotent retries.""" + last_error = None for attempt in range(3): + # Providers may mutate messages in-place (e.g. wrapping the last user + # message into multimodal content blocks); deep-copy per attempt so + # those mutations cannot compound across retries. + attempt_messages = copy.deepcopy(messages) try: - response = self.provider_instance.send_request(messages, self.model, annotated_screenshot_base64) - - # Extract the assistant's response + response = self.provider_instance.send_request( + attempt_messages, self.model, annotated_screenshot_base64 + ) return response['choices'][0]['message']['content'] except Exception as e: - if attempt < 2: # If not the last attempt - print(f"⚠️ API request failed (attempt {attempt + 1}/3), retrying in 1 second...") + last_error = e + if attempt < 2: + print(f"⚠️ API request failed (attempt {attempt + 1}/3): {e}") + print(" Retrying in 1 second with a fresh message copy...") time.sleep(1) continue - else: - # CLI agent: seamless fallback to secondary model (never die) - if self.cli_agent and hasattr(self, '_cli_fallback_model') and self._cli_fallback_model: - print(f"⚠️ CLI Agent: {self.display_name} failed after 3 attempts. 
Switching to fallback...") - # Resolve fallback model info (same provider, different model) - if self.provider == "openrouter": - model_info = get_openrouter_model_info(self._cli_fallback_model) - elif self.provider == "groq": - model_info = get_groq_model_info(self._cli_fallback_model) - elif self.provider == "openai": - model_info = get_openai_model_info(self._cli_fallback_model) - elif self.provider == "anthropic": - model_info = get_anthropic_model_info(self._cli_fallback_model) - elif self.provider == "google": - model_info = get_google_model_info(self._cli_fallback_model) - elif self.provider == "perplexity": - model_info = get_perplexity_model_info(self._cli_fallback_model) - else: - raise e - # Hot-swap model (provider stays the same, no re-init needed) - self.model = model_info["api_name"] - self.has_vision = model_info["vision"] - self.display_name = model_info["display_name"] - self.model_info = model_info - # Clear fallback so we don't loop forever - self._cli_fallback_model = None - print(f"✅ CLI Agent: Now using {self.display_name}") - # Retry with fallback (same messages, full history intact) - try: - response = self.provider_instance.send_request(messages, self.model, annotated_screenshot_base64) - return response['choices'][0]['message']['content'] - except Exception as fallback_e: - print(f"❌ CLI Agent: Fallback {self.display_name} also failed: {fallback_e}") - raise fallback_e - else: - raise e + print(f"❌ API request failed after 3 attempts: {e}") + break + + # All 3 attempts failed. CLI agent: seamless fallback to secondary model (never die) + if self.cli_agent and hasattr(self, '_cli_fallback_model') and self._cli_fallback_model: + print(f"⚠️ CLI Agent: {self.display_name} failed after 3 attempts. 
Switching to fallback...") + # Resolve fallback model info (same provider, different model) + if self.provider == "openrouter": + model_info = get_openrouter_model_info(self._cli_fallback_model) + elif self.provider == "groq": + model_info = get_groq_model_info(self._cli_fallback_model) + elif self.provider == "openai": + model_info = get_openai_model_info(self._cli_fallback_model) + elif self.provider == "anthropic": + model_info = get_anthropic_model_info(self._cli_fallback_model) + elif self.provider == "google": + model_info = get_google_model_info(self._cli_fallback_model) + elif self.provider == "perplexity": + model_info = get_perplexity_model_info(self._cli_fallback_model) + else: + raise last_error + # Hot-swap model (provider stays the same, no re-init needed) + self.model = model_info["api_name"] + self.has_vision = model_info["vision"] + self.display_name = model_info["display_name"] + self.model_info = model_info + # Clear fallback so we don't loop forever + self._cli_fallback_model = None + print(f"✅ CLI Agent: Now using {self.display_name}") + # Retry with fallback (fresh copy, full history intact) + try: + response = self.provider_instance.send_request( + copy.deepcopy(messages), self.model, annotated_screenshot_base64 + ) + return response['choices'][0]['message']['content'] + except Exception as fallback_e: + print(f"❌ CLI Agent: Fallback {self.display_name} also failed: {fallback_e}") + raise fallback_e + else: + raise last_error def get_model_name(self) -> str: """Get the current model short name (preserves vertex suffix for downstream routing)""" diff --git a/Auto_Use/windows_use/llm_provider/openrouter/view.py b/Auto_Use/windows_use/llm_provider/openrouter/view.py index c4992a0..e8a0ab3 100644 --- a/Auto_Use/windows_use/llm_provider/openrouter/view.py +++ b/Auto_Use/windows_use/llm_provider/openrouter/view.py @@ -47,6 +47,18 @@ "display_name": "GPT-5.4 Pro", "reasoning_support": False }, + "gpt-5.5": { + "api_name": "openai/gpt-5.5", + 
"vision": True, + "display_name": "GPT-5.5", + "reasoning_support": True + }, + "gpt-5.5-pro": { + "api_name": "openai/gpt-5.5-pro", + "vision": True, + "display_name": "GPT-5.5 Pro", + "reasoning_support": True + }, "claude-opus-4.7": { "api_name": "anthropic/claude-opus-4.7", "vision": True, @@ -75,11 +87,37 @@ "reasoning_support": True, "reasoning_effort": "low" }, + "grok-4.3": { + "api_name": "x-ai/grok-4.3", + "vision": True, + "display_name": "Grok 4.3", + "reasoning_support": True, + "reasoning_effort": "low" + }, "kimi-k2.6": { "api_name": "moonshotai/kimi-k2.6", "vision": True, "display_name": "Kimi K2.6", "reasoning_support": False + }, + "claude-opus-4.7-fast": { + "api_name": "anthropic/claude-opus-4.7-fast", + "vision": True, + "display_name": "Claude Opus 4.7 Fast", + "reasoning_support": True, + "reasoning_effort": "low" + }, + "mistral-medium-3.5": { + "api_name": "mistralai/mistral-medium-3-5", + "vision": True, + "display_name": "Mistral Medium 3.5", + "reasoning_support": False + }, + "qwen-3.6-plus": { + "api_name": "qwen/qwen3.6-plus", + "vision": True, + "display_name": "Qwen 3.6 Plus", + "reasoning_support": False } } diff --git a/main.py b/main.py index 2167668..2113a6f 100644 --- a/main.py +++ b/main.py @@ -29,7 +29,7 @@ # Configuration PROVIDER = "openrouter" -MODEL = "gemini-3.5-flash" #refer to the model name correctly from model_list.txt. +MODEL = "qwen-3.6-flash" #refer to the model name correctly from model_list.txt. 
# Your task here task = """ diff --git a/model_list.txt b/model_list.txt index 9c261f0..45f8f72 100644 --- a/model_list.txt +++ b/model_list.txt @@ -18,11 +18,17 @@ gemini-3.1-pro gemini-3.5-flash gpt-5.4-mini gpt-5.4-pro +gpt-5.5 +gpt-5.5-pro claude-opus-4.6 +claude-opus-4.7-fast claude-sonnet-4.6 grok-4-fast grok-4.1-fast -kimi-k2.5 +grok-4.3 +kimi-k2.6 +mistral-medium-3.5 +qwen-3.6-plus -------------------------------------------------------------- From 966a0687d864511b0437561cd0cf7edad681f084 Mon Sep 17 00:00:00 2001 From: FunctionFreak Date: Mon, 25 May 2026 16:09:52 +0530 Subject: [PATCH 4/6] update the model --- Auto_Use/macOS_use/llm_provider/openai/view.py | 14 ++++++++++++++ Auto_Use/windows_use/llm_provider/openai/view.py | 14 ++++++++++++++ model_list.txt | 2 ++ 3 files changed, 30 insertions(+) diff --git a/Auto_Use/macOS_use/llm_provider/openai/view.py b/Auto_Use/macOS_use/llm_provider/openai/view.py index 09a1d3c..bdabfb0 100644 --- a/Auto_Use/macOS_use/llm_provider/openai/view.py +++ b/Auto_Use/macOS_use/llm_provider/openai/view.py @@ -34,6 +34,20 @@ "display_name": "GPT-5.4", "reasoning_support": True, "json_mode": True + }, + "gpt-5.5": { + "api_name": "gpt-5.5", + "vision": True, + "display_name": "GPT-5.5", + "reasoning_support": True, + "json_mode": True + }, + "gpt-5.5-pro": { + "api_name": "gpt-5.5-pro", + "vision": True, + "display_name": "GPT-5.5 Pro", + "reasoning_support": True, + "json_mode": True } } diff --git a/Auto_Use/windows_use/llm_provider/openai/view.py b/Auto_Use/windows_use/llm_provider/openai/view.py index 09a1d3c..bdabfb0 100644 --- a/Auto_Use/windows_use/llm_provider/openai/view.py +++ b/Auto_Use/windows_use/llm_provider/openai/view.py @@ -34,6 +34,20 @@ "display_name": "GPT-5.4", "reasoning_support": True, "json_mode": True + }, + "gpt-5.5": { + "api_name": "gpt-5.5", + "vision": True, + "display_name": "GPT-5.5", + "reasoning_support": True, + "json_mode": True + }, + "gpt-5.5-pro": { + "api_name": "gpt-5.5-pro", + 
"vision": True, + "display_name": "GPT-5.5 Pro", + "reasoning_support": True, + "json_mode": True } } diff --git a/model_list.txt b/model_list.txt index 45f8f72..85a343c 100644 --- a/model_list.txt +++ b/model_list.txt @@ -46,6 +46,8 @@ MODEL name -------------------------------------------------------------- gpt-5.4-mini gpt-5.4 +gpt-5.5 +gpt-5.5-pro -------------------------------------------------------------- From 34b45edc0d08594fc7d647e00c8602919934fd0f Mon Sep 17 00:00:00 2001 From: FunctionFreak Date: Mon, 25 May 2026 16:16:32 +0530 Subject: [PATCH 5/6] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0bb8e53..c109d40 100644 --- a/.gitignore +++ b/.gitignore @@ -112,3 +112,4 @@ mac_binary_build.py *.icns screen_record.py /Auto_Use/macOS_use/scratchpad +/cli_minion_result From ca85f2e146d346abb0dd0f8d18b40c6fe4dc971b Mon Sep 17 00:00:00 2001 From: FunctionFreak Date: Mon, 25 May 2026 16:20:49 +0530 Subject: [PATCH 6/6] added header --- .../macOS_use/agent/main_driver/__init__.py | 19 +++++++++++++++++++ .../windows_use/agent/main_driver/__init__.py | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/Auto_Use/macOS_use/agent/main_driver/__init__.py b/Auto_Use/macOS_use/agent/main_driver/__init__.py index d58e70c..300d765 100644 --- a/Auto_Use/macOS_use/agent/main_driver/__init__.py +++ b/Auto_Use/macOS_use/agent/main_driver/__init__.py @@ -1,3 +1,22 @@ +# Copyright 2026 Autouse AI — https://github.com/auto-use/Auto-Use +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# If you build on this project, please keep this header and credit +# Autouse AI (https://github.com/auto-use/Auto-Use) in forks and derivative works. +# A small attribution goes a long way toward a healthy open-source +# community — thank you for contributing. + # Auto_Use/macOS_use/agent/main_driver/__init__.py from .service import AgentService from .view import AgentResponseFormatter diff --git a/Auto_Use/windows_use/agent/main_driver/__init__.py b/Auto_Use/windows_use/agent/main_driver/__init__.py index e487fc4..d463907 100644 --- a/Auto_Use/windows_use/agent/main_driver/__init__.py +++ b/Auto_Use/windows_use/agent/main_driver/__init__.py @@ -1,3 +1,22 @@ +# Copyright 2026 Autouse AI — https://github.com/auto-use/Auto-Use +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# If you build on this project, please keep this header and credit +# Autouse AI (https://github.com/auto-use/Auto-Use) in forks and derivative works. +# A small attribution goes a long way toward a healthy open-source +# community — thank you for contributing. + # Auto_Use/windows_use/agent/main_driver/__init__.py from .service import AgentService from .view import AgentResponseFormatter