From 06c78db26abaada2cb72b74187aa98be48747548 Mon Sep 17 00:00:00 2001 From: FunctionFreak Date: Thu, 14 May 2026 17:42:01 +0530 Subject: [PATCH 1/4] initial setup code for telegram --- .../remote_connection/telegram/banner.py | 390 ++++++++++++++++++ .../remote_connection/telegram/service.py | 0 .../remote_connection/telegram/setup.py | 97 +++++ app.py | 16 + frontend/css/style.css | 212 +++++++++- frontend/index.html | 36 +- frontend/script.js | 53 ++- main.py | 6 +- 8 files changed, 780 insertions(+), 30 deletions(-) create mode 100644 Auto_Use/macOS_use/remote_connection/telegram/banner.py create mode 100644 Auto_Use/macOS_use/remote_connection/telegram/service.py create mode 100644 Auto_Use/macOS_use/remote_connection/telegram/setup.py diff --git a/Auto_Use/macOS_use/remote_connection/telegram/banner.py b/Auto_Use/macOS_use/remote_connection/telegram/banner.py new file mode 100644 index 0000000..e4c7e7e --- /dev/null +++ b/Auto_Use/macOS_use/remote_connection/telegram/banner.py @@ -0,0 +1,390 @@ +"""Interactive walkthrough banner for setup.py. + +A small always-on-top pill at the top-right of the screen that contains: + - the animated stop-orb on the left, + - a status message in the middle (multi-line capable; pill grows downward), + - a clickable "Next" button on the right (only visible when the script is + waiting for the user — hidden during processing steps). + +setup.py calls show() once, then alternates update("…") + wait_for_next() +to pace the user. close() tears it down. The Next button is shown +automatically inside wait_for_next() and hidden as soon as it returns, so +callers don't have to manage visibility manually. + +The pill default height is the original 44px. When a long status message +wraps to multiple lines a ResizeObserver in JS posts the new body height +back to Python via a second WKScriptMessageHandler, and Python resizes the +NSWindow (top edge anchored, height grows downward). + +Everything runs inside the existing Python process. 
pywebview's main-thread +NSApplication run loop (started by webview.start() in app.py) is reused — +AppKit work is dispatched onto it via PyObjCTools.AppHelper.callAfter so the +Flask worker thread that runs setup.py never touches Cocoa directly. + +If Cocoa/PyObjC isn't importable for any reason the class becomes a no-op +so the automation still completes without a banner. +""" +import logging +import threading + +logger = logging.getLogger(__name__) + +try: + from Cocoa import ( + NSWindow, NSColor, NSScreen, + NSBackingStoreBuffered, NSMakeRect, + ) + from Foundation import NSObject + from WebKit import WKWebView, WKWebViewConfiguration + from PyObjCTools.AppHelper import callAfter + _COCOA_OK = True +except Exception as e: + logger.warning(f"banner: Cocoa unavailable, popup disabled ({e})") + _COCOA_OK = False + +NSWindowStyleMaskBorderless = 0 +NSStatusWindowLevel = 25 + + +BANNER_HTML = """ + + + +
+
+
+
+
+
+
+
+Starting… + + + +""" + + +if _COCOA_OK: + class _NextHandler(NSObject): + """WKScriptMessageHandler — fires self._event when JS posts to 'next_clicked'. + + No custom init: PyObjC's bridged NSObject.init takes no args, so calling + NSObject.init(self) inside a subclass crashes with "Need 0 arguments, + got 1". Instead, allocate with the default init and set the event as a + plain Python attribute right after — PyObjC subclasses accept arbitrary + Python attributes just fine. + """ + def userContentController_didReceiveScriptMessage_(self, controller, message): + try: + self._event.set() + except Exception: + pass + + class _HeightHandler(NSObject): + """WKScriptMessageHandler — receives body.scrollHeight from JS and calls + the banner's _on_height_changed on the main thread (already the current + thread, since WK message delivery is on main).""" + def userContentController_didReceiveScriptMessage_(self, controller, message): + try: + banner = self._banner + if banner is not None: + banner._on_height_changed(int(message.body())) + except Exception: + pass +else: + _NextHandler = None + _HeightHandler = None + + +class StatusBanner: + W, MIN_H, MAX_H, TOP_MARGIN, RIGHT_MARGIN = 440, 44, 200, 56, 20 + + def __init__(self): + self._window = None + self._webview = None + self._next_handler = None # strong refs so the JS-bridge handlers + self._height_handler = None # don't get GC'd + self._next_event = threading.Event() + self._current_h = self.MIN_H + + # ---- public API (callable from any thread) ---- + + def show(self): + if not _COCOA_OK: + return + callAfter(self._create) + + def update(self, text): + if not _COCOA_OK: + return + callAfter(self._set_text, text) + + def wait_for_next(self, timeout=None): + """Block calling thread until user clicks Next (or timeout). Returns True if clicked. 
+ + Shows the Next button on entry and hides it on exit, so during normal + update() calls the button stays hidden — only the entry/exit boundaries + of a wait_for_next show a clickable Next. + """ + if not _COCOA_OK: + return True # no banner → don't block forever + callAfter(self._set_next_visible, True) + self._next_event.clear() + clicked = self._next_event.wait(timeout) + callAfter(self._set_next_visible, False) + return clicked + + def close(self): + if not _COCOA_OK: + return + callAfter(self._destroy) + + # ---- main-thread implementations ---- + + def _create(self): + try: + scr = NSScreen.mainScreen().frame() + x = scr.size.width - self.W - self.RIGHT_MARGIN + y = scr.size.height - self.MIN_H - self.TOP_MARGIN + rect = NSMakeRect(x, y, self.W, self.MIN_H) + + w = NSWindow.alloc().initWithContentRect_styleMask_backing_defer_( + rect, NSWindowStyleMaskBorderless, NSBackingStoreBuffered, False + ) + w.setLevel_(NSStatusWindowLevel) + w.setOpaque_(False) + w.setBackgroundColor_(NSColor.clearColor()) + w.setIgnoresMouseEvents_(False) + w.setHasShadow_(True) + w.setReleasedWhenClosed_(False) + + content = w.contentView() + content.setWantsLayer_(True) + content.layer().setBackgroundColor_( + NSColor.colorWithCalibratedRed_green_blue_alpha_(1.0, 1.0, 1.0, 0.96).CGColor() + ) + # Fixed at MIN_H/2 so the pill stays a stadium at default height + # and becomes a rounded-rectangle when the height grows to fit + # multi-line messages — cleaner than a fat oval. + content.layer().setCornerRadius_(self.MIN_H / 2.0) + content.layer().setMasksToBounds_(True) + + cfg = WKWebViewConfiguration.alloc().init() + + # Register both JS→Python bridges BEFORE the WebView is created. 
+ nh = _NextHandler.alloc().init() + nh._event = self._next_event + cfg.userContentController().addScriptMessageHandler_name_(nh, "next_clicked") + + hh = _HeightHandler.alloc().init() + hh._banner = self + cfg.userContentController().addScriptMessageHandler_name_(hh, "height_changed") + + wv_rect = NSMakeRect(0, 0, self.W, self.MIN_H) + wv = WKWebView.alloc().initWithFrame_configuration_(wv_rect, cfg) + try: + wv.setValue_forKey_(False, "drawsBackground") + except Exception: + pass + try: + wv.setWantsLayer_(True) + wv.layer().setBackgroundColor_(NSColor.clearColor().CGColor()) + except Exception: + pass + wv.loadHTMLString_baseURL_(BANNER_HTML, None) + content.addSubview_(wv) + + w.orderFrontRegardless() + self._window, self._webview = w, wv + self._next_handler, self._height_handler = nh, hh + self._current_h = self.MIN_H + except Exception as e: + logger.warning(f"banner: _create failed ({e})") + + def _set_text(self, text): + try: + if self._webview is None: + return + safe = (str(text) + .replace("\\", "\\\\") + .replace("'", "\\'") + .replace("\n", " ") + .replace("\r", " ")) + # Hand the full text to JS which animates it in word-by-word so + # the banner reads as a smooth reveal rather than snapping. + js = f"if (window.setMsg) setMsg('{safe}'); else document.getElementById('msg').textContent = '{safe}';" + self._webview.evaluateJavaScript_completionHandler_(js, None) + except Exception: + pass + + def _set_next_visible(self, visible): + try: + if self._webview is None: + return + disp = "inline-block" if visible else "none" + js = (f"var b=document.getElementById('next'); " + f"if (b) b.style.display='{disp}';") + self._webview.evaluateJavaScript_completionHandler_(js, None) + except Exception: + pass + + def _on_height_changed(self, requested_h): + """Resize the NSWindow to match the WebView's content height. + + Top edge stays put — height grows downward by adjusting NSWindow's + bottom-left origin Y. Clamped to [MIN_H, MAX_H]. 
+ """ + try: + if self._window is None: + return + new_h = max(self.MIN_H, min(int(requested_h), self.MAX_H)) + if abs(new_h - self._current_h) < 1: + return + self._current_h = new_h + frame = self._window.frame() + # NSWindow origin is bottom-left; to keep top edge fixed while + # height changes, shift origin Y by (old_h - new_h). + new_y = frame.origin.y + frame.size.height - new_h + new_frame = NSMakeRect(frame.origin.x, new_y, frame.size.width, new_h) + self._window.setFrame_display_animate_(new_frame, True, True) + # WebView must also follow — content view auto-resizes with the + # window, but the WKWebView subview doesn't unless told. + if self._webview is not None: + self._webview.setFrame_(NSMakeRect(0, 0, frame.size.width, new_h)) + except Exception as e: + logger.warning(f"banner: _on_height_changed failed ({e})") + + def _destroy(self): + try: + if self._webview is not None: + try: + self._webview.stopLoading() + except Exception: + pass + try: + cfg = self._webview.configuration() + if cfg is not None: + uc = cfg.userContentController() + uc.removeScriptMessageHandlerForName_("next_clicked") + uc.removeScriptMessageHandlerForName_("height_changed") + except Exception: + pass + if self._window is not None: + self._window.orderOut_(None) + except Exception: + pass + finally: + try: + self._next_event.set() + except Exception: + pass + self._window = None + self._webview = None + self._next_handler = None + self._height_handler = None diff --git a/Auto_Use/macOS_use/remote_connection/telegram/service.py b/Auto_Use/macOS_use/remote_connection/telegram/service.py new file mode 100644 index 0000000..e69de29 diff --git a/Auto_Use/macOS_use/remote_connection/telegram/setup.py b/Auto_Use/macOS_use/remote_connection/telegram/setup.py new file mode 100644 index 0000000..3dcd7af --- /dev/null +++ b/Auto_Use/macOS_use/remote_connection/telegram/setup.py @@ -0,0 +1,97 @@ +"""Telegram remote-connection setup driver (macOS, guided mode). 
+ +Opens Safari, navigates to web.telegram.org, then lets the user log in +manually. Progress is paced by a small always-on-top banner that streams +status text and has a Next button. The script blocks on user clicks via +banner.wait_for_next() — the user does the actual login (phone, country, +OTP) themselves; we just get them to the right page. +""" +import logging +import time + +from Auto_Use.macOS_use.controller.tool.open_app import open_app +from Auto_Use.macOS_use.tree.element import UIElementScanner, ELEMENT_CONFIG +from Auto_Use.macOS_use.controller.service import ControllerService +from Auto_Use.macOS_use.controller.key_combo.service import KeyComboService +from Auto_Use.macOS_use.remote_connection.telegram.banner import StatusBanner + +logger = logging.getLogger(__name__) + +TELEGRAM_WEB_URL = "web.telegram.org" +STEP_DELAY_SEC = 2 + + +def _find_address_bar(mapping: dict) -> str | None: + """Return the index of Safari's smart-search field, or None if not found.""" + for idx, info in mapping.items(): + if info.get("name") == "smart search field" and info.get("type") == "TextField": + return idx + return None + + +def _open_telegram_in_safari(banner) -> bool: + """Launch Safari and navigate it to web.telegram.org. + + Streams sub-step status to the banner so the user can see what's happening + while Safari takes focus. Returns False on any failure. + """ + banner.update("Please wait — confirming Safari is open…") + if not open_app("Safari"): + logger.error("setup.py: failed to launch Safari") + return False + time.sleep(STEP_DELAY_SEC) + + scanner = UIElementScanner(ELEMENT_CONFIG) + scanner.scan_elements() + mapping = scanner.get_elements_mapping() + time.sleep(STEP_DELAY_SEC) + + address_bar_index = _find_address_bar(mapping) + if address_bar_index is None: + logger.error("setup.py: Safari address bar not found in scan") + return False + + banner.update("Safari detected. 
Writing the URL for you, please wait…") + + controller = ControllerService() + controller.set_elements(mapping, scanner.application_name) + key_combo = KeyComboService() + + controller.click(address_bar_index) + time.sleep(STEP_DELAY_SEC) + + controller.canvas_input(TELEGRAM_WEB_URL) + time.sleep(STEP_DELAY_SEC) + + key_combo.send("return") + return True + + +def run(country_code: str = "", phone: str = "") -> bool: + """Guided Telegram-Web pairing. + + Shows a banner, waits for the user to click Next, opens Telegram Web, + waits for the user to log in manually + click Next, then closes. + + country_code and phone are accepted but ignored — kept only so the + pre-existing /api/telegram/connect callsite signature still works. + """ + banner = StatusBanner() + banner.show() + try: + banner.update("Let's get you set up with Telegram. Please click Next.") + banner.wait_for_next() + + if not _open_telegram_in_safari(banner): + banner.update("Failed to open Telegram. Close this banner and try again.") + banner.wait_for_next(timeout=15) + return False + + banner.update("Please log in to Telegram, then click Next") + banner.wait_for_next() + + banner.update("Done") + time.sleep(STEP_DELAY_SEC) + return True + finally: + banner.close() diff --git a/app.py b/app.py index 8cd1d07..0c1039d 100644 --- a/app.py +++ b/app.py @@ -561,6 +561,22 @@ def delete_api_key(): debug_exception("delete_api_key") return jsonify({'error': 'Failed to delete'}), 500 +@app.route('/api/telegram/connect', methods=['POST']) +def telegram_connect(): + """Kick off the guided Telegram pairing flow. + + Returns immediately; the real work (banner + Safari navigation + manual + login) runs in a background thread because it blocks on user clicks for + minutes. The banner is the source of truth for live status. 
+ """ + try: + from Auto_Use.macOS_use.remote_connection.telegram.setup import run as run_telegram_setup + threading.Thread(target=run_telegram_setup, daemon=True).start() + return jsonify({'status': 'started'}) + except Exception as e: + debug_exception('telegram_connect') + return jsonify({'status': 'error', 'message': str(e)}), 500 + @app.route('/api/vertex/status', methods=['GET']) def get_vertex_status(): """Return current Vertex AI config (project_id and location)""" diff --git a/frontend/css/style.css b/frontend/css/style.css index d017fbe..ea038d0 100644 --- a/frontend/css/style.css +++ b/frontend/css/style.css @@ -896,27 +896,124 @@ body { padding: clamp(20px, 3vw, 32px) 0; } -.remote-qr-container { - width: 160px; - height: 160px; +.remote-instruction { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; + font-size: clamp(12px, 1.2vw, 14px); + color: rgba(255, 255, 255, 0.45); + text-align: center; +} + +.remote-service-btn { display: flex; align-items: center; - justify-content: center; + gap: 12px; + width: 100%; + padding: 14px 16px; + background: rgba(255, 255, 255, 0.06); + border: 1px solid rgba(255, 255, 255, 0.1); border-radius: 14px; - background: rgba(255, 255, 255, 0.08); - padding: 12px; + color: rgba(255, 255, 255, 0.92); + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; + font-size: clamp(13px, 1.3vw, 15px); + font-weight: 500; + cursor: pointer; + transition: background 0.2s ease, border-color 0.2s ease; } -.remote-qr-container img, -.remote-qr-container canvas { - border-radius: 6px; +.remote-service-btn:hover { + background: rgba(255, 255, 255, 0.1); + border-color: rgba(255, 255, 255, 0.18); } -.remote-instruction { +.remote-service-icon { + display: flex; + align-items: center; + justify-content: center; + color: rgba(80, 165, 230, 0.95); +} + +.remote-service-label { + flex: 1; + text-align: left; +} + +.remote-service-chevron { + color: rgba(255, 255, 255, 0.45); + 
display: flex; + align-items: center; +} + +.remote-phone-form { + display: flex; + flex-direction: column; + gap: 10px; + width: 100%; + margin-top: 4px; +} + +.remote-phone-row { + display: flex; + gap: 8px; +} + +.remote-country-select, +.remote-phone-input { + background: rgba(255, 255, 255, 0.06); + border: 1px solid rgba(255, 255, 255, 0.12); + border-radius: 10px; + color: rgba(255, 255, 255, 0.95); font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; font-size: clamp(12px, 1.2vw, 14px); - color: rgba(255, 255, 255, 0.45); - text-align: center; + padding: 9px 12px; + outline: none; + transition: border-color 0.2s ease, background 0.2s ease; +} + +.remote-country-select { + flex: 0 0 42%; + appearance: none; + -webkit-appearance: none; + background-image: url("data:image/svg+xml;utf8,"); + background-repeat: no-repeat; + background-position: right 10px center; + padding-right: 28px; +} + +.remote-country-select option { + background: #1a1a1a; + color: #fff; +} + +.remote-phone-input { + flex: 1; +} + +.remote-country-select:focus, +.remote-phone-input:focus { + border-color: rgba(80, 165, 230, 0.55); + background: rgba(255, 255, 255, 0.09); +} + +.remote-connect-btn { + padding: 10px 16px; + background: rgba(80, 165, 230, 0.85); + color: #fff; + border: none; + border-radius: 10px; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; + font-size: clamp(12px, 1.2vw, 14px); + font-weight: 600; + cursor: pointer; + transition: background 0.2s ease, opacity 0.2s ease; +} + +.remote-connect-btn:hover:not(:disabled) { + background: rgba(80, 165, 230, 1); +} + +.remote-connect-btn:disabled { + opacity: 0.45; + cursor: not-allowed; } .remote-connected { @@ -2334,3 +2431,94 @@ body.cli-mode .settings-btn { loop of funny scraping/searching phrases (handled in script.js). No CSS overlay — globe icon + streamed phrases are the cue. 
============================================================ */ + +/* ============================================================ + AutoUse helper info popup + Shown once when the user clicks Connect under Remote Connection → + Telegram. Tells them to look at the top-right banner. + ============================================================ */ +.telegram-prompt-overlay { + position: fixed; + inset: 0; + background: rgba(0, 0, 0, 0.3); + backdrop-filter: blur(8px); + -webkit-backdrop-filter: blur(8px); + z-index: 110; + + display: flex; + align-items: center; + justify-content: center; + + opacity: 0; + visibility: hidden; + transition: opacity 0.3s ease, visibility 0.3s ease; +} + +.telegram-prompt-overlay.active { + opacity: 1; + visibility: visible; +} + +.telegram-prompt-popup { + position: relative; + width: clamp(320px, 90vw, 420px); + padding: clamp(24px, 3vw, 32px); + border-radius: clamp(16px, 2vw, 24px); + + transform: scale(0.9) translateY(20px); + transition: transform 0.4s cubic-bezier(0.175, 0.885, 0.32, 1.275); + + box-shadow: 0 clamp(8px, 2vw, 16px) clamp(32px, 6vw, 64px) rgba(0, 0, 0, 0.2); +} + +.telegram-prompt-overlay.active .telegram-prompt-popup { + transform: scale(1) translateY(0); +} + +.telegram-prompt-content { + position: relative; + z-index: 3; + display: flex; + flex-direction: column; + align-items: center; + gap: clamp(14px, 1.8vw, 18px); + text-align: center; +} + +.telegram-prompt-title { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; + font-size: clamp(18px, 2vw, 22px); + font-weight: 600; + color: rgba(255, 255, 255, 0.95); +} + +.telegram-prompt-message { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; + font-size: clamp(13px, 1.3vw, 14px); + font-weight: 400; + color: rgba(255, 255, 255, 0.75); + line-height: 1.5; +} + +.telegram-prompt-message strong { + color: rgba(255, 255, 255, 0.95); + font-weight: 600; +} + +.telegram-prompt-ok { + margin-top: 4px; + 
padding: 10px 28px; + border: none; + border-radius: 10px; + background: rgba(80, 165, 230, 0.85); + color: #fff; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; + font-size: clamp(12px, 1.2vw, 14px); + font-weight: 600; + cursor: pointer; + transition: background 0.2s ease; +} + +.telegram-prompt-ok:hover { + background: rgba(80, 165, 230, 1); +} diff --git a/frontend/index.html b/frontend/index.html index f79890a..1617442 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -373,10 +373,26 @@ Remote Connection - +
-
-
Scan with your phone to pair via Telegram
+ + + + +
@@ -392,6 +408,20 @@ + +
+
+
+
+
+
+
AutoUse helper
+
The helper will walk you through Telegram setup. Please click Next on the banner in the top-right corner of your screen to continue.
+ +
+
+
+
diff --git a/frontend/script.js b/frontend/script.js index 2f3efa6..f3f2f40 100644 --- a/frontend/script.js +++ b/frontend/script.js @@ -605,12 +605,17 @@ document.addEventListener('DOMContentLoaded', () => { }); }); - // Remote Connection — QR + status logic + // Remote Connection — guided Telegram pairing const remoteSetup = document.getElementById('remoteSetup'); const remoteConnected = document.getElementById('remoteConnected'); - const remoteQrContainer = document.getElementById('remoteQrContainer'); const remoteBotName = document.getElementById('remoteBotName'); const remoteDisconnectBtn = document.getElementById('remoteDisconnectBtn'); + const remoteTelegramBtn = document.getElementById('remoteTelegramBtn'); + const remoteTelegramForm = document.getElementById('remoteTelegramForm'); + const remoteConnectBtn = document.getElementById('remoteConnectBtn'); + const remoteInstructions = document.getElementById('remoteInstructions'); + const telegramPromptOverlay = document.getElementById('telegramPromptOverlay'); + const telegramPromptOk = document.getElementById('telegramPromptOk'); function loadRemoteStatus() { fetch('/api/telegram/status') @@ -623,21 +628,45 @@ document.addEventListener('DOMContentLoaded', () => { } else { remoteSetup.style.display = 'flex'; remoteConnected.style.display = 'none'; - const pairUrl = 'http://' + data.local_ip + ':5000/pair'; - remoteQrContainer.innerHTML = ''; - new QRCode(remoteQrContainer, { - text: pairUrl, - width: 160, - height: 160, - colorDark: '#ffffff', - colorLight: 'transparent', - correctLevel: QRCode.CorrectLevel.M - }); + if (remoteTelegramForm) remoteTelegramForm.style.display = 'none'; + if (remoteInstructions) remoteInstructions.style.display = 'none'; } }) .catch(() => {}); } + if (remoteTelegramBtn) { + remoteTelegramBtn.addEventListener('click', () => { + if (!remoteTelegramForm) return; + const isHidden = remoteTelegramForm.style.display === 'none' || !remoteTelegramForm.style.display; + 
remoteTelegramForm.style.display = isHidden ? 'flex' : 'none'; + }); + } + + if (remoteConnectBtn) { + remoteConnectBtn.addEventListener('click', () => { + remoteConnectBtn.disabled = true; + if (telegramPromptOverlay) telegramPromptOverlay.classList.add('active'); + fetch('/api/telegram/connect', { method: 'POST' }) + .catch(() => {}) + .finally(() => { + remoteConnectBtn.disabled = false; + if (remoteInstructions) remoteInstructions.style.display = 'block'; + }); + }); + } + + if (telegramPromptOk && telegramPromptOverlay) { + telegramPromptOk.addEventListener('click', () => { + telegramPromptOverlay.classList.remove('active'); + }); + telegramPromptOverlay.addEventListener('click', (e) => { + if (e.target === telegramPromptOverlay) { + telegramPromptOverlay.classList.remove('active'); + } + }); + } + if (remoteDisconnectBtn) { remoteDisconnectBtn.addEventListener('click', () => { fetch('/api/telegram/disconnect', { method: 'POST' }) diff --git a/main.py b/main.py index 036c156..c0dffe8 100644 --- a/main.py +++ b/main.py @@ -28,12 +28,12 @@ raise RuntimeError(f"Unsupported OS: {platform.system()}") # Configuration -PROVIDER = "openrouter" -MODEL = "gemini-3.1-pro" #refer to the model name correctly from model_list.txt. +PROVIDER = "local" +MODEL = "gemma4:e4b" #refer to the model name correctly from model_list.txt. # Your task here task = """ -study the the grep glob approach of macos then syndicate to windowsus use the directory is desktop/github/Auto use. 
+open safari """ From 5e0a7851a46932baf02c4b8b6320df5490e98fc7 Mon Sep 17 00:00:00 2001 From: FunctionFreak Date: Fri, 15 May 2026 14:33:08 +0530 Subject: [PATCH 2/4] added connection with the phone --- .../remote_connection/telegram/banner.py | 164 +++- .../remote_connection/telegram/service.py | 758 ++++++++++++++++++ .../remote_connection/telegram/view.py | 138 ++++ app.py | 45 +- frontend/script.js | 25 +- mac_requirements.txt | 3 + 6 files changed, 1090 insertions(+), 43 deletions(-) create mode 100644 Auto_Use/macOS_use/remote_connection/telegram/view.py diff --git a/Auto_Use/macOS_use/remote_connection/telegram/banner.py b/Auto_Use/macOS_use/remote_connection/telegram/banner.py index e4c7e7e..db25d7e 100644 --- a/Auto_Use/macOS_use/remote_connection/telegram/banner.py +++ b/Auto_Use/macOS_use/remote_connection/telegram/banner.py @@ -170,6 +170,103 @@ """ +# Compact HTML — used when StatusBanner(compact=True). Just the orb in a tiny +# circular pill, no message span, no Next button, no JS message handlers. The +# centred PC monitor icon cross-fades with a Telegram paper-plane every ~5s +# so the user can tell at a glance this is a Telegram-triggered task. +COMPACT_HTML = """ + + + +
+
+
+
+
+
+
+
+
+ +
+
+
+ +""" + + if _COCOA_OK: class _NextHandler(NSObject): """WKScriptMessageHandler — fires self._event when JS posts to 'next_clicked'. @@ -204,14 +301,20 @@ def userContentController_didReceiveScriptMessage_(self, controller, message): class StatusBanner: W, MIN_H, MAX_H, TOP_MARGIN, RIGHT_MARGIN = 440, 44, 200, 56, 20 - - def __init__(self): + # Compact variant: just the orb, no msg / button / scripts. Fixed-size + # circular pill (W == H, radius == W/2). Used for "Telegram task running" + # indicator — pure visual, click-through. Sized to hug the 36 px orb with + # ~4 px breathing room — anything taller and the pill looks padded. + COMPACT_W = COMPACT_H = 44 + + def __init__(self, compact: bool = False): + self._compact = compact self._window = None self._webview = None self._next_handler = None # strong refs so the JS-bridge handlers self._height_handler = None # don't get GC'd self._next_event = threading.Event() - self._current_h = self.MIN_H + self._current_h = self.COMPACT_H if compact else self.MIN_H # ---- public API (callable from any thread) ---- @@ -221,7 +324,9 @@ def show(self): callAfter(self._create) def update(self, text): - if not _COCOA_OK: + # Compact pills have no msg span — silently no-op so callers don't + # have to branch. + if not _COCOA_OK or self._compact: return callAfter(self._set_text, text) @@ -234,6 +339,10 @@ def wait_for_next(self, timeout=None): """ if not _COCOA_OK: return True # no banner → don't block forever + if self._compact: + # No Next button in compact mode — return immediately so callers + # that accidentally chain it don't hang forever. 
+ return True callAfter(self._set_next_visible, True) self._next_event.clear() clicked = self._next_event.wait(timeout) @@ -250,9 +359,19 @@ def close(self): def _create(self): try: scr = NSScreen.mainScreen().frame() - x = scr.size.width - self.W - self.RIGHT_MARGIN - y = scr.size.height - self.MIN_H - self.TOP_MARGIN - rect = NSMakeRect(x, y, self.W, self.MIN_H) + if self._compact: + w_px, h_px = self.COMPACT_W, self.COMPACT_H + corner = w_px / 2.0 + html = COMPACT_HTML + ignores_mouse = True # click-through; purely visual + else: + w_px, h_px = self.W, self.MIN_H + corner = self.MIN_H / 2.0 + html = BANNER_HTML + ignores_mouse = False + x = scr.size.width - w_px - self.RIGHT_MARGIN + y = scr.size.height - h_px - self.TOP_MARGIN + rect = NSMakeRect(x, y, w_px, h_px) w = NSWindow.alloc().initWithContentRect_styleMask_backing_defer_( rect, NSWindowStyleMaskBorderless, NSBackingStoreBuffered, False @@ -260,7 +379,7 @@ def _create(self): w.setLevel_(NSStatusWindowLevel) w.setOpaque_(False) w.setBackgroundColor_(NSColor.clearColor()) - w.setIgnoresMouseEvents_(False) + w.setIgnoresMouseEvents_(ignores_mouse) w.setHasShadow_(True) w.setReleasedWhenClosed_(False) @@ -271,22 +390,27 @@ def _create(self): ) # Fixed at MIN_H/2 so the pill stays a stadium at default height # and becomes a rounded-rectangle when the height grows to fit - # multi-line messages — cleaner than a fat oval. - content.layer().setCornerRadius_(self.MIN_H / 2.0) + # multi-line messages — cleaner than a fat oval. In compact mode + # we use W/2 → perfect circle. + content.layer().setCornerRadius_(corner) content.layer().setMasksToBounds_(True) cfg = WKWebViewConfiguration.alloc().init() - # Register both JS→Python bridges BEFORE the WebView is created. 
- nh = _NextHandler.alloc().init() - nh._event = self._next_event - cfg.userContentController().addScriptMessageHandler_name_(nh, "next_clicked") + # JS→Python bridges only relevant in standard mode (compact pill + # has no Next button and a fixed size — no need for either handler). + if not self._compact: + nh = _NextHandler.alloc().init() + nh._event = self._next_event + cfg.userContentController().addScriptMessageHandler_name_(nh, "next_clicked") - hh = _HeightHandler.alloc().init() - hh._banner = self - cfg.userContentController().addScriptMessageHandler_name_(hh, "height_changed") + hh = _HeightHandler.alloc().init() + hh._banner = self + cfg.userContentController().addScriptMessageHandler_name_(hh, "height_changed") + else: + nh = hh = None - wv_rect = NSMakeRect(0, 0, self.W, self.MIN_H) + wv_rect = NSMakeRect(0, 0, w_px, h_px) wv = WKWebView.alloc().initWithFrame_configuration_(wv_rect, cfg) try: wv.setValue_forKey_(False, "drawsBackground") @@ -297,13 +421,13 @@ def _create(self): wv.layer().setBackgroundColor_(NSColor.clearColor().CGColor()) except Exception: pass - wv.loadHTMLString_baseURL_(BANNER_HTML, None) + wv.loadHTMLString_baseURL_(html, None) content.addSubview_(wv) w.orderFrontRegardless() self._window, self._webview = w, wv self._next_handler, self._height_handler = nh, hh - self._current_h = self.MIN_H + self._current_h = h_px except Exception as e: logger.warning(f"banner: _create failed ({e})") diff --git a/Auto_Use/macOS_use/remote_connection/telegram/service.py b/Auto_Use/macOS_use/remote_connection/telegram/service.py index e69de29..5ca8ec7 100644 --- a/Auto_Use/macOS_use/remote_connection/telegram/service.py +++ b/Auto_Use/macOS_use/remote_connection/telegram/service.py @@ -0,0 +1,758 @@ +"""Telegram → AgentService bridge with a guided provider/model picker. + +Runs as a standalone process (not mounted into Flask). 
On the first message +the bot asks you to pick a provider (limited to providers with a non-empty +key in api_key.txt / .env), then a model (from the same MODEL_MAPPINGS the +AutoUse frontend uses). Subsequent messages are dispatched as tasks to the +agent with that provider/model. Picked provider/model persist for the whole +chat session until you `/reset`. + +Token lookup order (first non-empty wins): + 1. TELEGRAM_BOT_TOKEN env var + 2. .env at the project root + 3. Auto_Use/api_key/api_key.txt + +Setup: + 1. @BotFather → /newbot → copy token. + 2. Paste it into .env OR api_key.txt as TELEGRAM_BOT_TOKEN=… + 3. Make sure at least one provider key (e.g. OPENROUTER_API_KEY=…) is set. + 4. python -m Auto_Use.macOS_use.remote_connection.telegram.service + 5. On phone: open Telegram, find your bot, send any message. +""" +import asyncio +import datetime +import importlib +import logging +import threading +from pathlib import Path + +from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup +from telegram.ext import ( + Application, + CommandHandler, + MessageHandler, + CallbackQueryHandler, + filters, +) + +logger = logging.getLogger(__name__) + +# service.py → telegram → remote_connection → macOS_use → Auto_Use → repo root +# The Telegram surface treats api_key.txt as its single source of truth — we +# deliberately do NOT consult .env or env vars here. .env is app.py's general +# env-loading concern; keeping the bot self-contained against api_key.txt +# avoids two-files-of-record confusion. +_REPO_ROOT = Path(__file__).resolve().parents[4] +_API_KEY_FILE = _REPO_ROOT / "Auto_Use" / "api_key" / "api_key.txt" + +# Agent writes per-step "milestone" lines here. We tail this file during a +# task and forward each new line back to the user's Telegram chat so they +# see the agent's progress in real time. 
+SCRATCHPAD_PATH = ( + Path(__file__).resolve().parents[2] / "scratchpad" / "milestone" / "milestone.md" +) +SCRATCHPAD_POLL_SEC = 2.0 +MAX_TG_MSG_LEN = 4000 # Telegram caps at 4096; leave headroom for safety + +# Provider id → API-key name in the KV files. Same mapping the Windows side +# uses ([windows_use/remote_connection/telegram/service.py:44-51]). +PROVIDER_KEY_MAP = { + "openrouter": "OPENROUTER_API_KEY", + "groq": "GROQ_API_KEY", + "openai": "OPENAI_API_KEY", + "anthropic": "ANTHROPIC_API_KEY", + "google": "GOOGLE_API_KEY", + "perplexity": "PERPLEXITY_API_KEY", +} + + +# ── file helpers ───────────────────────────────────────────────────────────── + +def _read_all_keys(path: Path) -> dict: + """Parse a simple KEY=VALUE file (one per line) into a dict. Skips empty + values and lines starting with '#'.""" + out = {} + if not path.exists(): + return out + try: + with open(path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + k, _, v = line.partition("=") + k, v = k.strip(), v.strip() + if v: + out[k] = v + except Exception: + pass + return out + + +def _resolve_token() -> str | None: + """Read TELEGRAM_BOT_TOKEN from api_key.txt only. .env and env vars are + intentionally ignored — see header comment.""" + return _read_all_keys(_API_KEY_FILE).get("TELEGRAM_BOT_TOKEN") + + +def _get_available_providers() -> list: + """Providers with a non-empty key in api_key.txt only.""" + keys = _read_all_keys(_API_KEY_FILE) + return [ + {"id": pid, "key": keys[kname]} + for pid, kname in PROVIDER_KEY_MAP.items() + if keys.get(kname) + ] + + +def _set_key_in_file(path: Path, key: str, value: str) -> None: + """Write/update KEY=value in a KV file, preserving every other line. + + Unlike a naive read-all-and-write-back-with-_read_all_keys, this keeps + empty-value placeholder lines (e.g. GROQ_API_KEY=) intact — the AutoUse + UI relies on those for its provider list rendering. 
+ """ + lines = [] + found = False + if path.exists(): + try: + with open(path, "r", encoding="utf-8") as f: + for raw in f: + stripped = raw.strip() + if stripped.startswith(f"{key}="): + lines.append(f"{key}={value}\n") + found = True + else: + lines.append(raw if raw.endswith("\n") else raw + "\n") + except Exception: + logger.warning("failed to read %s while updating %s", path, key) + return + if not found: + lines.append(f"{key}={value}\n") + try: + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + f.writelines(lines) + except Exception: + logger.warning("failed to write %s", path) + + +def _resolve_owner_chat_id() -> int | None: + """Owner chat_id = the chat that last sent /start, a text message or a + button tap. Stored in api_key.txt as TELEGRAM_OWNER_CHAT_ID=…, so it survives restarts.""" + val = _read_all_keys(_API_KEY_FILE).get("TELEGRAM_OWNER_CHAT_ID") + if not val: + return None + try: + return int(val) + except ValueError: + return None + + +def _save_owner_chat_id(chat_id: int) -> None: + """Persist the owner chat_id so we can message them on the next boot.""" + _set_key_in_file(_API_KEY_FILE, "TELEGRAM_OWNER_CHAT_ID", str(chat_id)) + + +def _get_models_for_provider(provider_id: str) -> list: + """Read MODEL_MAPPINGS from Auto_Use/macOS_use/llm_provider/{provider_id}/view.py + and return non-hidden entries as [{id, display_name}, …].""" + try: + mod = importlib.import_module( + f"Auto_Use.macOS_use.llm_provider.{provider_id}.view" + ) + mappings = getattr(mod, "MODEL_MAPPINGS", {}) + return [ + {"id": mid, "display_name": info.get("display_name", mid)} + for mid, info in mappings.items() + if not info.get("hidden", False) + ] + except Exception: + return [] + + +# ── per-chat state ─────────────────────────────────────────────────────────── + +# chat_id → { +# "phase": "idle" | "pick_provider" | "pick_model" | "ready" | "running", +# "provider": str | None, +# "model": str | None, +# "model_display": str | None, +# "queue": list[str], # tasks waiting to
run, FIFO +# "pending": dict[str, str], # pending_id → task awaiting Yes/No +# "pending_counter": int, # monotonic id source for pending +# } +_chat_state: dict = {} + +# Guards mutations that read+modify state across threads (queue drain races +# between _run_agent's finally and the callback handler tapping "Yes"). +_state_lock = threading.Lock() + + +def _state(chat_id: int) -> dict: + return _chat_state.setdefault(chat_id, {"phase": "idle"}) + + +def _maybe_run_next_queued(chat_id: int, bot, loop) -> None: + """If this chat is ready and has a queued task, pop the next one and + start it. Threadsafe — called from both _run_agent's finally (worker + thread) and the q+ callback (asyncio loop).""" + with _state_lock: + state = _chat_state.get(chat_id) + if not state: + return + if state.get("phase") != "ready": + return + queue = state.get("queue") or [] + if not queue: + return + provider = state.get("provider") + model = state.get("model") + if not provider or not model: + return + next_task = queue.pop(0) + display = state.get("model_display") or model + state["phase"] = "running" + + _send_chat( + bot, + chat_id, + f"📝 Running queued task: {next_task[:200]} ({provider} · {display})", + loop, + ) + threading.Thread( + target=_run_agent, + args=(next_task, provider, model, chat_id, bot, loop), + daemon=True, + name=f"telegram-agent-{chat_id}-queued", + ).start() + + +# ── Telegram handlers ──────────────────────────────────────────────────────── + +def _build_online_text(providers: list) -> str: + now_str = datetime.datetime.now().strftime("%H:%M:%S") + if providers: + provider_line = ", ".join(p["id"] for p in providers) + return f"🟢 AutoUse online at {now_str}\nProviders: {provider_line}" + return f"🟢 AutoUse online at {now_str}\nProviders: (none configured)" + + +async def _show_provider_picker(message): + providers = _get_available_providers() + # Always lead with the "AutoUse online" status line so the user gets the + # same greeting they'd see at app boot, 
even when they message the bot + # first instead of waiting for the unsolicited startup announcement. + await message.reply_text(_build_online_text(providers)) + if not providers: + await message.reply_text( + "⚠️ No provider API keys found. Add at least one (e.g. " + "OPENROUTER_API_KEY=…) to api_key.txt or .env and try again." + ) + return False + buttons = [ + [InlineKeyboardButton(p["id"], callback_data=f"provider:{p['id']}")] + for p in providers + ] + await message.reply_text( + "👋 Pick a provider:", reply_markup=InlineKeyboardMarkup(buttons) + ) + return True + + +async def _discover_owner_from_updates(bot) -> int | None: + """Peek at the latest pending update on Telegram's servers and use its + chat_id as the owner. Lets the bot self-bootstrap on the very first run + after the chat-saving code was deployed, without requiring the user to + /start again. Safe to call before start_polling — uses offset=-1 which + Telegram supports as 'just the most recent update', and doesn't consume + updates from the polling updater's offset cursor.""" + try: + updates = await bot.get_updates(offset=-1, limit=1, timeout=2) + except Exception: + logger.warning("owner discovery: get_updates failed", exc_info=True) + return None + for upd in updates: + chat = getattr(upd, "effective_chat", None) + if chat and chat.id: + return int(chat.id) + return None + + +async def _post_init(application) -> None: + """Fires once after the bot finishes initialising (before polling starts). + Used to message the saved owner: 'AutoUse online at …' + a fresh provider + picker — so the user doesn't have to send anything to get going.""" + owner_id = _resolve_owner_chat_id() + if not owner_id: + # Not saved yet — try to auto-discover from Telegram's pending updates. + # Works if the user has ever messaged the bot, even before the + # chat-saving code was deployed. Persist the result so we don't need + # to re-discover on every boot. 
+ owner_id = await _discover_owner_from_updates(application.bot) + if owner_id: + try: + _save_owner_chat_id(owner_id) + logger.info( + "owner discovery: saved chat_id=%s from getUpdates", + owner_id, + ) + except Exception: + logger.warning("owner discovery: could not persist chat_id", exc_info=True) + if not owner_id: + # No owner anywhere — they've never interacted with the bot. Stay + # silent; they'll register themselves with /start. + return + bot = application.bot + providers = _get_available_providers() + try: + await bot.send_message(chat_id=owner_id, text=_build_online_text(providers)) + except Exception: + logger.exception("startup announcement: failed to send hello") + return # if we can't even greet, don't bother with the picker + + if not providers: + try: + await bot.send_message( + chat_id=owner_id, + text="⚠️ No provider API keys found. Add at least one to api_key.txt and /reset.", + ) + except Exception: + pass + return + + buttons = [ + [InlineKeyboardButton(p["id"], callback_data=f"provider:{p['id']}")] + for p in providers + ] + try: + await bot.send_message( + chat_id=owner_id, + text="👋 Pick a provider:", + reply_markup=InlineKeyboardMarkup(buttons), + ) + # Park the owner's chat in pick_provider so the next button tap routes + # cleanly through the existing callback flow. + _chat_state[owner_id] = {"phase": "pick_provider"} + except Exception: + logger.exception("startup announcement: failed to send provider picker") + + +async def start_cmd(update, ctx): + chat_id = update.effective_chat.id + # Remember this chat so future boots can auto-greet (Phase 10 startup + # announcement). Best-effort — never let a file-write failure block /start. 
+ try: + _save_owner_chat_id(chat_id) + except Exception: + logger.warning("could not persist owner chat_id", exc_info=True) + _chat_state[chat_id] = {"phase": "pick_provider"} + ok = await _show_provider_picker(update.message) + if not ok: + _chat_state[chat_id] = {"phase": "idle"} + + +async def reset_cmd(update, ctx): + # Wipe state for this chat — including any queued tasks and pending + # awaiting Yes/No prompts. We do NOT clear the persisted owner chat_id; + # /reset is "start over the conversation", not "forget I exist". + _chat_state[update.effective_chat.id] = {"phase": "idle"} + await update.message.reply_text( + "🔄 Reset. Send any message to pick a provider again." + ) + + +async def text_handler(update, ctx): + chat_id = update.effective_chat.id + # Persist on every message, not just /start, so the next app boot can + # auto-announce "AutoUse online" without the user having to /start first. + try: + _save_owner_chat_id(chat_id) + except Exception: + logger.warning("could not persist owner chat_id", exc_info=True) + state = _state(chat_id) + phase = state.get("phase", "idle") + + if phase in ("idle", "pick_provider"): + state["phase"] = "pick_provider" + ok = await _show_provider_picker(update.message) + if not ok: + state["phase"] = "idle" + return + + if phase == "pick_model": + await update.message.reply_text( + "Pick a model from the buttons above first." + ) + return + + if phase == "running": + # Busy — offer to queue this task. Each pending prompt gets a unique + # id so multiple "queue this?" prompts can coexist if the user spams. 
+ task = (update.message.text or "").strip() + if not task: + return + state.setdefault("pending", {}) + state["pending_counter"] = state.get("pending_counter", 0) + 1 + pending_id = str(state["pending_counter"]) + state["pending"][pending_id] = task + buttons = [[ + InlineKeyboardButton("✅ Yes, queue it", callback_data=f"q+:{pending_id}"), + InlineKeyboardButton("❌ No", callback_data=f"q-:{pending_id}"), + ]] + await update.message.reply_text( + f"⏳ Currently busy performing a task.\n" + f"Do you want to queue: \"{task[:200]}\" ?", + reply_markup=InlineKeyboardMarkup(buttons), + ) + return + + # phase == "ready" + task = (update.message.text or "").strip() + if not task: + return + state["phase"] = "running" + provider = state["provider"] + model = state["model"] + display = state.get("model_display", model) + await update.message.reply_text( + f"📝 Running: {task} ({provider} · {display})" + ) + bot = ctx.bot + loop = asyncio.get_running_loop() + threading.Thread( + target=_run_agent, + args=(task, provider, model, chat_id, bot, loop), + daemon=True, + ).start() + + +async def callback_handler(update, ctx): + query = update.callback_query + await query.answer() + chat_id = query.message.chat_id + try: + _save_owner_chat_id(chat_id) + except Exception: + logger.warning("could not persist owner chat_id", exc_info=True) + state = _state(chat_id) + data = query.data or "" + + if data.startswith("provider:"): + provider_id = data.split(":", 1)[1] + state["provider"] = provider_id + state["phase"] = "pick_model" + models = _get_models_for_provider(provider_id) + if not models: + state["phase"] = "pick_provider" + await query.edit_message_text( + f"⚠️ No models found for {provider_id}. Pick another provider." 
+ ) + return + buttons = [ + [InlineKeyboardButton(m["display_name"], callback_data=f"model:{m['id']}")] + for m in models + ] + await query.edit_message_text( + f"Pick a model for {provider_id}:", + reply_markup=InlineKeyboardMarkup(buttons), + ) + return + + if data.startswith("model:"): + model_id = data.split(":", 1)[1] + provider_id = state.get("provider") + if not provider_id: + state["phase"] = "idle" + await query.edit_message_text("Session expired. Send any message to start over.") + return + models = _get_models_for_provider(provider_id) + display = next( + (m["display_name"] for m in models if m["id"] == model_id), model_id + ) + state["model"] = model_id + state["model_display"] = display + state["phase"] = "ready" + await query.edit_message_text( + f"✅ Provider: {provider_id} / Model: {display}\n" + f"Send me a task whenever you're ready." + ) + return + + if data.startswith("q+:"): + # User wants to queue the pending task. + pending_id = data.split(":", 1)[1] + task = (state.get("pending") or {}).pop(pending_id, None) + if not task: + await query.edit_message_text("(That prompt has already been handled.)") + return + state.setdefault("queue", []).append(task) + qlen = len(state["queue"]) + await query.edit_message_text( + f"📥 Queued (position {qlen}): \"{task[:200]}\"\n" + f"Will run when the current task finishes." + ) + # Edge case: agent finished in the milliseconds between the prompt + # being sent and the user tapping Yes. Drain the queue now so the + # queued task isn't stranded. + _maybe_run_next_queued(chat_id, ctx.bot, asyncio.get_running_loop()) + return + + if data.startswith("q-:"): + # User declines to queue. Drop the pending task. + pending_id = data.split(":", 1)[1] + (state.get("pending") or {}).pop(pending_id, None) + await query.edit_message_text( + "👍 OK, won't queue it. I'll let you know once the current task is done." 
+ ) + return + + +# ── scratchpad streaming ───────────────────────────────────────────────────── + +def _send_chat(bot, chat_id, text, loop): + """Schedule a bot.send_message on the asyncio loop from a worker thread. + Silently ignores failures so a transient send error never kills the + monitor thread.""" + try: + asyncio.run_coroutine_threadsafe( + bot.send_message(chat_id=chat_id, text=text), loop + ) + except Exception: + logger.warning("Failed to schedule send_message to chat %s", chat_id) + + +def _monitor_scratchpad(chat_id, bot, loop, stop_event, start_pos): + """Tail SCRATCHPAD_PATH and forward each new non-empty line to the chat. + + Polls every SCRATCHPAD_POLL_SEC seconds. start_pos is the byte offset + the file was at when the task began — we only forward content written + AFTER that, so old milestones from previous tasks aren't replayed. + Exits when stop_event is set, after one final sweep to flush any tail. + """ + last_pos = start_pos + + def _read_and_forward(): + nonlocal last_pos + if not SCRATCHPAD_PATH.exists(): + return + try: + with open(SCRATCHPAD_PATH, "r", encoding="utf-8", errors="replace") as f: + f.seek(last_pos) + new_content = f.read() + if not new_content: + return + last_pos = f.tell() + except Exception as exc: + logger.warning("Scratchpad read error: %s", exc) + return + for raw in new_content.splitlines(): + line = raw.strip() + if not line: + continue + # Chunk excessively long lines so we stay under Telegram's 4096 cap. + for i in range(0, len(line), MAX_TG_MSG_LEN): + _send_chat(bot, chat_id, line[i : i + MAX_TG_MSG_LEN], loop) + + while not stop_event.is_set(): + _read_and_forward() + stop_event.wait(SCRATCHPAD_POLL_SEC) + + # Final sweep — catches any line written between the last poll and the + # stop_event being set (e.g. the agent's very last milestone). 
+ _read_and_forward() + + +# ── agent runner (worker thread) ───────────────────────────────────────────── + +def _run_agent(task, provider, model, chat_id, bot, loop): + """Run the agent and ping the chat when done. Streams scratchpad milestones + back to the chat live while the agent works. Pops a compact pill so the + Mac user can see a Telegram task is running, and minimises the main app + window so the agent has the screen to itself. Restores phase to 'ready'.""" + # Compact "Telegram task in progress" indicator + minimise AutoUse window. + # Both are best-effort — never let UI fluff block the actual task. + from Auto_Use.macOS_use.remote_connection.telegram.banner import StatusBanner + task_banner = StatusBanner(compact=True) + try: + task_banner.show() + except Exception: + logger.warning("could not show task banner", exc_info=True) + # Minimise the AutoUse pywebview window so the agent has the screen to + # itself. We talk to pywebview directly via its global `windows` list + # rather than importing from app.py — `python app.py` makes app.py the + # __main__ module, so `from app import …` would re-import a *second* + # copy of app.py whose webview_window is still None, and the call would + # silently no-op. + try: + import webview as _webview + if _webview.windows: + _webview.windows[0].minimize() + except Exception: + logger.warning("could not minimise AutoUse window", exc_info=True) + + # Snapshot the scratchpad size NOW so the monitor only forwards new lines + # for THIS task, not anything left over from previous runs. + start_pos = SCRATCHPAD_PATH.stat().st_size if SCRATCHPAD_PATH.exists() else 0 + stop_event = threading.Event() + monitor = threading.Thread( + target=_monitor_scratchpad, + args=(chat_id, bot, loop, stop_event, start_pos), + daemon=True, + name=f"telegram-scratchpad-{chat_id}", + ) + monitor.start() + + try: + # Imported lazily — pulls in tree/element → skimage etc., which we + # don't want to load until a task actually runs. 
+ from Auto_Use.macOS_use.agent.service import AgentService + + agent = AgentService( + provider=provider, + model=model, + save_conversation=False, + thinking=True, + ) + agent.process_request(task) + # Stop the monitor BEFORE the done message so the final scratchpad + # sweep happens first — keeps the chat in correct chronological order. + stop_event.set() + monitor.join(timeout=SCRATCHPAD_POLL_SEC + 2) + _send_chat(bot, chat_id, "✅ Done.", loop) + except Exception as e: + logger.exception("agent error") + stop_event.set() + monitor.join(timeout=SCRATCHPAD_POLL_SEC + 2) + _send_chat(bot, chat_id, f"❌ Error: {e}", loop) + finally: + if not stop_event.is_set(): + stop_event.set() + try: + task_banner.close() + except Exception: + pass + with _state_lock: + state = _chat_state.get(chat_id) + if state is not None and state.get("phase") == "running": + state["phase"] = "ready" + # Drain one queued task if any — keeps phase='running' if it spawns. + _maybe_run_next_queued(chat_id, bot, loop) + + +# ── entry points ───────────────────────────────────────────────────────────── + +def _build_telegram_app(token: str): + """Build a python-telegram-bot Application with all our handlers wired. + + `post_init` is the hook python-telegram-bot calls once after the bot + finishes initialising but before polling starts — perfect spot to send + the "AutoUse online" announcement + provider picker to the saved owner. 
+ """ + app = ( + Application.builder() + .token(token) + .post_init(_post_init) + .build() + ) + app.add_handler(CommandHandler("start", start_cmd)) + app.add_handler(CommandHandler("reset", reset_cmd)) + app.add_handler(CallbackQueryHandler(callback_handler)) + app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, text_handler)) + return app + + +_BOT_THREAD: threading.Thread | None = None + + +def _stderr(msg: str) -> None: + """Loud print to the terminal where python app.py is running — bypasses + whatever logging config is in effect so the user actually sees it.""" + import sys + print(f"[telegram] {msg}", file=sys.stderr, flush=True) + + +async def _run_bot_until_stopped(tg_app): + """Manual lifecycle replacement for Application.run_polling(). + + run_polling() messes with signals and assumes it owns the main thread; + we want to drive it from a worker thread so we do it step by step. + + Order matches what run_polling() does internally: + initialize → start → post_init → start_polling. + We call _post_init BEFORE start_polling so its bot.get_updates(offset=-1) + auto-discovery doesn't race with the updater's own polling loop. + """ + await tg_app.initialize() + await tg_app.start() + # Application.post_init() is only invoked by run_polling(), not by the + # manual initialize+start path above. Call our startup announcement + # explicitly so the saved owner gets the "AutoUse online" message. + try: + await _post_init(tg_app) + except Exception: + logger.exception("post_init failed") + await tg_app.updater.start_polling(allowed_updates=Update.ALL_TYPES) + _stderr("polling loop is live — send your bot a message") + # Park here forever (daemon thread; killed on app exit). + await asyncio.Event().wait() + + +def start_bot() -> None: + """Start the Telegram bot polling on a daemon thread. + + Idempotent — safe to call multiple times from app.py boot. Prints loudly + to stderr at each milestone so the user can see what's happening. 
+ """ + global _BOT_THREAD + if _BOT_THREAD is not None and _BOT_THREAD.is_alive(): + _stderr("start_bot() called but the bot is already running — skipping.") + return + token = _resolve_token() + if not token: + _stderr( + "BOT NOT STARTED — TELEGRAM_BOT_TOKEN not found in env, .env, or " + "api_key.txt. Paste your @BotFather token into one of those files." + ) + return + _stderr(f"starting bot (token ends in …{token[-6:]})") + + def _runner(): + import sys, traceback + try: + # Each thread needs its own asyncio event loop. Without this, the + # call to asyncio.Event() inside _run_bot_until_stopped fails. + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + tg_app = _build_telegram_app(token) + try: + loop.run_until_complete(_run_bot_until_stopped(tg_app)) + finally: + loop.close() + except Exception as e: + _stderr(f"BOT CRASHED: {e!r}") + traceback.print_exc(file=sys.stderr) + + _BOT_THREAD = threading.Thread(target=_runner, daemon=True, name="telegram-bot") + _BOT_THREAD.start() + + +def main(): + """Standalone entry — for testing without launching the full AutoUse app.""" + token = _resolve_token() + if not token: + raise SystemExit( + f"TELEGRAM_BOT_TOKEN not found in {_API_KEY_FILE}\n" + "(create the bot via @BotFather first, then add the token to that file)." + ) + tg_app = _build_telegram_app(token) + logger.info("Telegram bot polling started (main thread)") + tg_app.run_polling(allowed_updates=Update.ALL_TYPES) + + +if __name__ == "__main__": + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", + ) + main() diff --git a/Auto_Use/macOS_use/remote_connection/telegram/view.py b/Auto_Use/macOS_use/remote_connection/telegram/view.py new file mode 100644 index 0000000..0ac7bf2 --- /dev/null +++ b/Auto_Use/macOS_use/remote_connection/telegram/view.py @@ -0,0 +1,138 @@ +"""Flask Blueprint for the macOS Telegram surface. 
+ +Lives in the telegram folder so all Telegram-related code stays here — app.py +just imports `telegram_bp` and calls `app.register_blueprint(...)`. Routes: + + GET /api/telegram/status → {connected, bot_username?} + POST /api/telegram/connect → kicks off the Phase 4 guided walkthrough + POST /api/telegram/disconnect → clears the persisted token + +All token lookups read ONLY from api_key.txt. We deliberately do NOT consult +.env — that file is app.py's general env-loading concern; the Telegram bot +treats api_key.txt as its single source of truth. +""" +import json +import logging +import threading +import urllib.request +from pathlib import Path + +from flask import Blueprint, jsonify + +logger = logging.getLogger(__name__) + +telegram_bp = Blueprint("telegram_macos", __name__) + +# view.py → telegram → remote_connection → macOS_use → Auto_Use → repo root +_API_KEY_FILE = ( + Path(__file__).resolve().parents[4] / "Auto_Use" / "api_key" / "api_key.txt" +) + +_bot_username_cache: str | None = None + + +def _read_token() -> str | None: + """Pull TELEGRAM_BOT_TOKEN out of api_key.txt. Returns None if missing or + empty. Does NOT consult .env or env vars on purpose.""" + if not _API_KEY_FILE.exists(): + return None + try: + with open(_API_KEY_FILE, "r", encoding="utf-8") as f: + for line in f: + stripped = line.strip() + if stripped.startswith("TELEGRAM_BOT_TOKEN="): + val = stripped.partition("=")[2].strip() + return val or None + except Exception: + logger.warning("could not read %s", _API_KEY_FILE) + return None + + +def _set_token(value: str) -> None: + """Write/clear TELEGRAM_BOT_TOKEN= in api_key.txt, preserving every other + line (incl. 
empty-value placeholders the AutoUse UI relies on).""" + lines = [] + found = False + if _API_KEY_FILE.exists(): + try: + with open(_API_KEY_FILE, "r", encoding="utf-8") as f: + for raw in f: + if raw.strip().startswith("TELEGRAM_BOT_TOKEN="): + lines.append(f"TELEGRAM_BOT_TOKEN={value}\n") + found = True + else: + lines.append(raw if raw.endswith("\n") else raw + "\n") + except Exception: + logger.warning("could not read %s while updating token", _API_KEY_FILE) + return + if not found: + lines.append(f"TELEGRAM_BOT_TOKEN={value}\n") + try: + _API_KEY_FILE.parent.mkdir(parents=True, exist_ok=True) + with open(_API_KEY_FILE, "w", encoding="utf-8") as f: + f.writelines(lines) + except Exception: + logger.warning("could not write %s", _API_KEY_FILE) + + +def _fetch_bot_username(token: str) -> str | None: + """One-shot call to Telegram's getMe — used by /status so the panel can + show '@your_bot' instead of just 'connected'.""" + try: + resp = urllib.request.urlopen( + f"https://api.telegram.org/bot{token}/getMe", timeout=5 + ) + data = json.loads(resp.read()) + if data.get("ok"): + return data["result"].get("username", "") or None + except Exception: + pass + return None + + +# ── routes ────────────────────────────────────────────────────────────────── + +@telegram_bp.route("/api/telegram/status", methods=["GET"]) +def telegram_status(): + """Frontend uses this to decide which Remote Connection panel state to + show. If a token is present in api_key.txt → 'connected', and the panel + flips to the @bot_username + Disconnect view (Connect button is hidden). 
+ Cached so we don't hit Telegram's API on every page load.""" + global _bot_username_cache + token = _read_token() + if not token: + _bot_username_cache = None + return jsonify({"connected": False}) + if _bot_username_cache is None: + _bot_username_cache = _fetch_bot_username(token) or "" + return jsonify({ + "connected": True, + "bot_username": _bot_username_cache, + }) + + +@telegram_bp.route("/api/telegram/connect", methods=["POST"]) +def telegram_connect(): + """Kick off the Phase 4 guided walkthrough (Safari → web.telegram.org → + user logs in manually, paced by the floating banner). Returns immediately; + the real work runs on a daemon thread since it blocks on user clicks.""" + try: + from Auto_Use.macOS_use.remote_connection.telegram.setup import ( + run as run_telegram_setup, + ) + threading.Thread(target=run_telegram_setup, daemon=True).start() + return jsonify({"status": "started"}) + except Exception as e: + logger.exception("telegram_connect failed") + return jsonify({"status": "error", "message": str(e)}), 500 + + +@telegram_bp.route("/api/telegram/disconnect", methods=["POST"]) +def telegram_disconnect(): + """Clear the persisted token + the cached @bot_username. The polling + thread already running keeps polling until the next app restart (soft + disconnect) — clean shutdown of the bot loop is a future enhancement.""" + global _bot_username_cache + _set_token("") + _bot_username_cache = None + return jsonify({"status": "disconnected"}) diff --git a/app.py b/app.py index 0c1039d..5f75a26 100644 --- a/app.py +++ b/app.py @@ -561,22 +561,6 @@ def delete_api_key(): debug_exception("delete_api_key") return jsonify({'error': 'Failed to delete'}), 500 -@app.route('/api/telegram/connect', methods=['POST']) -def telegram_connect(): - """Kick off the guided Telegram pairing flow. - - Returns immediately; the real work (banner + Safari navigation + manual - login) runs in a background thread because it blocks on user clicks for - minutes. 
The banner is the source of truth for live status. - """ - try: - from Auto_Use.macOS_use.remote_connection.telegram.setup import run as run_telegram_setup - threading.Thread(target=run_telegram_setup, daemon=True).start() - return jsonify({'status': 'started'}) - except Exception as e: - debug_exception('telegram_connect') - return jsonify({'status': 'error', 'message': str(e)}), 500 - @app.route('/api/vertex/status', methods=['GET']) def get_vertex_status(): """Return current Vertex AI config (project_id and location)""" @@ -884,6 +868,20 @@ def start_server(): host = '0.0.0.0' if IS_WINDOWS else '127.0.0.1' app.run(host=host, port=5000, debug=False, use_reloader=False) +def minimize_main_window(): + """Minimise the AutoUse pywebview window. No-op if the window isn't up yet + (e.g. someone calls this before main() has created it) or pywebview's + minimise call fails for any reason. Safe to call from any thread — + pywebview routes the call to its own UI loop internally.""" + win = globals().get('webview_window') + if win is None: + return + try: + win.minimize() + except Exception: + debug_exception("minimize_main_window") + + def _compute_window_center(win_w, win_h): """Return (x, y) to center a (win_w, win_h) window on the main display. Falls back to a sensible default if the native APIs are unavailable.""" @@ -915,7 +913,9 @@ class RECT(ctypes.Structure): return 600, 30 def main(): - # Register Telegram blueprint on Windows (macOS doesn't ship it yet). + # Wire the Telegram remote-control bot. Windows mounts a Flask blueprint + # plus a polling bot; macOS does the same with its own blueprint and bot + # (the macOS bot token is read from api_key.txt only).
if IS_WINDOWS: try: from Auto_Use.windows_use.remote_connection.telegram.view import telegram_bp, start_bot @@ -923,6 +923,17 @@ def main(): start_bot() except Exception: debug_exception("telegram_blueprint_init") + elif IS_MAC: + try: + from Auto_Use.macOS_use.remote_connection.telegram.view import telegram_bp + from Auto_Use.macOS_use.remote_connection.telegram.service import start_bot as start_telegram_bot + app.register_blueprint(telegram_bp) + start_telegram_bot() + except Exception as _tg_e: + import traceback as _tg_tb + print(f"[telegram] IMPORT/INIT FAILED: {_tg_e!r}", file=sys.stderr, flush=True) + _tg_tb.print_exc(file=sys.stderr) + debug_exception("telegram_bot_init") if "--cli-mode" in sys.argv: # CLI mode - delegate to the platform-specific CLI agent diff --git a/frontend/script.js b/frontend/script.js index f3f2f40..46ce8b7 100644 --- a/frontend/script.js +++ b/frontend/script.js @@ -621,13 +621,26 @@ document.addEventListener('DOMContentLoaded', () => { fetch('/api/telegram/status') .then(res => res.json()) .then(data => { - if (data.connected && data.bot_username) { - remoteSetup.style.display = 'none'; - remoteConnected.style.display = 'flex'; - remoteBotName.textContent = '@' + data.bot_username; + // Always keep the Telegram service button visible and + // expandable. When already paired, just grey out the + // Connect button inside the form rather than swapping + // to a different panel. + remoteSetup.style.display = 'flex'; + if (remoteConnected) remoteConnected.style.display = 'none'; + + if (data.connected) { + if (remoteConnectBtn) { + remoteConnectBtn.disabled = true; + remoteConnectBtn.textContent = data.bot_username + ? 
'✓ Already paired (@' + data.bot_username + ')' + : '✓ Already paired'; + } + if (remoteInstructions) remoteInstructions.style.display = 'none'; } else { - remoteSetup.style.display = 'flex'; - remoteConnected.style.display = 'none'; + if (remoteConnectBtn) { + remoteConnectBtn.disabled = false; + remoteConnectBtn.textContent = 'Connect'; + } if (remoteTelegramForm) remoteTelegramForm.style.display = 'none'; if (remoteInstructions) remoteInstructions.style.display = 'none'; } diff --git a/mac_requirements.txt b/mac_requirements.txt index caab906..903e73f 100644 --- a/mac_requirements.txt +++ b/mac_requirements.txt @@ -27,6 +27,9 @@ mss flask psutil +# Remote Connection (Telegram bot) +python-telegram-bot + # Build Tools (Nuitka binary compilation) nuitka ordered-set From 4db9dcd98b64167e38d2210e83bacaec1d3d6b6f Mon Sep 17 00:00:00 2001 From: FunctionFreak Date: Fri, 15 May 2026 18:37:14 +0530 Subject: [PATCH 3/4] macos telegram setup done --- .../remote_connection/telegram/banner.py | 405 ++++++++++++++++-- .../remote_connection/telegram/service.py | 60 ++- .../remote_connection/telegram/setup.py | 46 +- 3 files changed, 470 insertions(+), 41 deletions(-) diff --git a/Auto_Use/macOS_use/remote_connection/telegram/banner.py b/Auto_Use/macOS_use/remote_connection/telegram/banner.py index db25d7e..881d723 100644 --- a/Auto_Use/macOS_use/remote_connection/telegram/banner.py +++ b/Auto_Use/macOS_use/remote_connection/telegram/banner.py @@ -31,7 +31,7 @@ try: from Cocoa import ( - NSWindow, NSColor, NSScreen, + NSPanel, NSColor, NSScreen, NSBackingStoreBuffered, NSMakeRect, ) from Foundation import NSObject @@ -42,7 +42,11 @@ logger.warning(f"banner: Cocoa unavailable, popup disabled ({e})") _COCOA_OK = False -NSWindowStyleMaskBorderless = 0 +# Non-activating panel: clicks inside the WebView do NOT activate the Python +# process, so the AutoUse main pywebview window can't pop over Safari while +# the wizard is running. 
The panel still becomes key when a text input needs +# keyboard focus (setBecomesKeyOnlyIfNeeded_). +NSWindowStyleMaskNonactivatingPanel = 1 << 7 # 128 NSStatusWindowLevel = 25 @@ -52,11 +56,19 @@ html, body { margin: 0; padding: 0; width: 100%; background: transparent; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; } html { height: 100%; } -body { display: flex; align-items: center; gap: 8px; padding: 6px 10px; box-sizing: border-box; - min-height: 44px; overflow: hidden; } - -.orb-wrap { position: relative; width: 36px; height: 36px; flex-shrink: 0; - display: flex; align-items: center; justify-content: center; align-self: flex-start; margin-top: 0; } +/* The orb is absolute-positioned (top-left, anchored), and the body has + extra left padding (= orb-width 36 + gap 8 = 44) so flex content starts + to the right of the orb. This decouples orb position from message + height: no matter how many lines the message wraps to, the orb stays + exactly where it started — first line of text stays next to it, + additional lines flow below. */ +body { display: flex; flex-wrap: wrap; align-items: center; gap: 8px; + padding: 6px 10px 6px 54px; box-sizing: border-box; + min-height: 44px; overflow: hidden; position: relative; } + +.orb-wrap { position: absolute; top: 6px; left: 10px; + width: 36px; height: 36px; flex-shrink: 0; + display: flex; align-items: center; justify-content: center; } .stop-circle-1 { width: 36px; height: 36px; border-radius: 50%; position: absolute; background: transparent; display: flex; align-items: center; justify-content: center; @@ -99,9 +111,15 @@ .stop-base { width: 14px; height: 1px; background: white; border-radius: 0.5px; } /* min-width: 0 is the flexbox shrink-below-content-size fix — without it a - long message refuses to shrink and pushes the Next button off the pill. 
*/ -.msg { flex: 1 1 auto; min-width: 0; font-size: 12.5px; color: #6b6b75; padding: 0 4px; - line-height: 1.35; word-wrap: break-word; overflow-wrap: break-word; } + long message refuses to shrink and pushes the Next button off the pill. + align-self + padding-top pin the first line to the same vertical spot it + sits at when single-line — so when the text wraps, the first line stays + put and the new line flows below it instead of the whole block sliding + down to stay centered. */ +.msg { flex: 1 1 auto; min-width: 0; font-size: 12.5px; color: #6b6b75; + padding: 10px 4px 0; line-height: 1.35; + word-wrap: break-word; overflow-wrap: break-word; + align-self: flex-start; } .next-btn { flex-shrink: 0; height: 28px; padding: 0 14px; border: none; border-radius: 14px; background: #5e6ad2; color: white; font-size: 12px; font-weight: 600; cursor: pointer; @@ -109,6 +127,16 @@ .next-btn:hover { background: #6e7ce3; } .next-btn:active { background: #4e5ac2; } +.choice-row { display: none; flex-shrink: 0; gap: 6px; align-self: center; } +.input-row { display: none; flex-basis: 100%; flex-direction: column; gap: 4px; + padding: 2px 4px 0; order: 1; } +.input-line { display: flex; gap: 6px; align-items: center; } +#token-input { flex: 1 1 auto; height: 28px; border: 1px solid #d4d4dc; border-radius: 14px; + padding: 0 10px; font-size: 12px; font-family: inherit; outline: none; color: #333; + background: white; } +#token-input:focus { border-color: #5e6ad2; } +.input-error { display: none; color: #d23; font-size: 11px; padding: 0 4px; } + @keyframes stop-pulse { 0%{transform:scale(.97)} 15%{transform:scale(1)} 30%{transform:scale(.98)} 45%{transform:scale(1)} 60%{transform:scale(.97)} 85%{transform:scale(1)} 100%{transform:scale(.97)} } @keyframes stop-pulse2 { 0%{transform:scale(1)} 15%{transform:scale(1.03)} 30%{transform:scale(.98)} 45%{transform:scale(1.04)} 60%{transform:scale(.97)} 85%{transform:scale(1.03)} 100%{transform:scale(1)} } @keyframes stop-bgRotate { 
0%{transform:rotate(0)} 20%{transform:rotate(90deg)} 40%{transform:rotate(180deg) scale(.95,1)} 60%,100%{transform:rotate(360deg)} } @@ -127,6 +155,21 @@ Starting… +
+ + +
+
+
+ + +
+
+