diff --git a/Auto_Use/macOS_use/remote_connection/telegram/banner.py b/Auto_Use/macOS_use/remote_connection/telegram/banner.py
new file mode 100644
index 0000000..0d8e9b9
--- /dev/null
+++ b/Auto_Use/macOS_use/remote_connection/telegram/banner.py
@@ -0,0 +1,880 @@
+# Copyright 2026 Autouse AI — https://github.com/auto-use/Auto-Use
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# If you build on this project, please keep this header and credit
+# Autouse AI (https://github.com/auto-use/Auto-Use) in forks and derivative works.
+# A small attribution goes a long way toward a healthy open-source
+# community — thank you for contributing.
+
+"""Interactive walkthrough banner for setup.py.
+
+A small always-on-top pill at the top-right of the screen that contains:
+ - the animated stop-orb on the left,
+ - a status message in the middle (multi-line capable; pill grows downward),
+ - a clickable "Next" button on the right (only visible when the script is
+ waiting for the user — hidden during processing steps).
+
+setup.py calls show() once, then alternates update("…") + wait_for_next()
+to pace the user. close() tears it down. The Next button is shown
+automatically inside wait_for_next() and hidden as soon as it returns, so
+callers don't have to manage visibility manually.
+
+The pill default height is the original 44px. When a long status message
+wraps to multiple lines a ResizeObserver in JS posts the new body height
+back to Python via a second WKScriptMessageHandler, and Python resizes the
+NSWindow (top edge anchored, height grows downward).
+
+Everything runs inside the existing Python process. pywebview's main-thread
+NSApplication run loop (started by webview.start() in app.py) is reused —
+AppKit work is dispatched onto it via PyObjCTools.AppHelper.callAfter so the
+Flask worker thread that runs setup.py never touches Cocoa directly.
+
+If Cocoa/PyObjC isn't importable for any reason the class becomes a no-op
+so the automation still completes without a banner.
+"""
+import logging
+import threading
+
+logger = logging.getLogger(__name__)
+
+try:
+ from Cocoa import (
+ NSPanel, NSColor, NSScreen,
+ NSBackingStoreBuffered, NSMakeRect,
+ )
+ from Foundation import NSObject
+ from WebKit import WKWebView, WKWebViewConfiguration
+ from PyObjCTools.AppHelper import callAfter
+ _COCOA_OK = True
+except Exception as e:
+ logger.warning(f"banner: Cocoa unavailable, popup disabled ({e})")
+ _COCOA_OK = False
+
# Non-activating panel: clicks inside the WebView do NOT activate the Python
# process, so the AutoUse main pywebview window can't pop over Safari while
# the wizard is running. The panel can still become key (see
# _NonActivatingPanel.canBecomeKeyWindow), so a text input inside it can
# receive keyboard focus without the app itself being activated.
+NSWindowStyleMaskNonactivatingPanel = 1 << 7 # 128
+NSStatusWindowLevel = 25
+
+
+BANNER_HTML = """
+
+
+
+
+
+
+
+
+
+
+
+Starting…
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+"""
+
+
+# Compact HTML — used when StatusBanner(compact=True). Just the orb in a tiny
+# circular pill, no message span, no Next button, no JS message handlers. The
+# centred PC monitor icon cross-fades with a Telegram paper-plane every ~5s
+# so the user can tell at a glance this is a Telegram-triggered task.
+COMPACT_HTML = """
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+"""
+
+
if _COCOA_OK:
    class _NonActivatingPanel(NSPanel):
        """Borderless NSPanel that can still become key.

        AppKit returns NO from -canBecomeKeyWindow for borderless panels by
        default, which blocks WKWebView text inputs from ever receiving
        keyboard focus (the user clicks the field and nothing happens).
        Overriding to YES makes the field usable. NSWindowStyleMaskNonactivatingPanel
        is still set on the instance, so becoming key still doesn't activate
        this Python process — Safari stays in the foreground."""
        def canBecomeKeyWindow(self):
            return True

    class _ClickableWebView(WKWebView):
        """WKWebView that returns YES from acceptsFirstMouse:.

        Without this, the first click after the panel loses key status
        (e.g. user just clicked Safari) is swallowed by AppKit while it
        promotes the panel back to key — the button click never fires, and
        the user has to tap a second time. Returning YES tells AppKit to
        forward the very first click straight to the view, so single-tap
        works regardless of key-window state."""
        def acceptsFirstMouse_(self, event):
            return True

    class _NextHandler(NSObject):
        """WKScriptMessageHandler — fires self._event when JS posts to 'next_clicked'.

        No custom init: PyObjC's bridged NSObject.init takes no args, so calling
        NSObject.init(self) inside a subclass crashes with "Need 0 arguments,
        got 1". Instead, allocate with the default init and set the event as a
        plain Python attribute right after — PyObjC subclasses accept arbitrary
        Python attributes just fine.
        """
        def userContentController_didReceiveScriptMessage_(self, controller, message):
            # Never let an exception escape into the Objective-C caller, but
            # leave a trace so a mis-wired handler can be diagnosed.
            try:
                self._event.set()
            except Exception:
                logger.debug("banner: next_clicked handler failed", exc_info=True)

    class _HeightHandler(NSObject):
        """WKScriptMessageHandler — receives body.scrollHeight from JS and calls
        the banner's _on_height_changed on the main thread (already the current
        thread, since WK message delivery is on main)."""
        def userContentController_didReceiveScriptMessage_(self, controller, message):
            try:
                banner = self._banner
                if banner is not None:
                    banner._on_height_changed(int(message.body()))
            except Exception:
                logger.debug("banner: height_changed handler failed", exc_info=True)

    class _ChoiceHandler(NSObject):
        """WKScriptMessageHandler for the two-button choice row. Stores the
        clicked label ('left' or 'right') on self._value, then fires self._event."""
        def userContentController_didReceiveScriptMessage_(self, controller, message):
            try:
                # Store the value before setting the event so the waiting
                # thread never wakes up to a stale value.
                self._value = str(message.body())
                self._event.set()
            except Exception:
                logger.debug("banner: choice_clicked handler failed", exc_info=True)

    class _SaveHandler(NSObject):
        """WKScriptMessageHandler for the token input. Stores the typed string
        on self._value, then fires self._event."""
        def userContentController_didReceiveScriptMessage_(self, controller, message):
            try:
                self._value = str(message.body())
                self._event.set()
            except Exception:
                logger.debug("banner: save_clicked handler failed", exc_info=True)

    class _RevealHandler(NSObject):
        """WKScriptMessageHandler fired by JS when the word-by-word setMsg
        reveal finishes. Used to gate control-set visibility on stream
        completion so buttons don't pop in mid-sentence."""
        def userContentController_didReceiveScriptMessage_(self, controller, message):
            try:
                self._event.set()
            except Exception:
                logger.debug("banner: reveal_done handler failed", exc_info=True)
else:
    # Cocoa is unavailable: define every name from the branch above so the
    # module namespace has the same shape either way.
    _NonActivatingPanel = None
    _ClickableWebView = None
    _NextHandler = None
    _HeightHandler = None
    _ChoiceHandler = None
    _SaveHandler = None
    _RevealHandler = None
+
+
class StatusBanner:
    """Always-on-top status pill (or compact orb) driven from a worker thread.

    The public methods (show / update / wait_for_* / close) may be called
    from any thread: they only touch threading.Event objects and hand AppKit
    work to the main run loop through callAfter. The underscore-prefixed
    methods are the main-thread halves, invoked via callAfter or by the JS
    bridge handlers.

    When Cocoa is unavailable (_COCOA_OK is False) every public method
    returns immediately without creating any UI.
    """

    W, MIN_H, MAX_H, TOP_MARGIN, RIGHT_MARGIN = 440, 44, 200, 56, 20
    # Compact variant: just the orb, no msg / button / scripts. Fixed-size
    # circular pill (W == H, radius == W/2). Used for "Telegram task running"
    # indicator — pure visual, click-through. Sized to hug the 36 px orb with
    # ~4 px breathing room — anything taller and the pill looks padded.
    COMPACT_W = COMPACT_H = 44

    # Cap the wait-for-reveal so a JS hiccup that drops the reveal_done
    # message can never deadlock us. Realistic banner messages stream out
    # in well under this — and shorter is better, because the wait is what
    # the user experiences between the message finishing and the button
    # showing.
    _REVEAL_WAIT_SEC = 3.0

    # JS -> Python message handler names registered in standard mode.
    _BRIDGE_NAMES = (
        "next_clicked",
        "height_changed",
        "choice_clicked",
        "save_clicked",
        "reveal_done",
    )

    # Single-pass translation used by _js_escape. Line breaks (including the
    # Unicode line/paragraph separators, which older JS engines treat as
    # line terminators inside string literals) collapse to a space.
    _JS_ESCAPES = str.maketrans({
        "\\": "\\\\",
        "'": "\\'",
        "\n": " ",
        "\r": " ",
        "\u2028": " ",
        "\u2029": " ",
    })

    def __init__(self, compact: bool = False):
        self._compact = compact
        self._window = None
        self._webview = None
        self._next_handler = None    # strong refs so the JS-bridge handlers
        self._height_handler = None  # don't get GC'd
        self._choice_handler = None
        self._save_handler = None
        self._reveal_handler = None
        self._next_event = threading.Event()
        self._choice_event = threading.Event()
        self._save_event = threading.Event()
        # Set initially: no streaming reveal is pending until update() is called.
        # update() clears this; the JS reveal_done handler re-sets it.
        self._reveal_event = threading.Event()
        self._reveal_event.set()
        self._current_h = self.COMPACT_H if compact else self.MIN_H

    # ---- public API (callable from any thread) ----

    def show(self):
        """Create and display the panel on the main thread (no-op without Cocoa)."""
        if not _COCOA_OK:
            return
        callAfter(self._create)

    def update(self, text):
        """Replace the status message with `text` (ignored in compact mode)."""
        # Compact pills have no msg span — silently no-op so callers don't
        # have to branch.
        if not _COCOA_OK or self._compact:
            return
        # A streaming reveal is about to start in JS; clear the event so any
        # following wait_for_* call blocks until JS posts reveal_done.
        self._reveal_event.clear()
        callAfter(self._set_text, text)

    def _await_reveal(self):
        """Block until the most recent update()'s reveal animation has
        finished (or the safety timeout fires). No-op if no update() is
        pending — the event stays set in that case."""
        self._reveal_event.wait(self._REVEAL_WAIT_SEC)

    def wait_for_next(self, timeout=None):
        """Block the calling thread until the Next event fires or `timeout` elapses.

        Shows the Next button on entry and hides it on exit, so during normal
        update() calls the button stays hidden. Returns True when the event
        fired (a click, or the banner being closed, which also releases
        waiters) and False on timeout. Returns True immediately when there is
        no Cocoa or the banner is compact, so callers never hang.
        """
        if not _COCOA_OK:
            return True  # no banner → don't block forever
        if self._compact:
            # No Next button in compact mode — return immediately so callers
            # that accidentally chain it don't hang forever.
            return True
        # Clear the click event BEFORE the reveal wait. If we cleared after,
        # any click that lands during streaming (rare, since the button is
        # hidden until reveal finishes — but defensive) would be wiped here
        # and the user would have to click a second time.
        self._next_event.clear()
        self._await_reveal()
        callAfter(self._clear_extra_ui)
        callAfter(self._set_next_visible, True)
        clicked = self._next_event.wait(timeout)
        callAfter(self._set_next_visible, False)
        return clicked

    def wait_for_choice(self, left_label, right_label, timeout=None):
        """Show two side-by-side buttons; block until one is clicked.

        Returns 'left' or 'right', or None on timeout, when the banner was
        closed while waiting, in compact mode, or without Cocoa."""
        if not _COCOA_OK or self._compact:
            return None
        self._choice_event.clear()
        self._await_reveal()
        callAfter(self._set_next_visible, False)
        callAfter(self._show_choice, left_label, right_label)
        clicked = self._choice_event.wait(timeout)
        value = None
        # _destroy() also sets the event; it clears _webview first, so a None
        # webview here means "torn down", not "user clicked" — don't report a
        # value left over from an earlier choice.
        if clicked and self._webview is not None:
            value = getattr(self._choice_handler, "_value", None)
        callAfter(self._clear_extra_ui)
        return value

    def wait_for_input(self, save_label="Save", validate=None,
                       error_msg="Token can't be empty"):
        """Show a text input + Save button; block until user submits a value
        that passes `validate` (default: non-empty after strip). Failed
        validation surfaces `error_msg` via setInputError and keeps waiting.
        Returns the accepted value, or None without Cocoa, in compact mode,
        or when the banner is closed while waiting."""
        if not _COCOA_OK or self._compact:
            return None
        if validate is None:
            validate = lambda v: bool((v or "").strip())
        self._save_event.clear()
        self._await_reveal()
        callAfter(self._set_next_visible, False)
        callAfter(self._show_input, save_label)
        try:
            while True:
                self._save_event.wait()
                # Clear straight away: a resubmission that arrives while we
                # validate re-sets the event instead of being wiped later.
                self._save_event.clear()
                # _destroy() also sets the event (after clearing _webview) —
                # bail out if the banner has been torn down under us.
                if self._webview is None:
                    return None
                value = getattr(self._save_handler, "_value", "") or ""
                if validate(value):
                    return value
                callAfter(self._set_input_error, error_msg)
        finally:
            callAfter(self._clear_extra_ui)

    def close(self):
        """Tear the panel down on the main thread and release any waiters."""
        if not _COCOA_OK:
            return
        callAfter(self._destroy)

    # ---- main-thread implementations ----

    def _create(self):
        """Build the panel + WebView and wire up the JS bridge handlers."""
        if self._window is not None:
            # show() was called twice — keep the existing panel rather than
            # stacking (and leaking) a second one.
            return
        try:
            scr = NSScreen.mainScreen().frame()
            if self._compact:
                w_px, h_px = self.COMPACT_W, self.COMPACT_H
                corner = w_px / 2.0
                html = COMPACT_HTML
                ignores_mouse = True  # click-through; purely visual
            else:
                w_px, h_px = self.W, self.MIN_H
                corner = self.MIN_H / 2.0
                html = BANNER_HTML
                ignores_mouse = False
            # Screen frames are in global coordinates: include the origin so
            # the pill lands on the main screen even when that screen is not
            # at (0, 0) in a multi-display arrangement.
            x = scr.origin.x + scr.size.width - w_px - self.RIGHT_MARGIN
            y = scr.origin.y + scr.size.height - h_px - self.TOP_MARGIN
            rect = NSMakeRect(x, y, w_px, h_px)

            w = _NonActivatingPanel.alloc().initWithContentRect_styleMask_backing_defer_(
                rect, NSWindowStyleMaskNonactivatingPanel,
                NSBackingStoreBuffered, False,
            )
            w.setLevel_(NSStatusWindowLevel)
            w.setOpaque_(False)
            w.setBackgroundColor_(NSColor.clearColor())
            w.setIgnoresMouseEvents_(ignores_mouse)
            w.setHasShadow_(True)
            w.setReleasedWhenClosed_(False)
            # Panels normally hide when their app deactivates — we want the
            # banner to stay visible the entire time Safari is in front.
            # Leave becomesKeyOnlyIfNeeded at the NSPanel default (NO) so a
            # click on the token input properly makes the panel key and the
            # field accepts paste / typing. NonactivatingPanelMask means
            # becoming key still doesn't activate the Python process.
            try:
                w.setHidesOnDeactivate_(False)
            except Exception:
                logger.debug("banner: setHidesOnDeactivate_ failed", exc_info=True)

            content = w.contentView()
            content.setWantsLayer_(True)
            content.layer().setBackgroundColor_(
                NSColor.colorWithCalibratedRed_green_blue_alpha_(1.0, 1.0, 1.0, 0.96).CGColor()
            )
            # Fixed at MIN_H/2 so the pill stays a stadium at default height
            # and becomes a rounded-rectangle when the height grows to fit
            # multi-line messages — cleaner than a fat oval. In compact mode
            # we use W/2 → perfect circle.
            content.layer().setCornerRadius_(corner)
            content.layer().setMasksToBounds_(True)

            cfg = WKWebViewConfiguration.alloc().init()

            # JS→Python bridges only relevant in standard mode (compact pill
            # has no Next button and a fixed size — no need for any handler).
            nh = hh = ch = sh = rh = None
            if not self._compact:
                ucc = cfg.userContentController()

                nh = _NextHandler.alloc().init()
                nh._event = self._next_event
                ucc.addScriptMessageHandler_name_(nh, "next_clicked")

                hh = _HeightHandler.alloc().init()
                hh._banner = self
                ucc.addScriptMessageHandler_name_(hh, "height_changed")

                ch = _ChoiceHandler.alloc().init()
                ch._event = self._choice_event
                ch._value = None
                ucc.addScriptMessageHandler_name_(ch, "choice_clicked")

                sh = _SaveHandler.alloc().init()
                sh._event = self._save_event
                sh._value = ""
                ucc.addScriptMessageHandler_name_(sh, "save_clicked")

                rh = _RevealHandler.alloc().init()
                rh._event = self._reveal_event
                ucc.addScriptMessageHandler_name_(rh, "reveal_done")

            wv_rect = NSMakeRect(0, 0, w_px, h_px)
            wv = _ClickableWebView.alloc().initWithFrame_configuration_(wv_rect, cfg)
            # The transparency tweaks below are cosmetic and best-effort.
            try:
                wv.setValue_forKey_(False, "drawsBackground")
            except Exception:
                logger.debug("banner: drawsBackground not settable", exc_info=True)
            try:
                wv.setWantsLayer_(True)
                wv.layer().setBackgroundColor_(NSColor.clearColor().CGColor())
            except Exception:
                logger.debug("banner: could not clear WebView layer", exc_info=True)
            # NSViewWidthSizable (2) | NSViewHeightSizable (16). When the
            # window animates between sizes (multi-line message growing,
            # collapsing back to single line), the WebView's frame follows
            # the animation instead of snapping — that's what makes the
            # pill grow/shrink as a smooth shape.
            try:
                wv.setAutoresizingMask_(2 | 16)
            except Exception:
                logger.debug("banner: setAutoresizingMask_ failed", exc_info=True)
            wv.loadHTMLString_baseURL_(html, None)
            content.addSubview_(wv)

            w.orderFrontRegardless()
            # Make the panel key on show so the first user click on Next
            # registers as the button click — not as "promote panel to key".
            # NonActivatingPanelMask means becoming key still doesn't
            # activate this Python process, so Safari stays in front.
            if not self._compact:
                try:
                    w.makeKeyWindow()
                except Exception:
                    logger.debug("banner: makeKeyWindow failed", exc_info=True)
            self._window, self._webview = w, wv
            self._next_handler, self._height_handler = nh, hh
            self._choice_handler, self._save_handler = ch, sh
            self._reveal_handler = rh
            self._current_h = h_px
        except Exception as e:
            logger.warning("banner: _create failed (%s)", e)

    def _run_js(self, js):
        """Evaluate `js` in the WebView. Returns True if it was handed to
        WebKit, False when there is no WebView or the call raised."""
        webview = self._webview
        if webview is None:
            return False
        try:
            webview.evaluateJavaScript_completionHandler_(js, None)
            return True
        except Exception:
            logger.debug("banner: evaluateJavaScript failed", exc_info=True)
            return False

    def _set_text(self, text):
        """Push `text` into the page and arrange for reveal_done to fire."""
        safe = self._js_escape(text)
        # Primary path: hand the full text to JS which animates it
        # word-by-word and fires reveal_done when finished. Fallback:
        # if the page-side script hasn't run yet (window.setMsg is
        # undefined — happens for the very first update right after
        # the WebView starts loading), set textContent directly and
        # post reveal_done ourselves so wait_for_next doesn't sit on
        # its safety timeout.
        js = (f"if (window.setMsg) {{ setMsg('{safe}'); }}"
              f" else {{"
              f" var m = document.getElementById('msg');"
              f" if (m) m.textContent = '{safe}';"
              f" try {{ webkit.messageHandlers.reveal_done.postMessage(1); }}"
              f" catch (e) {{}}"
              f" }}")
        if not self._run_js(js):
            # Nothing will ever post reveal_done for this update, so release
            # waiters now instead of making them sit out the safety timeout.
            self._reveal_event.set()

    def _set_next_visible(self, visible):
        """Show or hide the Next button."""
        disp = "inline-block" if visible else "none"
        self._run_js(
            f"var b=document.getElementById('next'); "
            f"if (b) b.style.display='{disp}';"
        )

    @staticmethod
    def _js_escape(text):
        """Return str(text) made safe for embedding inside a single-quoted
        JS string literal: backslashes and single quotes are escaped, line
        breaks are replaced by spaces."""
        return str(text).translate(StatusBanner._JS_ESCAPES)

    def _show_choice(self, left_label, right_label):
        """Display the two-button choice row with the given labels."""
        l = self._js_escape(left_label)
        r = self._js_escape(right_label)
        self._run_js(f"if (window.setChoice) setChoice('{l}', '{r}');")

    def _show_input(self, save_label):
        """Display the text input with a Save button labelled `save_label`."""
        s = self._js_escape(save_label)
        self._run_js(f"if (window.setInput) setInput('{s}');")

    def _set_input_error(self, msg):
        """Surface a validation error message for the text input."""
        m = self._js_escape(msg or "")
        self._run_js(f"if (window.setInputError) setInputError('{m}');")

    def _clear_extra_ui(self):
        """Ask the page to remove any extra controls via clearAll()."""
        self._run_js("if (window.clearAll) clearAll();")

    def _on_height_changed(self, requested_h):
        """Resize the NSWindow to match the WebView's content height.

        Top edge stays put — height grows downward by adjusting NSWindow's
        bottom-left origin Y. Clamped to [MIN_H, MAX_H].
        """
        try:
            if self._window is None:
                return
            new_h = max(self.MIN_H, min(int(requested_h), self.MAX_H))
            if abs(new_h - self._current_h) < 1:
                return
            self._current_h = new_h
            frame = self._window.frame()
            # NSWindow origin is bottom-left; to keep top edge fixed while
            # height changes, shift origin Y by (old_h - new_h).
            new_y = frame.origin.y + frame.size.height - new_h
            new_frame = NSMakeRect(frame.origin.x, new_y, frame.size.width, new_h)
            self._window.setFrame_display_animate_(new_frame, True, True)
            # The WebView resizes with the window via its autoresizingMask
            # (set in _create), so no manual setFrame snap is needed here —
            # snapping would override the in-flight animation and the pill
            # would visually jump to its final size rather than morph.
        except Exception as e:
            logger.warning("banner: _on_height_changed failed (%s)", e)

    def _destroy(self):
        """Hide the panel, unhook the JS bridges and release every waiter."""
        webview, window = self._webview, self._window
        # Drop our references BEFORE waking any waiter: wait_for_input and
        # wait_for_choice treat "_webview is None" as the teardown signal, so
        # it must already be visible when their event fires.
        self._window = None
        self._webview = None
        self._next_handler = None
        self._height_handler = None
        self._choice_handler = None
        self._save_handler = None
        self._reveal_handler = None
        try:
            if webview is not None:
                try:
                    webview.stopLoading()
                except Exception:
                    logger.debug("banner: stopLoading failed", exc_info=True)
                try:
                    cfg = webview.configuration()
                    if cfg is not None:
                        uc = cfg.userContentController()
                        for name in self._BRIDGE_NAMES:
                            uc.removeScriptMessageHandlerForName_(name)
                except Exception:
                    logger.debug("banner: handler removal failed", exc_info=True)
            if window is not None:
                window.orderOut_(None)
        except Exception:
            logger.debug("banner: _destroy failed", exc_info=True)
        finally:
            for ev in (self._next_event, self._choice_event,
                       self._save_event, self._reveal_event):
                try:
                    ev.set()
                except Exception:
                    pass
diff --git a/Auto_Use/macOS_use/remote_connection/telegram/service.py b/Auto_Use/macOS_use/remote_connection/telegram/service.py
new file mode 100644
index 0000000..bc581fd
--- /dev/null
+++ b/Auto_Use/macOS_use/remote_connection/telegram/service.py
@@ -0,0 +1,821 @@
+# Copyright 2026 Autouse AI — https://github.com/auto-use/Auto-Use
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# If you build on this project, please keep this header and credit
+# Autouse AI (https://github.com/auto-use/Auto-Use) in forks and derivative works.
+# A small attribution goes a long way toward a healthy open-source
+# community — thank you for contributing.
+
+"""Telegram → AgentService bridge with a guided provider/model picker.
+
Runs as a standalone process (not mounted into Flask). On the first message
the bot asks you to pick a provider (limited to providers with a non-empty
key in api_key.txt), then a model (from the same MODEL_MAPPINGS the
AutoUse frontend uses). Subsequent messages are dispatched as tasks to the
agent with that provider/model. Picked provider/model persist for the whole
chat session until you `/reset`.

Token lookup:
  TELEGRAM_BOT_TOKEN is read from Auto_Use/api_key/api_key.txt only.
  Environment variables and .env are deliberately NOT consulted, so
  api_key.txt stays the single source of truth for the bot.

Setup:
  1. @BotFather → /newbot → copy token.
  2. Paste it into api_key.txt as TELEGRAM_BOT_TOKEN=…
  3. Make sure at least one provider key (e.g. OPENROUTER_API_KEY=…) is set
     in api_key.txt.
+ 4. python -m Auto_Use.macOS_use.remote_connection.telegram.service
+ 5. On phone: open Telegram, find your bot, send any message.
+"""
+import asyncio
+import datetime
+import importlib
+import logging
+import threading
+from pathlib import Path
+
+from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
+from telegram.ext import (
+ Application,
+ CommandHandler,
+ MessageHandler,
+ CallbackQueryHandler,
+ filters,
+)
+
+logger = logging.getLogger(__name__)
+
+# service.py → telegram → remote_connection → macOS_use → Auto_Use → repo root
+# The Telegram surface treats api_key.txt as its single source of truth — we
+# deliberately do NOT consult .env or env vars here. .env is app.py's general
+# env-loading concern; keeping the bot self-contained against api_key.txt
+# avoids two-files-of-record confusion.
+_REPO_ROOT = Path(__file__).resolve().parents[4]
+_API_KEY_FILE = _REPO_ROOT / "Auto_Use" / "api_key" / "api_key.txt"
+
+# Agent writes per-step "milestone" lines here. We tail this file during a
+# task and forward each new line back to the user's Telegram chat so they
+# see the agent's progress in real time.
+SCRATCHPAD_PATH = (
+ Path(__file__).resolve().parents[2] / "scratchpad" / "milestone" / "milestone.md"
+)
+SCRATCHPAD_POLL_SEC = 2.0
+MAX_TG_MSG_LEN = 4000 # Telegram caps at 4096; leave headroom for safety
+
+# Provider id → API-key name in the KV files. Same mapping the Windows side
+# uses ([windows_use/remote_connection/telegram/service.py:44-51]).
+PROVIDER_KEY_MAP = {
+ "openrouter": "OPENROUTER_API_KEY",
+ "groq": "GROQ_API_KEY",
+ "openai": "OPENAI_API_KEY",
+ "anthropic": "ANTHROPIC_API_KEY",
+ "google": "GOOGLE_API_KEY",
+ "perplexity": "PERPLEXITY_API_KEY",
+}
+
+
+# ── file helpers ─────────────────────────────────────────────────────────────
+
+def _read_all_keys(path: Path) -> dict:
+ """Parse a simple KEY=VALUE file (one per line) into a dict. Skips empty
+ values and lines starting with '#'."""
+ out = {}
+ if not path.exists():
+ return out
+ try:
+ with open(path, "r", encoding="utf-8") as f:
+ for line in f:
+ line = line.strip()
+ if not line or line.startswith("#") or "=" not in line:
+ continue
+ k, _, v = line.partition("=")
+ k, v = k.strip(), v.strip()
+ if v:
+ out[k] = v
+ except Exception:
+ pass
+ return out
+
+
def _resolve_token() -> str | None:
    """Return the TELEGRAM_BOT_TOKEN entry from api_key.txt, or None if it
    is absent or empty. Only api_key.txt is consulted — not .env and not the
    process environment."""
    configured = _read_all_keys(_API_KEY_FILE)
    return configured.get("TELEGRAM_BOT_TOKEN")
+
+
def _get_available_providers() -> list:
    """List the providers from PROVIDER_KEY_MAP that have a non-empty key in
    api_key.txt, as [{"id": provider_id, "key": api_key}, …] in map order."""
    configured = _read_all_keys(_API_KEY_FILE)
    available = []
    for provider_id, key_name in PROVIDER_KEY_MAP.items():
        api_key = configured.get(key_name)
        if api_key:
            available.append({"id": provider_id, "key": api_key})
    return available
+
+
+def _set_key_in_file(path: Path, key: str, value: str) -> None:
+ """Write/update KEY=value in a KV file, preserving every other line.
+
+ Unlike a naive read-all-and-write-back-with-_read_all_keys, this keeps
+ empty-value placeholder lines (e.g. GROQ_API_KEY=) intact — the AutoUse
+ UI relies on those for its provider list rendering.
+ """
+ lines = []
+ found = False
+ if path.exists():
+ try:
+ with open(path, "r", encoding="utf-8") as f:
+ for raw in f:
+ stripped = raw.strip()
+ if stripped.startswith(f"{key}="):
+ lines.append(f"{key}={value}\n")
+ found = True
+ else:
+ lines.append(raw if raw.endswith("\n") else raw + "\n")
+ except Exception:
+ logger.warning("failed to read %s while updating %s", path, key)
+ return
+ if not found:
+ lines.append(f"{key}={value}\n")
+ try:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ with open(path, "w", encoding="utf-8") as f:
+ f.writelines(lines)
+ except Exception:
+ logger.warning("failed to write %s", path)
+
+
def _resolve_owner_chat_id() -> int | None:
    """Return the TELEGRAM_OWNER_CHAT_ID stored in api_key.txt as an int.

    Gives None when the entry is missing/empty or is not a valid integer.
    Because it lives in the key file, the value survives restarts."""
    raw = _read_all_keys(_API_KEY_FILE).get("TELEGRAM_OWNER_CHAT_ID")
    if raw:
        try:
            return int(raw)
        except ValueError:
            pass
    return None
+
+
def _save_owner_chat_id(chat_id: int) -> None:
    """Store `chat_id` in api_key.txt under TELEGRAM_OWNER_CHAT_ID so the
    owner can be messaged on the next boot."""
    serialized = str(chat_id)
    _set_key_in_file(_API_KEY_FILE, "TELEGRAM_OWNER_CHAT_ID", serialized)
+
+
+def _get_models_for_provider(provider_id: str) -> list:
+ """Read MODEL_MAPPINGS from Auto_Use/macOS_use/llm_provider//view.py
+ and return non-hidden entries as [{id, display_name}, …]."""
+ try:
+ mod = importlib.import_module(
+ f"Auto_Use.macOS_use.llm_provider.{provider_id}.view"
+ )
+ mappings = getattr(mod, "MODEL_MAPPINGS", {})
+ return [
+ {"id": mid, "display_name": info.get("display_name", mid)}
+ for mid, info in mappings.items()
+ if not info.get("hidden", False)
+ ]
+ except Exception:
+ return []
+
+
+# ── per-chat state ───────────────────────────────────────────────────────────
+
+# chat_id → {
+# "phase": "idle" | "pick_provider" | "pick_model" | "ready" | "running",
+# "provider": str | None,
+# "model": str | None,
+# "model_display": str | None,
+# "queue": list[str], # tasks waiting to run, FIFO
+# "pending": dict[str, str], # pending_id → task awaiting Yes/No
+# "pending_counter": int, # monotonic id source for pending
+# }
+_chat_state: dict = {}
+
+# Guards mutations that read+modify state across threads (queue drain races
+# between _run_agent's finally and the callback handler tapping "Yes").
+_state_lock = threading.Lock()
+
+
def _state(chat_id: int) -> dict:
    """Return the state dict for `chat_id`, registering a fresh
    {"phase": "idle"} entry in _chat_state the first time a chat is seen."""
    fresh_entry = {"phase": "idle"}
    return _chat_state.setdefault(chat_id, fresh_entry)
+
+
def _maybe_run_next_queued(chat_id: int, bot, loop) -> None:
    """Start the next queued task for `chat_id`, if there is one to start.

    Does nothing unless the chat exists, is in the "ready" phase, has a
    non-empty queue and has both a provider and a model selected. Otherwise
    the head of the queue is popped and the phase flipped to "running" under
    _state_lock; the chat is then notified and the task handed to _run_agent
    on a daemon thread.
    """
    with _state_lock:
        chat = _chat_state.get(chat_id)
        if not chat or chat.get("phase") != "ready":
            return
        waiting = chat.get("queue") or []
        if not waiting:
            return
        provider, model = chat.get("provider"), chat.get("model")
        if not (provider and model):
            return
        task = waiting.pop(0)
        label = chat.get("model_display") or model
        # Claim the chat while still holding the lock so a concurrent caller
        # cannot start a second task for it.
        chat["phase"] = "running"

    announcement = f"📝 Running queued task: {task[:200]} ({provider} · {label})"
    _send_chat(bot, chat_id, announcement, loop)

    worker = threading.Thread(
        target=_run_agent,
        args=(task, provider, model, chat_id, bot, loop),
        daemon=True,
        name=f"telegram-agent-{chat_id}-queued",
    )
    worker.start()
+
+
+# ── Telegram handlers ────────────────────────────────────────────────────────
+
+def _build_online_text(providers: list) -> str:
+ now_str = datetime.datetime.now().strftime("%H:%M:%S")
+ if providers:
+ provider_line = ", ".join(p["id"] for p in providers)
+ return f"🟢 AutoUse online at {now_str}\nProviders: {provider_line}"
+ return f"🟢 AutoUse online at {now_str}\nProviders: (none configured)"
+
+
async def _show_provider_picker(message):
    """Reply to `message` with the online status line and a provider picker.

    Returns True when the inline keyboard was sent, False when no provider
    has a key configured (in which case a hint is sent instead).
    """
    providers = _get_available_providers()
    # Always lead with the "AutoUse online" status line so the user gets the
    # same greeting they'd see at app boot, even when they message the bot
    # first instead of waiting for the unsolicited startup announcement.
    await message.reply_text(_build_online_text(providers))
    if not providers:
        # Provider keys are read from api_key.txt only, so that is the only
        # file worth pointing the user at.
        await message.reply_text(
            "⚠️ No provider API keys found. Add at least one (e.g. "
            "OPENROUTER_API_KEY=…) to api_key.txt and try again."
        )
        return False
    buttons = [
        [InlineKeyboardButton(p["id"], callback_data=f"provider:{p['id']}")]
        for p in providers
    ]
    await message.reply_text(
        "👋 Pick a provider:", reply_markup=InlineKeyboardMarkup(buttons)
    )
    return True
+
+
+async def _discover_owner_from_updates(bot) -> int | None:
+ """Peek at the latest pending update on Telegram's servers and use its
+ chat_id as the owner. Lets the bot self-bootstrap on the very first run
+ after the chat-saving code was deployed, without requiring the user to
+ /start again. Safe to call before start_polling — uses offset=-1 which
+ Telegram supports as 'just the most recent update', and doesn't consume
+ updates from the polling updater's offset cursor."""
+ try:
+ updates = await bot.get_updates(offset=-1, limit=1, timeout=2)
+ except Exception:
+ logger.warning("owner discovery: get_updates failed", exc_info=True)
+ return None
+ for upd in updates:
+ chat = getattr(upd, "effective_chat", None)
+ if chat and chat.id:
+ return int(chat.id)
+ return None
+
+
async def _post_init(application) -> None:
    """Startup hook: greet the saved owner and offer a provider picker.

    Resolves the owner chat from api_key.txt, falling back to discovery via
    _discover_owner_from_updates (and persisting what it finds). With an
    owner in hand it sends the "AutoUse online" line, then either a
    missing-keys hint or the provider keyboard; on a successful keyboard
    send the owner's chat is parked in the "pick_provider" phase. Send
    failures are logged (or ignored for the hint) and never raised.
    """
    bot = application.bot

    owner_id = _resolve_owner_chat_id()
    if not owner_id:
        # Not saved yet — try to auto-discover from Telegram's pending
        # updates, and persist the result so later boots skip this step.
        owner_id = await _discover_owner_from_updates(bot)
        if owner_id:
            try:
                _save_owner_chat_id(owner_id)
                logger.info(
                    "owner discovery: saved chat_id=%s from getUpdates",
                    owner_id,
                )
            except Exception:
                logger.warning("owner discovery: could not persist chat_id", exc_info=True)
    if not owner_id:
        # Nobody has ever talked to the bot; stay silent until /start.
        return

    providers = _get_available_providers()
    greeting = _build_online_text(providers)
    try:
        await bot.send_message(chat_id=owner_id, text=greeting)
    except Exception:
        logger.exception("startup announcement: failed to send hello")
        return  # if we can't even greet, don't bother with the picker

    if not providers:
        try:
            await bot.send_message(
                chat_id=owner_id,
                text="⚠️ No provider API keys found. Add at least one to api_key.txt and /reset.",
            )
        except Exception:
            pass
        return

    rows = []
    for entry in providers:
        pid = entry["id"]
        rows.append([InlineKeyboardButton(pid, callback_data=f"provider:{pid}")])
    try:
        await bot.send_message(
            chat_id=owner_id,
            text="👋 Pick a provider:",
            reply_markup=InlineKeyboardMarkup(rows),
        )
        # Park the owner's chat in pick_provider so the next button tap routes
        # cleanly through the existing callback flow.
        _chat_state[owner_id] = {"phase": "pick_provider"}
    except Exception:
        logger.exception("startup announcement: failed to send provider picker")
+
+async def start_cmd(update, ctx):
+    """Handle /start: remember the chat and show the provider picker.
+
+    The chat's state is replaced with a fresh ``pick_provider`` entry, and
+    dropped back to ``idle`` when ``_show_provider_picker`` reports failure.
+    """
+    message = update.message
+    if message is None:
+        # CommandHandler also reacts to edited messages by default, and for
+        # those update.message is None. Ignore them instead of crashing when
+        # the picker tries to reply.
+        return
+    chat_id = update.effective_chat.id
+    # Remember this chat so future boots can auto-greet (Phase 10 startup
+    # announcement). Best-effort — never let a file-write failure block /start.
+    try:
+        _save_owner_chat_id(chat_id)
+    except Exception:
+        logger.warning("could not persist owner chat_id", exc_info=True)
+    # NOTE(review): this replaces the whole state even while phase is
+    # "running", which drops the queue and lets a second task be started
+    # while the first agent is still working — confirm that is intended.
+    _chat_state[chat_id] = {"phase": "pick_provider"}
+    ok = await _show_provider_picker(message)
+    if not ok:
+        _chat_state[chat_id] = {"phase": "idle"}
+
+
+async def reset_cmd(update, ctx):
+    """Handle /reset: wipe this chat's conversation state and confirm."""
+    message = update.message
+    if message is None:
+        # Edited /reset commands arrive without update.message; ignore them
+        # rather than raising on the reply below.
+        return
+    # Wipe state for this chat — including any queued tasks and pending
+    # awaiting Yes/No prompts. We do NOT clear the persisted owner chat_id;
+    # /reset is "start over the conversation", not "forget I exist".
+    # NOTE(review): an agent that is already running is not stopped by this;
+    # only the bookkeeping is reset — confirm that is the intended contract.
+    _chat_state[update.effective_chat.id] = {"phase": "idle"}
+    await message.reply_text(
+        "🔄 Reset. Send any message to pick a provider again."
+    )
+
+
+async def text_handler(update, ctx):
+    """Route a plain-text message according to the chat's current phase.
+
+    idle / pick_provider -> (re)send the provider picker.
+    pick_model           -> remind the user to use the model buttons.
+    running              -> offer to queue the text as a follow-up task.
+    anything else        -> treated as "ready": start the agent on a worker
+                            thread with the stored provider and model.
+    """
+    message = update.message
+    if message is None:
+        # filters.TEXT also matches edited messages and channel posts, where
+        # update.message is None. Ignore them: crashing is wrong, and
+        # re-running a task because an old message was edited is worse.
+        return
+    chat_id = update.effective_chat.id
+    # Persist on every message, not just /start, so the next app boot can
+    # auto-announce "AutoUse online" without the user having to /start first.
+    # NOTE(review): every chat that writes to the bot is saved as owner and
+    # may start agent tasks; nothing visible here restricts the bot to one
+    # trusted chat — confirm this is intended.
+    try:
+        _save_owner_chat_id(chat_id)
+    except Exception:
+        logger.warning("could not persist owner chat_id", exc_info=True)
+    state = _state(chat_id)
+    phase = state.get("phase", "idle")
+
+    if phase in ("idle", "pick_provider"):
+        state["phase"] = "pick_provider"
+        ok = await _show_provider_picker(message)
+        if not ok:
+            state["phase"] = "idle"
+        return
+
+    if phase == "pick_model":
+        await message.reply_text(
+            "Pick a model from the buttons above first."
+        )
+        return
+
+    if phase == "running":
+        # Busy — offer to queue this task. Each pending prompt gets a unique
+        # id so multiple "queue this?" prompts can coexist if the user spams.
+        task = (message.text or "").strip()
+        if not task:
+            return
+        state.setdefault("pending", {})
+        state["pending_counter"] = state.get("pending_counter", 0) + 1
+        pending_id = str(state["pending_counter"])
+        state["pending"][pending_id] = task
+        buttons = [[
+            InlineKeyboardButton("✅ Yes, queue it", callback_data=f"q+:{pending_id}"),
+            InlineKeyboardButton("❌ No", callback_data=f"q-:{pending_id}"),
+        ]]
+        await message.reply_text(
+            f"⏳ Currently busy performing a task.\n"
+            f"Do you want to queue: \"{task[:200]}\" ?",
+            reply_markup=InlineKeyboardMarkup(buttons),
+        )
+        return
+
+    # phase == "ready"
+    task = (message.text or "").strip()
+    if not task:
+        return
+    provider = state.get("provider")
+    model = state.get("model")
+    if not provider or not model:
+        # A "ready" chat without a stored selection cannot run anything; send
+        # the user back to the start instead of raising KeyError.
+        state["phase"] = "idle"
+        await message.reply_text("Session expired. Send any message to start over.")
+        return
+    display = state.get("model_display", model)
+    state["phase"] = "running"
+    # The acknowledgement is cosmetic. Cap it at Telegram's message limit and
+    # never let a failed send leave the chat stuck in "running" with no agent
+    # thread to reset it.
+    try:
+        await message.reply_text(
+            f"📝 Running: {task} ({provider} · {display})"[:MAX_TG_MSG_LEN]
+        )
+    except Exception:
+        logger.warning("could not send task acknowledgement", exc_info=True)
+    bot = ctx.bot
+    loop = asyncio.get_running_loop()
+    threading.Thread(
+        target=_run_agent,
+        args=(task, provider, model, chat_id, bot, loop),
+        daemon=True,
+    ).start()
+
+
+async def callback_handler(update, ctx):
+    """Handle every inline-button tap, dispatching on the callback_data prefix.
+
+    provider:<id> -> store the provider and show its model picker.
+    model:<id>    -> store the model and mark the chat "ready".
+    q+:<n>        -> move pending task <n> into the queue.
+    q-:<n>        -> discard pending task <n>.
+    """
+    query = update.callback_query
+    await query.answer()
+    chat_id = query.message.chat_id
+    try:
+        _save_owner_chat_id(chat_id)
+    except Exception:
+        logger.warning("could not persist owner chat_id", exc_info=True)
+    state = _state(chat_id)
+    data = query.data or ""
+
+    if data.startswith(("provider:", "model:")) and state.get("phase") == "running":
+        # An older picker message can still carry live buttons. Honouring one
+        # now would move the phase away from "running" and let the next text
+        # message start a second agent alongside the current one.
+        await query.edit_message_text(
+            "⏳ A task is still running. Pick again once it finishes."
+        )
+        return
+
+    if data.startswith("provider:"):
+        provider_id = data.split(":", 1)[1]
+        state["provider"] = provider_id
+        state["phase"] = "pick_model"
+        models = _get_models_for_provider(provider_id)
+        if not models:
+            state["phase"] = "pick_provider"
+            await query.edit_message_text(
+                f"⚠️ No models found for {provider_id}. Pick another provider."
+            )
+            return
+        buttons = [
+            [InlineKeyboardButton(m["display_name"], callback_data=f"model:{m['id']}")]
+            for m in models
+        ]
+        await query.edit_message_text(
+            f"Pick a model for {provider_id}:",
+            reply_markup=InlineKeyboardMarkup(buttons),
+        )
+        return
+
+    if data.startswith("model:"):
+        model_id = data.split(":", 1)[1]
+        provider_id = state.get("provider")
+        if not provider_id:
+            state["phase"] = "idle"
+            await query.edit_message_text("Session expired. Send any message to start over.")
+            return
+        models = _get_models_for_provider(provider_id)
+        # Fall back to the raw id when the model is not in the current list.
+        display = next(
+            (m["display_name"] for m in models if m["id"] == model_id), model_id
+        )
+        state["model"] = model_id
+        state["model_display"] = display
+        state["phase"] = "ready"
+        await query.edit_message_text(
+            f"✅ Provider: {provider_id} / Model: {display}\n"
+            f"Send me a task whenever you're ready."
+        )
+        return
+
+    if data.startswith("q+:"):
+        # User wants to queue the pending task.
+        pending_id = data.split(":", 1)[1]
+        task = (state.get("pending") or {}).pop(pending_id, None)
+        if not task:
+            await query.edit_message_text("(That prompt has already been handled.)")
+            return
+        state.setdefault("queue", []).append(task)
+        qlen = len(state["queue"])
+        await query.edit_message_text(
+            f"📥 Queued (position {qlen}): \"{task[:200]}\"\n"
+            f"Will run when the current task finishes."
+        )
+        # Edge case: agent finished in the milliseconds between the prompt
+        # being sent and the user tapping Yes. Drain the queue now so the
+        # queued task isn't stranded.
+        _maybe_run_next_queued(chat_id, ctx.bot, asyncio.get_running_loop())
+        return
+
+    if data.startswith("q-:"):
+        # User declines to queue. Drop the pending task.
+        pending_id = data.split(":", 1)[1]
+        (state.get("pending") or {}).pop(pending_id, None)
+        await query.edit_message_text(
+            "👍 OK, won't queue it. I'll let you know once the current task is done."
+        )
+        return
+
+
+# ── scratchpad streaming ─────────────────────────────────────────────────────
+
+def _send_chat(bot, chat_id, text, loop, wait: bool = False, timeout: float = 5.0):
+    """Schedule a bot.send_message on the asyncio loop from a worker thread.
+
+    Never raises: a transient send error must not kill the calling thread.
+    Failures are logged instead of being dropped without a trace.
+
+    When wait=True, block the calling thread until the send actually
+    completes (or `timeout` seconds elapse). Used for terminal messages
+    like "✅ Done." that must land in the chat BEFORE the next message
+    is scheduled — without it, the "Done" send and the "Running queued
+    task" send race inside the asyncio loop as two parallel HTTP POSTs
+    and Telegram can deliver them out of order.
+    """
+    try:
+        fut = asyncio.run_coroutine_threadsafe(
+            bot.send_message(chat_id=chat_id, text=text), loop
+        )
+    except Exception:
+        logger.warning(
+            "Failed to schedule send_message to chat %s", chat_id, exc_info=True
+        )
+        return
+
+    if not wait:
+        def _log_failure(done):
+            # Runs when the send finishes; surfaces errors nobody waits for.
+            if done.cancelled():
+                return
+            exc = done.exception()
+            if exc is not None:
+                logger.warning("send_message to chat %s failed: %r", chat_id, exc)
+
+        fut.add_done_callback(_log_failure)
+        return
+
+    try:
+        fut.result(timeout=timeout)
+    except Exception:
+        logger.warning(
+            "send_message to chat %s did not confirm within %ss",
+            chat_id, timeout, exc_info=True,
+        )
+
+
+def _monitor_scratchpad(chat_id, bot, loop, stop_event, start_pos):
+    """Tail SCRATCHPAD_PATH and forward each new non-empty line to the chat.
+
+    Polls every SCRATCHPAD_POLL_SEC seconds. start_pos is the byte offset
+    the file was at when the task began — we only forward content written
+    AFTER that, so old milestones from previous tasks aren't replayed.
+    Exits when stop_event is set, after one final sweep to flush any tail.
+
+    Lines are sent through _send_chat without waiting, so this function does
+    not itself guarantee delivery or ordering.
+
+    NOTE(review): whatever has been written at poll time is forwarded, so a
+    line the writer has only partly flushed would go out in two pieces —
+    confirm the agent writes whole lines at once.
+    NOTE(review): last_pos comes from a text-mode tell() but is compared with
+    the byte size from stat(); presumably these agree for this file — verify.
+    """
+    last_pos = start_pos
+
+    def _read_and_forward():
+        # One poll: read everything after last_pos and send it line by line.
+        nonlocal last_pos
+        if not SCRATCHPAD_PATH.exists():
+            # File was deleted (e.g. AgentService.__init__ wiping the
+            # scratchpad). Reset so the next poll re-reads the whole new
+            # file from the top instead of seeking past its end.
+            last_pos = 0
+            return
+        try:
+            # Defensive: if the file shrank below last_pos it was truncated
+            # or rotated; restart from byte 0 so we don't slice into the
+            # middle of fresh content and stream a fragment.
+            try:
+                current_size = SCRATCHPAD_PATH.stat().st_size
+                if current_size < last_pos:
+                    last_pos = 0
+            except Exception:
+                pass
+            with open(SCRATCHPAD_PATH, "r", encoding="utf-8", errors="replace") as f:
+                f.seek(last_pos)
+                new_content = f.read()
+                if not new_content:
+                    return
+                last_pos = f.tell()
+        except Exception as exc:
+            # Keep the previous offset so the next poll retries the same span.
+            logger.warning("Scratchpad read error: %s", exc)
+            return
+        for raw in new_content.splitlines():
+            line = raw.strip()
+            if not line:
+                continue
+            # Chunk excessively long lines so we stay under Telegram's 4096 cap.
+            for i in range(0, len(line), MAX_TG_MSG_LEN):
+                _send_chat(bot, chat_id, line[i : i + MAX_TG_MSG_LEN], loop)
+
+    while not stop_event.is_set():
+        _read_and_forward()
+        # wait() doubles as the poll sleep and returns early once stop is set.
+        stop_event.wait(SCRATCHPAD_POLL_SEC)
+
+    # Final sweep — catches any line written between the last poll and the
+    # stop_event being set (e.g. the agent's very last milestone).
+    _read_and_forward()
+
+
+# ── agent runner (worker thread) ─────────────────────────────────────────────
+
+def _run_agent(task, provider, model, chat_id, bot, loop):
+    """Run one agent task on a worker thread and report the outcome to the chat.
+
+    Streams scratchpad milestones back to the chat live while the agent
+    works. Pops a compact pill so the Mac user can see a Telegram task is
+    running, and minimises the main app window so the agent has the screen
+    to itself. Always restores phase 'running' to 'ready' and then tries to
+    start the next queued task, even when the UI helpers or the agent fail.
+    """
+    # Compact "Telegram task in progress" indicator + minimise AutoUse window.
+    # Both are best-effort — never let UI fluff block the actual task. The
+    # import and constructor sit inside the try as well: if either raised
+    # outside it, the thread would die with the chat stuck in "running".
+    task_banner = None
+    try:
+        from Auto_Use.macOS_use.remote_connection.telegram.banner import StatusBanner
+        task_banner = StatusBanner(compact=True)
+        task_banner.show()
+    except Exception:
+        logger.warning("could not show task banner", exc_info=True)
+    # Minimise the AutoUse pywebview window so the agent has the screen to
+    # itself. We talk to pywebview directly via its global `windows` list
+    # rather than importing from app.py — `python app.py` makes app.py the
+    # __main__ module, so `from app import …` would re-import a *second*
+    # copy of app.py whose webview_window is still None, and the call would
+    # silently no-op.
+    try:
+        import webview as _webview
+        if _webview.windows:
+            _webview.windows[0].minimize()
+    except Exception:
+        logger.warning("could not minimise AutoUse window", exc_info=True)
+
+    # Reset the milestone scratchpad to empty before starting the monitor.
+    # AgentService.__init__ wipes the entire scratchpad/ directory in
+    # _cleanup_scratchpad() — so if we snapshotted the file's current size
+    # here and the agent then deleted + rewrote it, the monitor's last_pos
+    # would point mid-way into the fresh content and we'd stream a
+    # fragment (e.g. "ome." instead of "Verified: …Chrome.") to the chat.
+    # Deleting the file ourselves up front and starting from byte 0 keeps
+    # the monitor aligned with whatever the agent writes next. Best-effort
+    # — a failure here just degrades us back to the old (buggy) behavior.
+    try:
+        if SCRATCHPAD_PATH.exists():
+            SCRATCHPAD_PATH.unlink()
+    except Exception:
+        logger.warning("could not reset milestone scratchpad", exc_info=True)
+    start_pos = 0
+    stop_event = threading.Event()
+    monitor = threading.Thread(
+        target=_monitor_scratchpad,
+        args=(chat_id, bot, loop, stop_event, start_pos),
+        daemon=True,
+        name=f"telegram-scratchpad-{chat_id}",
+    )
+    monitor.start()
+
+    def _stop_monitor():
+        # Stop the tail thread and give it time to finish its final sweep.
+        stop_event.set()
+        monitor.join(timeout=SCRATCHPAD_POLL_SEC + 2)
+
+    try:
+        # Imported lazily — pulls in tree/element → skimage etc., which we
+        # don't want to load until a task actually runs.
+        from Auto_Use.macOS_use.agent.service import AgentService
+
+        agent = AgentService(
+            provider=provider,
+            model=model,
+            save_conversation=False,
+            thinking=True,
+        )
+        agent.process_request(task)
+        # Stop the monitor BEFORE the done message so the final scratchpad
+        # sweep happens first — keeps the chat in correct chronological order.
+        _stop_monitor()
+        # wait=True: block until "✅ Done." is on Telegram's servers before
+        # the finally-block fires _maybe_run_next_queued, which would
+        # otherwise schedule "📝 Running queued task: …" as a second,
+        # concurrent HTTP POST that can race past Done in delivery.
+        _send_chat(bot, chat_id, "✅ Done.", loop, wait=True)
+    except Exception as e:
+        logger.exception("agent error")
+        _stop_monitor()
+        # Exception text can be arbitrarily long; an over-long message would
+        # be rejected and the user would never hear about the failure.
+        _send_chat(bot, chat_id, f"❌ Error: {e}"[:MAX_TG_MSG_LEN], loop, wait=True)
+    finally:
+        # Covers exits that bypass both branches above (non-Exception errors).
+        stop_event.set()
+        if task_banner is not None:
+            try:
+                task_banner.close()
+            except Exception:
+                logger.warning("could not close task banner", exc_info=True)
+        with _state_lock:
+            state = _chat_state.get(chat_id)
+            if state is not None and state.get("phase") == "running":
+                state["phase"] = "ready"
+        # Drain one queued task if any — keeps phase='running' if it spawns.
+        _maybe_run_next_queued(chat_id, bot, loop)
+
+
+# ── entry points ─────────────────────────────────────────────────────────────
+
+def _build_telegram_app(token: str):
+    """Create the python-telegram-bot Application and attach our handlers.
+
+    `_post_init` is registered as the `post_init` hook, which
+    python-telegram-bot runs once after initialisation and before polling —
+    that is where the "AutoUse online" announcement and the provider picker
+    are sent to the saved owner.
+    """
+    builder = Application.builder().token(token).post_init(_post_init)
+    tg_app = builder.build()
+    # Commands and button taps first, then any non-command text.
+    handlers = (
+        CommandHandler("start", start_cmd),
+        CommandHandler("reset", reset_cmd),
+        CallbackQueryHandler(callback_handler),
+        MessageHandler(filters.TEXT & ~filters.COMMAND, text_handler),
+    )
+    for handler in handlers:
+        tg_app.add_handler(handler)
+    return tg_app
+
+
+# Thread running the polling bot; set by start_bot(), which uses it to skip
+# a second start while the thread is alive.
+_BOT_THREAD: threading.Thread | None = None
+
+
+def _stderr(msg: str) -> None:
+    """Write a "[telegram]"-tagged line straight to stderr and flush it.
+
+    Goes around the logging configuration on purpose, so the message shows
+    up in the terminal where python app.py was launched.
+    """
+    import sys
+    stream = sys.stderr
+    line = f"[telegram] {msg}"
+    print(line, file=stream, flush=True)
+
+
+async def _run_bot_until_stopped(tg_app):
+    """Manual lifecycle replacement for Application.run_polling().
+
+    run_polling() messes with signals and assumes it owns the main thread;
+    we want to drive it from a worker thread so we do it step by step.
+
+    Order used here:
+        initialize → start → _post_init → start_polling.
+    NOTE(review): run_polling() itself appears to run post_init right after
+    initialize and to call start() only once polling has begun, so this is
+    not an exact mirror — confirm against the installed library version.
+    We call _post_init BEFORE start_polling so its bot.get_updates(...)
+    auto-discovery doesn't race with the updater's own polling loop.
+
+    Never returns normally: it parks on an Event that is never set.
+    """
+    await tg_app.initialize()
+    await tg_app.start()
+    # Application.post_init() is only invoked by run_polling(), not by the
+    # manual initialize+start path above. Call our startup announcement
+    # explicitly so the saved owner gets the "AutoUse online" message.
+    try:
+        await _post_init(tg_app)
+    except Exception:
+        # The announcement is optional; polling must start regardless.
+        logger.exception("post_init failed")
+    await tg_app.updater.start_polling(allowed_updates=Update.ALL_TYPES)
+    _stderr("polling loop is live — send your bot a message")
+    # Park here forever (daemon thread; killed on app exit).
+    await asyncio.Event().wait()
+
+
+def start_bot() -> None:
+    """Start the Telegram bot polling on a daemon thread.
+
+    Idempotent — safe to call multiple times from app.py boot. Prints loudly
+    to stderr at each milestone so the user can see what's happening. Does
+    nothing beyond printing when no token can be resolved.
+    """
+    global _BOT_THREAD
+    if _BOT_THREAD is not None and _BOT_THREAD.is_alive():
+        _stderr("start_bot() called but the bot is already running — skipping.")
+        return
+    token = _resolve_token()
+    if not token:
+        _stderr(
+            "BOT NOT STARTED — TELEGRAM_BOT_TOKEN not found in env, .env, or "
+            "api_key.txt. Paste your @BotFather token into one of those files."
+        )
+        return
+    _stderr(f"starting bot (token ends in …{token[-6:]})")
+
+    def _runner():
+        # Thread body: owns a private event loop for the bot's whole lifetime.
+        import sys
+        import traceback
+        try:
+            # Each thread needs its own asyncio event loop. Without this, the
+            # call to asyncio.Event() inside _run_bot_until_stopped fails.
+            loop = asyncio.new_event_loop()
+            try:
+                asyncio.set_event_loop(loop)
+                # Built inside the try so the loop is closed even when
+                # building the application fails.
+                tg_app = _build_telegram_app(token)
+                loop.run_until_complete(_run_bot_until_stopped(tg_app))
+            finally:
+                loop.close()
+        except Exception as e:
+            _stderr(f"BOT CRASHED: {e!r}")
+            traceback.print_exc(file=sys.stderr)
+
+    _BOT_THREAD = threading.Thread(target=_runner, daemon=True, name="telegram-bot")
+    _BOT_THREAD.start()
+
+
+def main():
+    """Run the bot in the foreground, without the rest of the AutoUse app.
+
+    Exits via SystemExit when no bot token can be resolved; otherwise blocks
+    in run_polling() on the calling thread.
+    """
+    token = _resolve_token()
+    if token:
+        tg_app = _build_telegram_app(token)
+        logger.info("Telegram bot polling started (main thread)")
+        tg_app.run_polling(allowed_updates=Update.ALL_TYPES)
+        return
+    raise SystemExit(
+        f"TELEGRAM_BOT_TOKEN not found in {_API_KEY_FILE}\n"
+        "(create the bot via @BotFather first, then add the token to that file)."
+    )
+
+
+# Script entry: configure INFO-level logging with timestamps, then run the
+# bot in the foreground via main().
+if __name__ == "__main__":
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
+    )
+    main()
diff --git a/Auto_Use/macOS_use/remote_connection/telegram/setup.py b/Auto_Use/macOS_use/remote_connection/telegram/setup.py
new file mode 100644
index 0000000..7d0a395
--- /dev/null
+++ b/Auto_Use/macOS_use/remote_connection/telegram/setup.py
@@ -0,0 +1,154 @@
+# Copyright 2026 Autouse AI — https://github.com/auto-use/Auto-Use
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# If you build on this project, please keep this header and credit
+# Autouse AI (https://github.com/auto-use/Auto-Use) in forks and derivative works.
+# A small attribution goes a long way toward a healthy open-source
+# community — thank you for contributing.
+
+"""Telegram remote-connection setup driver (macOS, guided mode).
+
+Opens Safari, navigates to web.telegram.org, then lets the user log in
+manually. Progress is paced by a small always-on-top banner that streams
+status text and has a Next button. The script blocks on user clicks via
+banner.wait_for_next() — the user does the actual login (phone, country,
+OTP) themselves; we just get them to the right page.
+"""
+import logging
+import os
+import time
+
+from Auto_Use.macOS_use.controller.tool.open_app import open_app
+from Auto_Use.macOS_use.tree.element import UIElementScanner, ELEMENT_CONFIG
+from Auto_Use.macOS_use.controller.service import ControllerService
+from Auto_Use.macOS_use.controller.key_combo.service import KeyComboService
+from Auto_Use.macOS_use.remote_connection.telegram.banner import StatusBanner
+from Auto_Use.macOS_use.remote_connection.telegram.service import (
+ _API_KEY_FILE, _set_key_in_file,
+)
+
+logger = logging.getLogger(__name__)
+
+# Typed into Safari's address bar exactly as written (no scheme).
+TELEGRAM_WEB_URL = "web.telegram.org"
+# Pause, in seconds, inserted between the automated UI steps.
+STEP_DELAY_SEC = 2
+
+
+def _find_address_bar(mapping: dict) -> str | None:
+    """Locate Safari's smart-search field in a scanned element mapping.
+
+    Returns the key of the first entry whose name is "smart search field"
+    and whose type is "TextField", or None when no entry matches.
+    """
+    wanted = ("smart search field", "TextField")
+    return next(
+        (
+            key
+            for key, details in mapping.items()
+            if (details.get("name"), details.get("type")) == wanted
+        ),
+        None,
+    )
+
+
+def _open_telegram_in_safari(banner) -> bool:
+    """Bring up Safari and point it at web.telegram.org.
+
+    Progress is reported through ``banner.update`` so the user can follow
+    along while Safari takes focus. Returns True once the URL has been
+    submitted, False when Safari cannot be launched or its address bar is
+    not found in the element scan.
+    """
+    banner.update("Please wait — confirming Safari is open…")
+    launched = open_app("Safari")
+    if not launched:
+        logger.error("setup.py: failed to launch Safari")
+        return False
+    # open_app already sleeps about a second and then runs an AppleScript
+    # window-move, so the address bar may not be there yet; one more second
+    # lets the smart-search field settle before the scan.
+    time.sleep(1)
+
+    ui_scanner = UIElementScanner(ELEMENT_CONFIG)
+    ui_scanner.scan_elements()
+    elements = ui_scanner.get_elements_mapping()
+    time.sleep(STEP_DELAY_SEC)
+
+    bar_index = _find_address_bar(elements)
+    if bar_index is None:
+        logger.error("setup.py: Safari address bar not found in scan")
+        return False
+
+    banner.update("Safari detected. Writing the URL for you, please wait…")
+
+    ui_controller = ControllerService()
+    ui_controller.set_elements(elements, ui_scanner.application_name)
+    keys = KeyComboService()
+
+    # Focus the address bar, then type the URL, pausing after each step.
+    steps = (
+        (ui_controller.click, bar_index),
+        (ui_controller.canvas_input, TELEGRAM_WEB_URL),
+    )
+    for action, argument in steps:
+        action(argument)
+        time.sleep(STEP_DELAY_SEC)
+
+    keys.send("return")
+    return True
+
+
+def run(country_code: str = "", phone: str = "") -> bool:
+    """Guided Telegram-Web pairing.
+
+    Shows a banner, waits for the user to click Next, opens Telegram Web,
+    walks the user through login and @BotFather, asks for the bot token and
+    stores it under TELEGRAM_BOT_TOKEN via ``_set_key_in_file``.
+
+    country_code and phone are accepted but ignored — kept only so the
+    pre-existing /api/telegram/connect callsite signature still works.
+
+    Returns False when Safari could not be driven or no usable token was
+    entered. On success it does not return: the process is terminated with
+    os._exit(0) after the token is saved.
+    """
+    banner = StatusBanner()
+    banner.show()
+    try:
+        banner.update("Let's get you set up with Telegram. Please click Next.")
+        banner.wait_for_next()
+
+        if not _open_telegram_in_safari(banner):
+            banner.update("Failed to open Telegram. Close this banner and try again.")
+            banner.wait_for_next(timeout=15)
+            return False
+
+        banner.update("Please log in to Telegram, then click Next")
+        banner.wait_for_next()
+
+        banner.update(
+            "Now search for @BotFather in Telegram and open the chat. "
+            "Click Next when you're there."
+        )
+        banner.wait_for_next()
+
+        banner.update("How do you want to set up the bot?")
+        choice = banner.wait_for_choice("Fresh setup", "Token already generated")
+
+        # Only the "Fresh setup" (left) choice needs the /newbot instructions.
+        if choice == "left":
+            banner.update(
+                "In @BotFather, send these one at a time: /newbot → AutoUse → "
+                "a unique bot name. BotFather will reply with your token. "
+                "Click Next when you have it."
+            )
+            banner.wait_for_next()
+
+        banner.update("Paste your BotFather token below and click Save.")
+        # Strip BEFORE the emptiness check: a whitespace-only entry must not
+        # be saved as an empty token and then take the whole app down.
+        token = (banner.wait_for_input(save_label="Save") or "").strip()
+        if not token:
+            # Nothing usable entered, or the Cocoa-unavailable fallback where
+            # the banner never appeared.
+            return False
+
+        _set_key_in_file(_API_KEY_FILE, "TELEGRAM_BOT_TOKEN", token)
+
+        banner.update("Saved. Restarting AutoUse to start the bot…")
+        # Give the message time to stream out + a beat for the user to read
+        # it, then hard-exit the whole process. The user's next `python
+        # app.py` boot picks up the fresh TELEGRAM_BOT_TOKEN and the bot
+        # comes online with the saved owner chat. os._exit skips atexit /
+        # finally cleanup, which is what we want — Cocoa will tear down
+        # the banner + windows as the process dies.
+        time.sleep(3)
+        banner.close()
+        os._exit(0)
+    finally:
+        banner.close()
diff --git a/Auto_Use/macOS_use/remote_connection/telegram/view.py b/Auto_Use/macOS_use/remote_connection/telegram/view.py
new file mode 100644
index 0000000..5e3b9e4
--- /dev/null
+++ b/Auto_Use/macOS_use/remote_connection/telegram/view.py
@@ -0,0 +1,157 @@
+# Copyright 2026 Autouse AI — https://github.com/auto-use/Auto-Use
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# If you build on this project, please keep this header and credit
+# Autouse AI (https://github.com/auto-use/Auto-Use) in forks and derivative works.
+# A small attribution goes a long way toward a healthy open-source
+# community — thank you for contributing.
+
+"""Flask Blueprint for the macOS Telegram surface.
+
+Lives in the telegram folder so all Telegram-related code stays here — app.py
+just imports `telegram_bp` and calls `app.register_blueprint(...)`. Routes:
+
+ GET /api/telegram/status → {connected, bot_username?}
+ POST /api/telegram/connect → kicks off the Phase 4 guided walkthrough
+ POST /api/telegram/disconnect → clears the persisted token
+
+All token lookups read ONLY from api_key.txt. We deliberately do NOT consult
+.env — that file is app.py's general env-loading concern; the Telegram bot
+treats api_key.txt as its single source of truth.
+"""
+import json
+import logging
+import threading
+import urllib.request
+from pathlib import Path
+
+from flask import Blueprint, jsonify
+
+logger = logging.getLogger(__name__)
+
+telegram_bp = Blueprint("telegram_macos", __name__)
+
+# view.py → telegram → remote_connection → macOS_use → Auto_Use → repo root
+_API_KEY_FILE = (
+ Path(__file__).resolve().parents[4] / "Auto_Use" / "api_key" / "api_key.txt"
+)
+
+_bot_username_cache: str | None = None
+
+
+def _read_token() -> str | None:
+    """Return the TELEGRAM_BOT_TOKEN value stored in api_key.txt.
+
+    Only the first matching line is considered. Returns None when the file
+    is missing or unreadable, or when the value is absent or empty. Neither
+    .env nor environment variables are consulted, on purpose.
+    """
+    prefix = "TELEGRAM_BOT_TOKEN="
+    if not _API_KEY_FILE.exists():
+        return None
+    try:
+        with open(_API_KEY_FILE, "r", encoding="utf-8") as handle:
+            for entry in map(str.strip, handle):
+                if entry.startswith(prefix):
+                    return entry.partition("=")[2].strip() or None
+    except Exception:
+        logger.warning("could not read %s", _API_KEY_FILE)
+    return None
+
+
+def _set_token(value: str) -> None:
+    """Store ``value`` as TELEGRAM_BOT_TOKEN in api_key.txt (empty clears it).
+
+    Every other line is kept as it was, including the empty-value
+    placeholders the AutoUse UI relies on; a missing trailing newline is
+    added. If the existing file cannot be read nothing is written, so a
+    failed read never wipes the other keys.
+    """
+    key = "TELEGRAM_BOT_TOKEN="
+    replacement = f"{key}{value}\n"
+    rewritten = []
+    replaced = False
+    if _API_KEY_FILE.exists():
+        try:
+            with open(_API_KEY_FILE, "r", encoding="utf-8") as src:
+                for raw in src:
+                    if raw.strip().startswith(key):
+                        rewritten.append(replacement)
+                        replaced = True
+                    elif raw.endswith("\n"):
+                        rewritten.append(raw)
+                    else:
+                        rewritten.append(raw + "\n")
+        except Exception:
+            logger.warning("could not read %s while updating token", _API_KEY_FILE)
+            return
+    if not replaced:
+        # No existing entry: add the key at the end of the file.
+        rewritten.append(replacement)
+    try:
+        _API_KEY_FILE.parent.mkdir(parents=True, exist_ok=True)
+        with open(_API_KEY_FILE, "w", encoding="utf-8") as dst:
+            dst.writelines(rewritten)
+    except Exception:
+        logger.warning("could not write %s", _API_KEY_FILE)
+
+
+def _fetch_bot_username(token: str) -> str | None:
+    """Ask Telegram's getMe for the bot's username.
+
+    Used by /status so the panel can show '@your_bot' instead of just
+    'connected'. Returns the username, or None when the request fails, the
+    reply is not ``ok`` or it carries no username. Never raises.
+    """
+    url = f"https://api.telegram.org/bot{token}/getMe"
+    try:
+        # Context manager so the HTTP response is closed on every path.
+        with urllib.request.urlopen(url, timeout=5) as resp:
+            data = json.loads(resp.read())
+    except Exception:
+        # No exception details here: the request URL embeds the bot token.
+        logger.debug("getMe lookup failed")
+        return None
+    if isinstance(data, dict) and data.get("ok"):
+        return (data.get("result") or {}).get("username") or None
+    return None
+
+
+# ── routes ──────────────────────────────────────────────────────────────────
+
+@telegram_bp.route("/api/telegram/status", methods=["GET"])
+def telegram_status():
+    """Report whether a bot token is configured, plus the bot's username.
+
+    Frontend uses this to decide which Remote Connection panel state to
+    show. If a token is present in api_key.txt → 'connected', and the panel
+    flips to the @bot_username + Disconnect view (Connect button is hidden).
+    A successfully fetched username is cached so we don't hit Telegram's API
+    on every page load; ``bot_username`` is "" while it is still unknown.
+    """
+    global _bot_username_cache
+    token = _read_token()
+    if not token:
+        _bot_username_cache = None
+        return jsonify({"connected": False})
+    if not _bot_username_cache:
+        # Cache successes only. Storing "" after a failed lookup would stop
+        # any retry until the token is removed, leaving the panel without a
+        # username for good after one transient network error.
+        _bot_username_cache = _fetch_bot_username(token)
+    return jsonify({
+        "connected": True,
+        "bot_username": _bot_username_cache or "",
+    })
+
+
+@telegram_bp.route("/api/telegram/connect", methods=["POST"])
+def telegram_connect():
+    """Launch the guided Telegram walkthrough in the background.
+
+    The walkthrough (Safari → web.telegram.org → manual login, paced by the
+    floating banner) blocks on user clicks, so it runs on a daemon thread and
+    this route answers straight away. Responds with status "started", or
+    with status "error" and HTTP 500 when the walkthrough cannot be started.
+    """
+    try:
+        # Imported here so a broken setup module surfaces as a 500 from this
+        # route rather than at import time of the blueprint.
+        from Auto_Use.macOS_use.remote_connection.telegram.setup import run as run_telegram_setup
+        worker = threading.Thread(target=run_telegram_setup, daemon=True)
+        worker.start()
+        payload = {"status": "started"}
+        return jsonify(payload)
+    except Exception as e:
+        logger.exception("telegram_connect failed")
+        failure = {"status": "error", "message": str(e)}
+        return jsonify(failure), 500
+
+
+@telegram_bp.route("/api/telegram/disconnect", methods=["POST"])
+def telegram_disconnect():
+    """Forget the stored token and the cached @bot_username.
+
+    This is a soft disconnect: a polling thread that is already running
+    keeps polling until the next app restart. Shutting the bot loop down
+    cleanly is a future enhancement.
+    """
+    global _bot_username_cache
+    _set_token("")  # an empty value clears the entry
+    _bot_username_cache = None
+    result = {"status": "disconnected"}
+    return jsonify(result)
diff --git a/app.py b/app.py
index 8cd1d07..5f75a26 100644
--- a/app.py
+++ b/app.py
@@ -868,6 +868,20 @@ def start_server():
host = '0.0.0.0' if IS_WINDOWS else '127.0.0.1'
app.run(host=host, port=5000, debug=False, use_reloader=False)
+def minimize_main_window():
+    """Minimise the AutoUse pywebview window, if there is one.
+
+    Does nothing when the module-level ``webview_window`` is missing or None
+    (e.g. called before main() has created it). A failing minimise call is
+    reported through debug_exception instead of being raised.
+    NOTE(review): presumably callable from any thread because pywebview
+    routes the call to its own UI loop — confirm.
+    """
+    window = globals().get('webview_window')
+    if window is not None:
+        try:
+            window.minimize()
+        except Exception:
+            debug_exception("minimize_main_window")
+
+
def _compute_window_center(win_w, win_h):
"""Return (x, y) to center a (win_w, win_h) window on the main display.
Falls back to a sensible default if the native APIs are unavailable."""
@@ -899,7 +913,9 @@ class RECT(ctypes.Structure):
return 600, 30
def main():
- # Register Telegram blueprint on Windows (macOS doesn't ship it yet).
+    # Wire the Telegram remote-control bot. Both platforms mount a Flask
+    # blueprint and start a polling bot; on macOS the blueprint comes from
+    # telegram/view.py and start_bot from telegram/service.py.
if IS_WINDOWS:
try:
from Auto_Use.windows_use.remote_connection.telegram.view import telegram_bp, start_bot
@@ -907,6 +923,17 @@ def main():
start_bot()
except Exception:
debug_exception("telegram_blueprint_init")
+ elif IS_MAC:
+ try:
+ from Auto_Use.macOS_use.remote_connection.telegram.view import telegram_bp
+ from Auto_Use.macOS_use.remote_connection.telegram.service import start_bot as start_telegram_bot
+ app.register_blueprint(telegram_bp)
+ start_telegram_bot()
+ except Exception as _tg_e:
+ import traceback as _tg_tb
+ print(f"[telegram] IMPORT/INIT FAILED: {_tg_e!r}", file=sys.stderr, flush=True)
+ _tg_tb.print_exc(file=sys.stderr)
+ debug_exception("telegram_bot_init")
if "--cli-mode" in sys.argv:
# CLI mode - delegate to the platform-specific CLI agent
diff --git a/frontend/css/style.css b/frontend/css/style.css
index d017fbe..ea038d0 100644
--- a/frontend/css/style.css
+++ b/frontend/css/style.css
@@ -896,27 +896,124 @@ body {
padding: clamp(20px, 3vw, 32px) 0;
}
-.remote-qr-container {
- width: 160px;
- height: 160px;
+.remote-instruction {
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+ font-size: clamp(12px, 1.2vw, 14px);
+ color: rgba(255, 255, 255, 0.45);
+ text-align: center;
+}
+
+.remote-service-btn {
display: flex;
align-items: center;
- justify-content: center;
+ gap: 12px;
+ width: 100%;
+ padding: 14px 16px;
+ background: rgba(255, 255, 255, 0.06);
+ border: 1px solid rgba(255, 255, 255, 0.1);
border-radius: 14px;
- background: rgba(255, 255, 255, 0.08);
- padding: 12px;
+ color: rgba(255, 255, 255, 0.92);
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+ font-size: clamp(13px, 1.3vw, 15px);
+ font-weight: 500;
+ cursor: pointer;
+ transition: background 0.2s ease, border-color 0.2s ease;
}
-.remote-qr-container img,
-.remote-qr-container canvas {
- border-radius: 6px;
+.remote-service-btn:hover {
+ background: rgba(255, 255, 255, 0.1);
+ border-color: rgba(255, 255, 255, 0.18);
}
-.remote-instruction {
+.remote-service-icon {
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ color: rgba(80, 165, 230, 0.95);
+}
+
+.remote-service-label {
+ flex: 1;
+ text-align: left;
+}
+
+.remote-service-chevron {
+ color: rgba(255, 255, 255, 0.45);
+ display: flex;
+ align-items: center;
+}
+
+.remote-phone-form {
+ display: flex;
+ flex-direction: column;
+ gap: 10px;
+ width: 100%;
+ margin-top: 4px;
+}
+
+.remote-phone-row {
+ display: flex;
+ gap: 8px;
+}
+
+.remote-country-select,
+.remote-phone-input {
+ background: rgba(255, 255, 255, 0.06);
+ border: 1px solid rgba(255, 255, 255, 0.12);
+ border-radius: 10px;
+ color: rgba(255, 255, 255, 0.95);
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
font-size: clamp(12px, 1.2vw, 14px);
- color: rgba(255, 255, 255, 0.45);
- text-align: center;
+ padding: 9px 12px;
+ outline: none;
+ transition: border-color 0.2s ease, background 0.2s ease;
+}
+
+.remote-country-select {
+ flex: 0 0 42%;
+ appearance: none;
+ -webkit-appearance: none;
+ background-image: url("data:image/svg+xml;utf8,");
+ background-repeat: no-repeat;
+ background-position: right 10px center;
+ padding-right: 28px;
+}
+
+.remote-country-select option {
+ background: #1a1a1a;
+ color: #fff;
+}
+
+.remote-phone-input {
+ flex: 1;
+}
+
+.remote-country-select:focus,
+.remote-phone-input:focus {
+ border-color: rgba(80, 165, 230, 0.55);
+ background: rgba(255, 255, 255, 0.09);
+}
+
+.remote-connect-btn {
+ padding: 10px 16px;
+ background: rgba(80, 165, 230, 0.85);
+ color: #fff;
+ border: none;
+ border-radius: 10px;
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+ font-size: clamp(12px, 1.2vw, 14px);
+ font-weight: 600;
+ cursor: pointer;
+ transition: background 0.2s ease, opacity 0.2s ease;
+}
+
+.remote-connect-btn:hover:not(:disabled) {
+ background: rgba(80, 165, 230, 1);
+}
+
+.remote-connect-btn:disabled {
+ opacity: 0.45;
+ cursor: not-allowed;
}
.remote-connected {
@@ -2334,3 +2431,94 @@ body.cli-mode .settings-btn {
loop of funny scraping/searching phrases (handled in script.js).
No CSS overlay — globe icon + streamed phrases are the cue.
============================================================ */
+
+/* ============================================================
+ AutoUse helper info popup
+ Shown whenever the user clicks Connect under Remote Connection →
+ Telegram. Tells them to look at the top-right banner.
+ ============================================================ */
+.telegram-prompt-overlay {
+ position: fixed;
+ inset: 0;
+ background: rgba(0, 0, 0, 0.3);
+ backdrop-filter: blur(8px);
+ -webkit-backdrop-filter: blur(8px);
+ z-index: 110;
+
+ display: flex;
+ align-items: center;
+ justify-content: center;
+
+ opacity: 0;
+ visibility: hidden;
+ transition: opacity 0.3s ease, visibility 0.3s ease;
+}
+
+.telegram-prompt-overlay.active {
+ opacity: 1;
+ visibility: visible;
+}
+
+.telegram-prompt-popup {
+ position: relative;
+ width: clamp(320px, 90vw, 420px);
+ padding: clamp(24px, 3vw, 32px);
+ border-radius: clamp(16px, 2vw, 24px);
+
+ transform: scale(0.9) translateY(20px);
+ transition: transform 0.4s cubic-bezier(0.175, 0.885, 0.32, 1.275);
+
+ box-shadow: 0 clamp(8px, 2vw, 16px) clamp(32px, 6vw, 64px) rgba(0, 0, 0, 0.2);
+}
+
+.telegram-prompt-overlay.active .telegram-prompt-popup {
+ transform: scale(1) translateY(0);
+}
+
+.telegram-prompt-content {
+ position: relative;
+ z-index: 3;
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ gap: clamp(14px, 1.8vw, 18px);
+ text-align: center;
+}
+
+.telegram-prompt-title {
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+ font-size: clamp(18px, 2vw, 22px);
+ font-weight: 600;
+ color: rgba(255, 255, 255, 0.95);
+}
+
+.telegram-prompt-message {
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+ font-size: clamp(13px, 1.3vw, 14px);
+ font-weight: 400;
+ color: rgba(255, 255, 255, 0.75);
+ line-height: 1.5;
+}
+
+.telegram-prompt-message strong {
+ color: rgba(255, 255, 255, 0.95);
+ font-weight: 600;
+}
+
+.telegram-prompt-ok {
+ margin-top: 4px;
+ padding: 10px 28px;
+ border: none;
+ border-radius: 10px;
+ background: rgba(80, 165, 230, 0.85);
+ color: #fff;
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+ font-size: clamp(12px, 1.2vw, 14px);
+ font-weight: 600;
+ cursor: pointer;
+ transition: background 0.2s ease;
+}
+
+.telegram-prompt-ok:hover {
+ background: rgba(80, 165, 230, 1);
+}
diff --git a/frontend/index.html b/frontend/index.html
index f79890a..1617442 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -373,10 +373,26 @@
Remote Connection
-
+
-
-
Scan with your phone to pair via Telegram
+
+
+
+
+
+
Click below to begin pairing. A helper banner will guide you through opening Telegram and logging in.
+
+
Please follow the on-screen instructions in the helper banner (top-right of your screen).
+
@@ -392,6 +408,20 @@
+
+
+
+
+
+
+
+
AutoUse helper
+
The helper will walk you through Telegram setup. Please click Next on the banner in the top-right corner of your screen to continue.
+
+
+
+
+
diff --git a/frontend/script.js b/frontend/script.js
index 2f3efa6..46ce8b7 100644
--- a/frontend/script.js
+++ b/frontend/script.js
@@ -605,39 +605,81 @@ document.addEventListener('DOMContentLoaded', () => {
});
});
- // Remote Connection — QR + status logic
+ // Remote Connection — guided Telegram pairing
const remoteSetup = document.getElementById('remoteSetup');
const remoteConnected = document.getElementById('remoteConnected');
- const remoteQrContainer = document.getElementById('remoteQrContainer');
const remoteBotName = document.getElementById('remoteBotName');
const remoteDisconnectBtn = document.getElementById('remoteDisconnectBtn');
+ const remoteTelegramBtn = document.getElementById('remoteTelegramBtn');
+ const remoteTelegramForm = document.getElementById('remoteTelegramForm');
+ const remoteConnectBtn = document.getElementById('remoteConnectBtn');
+ const remoteInstructions = document.getElementById('remoteInstructions');
+ const telegramPromptOverlay = document.getElementById('telegramPromptOverlay');
+ const telegramPromptOk = document.getElementById('telegramPromptOk');
function loadRemoteStatus() {
fetch('/api/telegram/status')
.then(res => res.json())
.then(data => {
- if (data.connected && data.bot_username) {
- remoteSetup.style.display = 'none';
- remoteConnected.style.display = 'flex';
- remoteBotName.textContent = '@' + data.bot_username;
+ // Always keep the Telegram service button visible and
+ // expandable. When already paired, just grey out the
+ // Connect button inside the form rather than swapping
+ // to a different panel.
+ remoteSetup.style.display = 'flex';
+ if (remoteConnected) remoteConnected.style.display = 'none';
+
+ if (data.connected) {
+ if (remoteConnectBtn) {
+ remoteConnectBtn.disabled = true;
+ remoteConnectBtn.textContent = data.bot_username
+ ? '✓ Already paired (@' + data.bot_username + ')'
+ : '✓ Already paired';
+ }
+ if (remoteInstructions) remoteInstructions.style.display = 'none';
} else {
- remoteSetup.style.display = 'flex';
- remoteConnected.style.display = 'none';
- const pairUrl = 'http://' + data.local_ip + ':5000/pair';
- remoteQrContainer.innerHTML = '';
- new QRCode(remoteQrContainer, {
- text: pairUrl,
- width: 160,
- height: 160,
- colorDark: '#ffffff',
- colorLight: 'transparent',
- correctLevel: QRCode.CorrectLevel.M
- });
+ if (remoteConnectBtn) {
+ remoteConnectBtn.disabled = false;
+ remoteConnectBtn.textContent = 'Connect';
+ }
+ if (remoteTelegramForm) remoteTelegramForm.style.display = 'none';
+ if (remoteInstructions) remoteInstructions.style.display = 'none';
}
})
.catch(() => {});
}
+ if (remoteTelegramBtn) {
+ remoteTelegramBtn.addEventListener('click', () => {
+ if (!remoteTelegramForm) return;
+ const isHidden = remoteTelegramForm.style.display === 'none' || !remoteTelegramForm.style.display;
+ remoteTelegramForm.style.display = isHidden ? 'flex' : 'none';
+ });
+ }
+
+ if (remoteConnectBtn) {
+ remoteConnectBtn.addEventListener('click', () => {
+ remoteConnectBtn.disabled = true;
+ if (telegramPromptOverlay) telegramPromptOverlay.classList.add('active');
+ fetch('/api/telegram/connect', { method: 'POST' })
+ .catch(() => {})
+ .finally(() => {
+ remoteConnectBtn.disabled = false;
+ if (remoteInstructions) remoteInstructions.style.display = 'block';
+ });
+ });
+ }
+
+ if (telegramPromptOk && telegramPromptOverlay) {
+ telegramPromptOk.addEventListener('click', () => {
+ telegramPromptOverlay.classList.remove('active');
+ });
+ telegramPromptOverlay.addEventListener('click', (e) => {
+ if (e.target === telegramPromptOverlay) {
+ telegramPromptOverlay.classList.remove('active');
+ }
+ });
+ }
+
if (remoteDisconnectBtn) {
remoteDisconnectBtn.addEventListener('click', () => {
fetch('/api/telegram/disconnect', { method: 'POST' })
diff --git a/mac_requirements.txt b/mac_requirements.txt
index caab906..903e73f 100644
--- a/mac_requirements.txt
+++ b/mac_requirements.txt
@@ -27,6 +27,9 @@ mss
flask
psutil
+# Remote Connection (Telegram bot)
+python-telegram-bot
+
# Build Tools (Nuitka binary compilation)
nuitka
ordered-set
diff --git a/main.py b/main.py
index 036c156..c0dffe8 100644
--- a/main.py
+++ b/main.py
@@ -28,12 +28,12 @@
raise RuntimeError(f"Unsupported OS: {platform.system()}")
# Configuration
-PROVIDER = "openrouter"
-MODEL = "gemini-3.1-pro" #refer to the model name correctly from model_list.txt.
+PROVIDER = "local"
+MODEL = "gemma4:e4b" #refer to the model name correctly from model_list.txt.
# Your task here
task = """
-study the the grep glob approach of macos then syndicate to windowsus use the directory is desktop/github/Auto use.
+open safari
"""