Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
880 changes: 880 additions & 0 deletions Auto_Use/macOS_use/remote_connection/telegram/banner.py

Large diffs are not rendered by default.

821 changes: 821 additions & 0 deletions Auto_Use/macOS_use/remote_connection/telegram/service.py

Large diffs are not rendered by default.

154 changes: 154 additions & 0 deletions Auto_Use/macOS_use/remote_connection/telegram/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# Copyright 2026 Autouse AI — https://github.com/auto-use/Auto-Use
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# If you build on this project, please keep this header and credit
# Autouse AI (https://github.com/auto-use/Auto-Use) in forks and derivative works.
# A small attribution goes a long way toward a healthy open-source
# community — thank you for contributing.

"""Telegram remote-connection setup driver (macOS, guided mode).

Opens Safari, navigates to web.telegram.org, then lets the user log in
manually. Progress is paced by a small always-on-top banner that streams
status text and has a Next button. The script blocks on user clicks via
banner.wait_for_next() — the user does the actual login (phone, country,
OTP) themselves; we just get them to the right page.
"""
import logging
import os
import time

from Auto_Use.macOS_use.controller.tool.open_app import open_app
from Auto_Use.macOS_use.tree.element import UIElementScanner, ELEMENT_CONFIG
from Auto_Use.macOS_use.controller.service import ControllerService
from Auto_Use.macOS_use.controller.key_combo.service import KeyComboService
from Auto_Use.macOS_use.remote_connection.telegram.banner import StatusBanner
from Auto_Use.macOS_use.remote_connection.telegram.service import (
_API_KEY_FILE, _set_key_in_file,
)

logger = logging.getLogger(__name__)

# Host typed into Safari's address bar by _open_telegram_in_safari (no scheme).
TELEGRAM_WEB_URL = "web.telegram.org"
# Pause, in seconds, inserted between the UI-automation steps below.
STEP_DELAY_SEC = 2


def _find_address_bar(mapping: dict) -> str | None:
"""Return the index of Safari's smart-search field, or None if not found."""
for idx, info in mapping.items():
if info.get("name") == "smart search field" and info.get("type") == "TextField":
return idx
return None


def _open_telegram_in_safari(banner) -> bool:
    """Launch Safari and navigate it to web.telegram.org.

    Streams sub-step status to the banner so the user can see what's happening
    while Safari takes focus.

    Args:
        banner: status banner; only its ``update(text)`` method is used here.

    Returns:
        True once the URL has been typed and Return has been sent. False on
        any failure: Safari did not launch, the address bar was not found in
        the scan, or the automation layer raised. Exceptions are logged and
        turned into False so the caller can show its own failure message
        instead of the walkthrough dying mid-step.
    """
    try:
        banner.update("Please wait — confirming Safari is open…")
        if not open_app("Safari"):
            logger.error("setup.py: failed to launch Safari")
            return False
        # open_app itself sleeps ~1 s after launching and then runs an
        # AppleScript window-move, so the address bar isn't reliably there
        # yet. One more second is enough for the smart-search field to settle
        # before we scan.
        time.sleep(1)

        scanner = UIElementScanner(ELEMENT_CONFIG)
        scanner.scan_elements()
        mapping = scanner.get_elements_mapping()
        time.sleep(STEP_DELAY_SEC)

        address_bar_index = _find_address_bar(mapping)
        if address_bar_index is None:
            logger.error("setup.py: Safari address bar not found in scan")
            return False

        banner.update("Safari detected. Writing the URL for you, please wait…")

        controller = ControllerService()
        controller.set_elements(mapping, scanner.application_name)
        key_combo = KeyComboService()

        # Focus the address bar, type the host, then submit. Each step is
        # followed by a pause so the UI can catch up before the next one.
        controller.click(address_bar_index)
        time.sleep(STEP_DELAY_SEC)

        controller.canvas_input(TELEGRAM_WEB_URL)
        time.sleep(STEP_DELAY_SEC)

        key_combo.send("return")
        return True
    except Exception:
        # Honour the "False on any failure" contract: the caller relies on it
        # to tell the user what went wrong.
        logger.exception("setup.py: unexpected error while opening Telegram in Safari")
        return False


def run(country_code: str = "", phone: str = "") -> bool:
    """Guided Telegram bot pairing, paced by the status banner.

    Steps, each gated on a banner click:
      1. Intro, then open Telegram Web in Safari.
      2. The user logs in manually and opens the @BotFather chat.
      3. The user picks a setup mode; for the "left" choice the /newbot
         instructions are shown first.
      4. The user pastes the bot token, which is saved under
         TELEGRAM_BOT_TOKEN via _set_key_in_file.
      5. The whole process hard-exits so the next launch picks up the token.

    country_code and phone are accepted but ignored — kept only so the
    pre-existing /api/telegram/connect callsite signature still works.

    Returns:
        False when Safari/Telegram could not be opened or no usable token was
        entered. On success the function does not return: it ends the process
        with os._exit(0).
    """
    banner = StatusBanner()
    banner.show()
    try:
        banner.update("Let's get you set up with Telegram. Please click Next.")
        banner.wait_for_next()

        if not _open_telegram_in_safari(banner):
            banner.update("Failed to open Telegram. Close this banner and try again.")
            banner.wait_for_next(timeout=15)
            return False

        banner.update("Please log in to Telegram, then click Next")
        banner.wait_for_next()

        banner.update(
            "Now search for @BotFather in Telegram and open the chat. "
            "Click Next when you're there."
        )
        banner.wait_for_next()

        banner.update("How do you want to set up the bot?")
        choice = banner.wait_for_choice("Fresh setup", "Token already generated")

        if choice == "left":
            banner.update(
                "In @BotFather, send these one at a time: /newbot → AutoUse → "
                "a unique bot name. BotFather will reply with your token. "
                "Click Next when you have it."
            )
            banner.wait_for_next()

        banner.update("Paste your BotFather token below and click Save.")
        # Strip BEFORE validating: a whitespace-only entry must not be saved
        # as an empty token and then trigger the hard exit below.
        token = (banner.wait_for_input(save_label="Save") or "").strip()
        if not token:
            # Nothing usable was entered (e.g. the Cocoa-unavailable fallback,
            # where the banner never appeared).
            return False

        _set_key_in_file(_API_KEY_FILE, "TELEGRAM_BOT_TOKEN", token)

        banner.update("Saved. Restarting AutoUse to start the bot…")
        # Give the message time to stream out + a beat for the user to read
        # it, then hard-exit the whole process. The user's next `python
        # app.py` boot picks up the fresh TELEGRAM_BOT_TOKEN and the bot
        # comes online with the saved owner chat. os._exit skips atexit /
        # finally cleanup, which is what we want — Cocoa will tear down
        # the banner + windows as the process dies.
        time.sleep(3)
        banner.close()
        os._exit(0)
    finally:
        # Reached only on the early-return / exception paths; os._exit above
        # bypasses it.
        banner.close()
157 changes: 157 additions & 0 deletions Auto_Use/macOS_use/remote_connection/telegram/view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# Copyright 2026 Autouse AI — https://github.com/auto-use/Auto-Use
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# If you build on this project, please keep this header and credit
# Autouse AI (https://github.com/auto-use/Auto-Use) in forks and derivative works.
# A small attribution goes a long way toward a healthy open-source
# community — thank you for contributing.

"""Flask Blueprint for the macOS Telegram surface.

Lives in the telegram folder so all Telegram-related code stays here — app.py
just imports `telegram_bp` and calls `app.register_blueprint(...)`. Routes:

GET /api/telegram/status → {connected, bot_username?}
POST /api/telegram/connect → kicks off the Phase 4 guided walkthrough
POST /api/telegram/disconnect → clears the persisted token

All token lookups read ONLY from api_key.txt. We deliberately do NOT consult
.env — that file is app.py's general env-loading concern; the Telegram bot
treats api_key.txt as its single source of truth.
"""
import json
import logging
import threading
import urllib.request
from pathlib import Path

from flask import Blueprint, jsonify

logger = logging.getLogger(__name__)

telegram_bp = Blueprint("telegram_macos", __name__)

# Location of the key file, resolved relative to this module:
# view.py → telegram → remote_connection → macOS_use → Auto_Use → repo root
_API_KEY_FILE = (
    Path(__file__).resolve().parents[4] / "Auto_Use" / "api_key" / "api_key.txt"
)

# Cached @username served by /api/telegram/status. None means "not looked up
# yet"; telegram_status stores "" when the lookup returned nothing, and both
# the no-token path and /disconnect reset it to None.
_bot_username_cache: str | None = None


def _read_token() -> str | None:
"""Pull TELEGRAM_BOT_TOKEN out of api_key.txt. Returns None if missing or
empty. Does NOT consult .env or env vars on purpose."""
if not _API_KEY_FILE.exists():
return None
try:
with open(_API_KEY_FILE, "r", encoding="utf-8") as f:
for line in f:
stripped = line.strip()
if stripped.startswith("TELEGRAM_BOT_TOKEN="):
val = stripped.partition("=")[2].strip()
return val or None
except Exception:
logger.warning("could not read %s", _API_KEY_FILE)
return None


def _set_token(value: str) -> None:
    """Write/clear TELEGRAM_BOT_TOKEN= in api_key.txt, preserving every other
    line (incl. empty-value placeholders the AutoUse UI relies on).

    Every line that starts with ``TELEGRAM_BOT_TOKEN=`` is replaced; if none
    exists, one is appended. Lines lacking a trailing newline get one.

    The new content is written to a sibling ``.tmp`` file and then moved over
    the original, so a failure part-way through the write cannot truncate the
    file that also holds the other keys.

    Args:
        value: new token; pass "" to clear it while keeping the placeholder.

    Failures are logged and swallowed (best-effort); nothing is raised.
    """
    assignment = f"TELEGRAM_BOT_TOKEN={value}\n"
    lines = []
    found = False
    try:
        with open(_API_KEY_FILE, "r", encoding="utf-8") as f:
            for raw in f:
                if raw.strip().startswith("TELEGRAM_BOT_TOKEN="):
                    lines.append(assignment)
                    found = True
                else:
                    lines.append(raw if raw.endswith("\n") else raw + "\n")
    except FileNotFoundError:
        # First write: there is nothing to preserve.
        pass
    except Exception:
        # Unreadable existing file: bail out rather than overwrite content we
        # could not load.
        logger.warning(
            "could not read %s while updating token", _API_KEY_FILE, exc_info=True
        )
        return
    if not found:
        lines.append(assignment)

    tmp_path = _API_KEY_FILE.with_name(_API_KEY_FILE.name + ".tmp")
    try:
        _API_KEY_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.writelines(lines)
        try:
            # Carry the existing permission bits over to the replacement.
            tmp_path.chmod(_API_KEY_FILE.stat().st_mode & 0o7777)
        except FileNotFoundError:
            pass  # no previous file whose mode needs preserving
        # Path.replace is a rename: readers see the old or the new file,
        # never a half-written one.
        tmp_path.replace(_API_KEY_FILE)
    except Exception:
        logger.warning("could not write %s", _API_KEY_FILE, exc_info=True)
        try:
            tmp_path.unlink()
        except OSError:
            pass  # temp file was never created or is already gone


def _fetch_bot_username(token: str) -> str | None:
"""One-shot call to Telegram's getMe — used by /status so the panel can
show '@your_bot' instead of just 'connected'."""
try:
resp = urllib.request.urlopen(
f"https://api.telegram.org/bot{token}/getMe", timeout=5
)
data = json.loads(resp.read())
if data.get("ok"):
return data["result"].get("username", "") or None
except Exception:
pass
return None


# ── routes ──────────────────────────────────────────────────────────────────

@telegram_bp.route("/api/telegram/status", methods=["GET"])
def telegram_status():
    """Report whether a bot token is stored, plus the bot's @username.

    The frontend picks its Remote Connection panel state from this. With no
    token in api_key.txt the reply is ``{"connected": false}`` and the cached
    username is dropped. With a token the reply is ``connected: true`` plus
    ``bot_username``, which is looked up only while the cache is still None,
    so Telegram's API is not hit on every page load. A lookup that yields
    nothing is cached as "".
    """
    global _bot_username_cache
    token = _read_token()
    if token:
        if _bot_username_cache is None:
            looked_up = _fetch_bot_username(token)
            _bot_username_cache = looked_up if looked_up else ""
        payload = {
            "connected": True,
            "bot_username": _bot_username_cache,
        }
    else:
        _bot_username_cache = None
        payload = {"connected": False}
    return jsonify(payload)


# Walkthrough thread started by the most recent /connect call, and the lock
# that makes the "is one already running?" check-and-start atomic across
# concurrent requests.
_setup_thread: threading.Thread | None = None
_setup_thread_lock = threading.Lock()


@telegram_bp.route("/api/telegram/connect", methods=["POST"])
def telegram_connect():
    """Kick off the Phase 4 guided walkthrough (Safari → web.telegram.org →
    user logs in manually, paced by the floating banner). Returns immediately;
    the real work runs on a daemon thread since it blocks on user clicks.

    Only one walkthrough runs at a time: while a previous one is still alive,
    a repeated POST (e.g. a double click) starts nothing new and still answers
    ``{"status": "started"}``, so there is never a second banner or a second
    Safari automation fighting the first.

    Returns:
        ``{"status": "started"}`` on success, or
        ``{"status": "error", "message": ...}`` with HTTP 500 when the setup
        module cannot be imported or the thread cannot be started.
    """
    global _setup_thread
    try:
        # Imported lazily so a failure here is reported by this route rather
        # than breaking the import of the whole blueprint.
        from Auto_Use.macOS_use.remote_connection.telegram.setup import (
            run as run_telegram_setup,
        )
        with _setup_thread_lock:
            if _setup_thread is None or not _setup_thread.is_alive():
                _setup_thread = threading.Thread(
                    target=run_telegram_setup, daemon=True
                )
                _setup_thread.start()
        return jsonify({"status": "started"})
    except Exception as e:
        logger.exception("telegram_connect failed")
        return jsonify({"status": "error", "message": str(e)}), 500


@telegram_bp.route("/api/telegram/disconnect", methods=["POST"])
def telegram_disconnect():
    """Clear the persisted token + the cached @bot_username. The polling
    thread already running keeps polling until the next app restart (soft
    disconnect) — clean shutdown of the bot loop is a future enhancement.

    Returns:
        ``{"status": "disconnected"}`` once the token is gone from
        api_key.txt, or ``{"status": "error", "message": ...}`` with HTTP 500
        when a token can still be read back after the clear attempt.
    """
    global _bot_username_cache
    _set_token("")
    _bot_username_cache = None
    # _set_token only logs when it cannot update the file, so read the token
    # back instead of reporting success unconditionally.
    if _read_token():
        logger.error(
            "telegram_disconnect: token still present in %s", _API_KEY_FILE
        )
        return jsonify({
            "status": "error",
            "message": "could not clear the Telegram token",
        }), 500
    return jsonify({"status": "disconnected"})
29 changes: 28 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,20 @@ def start_server():
host = '0.0.0.0' if IS_WINDOWS else '127.0.0.1'
app.run(host=host, port=5000, debug=False, use_reloader=False)

def minimize_main_window():
    """Minimise the AutoUse pywebview window, best-effort.

    Looks up the module-level ``webview_window``; when it is missing or None
    (e.g. the call arrives before main() has created the window) nothing
    happens. Any exception raised by the window's ``minimize()`` is handed to
    ``debug_exception`` instead of propagating. Safe to call from any thread —
    pywebview routes the call to its own UI loop internally.
    """
    window = globals().get('webview_window')
    if window is not None:
        try:
            window.minimize()
        except Exception:
            debug_exception("minimize_main_window")


def _compute_window_center(win_w, win_h):
"""Return (x, y) to center a (win_w, win_h) window on the main display.
Falls back to a sensible default if the native APIs are unavailable."""
Expand Down Expand Up @@ -899,14 +913,27 @@ class RECT(ctypes.Structure):
return 600, 30

def main():
# Wire the Telegram remote-control bot. Windows and macOS each mount their own
# Flask blueprint and start their own polling bot (the macOS blueprint reads
# its token from api_key.txt only).
if IS_WINDOWS:
try:
from Auto_Use.windows_use.remote_connection.telegram.view import telegram_bp, start_bot
app.register_blueprint(telegram_bp)
start_bot()
except Exception:
debug_exception("telegram_blueprint_init")
elif IS_MAC:
try:
from Auto_Use.macOS_use.remote_connection.telegram.view import telegram_bp
from Auto_Use.macOS_use.remote_connection.telegram.service import start_bot as start_telegram_bot
app.register_blueprint(telegram_bp)
start_telegram_bot()
except Exception as _tg_e:
import traceback as _tg_tb
print(f"[telegram] IMPORT/INIT FAILED: {_tg_e!r}", file=sys.stderr, flush=True)
_tg_tb.print_exc(file=sys.stderr)
debug_exception("telegram_bot_init")

if "--cli-mode" in sys.argv:
# CLI mode - delegate to the platform-specific CLI agent
Expand Down
Loading
Loading