diff --git a/README.md b/README.md index 74d6605..7a8c4d9 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,8 @@ # ceki-browser -> Real browsers of real people. 5-line API. Secure P2P via WebRTC. +Python SDK for [ceki.me](https://ceki.me) — rent real browsers from real people for AI agent automation. -Python SDK for [browser.ceki.me](https://browser.ceki.me) — rent real browsers from real people for AI agent automation. - -Browser commands travel over a direct WebRTC DataChannel between your agent and the provider's browser. Chat messages are routed through the relay server. The relay handles signaling, matchmaking, and chat. Connections are authenticated via STUN/TURN with identity validation. - -## Installation +## Install ```bash pip install ceki-browser @@ -16,221 +12,269 @@ pip install ceki-browser ```python import asyncio -from ceki_browser import Browser +import os +from ceki_browser import connect, ConnectOptions async def main(): - async with Browser(token="YOUR_TOKEN") as br: - async with await br.session(mode="incognito", domain_hints=["example.com"]) as s: - await s.navigate("https://example.com") - title = await s.query("h1") - print(title.text) + client = await connect(os.environ["CEKI_API_KEY"]) + options = await client.search({"geo": "US", "language": "en"}) + browser = await client.rent(options[0].schedule_id) + # ... CDP calls (see docs) + await browser.close() + await client.close() asyncio.run(main()) ``` -## Configuration - -| Parameter | Default | Description | -|---|---|---| -| `token` | — | Sanctum API token from your [dashboard](https://browser.ceki.me/dashboard) | -| `relay_url` | `wss://browser.ceki.me/ws/agent` | WebSocket relay endpoint | +**BREAKING in 2.2.0:** `connect()` no longer accepts `relay_url=` or `reconnect=` kwargs — pass a `ConnectOptions` object instead. 
-### Session options +## Environment Variables -| Parameter | Default | Description | -|---|---|---| -| `mode` | `"incognito"` | `"incognito"` (clean browser) or `"persona"` (real user cookies) | -| `domain_hints` | `[]` | Preferred domains for provider matching | -| `geo` | `""` | Preferred provider geo (e.g. `"US"`, `"DE"`) | -| `language` | `""` | Preferred browser language | -| `max_price_per_min` | `1.0` | Maximum price you're willing to pay per minute (USD) | -| `estimated_duration_min` | `30` | Estimated session duration for provider matching | - -## Methods - -| Method | Parameters | Returns | Description | -|---|---|---|---| -| `navigate(url)` | `url`, `timeout_ms=120000` | `NavigateResult` | Navigate to URL | -| `query(selector)` | `selector`, `attributes=["textContent"]` | `QueryResult` | Query first matching element | -| `query_all(selector)` | `selector`, `attributes`, `limit=20` | `QueryResult` | Query all matching elements | -| `get_html(selector)` | `selector="html"`, `outer=True` | `HtmlResult` | Get element HTML | -| `click(selector)` | `selector` or `x`/`y` coordinates | — | Click element or coordinates | -| `type(selector, text)` | `selector`, `text`, `delay_ms=0` | — | Type text into input | -| `scroll(selector)` | `selector` or `direction`/`amount` | — | Scroll to element or direction | -| `screenshot()` | `format="png"`, `quality=80` | `ScreenshotResult` | Capture visible tab | -| `back()` / `forward()` / `reload()` | — | `NavigateResult` | Navigation controls | -| `inject_credentials(secret_id, target)` | `secret_id`, `target` selectors | `dict` | Fill credentials from vault | -| `request_human_action(type, message)` | `action_type`, `message`, `timeout_sec=120` | `HumanActionResult` | Ask browser owner for help | - -### Credential Vault - -`inject_credentials` fills login forms using encrypted secrets stored on the provider side. 
-The SDK sends a `secret_id` — the provider extension decrypts and injects credentials locally (RSA-OAEP + AES-256-GCM). - -Create secrets via dashboard: **API Keys & Secrets** section. - -## Errors - -| Error | When | +| Variable | Description | |---|---| -| `AuthError` | Invalid or expired token | -| `ProviderDisconnected` | Provider went offline during session | -| `NavigationTimeout` | `navigate()` exceeded timeout | -| `CommandTimeout` | Any command exceeded timeout | -| `RateLimited` | Too many sessions/commands per hour | -| `ProviderNotVerified` | `inject_credentials` requires a verified provider | -| `HumanActionDeclined` | Browser owner declined the action | -| `HumanActionTimeout` | Browser owner didn't respond in time | +| `CEKI_API_KEY` | Your API key (required) | +| `CEKI_API_URL` | Override REST API base URL | +| `CEKI_RELAY_URL` | Override relay WebSocket URL | -## Chat +## API -During a browser session, your agent can exchange messages and images with the browser provider via `session.chat`. Chat is routed through the relay server. +### `connect(api_key, options: ConnectOptions | None = None) -> Client` -```python -from ceki_browser import Browser +Establish a WebSocket connection to the relay. Returns a `Client` instance. 
-async def main(): - async with Browser(token="YOUR_TOKEN") as br: - session = await br.session(mode="incognito", domain_hints=["example.com"]) +### `ConnectOptions` - # Listen for incoming text messages - session.chat.on_message(lambda msg: print(f"Provider: {msg.content}")) +| Field | Default | Description | +|---|---|---| +| `api_url` | `https://api.ceki.me` | REST API base URL | +| `relay_url` | `wss://browser.ceki.me/ws/agent` | Relay WebSocket URL | +| `basic_auth` | `None` | `(user, password)` for nginx htpasswd | +| `reconnect` | `True` | Auto-reconnect on disconnect | - # Send text - await session.chat.send("Starting automation, please don't close the browser") +### `client.search(filters=None, limit=20) -> list[BrowserOption]` - # Send image (bytes or path) - await session.chat.send_image(b"\x89PNG...", "image/png") - await session.chat.send_image("screenshot.png") +Search for available browsers. Filters: `geo`, `language`, etc. - # Fetch message history from server - messages = await session.chat.history() - for msg in messages: - print(msg) +### `client.rent(schedule_id) -> Browser` - await session.end() -``` +Rent a browser by schedule ID. Waits up to 60s for a match. + +### `client.close()` + +Close all sessions and the connection. + +## Error Codes -### Direct Chat (chat-service REST + WS) +| Exception | Cause | +|---|---| +| `AuthFailed` | Invalid API key or token revoked | +| `RateLimitExceeded` | Too many requests. Has `.retry_after` (seconds) | +| `InsufficientFunds` | Account balance too low | +| `SessionEnded` | Provider ended the session. 
Has `.reason` | +| `CdpUnrecoverable` | CDP connection lost permanently | +| `ConnectionLost` | Relay connection lost after max reconnects | + +## Session profile (cookies + storage) -For server-side chat access (polling, recovery, live push) independent of P2P: +`browser.profile` lets you snapshot and restore cookies, `localStorage`, and `sessionStorage` between sessions — without involving the relay or backend. The blob stays in your own storage. ```python -async with Browser(token=TOKEN) as br: - session = await br.session() +import json + +# First session — sign up, then export profile +async with await client.rent(schedule_id) as browser: + await browser.send({"method": "Page.navigate", "params": {"url": "https://reddit.com/login"}}) + # ... perform signup, 2FA ... + profile = await browser.profile.export(domains=[".reddit.com", "reddit.com"]) + +with open("reddit_profile.json", "w") as f: + json.dump(profile, f) + +# Next session — restore profile (cookies are imported before navigation; see Notes below for storage) +with open("reddit_profile.json") as f: + profile = json.load(f) + +async with await client.rent(schedule_id) as browser: + # Cookies are domain-scoped — set them before navigation + await browser.profile.import_(profile) + await browser.send({"method": "Page.navigate", "params": {"url": "https://reddit.com"}}) + # already logged in +``` - # topic_id from rent or passed manually - chat = session.chat_direct(topic_id="") +**Notes:** +- `localStorage`/`sessionStorage` require a document context — navigate to the target origin before calling `import_()`, or call it right after navigation. +- Cookies (`Network.setCookies`) work before any navigation. +- Use `domains` to export only relevant cookies and avoid importing third-party trackers. +- Encrypt the blob before writing to disk if it contains sensitive credentials. +- `import_()` raises `ValueError` on `schema_version` mismatch (future-proofing). 
- # Fetch message history (forward cursor) - msgs = await chat.history(after="", limit=50) +## CDP Lifecycle - # Send a message via REST - await chat.send("Hello from agent") +The relay maintains the CDP connection to the incognito browser tab. If the connection drops, it automatically reattaches with 1s/2s/4s exponential backoff. Commands during reattach are buffered (FIFO, max 50). If 3 reattach attempts fail, a new fallback tab is created. If that also fails, `cdp_unrecoverable` error is sent. - # Subscribe to live push via WebSocket - async def on_msg(msg): - print("new:", msg.get("content")) +## Real-signup examples - await chat.subscribe(on_msg) +See `examples/SMOKE.md` for full runbook. - # ... do work ... - await chat.close() +Quick: +```bash +pip install -e ".[dev]" +export CEKI_API_KEY=... +export SCHEDULE_ID=... +python examples/reddit_signup.py ``` -Set `CEKI_CHAT_SERVICE_URL` env var to override the chat-service URL (default: `https://chat.ceki.me`). +These are NOT automated tests — they require a live relay, an online provider, and a real IMAP mailbox. Run manually as part of Phase 2 acceptance. ## Human Mode -SDK includes built-in human-like behavior simulation (delays, typing jitter) enabled by default. - -### Profiles +Browser actions can optionally include human-like timing — delays before/after actions and per-character typing with jitter. 
```python -# Default — natural delays (enabled by default) -async with Browser(token, human="natural") as br: - s = await br.session() +# Default: natural profile (enabled by default) +browser = await client.rent(schedule_id) + +# Explicit profile +browser = await client.rent(schedule_id, human="careful") -# Careful — slower, more human-like -async with Browser(token, human="careful") as br: - s = await br.session() +# Disable humanization +browser = await client.rent(schedule_id, human=None) -# Disabled — no delays -async with Browser(token, human=None) as br: - s = await br.session() +# Custom profile dict +browser = await client.rent(schedule_id, human={"typing": {"wpm": 130}}) +``` -# Custom profile from dict -async with Browser(token, human={"typing": {"wpm": 140}, "pre_action_ms": {"click": [50, 200]}}) as br: - s = await br.session() +### High-level methods -# Custom profile from JSON file -async with Browser(token, human="./my_profile.json") as br: - s = await br.session() +```python +await browser.navigate("https://example.com") +await browser.click(100, 200) +await browser.type("Hello, world!") # Per-char with jitter when human mode on +await browser.scroll(delta_y=-300) +img_bytes = await browser.screenshot(format="png") ``` -### Runtime Profile Change +### Runtime control ```python -prev = s.set_human("careful") # switch to careful -await s.type("#email", "user@example.com") -s.set_human(prev) # restore previous +prev = browser.set_human("careful") # Switch profile, returns previous +browser.set_human(None) # Disable mid-session +``` + +### Environment variables + +- `CEKI_HUMAN_PROFILE` — Override default profile name (e.g., `careful`) +- `CEKI_HUMAN_PROFILE_PATH` — Path to custom JSON profile file +- `CEKI_HUMAN_DISABLE=1` — Disable humanization entirely + +## CLI + +Both SDKs install a single `ceki-browser` binary on your PATH. Same command set whether you came from Python or Node.js. 
+ +### Install + +Python: +```bash +pip install ceki-browser ``` -### Profile JSON Schema - -```json -{ - "version": 1, - "name": "natural", - "typing": { - "wpm": 110, - "jitter": 0.35, - "thinking_pause_prob": 0.012, - "thinking_pause_ms": [300, 1200], - "typo_prob": 0.0 - }, - "pre_action_ms": { - "click": [80, 350], - "type": [120, 500], - "scroll": [50, 250], - "navigate": [0, 0], - "screenshot": [0, 0] - }, - "post_action_ms": { - "click": [150, 800], - "type": [150, 800], - "scroll": [200, 900], - "navigate": [400, 1800], - "screenshot": [0, 0] - }, - "mouse": { - "move_before_click": false, - "trajectory": "off" - }, - "rng_seed": null -} +Node.js: +```bash +npm install -g ceki-browser ``` -### Environment Variables +### Environment variables -| Variable | Description | +| Variable | Required | Purpose | +|---|---|---| +| `CEKI_API_KEY` | yes | Agent token (`ag_...`) | +| `CEKI_API_URL` | no | Override API base URL (default: `https://api.ceki.me`) | +| `CEKI_RELAY_URL` | no | Override relay WS URL (default: `wss://browser.ceki.me/ws/agent`) | +| `CEKI_CHAT_URL` | no | Override chat-service URL | +| `CEKI_BASIC_AUTH_USER` / `_PASS` | no | HTTP Basic Auth for protected dev/stage endpoints | + +### Quick start + +```bash +export CEKI_API_KEY=ag_... + +SCHEDULE=$(ceki-browser search --limit 1 | jq -r '.[0].schedule_id') +SID=$(ceki-browser rent --schedule $SCHEDULE | jq -r .session_id) +ceki-browser navigate $SID https://example.com +ceki-browser snapshot $SID -o snap.png +ceki-browser stop $SID +``` + +The CLI persists session state locally — after `rent` it saves the session ID so subsequent commands resume it by SID without re-renting. 
+### Commands + +#### Discovery and lifecycle + +| Command | Description | |---|---| -| `CEKI_HUMAN_PROFILE` | Preset name (`natural`, `careful`) | -| `CEKI_HUMAN_PROFILE_PATH` | Path to custom JSON profile | -| `CEKI_HUMAN_DISABLE=1` | Disable all human-mode delays | +| `search [--limit N] [--filter K=V]…` | List available browsers | +| `my-browsers` | List browsers with pre-arranged rent contracts | +| `rent --schedule ID [--mode incognito\|main] [--fingerprint-from FILE]` | Rent a browser | +| `sessions [--all] [--limit N] [--json]` | List your sessions | +| `stop SID` | End a session | +| `wait SID` | Block until the session ends | -Priority: explicit `Browser(human=...)` > env vars > default (`natural`). +#### Browser control -## Examples +| Command | Description | +|---|---| +| `navigate SID URL` | Open URL | +| `click SID X Y` | Click at viewport coordinates | +| `type SID TEXT [--natural]` | Type text into focused element | +| `scroll SID X Y DY` | Scroll from (X, Y) by `DY` pixels | +| `screenshot SID -o FILE [--format png\|jpeg] [--full]` | Save screenshot | +| `snapshot SID -o FILE` | Screenshot + new chat messages | +| `switch-tab SID` | Switch active tab | +| `upload SID --selector CSS --file PATH [--filename NAME]` | Attach file to `<input type="file">` | + +#### Chat with host + +| Command | Description | +|---|---| +| `chat SID send TEXT` | Send message to host | +| `chat SID next [--timeout SEC]` | Wait for next host message | +| `chat SID history [--since TS] [--limit N]` | Fetch chat history | +| `chat SID send-image --image PATH [--text MSG]` | Send image to host | + +#### Advanced -- [`quickstart.py`](examples/quickstart.py) — minimal 5-line example -- [`scraping.py`](examples/scraping.py) — query DOM elements -- [`login_flow.py`](examples/login_flow.py) — inject credentials + 2FA +| Command | Description | +|---|---| +| `profile SID export -o FILE [--domains CSV] [--no-session-storage]` | Export cookies / localStorage | +| `profile SID import -i FILE` | Import 
previously exported profile | +| `request-captcha SID [--acceptance SEC] [--completion SEC] [--manual]` | Ask host to solve CAPTCHA | +| `configure SID [--masking-mode VAL] [--fingerprint VAL]` | Toggle masking / fingerprint | +| `cdp SID --method METHOD [--params JSON]` | Raw CDP command | -## Pricing +### Output and errors -See [browser.ceki.me/pricing](https://browser.ceki.me/pricing). +Successful commands write a single JSON line to stdout. Errors go to stderr as `{"error": "...", "code": "..."}`. Pipe stdout through `jq` to chain commands. -## License +### Exit codes -MIT +| Code | Meaning | +|---|---| +| `0` | success | +| `1` | generic error | +| `2` | `CEKI_API_KEY` not set | +| `3` | session not found or not owner | +| `4` | timeout | +| `5` | network / connection error | +| `130` | interrupted (Ctrl-C) | + +Full reference (with EN+RU): https://browser.ceki.me/docs#cli + +## Development + +```bash +pip install -e ".[dev]" +pytest +ruff check ceki_browser/ +mypy ceki_browser/ +``` diff --git a/ceki_browser/__init__.py b/ceki_browser/__init__.py index aa6d132..45c0de9 100644 --- a/ceki_browser/__init__.py +++ b/ceki_browser/__init__.py @@ -1,58 +1,52 @@ -from .chat_direct import ChatClient -from .client import Browser -from .humanize import HumanProfile, Humanizer -from .errors import ( - AuthError, - CekiBrowserError, - CommandTimeout, - HumanActionDeclined, - HumanActionTimeout, - NavigationTimeout, - NoMatchError, +from ._browser import Browser +from ._captcha import CaptchaResult +from ._client import Client +from ._connect import ConnectOptions, connect +from ._profile import BrowserProfile +from ._exceptions import ( + AuthFailed, + CaptchaError, + CaptchaTimeoutError, + CdpUnrecoverable, + CekiError, + ConnectionLost, + InsufficientFunds, + NotOwner, ProviderDisconnected, - ProviderNotVerified, - RateLimited, - SessionEndedError, -) -from .chat import ChatAPI -from .session import Session -from .transport_rtc import RTCTransport -from .types import ( 
- ChatMessage, - HtmlResult, - HumanActionResult, - NavigateResult, - QueryResult, - ScreenshotResult, - TypingEvent, + RateLimitExceeded, + SessionEnded, + SessionExpired, + SessionNotFound, ) +from ._models import BrowserOption, ChatMessage, Match, ReadReceipt, SessionInfo, Snapshot +from .humanize import HumanProfile +__version__ = "2.15.1" __all__ = [ + "connect", + "ConnectOptions", + "Client", "Browser", - "ChatAPI", - "ChatClient", - "HumanProfile", - "Humanizer", - "Session", - "RTCTransport", - "AuthError", - "CekiBrowserError", - "CommandTimeout", - "HumanActionDeclined", - "HumanActionTimeout", - "NavigationTimeout", - "ProviderDisconnected", - "ProviderNotVerified", - "NoMatchError", - "RateLimited", - "SessionEndedError", + "BrowserOption", + "Match", "ChatMessage", - "HtmlResult", - "HumanActionResult", - "NavigateResult", - "QueryResult", - "ScreenshotResult", - "TypingEvent", + "ReadReceipt", + "RateLimitExceeded", + "InsufficientFunds", + "SessionEnded", + "CdpUnrecoverable", + "AuthFailed", + "ConnectionLost", + "ProviderDisconnected", + "SessionNotFound", + "SessionExpired", + "NotOwner", + "SessionInfo", + "Snapshot", + "BrowserProfile", + "CekiError", + "HumanProfile", + "CaptchaResult", + "CaptchaError", + "CaptchaTimeoutError", ] - -__version__ = "0.3.0" diff --git a/ceki_browser/_browser.py b/ceki_browser/_browser.py new file mode 100644 index 0000000..2db2a0f --- /dev/null +++ b/ceki_browser/_browser.py @@ -0,0 +1,646 @@ +from __future__ import annotations + +import asyncio +import base64 +import json +import logging +import mimetypes +import os +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import TYPE_CHECKING, Any, Awaitable, Callable, Coroutine, Literal, cast + +import httpx + +from .humanize import HumanProfile, Humanizer + +if TYPE_CHECKING: + from ._client import Client +from ._captcha import CaptchaResult +from ._exceptions import ( + CaptchaTimeoutError, + CdpUnrecoverable, + 
InsufficientFunds, + ProviderDisconnected, + RateLimitExceeded, + SessionEnded, +) +from ._models import Match, Snapshot + +log = logging.getLogger(__name__) + +EventCallback = Callable[[str, dict[str, Any]], Awaitable[None]] +TabOpenedCallback = Callable[[str], Awaitable[None]] +SimpleCallback = Callable[[], Awaitable[None]] +UserEventCallback = Callable[[list[dict[str, Any]]], Awaitable[None]] + +_ERROR_TERMINAL = {-1011, -1012, -1015, -1018} + + +def _resolve_human(human) -> Humanizer | None: + if os.environ.get("CEKI_HUMAN_DISABLE", "").lower() in ("1", "true", "yes"): + return None + if human is None: + return None + if isinstance(human, HumanProfile): + return Humanizer(human) + if isinstance(human, dict): + return Humanizer(HumanProfile.from_dict(human)) + if isinstance(human, (str, Path)): + s = str(human) + if s in ("natural", "careful"): + return Humanizer(HumanProfile.load_preset(s)) + return Humanizer(HumanProfile.load(s)) + raise ValueError(f"Invalid human profile: {human!r}") + + +class Browser: + def __init__(self, client: "Client", match: Match, *, human="natural") -> None: + self._client = client + self._match = match + self._cdp_counter = 0 + self._pending_cdp: dict[int, asyncio.Future[Any]] = {} + self._event_callbacks: list[EventCallback] = [] + self._tab_opened_callbacks: list[TabOpenedCallback] = [] + self._provider_disconnected_callbacks: list[SimpleCallback] = [] + self._provider_reconnected_callbacks: list[SimpleCallback] = [] + self._user_event_callbacks: list[UserEventCallback] = [] + self._ended = asyncio.Event() + self._ended_reason: str | None = None + + from ._chat import BrowserChat + from ._profile import BrowserProfile + + self.chat = BrowserChat(self) + self.profile = BrowserProfile(self) + + env_profile = os.environ.get("CEKI_HUMAN_PROFILE") + env_path = os.environ.get("CEKI_HUMAN_PROFILE_PATH") + if human == "natural" and env_profile: + human = env_profile + elif human == "natural" and env_path: + human = env_path + 
self._humanizer = _resolve_human(human) + self._last_pointer: tuple[int, int] | None = None + self._last_seen_ts: str | None = None + + @property + def session_id(self) -> str: + return self._match.session_id + + @property + def browser_id(self) -> int: + return self._match.schedule_id + + @property + def schedule_id(self) -> int: + return self._match.schedule_id + + @property + def chat_topic_id(self) -> str | None: + return self._match.chat_topic_id + + @property + def browser_info(self) -> dict[str, Any]: + return self._match.browser_info + + @property + def provider_user_id(self) -> int | None: + return self._match.provider_user_id + + async def send(self, cdp: dict[str, Any], *, timeout: float = 60.0) -> dict[str, Any]: + if self._ended.is_set(): + raise SessionEnded(self._ended_reason or "ended") + cdp_id = self._cdp_counter + self._cdp_counter += 1 + loop = asyncio.get_event_loop() + fut: asyncio.Future[Any] = loop.create_future() + self._pending_cdp[cdp_id] = fut + try: + await self._client._ws_send( + { + "type": "cdp", + "session_id": self.session_id, + "id": cdp_id, + "method": cdp["method"], + "params": cdp.get("params", {}), + } + ) + result = await asyncio.wait_for(asyncio.shield(fut), timeout=timeout) + return result + finally: + self._pending_cdp.pop(cdp_id, None) + + def on_event(self, callback: EventCallback) -> None: + self._event_callbacks.append(callback) + + def on_tab_opened(self, callback: TabOpenedCallback) -> None: + self._tab_opened_callbacks.append(callback) + + def on_provider_disconnected(self, callback: SimpleCallback) -> None: + self._provider_disconnected_callbacks.append(callback) + + def on_provider_reconnected(self, callback: SimpleCallback) -> None: + self._provider_reconnected_callbacks.append(callback) + + def on_user_event(self, callback: UserEventCallback) -> None: + self._user_event_callbacks.append(callback) + + async def switch_tab(self) -> None: + await self._client._ws_send({"type": "switch_tab", "session_id": 
self.session_id}) + + async def configure(self, *, masking_mode: str | None = None, **kwargs: Any) -> None: + payload: dict[str, Any] = {"type": "session.configure", "session_id": self.session_id} + if masking_mode is not None: + payload["masking_mode"] = masking_mode + payload.update(kwargs) + await self._client._ws_send(payload) + + async def close(self, *, timeout: float = 10.0) -> None: + if self._ended.is_set(): + return + try: + await self._client._ws_send( + {"type": "session.end", "session_id": self.session_id, "reason": "user_stop"} + ) + await asyncio.wait_for(self._ended.wait(), timeout=timeout) + except asyncio.TimeoutError: + for fut in self._pending_cdp.values(): + if not fut.done(): + fut.cancel() + self._pending_cdp.clear() + self._ended.set() + self._ended_reason = "user_stop" + finally: + self._client._active_browsers.pop(self.session_id, None) + + async def release(self, *, timeout: float = 10.0) -> None: + """Alias for :meth:`close` — завершить аренду браузера.""" + await self.close(timeout=timeout) + + async def wait_until_ended(self) -> str: + await self._ended.wait() + return self._ended_reason or "unknown" + + # ────────────────────────────────────────────────────────────────────────── + # High-level browser actions (with optional human-like timing) + # ────────────────────────────────────────────────────────────────────────── + + async def navigate(self, url: str, *, timeout: float = 30.0) -> dict: + if self._humanizer: + await self._humanizer.before("navigate") + result = await self.send({"method": "Page.navigate", "params": {"url": url}}, timeout=timeout) + if self._humanizer: + await self._humanizer.after("navigate") + return result + + async def click(self, x: int | float, y: int | float) -> None: + if self._humanizer: + await self._humanizer.before("click") + await self.send({"method": "Input.dispatchMouseEvent", "params": { + "type": "mousePressed", "x": int(x), "y": int(y), "button": "left", "clickCount": 1, + }}) + await 
self.send({"method": "Input.dispatchMouseEvent", "params": { + "type": "mouseReleased", "x": int(x), "y": int(y), "button": "left", "clickCount": 1, + }}) + self._last_pointer = (int(x), int(y)) + if self._humanizer: + await self._humanizer.after("click") + + async def _send_keystroke(self, char: str) -> None: + from .humanize.keymap import keymap_for_char + mapping = keymap_for_char(char) + if mapping is None: + await self.send({"method": "Input.insertText", "params": {"text": char}}) + log.warning("Non-ASCII char %r: falling back to Input.insertText", char) + return + code, key, vk, needs_shift = mapping + if needs_shift: + await self.send({"method": "Input.dispatchKeyEvent", "params": { + "type": "keyDown", "key": "Shift", "code": "ShiftLeft", + "windowsVirtualKeyCode": 16, "nativeVirtualKeyCode": 16, + }}) + await self.send({"method": "Input.dispatchKeyEvent", "params": { + "type": "keyDown", "key": key, "code": code, + "text": char, "unmodifiedText": char.lower() if needs_shift else char, + "windowsVirtualKeyCode": vk, "nativeVirtualKeyCode": vk, + **({"modifiers": 8} if needs_shift else {}), + }}) + await self.send({"method": "Input.dispatchKeyEvent", "params": { + "type": "keyUp", "key": key, "code": code, + "windowsVirtualKeyCode": vk, "nativeVirtualKeyCode": vk, + **({"modifiers": 8} if needs_shift else {}), + }}) + if needs_shift: + await self.send({"method": "Input.dispatchKeyEvent", "params": { + "type": "keyUp", "key": "Shift", "code": "ShiftLeft", + "windowsVirtualKeyCode": 16, "nativeVirtualKeyCode": 16, + }}) + + async def type(self, text: str) -> None: + if self._humanizer: + if self._last_pointer is not None: + await self.click(*self._last_pointer) + else: + log.debug("type() called with humanizer but no last_pointer; falling back to plain insertText") + await self._humanizer.before("type") + async for char, delay_ms in self._humanizer.humanize_text(text): + await self._send_keystroke(char) + if delay_ms > 0: + await asyncio.sleep(delay_ms / 1000) 
+ await self._humanizer.after("type") + else: + for char in text: + await self._send_keystroke(char) + + async def scroll( + self, x: int = 0, y: int = 0, *, delta_x: int = 0, delta_y: int = -300 + ) -> None: + if self._humanizer: + await self._humanizer.before("scroll") + await self.send({"method": "Input.dispatchMouseEvent", "params": { + "type": "mouseWheel", "x": x, "y": y, "deltaX": delta_x, "deltaY": delta_y, + }}) + self._last_pointer = (int(x), int(y)) + if self._humanizer: + await self._humanizer.after("scroll") + + async def screenshot( + self, + *, + format: Literal["base64", "png"] = "base64", + full_page: bool = False, + ) -> dict | bytes: + """Take a screenshot. + + Args: + format: ``"base64"`` (default) returns CDP-shape dict, ``"png"`` returns raw PNG bytes. + full_page: If True, capture the entire scrollable page, not just the viewport. + """ + if format not in ("base64", "png"): + raise ValueError(f"Unsupported format: {format!r}. Use 'base64' or 'png'.") + if self._humanizer: + await self._humanizer.before("screenshot") + + params: dict[str, Any] = {} + if full_page: + metrics = await self.send({"method": "Page.getLayoutMetrics"}) + content = metrics.get("contentSize", {}) + width = int(content.get("width", 0)) + height = int(content.get("height", 0)) + MAX_HEIGHT = 16384 + if height > MAX_HEIGHT: + log.warning("full_page screenshot height=%d clamped to %d", height, MAX_HEIGHT) + height = MAX_HEIGHT + params["captureBeyondViewport"] = True + params["clip"] = {"x": 0, "y": 0, "width": width, "height": height, "scale": 1} + + resp = await self.send({"method": "Page.captureScreenshot", "params": params}) + if self._humanizer: + await self._humanizer.after("screenshot") + if format == "base64": + return resp + import base64 as _b64 + data = resp.get("data", "") + return _b64.b64decode(data) if data else b"" + + async def snapshot(self) -> Snapshot: + from datetime import datetime, timezone + resp = await self.send({"method": 
"Page.captureScreenshot"}) + screenshot_b64 = resp.get("data", "") + all_msgs = await self.chat.history(since=self._last_seen_ts) + if self._last_seen_ts and all_msgs: + all_msgs = [m for m in all_msgs if m.created_at > self._last_seen_ts] + if all_msgs: + self._last_seen_ts = all_msgs[-1].created_at + return Snapshot(screenshot=screenshot_b64, chat=all_msgs, ts=datetime.now(timezone.utc)) + + async def upload( + self, + selector: str, + source: str | Path | bytes, + *, + filename: str | None = None, + ) -> dict: + """Upload a file to an ```` element. + + Args: + selector: CSS selector for the file input element. + source: File path (str/Path) or raw bytes. + filename: Override the filename (default: basename of path or ``upload.bin``). + + Returns: + ``{"ok": True, "filename": "...", "size": N}`` on success. + + Raises: + ValueError: If selector matches no element or element is not a file input. + """ + if isinstance(source, (str, Path)): + path = Path(source) + if not path.is_file(): + raise ValueError(f"file not found: {path}") + data = path.read_bytes() + if filename is None: + filename = path.name + elif isinstance(source, bytes): + data = source + if filename is None: + filename = "upload.bin" + else: + raise TypeError(f"source must be str, Path, or bytes, got {type(source).__name__}") + + mime_type, _ = mimetypes.guess_type(filename) + if mime_type is None: + mime_type = "application/octet-stream" + + b64_data = base64.b64encode(data).decode("ascii") + + js_selector = json.dumps(selector) + js_filename = json.dumps(filename) + js_mimetype = json.dumps(mime_type) + + js_expr = ( + "(function() {" + f"var input = document.querySelector({js_selector});" + "if (!input) return JSON.stringify({error: 'no input matched'});" + "if (input.type !== 'file') return JSON.stringify({error: 'element is not a file input'});" + f"var b64 = '{b64_data}';" + "var bin = atob(b64);" + "var bytes = new Uint8Array(bin.length);" + "for (var i = 0; i < bin.length; i++) bytes[i] = 
bin.charCodeAt(i);" + f"var file = new File([bytes], {js_filename}, {{type: {js_mimetype}}});" + "var dt = new DataTransfer();" + "dt.items.add(file);" + "input.files = dt.files;" + "input.dispatchEvent(new Event('change', {bubbles: true}));" + f"return JSON.stringify({{ok: true, filename: {js_filename}, size: bytes.length}});" + "})()" + ) + + resp = await self.send( + {"method": "Runtime.evaluate", "params": {"expression": js_expr, "returnByValue": True}} + ) + + value = resp.get("result", {}).get("value", "") + if isinstance(value, str): + parsed = json.loads(value) + else: + parsed = value + + if "error" in parsed: + raise ValueError(parsed["error"]) + + return parsed + + def set_human(self, profile) -> "HumanProfile | None": + prev = self._humanizer.profile if self._humanizer else None + self._humanizer = _resolve_human(profile) + return prev + + # ────────────────────────────────────────────────────────────────────────── + # Human action / captcha + # ────────────────────────────────────────────────────────────────────────── + + def _api_headers(self) -> dict[str, str]: + headers: dict[str, str] = {"Authorization": f"Bearer {self._client.api_key}"} + if self._client._basic_auth: + creds = base64.b64encode( + f"{self._client._basic_auth[0]}:{self._client._basic_auth[1]}".encode() + ).decode() + headers["X-Basic-Auth"] = f"Basic {creds}" + return headers + + async def request_captcha( + self, + acceptance_timeout: float = 60, + completion_timeout: float = 120, + auto_accept: bool = True, + ) -> CaptchaResult: + if acceptance_timeout < 30: + raise ValueError("acceptance_timeout must be >= 30 seconds") + if completion_timeout < 30: + raise ValueError("completion_timeout must be >= 30 seconds") + + acceptance_timeout = min(acceptance_timeout, 300) + completion_timeout = min(completion_timeout, 600) + + child_event_id = await self._create_captcha_event(acceptance_timeout, completion_timeout) + + queue: asyncio.Queue[dict[str, Any]] = asyncio.Queue() + 
self.chat._action_queues[child_event_id] = queue + + accepted = False + completion_deadline = datetime.now(timezone.utc) + timedelta(seconds=completion_timeout) + + try: + deadline_accept = asyncio.get_event_loop().time() + acceptance_timeout + while True: + remaining = deadline_accept - asyncio.get_event_loop().time() + if remaining <= 0: + raise asyncio.TimeoutError() + action = await asyncio.wait_for(queue.get(), timeout=remaining) + kind = action.get("kind", "") + data: dict[str, Any] = action.get("data") or {} + + if kind == "human_action_accepted": + accepted = True + break + if kind == "human_action_completed": + return await self._finish_captcha( + child_event_id, data, auto_accept, solved=True, + ) + if kind in ( + "human_action_failed", + "human_action_declined", + "human_action_withdrew", + ): + return CaptchaResult( + solved=False, + child_event_id=child_event_id, + cancel_reason=kind.replace("human_action_", ""), + browser=self, + ) + + remaining_completion = ( + completion_deadline - datetime.now(timezone.utc) + ).total_seconds() + while True: + if remaining_completion <= 0: + raise asyncio.TimeoutError() + action = await asyncio.wait_for( + queue.get(), timeout=remaining_completion, + ) + kind = action.get("kind", "") + data = action.get("data") or {} + + if kind == "human_action_completed": + return await self._finish_captcha( + child_event_id, data, auto_accept, solved=True, + ) + if kind in ("human_action_failed", "human_action_withdrew"): + return CaptchaResult( + solved=False, + child_event_id=child_event_id, + cancel_reason=kind.replace("human_action_", ""), + browser=self, + ) + remaining_completion = ( + completion_deadline - datetime.now(timezone.utc) + ).total_seconds() + + except asyncio.TimeoutError: + phase = "completion" if accepted else "acceptance" + await self._expire_captcha_event(child_event_id) + raise CaptchaTimeoutError(phase) from None + finally: + self.chat._action_queues.pop(child_event_id, None) + + async def 
_finish_captcha( + self, + child_event_id: int, + data: dict[str, Any], + auto_accept: bool, + *, + solved: bool, + ) -> CaptchaResult: + result = CaptchaResult( + solved=solved, + child_event_id=child_event_id, + proof_message_id=data.get("proof_message_id"), + correction_id=data.get("correction_id"), + browser=self, + ) + if auto_accept and solved and result.correction_id: + await asyncio.sleep(2) + await result.accept_work() + return result + + async def _create_captcha_event( + self, acceptance_timeout: float, completion_timeout: float, + ) -> int: + body = { + "acceptance_deadline_at": int(acceptance_timeout), + "completion_deadline_at": int(completion_timeout), + } + async with httpx.AsyncClient() as http: + resp = await http.post( + f"{self._client.api_url}/api/agent/sessions/{self._match.event_id}/captcha-request", + headers={**self._api_headers(), "Content-Type": "application/json"}, + json=body, + ) + resp.raise_for_status() + result = resp.json() + event_id = result.get("id") + if not event_id: + raise RuntimeError("captcha request did not return an id") + return int(event_id) + + async def _expire_captcha_event(self, child_event_id: int) -> None: + try: + async with httpx.AsyncClient() as http: + await http.patch( + f"{self._client.api_url}/api/agent/kal/event/{child_event_id}", + headers={**self._api_headers(), "Content-Type": "application/json"}, + json={"status_id": 777}, + ) + except Exception as exc: + log.warning("expire captcha event %d failed: %s", child_event_id, exc) + + # ────────────────────────────────────────────────────────────────────────── + # Internal dispatch (called from Client._reader_loop) + # ────────────────────────────────────────────────────────────────────────── + + async def _on_cdp_response(self, msg: dict[str, Any]) -> None: + cmd_id = msg.get("id") + if cmd_id is not None and cmd_id in self._pending_cdp: + fut = self._pending_cdp.pop(cmd_id) + if not fut.done(): + if msg.get("ok", True): + fut.set_result(msg.get("result", 
{})) + else: + err = msg.get("error", {}) + fut.set_exception(Exception(f"CDP error {err}")) + + async def _on_cdp_event(self, msg: dict[str, Any]) -> None: + method = msg.get("method", "") + params = msg.get("params", {}) + for cb in self._event_callbacks: + asyncio.create_task(cast(Coroutine, cb(method, params))) + + async def _on_tab_opened(self, msg: dict[str, Any]) -> None: + url = msg.get("url", "") + for cb in self._tab_opened_callbacks: + asyncio.create_task(cast(Coroutine, cb(url))) + + async def _on_session_ended(self, msg: dict[str, Any]) -> None: + reason = msg.get("reason", "completed") + self._ended_reason = reason + if reason == "provider_disconnected": + exc: Exception = ProviderDisconnected() + else: + exc = SessionEnded(reason) + for fut in self._pending_cdp.values(): + if not fut.done(): + fut.set_exception(exc) + self._pending_cdp.clear() + self._ended.set() + self._client._active_browsers.pop(self.session_id, None) + + async def _on_provider_disconnected(self, msg: dict[str, Any]) -> None: + for cb in self._provider_disconnected_callbacks: + asyncio.create_task(cast(Coroutine, cb())) + + async def _on_provider_reconnected(self, msg: dict[str, Any]) -> None: + for cb in self._provider_reconnected_callbacks: + asyncio.create_task(cast(Coroutine, cb())) + + async def _on_user_events(self, msg: dict[str, Any]) -> None: + events: list[dict[str, Any]] = msg.get("events", []) + for cb in self._user_event_callbacks: + asyncio.create_task(cast(Coroutine, cb(events))) + + async def _on_error(self, msg: dict[str, Any]) -> None: + code = msg.get("code", 0) + cmd_id = msg.get("id") + + if code == -1013: + exc: Exception = RateLimitExceeded(retry_after=float(msg.get("retry_after", 1.0))) + if cmd_id is not None and cmd_id in self._pending_cdp: + fut = self._pending_cdp.pop(cmd_id) + if not fut.done(): + fut.set_exception(exc) + return + + if code == -1050: + last_err = msg.get("last_error", msg.get("message", "cdp_error")) + exc = 
CdpUnrecoverable(last_error=str(last_err)) + if cmd_id is not None and cmd_id in self._pending_cdp: + fut = self._pending_cdp.pop(cmd_id) + if not fut.done(): + fut.set_exception(exc) + return + + if code == -1011: + reason = "heartbeat_timeout" + elif code == -1012: + reason = "insufficient_funds" + elif code == -1015: + reason = "provider_declined" + elif code == -1018: + reason = "killed" + else: + reason = msg.get("reason") or msg.get("message") or f"error_{code}" + + self._ended_reason = reason + terminal_exc: Exception + if code == -1012: + terminal_exc = InsufficientFunds() + else: + terminal_exc = SessionEnded(reason) + + for fut in self._pending_cdp.values(): + if not fut.done(): + fut.set_exception(terminal_exc) + self._pending_cdp.clear() + self._ended.set() + self._client._active_browsers.pop(self.session_id, None) diff --git a/ceki_browser/_captcha.py b/ceki_browser/_captcha.py new file mode 100644 index 0000000..e9fdb44 --- /dev/null +++ b/ceki_browser/_captcha.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any + +import httpx + +if TYPE_CHECKING: + from ._browser import Browser + +from ._exceptions import CaptchaError + +log = logging.getLogger(__name__) + + +class CaptchaResult: + __slots__ = ( + "solved", "proof_message_id", "cancel_reason", "child_event_id", + "correction_id", "_browser", "_voted", + ) + + def __init__( + self, + *, + solved: bool, + child_event_id: int, + proof_message_id: str | None = None, + cancel_reason: str | None = None, + correction_id: int | None = None, + browser: Browser | None = None, + ) -> None: + self.solved = solved + self.proof_message_id = proof_message_id + self.cancel_reason = cancel_reason + self.child_event_id = child_event_id + self.correction_id = correction_id + self._browser = browser + self._voted = False + + async def accept_work(self) -> None: + if self._voted: + return + if not self.correction_id: + raise CaptchaError("no correction_id — 
provider has not proposed completion") + if not self._browser: + raise CaptchaError("no browser reference") + self._voted = True + client = self._browser._client + headers = self._browser._api_headers() + async with httpx.AsyncClient() as http: + resp = await http.post( + f"{client.api_url}/api/agent/kal/event/{self.child_event_id}/vote", + headers=headers, + json={"ids": [self.correction_id], "vote": True}, + ) + if not resp.is_success: + log.warning("accept_work vote failed: %s", resp.status_code) + + async def reject_work(self, reason: str | None = None) -> None: + if self._voted: + return + if not self.correction_id: + raise CaptchaError("no correction_id — provider has not proposed completion") + if not self._browser: + raise CaptchaError("no browser reference") + self._voted = True + client = self._browser._client + headers = self._browser._api_headers() + body: dict[str, Any] = {"ids": [self.correction_id], "vote": False} + if reason: + body["reason"] = reason + async with httpx.AsyncClient() as http: + resp = await http.post( + f"{client.api_url}/api/agent/kal/event/{self.child_event_id}/vote", + headers=headers, + json=body, + ) + if not resp.is_success: + log.warning("reject_work vote failed: %s", resp.status_code) + + def to_dict(self) -> dict[str, Any]: + return { + "solved": self.solved, + "proof_message_id": self.proof_message_id, + "cancel_reason": self.cancel_reason, + "child_event_id": self.child_event_id, + "correction_id": self.correction_id, + } diff --git a/ceki_browser/_chat.py b/ceki_browser/_chat.py new file mode 100644 index 0000000..63d610c --- /dev/null +++ b/ceki_browser/_chat.py @@ -0,0 +1,203 @@ +from __future__ import annotations + +import asyncio +import base64 +import logging +import mimetypes +from pathlib import Path +from typing import TYPE_CHECKING, Awaitable, Callable, Coroutine, cast +from uuid import uuid4 + +import httpx + +from ._exceptions import ChatSendFailed +from ._models import ChatMessage, ReadReceipt + +if 
TYPE_CHECKING: + from ._browser import Browser + +log = logging.getLogger(__name__) + +MessageCallback = Callable[[ChatMessage], Awaitable[None]] +ReadCallback = Callable[[ReadReceipt], Awaitable[None]] + +MAX_IMAGE_BYTES = 5 * 1024 * 1024 + + +def _detect_mime(data: bytes) -> str: + if data[:4] == b"\x89PNG": + return "image/png" + if data[:3] == b"\xff\xd8\xff": + return "image/jpeg" + if data[:4] == b"RIFF" and data[8:12] == b"WEBP": + return "image/webp" + return "image/png" + + +class BrowserChat: + def __init__(self, browser: "Browser") -> None: + self._browser = browser + self._topic_id: str | None = browser.chat_topic_id + self._message_callbacks: list[MessageCallback] = [] + self._read_callbacks: list[ReadCallback] = [] + self._pending_sends: dict[str, asyncio.Future[dict]] = {} + self._action_queues: dict[int, asyncio.Queue[dict]] = {} + + async def send(self, text: str) -> dict: + if not self._topic_id: + raise RuntimeError("chat topic not assigned (rent did not return chat_topic_id)") + client_msg_id = uuid4().hex + loop = asyncio.get_event_loop() + fut: asyncio.Future[dict] = loop.create_future() + self._pending_sends[client_msg_id] = fut + try: + await self._browser._client._ws_send( + { + "type": "chat.send", + "session_id": self._browser.session_id, + "client_msg_id": client_msg_id, + "text": text, + } + ) + return await asyncio.wait_for(asyncio.shield(fut), timeout=15) + finally: + self._pending_sends.pop(client_msg_id, None) + + async def send_image( + self, + image: bytes | str | Path, + *, + mime: str | None = None, + filename: str | None = None, + ) -> dict: + if not self._topic_id: + raise RuntimeError("chat topic not assigned (rent did not return chat_topic_id)") + + if isinstance(image, (str, Path)): + path = Path(image) + data = path.read_bytes() + if mime is None: + guessed, _ = mimetypes.guess_type(str(path)) + mime = guessed or _detect_mime(data) + if filename is None: + filename = path.name + else: + data = image + if mime is None: + 
mime = _detect_mime(data) + if filename is None: + ext = {'image/png': 'png', 'image/jpeg': 'jpg', 'image/webp': 'webp'}.get(mime or '', 'bin') + filename = f'image-{uuid4().hex[:8]}.{ext}' + + if len(data) > MAX_IMAGE_BYTES: + raise ValueError(f"image too large, max 5MB (got {len(data)} bytes)") + + b64 = base64.b64encode(data).decode() + client_msg_id = uuid4().hex + loop = asyncio.get_event_loop() + fut: asyncio.Future[dict] = loop.create_future() + self._pending_sends[client_msg_id] = fut + try: + await self._browser._client._ws_send( + { + "type": "chat.send_image", + "session_id": self._browser.session_id, + "client_msg_id": client_msg_id, + "filename": filename, + "mime": mime, + "data_b64": b64, + } + ) + return await asyncio.wait_for(asyncio.shield(fut), timeout=15) + finally: + self._pending_sends.pop(client_msg_id, None) + + def on_message(self, callback: MessageCallback) -> None: + self._message_callbacks.append(callback) + + def on_read(self, callback: ReadCallback) -> None: + self._read_callbacks.append(callback) + + async def history( + self, + limit: int = 50, + before_id: int | None = None, + since: str | None = None, + ) -> list[ChatMessage]: + if not self._topic_id: + return [] + client = self._browser._client + base = client.chat_url.rstrip('/') + params: dict = {"topic_id": self._topic_id, "limit": limit} + if before_id is not None: + params["before"] = before_id + if since is not None: + params["since"] = since + req = httpx.Request( + "GET", + f"{base}/messages", + headers={"Authorization": f"Bearer {client.api_key}"}, + params=params, + ) + async with httpx.AsyncClient() as http: + resp = await http.send(req) + resp.raise_for_status() + data = resp.json() + items = data.get("messages", data.get("data", data)) if isinstance(data, dict) else data + return [ChatMessage.model_validate(m) for m in items] + + async def _on_message(self, payload: dict) -> None: + raw = payload.get("message") + if not isinstance(raw, dict): + 
log.warning("chat.message without nested 'message': %s", payload) + return + if raw.get("type") == "action" and isinstance(raw.get("action"), dict): + action = raw["action"] + eid = action.get("event_id") + if eid is not None: + queue = self._action_queues.get(int(eid)) + if queue is not None: + await queue.put(action) + try: + msg = ChatMessage.model_validate(raw) + except Exception as exc: + log.warning("invalid chat.message payload: %s", exc) + return + for cb in self._message_callbacks: + try: + asyncio.create_task(cast(Coroutine, cb(msg))) + except Exception as exc: + log.warning("chat on_message callback error: %s", exc) + + async def _on_read(self, payload: dict) -> None: + try: + receipt = ReadReceipt.model_validate(payload) + except Exception as exc: + log.warning("invalid chat.read payload: %s", exc) + return + for cb in self._read_callbacks: + try: + asyncio.create_task(cast(Coroutine, cb(receipt))) + except Exception as exc: + log.warning("chat on_read callback error: %s", exc) + + async def _on_send_ack(self, msg: dict) -> None: + client_msg_id = msg.get("client_msg_id", "") + if client_msg_id in self._pending_sends: + fut = self._pending_sends.pop(client_msg_id) + if not fut.done(): + fut.set_result( + { + "message_id": msg.get("message_id"), + "sent_at": msg.get("sent_at"), + } + ) + + async def _on_send_error(self, msg: dict) -> None: + client_msg_id = msg.get("client_msg_id", "") + status = msg.get("status", 0) + message = msg.get("message", "unknown") + if client_msg_id and client_msg_id in self._pending_sends: + fut = self._pending_sends.pop(client_msg_id) + if not fut.done(): + fut.set_exception(ChatSendFailed(status, message)) diff --git a/ceki_browser/_client.py b/ceki_browser/_client.py new file mode 100644 index 0000000..eb09a0b --- /dev/null +++ b/ceki_browser/_client.py @@ -0,0 +1,501 @@ +from __future__ import annotations + +import asyncio +import json +import logging +import time +from typing import Any + +import httpx +import websockets 
+import websockets.exceptions + +from ._browser import Browser +from ._exceptions import ( + AuthFailed, + CdpUnrecoverable, + CekiError, + ConnectionLost, + InsufficientFunds, + NotOwner, + RateLimitExceeded, + SessionEnded, + SessionExpired, + SessionNotFound, +) +from ._models import BrowserOption, Match + +log = logging.getLogger(__name__) + +BACKOFF_STEPS = [1, 2, 4, 8, 16, 32, 60] +MAX_RECONNECT_ATTEMPTS = 10 +HEARTBEAT_INTERVAL = 30.0 +HEARTBEAT_TIMEOUT = 90.0 + + +class Client: + def __init__( + self, + api_key: str, + relay_url: str, + api_url: str, + chat_url: str, + reconnect: bool = True, + basic_auth: tuple[str, str] | None = None, + ) -> None: + self.api_key = api_key + self.relay_url = relay_url + self.api_url = api_url + self.chat_url = chat_url + self.reconnect = reconnect + self._basic_auth = basic_auth + self._ws: websockets.WebSocketClientProtocol | None = None + self._heartbeat_task: asyncio.Task[None] | None = None + self._reader_task: asyncio.Task[None] | None = None + self._pending_rents: dict[str, asyncio.Future[Match]] = {} + self._pending_rent_queue: list[asyncio.Future[Match]] = [] + self._pending_resumes: dict[str, asyncio.Future[dict]] = {} + self._active_browsers: dict[str, Browser] = {} + self._backoff_attempt = 0 + self._last_pong = 0.0 + self._closed = False + self._stashed_first_frame: str | None = None + + def _ws_extra_headers(self) -> dict[str, str]: + if not self._basic_auth: + return {} + import base64 + creds = base64.b64encode( + f"{self._basic_auth[0]}:{self._basic_auth[1]}".encode() + ).decode() + return {"Authorization": f"Basic {creds}"} + + async def _connect(self) -> None: + subprotocols = [f"bearer.{self.api_key}"] + extra_headers = self._ws_extra_headers() + try: + self._ws = await websockets.connect( + self.relay_url, + subprotocols=subprotocols, # type: ignore[arg-type] + extra_headers=extra_headers, + open_timeout=20, + ) + except websockets.exceptions.InvalidStatusCode as exc: + if exc.status_code in (401, 403): 
+ raise AuthFailed(f"handshake rejected: {exc.status_code}") from exc + if exc.status_code == 429: + retry_after = 0 + try: + retry_after = int(exc.response_headers.get('Retry-After', 0)) + except (AttributeError, ValueError, TypeError): + pass + raise RateLimitExceeded(retry_after) from exc + raise + # Probe for immediate close (4401/4403 post-handshake auth rejection) + try: + first = await asyncio.wait_for(self._ws.recv(), timeout=1.0) + self._stashed_first_frame = first if isinstance(first, str) else first.decode() + except asyncio.TimeoutError: + self._stashed_first_frame = None + except websockets.exceptions.ConnectionClosedError as exc: + if exc.rcvd and exc.rcvd.code in (4401, 4403): + raise AuthFailed(f"ws closed with code {exc.rcvd.code}: {exc.rcvd.reason or 'auth_failed'}") from exc + raise + + self._last_pong = time.monotonic() + self._heartbeat_task = asyncio.create_task(self._heartbeat_loop(), name="heartbeat") + self._reader_task = asyncio.create_task(self._reader_loop(), name="reader") + log.info("connected to relay %s", self.relay_url) + + # ────────────────────────────────────────────────────────────────────────── + # Public API + # ────────────────────────────────────────────────────────────────────────── + + async def search( + self, filters: dict[str, Any] | None = None, limit: int = 20 + ) -> list[BrowserOption]: + url = f"{self.api_url}/api/browsers/search" + params: dict[str, Any] = {"limit": limit, **(filters or {})} + async with httpx.AsyncClient() as http: + resp = await http.get( + url, + headers={"Authorization": f"Bearer {self.api_key}"}, + params=params, + ) + resp.raise_for_status() + data = resp.json() + items = data.get("data", data) if isinstance(data, dict) else data + return [BrowserOption.model_validate(x) for x in items] + + async def list_sessions( + self, *, active: bool = True, limit: int = 50, + ) -> list["SessionInfo"]: + from ._models import SessionInfo + url = f"{self.api_url}/api/agent/sessions" + headers: dict[str, 
str] = {"Authorization": f"Bearer {self.api_key}"} + if self._basic_auth: + import base64 + creds = base64.b64encode(f"{self._basic_auth[0]}:{self._basic_auth[1]}".encode()).decode() + headers["X-Basic-Auth"] = f"Basic {creds}" + async with httpx.AsyncClient() as http: + resp = await http.get( + url, headers=headers, + params={"active": "1" if active else "0", "limit": limit}, + ) + resp.raise_for_status() + data = resp.json() + items = data.get("data", data) if isinstance(data, dict) else data + return [SessionInfo.model_validate(x) for x in items] + + async def my_browsers(self) -> list[BrowserOption]: + url = f"{self.api_url}/api/agent/browsers" + headers: dict[str, str] = {"Authorization": f"Bearer {self.api_key}"} + if self._basic_auth: + import base64 + creds = base64.b64encode(f"{self._basic_auth[0]}:{self._basic_auth[1]}".encode()).decode() + headers["X-Basic-Auth"] = f"Basic {creds}" + async with httpx.AsyncClient() as http: + resp = await http.get(url, headers=headers) + resp.raise_for_status() + data = resp.json() + items = data.get("browsers", data.get("data", data)) if isinstance(data, dict) else data + return [BrowserOption.model_validate(x) for x in items] + + async def rent( + self, + schedule_id: int, + *, + mode: str = "incognito", + human="natural", + masking_mode: bool = True, + fingerprint: bool | dict | None = True, + ) -> Browser: + if mode not in ("incognito", "main"): + raise ValueError(f"mode must be 'incognito' or 'main', got {mode!r}") + fut: asyncio.Future[Match] = asyncio.get_event_loop().create_future() + self._pending_rent_queue.append(fut) + msg: dict = {"type": "rent", "browser_id": schedule_id} + if mode != "incognito": + msg["mode"] = mode + await self._ws_send(msg) + try: + match = await asyncio.wait_for(fut, timeout=90) + except asyncio.TimeoutError: + try: + self._pending_rent_queue.remove(fut) + except ValueError: + pass + raise TimeoutError("rent timed out waiting for match") + browser = Browser(client=self, match=match, 
human=human) + self._active_browsers[match.session_id] = browser + if not masking_mode: + await browser.configure(masking_mode=False) + if isinstance(fingerprint, dict): + await browser.configure(fingerprint=fingerprint) + elif fingerprint is False or fingerprint is None: + await browser.configure(fingerprint=False) + return browser + + async def resume(self, session_id: str, *, human="natural") -> Browser: + fut: asyncio.Future[dict] = asyncio.get_event_loop().create_future() + self._pending_resumes[session_id] = fut + await self._ws_send({"type": "resume", "session_id": session_id}) + try: + resp = await asyncio.wait_for(fut, timeout=10) + except asyncio.TimeoutError: + self._pending_resumes.pop(session_id, None) + raise TimeoutError("resume timed out") + match = Match.model_validate(resp) + browser = Browser(client=self, match=match, human=human) + self._active_browsers[match.session_id] = browser + return browser + + async def close(self) -> None: + self._closed = True + for browser in list(self._active_browsers.values()): + await browser.close() + if self._heartbeat_task and not self._heartbeat_task.done(): + self._heartbeat_task.cancel() + if self._reader_task and not self._reader_task.done(): + self._reader_task.cancel() + if self._ws: + await self._ws.close() + self._ws = None + + async def disconnect(self) -> None: + """Close the WS without ending active sessions (for resume-pattern).""" + self._closed = True + self._active_browsers.clear() + if self._heartbeat_task and not self._heartbeat_task.done(): + self._heartbeat_task.cancel() + if self._reader_task and not self._reader_task.done(): + self._reader_task.cancel() + if self._ws: + await self._ws.close() + self._ws = None + + # ────────────────────────────────────────────────────────────────────────── + # Internal helpers + # ────────────────────────────────────────────────────────────────────────── + + async def _ws_send(self, msg: dict[str, Any]) -> None: + if self._ws is None: + raise 
ConnectionLost("no websocket connection") + await self._ws.send(json.dumps(msg)) + + async def _heartbeat_loop(self) -> None: + while not self._closed: + await asyncio.sleep(HEARTBEAT_INTERVAL) + if self._closed: + break + try: + await self._ws_send({"type": "ping"}) + except Exception: + break + if time.monotonic() - self._last_pong > HEARTBEAT_TIMEOUT: + log.warning("heartbeat timeout, forcing reconnect") + if self._ws: + await self._ws.close() + break + + async def _reader_loop(self) -> None: + if self._stashed_first_frame is not None: + try: + msg: dict[str, Any] = json.loads(self._stashed_first_frame) + self._stashed_first_frame = None + await self._dispatch(msg) + except Exception as exc: + log.error("error dispatching stashed frame: %s", exc) + while not self._closed: + try: + assert self._ws is not None + raw = await self._ws.recv() + msg = json.loads(raw) + await self._dispatch(msg) + except websockets.exceptions.ConnectionClosedError as exc: + if exc.rcvd and exc.rcvd.code in (4401, 4403): + # Server rejected auth post-handshake + self._closed = True + for fut in list(self._pending_rent_queue): + if not fut.done(): + fut.set_exception(AuthFailed(f"ws closed with code {exc.rcvd.code}: {exc.rcvd.reason}")) + self._pending_rent_queue.clear() + return + if not self._closed and self.reconnect: + asyncio.create_task(self._reconnect_loop()) + else: + self._fail_pending(ConnectionLost("connection closed")) + break + except ( + websockets.exceptions.ConnectionClosed, + websockets.exceptions.ConnectionClosedOK, + ): + if not self._closed and self.reconnect: + asyncio.create_task(self._reconnect_loop()) + else: + self._fail_pending(ConnectionLost("connection closed")) + break + except asyncio.CancelledError: + break + except Exception as exc: + log.error("reader error: %s", exc) + + async def _dispatch(self, msg: dict[str, Any]) -> None: + mtype = msg.get("type") + if mtype == "pong": + self._last_pong = time.monotonic() + return + if mtype == "rent_pending": + 
server_event_id = msg.get("event_id") + if self._pending_rent_queue and server_event_id: + fut = self._pending_rent_queue.pop(0) + if not fut.done(): + self._pending_rents[str(server_event_id)] = fut + return + if mtype == "rent.error": + code = msg.get("code", "") + message = msg.get("message", "rent failed") + server_event_id = str(msg.get("event_id", "")) if msg.get("event_id") is not None else None + from ._exceptions import ProviderOffline + exc_to_raise: Exception = ProviderOffline(message) if code == "provider_offline" else CekiError(message) + fut: asyncio.Future[Match] | None = None + if server_event_id: + fut = self._pending_rents.pop(server_event_id, None) + if fut is None and self._pending_rent_queue: + fut = self._pending_rent_queue.pop(0) + if fut and not fut.done(): + fut.set_exception(exc_to_raise) + return + if mtype == "match": + if msg.get("requires_ack"): + session_id = msg.get("session_id", "") + try: + await self._ws_send({"type": "match_ack", "session_id": session_id}) + except Exception: + pass + server_event_id = str(msg.get("event_id", "")) + fut = self._pending_rents.pop(server_event_id, None) + if fut and not fut.done(): + fut.set_result(Match.model_validate(msg)) + return + if mtype == "resume_ok": + sid = msg.get("session_id", "") + fut = self._pending_resumes.pop(sid, None) + if fut and not fut.done(): + fut.set_result(msg) + return + if mtype == "resume_failed": + sid = msg.get("session_id", "") + reason = msg.get("reason", "unknown") + fut = self._pending_resumes.pop(sid, None) + exc: Exception + if reason == "not_owner": + exc = NotOwner(f"session {sid}: not owner") + elif reason == "expired": + exc = SessionExpired(f"session {sid}: expired") + else: + exc = SessionNotFound(f"session {sid}: {reason}") + if fut and not fut.done(): + fut.set_exception(exc) + return + if mtype == "cdp_response": + session_id = msg.get("session_id", "") + browser = self._active_browsers.get(session_id) + if browser: + await 
browser._on_cdp_response(msg) + return + if mtype == "cdp_event": + session_id = msg.get("session_id", "") + browser = self._active_browsers.get(session_id) + if browser: + await browser._on_cdp_event(msg) + return + if mtype == "tab_opened": + session_id = msg.get("session_id", "") + browser = self._active_browsers.get(session_id) + if browser: + await browser._on_tab_opened(msg) + return + if mtype in ("session.ended", "session_end"): + session_id = msg.get("session_id", "") + browser = self._active_browsers.get(session_id) + if browser: + await browser._on_session_ended(msg) + return + if mtype == "session.provider_disconnected": + session_id = msg.get("session_id", "") + browser = self._active_browsers.get(session_id) + if browser: + await browser._on_provider_disconnected(msg) + return + if mtype == "session.provider_reconnected": + session_id = msg.get("session_id", "") + browser = self._active_browsers.get(session_id) + if browser: + await browser._on_provider_reconnected(msg) + return + if mtype == "user_events": + session_id = msg.get("session_id", "") + browser = self._active_browsers.get(session_id) + if browser: + await browser._on_user_events(msg) + return + if mtype == "chat.message": + session_id = msg.get("session_id", "") + browser = self._active_browsers.get(session_id) + if browser: + await browser.chat._on_message(msg.get("payload", msg)) + return + if mtype == "chat.read": + session_id = msg.get("session_id", "") + browser = self._active_browsers.get(session_id) + if browser: + await browser.chat._on_read(msg.get("payload", msg)) + return + if mtype == "chat.send_ack": + session_id = msg.get("session_id", "") + browser = self._active_browsers.get(session_id) + if browser: + await browser.chat._on_send_ack(msg) + return + if mtype == "chat.error": + session_id = msg.get("session_id", "") + browser = self._active_browsers.get(session_id) + if browser: + asyncio.create_task(browser.chat._on_send_error(msg)) + return + if mtype == "error": + 
session_id = msg.get("session_id") + if session_id and session_id in self._active_browsers: + await self._active_browsers[session_id]._on_error(msg) + else: + self._handle_error(msg) + return + + def _handle_error(self, msg: dict[str, Any]) -> None: + code = msg.get("code", 0) + server_event_id = msg.get("event_id") + msg_text = msg.get("reason") or msg.get("message") + if code == -1013: + exc: Exception = RateLimitExceeded(retry_after=float(msg.get("retry_after", 1.0))) + elif code == -1012: + exc = InsufficientFunds() + elif code in (-1011, -1018): + exc = SessionEnded(reason=msg_text or "ended") + elif code == -1015: + from ._exceptions import ProviderOffline + exc = ProviderOffline(msg_text or "no_providers") + elif code == -1050: + exc = CdpUnrecoverable(last_error=msg_text or "cdp_error") + else: + exc = CekiError(f"relay error {code}: {msg_text}") + + if server_event_id: + fut = self._pending_rents.pop(str(server_event_id), None) + if fut and not fut.done(): + fut.set_exception(exc) + return + + # Early error before rent_pending (e.g. 
-1014 rent failed, -1013 rate limit) + if self._pending_rent_queue: + fut = self._pending_rent_queue.pop(0) + if not fut.done(): + fut.set_exception(exc) + return + + log.error("unhandled relay error: %s", msg) + + def _fail_pending(self, exc: Exception) -> None: + for fut in list(self._pending_rent_queue): + if not fut.done(): + fut.set_exception(exc) + self._pending_rent_queue.clear() + for fut in list(self._pending_rents.values()): + if not fut.done(): + fut.set_exception(exc) + self._pending_rents.clear() + + async def _reconnect_loop(self) -> None: + for attempt in range(MAX_RECONNECT_ATTEMPTS): + delay = BACKOFF_STEPS[min(attempt, len(BACKOFF_STEPS) - 1)] + log.info("reconnect attempt %d in %ds", attempt + 1, delay) + await asyncio.sleep(delay) + try: + self._ws = await websockets.connect( + self.relay_url, + subprotocols=[f"bearer.{self.api_key}"], # type: ignore[arg-type,list-item] + extra_headers=self._ws_extra_headers(), + open_timeout=20, + ) + self._last_pong = time.monotonic() + self._heartbeat_task = asyncio.create_task( + self._heartbeat_loop(), name="heartbeat" + ) + self._reader_task = asyncio.create_task(self._reader_loop(), name="reader") + log.info("reconnected successfully") + return + except Exception as exc: + log.warning("reconnect attempt %d failed: %s", attempt + 1, exc) + + log.error("max reconnect attempts reached") + self._fail_pending(ConnectionLost("max reconnect attempts reached")) diff --git a/ceki_browser/_config.py b/ceki_browser/_config.py new file mode 100644 index 0000000..1b1c791 --- /dev/null +++ b/ceki_browser/_config.py @@ -0,0 +1,19 @@ +import os + +DEFAULT_API_URL = "https://api.ceki.me" +DEFAULT_RELAY_URL = "wss://browser.ceki.me/ws/agent" + + +def default_api_url() -> str: + return os.getenv("CEKI_API_URL") or DEFAULT_API_URL + + +def default_relay_url() -> str: + return os.getenv("CEKI_RELAY_URL") or DEFAULT_RELAY_URL + + +DEFAULT_CHAT_URL = "https://chat.ceki.me/api/chat" + + +def default_chat_url() -> str: + return 
class CekiError(Exception):
    """Base class of the SDK's exception hierarchy."""


class AuthFailed(CekiError):
    """Authentication with the service failed."""


class RateLimitExceeded(CekiError):
    """The relay rejected a request because of rate limiting.

    Attributes:
        retry_after: Seconds to wait before retrying.
    """

    def __init__(self, retry_after: float = 1.0, message: str = "rate_limit"):
        super().__init__(message)
        self.retry_after = retry_after

    def __reduce__(self):
        # ``self.args`` holds only the message, so the default reduce would
        # rebuild the exception with the message in the ``retry_after`` slot.
        # Subclasses that change the constructor signature must override this.
        return (type(self), (self.retry_after, str(self)), self.__dict__)


class InsufficientFunds(CekiError):
    """The account balance does not cover the requested operation."""


class SessionEnded(CekiError):
    """The rental session is over.

    Attributes:
        reason: Text explaining why the session ended.
    """

    def __init__(self, reason: str):
        super().__init__(reason)
        self.reason = reason


class CdpUnrecoverable(CekiError):
    """A CDP failure that cannot be recovered from.

    Attributes:
        last_error: Text of the last error reported.
    """

    def __init__(self, last_error: str):
        super().__init__(last_error)
        self.last_error = last_error


class ConnectionLost(CekiError):
    """The connection to the relay was lost."""


class ChatSendFailed(CekiError):
    """Sending a chat message failed.

    Attributes:
        status: Status code reported for the failed send.
        message_text: Error text reported for the failed send.
    """

    def __init__(self, status: int, message: str):
        super().__init__(f"chat send failed [{status}]: {message}")
        self.status = status
        self.message_text = message

    def __reduce__(self):
        # ``self.args`` is the single formatted string, which does not match
        # the two-argument constructor; without this, unpickling or copying
        # raises TypeError.
        return (type(self), (self.status, self.message_text), self.__dict__)


class ProviderOffline(CekiError):
    """No provider is available for the request."""


class ProviderDisconnected(CekiError):
    """Provider's browser disconnected during rental and didn't reconnect within grace period."""


class SessionNotFound(CekiError):
    """The referenced session does not exist."""


class SessionExpired(SessionNotFound):
    """The referenced session has expired."""


class NotOwner(CekiError):
    """The caller does not own the referenced resource."""


class CaptchaError(CekiError):
    """Base class for captcha-related failures."""


class CaptchaTimeoutError(CaptchaError):
    """A captcha request timed out.

    Attributes:
        phase: Name of the phase that timed out.
    """

    def __init__(self, phase: str):
        super().__init__(f"captcha timeout: {phase}")
        self.phase = phase

    def __reduce__(self):
        # Rebuild from the raw phase; the default reduce would feed the already
        # formatted message back in and double the "captcha timeout:" prefix.
        return (type(self), (self.phase,), self.__dict__)
class SessionInfo(BaseModel):
    """Summary record for one rental session.

    NOTE(review): this looks like the element type returned by
    ``client.list_sessions()`` (the CLI ``sessions`` command reads exactly
    these fields) — confirm against the client implementation.
    """

    # Keys not declared below are dropped instead of failing validation.
    model_config = ConfigDict(extra="ignore")

    id: int
    schedule_id: int
    started_at: datetime | None = None
    ended_at: datetime | None = None
    status: str = "active"
    # The CLI renders this as minutes:seconds via divmod(duration, 60),
    # i.e. it treats the value as a number of seconds.
    duration: int = 0
    # Printed by the CLI with a "$" prefix and two decimals.
    earned: float = 0.0
    price_per_min: float = 0.0
    # The CLI reads only the "name" key of these two dicts.
    renter: dict = {}
    provider: dict = {}
    data: dict = {}
async def import_(self, profile: dict[str, Any]) -> None:
    """Load a previously exported profile into the live session.

    Only cookies and web storage are restored. The fingerprint stored in the
    profile is ignored here: it has to be handed to
    ``client.rent(fingerprint=...)`` before the session exists.

    Cookies are domain-scoped and may be set before the first navigation.
    ``localStorage`` / ``sessionStorage`` need a document, so navigate to the
    target origin before calling this method.

    Args:
        profile: Mapping as produced by ``export()``. A missing
            ``schema_version`` is treated as version 1.

    Raises:
        ValueError: If the profile's ``schema_version`` is not supported.
    """
    schema = profile.get("schema_version", 1)
    if schema not in SUPPORTED_SCHEMA_VERSIONS:
        raise ValueError(
            f"unsupported profile schema_version={schema}, expected one of {SUPPORTED_SCHEMA_VERSIONS}"
        )

    saved_cookies = profile.get("cookies", [])
    if saved_cookies:
        request = {"method": "Network.setCookies", "params": {"cookies": saved_cookies}}
        await self._browser.send(request)

    # The profile key and the JS global share a name, so one loop restores
    # both stores: localStorage first, then sessionStorage.
    for storage_name in ("localStorage", "sessionStorage"):
        entries = profile.get(storage_name, {})
        if not entries:
            continue
        script = (
            f"Object.entries({json.dumps(entries)})"
            f".forEach(([k,v]) => {storage_name}.setItem(k, v))"
        )
        await self._browser.send({
            "method": "Runtime.evaluate",
            "params": {"expression": script},
        })
# Per-user directory holding one JSON state file per rental session.
_STATE_DIR = Path.home() / ".ceki" / "sessions"


def _ensure_dir() -> None:
    """Create the state directory (and its parents) if it does not exist yet."""
    _STATE_DIR.mkdir(parents=True, exist_ok=True)


def _session_path(sid: str) -> Path:
    """Return the state-file path for *sid*, refusing ids that could escape the state dir.

    Session ids come from the command line and from the relay, so they must
    not be trusted as path components.

    Raises:
        ValueError: If *sid* is empty or contains a path separator or NUL.
    """
    name = str(sid)
    if not name or any(ch in name for ch in ("/", "\\", "\0")):
        raise ValueError(f"invalid session id: {sid!r}")
    return _STATE_DIR / f"{name}.json"


def load_session(sid: str) -> dict[str, Any] | None:
    """Return the stored state for *sid*, or ``None`` when nothing is stored.

    Raises:
        ValueError: If *sid* is not a valid session id.
        json.JSONDecodeError: If the state file exists but is not valid JSON.
    """
    try:
        # Read directly rather than exists()-then-read: the file may be
        # deleted by another CLI process in between.
        raw = _session_path(sid).read_text(encoding="utf-8")
    except FileNotFoundError:
        return None
    return json.loads(raw)


def save_session(sid: str, data: dict[str, Any]) -> None:
    """Persist *data* for *sid*, stamped with the current UTC time as ``updated_at``.

    The caller's dict is left untouched. The file is written to a temporary
    sibling and renamed into place, so a concurrent reader never sees a
    half-written file.

    Raises:
        ValueError: If *sid* is not a valid session id.
    """
    import os  # function-scope: this module does not import os at file level

    path = _session_path(sid)
    _ensure_dir()
    payload = {**data, "updated_at": datetime.now(timezone.utc).isoformat()}
    # The pid keeps concurrent writers from sharing one temporary file.
    tmp = path.with_name(f"{path.name}.{os.getpid()}.tmp")
    try:
        tmp.write_text(json.dumps(payload), encoding="utf-8")
        tmp.replace(path)
    finally:
        # No-op after a successful rename; cleans up after a failed write.
        tmp.unlink(missing_ok=True)


def delete_session(sid: str) -> None:
    """Remove the stored state for *sid*; a missing file is not an error."""
    _session_path(sid).unlink(missing_ok=True)


def get_last_seen_ts(sid: str) -> str | None:
    """Return the ``last_seen_ts`` stored for *sid*, or ``None`` if absent."""
    data = load_session(sid)
    if data is None:
        return None
    return data.get("last_seen_ts")


def update_last_seen_ts(sid: str, ts: str) -> None:
    """Store *ts* as ``last_seen_ts`` for *sid*, keeping the other stored keys."""
    data = load_session(sid) or {}
    data["last_seen_ts"] = ts
    save_session(sid, data)
.types import ChatMessage, TypingEvent, parse_chat_message - -logger = logging.getLogger("ceki_browser") - -Unsubscribe = Callable[[], None] - - -class ChatAPI: - def __init__(self, transport: Transport, session_id: str, topic_id: str | None): - self._transport = transport - self._session_id = session_id - self._topic_id = topic_id - self._message_handlers: list[Callable[[ChatMessage], None]] = [] - self._typing_handlers: list[Callable[[TypingEvent], None]] = [] - - @property - def topic_id(self) -> str | None: - return self._topic_id - - @property - def available(self) -> bool: - return self._topic_id is not None - - def _set_topic_id(self, topic_id: str) -> None: - self._topic_id = topic_id - - async def send(self, text: str) -> ChatMessage: - data = await self._transport.send( - "chat.send", - {"session_id": self._session_id, "type": "text", "content": text}, - timeout=15.0, - ) - result = data if isinstance(data, dict) else {} - return ChatMessage( - _id=result.get("message_id", ""), - topic_id=self._topic_id or "", - author_id=0, - author_name="", - type="text", - content=text, - media=None, - created_at=result.get("created_at", ""), - ) - - async def send_image( - self, - image: bytes | Path | str, - mime: str = "image/png", - ) -> ChatMessage: - if isinstance(image, (str, Path)): - path = Path(image) - raw = path.read_bytes() - ext = path.suffix.lower() - if ext in (".jpg", ".jpeg"): - mime = "image/jpeg" - elif ext == ".gif": - mime = "image/gif" - elif ext == ".webp": - mime = "image/webp" - else: - raw = image - - b64 = base64.b64encode(raw).decode("ascii") - name = f"image.{mime.split('/')[-1]}" - - data = await self._transport.send( - "chat.send", - { - "session_id": self._session_id, - "type": "image", - "content": "", - "media": {"data": b64, "mime": mime, "name": name}, - }, - timeout=30.0, - ) - result = data if isinstance(data, dict) else {} - return ChatMessage( - _id=result.get("message_id", ""), - topic_id=self._topic_id or "", - author_id=0, - 
author_name="", - type="image", - content="", - media=None, - created_at=result.get("created_at", ""), - ) - - async def history( - self, - before: str | None = None, - limit: int = 50, - ) -> list[ChatMessage]: - if not self._topic_id: - logger.warning("chat.history called without topic_id — returning empty") - return [] - - params: dict[str, Any] = {"session_id": self._session_id, "limit": limit} - if before: - params["before"] = before - - data = await self._transport.send("chat.history", params, timeout=15.0) - result = data if isinstance(data, dict) else {} - messages = result.get("messages", []) - return [parse_chat_message(m) for m in messages if isinstance(m, dict)] - - async def mark_read(self, last_message_id: str) -> None: - if not self._topic_id: - return - await self._transport.send( - "chat.read", - {"session_id": self._session_id, "last_message_id": last_message_id}, - timeout=10.0, - ) - - async def typing(self, is_typing: bool = True) -> None: - await self._transport.notify( - "chat.typing", - {"session_id": self._session_id, "is_typing": is_typing}, - ) - - def on_message(self, handler: Callable[[ChatMessage], None]) -> Unsubscribe: - self._message_handlers.append(handler) - - def unsub() -> None: - try: - self._message_handlers.remove(handler) - except ValueError: - pass - - return unsub - - def on_typing(self, handler: Callable[[TypingEvent], None]) -> Unsubscribe: - self._typing_handlers.append(handler) - - def unsub() -> None: - try: - self._typing_handlers.remove(handler) - except ValueError: - pass - - return unsub - - def _dispatch_message(self, params: dict[str, Any]) -> None: - msg_data = params.get("message", params) - if isinstance(msg_data, dict): - msg = parse_chat_message(msg_data) - for h in self._message_handlers: - try: - h(msg) - except Exception: - logger.exception("Error in chat message handler") - - def _dispatch_typing(self, params: dict[str, Any]) -> None: - event = TypingEvent( - user_id=params.get("user_id", 0), - 
is_typing=bool(params.get("is_typing", False)), - ) - for h in self._typing_handlers: - try: - h(event) - except Exception: - logger.exception("Error in chat typing handler") diff --git a/ceki_browser/chat_direct.py b/ceki_browser/chat_direct.py deleted file mode 100644 index 709bd45..0000000 --- a/ceki_browser/chat_direct.py +++ /dev/null @@ -1,172 +0,0 @@ -from __future__ import annotations - -import asyncio -import json -import logging -import os -from typing import Any, Awaitable, Callable - -import aiohttp -import websockets - -logger = logging.getLogger("ceki_browser") - -DEFAULT_CHAT_SERVICE_URL = os.environ.get( - "CEKI_CHAT_SERVICE_URL", "https://chat.ceki.me" -) - -MAX_RECONNECT_ATTEMPTS = 10 -BASE_RECONNECT_DELAY = 1.0 - - -class ChatClient: - def __init__( - self, - token: str, - topic_id: str, - base_url: str = DEFAULT_CHAT_SERVICE_URL, - ): - self._token = token - self._topic_id = topic_id - self._base_url = base_url.rstrip("/") - self._ws_url = self._base_url.replace("https://", "wss://").replace("http://", "ws://") + "/ws" - self._last_known_id: str | None = None - self._ws: Any = None - self._listen_task: asyncio.Task[None] | None = None - self._closed = False - - @property - def topic_id(self) -> str: - return self._topic_id - - async def history( - self, - after: str | None = None, - before: str | None = None, - limit: int = 50, - ) -> list[dict[str, Any]]: - params: dict[str, Any] = {"limit": str(limit)} - if after: - params["after"] = after - elif before: - params["before"] = before - - url = f"{self._base_url}/api/chat/topics/{self._topic_id}/messages" - async with aiohttp.ClientSession() as session: - async with session.get( - url, - params=params, - headers={"Authorization": f"Bearer {self._token}"}, - ) as resp: - if resp.status != 200: - text = await resp.text() - raise RuntimeError(f"chat-service returned {resp.status}: {text}") - data = await resp.json() - - msgs = data.get("messages", []) - if msgs: - self._last_known_id = 
msgs[-1].get("_id") or self._last_known_id - return msgs - - async def send(self, body: str, msg_type: str = "text") -> dict[str, Any]: - url = f"{self._base_url}/api/chat/topics/{self._topic_id}/messages" - payload = {"type": msg_type, "content": body} - async with aiohttp.ClientSession() as session: - async with session.post( - url, - json=payload, - headers={"Authorization": f"Bearer {self._token}"}, - ) as resp: - if resp.status not in (200, 201): - text = await resp.text() - raise RuntimeError(f"chat-service returned {resp.status}: {text}") - return await resp.json() - - async def subscribe( - self, - on_message: Callable[[dict[str, Any]], Awaitable[None] | None], - ) -> None: - if self._listen_task and not self._listen_task.done(): - return - - if not self._last_known_id: - msgs = await self.history(limit=1) - if msgs: - self._last_known_id = msgs[-1].get("_id") - - self._listen_task = asyncio.get_event_loop().create_task( - self._ws_loop(on_message) - ) - - async def _ws_loop( - self, - on_message: Callable[[dict[str, Any]], Awaitable[None] | None], - ) -> None: - attempt = 0 - while not self._closed: - try: - self._ws = await websockets.connect(self._ws_url) - attempt = 0 - - await self._ws.send(json.dumps({ - "action": "auth", - "token": f"Bearer {self._token}", - })) - auth_resp = json.loads(await self._ws.recv()) - if auth_resp.get("type") == "error": - logger.error("WS auth failed: %s", auth_resp) - break - - await self._ws.send(json.dumps({ - "action": "subscribe", - "topic_id": self._topic_id, - })) - sub_resp = json.loads(await self._ws.recv()) - logger.debug("WS subscribe response: %s", sub_resp) - - if self._last_known_id: - missed = await self.history(after=self._last_known_id, limit=200) - for msg in missed: - result = on_message(msg) - if asyncio.iscoroutine(result): - await result - - async for raw in self._ws: - if self._closed: - break - event = json.loads(raw) - if event.get("event") == "message": - msg = event.get("message", {}) - msg_topic 
= str(msg.get("topic_id", "")) - if msg_topic == self._topic_id: - msg_id = str(msg.get("_id", "")) - if msg_id: - self._last_known_id = msg_id - result = on_message(msg) - if asyncio.iscoroutine(result): - await result - - except (websockets.ConnectionClosed, OSError) as e: - if self._closed: - break - attempt += 1 - if attempt > MAX_RECONNECT_ATTEMPTS: - logger.error("WS reconnect limit reached (%d)", MAX_RECONNECT_ATTEMPTS) - break - delay = min(BASE_RECONNECT_DELAY * (2 ** (attempt - 1)), 30) - logger.warning("WS disconnected (%s), reconnecting in %.1fs (attempt %d)", e, delay, attempt) - await asyncio.sleep(delay) - except asyncio.CancelledError: - break - - async def close(self) -> None: - self._closed = True - if self._listen_task and not self._listen_task.done(): - self._listen_task.cancel() - try: - await self._listen_task - except asyncio.CancelledError: - pass - if self._ws: - await self._ws.close() - self._ws = None diff --git a/ceki_browser/cli.py b/ceki_browser/cli.py new file mode 100644 index 0000000..fce682c --- /dev/null +++ b/ceki_browser/cli.py @@ -0,0 +1,583 @@ +from __future__ import annotations + +import argparse +import asyncio +import json +import os +import sys +from pathlib import Path +from typing import Any + +from . 
def _json_default(value: Any) -> Any:
    """Convert the non-JSON values that CLI payloads can carry.

    ``model_dump()`` on models with ``datetime`` fields (for example
    ``SessionInfo.started_at``) yields ``datetime`` objects; they are written
    as ISO-8601 strings. Any other unsupported type still raises ``TypeError``
    rather than being silently stringified.
    """
    from datetime import date  # function-scope: not imported at file level

    # ``datetime`` is a subclass of ``date``, so this covers both.
    if isinstance(value, date):
        return value.isoformat()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def _out(data: Any) -> None:
    """Write *data* to stdout as one line of JSON and flush.

    The payload is serialised completely before anything is written, so a
    serialisation error cannot leave a truncated JSON document on stdout.

    Raises:
        TypeError: If *data* contains a value that cannot be serialised.
    """
    text = json.dumps(data, default=_json_default)
    sys.stdout.write(text)
    sys.stdout.write("\n")
    sys.stdout.flush()
async def _resume_browser(api_key: str, session_id: str):
    """Connect to the relay and re-attach to an existing rental session.

    Args:
        api_key: API key used to open the client connection.
        session_id: Id of the session to resume.

    Returns:
        A ``(client, browser)`` pair. The caller owns the client and must
        disconnect it when done.

    Raises:
        Whatever ``connect()`` or ``client.resume()`` raises. If resuming
        fails, the freshly opened client is disconnected before re-raising.
    """
    client = await connect(api_key, _connect_options())
    try:
        browser = await client.resume(session_id)
    except BaseException:
        # The caller never receives the client on this path, so it would leak
        # if it were not closed here.  BaseException so that cancellation is
        # covered too; the original error is always re-raised.
        if client._ws:
            await client.disconnect()
        raise
    return client, browser
async def _cmd_chat(args: argparse.Namespace) -> None:
    """Run one ``chat`` sub-action against a resumed session and print the result as JSON.

    Sub-actions, selected by ``args.chat_action``:

    * ``send``: send ``args.text``; prints ``{"ok", "message_id"}``.
    * ``next``: print the first message newer than the stored last-seen
      timestamp. If there is none, wait up to ``args.timeout`` seconds for a
      live message and print it, or print ``null`` on timeout. The last-seen
      timestamp is advanced to the printed message.
    * ``history``: print messages, optionally since ``args.since`` (Unix
      timestamp or ISO-8601 string), limited to ``args.limit``.
    * ``send-image``: optionally send ``args.text`` first, then the image at
      ``args.image``; prints ``{"ok", "message_id"}``.

    The client is disconnected on every exit path.
    """
    api_key = _get_api_key()
    client, browser = await _resume_browser(api_key, args.session_id)
    try:
        if args.chat_action == "send":
            result = await browser.chat.send(args.text)
            _out({"ok": True, "message_id": result.get("message_id")})
        elif args.chat_action == "next":
            last_seen = get_last_seen_ts(args.session_id)
            msgs = await browser.chat.history(since=last_seen)
            if msgs:
                m = msgs[0]
                update_last_seen_ts(args.session_id, m.created_at)
                _out({"from": m.sender_id, "text": m.text, "ts": m.created_at})
            else:
                # Nothing pending: block until the next live message arrives.
                got_msg = asyncio.Event()
                result_msg: dict = {}

                async def on_msg(msg):
                    nonlocal result_msg
                    result_msg = {"from": msg.sender_id, "text": msg.text, "ts": msg.created_at}
                    got_msg.set()

                browser.chat.on_message(on_msg)
                try:
                    await asyncio.wait_for(got_msg.wait(), timeout=args.timeout)
                    update_last_seen_ts(args.session_id, result_msg["ts"])
                    _out(result_msg)
                except asyncio.TimeoutError:
                    _out(None)
        elif args.chat_action == "history":
            since = None
            if args.since:
                try:
                    ts_val = float(args.since)
                    from datetime import datetime, timezone
                    since = datetime.fromtimestamp(ts_val, tz=timezone.utc).isoformat()
                except (ValueError, OverflowError, OSError):
                    # Not a usable Unix timestamp: either not a number, or a
                    # number fromtimestamp() cannot represent (it raises
                    # OverflowError/OSError for out-of-range values).  Pass
                    # the raw text through as an ISO-8601 string instead.
                    since = args.since
            msgs = await browser.chat.history(since=since, limit=args.limit)
            _out([{"from": m.sender_id, "text": m.text, "ts": m.created_at} for m in msgs])
        elif args.chat_action == "send-image":
            if args.text:
                await browser.chat.send(args.text)
            result = await browser.chat.send_image(Path(args.image))
            _out({"ok": True, "message_id": result.get("message_id")})
    finally:
        if client._ws:
            await client.disconnect()
async def _cmd_profile(args: argparse.Namespace) -> None:
    """Export the session's browser profile to a JSON file, or import one from a file.

    ``args.profile_action`` picks the direction:

    * ``export``: write the profile to ``args.output``. ``args.domains`` is an
      optional comma-separated cookie-domain filter, and
      ``args.no_session_storage`` leaves sessionStorage out.
    * ``import``: read ``args.input`` and restore it into the session.

    Prints ``{"ok": true}`` (plus ``path`` for export) and always disconnects
    the client afterwards.
    """
    api_key = _get_api_key()
    client, browser = await _resume_browser(api_key, args.session_id)
    try:
        if args.profile_action == "export":
            # An empty or missing --domains means "no filter".
            wanted_domains = (
                [piece.strip() for piece in args.domains.split(",")]
                if args.domains
                else None
            )
            exported = await browser.profile.export(
                domains=wanted_domains,
                include_session_storage=not args.no_session_storage,
            )
            with open(args.output, "w") as sink:
                json.dump(exported, sink)
            _out({"ok": True, "path": args.output})
        elif args.profile_action == "import":
            with open(args.input, "r") as source:
                loaded = json.load(source)
            await browser.profile.import_(loaded)
            _out({"ok": True})
    finally:
        if client._ws:
            await client.disconnect()
async def _cmd_screenshot(args: argparse.Namespace) -> None:
    """Capture a screenshot of the resumed session and write it to ``args.output``.

    Reads ``args.format`` (the parser offers ``png`` and ``jpeg``) and
    ``args.full`` (capture the full page rather than the viewport). Prints
    ``{"ok": true, "path": ...}`` and always disconnects the client.
    """
    api_key = _get_api_key()
    client, browser = await _resume_browser(api_key, args.session_id)
    try:
        # Honour --format; it used to be parsed but then ignored in favour of
        # a hard-coded "png".  Fall back to "png" if the attribute is absent.
        image_format = getattr(args, "format", None) or "png"
        data = await browser.screenshot(format=image_format, full_page=args.full)
        with open(args.output, "wb") as f:
            f.write(data)
        _out({"ok": True, "path": args.output})
    finally:
        if client._ws:
            await client.disconnect()
async def _cmd_cdp(args: argparse.Namespace) -> None:
    """Send one raw CDP command to the resumed session and print its result as JSON.

    ``args.method`` is the CDP method name. ``args.params`` is an optional
    JSON document with its parameters; when absent or empty, ``{}`` is sent.

    Invalid JSON in ``args.params`` is reported through ``_err`` and exits
    with status 1 before any connection is opened, matching how
    ``_cmd_upload`` reports bad input.
    """
    api_key = _get_api_key()
    # Validate the input before connecting, so a typo costs no relay round-trip.
    try:
        params = json.loads(args.params) if args.params else {}
    except json.JSONDecodeError as e:
        _err(f"invalid params JSON: {e}")
        sys.exit(1)
    client, browser = await _resume_browser(api_key, args.session_id)
    try:
        result = await browser.send({"method": args.method, "params": params})
        _out(result)
    finally:
        if client._ws:
            await client.disconnect()
def _add_session_parser(sub, name: str, help_text: str) -> argparse.ArgumentParser:
    """Register subcommand *name* whose first positional is the session ID."""
    cmd = sub.add_parser(name, help=help_text)
    cmd.add_argument("session_id", help="Session ID")
    return cmd


def build_parser() -> argparse.ArgumentParser:
    """Build the ``ceki-browser`` argument parser.

    The chosen subcommand is stored in ``args.command``; the nested ``chat``
    and ``profile`` actions are stored in ``args.chat_action`` and
    ``args.profile_action``. All three are required.
    """
    parser = argparse.ArgumentParser(prog="ceki-browser", description="CLI for ceki.me browser rental")
    sub = parser.add_subparsers(dest="command", required=True)

    p_rent = sub.add_parser("rent", help="Rent a browser")
    p_rent.add_argument("--schedule", type=int, required=True, help="Schedule ID")
    p_rent.add_argument("--mode", choices=["incognito", "main"], default="incognito", help="Profile mode (default: incognito)")
    p_rent.add_argument("--fingerprint-from", help="Path to profile JSON with fingerprint data")

    p_snap = _add_session_parser(sub, "snapshot", "Take screenshot + get new chat messages")
    p_snap.add_argument("-o", "--output", required=True, help="Output PNG path")

    p_nav = _add_session_parser(sub, "navigate", "Navigate to URL")
    p_nav.add_argument("url", help="URL to navigate to")

    p_click = _add_session_parser(sub, "click", "Click at coordinates")
    p_click.add_argument("x", type=int, help="X coordinate")
    p_click.add_argument("y", type=int, help="Y coordinate")

    p_type = _add_session_parser(sub, "type", "Type text")
    p_type.add_argument("text", help="Text to type")
    p_type.add_argument("--natural", action="store_true", help="Enable human-like typing")

    p_scroll = _add_session_parser(sub, "scroll", "Scroll")
    p_scroll.add_argument("x", type=int, help="X origin")
    p_scroll.add_argument("y", type=int, help="Y origin")
    p_scroll.add_argument("dy", type=int, help="Delta Y (negative = scroll down)")

    # chat <session_id> {send,next,history,send-image}
    p_chat = _add_session_parser(sub, "chat", "Chat with provider")
    chat_sub = p_chat.add_subparsers(dest="chat_action", required=True)

    p_send = chat_sub.add_parser("send", help="Send message")
    p_send.add_argument("text", help="Message text")

    p_next = chat_sub.add_parser("next", help="Wait for next message")
    p_next.add_argument("--timeout", type=float, default=60, help="Timeout in seconds")

    p_history = chat_sub.add_parser("history", help="Get chat history")
    p_history.add_argument("--since", help="Timestamp (Unix or ISO-8601)")
    p_history.add_argument("--limit", type=int, default=50, help="Max messages")

    p_send_image = chat_sub.add_parser("send-image", help="Send image to chat")
    p_send_image.add_argument("--image", required=True, help="Path to image file")
    p_send_image.add_argument("--text", help="Optional text to send before image")

    _add_session_parser(sub, "stop", "End session")

    # profile <session_id> {export,import}
    p_profile = _add_session_parser(sub, "profile", "Profile export/import")
    profile_sub = p_profile.add_subparsers(dest="profile_action", required=True)

    p_profile_export = profile_sub.add_parser("export", help="Export profile to file")
    p_profile_export.add_argument("-o", "--output", required=True, help="Output JSON path")
    p_profile_export.add_argument("--domains", help="Comma-separated domain filter")
    p_profile_export.add_argument(
        "--no-session-storage", action="store_true", help="Exclude sessionStorage"
    )

    p_profile_import = profile_sub.add_parser("import", help="Import profile from file")
    p_profile_import.add_argument("-i", "--input", required=True, help="Input JSON path")

    p_sessions = sub.add_parser("sessions", help="List agent sessions (active by default)")
    p_sessions.add_argument("--all", action="store_true", help="Show all sessions, not just active")
    p_sessions.add_argument("--limit", type=int, default=50, help="Max results")
    p_sessions.add_argument("--json", action="store_true", help="Raw JSON output")

    sub.add_parser("my-browsers", help="List browsers with pre-arranged rent contracts")

    p_search = sub.add_parser("search", help="Search available browsers")
    p_search.add_argument("--limit", type=int, default=20, help="Max results")
    p_search.add_argument("--filter", action="append", help="Filter key=val (repeatable)")

    _add_session_parser(sub, "wait", "Wait until session ends")

    p_screenshot = _add_session_parser(sub, "screenshot", "Take screenshot and save to file")
    p_screenshot.add_argument("-o", "--output", required=True, help="Output file path")
    p_screenshot.add_argument(
        "--format", choices=["png", "jpeg"], default="png", help="Image format"
    )
    p_screenshot.add_argument(
        "--full", action="store_true", default=False, help="Capture full page, not just viewport"
    )

    _add_session_parser(sub, "switch-tab", "Switch browser tab")

    p_configure = _add_session_parser(sub, "configure", "Configure session settings")
    p_configure.add_argument("--masking-mode", help="Masking mode (true/false)")
    p_configure.add_argument("--fingerprint", help="Fingerprint (true/false)")

    # The session_id positional now carries help text like every other command.
    p_upload = _add_session_parser(sub, "upload", "Upload file to input[type=file]")
    p_upload.add_argument("--selector", required=True, help="CSS selector for file input")
    p_upload.add_argument("--file", required=True, dest="file_path", help="Path to file")
    p_upload.add_argument("--filename", help="Override filename (default: basename)")

    p_captcha = _add_session_parser(sub, "request-captcha", "Request human to solve captcha")
    p_captcha.add_argument("--acceptance", type=float, default=60, help="Acceptance timeout sec (min 30)")
    p_captcha.add_argument("--completion", type=float, default=120, help="Completion timeout sec (min 30)")
    p_captcha.add_argument("--manual", action="store_true", help="Disable auto-accept (agent votes manually)")

    p_cdp = _add_session_parser(sub, "cdp", "Send raw CDP command")
    p_cdp.add_argument("--method", required=True, help="CDP method name")
    p_cdp.add_argument("--params", help="CDP params as JSON string")

    return parser
def main() -> None:
    """CLI entry point: parse argv, run the async handler, map errors to exit codes.

    Exit codes:
        1    unknown command, SDK error (``CekiError``) or any other exception
        3    session not found / expired, or caller is not the session owner
        4    timeout
        5    connection or authentication failure, or another ``OSError``
        130  interrupted with Ctrl-C
    """
    parser = build_parser()
    args = parser.parse_args()

    handlers = {
        "rent": _cmd_rent,
        "snapshot": _cmd_snapshot,
        "navigate": _cmd_navigate,
        "click": _cmd_click,
        "type": _cmd_type,
        "scroll": _cmd_scroll,
        "chat": _cmd_chat,
        "stop": _cmd_stop,
        "profile": _cmd_profile,
        "sessions": _cmd_sessions,
        "my-browsers": _cmd_my_browsers,
        "search": _cmd_search,
        "wait": _cmd_wait,
        "screenshot": _cmd_screenshot,
        "switch-tab": _cmd_switch_tab,
        "configure": _cmd_configure,
        "cdp": _cmd_cdp,
        "upload": _cmd_upload,
        "request-captcha": _cmd_request_captcha,
    }

    handler = handlers.get(args.command)
    if not handler:
        _err(f"Unknown command: {args.command}")
        sys.exit(1)

    try:
        asyncio.run(handler(args))
    except (SessionNotFound, SessionExpired) as e:
        _err(str(e), "session_not_found")
        sys.exit(3)
    except NotOwner as e:
        _err(str(e), "not_owner")
        sys.exit(3)
    # Must precede the OSError clause: builtin TimeoutError subclasses OSError.
    # asyncio.TimeoutError is a separate class before Python 3.11 (an alias
    # afterwards), so it is listed explicitly to keep exit code 4 on 3.10.
    except (TimeoutError, asyncio.TimeoutError) as e:
        _err(str(e), "timeout")
        sys.exit(4)
    except (ConnectionLost, AuthFailed, ConnectionError, OSError) as e:
        _err(str(e), "network")
        sys.exit(5)
    except CekiError as e:
        _err(str(e), "ceki_error")
        sys.exit(1)
    except KeyboardInterrupt:
        sys.exit(130)
    except Exception as e:
        # Top-level boundary: report anything unexpected instead of a traceback.
        _err(str(e), "error")
        sys.exit(1)


if __name__ == "__main__":
    main()
class Browser:
    """Agent-side client: owns the relay ``Transport`` and opens ``Session`` objects.

    Usable as an async context manager; entering connects, leaving closes.
    """

    def __init__(self, token: str, relay_url: str = DEFAULT_RELAY_URL, human: Any = _HUMAN_DEFAULT):
        self._transport = Transport(token=token, relay_url=relay_url)
        self._connected = False
        # Default human profile for sessions that do not pass their own.
        self._human = human

    @property
    def agent_id(self) -> str | None:
        """Agent ID reported by the transport, or ``None`` before connecting."""
        return self._transport.agent_id

    @property
    def connected(self) -> bool:
        """True when ``connect()`` succeeded and the transport is still connected."""
        return self._connected and self._transport.connected

    def on_event(self, callback: Any) -> None:
        """Register *callback* as the transport's event callback."""
        self._transport.on_event(callback)

    async def connect(self) -> dict[str, Any]:
        """Connect the transport and return its welcome result."""
        result = await self._transport.connect()
        self._connected = True
        return result

    async def close(self) -> None:
        """Mark the client disconnected and close the transport."""
        self._connected = False
        await self._transport.close()

    async def session(
        self,
        mode: str = "incognito",
        domain_hints: list[str] | None = None,
        geo: str = "",
        language: str = "",
        max_price_per_min: float = 1.0,
        estimated_duration_min: int = 30,
        wait_timeout: float = 60.0,
        human: Any = _HUMAN_DEFAULT,
        schedule_id: int | None = None,
    ) -> Session:
        """Request a browser session and wait until it is active.

        Empty or ``None`` optional filters are left out of the request.
        ``human`` overrides the profile given to the constructor for this
        session only.

        Raises:
            CekiBrowserError: if ``connect()`` has not been called; errors
                from the request or from waiting for activation propagate.
        """
        if not self._connected:
            raise CekiBrowserError("Not connected. Call connect() or use `async with Browser(...)`")

        params: dict[str, Any] = {
            "mode": mode,
            "max_price_per_min": max_price_per_min,
            "estimated_duration_min": estimated_duration_min,
        }
        if domain_hints:
            params["domain_hints"] = domain_hints
        if geo:
            params["geo"] = geo
        if language:
            params["language"] = language
        if schedule_id is not None:
            params["schedule_id"] = int(schedule_id)

        effective_human = human if human is not _HUMAN_DEFAULT else self._human
        sess = Session(self._transport, "", mode, human=effective_human)
        # Install the listener before sending so a fast match cannot be missed.
        sess._match_state = sess._install_match_listener()

        try:
            result = await self._transport.send("session.request", params, timeout=30)
        except BaseException:
            # _wait_for_active() normally restores the previous callback; when
            # the request itself fails it never runs, so restore it here rather
            # than leave a dead session's listener on the shared transport.
            self._transport.on_event(sess._original_cb_for_match)
            raise
        request_id = result.get("request_id", "") if isinstance(result, dict) else ""
        sess._request_id = request_id

        await sess._wait_for_active(timeout=wait_timeout)
        return sess

    async def __aenter__(self) -> Browser:
        await self.connect()
        return self

    async def __aexit__(self, *args: Any) -> None:
        await self.close()
class CekiBrowserError(Exception):
    """Base class for SDK errors; carries a numeric ``code`` (0 when unset)."""

    def __init__(self, message: str, code: int = 0):
        self.code = code
        super().__init__(message)


class AuthError(CekiBrowserError):
    """Connecting to or authenticating with the relay failed."""


class ProviderDisconnected(CekiBrowserError):
    """Relay error code -1010."""


class NavigationTimeout(CekiBrowserError):
    """A navigation did not finish in time."""


class CommandTimeout(CekiBrowserError):
    """A command received no reply within its timeout."""


class RateLimited(CekiBrowserError):
    """Relay error code -1013."""


class ProviderNotVerified(CekiBrowserError):
    """Relay error code -1014."""


class HumanActionDeclined(CekiBrowserError):
    """A requested human action was declined."""


class HumanActionTimeout(CekiBrowserError):
    """A requested human action timed out."""


class NoMatchError(CekiBrowserError):
    """No provider matched the session request."""


class SessionEndedError(CekiBrowserError):
    """The session ended before it could be used."""


# Relay error code -> exception class; codes not listed here are raised as
# the base CekiBrowserError by the caller that consults this map.
ERROR_CODE_MAP: dict[int, type[CekiBrowserError]] = {
    -1010: ProviderDisconnected,
    -1013: RateLimited,
    -1014: ProviderNotVerified,
}
# Digit-row symbol -> the digit key that produces it with Shift held.
_SHIFTED_DIGITS: dict[str, str] = dict(zip("!@#$%^&*()", "1234567890"))

_LOWERCASE = "abcdefghijklmnopqrstuvwxyz"

# Punctuation keys: (code, unshifted char, shifted char, windowsVirtualKeyCode)
_PUNCTUATION_KEYS = (
    ("Minus", "-", "_", 189),
    ("Equal", "=", "+", 187),
    ("BracketLeft", "[", "{", 219),
    ("BracketRight", "]", "}", 221),
    ("Backslash", "\\", "|", 220),
    ("Semicolon", ";", ":", 186),
    ("Quote", "'", '"', 222),
    ("Comma", ",", "<", 188),
    ("Period", ".", ">", 190),
    ("Slash", "/", "?", 191),
    ("Backquote", "`", "~", 192),
)

# char -> (code, key, windowsVirtualKeyCode, needsShift)
_KEYMAP: dict[str, tuple[str, str, int, bool]] = {}

# Letters: both cases share a key code and virtual key; uppercase needs Shift.
_KEYMAP.update({ch: (f"Key{ch.upper()}", ch, ord(ch.upper()), False) for ch in _LOWERCASE})
_KEYMAP.update({ch: (f"Key{ch}", ch, ord(ch), True) for ch in _LOWERCASE.upper()})

# Digit row, unshifted.
_KEYMAP.update({d: (f"Digit{d}", d, ord(d), False) for d in "0123456789"})

# Whitespace and editing keys.
_KEYMAP.update({
    " ": ("Space", " ", 32, False),
    "\n": ("Enter", "Enter", 13, False),
    "\t": ("Tab", "Tab", 9, False),
    "\b": ("Backspace", "Backspace", 8, False),
})

# Digit row, shifted: the symbol reuses its base digit's code and virtual key.
_KEYMAP.update({
    symbol: (f"Digit{digit}", symbol, ord(digit), True)
    for symbol, digit in _SHIFTED_DIGITS.items()
})

# Punctuation: all unshifted entries first, then all shifted ones.
_KEYMAP.update({plain: (code, plain, vk, False) for code, plain, _, vk in _PUNCTUATION_KEYS})
_KEYMAP.update({shifted: (code, shifted, vk, True) for code, _, shifted, vk in _PUNCTUATION_KEYS})


def keymap_for_char(char: str) -> tuple[str, str, int, bool] | None:
    """Look up the key-event fields for a single character.

    Returns ``(code, key, windowsVirtualKeyCode, needsShift)``, or ``None``
    when the character is not in the US keyboard map (for example non-ASCII
    text, which callers handle with a fallback).
    """
    try:
        return _KEYMAP[char]
    except KeyError:
        return None
def _resolve_human_profile(human: Any) -> HumanProfile | None:
    """Resolve the ``human`` argument to a ``HumanProfile`` or ``None``.

    Accepts ``None``, a ``HumanProfile``, a dict, a ``Path``, or a string. A
    string that ends in ``.json`` or contains a path separator is loaded as a
    file; any other string is a preset name. ``CEKI_HUMAN_DISABLE=1`` forces
    ``None``.

    Raises:
        ValueError: for any other argument type.
    """
    if os.environ.get("CEKI_HUMAN_DISABLE") == "1":
        return None
    if human is None:
        return None
    if isinstance(human, HumanProfile):
        return human
    if isinstance(human, dict):
        return HumanProfile.from_dict(human)
    if isinstance(human, Path):
        return HumanProfile.load(human)
    if isinstance(human, str):
        # Check if it's a file path
        if human.endswith(".json") or "/" in human or "\\" in human:
            return HumanProfile.load(human)
        # It's a preset name
        return HumanProfile.load_preset(human)
    raise ValueError(f"Invalid human profile: {human!r}")


_HUMAN_DEFAULT = object()  # sentinel for "use default"


def _get_default_human() -> Any:
    """Return the default profile spec: env path, else env preset name, else 'natural'.

    Returns ``None`` when ``CEKI_HUMAN_DISABLE=1``.
    """
    if os.environ.get("CEKI_HUMAN_DISABLE") == "1":
        return None
    env_path = os.environ.get("CEKI_HUMAN_PROFILE_PATH")
    if env_path:
        return env_path
    env_name = os.environ.get("CEKI_HUMAN_PROFILE")
    if env_name:
        return env_name
    return "natural"


class Session:
    """One rented browser session.

    Signaling and chat events arrive through the shared relay ``Transport``;
    browser commands are sent over the session's ``RTCTransport``. Event
    handlers are chained: each one installed here wraps whatever callback
    was on the transport before it and forwards every event to it.
    """

    def __init__(
        self,
        transport: Transport,
        request_id: str,
        mode: str,
        ice_servers: list[dict[str, Any]] | None = None,
        human: Any = _HUMAN_DEFAULT,
    ):
        self._transport = transport
        self._request_id = request_id
        self._session_id: str | None = None
        self._mode = mode
        self._active = False
        self._rtc: RTCTransport | None = None
        self._chat: ChatAPI | None = None
        self._ice_servers = ice_servers or [{"urls": "stun:stun.l.google.com:19302"}]
        self._tab_opened_callback: Callable[[dict[str, Any]], Any] | None = None
        self._chat_direct: ChatClient | None = None
        if human is _HUMAN_DEFAULT:
            human = _get_default_human()
        self._human_profile = _resolve_human_profile(human)
        self._humanizer = Humanizer(self._human_profile)

    @property
    def session_id(self) -> str | None:
        return self._session_id

    @property
    def active(self) -> bool:
        return self._active

    @property
    def chat(self) -> ChatAPI:
        """Relay chat API; raises ``CekiBrowserError`` until the session is active."""
        if self._chat is None:
            raise CekiBrowserError("Chat not available until session is active")
        return self._chat

    @property
    def rtc(self) -> RTCTransport | None:
        return self._rtc

    @property
    def humanizer(self) -> Humanizer:
        return self._humanizer

    def set_human(self, profile: Any) -> HumanProfile | None:
        """Switch the human profile and return the previous one."""
        prev = self._human_profile
        self._human_profile = _resolve_human_profile(profile)
        self._humanizer = Humanizer(self._human_profile)
        return prev

    def _install_match_listener(self) -> tuple[asyncio.Event, list[str], list[Exception]]:
        """Hook the transport to capture the outcome of ``session.request``.

        Returns ``(ready, session_id_holder, error_holder)``. ``ready`` is set
        on ``session.matched``, ``session.no_match`` or ``session.ended``. The
        previous callback is kept in ``_original_cb_for_match`` so it can be
        restored later.
        """
        ready = asyncio.Event()
        session_id_holder: list[str] = []
        error_holder: list[Exception] = []

        original_cb = self._transport._event_callback
        self._original_cb_for_match = original_cb

        async def _on_event(method: str, params: dict[str, Any]) -> None:
            if method == "session.matched":
                sid = params.get("session_id", "")
                session_id_holder.append(sid)
                ready.set()
            elif method == "session.no_match":
                reason = params.get("reason", "No matching providers available")
                error_holder.append(NoMatchError(reason))
                ready.set()
            elif method == "session.ended":
                reason = params.get("reason", "ended_before_active")
                error_holder.append(SessionEndedError(reason))
                ready.set()
            if original_cb:
                result = original_cb(method, params)
                if asyncio.iscoroutine(result):
                    await result

        self._transport.on_event(_on_event)
        return ready, session_id_holder, error_holder

    async def _wait_for_active(self, timeout: float = 60.0) -> None:
        """Wait for the match outcome, then run the WebRTC handshake.

        Raises:
            CekiBrowserError: on timeout.
            NoMatchError, SessionEndedError: as recorded by the match listener.
        """
        ready, session_id_holder, error_holder = self._match_state
        try:
            await asyncio.wait_for(ready.wait(), timeout=timeout)
        except asyncio.TimeoutError:
            raise CekiBrowserError("Timed out waiting for session to become active")
        finally:
            # Always remove the match listener, whatever the outcome.
            self._transport.on_event(self._original_cb_for_match)

        if error_holder:
            raise error_holder[0]

        if session_id_holder:
            self._session_id = session_id_holder[0]
            self._active = True

        await self._setup_rtc()

    async def _setup_rtc(self) -> None:
        """Create the peer connection and complete offer/answer signaling.

        On success the signaling handler stays installed and the session
        event handler is chained on top of it. On any failure the previous
        transport callback is restored and the peer connection is closed, so
        a failed handshake does not leak either.
        """
        self._rtc = RTCTransport(self._ice_servers)
        self._chat = ChatAPI(self._transport, self._session_id or self._request_id, None)

        signaling_done = asyncio.Event()
        answer_holder: list[dict[str, Any]] = []

        original_cb = self._transport._event_callback

        async def _on_signaling(method: str, params: dict[str, Any]) -> None:
            if method == "webrtc.answer":
                answer_holder.append(params)
                signaling_done.set()
            elif method == "webrtc.ice":
                await self._rtc.add_ice(params)
            elif method == "session.ended":
                # Wake the waiter; the empty answer_holder signals the failure.
                self._active = False
                signaling_done.set()
            if original_cb:
                result = original_cb(method, params)
                if asyncio.iscoroutine(result):
                    await result

        # Outgoing signaling is fire-and-forget and tagged with the session id.
        self._rtc.on_signaling(lambda method, params: asyncio.ensure_future(
            self._transport.notify(method, {
                "session_id": self._session_id,
                **(params or {}),
            })
        ))

        self._transport.on_event(_on_signaling)

        try:
            offer = await self._rtc.create_offer()
            await self._transport.notify("webrtc.offer", {
                "session_id": self._session_id,
                "sdp": offer["sdp"],
                "type": offer["type"],
            })

            try:
                await asyncio.wait_for(signaling_done.wait(), timeout=30.0)
            except asyncio.TimeoutError:
                raise CekiBrowserError("Timed out waiting for WebRTC answer")

            if not answer_holder:
                raise CekiBrowserError("Session ended before RTC handshake completed")

            await self._rtc.apply_answer(answer_holder[0])
            await self._rtc.wait_connected(timeout=15.0)
        except BaseException:
            # _active is left untouched so end() can still tell the relay.
            self._transport.on_event(original_cb)
            rtc, self._rtc = self._rtc, None
            try:
                await rtc.close()
            except Exception:
                logger.debug("closing RTC transport after failed handshake raised", exc_info=True)
            raise

        self._install_session_event_handler()
        logger.info("P2P connection established for session %s", self._session_id)

    def _install_session_event_handler(self) -> None:
        """Chain the handler for session end, new tabs and chat events."""
        original_cb = self._transport._event_callback

        async def _on_event(method: str, params: dict[str, Any]) -> None:
            if method == "session.ended":
                self._active = False
            elif method == "tab.opened":
                if self._tab_opened_callback:
                    result = self._tab_opened_callback(params)
                    if asyncio.iscoroutine(result):
                        await result
                else:
                    # No listener registered: close the new tab, best effort.
                    tab_id = params.get("tab_id")
                    if tab_id is not None:
                        try:
                            await self._rtc.send_command("tabs.close", {"session_id": self._session_id, "tab_id": tab_id})
                        except Exception:
                            logger.debug("auto-closing tab %s failed", tab_id, exc_info=True)
            elif method == "chat.topic_created":
                topic_id = params.get("chat_topic_id", "")
                if self._chat:
                    self._chat._set_topic_id(topic_id)
            elif method == "chat.message":
                if self._chat:
                    self._chat._dispatch_message(params)
            elif method == "chat.typing":
                if self._chat:
                    self._chat._dispatch_typing(params)
            if original_cb:
                result = original_cb(method, params)
                if asyncio.iscoroutine(result):
                    await result

        self._transport.on_event(_on_event)

    async def navigate(self, url: str, timeout_ms: int = 120000) -> NavigateResult:
        """Navigate to *url*; the command timeout is ``timeout_ms`` plus 5 s."""
        self._check_active()
        await self._humanizer.before("navigate")
        data = await self._rtc.send_command(
            "browser.navigate",
            {"url": url, "timeout_ms": timeout_ms},
            timeout=timeout_ms / 1000 + 5,
        )
        await self._humanizer.after("navigate")
        return parse_result(data, NavigateResult)

    async def query(self, selector: str, attributes: list[str] | None = None) -> QueryResult:
        """Send ``browser.query`` for *selector*."""
        self._check_active()
        params: dict[str, Any] = {"selector": selector}
        if attributes:
            params["attributes"] = attributes
        data = await self._rtc.send_command("browser.query", params)
        return parse_result(data, QueryResult)

    async def query_all(self, selector: str, attributes: list[str] | None = None, limit: int = 20) -> QueryResult:
        """Send ``browser.query_all`` for *selector* with the given *limit*."""
        self._check_active()
        params: dict[str, Any] = {"selector": selector, "limit": limit}
        if attributes:
            params["attributes"] = attributes
        data = await self._rtc.send_command("browser.query_all", params)
        return parse_result(data, QueryResult)

    async def get_html(self, selector: str = "html", outer: bool = True) -> HtmlResult:
        """Send ``browser.get_html`` for *selector*."""
        self._check_active()
        data = await self._rtc.send_command("browser.get_html", {"selector": selector, "outer": outer})
        return parse_result(data, HtmlResult)

    async def click(self, selector: str | None = None, x: int | None = None, y: int | None = None) -> None:
        """Send ``browser.click`` with whichever of selector / x / y were given."""
        self._check_active()
        await self._humanizer.before("click")
        params: dict[str, Any] = {}
        if selector:
            params["selector"] = selector
        if x is not None:
            params["x"] = x
        if y is not None:
            params["y"] = y
        await self._rtc.send_command("browser.click", params)
        await self._humanizer.after("click")

    async def type(self, selector: str, text: str, delay_ms: int = 0) -> None:
        """Type *text* into *selector*.

        With a human profile the element is clicked and the text is sent key
        by key with the humanizer's delays; ``delay_ms`` is used only on the
        non-humanized path.
        """
        self._check_active()
        await self._humanizer.before("type")
        if self._human_profile:
            # Click to focus the element first
            await self._rtc.send_command("browser.click", {"selector": selector})
            # Per-char typing with jitter
            async for char, char_delay in self._humanizer.humanize_text(text):
                await self._rtc.send_command("keyboard.press", {
                    "session_id": self._session_id,
                    "key": char,
                })
                if char_delay > 0:
                    await asyncio.sleep(char_delay / 1000)
        else:
            await self._rtc.send_command("browser.type", {
                "selector": selector, "text": text, "delay_ms": delay_ms,
            })
        await self._humanizer.after("type")

    async def scroll(
        self,
        selector: str | None = None,
        direction: str = "down",
        amount: int = 500,
    ) -> None:
        """Scroll to *selector*, or by *amount* in *direction* when no selector is given."""
        self._check_active()
        await self._humanizer.before("scroll")
        params: dict[str, Any] = {}
        if selector:
            params["selector"] = selector
        else:
            params["direction"] = direction
            params["amount"] = amount
        await self._rtc.send_command("browser.scroll", params)
        await self._humanizer.after("scroll")

    async def screenshot(self, format: str = "png", quality: int = 80) -> ScreenshotResult:
        """Send ``browser.screenshot`` with the given format and quality."""
        self._check_active()
        await self._humanizer.before("screenshot")
        data = await self._rtc.send_command("browser.screenshot", {"format": format, "quality": quality})
        await self._humanizer.after("screenshot")
        return parse_result(data, ScreenshotResult)

    async def back(self) -> NavigateResult:
        self._check_active()
        data = await self._rtc.send_command("browser.back")
        return parse_result(data, NavigateResult)

    async def forward(self) -> NavigateResult:
        self._check_active()
        data = await self._rtc.send_command("browser.forward")
        return parse_result(data, NavigateResult)

    async def reload(self) -> NavigateResult:
        self._check_active()
        data = await self._rtc.send_command("browser.reload")
        return parse_result(data, NavigateResult)

    def on_tab_opened(self, callback: Callable[[dict[str, Any]], Any]) -> None:
        """Register a listener for new tab events. Params dict has: session_id, tab_id, url, opener_tab_id."""
        self._tab_opened_callback = callback

    async def switch_tab(self, tab_id: int) -> dict[str, Any]:
        self._check_active()
        data = await self._rtc.send_command("tabs.switch", {"session_id": self._session_id, "tab_id": tab_id})
        return data if isinstance(data, dict) else {}

    async def close_tab(self, tab_id: int) -> dict[str, Any]:
        self._check_active()
        data = await self._rtc.send_command("tabs.close", {"session_id": self._session_id, "tab_id": tab_id})
        return data if isinstance(data, dict) else {}

    async def mouse_click(self, x: float, y: float, button: str = "left") -> None:
        self._check_active()
        await self._rtc.send_command("mouse.click", {"session_id": self._session_id, "x": x, "y": y, "button": button})

    async def mouse_move(self, x: float, y: float) -> None:
        self._check_active()
        await self._rtc.send_command("mouse.move", {"session_id": self._session_id, "x": x, "y": y})

    async def click_real(self, selector: str) -> dict[str, Any]:
        """Send ``mouse.click_selector`` for *selector*, with humanizer pauses."""
        self._check_active()
        await self._humanizer.before("click")
        data = await self._rtc.send_command("mouse.click_selector", {"session_id": self._session_id, "selector": selector})
        await self._humanizer.after("click")
        return data if isinstance(data, dict) else {}

    async def key_press(self, key: str) -> None:
        self._check_active()
        await self._rtc.send_command("keyboard.press", {"session_id": self._session_id, "key": key})

    async def inject_credentials(self, secret_id: str, target: dict[str, str]) -> dict[str, Any]:
        """Send ``browser.inject_credentials``; *target* entries are merged into the params."""
        self._check_active()
        params = {"secret_id": secret_id, **target}
        data = await self._rtc.send_command("browser.inject_credentials", params)
        return data if isinstance(data, dict) else {}

    async def request_human_action(
        self,
        action_type: str,
        message: str,
        timeout_sec: int = 120,
    ) -> HumanActionResult:
        """Send ``browser.request_human_action``; waits up to ``timeout_sec`` plus 10 s."""
        self._check_active()
        import uuid

        data = await self._rtc.send_command(
            "browser.request_human_action",
            {
                "request_id": str(uuid.uuid4()),
                "type": action_type,
                "message": message,
                "timeout_sec": timeout_sec,
            },
            timeout=timeout_sec + 10,
        )
        return parse_result(data, HumanActionResult)

    def chat_direct(
        self,
        topic_id: str | None = None,
        chat_service_url: str = DEFAULT_CHAT_SERVICE_URL,
    ) -> ChatClient:
        """Create and remember a direct ``ChatClient`` for a chat topic.

        Raises:
            CekiBrowserError: if no topic id is passed and the session has no
                ``chat_topic_id`` attribute set.
        """
        tid = topic_id or getattr(self, "chat_topic_id", None)
        if not tid:
            raise CekiBrowserError(
                "topic_id required: pass it explicitly or set session.chat_topic_id"
            )
        token = self._transport._token
        self._chat_direct = ChatClient(
            token=token,
            topic_id=tid,
            base_url=chat_service_url,
        )
        return self._chat_direct

    async def end(self, reason: str = "completed") -> None:
        """End the session and release its local resources.

        The relay is notified only while the session is still active. Local
        resources are released on every call, so a session the relay already
        ended still has its peer connection closed. Safe to call repeatedly.
        """
        was_active, self._active = self._active, False
        try:
            if was_active:
                try:
                    await self._transport.send(
                        "session.end",
                        {"session_id": self._session_id or self._request_id, "reason": reason},
                        timeout=10,
                    )
                except CekiBrowserError:
                    # Deliberate best effort: local teardown must still happen.
                    pass
        finally:
            await self._release_resources()

    async def _release_resources(self) -> None:
        """Close the direct chat client and RTC transport, if present."""
        chat_direct, self._chat_direct = self._chat_direct, None
        rtc, self._rtc = self._rtc, None
        self._chat = None
        try:
            if chat_direct:
                await chat_direct.close()
        finally:
            if rtc:
                await rtc.close()

    def _check_active(self) -> None:
        """Raise ``CekiBrowserError`` unless the session is active and has an RTC transport."""
        if not self._active:
            raise CekiBrowserError("Session is not active")
        if not self._rtc:
            raise CekiBrowserError("P2P transport not established")

    async def __aenter__(self) -> Session:
        return self

    async def __aexit__(self, *args: Any) -> None:
        await self.end()
EventCallback = Callable[[str, dict[str, Any]], Awaitable[None] | None]

DEFAULT_RELAY_URL = "wss://browser.ceki.me/ws/agent"

MAX_RECONNECT_ATTEMPTS = 5
BASE_RECONNECT_DELAY = 1.0


class Transport:
    """JSON-RPC 2.0 client over a WebSocket to the relay.

    Requests are matched to replies by ``id`` through a table of pending
    futures. Messages that carry a ``method`` and match no pending request
    are passed to the single registered event callback.
    """

    def __init__(self, token: str, relay_url: str = DEFAULT_RELAY_URL):
        self._token = token
        self._relay_url = relay_url
        self._ws: ClientConnection | None = None
        self._pending: dict[int | str, asyncio.Future[Any]] = {}
        self._next_id = 1
        self._event_callback: EventCallback | None = None
        self._recv_task: asyncio.Task[None] | None = None
        self._heartbeat_task: asyncio.Task[None] | None = None
        self._agent_id: str | None = None
        self._closed = False

    @property
    def agent_id(self) -> str | None:
        return self._agent_id

    @property
    def connected(self) -> bool:
        return self._ws is not None and self._ws.state.name == "OPEN"

    def on_event(self, callback: EventCallback) -> None:
        """Replace the event callback (only one is held at a time)."""
        self._event_callback = callback

    async def connect(self) -> dict[str, Any]:
        """Open the socket, read the welcome message and start background tasks.

        Returns the welcome ``result`` dict.

        Raises:
            AuthError: if the socket cannot be opened or the welcome message
                carries an error. The socket is closed before raising, so
                ``connected`` does not report a rejected connection as open.
        """
        headers = {"Authorization": f"Bearer {self._token}"}
        try:
            # NOTE(review): ``extra_headers`` is the legacy-client keyword; the
            # newer websockets asyncio client appears to expect
            # ``additional_headers``. Confirm against the pinned version.
            self._ws = await websockets.connect(self._relay_url, extra_headers=headers)
        except Exception as e:
            raise AuthError(f"Failed to connect to relay: {e}", code=401) from e

        try:
            welcome_raw = await self._ws.recv()
            welcome = json.loads(welcome_raw)

            if "error" in welcome:
                err = welcome["error"]
                raise AuthError(err.get("message", "Authentication failed"), code=err.get("code", 401))
        except BaseException:
            ws, self._ws = self._ws, None
            try:
                await ws.close()
            except Exception:
                logger.debug("closing socket after failed handshake raised", exc_info=True)
            raise

        result = welcome.get("result", {})
        self._agent_id = result.get("agent_id")
        self._recv_task = asyncio.create_task(self._recv_loop())
        self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())
        return result

    async def close(self) -> None:
        """Stop background tasks, close the socket and cancel pending requests."""
        self._closed = True
        if self._heartbeat_task:
            self._heartbeat_task.cancel()
            self._heartbeat_task = None
        if self._recv_task:
            self._recv_task.cancel()
            self._recv_task = None
        try:
            if self._ws:
                ws, self._ws = self._ws, None
                await ws.close()
        finally:
            # Runs even if the socket close raises, so no caller stays blocked.
            for fut in self._pending.values():
                if not fut.done():
                    fut.cancel()
            self._pending.clear()

    async def send(self, method: str, params: dict[str, Any] | None = None, timeout: float = 60.0) -> Any:
        """Send a request and wait for its reply.

        Raises:
            CekiBrowserError: if not connected, or the relay replied with an
                error (mapped through ``ERROR_CODE_MAP``).
            CommandTimeout: if no reply arrives within *timeout* seconds.
        """
        if not self._ws:
            raise CekiBrowserError("Not connected")

        msg_id = self._next_id
        self._next_id += 1

        payload = {"jsonrpc": "2.0", "method": method, "id": msg_id}
        if params:
            payload["params"] = params

        fut: asyncio.Future[Any] = asyncio.get_running_loop().create_future()
        self._pending[msg_id] = fut

        try:
            await self._ws.send(json.dumps(payload))
            return await asyncio.wait_for(fut, timeout=timeout)
        except asyncio.TimeoutError:
            raise CommandTimeout(f"Command {method} timed out after {timeout}s", code=-1020)
        finally:
            # No-op after a normal reply (the receive loop already removed the
            # entry); prevents a leak on send failure, timeout or cancellation.
            self._pending.pop(msg_id, None)

    async def notify(self, method: str, params: dict[str, Any] | None = None) -> None:
        """Send a notification (no ``id``, no reply expected)."""
        if not self._ws:
            raise CekiBrowserError("Not connected")
        payload: dict[str, Any] = {"jsonrpc": "2.0", "method": method}
        if params:
            payload["params"] = params
        await self._ws.send(json.dumps(payload))

    async def _recv_loop(self) -> None:
        """Dispatch incoming messages until the socket closes or the task is cancelled.

        On exit every request still pending fails with "Connection lost".
        """
        assert self._ws is not None
        try:
            async for raw in self._ws:
                msg = json.loads(raw)
                msg_id = msg.get("id")

                if msg_id is not None and msg_id in self._pending:
                    fut = self._pending.pop(msg_id)
                    if "error" in msg:
                        err = msg["error"]
                        code = err.get("code", 0)
                        message = err.get("message", "Unknown error")
                        exc_cls = ERROR_CODE_MAP.get(code, CekiBrowserError)
                        fut.set_exception(exc_cls(message, code=code))
                    else:
                        fut.set_result(msg.get("result"))
                elif "method" in msg:
                    if self._event_callback:
                        result = self._event_callback(msg["method"], msg.get("params", {}))
                        if asyncio.iscoroutine(result):
                            await result
        except websockets.ConnectionClosed:
            logger.info("WebSocket connection closed")
        except asyncio.CancelledError:
            return
        except Exception as e:
            logger.error("recv loop error: %s", e)
        finally:
            for fut in self._pending.values():
                if not fut.done():
                    fut.set_exception(CekiBrowserError("Connection lost"))
            self._pending.clear()

    async def _heartbeat_loop(self) -> None:
        """Send a ``heartbeat`` request every 10 s until closed.

        A timed-out heartbeat is logged and retried on the next cycle; any
        other SDK error stops the loop.
        """
        try:
            while not self._closed:
                await asyncio.sleep(10)
                if self._ws and not self._closed:
                    try:
                        await self.send("heartbeat", timeout=5.0)
                    except CommandTimeout:
                        logger.warning("heartbeat response timed out, retrying next cycle")
                    except asyncio.CancelledError:
                        break
                    except CekiBrowserError:
                        break
        except asyncio.CancelledError:
            return
self._setup_cmd_channel(self.cmd_channel) - - @self.pc.on("icecandidate") - def on_ice(candidate: RTCIceCandidate | None) -> None: - if candidate and self._signaling_callback: - self._signaling_callback("webrtc.ice", { - "candidate": candidate.to_sdp(), - "sdpMid": candidate.sdpMid, - "sdpMLineIndex": candidate.sdpMLineIndex, - }) - - @self.pc.on("connectionstatechange") - def on_state() -> None: - state = self.pc.connectionState - logger.info("RTC connection state: %s", state) - if state == "connected": - self._connected_event.set() - if self._signaling_callback: - self._signaling_callback("webrtc.connected", {}) - elif state in ("failed", "closed"): - self._connected_event.set() - - def on_signaling(self, callback: SignalingCallback) -> None: - self._signaling_callback = callback - - async def create_offer(self) -> dict[str, Any]: - offer = await self.pc.createOffer() - await self.pc.setLocalDescription(offer) - - await self._gather_ice() - - desc = self.pc.localDescription - return {"type": desc.type, "sdp": desc.sdp} - - async def apply_answer(self, sdp: dict[str, Any]) -> None: - answer = RTCSessionDescription(sdp=sdp["sdp"], type=sdp["type"]) - await self.pc.setRemoteDescription(answer) - - async def add_ice(self, candidate_data: dict[str, Any]) -> None: - candidate_str = candidate_data.get("candidate", "") - if not candidate_str: - return - sdp_mid = candidate_data.get("sdpMid", "0") - sdp_mline = candidate_data.get("sdpMLineIndex", 0) - if candidate_str.lstrip().startswith("{"): - try: - obj = json.loads(candidate_str) - except json.JSONDecodeError: - logger.debug("addIceCandidate: malformed JSON, skipping") - return - candidate_str = obj.get("candidate", "") or "" - sdp_mid = obj.get("sdpMid", sdp_mid) - sdp_mline = obj.get("sdpMLineIndex", sdp_mline) - if not candidate_str: - return - from aiortc.sdp import candidate_from_sdp - sdp = candidate_str - if sdp.startswith("candidate:"): - sdp = sdp[len("candidate:"):] - try: - candidate = 
candidate_from_sdp(sdp) - except Exception as exc: - logger.debug("addIceCandidate: failed to parse SDP %r: %s", sdp, exc) - return - candidate.sdpMid = sdp_mid - candidate.sdpMLineIndex = sdp_mline - await self.pc.addIceCandidate(candidate) - - async def wait_connected(self, timeout: float = 30.0) -> None: - try: - await asyncio.wait_for(self._connected_event.wait(), timeout=timeout) - except asyncio.TimeoutError: - raise CekiBrowserError("WebRTC connection timed out") - if self.pc.connectionState != "connected": - raise CekiBrowserError(f"WebRTC connection failed: {self.pc.connectionState}") - if self.cmd_channel and self.cmd_channel.readyState != "open": - try: - await asyncio.wait_for(self._cmd_open_event.wait(), timeout=10.0) - except asyncio.TimeoutError: - raise CekiBrowserError("Command DataChannel did not open after RTC connect") - - async def send_command(self, method: str, params: dict[str, Any] | None = None, timeout: float = 30.0) -> Any: - if not self.cmd_channel or self.cmd_channel.readyState != "open": - raise CekiBrowserError("Command DataChannel not open") - - msg_id = self._cmd_next_id - self._cmd_next_id += 1 - - payload: dict[str, Any] = {"jsonrpc": "2.0", "method": method, "id": msg_id} - if params: - payload["params"] = params - - fut: asyncio.Future[Any] = asyncio.get_running_loop().create_future() - self._cmd_pending[msg_id] = fut - self._cmd_send_ts[msg_id] = time.time() * 1000 - - self.cmd_channel.send(json.dumps(payload)) - - try: - return await asyncio.wait_for(fut, timeout=timeout) - except asyncio.TimeoutError: - self._cmd_pending.pop(msg_id, None) - self._cmd_send_ts.pop(msg_id, None) - raise CommandTimeout(f"Command {method} timed out after {timeout}s", code=-1020) - - async def close(self) -> None: - if self._closed: - return - self._closed = True - for fut in self._cmd_pending.values(): - if not fut.done(): - fut.cancel() - self._cmd_pending.clear() - await self.pc.close() - - def _setup_cmd_channel(self, channel: RTCDataChannel) 
-> None: - @channel.on("open") - def on_open() -> None: - self._cmd_open_event.set() - - if channel.readyState == "open": - self._cmd_open_event.set() - - def _process_chunk(msg: dict[str, Any]) -> None: - chunk_id = msg.get("id") - if chunk_id is None: - return - chunk = msg["chunk"] - i = chunk["i"] - n = chunk["n"] - data_slice = chunk["data"] - if chunk_id not in self._chunk_buf: - self._chunk_buf[chunk_id] = {"n": n, "parts": {}, "received": set()} - buf = self._chunk_buf[chunk_id] - buf["parts"][i] = data_slice - buf["received"].add(i) - if len(buf["received"]) == n: - full = "".join(buf["parts"][j] for j in range(n)) - del self._chunk_buf[chunk_id] - try: - full_msg = json.loads(full) - except (json.JSONDecodeError, TypeError): - return - _dispatch_response(full_msg) - - def _dispatch_response(msg: dict[str, Any]) -> None: - method = msg.get("method") - if method == "bridge.cmd_received": - params = msg.get("params") or {} - cmd_id = params.get("id") - send_ts = self._cmd_send_ts.get(cmd_id) if cmd_id is not None else None - latency = int(params.get("ts", time.time() * 1000) - send_ts) if send_ts is not None else -1 - bridge_probe.info( - "bridge.cmd_received id=%s method=%s latency_send_to_recv=%dms channel_state=%s", - cmd_id, params.get("method"), latency, params.get("channel_state"), - ) - return - if method == "bridge.sw_response_sent": - params = msg.get("params") or {} - cmd_id = params.get("id") - send_ts = self._cmd_send_ts.get(cmd_id) if cmd_id is not None else None - latency = int(time.time() * 1000 - send_ts) if send_ts is not None else -1 - bridge_probe.info( - "bridge.sw_response_sent id=%s latency_offscreen_roundtrip=%dms", - cmd_id, latency, - ) - return - msg_id = msg.get("id") - if msg_id is not None and msg_id in self._cmd_pending: - self._cmd_send_ts.pop(msg_id, None) - fut = self._cmd_pending.pop(msg_id) - if "error" in msg: - err = msg["error"] - fut.set_exception(CekiBrowserError( - err.get("message", "Unknown error"), - 
code=err.get("code", 0), - )) - else: - fut.set_result(msg.get("result")) - - @channel.on("message") - def on_message(data: str | bytes) -> None: - try: - msg = json.loads(data) - except (json.JSONDecodeError, TypeError): - return - - chunk_meta = msg.get("chunk") - if chunk_meta is not None: - _process_chunk(msg) - return - - _dispatch_response(msg) - - async def _gather_ice(self) -> None: - ice_done = asyncio.Event() - - @self.pc.on("icegatheringstatechange") - def on_gather() -> None: - if self.pc.iceGatheringState == "complete": - ice_done.set() - - if self.pc.iceGatheringState == "complete": - return - await asyncio.wait_for(ice_done.wait(), timeout=10.0) diff --git a/ceki_browser/types.py b/ceki_browser/types.py deleted file mode 100644 index cb10b9d..0000000 --- a/ceki_browser/types.py +++ /dev/null @@ -1,96 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import Any - - -@dataclass -class QueryResult: - elements: list[dict[str, str | None]] = field(default_factory=list) - - @property - def text(self) -> str | None: - if self.elements: - return self.elements[0].get("textContent") - return None - - @property - def value(self) -> str | None: - if self.elements: - return self.elements[0].get("value") - return None - - def __len__(self) -> int: - return len(self.elements) - - -@dataclass -class NavigateResult: - url: str = "" - title: str = "" - status: int = 0 - - -@dataclass -class ScreenshotResult: - data: str = "" - width: int = 0 - height: int = 0 - - -@dataclass -class HtmlResult: - html: str = "" - - -@dataclass -class SessionInfo: - request_id: str = "" - session_id: str = "" - status: str = "" - - -@dataclass -class HumanActionResult: - status: str = "" - request_id: str = "" - - -@dataclass -class ChatMessage: - _id: str = "" - topic_id: str = "" - author_id: int = 0 - author_name: str = "" - type: str = "text" - content: str = "" - media: dict[str, Any] | None = None - created_at: str = "" - - 
-@dataclass -class TypingEvent: - user_id: int = 0 - is_typing: bool = False - - -def parse_chat_message(data: dict[str, Any]) -> ChatMessage: - return ChatMessage( - _id=str(data.get("_id", data.get("message_id", data.get("id", "")))), - topic_id=str(data.get("topic_id", "")), - author_id=int(data.get("author_id", data.get("user_id", 0))), - author_name=str(data.get("author_name", "")), - type=str(data.get("type", "text")), - content=str(data.get("content", "")), - media=data.get("media"), - created_at=str(data.get("created_at", "")), - ) - - -def parse_result(data: Any, cls: type) -> Any: - if data is None: - return cls() - if isinstance(data, dict): - valid_fields = {f.name for f in cls.__dataclass_fields__.values()} # type: ignore[attr-defined] - return cls(**{k: v for k, v in data.items() if k in valid_fields}) - return cls() diff --git a/examples/SMOKE.md b/examples/SMOKE.md new file mode 100644 index 0000000..27d4281 --- /dev/null +++ b/examples/SMOKE.md @@ -0,0 +1,61 @@ +# Real-signup Smoke + +Это не unit-тест. Это пошаговая ручная проверка SDK + relay + plugin против реального сервиса (Reddit / GitHub). + +## Pre-requisites + +1. Provider онлайн с известным `SCHEDULE_ID` на dev relay (`wss://relay.ittribe.org/ws/agent`) +2. Agent токен с ability `browser:relay` от dev backend +3. IMAP-доступ к `kom@ceki.me` (plus-addressing) +4. Chrome с установленным расширением `ceki-browser-extension` (dev build) на стороне провайдера +5. 
Provider положительный баланс (`agent:deposit` сделан) + +## Env + +``` +export CEKI_API_KEY=1| +export CEKI_RELAY_URL=wss://relay.ittribe.org/ws/agent +export CEKI_ENV=dev +export SCHEDULE_ID=42 +export IMAP_HOST=mail.ceki.me +export IMAP_USER=kom@ceki.me +export IMAP_PASS= +``` + +## Reddit + +```bash +EMAIL_TAG=browserlend-reddit-$(date +%s) python examples/reddit_signup.py +``` + +Чек-лист на провайдере: +- [ ] Side-panel чата открылся +- [ ] Пришёл скриншот капчи +- [ ] Провайдер вписал ответ — агент его получил +- [ ] Сессия не упала по heartbeat (-1011) +- [ ] Биллинг тикает (см. логи backend, agent_wallet -) + +Чек-лист на агенте: +- [ ] connect успешен (handshake без 401) +- [ ] rent вернул match с chat_topic_id +- [ ] Page.navigate отрабатывает, loadEventFired ловится +- [ ] confirm-link получен из IMAP < 2 мин после сабмита +- [ ] Финальная страница «Email verified» + +## GitHub + +```bash +EMAIL_TAG=browserlend-github-$(date +%s) python examples/github_signup.py +``` + +## Известные риски + +- Reddit Cloudflare: возможен soft-block если IP сильно подозрительный → провайдер должен иметь чистый residential IP +- GitHub puzzle: 2-3 раунда — провайдеру может надоесть, ставить таймауты и логировать +- IMAP rate limit: poll каждые 5с, не чаще + +## Если smoke упал + +- НЕ переключаться на example.com / synthetic (см. 
feedback_no_synthetic_smokes) +- Копать root cause: relay logs (`docker logs browser-relay`), backend logs, plugin chat-logger в Mongo +- Открыть issue с repro и логами diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/captcha_helper.py b/examples/captcha_helper.py new file mode 100644 index 0000000..e998c2f --- /dev/null +++ b/examples/captcha_helper.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +import asyncio +import base64 +import os + +from ceki_browser import connect + + +async def main() -> None: + client = await connect(os.environ["CEKI_API_KEY"]) + browser = await client.rent(int(os.environ["SCHEDULE_ID"])) + + provider_replied = asyncio.Event() + provider_text: dict[str, str] = {} + + async def on_msg(msg) -> None: + if msg.is_system(): + return + if msg.is_from_provider(browser.provider_user_id): + provider_text["value"] = msg.text or "" + provider_replied.set() + + browser.chat.on_message(on_msg) + + await browser.send({"method": "Page.navigate", "params": {"url": "https://reddit.com/register"}}) + shot = await browser.send({"method": "Page.captureScreenshot"}) + + png = base64.b64decode(shot["data"]) + await browser.chat.send_image(png) + await browser.chat.send("Please solve the captcha, return text only") + + await asyncio.wait_for(provider_replied.wait(), timeout=120) + print("Provider:", provider_text["value"]) + + await browser.close() + await client.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/github_signup.py b/examples/github_signup.py new file mode 100644 index 0000000..1b324af --- /dev/null +++ b/examples/github_signup.py @@ -0,0 +1,203 @@ +""" +GitHub signup via ceki-browser SDK. + +Run: + CEKI_API_KEY=... \\ + CEKI_RELAY_URL=wss://relay.ittribe.org/ws/agent \\ + IMAP_HOST=mail.ceki.me IMAP_USER=kom@ceki.me IMAP_PASS=... 
\\ + EMAIL_TAG=browserlend2 \\ + python examples/github_signup.py + +Discovers an online provider via `client.search()` and rents the first one. +Optional `SCHEDULE_ID=N` env pins a specific provider (skip discovery). +""" +from __future__ import annotations + +import asyncio +import base64 +import os +import secrets +import string + +from ceki_browser import connect +from ceki_browser._connect import ConnectOptions + +from .imap_helper import wait_for_confirm_link + + +def _random_password(length: int = 16) -> str: + alphabet = string.ascii_letters + string.digits + "!@#$" + return "".join(secrets.choice(alphabet) for _ in range(length)) + + +async def main() -> None: + api_key = os.environ["CEKI_API_KEY"] + relay_url = os.environ.get("CEKI_RELAY_URL", "wss://relay.ittribe.org/ws/agent") + pinned_schedule_id = os.environ.get("SCHEDULE_ID") + email_tag = os.environ.get("EMAIL_TAG", f"browserlend-{secrets.token_hex(4)}") + email_base = os.environ.get("EMAIL_BASE", "kom@ceki.me") + local, _, domain = email_base.partition("@") + + email_addr = f"{local}+{email_tag}@{domain}" + username = f"tribe-{secrets.token_hex(4)}" + password = _random_password() + + print(f"[github_signup] email={email_addr} username={username}") + + client = await connect(api_key, ConnectOptions(relay_url=relay_url)) + + if pinned_schedule_id is not None: + schedule_id = int(pinned_schedule_id) + print(f"[search] using pinned SCHEDULE_ID={schedule_id}") + else: + options = await client.search({}) + if not options: + print("[search] no online providers — try later") + await client.close() + return + schedule_id = options[0].schedule_id + print(f"[search] found {len(options)} provider(s), renting schedule_id={schedule_id}") + + browser = await client.rent(schedule_id) + print(f"[session] id={browser.session_id} chat_topic_id={browser.chat_topic_id}") + print(f"[session] browser_info={browser.browser_info}") + + provider_replies: asyncio.Queue[str] = asyncio.Queue() + + async def on_chat(msg) -> 
None: + if msg.is_system(): + return + if msg.is_from_provider(browser.provider_user_id) and msg.text: + await provider_replies.put(msg.text) + + browser.chat.on_message(on_chat) + + async def on_tab(url: str) -> None: + print(f"[tab_opened] {url} — switching") + await browser.switch_tab() + + browser.on_tab_opened(on_tab) + + load_fired = asyncio.Event() + + async def on_event(method: str, params: dict) -> None: + if method == "Page.loadEventFired": + load_fired.set() + + browser.on_event(on_event) + + load_fired.clear() + await browser.send({"method": "Page.navigate", "params": {"url": "https://github.com/signup"}}) + await asyncio.wait_for(load_fired.wait(), timeout=30) + print("[nav] signup page loaded") + + async def type_into(selector: str, value: str) -> None: + await browser.send({ + "method": "Runtime.evaluate", + "params": { + "expression": f""" + (function() {{ + var el = document.querySelector({repr(selector)}); + if (!el) return false; + el.focus(); + var nativeInputValueSetter = Object.getOwnPropertyDescriptor( + window.HTMLInputElement.prototype, 'value').set; + nativeInputValueSetter.call(el, {repr(value)}); + el.dispatchEvent(new Event('input', {{ bubbles: true }})); + el.dispatchEvent(new Event('change', {{ bubbles: true }})); + return true; + }})() + """ + }, + }) + + await asyncio.sleep(1) + await type_into("#email", email_addr) + await asyncio.sleep(0.5) + + await browser.send({ + "method": "Runtime.evaluate", + "params": {"expression": "document.querySelector('button[type=submit]')?.click()"}, + }) + await asyncio.sleep(1) + + await type_into("#password", password) + await asyncio.sleep(0.3) + + await browser.send({ + "method": "Runtime.evaluate", + "params": {"expression": "document.querySelector('button[type=submit]')?.click()"}, + }) + await asyncio.sleep(1) + + await type_into("#login", username) + await asyncio.sleep(0.3) + + await browser.send({ + "method": "Runtime.evaluate", + "params": {"expression": 
"document.querySelector('button[type=submit]')?.click()"}, + }) + await asyncio.sleep(2) + + max_captcha_rounds = 3 + for round_num in range(1, max_captcha_rounds + 1): + captcha_res = await browser.send({ + "method": "Runtime.evaluate", + "params": { + "expression": """ + !!(document.querySelector('[data-hcaptcha-widget-id]') || + document.querySelector('.captcha-container') || + document.querySelector('iframe[title*="captcha" i]')) + """ + }, + }) + if not captcha_res.get("result", {}).get("value"): + print(f"[captcha] round {round_num}: no captcha detected, proceeding") + break + + print(f"[captcha] round {round_num}: detected — sending screenshot to provider") + shot = await browser.send({"method": "Page.captureScreenshot"}) + png = base64.b64decode(shot["data"]) + await browser.chat.send_image(png) + await browser.chat.send( + f"GitHub captcha puzzle (round {round_num}). " + "Please solve it and reply with the answer or describe what to click." + ) + print(f"[captcha] round {round_num}: waiting for provider (up to 300s)...") + answer = await asyncio.wait_for(provider_replies.get(), timeout=300) + print(f"[captcha] round {round_num}: provider answered: {answer!r}") + + await browser.send({ + "method": "Input.insertText", + "params": {"text": answer}, + }) + await asyncio.sleep(1) + await browser.send({ + "method": "Runtime.evaluate", + "params": { + "expression": "document.querySelector('button[type=submit]')?.click()" + }, + }) + await asyncio.sleep(2) + + print(f"[imap] waiting for confirm email to {email_addr}...") + confirm_url = await wait_for_confirm_link(email_tag, timeout=120, service="github") + print(f"[imap] got confirm link: {confirm_url}") + + load_fired.clear() + await browser.send({"method": "Page.navigate", "params": {"url": confirm_url}}) + await asyncio.wait_for(load_fired.wait(), timeout=30) + + title_res = await browser.send({ + "method": "Runtime.evaluate", + "params": {"expression": "document.title"}, + }) + print(f"[confirm] page title: 
{title_res.get('result', {}).get('value', '')}") + print(f"✅ GitHub account created: {username} / {email_addr}") + + await browser.close() + await client.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/hello.py b/examples/hello.py new file mode 100644 index 0000000..c467dd4 --- /dev/null +++ b/examples/hello.py @@ -0,0 +1,22 @@ +import asyncio +import os + +from ceki_browser import connect + + +async def main() -> None: + api_key = os.environ["CEKI_API_KEY"] + client = await connect(api_key) + print("Connected to relay") + + options = await client.search({"geo": "US"}, limit=5) + print(f"Found {len(options)} browser(s)") + for opt in options: + print(f" schedule_id={opt.schedule_id} geo={opt.geo} price={opt.price_per_min}/min") + + await client.close() + print("Done") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/imap_helper.py b/examples/imap_helper.py new file mode 100644 index 0000000..242cd27 --- /dev/null +++ b/examples/imap_helper.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import asyncio +import email +import imaplib +import os +import re + +CONFIRM_PATTERNS = { + "reddit": re.compile(r"https://www\.reddit\.com/account/verify-email/[A-Za-z0-9_\-]+"), + "github": re.compile( + r"https://github\.com/users/[A-Za-z0-9_\-]+/email/verify\?[^\"\s]+" + ), +} + + +def _extract_body(msg: email.message.Message) -> str: + if msg.is_multipart(): + for part in msg.walk(): + if part.get_content_type() in ("text/html", "text/plain"): + charset = part.get_content_charset() or "utf-8" + payload = part.get_payload(decode=True) + if payload: + return payload.decode(charset, errors="replace") + return "" + payload = msg.get_payload(decode=True) + if payload: + charset = msg.get_content_charset() or "utf-8" + return payload.decode(charset, errors="replace") + return "" + + +def _check_imap(tag: str, service: str) -> str | None: + pattern = CONFIRM_PATTERNS[service] + email_base = 
os.environ.get("EMAIL_BASE", "kom@ceki.me") + local, _, domain = email_base.partition("@") + with imaplib.IMAP4_SSL(os.environ["IMAP_HOST"]) as m: + m.login(os.environ["IMAP_USER"], os.environ["IMAP_PASS"]) + m.select("INBOX") + typ, data = m.search(None, f'TO "{local}+{tag}@{domain}"') + if typ != "OK" or not data[0]: + return None + for msg_id in reversed(data[0].split()): + typ2, msg_data = m.fetch(msg_id, "(RFC822)") + if typ2 != "OK": + continue + raw = msg_data[0][1] # type: ignore[index] + msg = email.message_from_bytes(raw) + body = _extract_body(msg) + match = pattern.search(body) + if match: + return match.group(0) + return None + + +async def wait_for_confirm_link( + tag: str, + *, + timeout: float = 120, + service: str = "reddit", + poll_interval: float = 5, +) -> str: + deadline = asyncio.get_event_loop().time() + timeout + while asyncio.get_event_loop().time() < deadline: + link = await asyncio.to_thread(_check_imap, tag, service) + if link: + return link + await asyncio.sleep(poll_interval) + raise TimeoutError(f"No confirm link for tag={tag} within {timeout}s") diff --git a/examples/mvp_smoke_p2p.py b/examples/mvp_smoke_p2p.py deleted file mode 100644 index fc0ba67..0000000 --- a/examples/mvp_smoke_p2p.py +++ /dev/null @@ -1,274 +0,0 @@ -""" -P2P-MVP Integration Smoke Test. - -Requires: - - browser-relay running (signaling only) - - coturn TURN server - - Chrome extension loaded in a provider's browser, provider online - - Agent API key from dashboard (Sanctum token) - -Environment variables: - CEKI_TOKEN — agent Sanctum API token (required) - RELAY_URL — relay WebSocket URL (required) - -Usage: - export CEKI_TOKEN="123|abcdef..." 
- export RELAY_URL="wss://browser.ittribe.org/ws/agent" - python examples/mvp_smoke_p2p.py -""" -import asyncio -import base64 -import hashlib -import logging -import os -import sys -import time - -from ceki_browser import Browser -from ceki_browser.errors import CekiBrowserError -from ceki_browser.types import ChatMessage - -logging.basicConfig( - level=logging.DEBUG, - format="%(asctime)s %(levelname)s %(name)s: %(message)s", - handlers=[ - logging.StreamHandler(sys.stdout), - logging.FileHandler("examples/mvp_smoke_p2p.log", mode="w"), - ], -) -log = logging.getLogger("mvp_smoke_p2p") - -RELAY_URL = os.environ.get("RELAY_URL", "") -TOKEN = os.environ.get("CEKI_TOKEN", "") - -STEPS_PASSED: list[str] = [] -STEPS_FAILED: list[str] = [] - - -def step_ok(name: str, detail: str = ""): - msg = f"PASS {name}" + (f" — {detail}" if detail else "") - log.info(msg) - STEPS_PASSED.append(name) - - -def step_fail(name: str, detail: str = ""): - msg = f"FAIL {name}" + (f" — {detail}" if detail else "") - log.error(msg) - STEPS_FAILED.append(name) - - -def make_test_png(size_bytes: int = 200 * 1024) -> bytes: - """Generate a minimal valid PNG-like payload for testing.""" - header = b"\x89PNG\r\n\x1a\n" - padding = os.urandom(size_bytes - len(header)) - return header + padding - - -async def main() -> int: - if not TOKEN: - log.error("CEKI_TOKEN env var is required") - return 1 - if not RELAY_URL: - log.error("RELAY_URL env var is required") - return 1 - - log.info("=" * 60) - log.info("P2P-MVP SMOKE TEST START") - log.info(f"relay: {RELAY_URL}") - log.info("=" * 60) - - t0 = time.time() - - received_messages: list[ChatMessage] = [] - - # --- Step 1: Connect to relay --- - try: - br = Browser(token=TOKEN, relay_url=RELAY_URL) - info = await br.connect() - step_ok("connect", f"agent_id={br.agent_id}") - except Exception as e: - step_fail("connect", str(e)) - return 1 - - try: - # --- Step 2: Create session (waits for provider match + RTC handshake) --- - log.info("Requesting 
incognito session... (waiting for provider match + P2P, timeout=120s)") - try: - session = await br.session( - mode="incognito", - geo="", - max_price_per_min=0.10, - estimated_duration_min=5, - wait_timeout=120.0, - ) - step_ok("session_matched", f"session_id={session.session_id}") - except Exception as e: - step_fail("session_matched", str(e)) - await br.close() - return 1 - - try: - # --- Step 3: Verify P2P connection --- - rtc = session.rtc - if rtc and rtc.pc.connectionState == "connected": - step_ok("rtc_connected", f"connectionState={rtc.pc.connectionState}") - else: - state = rtc.pc.connectionState if rtc else "no_rtc" - step_fail("rtc_connected", f"connectionState={state}") - - # --- Step 4: Chat available --- - try: - chat = session.chat - step_ok("chat_available", "relay chat API ready") - except Exception as e: - step_fail("chat_available", str(e)) - - # Register chat listener - session.chat.on_message(received_messages.append) - - # --- Step 5: Send chat text --- - try: - await session.chat.send("Привет, начинаю P2P-MVP smoke test.") - step_ok("chat_send_text", "sent via relay") - except Exception as e: - step_fail("chat_send_text", str(e)) - - # --- Step 6: Navigate via ceki-cmd DataChannel --- - try: - nav = await session.navigate("https://github.com") - step_ok("navigate", f"url={nav.url}") - except Exception as e: - step_fail("navigate", str(e)) - - # --- Step 7: Query DOM --- - try: - result = await session.query("a") - step_ok("query_dom", f"text={result.text!r}") - except Exception as e: - step_fail("query_dom", str(e)) - - # --- Step 8: Click --- - try: - await session.click("a") - step_ok("click", "a") - except Exception as e: - step_fail("click", str(e)) - - # --- Step 9: Type into search --- - try: - await session.type("input[name='q']", "hello") - step_ok("type", "input[name='q'] 'hello'") - except CekiBrowserError: - step_ok("type_skipped", "no matching input on page") - except Exception as e: - step_fail("type", str(e)) - - # --- Step 10: 
Screenshot --- - try: - shot = await session.screenshot(format="png") - size_kb = len(base64.b64decode(shot.data)) / 1024 if shot.data else 0 - step_ok("screenshot", f"{shot.width}x{shot.height} {size_kb:.0f}KB") - except Exception as e: - step_fail("screenshot", str(e)) - - # --- Step 11: Send image via relay chat --- - test_png = make_test_png(200 * 1024) - test_png_sha256 = hashlib.sha256(test_png).hexdigest() - log.info(f"Test image: {len(test_png)} bytes, sha256={test_png_sha256[:16]}...") - try: - await session.chat.send_image(test_png, "image/png") - step_ok("chat_send_image", f"{len(test_png)} bytes sent via relay") - except Exception as e: - step_fail("chat_send_image", str(e)) - - # --- Step 12: Wait for provider chat response --- - log.info("Waiting up to 30s for provider chat responses...") - deadline = time.time() + 30 - while time.time() < deadline: - if len(received_messages) >= 1: - break - await asyncio.sleep(0.5) - - if received_messages: - step_ok( - "chat_recv_message", - f"got {len(received_messages)} msg(s), first: {received_messages[0].content[:50]!r}", - ) - else: - step_fail( - "chat_recv_message", - "no messages from provider (manual provider response required)", - ) - - # --- Step 13: Check chat history --- - history = await session.chat.history() - log.info(f"Chat history: {len(history)} messages") - if len(history) >= 1: - step_ok("chat_history", f"{len(history)} messages") - else: - step_ok( - "chat_history_partial", - f"{len(history)} messages (provider response may be missing)", - ) - - # --- Step 14: Second chat text --- - try: - await session.chat.send("Smoke test завершается. 
Все команды отработали.") - step_ok("chat_send_text_2", "sent") - except Exception as e: - step_fail("chat_send_text_2", str(e)) - - # --- Step 15: End session --- - try: - await session.end(reason="completed") - step_ok("session_end", "reason=completed") - except Exception as e: - step_fail("session_end", str(e)) - - # Verify RTC closed - if rtc: - log.info(f"RTC state after end: {rtc.pc.connectionState}") - log.info("Session ended, chat closed") - - except Exception as e: - log.error(f"Unexpected error during session: {e}", exc_info=True) - try: - await session.end(reason="error") - except Exception: - pass - - finally: - await br.close() - - elapsed = time.time() - t0 - - # --- Summary --- - log.info("=" * 60) - log.info("P2P-MVP SMOKE TEST SUMMARY") - log.info(f"Elapsed: {elapsed:.1f}s") - log.info(f"Passed: {len(STEPS_PASSED)}/{len(STEPS_PASSED) + len(STEPS_FAILED)}") - for s in STEPS_PASSED: - log.info(f" ✓ {s}") - for s in STEPS_FAILED: - log.error(f" ✗ {s}") - - critical_steps = { - "connect", "session_matched", "rtc_connected", "navigate", - "query_dom", "screenshot", "chat_send_text", "chat_send_image", - "session_end", - } - critical_fails = [s for s in STEPS_FAILED if s in critical_steps] - - if critical_fails: - log.error(f"STATUS: FAIL (critical: {', '.join(critical_fails)})") - return 1 - elif STEPS_FAILED: - log.warning(f"STATUS: PARTIAL PASS ({len(STEPS_FAILED)} non-critical failures)") - return 0 - else: - log.info("STATUS: PASS") - return 0 - - -if __name__ == "__main__": - sys.exit(asyncio.run(main())) diff --git a/examples/navigate.py b/examples/navigate.py new file mode 100644 index 0000000..4194442 --- /dev/null +++ b/examples/navigate.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +import asyncio +import os + +from ceki_browser import connect + + +async def main() -> None: + client = await connect(os.environ["CEKI_API_KEY"]) + browsers = await client.search(limit=5) + if not browsers: + print("no browsers available") + await 
client.close() + return + + browser = await client.rent(browsers[0].schedule_id) + + load_fired = asyncio.Event() + + async def on_event(method: str, params: dict) -> None: + if method == "Page.loadEventFired": + load_fired.set() + + browser.on_event(on_event) + + await browser.send({"method": "Page.enable"}) + await browser.send({"method": "Page.navigate", "params": {"url": "https://example.com"}}) + + await asyncio.wait_for(load_fired.wait(), timeout=30) + print("page loaded") + + shot = await browser.send({"method": "Page.captureScreenshot"}) + print(f"screenshot data length: {len(shot.get('data', ''))}") + + await browser.close() + await client.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/reddit_signup.py b/examples/reddit_signup.py new file mode 100644 index 0000000..b6b5730 --- /dev/null +++ b/examples/reddit_signup.py @@ -0,0 +1,205 @@ +""" +Reddit signup via ceki-browser SDK. + +Run: + CEKI_API_KEY=... \\ + CEKI_RELAY_URL=wss://relay.ittribe.org/ws/agent \\ + IMAP_HOST=mail.ceki.me IMAP_USER=kom@ceki.me IMAP_PASS=... \\ + EMAIL_TAG=browserlend1 \\ + python examples/reddit_signup.py + +Discovers an online provider via `client.search()` and rents the first one. +Optional `SCHEDULE_ID=N` env pins a specific provider (skip discovery). 
+""" +from __future__ import annotations + +import asyncio +import base64 +import os +import secrets +import string + +from ceki_browser import connect +from ceki_browser._connect import ConnectOptions + +from .imap_helper import wait_for_confirm_link + + +def _random_password(length: int = 16) -> str: + alphabet = string.ascii_letters + string.digits + "!@#$" + return "".join(secrets.choice(alphabet) for _ in range(length)) + + +async def main() -> None: + api_key = os.environ["CEKI_API_KEY"] + relay_url = os.environ.get("CEKI_RELAY_URL", "wss://relay.ittribe.org/ws/agent") + pinned_schedule_id = os.environ.get("SCHEDULE_ID") + email_tag = os.environ.get("EMAIL_TAG", f"browserlend-{secrets.token_hex(4)}") + + email_addr = f"kom+{email_tag}@ceki.me" + username = f"tribe_{secrets.token_hex(4)}" + password = _random_password() + + print(f"[reddit_signup] email={email_addr} username={username}") + + client = await connect(api_key, ConnectOptions(relay_url=relay_url)) + + if pinned_schedule_id is not None: + schedule_id = int(pinned_schedule_id) + print(f"[search] using pinned SCHEDULE_ID={schedule_id}") + else: + options = await client.search({}) + if not options: + print("[search] no online providers — try later") + await client.close() + return + schedule_id = options[0].schedule_id + print(f"[search] found {len(options)} provider(s), renting schedule_id={schedule_id}") + + browser = await client.rent(schedule_id) + print(f"[session] id={browser.session_id} chat_topic_id={browser.chat_topic_id}") + print(f"[session] browser_info={browser.browser_info}") + + provider_replies: asyncio.Queue[str] = asyncio.Queue() + + async def on_chat(msg) -> None: + if msg.is_system(): + return + if msg.is_from_provider(browser.provider_user_id) and msg.text: + await provider_replies.put(msg.text) + + browser.chat.on_message(on_chat) + + async def on_tab(url: str) -> None: + print(f"[tab_opened] {url} — switching") + await browser.switch_tab() + + browser.on_tab_opened(on_tab) + + 
load_fired = asyncio.Event() + frame_navigated = asyncio.Event() + + async def on_event(method: str, params: dict) -> None: + if method == "Page.loadEventFired": + load_fired.set() + elif method == "Page.frameNavigated": + frame_navigated.set() + + browser.on_event(on_event) + + await browser.send({"method": "Page.enable"}) + await browser.send({"method": "Network.enable"}) + + load_fired.clear() + await browser.send({ + "method": "Page.navigate", + "params": {"url": "https://www.reddit.com/register"}, + }) + await asyncio.wait_for(load_fired.wait(), timeout=30) + print("[nav] register page loaded") + + async def fill_field(selector: str, value: str) -> None: + await browser.send({ + "method": "Runtime.evaluate", + "params": { + "expression": f""" + (function() {{ + var el = document.querySelector({repr(selector)}); + if (!el) return false; + var nativeInputValueSetter = Object.getOwnPropertyDescriptor( + window.HTMLInputElement.prototype, 'value').set; + nativeInputValueSetter.call(el, {repr(value)}); + el.dispatchEvent(new Event('input', {{ bubbles: true }})); + el.dispatchEvent(new Event('change', {{ bubbles: true }})); + return true; + }})() + """ + }, + }) + + await asyncio.sleep(1) + await fill_field('input[name="email"]', email_addr) + await asyncio.sleep(0.3) + await fill_field('input[name="username"]', username) + await asyncio.sleep(0.3) + await fill_field('input[name="password"]', password) + await asyncio.sleep(0.5) + + await browser.send({ + "method": "Runtime.evaluate", + "params": { + "expression": """ + (function() { + var btn = document.querySelector('button[type="submit"]'); + if (btn) { btn.click(); return true; } + return false; + })() + """ + }, + }) + print("[form] submitted") + + await asyncio.sleep(3) + + captcha_detected = await browser.send({ + "method": "Runtime.evaluate", + "params": { + "expression": """ + !!(document.querySelector('iframe[src*="captcha"]') || + document.querySelector('[data-testid="captcha"]') || + 
document.title.toLowerCase().includes('captcha')) + """ + }, + }) + + if captcha_detected.get("result", {}).get("value"): + print("[captcha] detected — sending screenshot to provider") + shot = await browser.send({"method": "Page.captureScreenshot"}) + png = base64.b64decode(shot["data"]) + await browser.chat.send_image(png) + await browser.chat.send( + "Please solve the captcha visible on screen and reply with the answer text" + ) + print("[captcha] waiting for provider answer (up to 300s)...") + answer = await asyncio.wait_for(provider_replies.get(), timeout=300) + print(f"[captcha] provider answered: {answer!r}") + + await browser.send({ + "method": "Input.insertText", + "params": {"text": answer}, + }) + await asyncio.sleep(1) + await browser.send({ + "method": "Runtime.evaluate", + "params": { + "expression": """ + (function() { + var btn = document.querySelector('button[type="submit"]'); + if (btn) { btn.click(); return true; } + return false; + })() + """ + }, + }) + + print(f"[imap] waiting for confirm email to {email_addr}...") + confirm_url = await wait_for_confirm_link(email_tag, timeout=120, service="reddit") + print(f"[imap] got confirm link: {confirm_url}") + + load_fired.clear() + await browser.send({"method": "Page.navigate", "params": {"url": confirm_url}}) + await asyncio.wait_for(load_fired.wait(), timeout=30) + + title_res = await browser.send({ + "method": "Runtime.evaluate", + "params": {"expression": "document.title"}, + }) + print(f"[confirm] page title: {title_res.get('result', {}).get('value', '')}") + print(f"✅ Reddit account created: {username} / {email_addr}") + + await browser.close() + await client.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/smoke/README.md b/examples/smoke/README.md new file mode 100644 index 0000000..a4a0063 --- /dev/null +++ b/examples/smoke/README.md @@ -0,0 +1,74 @@ +# Integration Smoke Tests — SDK 2.2.0+ + +Lifecycle integration tests against a live provider. 
Not for CI — requires a real browser rental environment. + +## Prerequisites + +- Provider online with known `SCHEDULE_ID` on dev relay +- Agent token with `browser:relay` ability and positive balance +- `ceki-browser` SDK installed (`pip install -e .` from repo root) +- Extension v0.6.74+ on provider Chrome + +## Environment Variables + +```bash +export CEKI_TOKEN="385|" +export CEKI_API_URL="https://clawapi.ittribe.org" # default +export CEKI_RELAY_URL="wss://browser.ittribe.org/ws/agent" # default +export SCHEDULE_ID=240 # default +``` + +Optional (scenario H only): +```bash +export CEKI_TOKEN_NO_FUNDS="" +``` + +## Scenarios + +### Automatic (no manual intervention) + +| ID | Name | What it tests | +|----|------|--------------| +| A | Happy path | connect → rent → Page.navigate → title check → screenshot → close | +| B | Auto-accept | Same as A (requires provider auto-accept enabled) | +| D | Offer timeout | Rent with nonexistent schedule — expects ProviderOffline/CekiError | +| H | Insufficient funds | Rent with zero-balance token — expects InsufficientFunds (needs CEKI_TOKEN_NO_FUNDS) | +| I | 10 sequential commands | 10x Runtime.evaluate in sequence | +| J | Long navigation | Page.navigate to httpbin.org/delay/5, wait for loadEventFired | + +### Manual (require provider-side action) + +| ID | Name | Instructions | +|----|------|-------------| +| C | Decline offer | Decline the offer in provider plugin when prompted | +| E | Chrome crash | Kill Chrome on provider machine during active session | +| F | Network drop | Disconnect provider network for ~30s, then reconnect | +| G | Kill session | Press Kill/Stop in provider plugin during active session | + +### Obsolete + +| ID | Name | Reason | +|----|------|--------| +| K | No incognito mode | `mode` parameter removed from public API in SDK 2.0+ | + +## Usage + +```bash +# Single scenario +python examples/smoke/mvp_smoke_v2.py --scenario A + +# Multiple scenarios +python examples/smoke/mvp_smoke_v2.py --scenario 
A,I,J + +# All automatic scenarios (manual are skipped) +python examples/smoke/mvp_smoke_v2.py --scenario all + +# Manual scenario (run individually) +python examples/smoke/mvp_smoke_v2.py --scenario C +``` + +## Known Risks + +- GitHub may show a Cloudflare challenge or login wall — the title check may fail. Not an SDK bug. +- httpbin.org may be slow or down — scenario J timeout is not an SDK issue. +- Provider must have auto-accept enabled for scenarios A/B to work without manual intervention. diff --git a/examples/smoke/__init__.py b/examples/smoke/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/smoke/mvp_smoke_v2.py b/examples/smoke/mvp_smoke_v2.py new file mode 100644 index 0000000..1708cfa --- /dev/null +++ b/examples/smoke/mvp_smoke_v2.py @@ -0,0 +1,466 @@ +#!/usr/bin/env python3 +"""Integration smoke tests for ceki-browser SDK 2.2.0+. + +Scenarios A–K exercising the full lifecycle through the public API. + +Usage: + python examples/smoke/mvp_smoke_v2.py --scenario A + python examples/smoke/mvp_smoke_v2.py --scenario all + python examples/smoke/mvp_smoke_v2.py --scenario A,I,J + +Environment variables: + CEKI_TOKEN Sanctum token (e.g. 
385|xxx) + CEKI_API_URL REST API base (default: https://clawapi.ittribe.org) + CEKI_RELAY_URL WS relay (default: wss://browser.ittribe.org/ws/agent) + SCHEDULE_ID Provider schedule id (default: 240) +""" +from __future__ import annotations + +import argparse +import asyncio +import base64 +import os +import sys +import time +import traceback +from typing import Any + +# Ensure the SDK is importable when running from repo root +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) + +from ceki_browser import ( + CekiError, + Client, + ConnectOptions, + InsufficientFunds, + ProviderDisconnected, + SessionEnded, + connect, +) +from ceki_browser._exceptions import ProviderOffline + +# ── Config ────────────────────────────────────────────────────────────────── + +TOKEN = os.environ.get("CEKI_TOKEN", "") +API_URL = os.environ.get("CEKI_API_URL", "https://clawapi.ittribe.org") +RELAY_URL = os.environ.get("CEKI_RELAY_URL", "wss://browser.ittribe.org/ws/agent") +SCHEDULE_ID = int(os.environ.get("SCHEDULE_ID", "240")) + +# ── Step reporting ────────────────────────────────────────────────────────── + +_steps: list[tuple[str, bool, str]] = [] + + +def step_ok(name: str, detail: str = "") -> None: + _steps.append((name, True, detail)) + tag = f" [{name}]" + print(f"\033[32m PASS {tag}\033[0m {detail}") + + +def step_fail(name: str, detail: str = "") -> None: + _steps.append((name, False, detail)) + tag = f" [{name}]" + print(f"\033[31m FAIL {tag}\033[0m {detail}") + + +def summary() -> int: + total = len(_steps) + passed = sum(1 for _, ok, _ in _steps if ok) + failed = total - passed + print() + if failed == 0: + print(f"\033[32m STATUS: PASS ({passed}/{total} steps)\033[0m") + else: + print(f"\033[31m STATUS: FAIL ({failed} failed, {passed} passed / {total})\033[0m") + return 0 if failed == 0 else 1 + + +def reset_steps() -> None: + _steps.clear() + + +# ── Helpers ───────────────────────────────────────────────────────────────── + +async def make_client() 
-> Client: + if not TOKEN: + raise RuntimeError("CEKI_TOKEN not set") + return await connect(TOKEN, ConnectOptions( + api_url=API_URL, + relay_url=RELAY_URL, + )) + + +async def wait_event(browser: Any, method: str, timeout: float = 15.0) -> dict: + """Wait for a specific CDP event, returns its params.""" + fut: asyncio.Future[dict] = asyncio.get_event_loop().create_future() + + async def handler(m: str, params: dict) -> None: + if m == method and not fut.done(): + fut.set_result(params) + + browser.on_event(handler) + return await asyncio.wait_for(fut, timeout=timeout) + + +# ── Scenario A: Happy path ───────────────────────────────────────────────── + +async def scenario_a() -> int: + print("\n=== Scenario A: Happy path ===") + reset_steps() + client = await make_client() + try: + step_ok("connect", f"relay={RELAY_URL}") + + browser = await client.rent(SCHEDULE_ID) + step_ok("rent", f"session={browser.session_id}") + + # First navigate creates the incognito window on the provider side; + # CDP domains like Page.enable only work after the window exists. 
+ await browser.send({"method": "Page.navigate", "params": {"url": "about:blank"}}) + step_ok("window_init", "Page.navigate about:blank (creates window)") + + await browser.send({"method": "Page.enable"}) + step_ok("Page.enable") + + load_fut = asyncio.ensure_future(wait_event(browser, "Page.loadEventFired", timeout=20)) + await browser.send({"method": "Page.navigate", "params": {"url": "https://github.com"}}) + step_ok("Page.navigate", "url=https://github.com") + + await load_fut + step_ok("Page.loadEventFired") + + title_resp = await browser.send({ + "method": "Runtime.evaluate", + "params": {"expression": "document.title"}, + }) + title = title_resp.get("result", {}).get("value", "") + if "GitHub" in title or "github" in title.lower(): + step_ok("title_check", f"title={title!r}") + else: + step_fail("title_check", f"expected 'GitHub' in title, got {title!r}") + + screenshot_resp = await browser.send({"method": "Page.captureScreenshot"}) + data = screenshot_resp.get("data", "") + img_bytes = len(base64.b64decode(data)) if data else 0 + if img_bytes > 10_000: + step_ok("screenshot", f"{img_bytes} bytes") + else: + step_fail("screenshot", f"too small: {img_bytes} bytes") + + await browser.close() + step_ok("close") + except Exception as exc: + step_fail("exception", f"{type(exc).__name__}: {exc}") + traceback.print_exc() + finally: + await client.close() + return summary() + + +# ── Scenario B: Auto-accept (same as A) ──────────────────────────────────── + +async def scenario_b() -> int: + print("\n=== Scenario B: Auto-accept (same as A, requires provider auto-accept) ===") + return await scenario_a() + + +# ── Scenario C: Decline offer (manual) ───────────────────────────────────── + +async def scenario_c() -> int: + print("\n=== Scenario C: Decline offer (MANUAL — decline in provider plugin) ===") + reset_steps() + client = await make_client() + try: + step_ok("connect") + print(" >>> Decline the offer in the provider plugin within 30s <<<") + try: + await 
client.rent(SCHEDULE_ID) + step_fail("rent", "expected exception on decline, got Browser") + except CekiError as exc: + step_ok("rent_declined", f"{type(exc).__name__}: {exc}") + except Exception as exc: + step_fail("exception", f"{type(exc).__name__}: {exc}") + traceback.print_exc() + finally: + await client.close() + return summary() + + +# ── Scenario D: Offer timeout / offline provider ─────────────────────────── + +async def scenario_d() -> int: + print("\n=== Scenario D: Offer timeout (offline/nonexistent provider) ===") + reset_steps() + client = await make_client() + try: + step_ok("connect") + fake_schedule = 999999 + try: + await client.rent(fake_schedule) + step_fail("rent", "expected error for nonexistent schedule, got Browser") + except (ProviderOffline, ProviderDisconnected, CekiError) as exc: + step_ok("rent_error", f"{type(exc).__name__}: {exc}") + except Exception as exc: + step_fail("exception", f"{type(exc).__name__}: {exc}") + traceback.print_exc() + finally: + await client.close() + return summary() + + +# ── Scenario E: Chrome crash (manual) ────────────────────────────────────── + +async def scenario_e() -> int: + print("\n=== Scenario E: Chrome crash (MANUAL — kill Chrome on provider side) ===") + reset_steps() + client = await make_client() + try: + step_ok("connect") + browser = await client.rent(SCHEDULE_ID) + step_ok("rent", f"session={browser.session_id}") + await browser.send({"method": "Page.navigate", "params": {"url": "https://example.com"}}) + await browser.send({"method": "Page.enable"}) + step_ok("navigate") + print(" >>> Kill Chrome on provider side NOW, then wait <<<") + try: + reason = await browser.wait_until_ended() + step_ok("session_ended", f"reason={reason}") + except ProviderDisconnected: + step_ok("provider_disconnected") + except SessionEnded as exc: + step_ok("session_ended", f"reason={exc.reason}") + except Exception as exc: + step_fail("exception", f"{type(exc).__name__}: {exc}") + traceback.print_exc() + finally: + 
await client.close() + return summary() + + +# ── Scenario F: Network drop (manual) ───────────────────────────────────── + +async def scenario_f() -> int: + print("\n=== Scenario F: Network drop (MANUAL — disconnect provider for 30s) ===") + reset_steps() + client = await make_client() + try: + step_ok("connect") + browser = await client.rent(SCHEDULE_ID) + step_ok("rent", f"session={browser.session_id}") + await browser.send({"method": "Page.navigate", "params": {"url": "https://example.com"}}) + await browser.send({"method": "Page.enable"}) + step_ok("navigate") + + disconnected_at: float | None = None + reconnected_at: float | None = None + + async def on_disconnect() -> None: + nonlocal disconnected_at + disconnected_at = time.monotonic() + print(f" >> provider disconnected at t={disconnected_at:.1f}") + + async def on_reconnect() -> None: + nonlocal reconnected_at + reconnected_at = time.monotonic() + print(f" >> provider reconnected at t={reconnected_at:.1f}") + + browser.on_provider_disconnected(on_disconnect) + browser.on_provider_reconnected(on_reconnect) + + print(" >>> Disconnect provider network for ~30s, then reconnect <<<") + try: + reason = await asyncio.wait_for(browser.wait_until_ended(), timeout=90) + if reconnected_at and disconnected_at: + gap = reconnected_at - disconnected_at + step_ok("recovery", f"disconnected {gap:.1f}s, then ended reason={reason}") + else: + step_ok("session_ended", f"reason={reason}") + except asyncio.TimeoutError: + if reconnected_at: + step_ok("recovery", "session survived disconnect") + await browser.close() + else: + step_fail("timeout", "no events in 90s") + except Exception as exc: + step_fail("exception", f"{type(exc).__name__}: {exc}") + traceback.print_exc() + finally: + await client.close() + return summary() + + +# ── Scenario G: Kill session button (manual) ────────────────────────────── + +async def scenario_g() -> int: + print("\n=== Scenario G: Kill session (MANUAL — press kill in provider plugin) ===") + 
reset_steps() + client = await make_client() + try: + step_ok("connect") + browser = await client.rent(SCHEDULE_ID) + step_ok("rent", f"session={browser.session_id}") + await browser.send({"method": "Page.navigate", "params": {"url": "https://example.com"}}) + await browser.send({"method": "Page.enable"}) + step_ok("navigate") + print(" >>> Press Kill/Stop in provider plugin NOW <<<") + try: + reason = await browser.wait_until_ended() + step_ok("session_ended", f"reason={reason}") + except SessionEnded as exc: + step_ok("session_ended", f"reason={exc.reason}") + except Exception as exc: + step_fail("exception", f"{type(exc).__name__}: {exc}") + traceback.print_exc() + finally: + await client.close() + return summary() + + +# ── Scenario H: Insufficient funds ───────────────────────────────────────── + +async def scenario_h() -> int: + print("\n=== Scenario H: Insufficient funds ===") + no_funds_token = os.environ.get("CEKI_TOKEN_NO_FUNDS", "") + if not no_funds_token: + print(" SKIP: CEKI_TOKEN_NO_FUNDS not set") + print(" To test: create a zero-balance user and set CEKI_TOKEN_NO_FUNDS") + return 0 + reset_steps() + client = await connect(no_funds_token, ConnectOptions( + api_url=API_URL, relay_url=RELAY_URL, + )) + try: + step_ok("connect") + try: + await client.rent(SCHEDULE_ID) + step_fail("rent", "expected InsufficientFunds, got Browser") + except InsufficientFunds as exc: + step_ok("insufficient_funds", str(exc)) + except CekiError as exc: + step_ok("error", f"{type(exc).__name__}: {exc}") + except Exception as exc: + step_fail("exception", f"{type(exc).__name__}: {exc}") + traceback.print_exc() + finally: + await client.close() + return summary() + + +# ── Scenario I: 10 sequential CDP commands ────────────────────────────────── + +async def scenario_i() -> int: + print("\n=== Scenario I: 10 sequential CDP commands ===") + reset_steps() + client = await make_client() + try: + step_ok("connect") + browser = await client.rent(SCHEDULE_ID) + step_ok("rent", 
f"session={browser.session_id}") + + await browser.send({"method": "Page.navigate", "params": {"url": "about:blank"}}) + step_ok("window_init") + for i in range(10): + resp = await browser.send({ + "method": "Runtime.evaluate", + "params": {"expression": f"1 + {i}"}, + }) + val = resp.get("result", {}).get("value") + if val == 1 + i: + step_ok(f"cmd_{i+1}", f"1+{i}={val}") + else: + step_fail(f"cmd_{i+1}", f"expected {1+i}, got {val}") + + await browser.close() + step_ok("close") + except Exception as exc: + step_fail("exception", f"{type(exc).__name__}: {exc}") + traceback.print_exc() + finally: + await client.close() + return summary() + + +# ── Scenario J: Long-running navigation ──────────────────────────────────── + +async def scenario_j() -> int: + print("\n=== Scenario J: Long-running navigation (httpbin delay) ===") + reset_steps() + client = await make_client() + try: + step_ok("connect") + browser = await client.rent(SCHEDULE_ID) + step_ok("rent", f"session={browser.session_id}") + + await browser.send({"method": "Page.navigate", "params": {"url": "about:blank"}}) + step_ok("window_init") + + await browser.send({"method": "Page.enable"}) + step_ok("Page.enable") + + load_fut = asyncio.ensure_future(wait_event(browser, "Page.loadEventFired", timeout=30)) + t0 = time.monotonic() + await browser.send({"method": "Page.navigate", "params": {"url": "https://httpbin.org/delay/5"}}) + step_ok("Page.navigate", "url=httpbin.org/delay/5") + + await load_fut + elapsed = time.monotonic() - t0 + if elapsed < 30: + step_ok("loadEventFired", f"elapsed={elapsed:.1f}s") + else: + step_fail("loadEventFired", f"too slow: {elapsed:.1f}s") + + await browser.close() + step_ok("close") + except Exception as exc: + step_fail("exception", f"{type(exc).__name__}: {exc}") + traceback.print_exc() + finally: + await client.close() + return summary() + + +# ── Scenario registry ────────────────────────────────────────────────────── + +AUTOMATIC = {"A": scenario_a, "B": scenario_b, "D": 
scenario_d, "H": scenario_h, "I": scenario_i, "J": scenario_j} +MANUAL = {"C": scenario_c, "E": scenario_e, "F": scenario_f, "G": scenario_g} +ALL_SCENARIOS = {**AUTOMATIC, **MANUAL} + + +def main() -> int: + parser = argparse.ArgumentParser(description="Ceki Browser SDK 2.2.0 integration smoke tests") + parser.add_argument("--scenario", default="A", help="Scenario letter(s): A, B, C-G, H-K, all, or comma-separated (e.g. A,I,J)") + args = parser.parse_args() + + requested = args.scenario.strip() + if requested.lower() == "all": + scenarios = list(AUTOMATIC.keys()) + skipped_manual = list(MANUAL.keys()) + elif requested.lower() == "k": + print("\n=== Scenario K: Obsolete ===") + print(" SKIP: mode parameter removed from public API in SDK 2.0+") + return 0 + else: + scenarios = [s.strip().upper() for s in requested.split(",")] + skipped_manual = [] + + exit_code = 0 + for key in scenarios: + fn = ALL_SCENARIOS.get(key) + if fn is None: + print(f"\n Unknown scenario: {key}") + exit_code = 1 + continue + if key in MANUAL and requested.lower() == "all": + continue + rc = asyncio.run(fn()) + if rc != 0: + exit_code = 1 + + if skipped_manual: + print(f"\n Skipped manual scenarios: {', '.join(skipped_manual)} (run individually)") + + return exit_code + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/pyproject.toml b/pyproject.toml index dfa82de..cb719c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,16 +1,16 @@ [build-system] -requires = ["setuptools>=68.0", "wheel"] -build-backend = "setuptools.build_meta" +requires = ["hatchling"] +build-backend = "hatchling.build" [project] name = "ceki-browser" -version = "0.3.0" -description = "Python SDK for browser.ceki.me — rent real browsers of real people" +version = "2.15.1" +description = "Python SDK for ceki.me browser rental" readme = "README.md" license = {text = "MIT"} requires-python = ">=3.10" authors = [{name = "Ceki.me", email = "hello@ceki.me"}] -keywords = ["browser", "automation", "websocket", 
"rpc", "ai-agent"] +keywords = ["browser", "automation", "websocket", "ai-agent"] classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", @@ -20,39 +20,33 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Framework :: AsyncIO", - "Topic :: Internet :: WWW/HTTP :: Browsers", ] dependencies = [ - "websockets>=12.0,<14.0", - "aiortc>=1.6.0", - "aiohttp>=3.9.0", + "websockets>=12", + "httpx>=0.27", + "pydantic>=2", ] [project.optional-dependencies] -dev = [ - "pytest>=8.0", - "pytest-asyncio>=0.23", - "ruff>=0.4", -] +dev = ["pytest>=8", "pytest-asyncio>=0.23", "ruff>=0.5", "mypy>=1.10"] [project.urls] -Homepage = "https://browser.ceki.me" +Homepage = "https://ceki.me" Repository = "https://github.com/Ceki-me/python-sdk" -Documentation = "https://github.com/Ceki-me/python-sdk#readme" - -[tool.setuptools.packages.find] -include = ["ceki_browser*"] -[tool.setuptools.package-data] -"ceki_browser.humanize" = ["profiles/*.json"] +[project.scripts] +ceki-browser = "ceki_browser.cli:main" -[tool.pytest.ini_options] -asyncio_mode = "auto" -testpaths = ["tests"] +[tool.hatch.build.targets.wheel] +packages = ["ceki_browser"] [tool.ruff] +line-length = 100 target-version = "py310" -line-length = 120 [tool.ruff.lint] select = ["E", "F", "W", "I"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..e7e28ed --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import json +from collections.abc import AsyncGenerator +from typing import Any + +import pytest +import websockets +import websockets.server + + +class MockRelayServer: + def __init__(self) -> None: + self.connections: list[websockets.server.WebSocketServerProtocol] = [] + self.received: list[dict[str, Any]] = [] + self._server: websockets.server.WebSocketServer | None = None + self.port: int = 0 
+ + @staticmethod + def _select_subprotocol( + ws: websockets.server.WebSocketServerProtocol, subprotocols: list[str] + ) -> str | None: + for sp in subprotocols: + if sp.startswith("bearer."): + return sp + return None + + async def start(self) -> None: + self._server = await websockets.serve( + self._handler, + "127.0.0.1", + 0, # OS assigns port + select_subprotocol=self._select_subprotocol, + ) + self.port = next(iter(self._server.sockets)).getsockname()[1] + + async def _handler(self, ws: websockets.server.WebSocketServerProtocol) -> None: + self.connections.append(ws) + try: + async for raw in ws: + msg: dict[str, Any] = json.loads(raw) + self.received.append(msg) + if msg.get("type") == "ping": + await ws.send(json.dumps({"type": "pong"})) + except websockets.exceptions.ConnectionClosed: + pass + finally: + if ws in self.connections: + self.connections.remove(ws) + + async def send_to_all(self, msg: dict[str, Any]) -> None: + for ws in list(self.connections): + await ws.send(json.dumps(msg)) + + async def stop(self) -> None: + if self._server: + self._server.close() + await self._server.wait_closed() + + +@pytest.fixture +async def mock_relay() -> AsyncGenerator[MockRelayServer, None]: + server = MockRelayServer() + await server.start() + yield server + await server.stop() diff --git a/tests/e2e/README.md b/tests/e2e/README.md new file mode 100644 index 0000000..f456ee6 --- /dev/null +++ b/tests/e2e/README.md @@ -0,0 +1,43 @@ +# E2E Tests + +Real integration tests against `browser.ittribe.org` (dev relay + live Chrome provider). 
+ +## Prerequisites + +- Provider online (Konstantin's Chrome with extension v0.6.102+) +- Skill Rent Agent token + +## Setup + +```bash +export CEKI_API_KEY=$(cat /home/node/.openclaw/secrets/skill_rent_agent_token.txt) +export CEKI_RELAY_URL="wss://browser.ittribe.org/ws/agent" +export CEKI_API_URL="https://clawapi.ittribe.org" +export CEKI_CHAT_URL="https://chat.ittribe.org/api/chat" +``` + +## Run + +```bash +cd python-sdk +python3 -m pytest tests/e2e/ -v -s +``` + +## Tests + +### test_fingerprint_persistence.py + +Two sequential rents. Session A exports profile with fingerprint. Session B rents with `fingerprint=profile["fingerprint"]`. Asserts: +- `navigator.userAgent` A == B +- `Intl.DateTimeFormat().resolvedOptions().timeZone` A == B +- `screen.width/height` A == B +- `navigator.hardwareConcurrency` A == B +- WebGL renderer A == B +- `Browser.getFingerprint` CDP response A == B (only when both sessions return one) + +Cost: ~$0.20 (2 rents x 1 min x $0.10/min). + +## Notes + +- These tests are **not** in the default `pytest` run (skipped when `CEKI_API_KEY` is not set). +- Run before each extension release to verify fingerprint persistence works end-to-end. diff --git a/tests/e2e/test_fingerprint_persistence.py b/tests/e2e/test_fingerprint_persistence.py new file mode 100644 index 0000000..6eb8420 --- /dev/null +++ b/tests/e2e/test_fingerprint_persistence.py @@ -0,0 +1,188 @@ +"""E2E test: fingerprint persistence across two sequential rents. + +Requires a live relay + provider on browser.ittribe.org. 
+Run manually: python3 -m pytest tests/e2e/test_fingerprint_persistence.py -v -s + +Env vars required: + CEKI_API_KEY — agent token (Skill Rent Agent or equivalent) + CEKI_RELAY_URL — wss://browser.ittribe.org/ws/agent (default) + CEKI_API_URL — https://clawapi.ittribe.org (default) + CEKI_CHAT_URL — https://chat.ittribe.org/api/chat (default) +""" +from __future__ import annotations + +import asyncio +import json +import os + +import pytest + +pytestmark = pytest.mark.skipif( + not os.environ.get("CEKI_API_KEY"), + reason="CEKI_API_KEY not set — skip E2E (requires live relay + provider)", +) + + +def _opts(): + from ceki_browser import ConnectOptions + return ConnectOptions( + relay_url=os.environ.get("CEKI_RELAY_URL", "wss://browser.ittribe.org/ws/agent"), + api_url=os.environ.get("CEKI_API_URL", "https://clawapi.ittribe.org"), + chat_url=os.environ.get("CEKI_CHAT_URL", "https://chat.ittribe.org/api/chat"), + reconnect=False, + ) + + +async def _eval_string(browser, expr: str) -> str: + resp = await browser.send({ + "method": "Runtime.evaluate", + "params": {"expression": expr, "returnByValue": True}, + }) + return resp.get("result", {}).get("value", "") + + +async def _eval_int(browser, expr: str) -> int: + resp = await browser.send({ + "method": "Runtime.evaluate", + "params": {"expression": expr, "returnByValue": True}, + }) + return int(resp.get("result", {}).get("value", 0)) + + +async def _discover_schedule(): + from ceki_browser import connect + api_key = os.environ["CEKI_API_KEY"] + client = await connect(api_key, _opts()) + try: + results = await client.search() + if not results: + pytest.skip("no providers online") + return results[0].schedule_id + finally: + await client.close() + + +async def _try_get_fingerprint_cdp(browser): + """Try Browser.getFingerprint (ext v0.6.102+). 
Returns dict or None if not available.""" + try: + resp = await browser.send({"method": "Browser.getFingerprint"}) + return resp.get("fingerprint") + except Exception: + return None + + +async def _collect_browser_fingerprint(browser): + """Collect fingerprint values via Runtime.evaluate from the actual page.""" + await browser.navigate("about:blank") + await asyncio.sleep(1.5) + + ua = await _eval_string(browser, "navigator.userAgent") + tz = await _eval_string(browser, "Intl.DateTimeFormat().resolvedOptions().timeZone") + locale = await _eval_string(browser, "navigator.language") + sw = await _eval_int(browser, "screen.width") + sh = await _eval_int(browser, "screen.height") + hc = await _eval_int(browser, "navigator.hardwareConcurrency") + webgl = await _eval_string(browser, """ + (() => { + const c = document.createElement('canvas'); + const g = c.getContext('webgl'); + if (!g) return 'no-webgl'; + const e = g.getExtension('WEBGL_debug_renderer_info'); + if (!e) return 'no-debug-ext'; + return g.getParameter(e.UNMASKED_RENDERER_WEBGL); + })() + """) + + fp_cdp = await _try_get_fingerprint_cdp(browser) + profile = await browser.profile.export() + + return { + "ua": ua, + "tz": tz, + "locale": locale, + "screen_w": sw, + "screen_h": sh, + "hc": hc, + "webgl": webgl, + "fp_cdp": fp_cdp, + "profile": profile, + } + + +@pytest.mark.asyncio +async def test_fingerprint_persists_across_rents(): + from ceki_browser import connect + + api_key = os.environ["CEKI_API_KEY"] + schedule_id = await _discover_schedule() + + # --- Session A: rent, collect fingerprint, export profile --- + client_a = await connect(api_key, _opts()) + try: + browser_a = await client_a.rent(schedule_id) + try: + a = await _collect_browser_fingerprint(browser_a) + finally: + await browser_a.close() + finally: + await client_a.close() + + print(f"\n--- Session A ---") + print(f" UA: {a['ua']}") + print(f" TZ: {a['tz']}") + print(f" Locale: {a['locale']}") + print(f" Screen: 
{a['screen_w']}x{a['screen_h']}") + print(f" HW conc: {a['hc']}") + print(f" WebGL: {a['webgl']}") + if a["fp_cdp"]: + print(f" FP seed: {a['fp_cdp'].get('seed')}") + else: + print(f" FP CDP: not available (ext < 0.6.102)") + + fingerprint_from_profile = a["profile"].get("fingerprint") + + # --- Session B: rent with fingerprint from profile --- + client_b = await connect(api_key, _opts()) + try: + browser_b = await client_b.rent( + schedule_id, + fingerprint=fingerprint_from_profile if fingerprint_from_profile else True, + ) + try: + b = await _collect_browser_fingerprint(browser_b) + finally: + await browser_b.close() + finally: + await client_b.close() + + print(f"\n--- Session B ---") + print(f" UA: {b['ua']}") + print(f" TZ: {b['tz']}") + print(f" Locale: {b['locale']}") + print(f" Screen: {b['screen_w']}x{b['screen_h']}") + print(f" HW conc: {b['hc']}") + print(f" WebGL: {b['webgl']}") + if b["fp_cdp"]: + print(f" FP seed: {b['fp_cdp'].get('seed')}") + else: + print(f" FP CDP: not available (ext < 0.6.102)") + + # --- Assertions --- + if fingerprint_from_profile is None: + print("\n⚠ Extension < 0.6.102: Browser.getFingerprint not available.") + print(" Cannot test fingerprint persistence (profile has no fingerprint).") + print(" Update extension to 0.6.102+ and re-run.") + pytest.skip("Extension too old — Browser.getFingerprint not available, fingerprint not in profile") + + assert a["ua"] == b["ua"], f"UA mismatch: {a['ua']!r} vs {b['ua']!r}" + assert a["tz"] == b["tz"], f"TZ mismatch: {a['tz']!r} vs {b['tz']!r}" + assert a["locale"] == b["locale"], f"Locale mismatch: {a['locale']!r} vs {b['locale']!r}" + assert a["screen_w"] == b["screen_w"], f"screen.width mismatch: {a['screen_w']} vs {b['screen_w']}" + assert a["screen_h"] == b["screen_h"], f"screen.height mismatch: {a['screen_h']} vs {b['screen_h']}" + assert a["hc"] == b["hc"], f"hardwareConcurrency mismatch: {a['hc']} vs {b['hc']}" + assert a["webgl"] == b["webgl"], f"WebGL mismatch: {a['webgl']!r} vs 
{b['webgl']!r}" + + if a["fp_cdp"] and b["fp_cdp"]: + assert a["fp_cdp"] == b["fp_cdp"], "CDP fingerprint mismatch" + + print("\n✅ All fingerprint values match between Session A and Session B") diff --git a/tests/test_browser_cdp.py b/tests/test_browser_cdp.py new file mode 100644 index 0000000..407a99a --- /dev/null +++ b/tests/test_browser_cdp.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +import asyncio + +import pytest + +from ceki_browser import ConnectOptions, connect + + +@pytest.fixture +async def connected_client(mock_relay): + client = await connect("test-key", ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}/ws/agent")) + yield client, mock_relay + await client.close() + + +async def _do_rent(client, mock_relay, session_id="sess-1", schedule_id=42): + server_event_id = "ev-test-1" + + async def ack_rent(): + await asyncio.sleep(0.05) + await mock_relay.send_to_all({"type": "rent_pending", "event_id": server_event_id}) + await asyncio.sleep(0.05) + await mock_relay.send_to_all({ + "type": "match", + "event_id": server_event_id, + "session_id": session_id, + "schedule_id": schedule_id, + "chat_topic_id": None, + "browser_info": {}, + }) + + task = asyncio.create_task(ack_rent()) + browser = await client.rent(schedule_id) + await task + return browser + + +@pytest.mark.asyncio +async def test_cdp_happy_path(connected_client): + client, mock_relay = connected_client + browser = await _do_rent(client, mock_relay) + + async def send_cdp_response(): + await asyncio.sleep(0.05) + cdp_msg = next((m for m in mock_relay.received if m.get("type") == "cdp"), None) + assert cdp_msg is not None + await mock_relay.send_to_all({ + "type": "cdp_response", + "session_id": browser.session_id, + "id": cdp_msg["id"], + "ok": True, + "result": {"frameId": "abc123"}, + }) + + task = asyncio.create_task(send_cdp_response()) + result = await browser.send({"method": "Page.navigate", "params": {"url": "https://example.com"}}) + await task + + assert 
result["frameId"] == "abc123" + + sent = [m for m in mock_relay.received if m.get("type") == "cdp"] + assert sent[-1]["method"] == "Page.navigate" + assert sent[-1]["params"]["url"] == "https://example.com" + assert sent[-1]["session_id"] == browser.session_id + + +@pytest.mark.asyncio +async def test_cdp_on_event_callback(connected_client): + client, mock_relay = connected_client + browser = await _do_rent(client, mock_relay) + + received_events: list[tuple[str, dict]] = [] + + async def on_ev(method: str, params: dict) -> None: + received_events.append((method, params)) + + browser.on_event(on_ev) + + await mock_relay.send_to_all({ + "type": "cdp_event", + "session_id": browser.session_id, + "method": "Page.loadEventFired", + "params": {"timestamp": 1.23}, + }) + + await asyncio.sleep(0.1) + + assert len(received_events) == 1 + assert received_events[0] == ("Page.loadEventFired", {"timestamp": 1.23}) + + +@pytest.mark.asyncio +async def test_cdp_timeout(connected_client): + client, mock_relay = connected_client + browser = await _do_rent(client, mock_relay) + + cdp = {"method": "Page.navigate", "params": {"url": "https://example.com"}} + with pytest.raises(asyncio.TimeoutError): + await browser.send(cdp, timeout=0.05) + + +@pytest.mark.asyncio +async def test_cdp_error_response(connected_client): + client, mock_relay = connected_client + browser = await _do_rent(client, mock_relay) + + async def send_error(): + await asyncio.sleep(0.05) + cdp_msg = next((m for m in mock_relay.received if m.get("type") == "cdp"), None) + await mock_relay.send_to_all({ + "type": "cdp_response", + "session_id": browser.session_id, + "id": cdp_msg["id"], + "ok": False, + "error": {"message": "No such target"}, + }) + + task = asyncio.create_task(send_error()) + with pytest.raises(Exception, match="CDP error"): + await browser.send({"method": "Page.navigate", "params": {"url": "https://example.com"}}) + await task diff --git a/tests/test_browser_errors.py 
b/tests/test_browser_errors.py new file mode 100644 index 0000000..7badeae --- /dev/null +++ b/tests/test_browser_errors.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +import asyncio + +import pytest + +from ceki_browser import ConnectOptions, connect +from ceki_browser._exceptions import ( + CdpUnrecoverable, + InsufficientFunds, + RateLimitExceeded, + SessionEnded, +) + + +@pytest.fixture +async def browser_and_relay(mock_relay): + client = await connect("test-key", ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}/ws/agent")) + + async def ack_rent(): + await asyncio.sleep(0.05) + await mock_relay.send_to_all({"type": "rent_pending", "event_id": "ev-1", "schedule_id": 1}) + await asyncio.sleep(0.02) + await mock_relay.send_to_all({ + "type": "match", + "event_id": "ev-1", + "session_id": "sess-err", + "schedule_id": 1, + "chat_topic_id": None, + "browser_info": {}, + }) + + task = asyncio.create_task(ack_rent()) + browser = await client.rent(1) + await task + yield browser, mock_relay + await client.close() + + +@pytest.mark.asyncio +async def test_error_1011_heartbeat_timeout(browser_and_relay): + browser, mock_relay = browser_and_relay + + cdp = {"method": "Page.navigate", "params": {"url": "https://x.com"}} + + async def pending_cdp(): + with pytest.raises(SessionEnded): + await browser.send(cdp, timeout=5) + + task = asyncio.create_task(pending_cdp()) + await asyncio.sleep(0.05) + + await mock_relay.send_to_all({ + "type": "error", + "session_id": "sess-err", + "code": -1011, + "message": "heartbeat_timeout", + }) + + reason = await asyncio.wait_for(browser.wait_until_ended(), timeout=2) + assert reason == "heartbeat_timeout" + await task + + +@pytest.mark.asyncio +async def test_error_1012_insufficient_funds(browser_and_relay): + browser, mock_relay = browser_and_relay + cdp = {"method": "Page.navigate", "params": {"url": "https://x.com"}} + + async def pending_cdp(): + with pytest.raises(InsufficientFunds): + await browser.send(cdp, 
timeout=5) + + task = asyncio.create_task(pending_cdp()) + await asyncio.sleep(0.05) + + await mock_relay.send_to_all({ + "type": "error", + "session_id": "sess-err", + "code": -1012, + "message": "insufficient_funds", + }) + + reason = await asyncio.wait_for(browser.wait_until_ended(), timeout=2) + assert reason == "insufficient_funds" + await task + + +@pytest.mark.asyncio +async def test_error_1013_rate_limit_does_not_end_session(browser_and_relay): + browser, mock_relay = browser_and_relay + + async def pending_cdp(): + await asyncio.sleep(0.05) + cdp_msg = next((m for m in mock_relay.received if m.get("type") == "cdp"), None) + assert cdp_msg is not None + await mock_relay.send_to_all({ + "type": "error", + "session_id": "sess-err", + "code": -1013, + "id": cdp_msg["id"], + "retry_after": 2.5, + }) + + cdp = {"method": "Page.navigate", "params": {"url": "https://x.com"}} + task = asyncio.create_task(pending_cdp()) + with pytest.raises(RateLimitExceeded) as exc_info: + await browser.send(cdp, timeout=5) + await task + + assert exc_info.value.retry_after == 2.5 + assert not browser._ended.is_set() + + +@pytest.mark.asyncio +async def test_error_1050_cdp_unrecoverable_does_not_end_session(browser_and_relay): + browser, mock_relay = browser_and_relay + + async def pending_cdp(): + await asyncio.sleep(0.05) + cdp_msg = next((m for m in mock_relay.received if m.get("type") == "cdp"), None) + assert cdp_msg is not None + await mock_relay.send_to_all({ + "type": "error", + "session_id": "sess-err", + "code": -1050, + "id": cdp_msg["id"], + "message": "CDP pipe broken", + }) + + cdp = {"method": "Page.navigate", "params": {"url": "https://x.com"}} + task = asyncio.create_task(pending_cdp()) + with pytest.raises(CdpUnrecoverable) as exc_info: + await browser.send(cdp, timeout=5) + await task + + assert "CDP pipe broken" in str(exc_info.value) + assert not browser._ended.is_set() diff --git a/tests/test_browser_release_alias.py b/tests/test_browser_release_alias.py new 
file mode 100644 index 0000000..02bf638 --- /dev/null +++ b/tests/test_browser_release_alias.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from unittest.mock import AsyncMock, patch + +import pytest + +from ceki_browser import Browser + + +@pytest.fixture +def browser(): + client = AsyncMock() + client._active_browsers = {} + + match = AsyncMock() + match.session_id = "test-session" + match.schedule_id = 1 + match.chat_topic_id = None + match.browser_info = {} + match.provider_user_id = None + + with patch.dict("os.environ", {"CEKI_HUMAN_DISABLE": "1"}): + b = Browser(client, match) + return b + + +async def test_release_exists_and_callable(browser: Browser): + assert hasattr(browser, "release") + assert callable(browser.release) + + +async def test_release_delegates_to_close(browser: Browser): + browser.close = AsyncMock() + await browser.release() + browser.close.assert_awaited_once_with(timeout=10.0) + + +async def test_release_passes_timeout(browser: Browser): + browser.close = AsyncMock() + await browser.release(timeout=5.0) + browser.close.assert_awaited_once_with(timeout=5.0) diff --git a/tests/test_browser_screenshot_format.py b/tests/test_browser_screenshot_format.py new file mode 100644 index 0000000..f2db8cd --- /dev/null +++ b/tests/test_browser_screenshot_format.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import base64 +import logging +from unittest.mock import AsyncMock, call, patch + +import pytest + +from ceki_browser import Browser + + +@pytest.fixture +def browser(): + client = AsyncMock() + client._active_browsers = {} + + match = AsyncMock() + match.session_id = "test-session" + match.schedule_id = 1 + match.chat_topic_id = None + match.browser_info = {} + match.provider_user_id = None + + with patch.dict("os.environ", {"CEKI_HUMAN_DISABLE": "1"}): + b = Browser(client, match) + return b + + +async def test_screenshot_default_returns_dict(browser: Browser): + cdp_resp = {"data": "AAAA", "width": 100, "height": 200} + 
browser.send = AsyncMock(return_value=cdp_resp) + result = await browser.screenshot() + assert isinstance(result, dict) + assert result["data"] == "AAAA" + + +async def test_screenshot_base64_returns_dict(browser: Browser): + cdp_resp = {"data": "AAAA"} + browser.send = AsyncMock(return_value=cdp_resp) + result = await browser.screenshot(format="base64") + assert isinstance(result, dict) + assert result is cdp_resp + + +async def test_screenshot_png_returns_bytes(browser: Browser): + raw = b"\x89PNG" + cdp_resp = {"data": base64.b64encode(raw).decode()} + browser.send = AsyncMock(return_value=cdp_resp) + result = await browser.screenshot(format="png") + assert isinstance(result, bytes) + assert result == raw + + +async def test_screenshot_png_empty_data_returns_empty_bytes(browser: Browser): + cdp_resp = {"data": ""} + browser.send = AsyncMock(return_value=cdp_resp) + result = await browser.screenshot(format="png") + assert result == b"" + + +async def test_screenshot_invalid_format_raises(browser: Browser): + with pytest.raises(ValueError, match="Unsupported format"): + await browser.screenshot(format="bogus") + + +async def test_screenshot_full_page_sends_layout_metrics_and_clip(browser: Browser): + metrics_resp = {"contentSize": {"width": 1280, "height": 5000}} + cdp_resp = {"data": "AAAA"} + browser.send = AsyncMock(side_effect=[metrics_resp, cdp_resp]) + result = await browser.screenshot(full_page=True) + assert isinstance(result, dict) + assert browser.send.call_count == 2 + assert browser.send.call_args_list[0] == call({"method": "Page.getLayoutMetrics"}) + capture_call = browser.send.call_args_list[1].args[0] + assert capture_call["method"] == "Page.captureScreenshot" + assert capture_call["params"]["captureBeyondViewport"] is True + assert capture_call["params"]["clip"] == {"x": 0, "y": 0, "width": 1280, "height": 5000, "scale": 1} + + +async def test_screenshot_full_page_clamps_height(browser: Browser, caplog): + metrics_resp = {"contentSize": {"width": 
1920, "height": 20000}} + cdp_resp = {"data": "AAAA"} + browser.send = AsyncMock(side_effect=[metrics_resp, cdp_resp]) + with caplog.at_level(logging.WARNING, logger="ceki_browser._browser"): + await browser.screenshot(full_page=True) + capture_call = browser.send.call_args_list[1].args[0] + assert capture_call["params"]["clip"]["height"] == 16384 + assert "clamped" in caplog.text + + +async def test_screenshot_full_page_png_returns_bytes(browser: Browser): + raw = b"\x89PNG_FULL" + metrics_resp = {"contentSize": {"width": 800, "height": 3000}} + cdp_resp = {"data": base64.b64encode(raw).decode()} + browser.send = AsyncMock(side_effect=[metrics_resp, cdp_resp]) + result = await browser.screenshot(format="png", full_page=True) + assert isinstance(result, bytes) + assert result == raw + + +async def test_screenshot_default_no_full_page(browser: Browser): + cdp_resp = {"data": "AAAA"} + browser.send = AsyncMock(return_value=cdp_resp) + await browser.screenshot() + assert browser.send.call_count == 1 + sent = browser.send.call_args.args[0] + assert sent["method"] == "Page.captureScreenshot" + assert sent.get("params", {}).get("captureBeyondViewport") is None diff --git a/tests/test_captcha.py b/tests/test_captcha.py new file mode 100644 index 0000000..e728d79 --- /dev/null +++ b/tests/test_captcha.py @@ -0,0 +1,369 @@ +from __future__ import annotations + +import asyncio +import json +from unittest.mock import AsyncMock, MagicMock, Mock, patch + +import pytest + +from ceki_browser import Client, ConnectOptions, connect +from ceki_browser._captcha import CaptchaResult +from ceki_browser._exceptions import CaptchaError, CaptchaTimeoutError + +from .conftest import MockRelayServer + +MOCK_EVENT_STORE_RESPONSE = {"id": 9001, "status_id": 100} + + +def _patch_httpx_post(status_code=200, json_body=None): + resp = MagicMock() + resp.status_code = status_code + resp.is_success = status_code < 400 + resp.json = Mock(return_value=json_body or MOCK_EVENT_STORE_RESPONSE) + 
resp.raise_for_status = Mock() + client_mock = AsyncMock() + client_mock.__aenter__ = AsyncMock(return_value=client_mock) + client_mock.__aexit__ = AsyncMock(return_value=False) + client_mock.post = AsyncMock(return_value=resp) + client_mock.patch = AsyncMock(return_value=resp) + return patch("httpx.AsyncClient", return_value=client_mock) + + +async def _setup_browser(mock_relay: MockRelayServer): + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect( + "testkey", + ConnectOptions(relay_url=url, api_url="http://localhost:9999"), + ) + rent_task = asyncio.create_task(client.rent(schedule_id=42)) + await asyncio.sleep(0.05) + await mock_relay.send_to_all({"type": "rent_pending", "event_id": "500", "schedule_id": 42}) + await asyncio.sleep(0.05) + await mock_relay.send_to_all({ + "type": "match", + "event_id": "500", + "session_id": "sess-123", + "schedule_id": 42, + "chat_topic_id": "topic-1", + "provider_user_id": 77, + }) + browser = await asyncio.wait_for(rent_task, timeout=5) + return client, browser + + +@pytest.mark.asyncio +async def test_request_captcha_happy_path(mock_relay: MockRelayServer) -> None: + client, browser = await _setup_browser(mock_relay) + try: + with _patch_httpx_post(): + captcha_task = asyncio.create_task( + browser.request_captcha(acceptance_timeout=30, completion_timeout=30, auto_accept=False) + ) + await asyncio.sleep(0.1) + + await mock_relay.send_to_all({ + "type": "chat.message", + "session_id": "sess-123", + "payload": { + "message": { + "type": "action", + "_id": "msg-1", + "topic_id": "topic-1", + "created_at": "2025-01-01T00:00:00Z", + "action": { + "kind": "human_action_accepted", + "event_id": 9001, + "data": {}, + }, + } + }, + }) + await asyncio.sleep(0.1) + + await mock_relay.send_to_all({ + "type": "chat.message", + "session_id": "sess-123", + "payload": { + "message": { + "type": "action", + "_id": "msg-2", + "topic_id": "topic-1", + "created_at": "2025-01-01T00:00:10Z", + "action": { + "kind": 
"human_action_completed", + "event_id": 9001, + "data": { + "proof_message_id": "proof-abc", + "correction_id": 5555, + }, + }, + } + }, + }) + + result = await asyncio.wait_for(captcha_task, timeout=5) + assert result.solved is True + assert result.proof_message_id == "proof-abc" + assert result.child_event_id == 9001 + assert result.cancel_reason is None + finally: + await client.close() + + +@pytest.mark.asyncio +async def test_acceptance_timeout(mock_relay: MockRelayServer) -> None: + client, browser = await _setup_browser(mock_relay) + try: + with _patch_httpx_post(): + with pytest.raises(CaptchaTimeoutError) as exc_info: + await asyncio.wait_for( + browser.request_captcha( + acceptance_timeout=30, completion_timeout=30, auto_accept=False, + ), + timeout=35, + ) + assert exc_info.value.phase == "acceptance" + finally: + await client.close() + + +@pytest.mark.asyncio +async def test_provider_declined(mock_relay: MockRelayServer) -> None: + client, browser = await _setup_browser(mock_relay) + try: + with _patch_httpx_post(): + captcha_task = asyncio.create_task( + browser.request_captcha(acceptance_timeout=30, completion_timeout=30, auto_accept=False) + ) + await asyncio.sleep(0.1) + + await mock_relay.send_to_all({ + "type": "chat.message", + "session_id": "sess-123", + "payload": { + "message": { + "type": "action", + "_id": "msg-d", + "topic_id": "topic-1", + "created_at": "2025-01-01T00:00:00Z", + "action": { + "kind": "human_action_declined", + "event_id": 9001, + "data": {}, + }, + } + }, + }) + + result = await asyncio.wait_for(captcha_task, timeout=5) + assert result.solved is False + assert result.cancel_reason == "declined" + assert result.child_event_id == 9001 + finally: + await client.close() + + +@pytest.mark.asyncio +async def test_min_timeout_hard_30s(mock_relay: MockRelayServer) -> None: + client, browser = await _setup_browser(mock_relay) + try: + with pytest.raises(ValueError, match="acceptance_timeout must be >= 30"): + await 
browser.request_captcha(acceptance_timeout=20) + with pytest.raises(ValueError, match="completion_timeout must be >= 30"): + await browser.request_captcha(completion_timeout=10) + finally: + await client.close() + + +@pytest.mark.asyncio +async def test_request_captcha_happy_path_auto_accept(mock_relay: MockRelayServer) -> None: + client, browser = await _setup_browser(mock_relay) + try: + with _patch_httpx_post() as mock_http_cls: + http_instance = mock_http_cls.return_value + captcha_task = asyncio.create_task( + browser.request_captcha(acceptance_timeout=30, completion_timeout=30, auto_accept=True) + ) + await asyncio.sleep(0.1) + + await mock_relay.send_to_all({ + "type": "chat.message", + "session_id": "sess-123", + "payload": { + "message": { + "type": "action", + "_id": "msg-auto", + "topic_id": "topic-1", + "created_at": "2025-01-01T00:00:00Z", + "action": { + "kind": "human_action_completed", + "event_id": 9001, + "data": { + "proof_message_id": "proof-auto", + "correction_id": 7777, + }, + }, + } + }, + }) + + result = await asyncio.wait_for(captcha_task, timeout=10) + assert result.solved is True + assert result.correction_id == 7777 + + vote_calls = [ + c for c in http_instance.post.call_args_list + if "/vote" in str(c) + ] + assert len(vote_calls) == 1 + call_kwargs = vote_calls[0] + assert "vote" in str(call_kwargs) + finally: + await client.close() + + +@pytest.mark.asyncio +async def test_request_captcha_manual_accept(mock_relay: MockRelayServer) -> None: + client, browser = await _setup_browser(mock_relay) + try: + with _patch_httpx_post() as mock_http_cls: + http_instance = mock_http_cls.return_value + captcha_task = asyncio.create_task( + browser.request_captcha(acceptance_timeout=30, completion_timeout=30, auto_accept=False) + ) + await asyncio.sleep(0.1) + + await mock_relay.send_to_all({ + "type": "chat.message", + "session_id": "sess-123", + "payload": { + "message": { + "type": "action", + "_id": "msg-man-a", + "topic_id": "topic-1", + 
"created_at": "2025-01-01T00:00:00Z", + "action": { + "kind": "human_action_completed", + "event_id": 9001, + "data": { + "proof_message_id": "proof-manual", + "correction_id": 8888, + }, + }, + } + }, + }) + + result = await asyncio.wait_for(captcha_task, timeout=5) + assert result.solved is True + assert result.correction_id == 8888 + + await result.accept_work() + + vote_calls = [ + c for c in http_instance.post.call_args_list + if "/vote" in str(c) + ] + assert len(vote_calls) == 1 + url_arg = str(vote_calls[0]) + assert "/api/agent/kal/event/9001/vote" in url_arg + json_arg = vote_calls[0].kwargs.get("json", {}) + assert json_arg.get("vote") is True + assert json_arg.get("ids") == [8888] + finally: + await client.close() + + +@pytest.mark.asyncio +async def test_request_captcha_manual_reject(mock_relay: MockRelayServer) -> None: + client, browser = await _setup_browser(mock_relay) + try: + with _patch_httpx_post() as mock_http_cls: + http_instance = mock_http_cls.return_value + captcha_task = asyncio.create_task( + browser.request_captcha(acceptance_timeout=30, completion_timeout=30, auto_accept=False) + ) + await asyncio.sleep(0.1) + + await mock_relay.send_to_all({ + "type": "chat.message", + "session_id": "sess-123", + "payload": { + "message": { + "type": "action", + "_id": "msg-man-r", + "topic_id": "topic-1", + "created_at": "2025-01-01T00:00:00Z", + "action": { + "kind": "human_action_completed", + "event_id": 9001, + "data": { + "proof_message_id": "proof-reject", + "correction_id": 6666, + }, + }, + } + }, + }) + + result = await asyncio.wait_for(captcha_task, timeout=5) + assert result.solved is True + + await result.reject_work(reason="blurry") + + vote_calls = [ + c for c in http_instance.post.call_args_list + if "/vote" in str(c) + ] + assert len(vote_calls) == 1 + url_arg = str(vote_calls[0]) + assert "/api/agent/kal/event/9001/vote" in url_arg + json_arg = vote_calls[0].kwargs.get("json", {}) + assert json_arg.get("vote") is False + assert 
json_arg.get("reason") == "blurry" + assert json_arg.get("ids") == [6666] + finally: + await client.close() + + +@pytest.mark.asyncio +async def test_request_captcha_no_correction_id_raises(mock_relay: MockRelayServer) -> None: + client, browser = await _setup_browser(mock_relay) + try: + with _patch_httpx_post(): + captcha_task = asyncio.create_task( + browser.request_captcha(acceptance_timeout=30, completion_timeout=30, auto_accept=False) + ) + await asyncio.sleep(0.1) + + await mock_relay.send_to_all({ + "type": "chat.message", + "session_id": "sess-123", + "payload": { + "message": { + "type": "action", + "_id": "msg-no-corr", + "topic_id": "topic-1", + "created_at": "2025-01-01T00:00:00Z", + "action": { + "kind": "human_action_completed", + "event_id": 9001, + "data": { + "proof_message_id": "proof-no-corr", + }, + }, + } + }, + }) + + result = await asyncio.wait_for(captcha_task, timeout=5) + assert result.solved is True + assert result.correction_id is None + + with pytest.raises(CaptchaError, match="no correction_id"): + await result.accept_work() + + with pytest.raises(CaptchaError, match="no correction_id"): + await result.reject_work() + finally: + await client.close() diff --git a/tests/test_chat.py b/tests/test_chat.py new file mode 100644 index 0000000..cb95112 --- /dev/null +++ b/tests/test_chat.py @@ -0,0 +1,246 @@ +from __future__ import annotations + +import asyncio + +import pytest + +from ceki_browser import ConnectOptions, connect +from ceki_browser._models import ChatMessage, ReadReceipt + + +@pytest.fixture +async def chat_browser(mock_relay): + client = await connect("test-key", ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}/ws/agent")) + + async def ack_rent(): + await asyncio.sleep(0.05) + await mock_relay.send_to_all({"type": "rent_pending", "event_id": "ev-chat", "schedule_id": 1}) + await asyncio.sleep(0.02) + await mock_relay.send_to_all({ + "type": "match", + "event_id": "ev-chat", + "session_id": "sess-chat", + 
"schedule_id": 1, + "chat_topic_id": "77", + "browser_info": {}, + }) + + t = asyncio.create_task(ack_rent()) + browser = await client.rent(1) + await t + yield browser, mock_relay + await client.close() + + +@pytest.fixture +async def chat_browser_no_topic(mock_relay): + client = await connect("test-key", ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}/ws/agent")) + + async def ack_rent(): + await asyncio.sleep(0.05) + await mock_relay.send_to_all({"type": "rent_pending", "event_id": "ev-notopic", "schedule_id": 1}) + await asyncio.sleep(0.02) + await mock_relay.send_to_all({ + "type": "match", + "event_id": "ev-notopic", + "session_id": "sess-notopic", + "schedule_id": 1, + "chat_topic_id": None, + "browser_info": {}, + }) + + t = asyncio.create_task(ack_rent()) + browser = await client.rent(1) + await t + yield browser, mock_relay + await client.close() + + +@pytest.mark.asyncio +async def test_send_text_ack(chat_browser): + browser, mock_relay = chat_browser + + async def ack_send(): + await asyncio.sleep(0.05) + send_msg = next((m for m in mock_relay.received if m.get("type") == "chat.send"), None) + assert send_msg is not None + assert send_msg["text"] == "hello" + assert send_msg["session_id"] == "sess-chat" + client_msg_id = send_msg["client_msg_id"] + await mock_relay.send_to_all({ + "type": "chat.send_ack", + "session_id": "sess-chat", + "client_msg_id": client_msg_id, + "message_id": 42, + "sent_at": "2026-05-05T10:00:00Z", + }) + + t = asyncio.create_task(ack_send()) + result = await browser.chat.send("hello") + await t + + assert result["message_id"] == 42 + assert result["sent_at"] == "2026-05-05T10:00:00Z" + + +@pytest.mark.asyncio +async def test_on_message_callback(chat_browser): + browser, mock_relay = chat_browser + + received: list[ChatMessage] = [] + + async def on_msg(msg: ChatMessage) -> None: + received.append(msg) + + browser.chat.on_message(on_msg) + + await mock_relay.send_to_all({ + "type": "chat.message", + "session_id": 
"sess-chat", + "topic_id": "77", + "message": { + "_id": "69fcbb000000000000000001", + "topic_id": "77", + "sender_id": 7, + "text": "captcha solved", + "media": None, + "type": "text", + "created_at": "2026-05-07T10:00:00.000Z", + "edited_at": None, + "deleted_at": None, + }, + }) + + await asyncio.sleep(0.1) + + assert len(received) == 1 + assert received[0].id == "69fcbb000000000000000001" + assert received[0].sender_id == 7 + assert received[0].text == "captcha solved" + assert received[0].type == "text" + assert not received[0].is_system() + assert received[0].is_from_provider(7) + assert not received[0].is_from_provider(99) + + +@pytest.mark.asyncio +async def test_on_read_callback(chat_browser): + browser, mock_relay = chat_browser + + receipts: list[ReadReceipt] = [] + + async def on_read(receipt: ReadReceipt) -> None: + receipts.append(receipt) + + browser.chat.on_read(on_read) + + await mock_relay.send_to_all({ + "type": "chat.read", + "session_id": "sess-chat", + "payload": { + "topic_id": "77", + "last_read_message_id": "69fcbb000000000000000042", + "read_at": 1746441720.0, + }, + }) + + await asyncio.sleep(0.1) + + assert len(receipts) == 1 + assert receipts[0].last_read_message_id == "69fcbb000000000000000042" + assert receipts[0].topic_id == "77" + + +@pytest.mark.asyncio +async def test_send_without_topic_raises(chat_browser_no_topic): + browser, _ = chat_browser_no_topic + + with pytest.raises(RuntimeError, match="chat topic not assigned"): + await browser.chat.send("hello") + + +# --- Unit tests for ChatMessage v2 schema and helpers --- + +def _make_msg(**kwargs): + base = { + "_id": "69fcbb000000000000000001", + "topic_id": "69fcbb000000000000000002", + "sender_id": 1, + "text": "hello", + "type": "text", + "created_at": "2026-05-07T10:00:00.000Z", + } + base.update(kwargs) + return ChatMessage.model_validate(base) + + +def test_chatmessage_validates_real_payload(): + msg = _make_msg(media=None, edited_at=None, deleted_at=None) + assert msg.id == 
"69fcbb000000000000000001" + assert msg.topic_id == "69fcbb000000000000000002" + assert msg.sender_id == 1 + assert msg.text == "hello" + assert msg.type == "text" + assert msg.created_at == "2026-05-07T10:00:00.000Z" + + +def test_chatmessage_type_system(): + msg = _make_msg(type="system", text="[ext v0.6.63]\n[10:00:00][info] session start") + assert msg.is_system() + assert not msg.is_from_provider(None) + assert not msg.is_from_provider(99) + + +def test_chatmessage_type_text_default(): + base = { + "_id": "69fcbb000000000000000003", + "topic_id": "69fcbb000000000000000002", + "sender_id": 2, + "text": "hi", + "created_at": "2026-05-07T10:01:00.000Z", + } + msg = ChatMessage.model_validate(base) + assert msg.type == "text" + assert not msg.is_system() + + +def test_chatmessage_is_from_provider(): + msg = _make_msg(sender_id=5) + assert msg.is_from_provider(5) + assert not msg.is_from_provider(6) + assert not msg.is_from_provider(None) + + +def test_chatmessage_extra_fields_ignored(): + base = { + "_id": "69fcbb000000000000000004", + "topic_id": "69fcbb000000000000000002", + "sender_id": 1, + "text": "x", + "created_at": "2026-05-07T10:02:00.000Z", + "unknown_future_field": "value", + } + msg = ChatMessage.model_validate(base) + assert msg.text == "x" + + +def test_match_provider_user_id(): + from ceki_browser._models import Match + m = Match.model_validate({ + "session_id": "sess-1", + "schedule_id": 240, + "event_id": "ev-1", + "chat_topic_id": "69fcbb000000000000000002", + "provider_user_id": 3, + "browser_info": {}, + }) + assert m.provider_user_id == 3 + + +def test_match_provider_user_id_missing(): + from ceki_browser._models import Match + m = Match.model_validate({ + "session_id": "sess-2", + "schedule_id": 240, + }) + assert m.provider_user_id is None diff --git a/tests/test_chat_history.py b/tests/test_chat_history.py new file mode 100644 index 0000000..6f67c0b --- /dev/null +++ b/tests/test_chat_history.py @@ -0,0 +1,136 @@ +from __future__ import 
annotations + +import asyncio +from unittest.mock import AsyncMock, patch + +import httpx +import pytest + +from ceki_browser import ConnectOptions, connect +from ceki_browser._models import ChatMessage + + +def _make_response(data) -> httpx.Response: + resp = httpx.Response(200, json=data) + resp.request = httpx.Request("GET", "http://test") + return resp + + +@pytest.fixture +async def chat_browser(mock_relay): + client = await connect("test-key", ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}/ws/agent")) + + async def ack_rent(): + server_ev = "ev-test-1" + await asyncio.sleep(0.05) + await mock_relay.send_to_all({"type": "rent_pending", "event_id": server_ev}) + await asyncio.sleep(0.05) + await mock_relay.send_to_all({ + "type": "match", + "event_id": server_ev, + "session_id": "sess-hist", + "schedule_id": 1, + "chat_topic_id": "55", + "browser_info": {}, + }) + + t = asyncio.create_task(ack_rent()) + browser = await client.rent(1) + await t + yield browser, mock_relay + await client.close() + + +@pytest.mark.asyncio +async def test_history_returns_messages_asc(chat_browser): + browser, _ = chat_browser + + def _msg(mid, sid, text, ts): + return {"_id": str(mid), "topic_id": "55", "sender_id": sid, + "text": text, "type": "text", "created_at": ts} + + messages_data = [ + _msg(1, 1, "first", "2026-05-07T10:00:00.000Z"), + _msg(2, 7, "second", "2026-05-07T10:01:00.000Z"), + _msg(3, 1, "third", "2026-05-07T10:02:00.000Z"), + ] + + mock_resp = _make_response({"messages": messages_data}) + with patch("httpx.AsyncClient.send", new_callable=AsyncMock, return_value=mock_resp): + history = await browser.chat.history(limit=3) + + assert len(history) == 3 + assert [m.id for m in history] == ["1", "2", "3"] + assert all(isinstance(m, ChatMessage) for m in history) + + +@pytest.mark.asyncio +async def test_history_passes_limit_param(chat_browser): + browser, _ = chat_browser + + captured_request: list[httpx.Request] = [] + + async def mock_send(request: 
httpx.Request, **kwargs): + captured_request.append(request) + resp = _make_response({"data": []}) + resp.request = request + return resp + + with patch("httpx.AsyncClient.send", new_callable=AsyncMock, side_effect=mock_send): + await browser.chat.history(limit=10) + + assert len(captured_request) == 1 + url_str = str(captured_request[0].url) + assert "limit=10" in url_str + assert "topic_id=" in url_str + assert "/messages" in url_str + + +@pytest.mark.asyncio +async def test_history_passes_before_id_param(chat_browser): + browser, _ = chat_browser + + captured_request: list[httpx.Request] = [] + + async def mock_send(request: httpx.Request, **kwargs): + captured_request.append(request) + resp = _make_response({"data": []}) + resp.request = request + return resp + + with patch("httpx.AsyncClient.send", new_callable=AsyncMock, side_effect=mock_send): + await browser.chat.history(limit=5, before_id=100) + + url_str = str(captured_request[0].url) + assert "before=100" in url_str + assert "limit=5" in url_str + assert "topic_id=" in url_str + assert "/messages" in url_str + + +@pytest.mark.asyncio +async def test_history_no_topic_returns_empty(mock_relay): + client = await connect("test-key", ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}/ws/agent")) + + async def ack_rent(): + server_ev = "ev-test-2" + await asyncio.sleep(0.05) + await mock_relay.send_to_all({"type": "rent_pending", "event_id": server_ev}) + await asyncio.sleep(0.05) + await mock_relay.send_to_all({ + "type": "match", + "event_id": server_ev, + "session_id": "sess-notopic", + "schedule_id": 1, + "chat_topic_id": None, + "browser_info": {}, + }) + + t = asyncio.create_task(ack_rent()) + browser = await client.rent(1) + await t + + result = await browser.chat.history() + assert result == [] + + await client.close() diff --git a/tests/test_chat_image.py b/tests/test_chat_image.py new file mode 100644 index 0000000..a7b1b18 --- /dev/null +++ b/tests/test_chat_image.py @@ -0,0 +1,135 @@ +from 
__future__ import annotations + +import asyncio +import base64 + +import pytest + +from ceki_browser import ConnectOptions, connect +from ceki_browser._chat import MAX_IMAGE_BYTES + +PNG_MAGIC = b"\x89PNG\r\n\x1a\n" + b"\x00" * 100 +JPEG_MAGIC = b"\xff\xd8\xff\xe0" + b"\x00" * 100 +WEBP_MAGIC = b"RIFF\x00\x00\x00\x00WEBP" + b"\x00" * 100 + + +@pytest.fixture +async def chat_browser(mock_relay, tmp_path): + client = await connect("test-key", ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}/ws/agent")) + + async def ack_rent(): + server_ev = "ev-test-1" + await asyncio.sleep(0.05) + await mock_relay.send_to_all({"type": "rent_pending", "event_id": server_ev}) + await asyncio.sleep(0.05) + await mock_relay.send_to_all({ + "type": "match", + "event_id": server_ev, + "session_id": "sess-img", + "schedule_id": 1, + "chat_topic_id": "88", + "browser_info": {}, + }) + + t = asyncio.create_task(ack_rent()) + browser = await client.rent(1) + await t + yield browser, mock_relay, tmp_path + await client.close() + + +async def _ack_image_send(mock_relay): + await asyncio.sleep(0.05) + send_msg = next((m for m in mock_relay.received if m.get("type") == "chat.send_image"), None) + assert send_msg is not None + client_msg_id = send_msg["client_msg_id"] + await mock_relay.send_to_all({ + "type": "chat.send_ack", + "session_id": "sess-img", + "client_msg_id": client_msg_id, + "message_id": 1, + "sent_at": "2026-05-05T10:00:00Z", + }) + return send_msg + + +@pytest.mark.asyncio +async def test_send_image_png_mime_detect(chat_browser): + browser, mock_relay, _ = chat_browser + + t = asyncio.create_task(_ack_image_send(mock_relay)) + await browser.chat.send_image(PNG_MAGIC) + sent = await t + + assert sent["mime"] == "image/png" + assert sent["data_b64"] == base64.b64encode(PNG_MAGIC).decode() + + +@pytest.mark.asyncio +async def test_send_image_jpeg_mime_detect(chat_browser): + browser, mock_relay, _ = chat_browser + + t = asyncio.create_task(_ack_image_send(mock_relay)) + 
await browser.chat.send_image(JPEG_MAGIC) + sent = await t + + assert sent["mime"] == "image/jpeg" + + +@pytest.mark.asyncio +async def test_send_image_webp_mime_detect(chat_browser): + browser, mock_relay, _ = chat_browser + + t = asyncio.create_task(_ack_image_send(mock_relay)) + await browser.chat.send_image(WEBP_MAGIC) + sent = await t + + assert sent["mime"] == "image/webp" + + +@pytest.mark.asyncio +async def test_send_image_from_path_jpeg(chat_browser): + browser, mock_relay, tmp_path = chat_browser + + img_file = tmp_path / "photo.jpg" + img_file.write_bytes(JPEG_MAGIC) + + t = asyncio.create_task(_ack_image_send(mock_relay)) + await browser.chat.send_image(img_file) + sent = await t + + assert sent["mime"] == "image/jpeg" + + +@pytest.mark.asyncio +async def test_send_image_from_str_path(chat_browser): + browser, mock_relay, tmp_path = chat_browser + + img_file = tmp_path / "photo.png" + img_file.write_bytes(PNG_MAGIC) + + t = asyncio.create_task(_ack_image_send(mock_relay)) + await browser.chat.send_image(str(img_file)) + sent = await t + + assert sent["mime"] == "image/png" + + +@pytest.mark.asyncio +async def test_send_image_size_limit(chat_browser): + browser, mock_relay, _ = chat_browser + + big_data = b"\x89PNG" + b"\x00" * (MAX_IMAGE_BYTES + 1) + with pytest.raises(ValueError, match="too large"): + await browser.chat.send_image(big_data) + + +@pytest.mark.asyncio +async def test_send_image_mime_override(chat_browser): + browser, mock_relay, _ = chat_browser + + t = asyncio.create_task(_ack_image_send(mock_relay)) + await browser.chat.send_image(PNG_MAGIC, mime="image/webp") + sent = await t + + assert sent["mime"] == "image/webp" diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..28ed508 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,436 @@ +from __future__ import annotations + +import json +import subprocess +import sys +import tempfile +from collections.abc import AsyncGenerator +from pathlib import Path +from 
typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from ceki_browser._state import save_session, load_session, delete_session, get_last_seen_ts, update_last_seen_ts +from ceki_browser.cli import build_parser + + +# ────────────────────────────────────────────────────────────────────────── +# State file tests +# ────────────────────────────────────────────────────────────────────────── + + +def test_state_save_load_delete(tmp_path: Path): + with patch("ceki_browser._state._STATE_DIR", tmp_path / "sessions"): + save_session("test-1", {"session_id": "test-1", "schedule_id": 5}) + data = load_session("test-1") + assert data is not None + assert data["session_id"] == "test-1" + assert "updated_at" in data + + delete_session("test-1") + assert load_session("test-1") is None + + +def test_state_last_seen_ts(tmp_path: Path): + with patch("ceki_browser._state._STATE_DIR", tmp_path / "sessions"): + assert get_last_seen_ts("s1") is None + save_session("s1", {"session_id": "s1"}) + assert get_last_seen_ts("s1") is None + update_last_seen_ts("s1", "2026-01-01T00:00:00Z") + assert get_last_seen_ts("s1") == "2026-01-01T00:00:00Z" + + +# ────────────────────────────────────────────────────────────────────────── +# Parser tests +# ────────────────────────────────────────────────────────────────────────── + + +def test_parser_rent(): + parser = build_parser() + args = parser.parse_args(["rent", "--schedule", "42"]) + assert args.command == "rent" + assert args.schedule == 42 + + +def test_parser_snapshot(): + parser = build_parser() + args = parser.parse_args(["snapshot", "ses-123", "-o", "/tmp/x.png"]) + assert args.command == "snapshot" + assert args.session_id == "ses-123" + assert args.output == "/tmp/x.png" + + +def test_parser_navigate(): + parser = build_parser() + args = parser.parse_args(["navigate", "ses-1", "https://example.com"]) + assert args.command == "navigate" + assert args.url == "https://example.com" + + +def 
test_parser_click(): + parser = build_parser() + args = parser.parse_args(["click", "ses-1", "100", "200"]) + assert args.command == "click" + assert args.x == 100 + assert args.y == 200 + + +def test_parser_type(): + parser = build_parser() + args = parser.parse_args(["type", "ses-1", "hello world"]) + assert args.command == "type" + assert args.text == "hello world" + assert not args.natural + + +def test_parser_type_natural(): + parser = build_parser() + args = parser.parse_args(["type", "ses-1", "hi", "--natural"]) + assert args.natural is True + + +def test_parser_scroll(): + parser = build_parser() + args = parser.parse_args(["scroll", "ses-1", "0", "0", "-300"]) + assert args.command == "scroll" + assert args.dy == -300 + + +def test_parser_chat_send(): + parser = build_parser() + args = parser.parse_args(["chat", "ses-1", "send", "hello provider"]) + assert args.command == "chat" + assert args.chat_action == "send" + assert args.text == "hello provider" + + +def test_parser_chat_next(): + parser = build_parser() + args = parser.parse_args(["chat", "ses-1", "next", "--timeout=30"]) + assert args.chat_action == "next" + assert args.timeout == 30 + + +def test_parser_stop(): + parser = build_parser() + args = parser.parse_args(["stop", "ses-1"]) + assert args.command == "stop" + + +# ────────────────────────────────────────────────────────────────────────── +# New subcommand parser tests +# ────────────────────────────────────────────────────────────────────────── + + +def test_parser_profile_export(): + parser = build_parser() + args = parser.parse_args([ + "profile", "ses-1", "export", "-o", "/tmp/p.json", + "--domains", ".reddit.com,reddit.com", + ]) + assert args.command == "profile" + assert args.session_id == "ses-1" + assert args.profile_action == "export" + assert args.output == "/tmp/p.json" + assert args.domains == ".reddit.com,reddit.com" + assert args.no_session_storage is False + + +def test_parser_profile_export_no_session_storage(): + parser = 
build_parser() + args = parser.parse_args([ + "profile", "ses-1", "export", "-o", "/tmp/p.json", "--no-session-storage", + ]) + assert args.no_session_storage is True + + +def test_parser_profile_import(): + parser = build_parser() + args = parser.parse_args(["profile", "ses-1", "import", "-i", "/tmp/p.json"]) + assert args.command == "profile" + assert args.profile_action == "import" + assert args.input == "/tmp/p.json" + + +def test_parser_search(): + parser = build_parser() + args = parser.parse_args([ + "search", "--limit", "5", "--filter", "region=US", "--filter", "price_max=0.5", + ]) + assert args.command == "search" + assert args.limit == 5 + assert args.filter == ["region=US", "price_max=0.5"] + + +def test_parser_search_defaults(): + parser = build_parser() + args = parser.parse_args(["search"]) + assert args.limit == 20 + assert args.filter is None + + +def test_parser_chat_history(): + parser = build_parser() + args = parser.parse_args([ + "chat", "ses-1", "history", "--since", "2026-01-01T00:00:00Z", "--limit", "20", + ]) + assert args.command == "chat" + assert args.chat_action == "history" + assert args.since == "2026-01-01T00:00:00Z" + assert args.limit == 20 + + +def test_parser_wait(): + parser = build_parser() + args = parser.parse_args(["wait", "ses-1"]) + assert args.command == "wait" + assert args.session_id == "ses-1" + + +def test_parser_chat_send_image(): + parser = build_parser() + args = parser.parse_args(["chat", "ses-1", "send-image", "--image", "/tmp/img.png"]) + assert args.command == "chat" + assert args.chat_action == "send-image" + assert args.image == "/tmp/img.png" + assert args.text is None + + +def test_parser_chat_send_image_with_text(): + parser = build_parser() + args = parser.parse_args([ + "chat", "ses-1", "send-image", "--image", "/tmp/img.png", "--text", "look at this", + ]) + assert args.text == "look at this" + + +def test_parser_screenshot(): + parser = build_parser() + args = parser.parse_args(["screenshot", "ses-1", 
"-o", "/tmp/s.png", "--format", "jpeg"]) + assert args.command == "screenshot" + assert args.session_id == "ses-1" + assert args.output == "/tmp/s.png" + assert args.format == "jpeg" + + +def test_parser_screenshot_default_format(): + parser = build_parser() + args = parser.parse_args(["screenshot", "ses-1", "-o", "/tmp/s.png"]) + assert args.format == "png" + + +def test_parser_switch_tab(): + parser = build_parser() + args = parser.parse_args(["switch-tab", "ses-1"]) + assert args.command == "switch-tab" + assert args.session_id == "ses-1" + + +def test_parser_configure(): + parser = build_parser() + args = parser.parse_args([ + "configure", "ses-1", "--masking-mode", "true", "--fingerprint", "false", + ]) + assert args.command == "configure" + assert args.session_id == "ses-1" + assert args.masking_mode == "true" + assert args.fingerprint == "false" + + +def test_parser_configure_partial(): + parser = build_parser() + args = parser.parse_args(["configure", "ses-1", "--masking-mode", "true"]) + assert args.masking_mode == "true" + assert args.fingerprint is None + + +def test_parser_cdp(): + parser = build_parser() + args = parser.parse_args([ + "cdp", "ses-1", "--method", "Page.navigate", + "--params", '{"url":"https://example.com"}', + ]) + assert args.command == "cdp" + assert args.session_id == "ses-1" + assert args.method == "Page.navigate" + assert args.params == '{"url":"https://example.com"}' + + +def test_parser_cdp_no_params(): + parser = build_parser() + args = parser.parse_args(["cdp", "ses-1", "--method", "Page.reload"]) + assert args.method == "Page.reload" + assert args.params is None + + +def test_search_domains_parsing(): + """Verify --domains comma-split logic in profile export.""" + parser = build_parser() + args = parser.parse_args([ + "profile", "ses-1", "export", "-o", "/tmp/p.json", + "--domains", ".reddit.com,reddit.com,www.reddit.com", + ]) + domains = [d.strip() for d in args.domains.split(",")] + assert domains == [".reddit.com", 
"reddit.com", "www.reddit.com"] + + +def test_cli_uses_disconnect_not_close(): + """CLI must use client.disconnect(), not client.close(), to avoid killing active sessions.""" + cli_path = Path(__file__).resolve().parent.parent / "ceki_browser" / "cli.py" + src = cli_path.read_text() + assert "client.disconnect()" in src + assert "client.close()" not in src + + +def test_search_filter_parsing(): + """Verify --filter key=val parsing logic.""" + parser = build_parser() + args = parser.parse_args([ + "search", "--filter", "region=US", "--filter", "price_max=0.5", + ]) + filters = {} + for f in args.filter: + k, v = f.split("=", 1) + filters[k] = v + assert filters == {"region": "US", "price_max": "0.5"} + + +# ────────────────────────────────────────────────────────────────────────── +# Exit code: missing CEKI_API_KEY +# ────────────────────────────────────────────────────────────────────────── + + +def test_missing_api_key_exits_2(): + env = {k: v for k, v in __import__("os").environ.items() if k != "CEKI_API_KEY"} + result = subprocess.run( + [sys.executable, "-m", "ceki_browser.cli", "rent", "--schedule", "1"], + capture_output=True, + text=True, + env=env, + ) + assert result.returncode == 2 + err = json.loads(result.stderr.strip()) + assert err["code"] == "auth" + + +# ────────────────────────────────────────────────────────────────────────── +# Resume exception mapping tests +# ────────────────────────────────────────────────────────────────────────── + + +async def test_resume_not_found(): + from ceki_browser._client import Client + from ceki_browser._exceptions import SessionNotFound + + client = Client( + api_key="test", + relay_url="wss://test/ws/agent", + api_url="https://test", + chat_url="https://test/chat", + reconnect=False, + ) + client._ws = AsyncMock() + client._ws.send = AsyncMock() + + async def fake_dispatch(): + await client._dispatch({"type": "resume_failed", "session_id": "s1", "reason": "not_found"}) + + import asyncio + loop = 
asyncio.get_event_loop() + task = loop.create_task(client.resume("s1")) + await asyncio.sleep(0.01) + await fake_dispatch() + with pytest.raises(SessionNotFound): + await task + + +async def test_resume_not_owner(): + from ceki_browser._client import Client + from ceki_browser._exceptions import NotOwner + + client = Client( + api_key="test", + relay_url="wss://test/ws/agent", + api_url="https://test", + chat_url="https://test/chat", + reconnect=False, + ) + client._ws = AsyncMock() + client._ws.send = AsyncMock() + + async def fake_dispatch(): + await client._dispatch({"type": "resume_failed", "session_id": "s2", "reason": "not_owner"}) + + import asyncio + loop = asyncio.get_event_loop() + task = loop.create_task(client.resume("s2")) + await asyncio.sleep(0.01) + await fake_dispatch() + with pytest.raises(NotOwner): + await task + + +async def test_resume_ok(): + from ceki_browser._client import Client + + client = Client( + api_key="test", + relay_url="wss://test/ws/agent", + api_url="https://test", + chat_url="https://test/chat", + reconnect=False, + ) + client._ws = AsyncMock() + client._ws.send = AsyncMock() + + import asyncio + with patch.dict("os.environ", {"CEKI_HUMAN_DISABLE": "1"}): + task = asyncio.create_task(client.resume("s3")) + await asyncio.sleep(0.01) + await client._dispatch({ + "type": "resume_ok", + "session_id": "s3", + "schedule_id": 42, + "chat_topic_id": "topic-1", + "provider_user_id": 99, + }) + browser = await task + assert browser.session_id == "s3" + assert browser.schedule_id == 42 + assert browser.chat_topic_id == "topic-1" + assert "s3" in client._active_browsers + + +# ────────────────────────────────────────────────────────────────────────── +# Snapshot test +# ────────────────────────────────────────────────────────────────────────── + + +async def test_snapshot_returns_data(): + from ceki_browser import Browser + import base64 + + client = AsyncMock() + client._active_browsers = {} + client.chat_url = "https://test/chat" + 
client.api_key = "test" + + match = AsyncMock() + match.session_id = "snap-1" + match.schedule_id = 1 + match.chat_topic_id = "t1" + match.browser_info = {} + match.provider_user_id = None + + with patch.dict("os.environ", {"CEKI_HUMAN_DISABLE": "1"}): + b = Browser(client, match) + + png_data = base64.b64encode(b"\x89PNG\r\n").decode() + b.send = AsyncMock(return_value={"data": png_data}) + b.chat.history = AsyncMock(return_value=[]) + + snap = await b.snapshot() + assert snap.screenshot == png_data + assert snap.chat == [] + assert snap.ts is not None diff --git a/tests/test_client.py b/tests/test_client.py deleted file mode 100644 index 57463c9..0000000 --- a/tests/test_client.py +++ /dev/null @@ -1,179 +0,0 @@ -from unittest.mock import MagicMock - -import pytest - -from ceki_browser import Browser, NavigateResult, QueryResult, Session -from ceki_browser.errors import CekiBrowserError - - -class MockRTC: - def __init__(self): - self._responses: dict[str, dict] = {} - self._calls: list[tuple[str, dict | None]] = [] - self.cmd_channel = MagicMock() - self.cmd_channel.readyState = "open" - self.pc = MagicMock() - self.pc.connectionState = "connected" - - def set_response(self, method: str, result: dict): - self._responses[method] = result - - async def send_command(self, method: str, params: dict | None = None, timeout: float = 30.0): - self._calls.append((method, params)) - if method in self._responses: - return self._responses[method] - return {} - - async def close(self): - pass - - -class MockTransport: - def __init__(self): - self.agent_id = "agent-mock" - self._event_callback = None - self._responses: dict[str, dict] = {} - self._calls: list[tuple[str, dict | None]] = [] - - def on_event(self, cb): - self._event_callback = cb - - def set_response(self, method: str, result: dict): - self._responses[method] = result - - async def connect(self): - return {"status": "connected", "agent_id": self.agent_id} - - async def close(self): - pass - - async def 
send(self, method: str, params: dict | None = None, timeout: float = 60.0): - self._calls.append((method, params)) - if method in self._responses: - return self._responses[method] - return {} - - async def notify(self, method: str, params: dict | None = None): - pass - - @property - def connected(self): - return True - - -def make_session_with_mock_rtc(human=None) -> tuple[Session, MockRTC]: - mt = MockTransport() - mock_rtc = MockRTC() - sess = Session(mt, "req-1", "incognito", human=human) - sess._active = True - sess._session_id = "sess-1" - sess._rtc = mock_rtc - from ceki_browser.chat import ChatAPI - sess._chat = ChatAPI(mt, "sess-1", None) - return sess, mock_rtc - - -@pytest.mark.asyncio -async def test_browser_connect_and_close(): - mt = MockTransport() - browser = Browser.__new__(Browser) - browser._transport = mt - browser._connected = False - - result = await browser.connect() - assert result["agent_id"] == "agent-mock" - assert browser.connected - - await browser.close() - assert not browser.connected - - -@pytest.mark.asyncio -async def test_session_navigate(): - sess, rtc = make_session_with_mock_rtc() - rtc.set_response("browser.navigate", {"url": "https://example.com", "title": "Example", "status": 200}) - - result = await sess.navigate("https://example.com") - assert isinstance(result, NavigateResult) - assert result.url == "https://example.com" - assert result.title == "Example" - - -@pytest.mark.asyncio -async def test_session_query(): - sess, rtc = make_session_with_mock_rtc() - rtc.set_response("browser.query", {"elements": [{"textContent": "Hello World"}]}) - - result = await sess.query("h1") - assert isinstance(result, QueryResult) - assert result.text == "Hello World" - assert len(result) == 1 - - -@pytest.mark.asyncio -async def test_session_query_all(): - sess, rtc = make_session_with_mock_rtc() - rtc.set_response("browser.query_all", {"elements": [ - {"textContent": "Item 1"}, - {"textContent": "Item 2"}, - {"textContent": "Item 3"}, - 
]}) - - result = await sess.query_all("li") - assert len(result) == 3 - - -@pytest.mark.asyncio -async def test_session_inactive_raises(): - mt = MockTransport() - sess = Session(mt, "req-1", "incognito") - - with pytest.raises(CekiBrowserError, match="not active"): - await sess.navigate("https://example.com") - - -@pytest.mark.asyncio -async def test_session_click_and_type(): - sess, rtc = make_session_with_mock_rtc() - rtc.set_response("browser.click", {"clicked": True}) - rtc.set_response("browser.type", {"typed": True}) - - await sess.click(selector="#btn") - await sess.type("#input", "hello") - - assert rtc._calls[0] == ("browser.click", {"selector": "#btn"}) - assert rtc._calls[1] == ("browser.type", {"selector": "#input", "text": "hello", "delay_ms": 0}) - - -@pytest.mark.asyncio -async def test_session_screenshot(): - sess, rtc = make_session_with_mock_rtc() - rtc.set_response("browser.screenshot", {"data": "base64data", "width": 1920, "height": 1080}) - - result = await sess.screenshot() - assert result.data == "base64data" - assert result.width == 1920 - - -@pytest.mark.asyncio -async def test_session_end(): - mt = MockTransport() - mt.set_response("session.end", {"status": "ended"}) - sess = Session(mt, "req-1", "incognito") - sess._active = True - sess._session_id = "sess-1" - - await sess.end() - assert not sess.active - - -@pytest.mark.asyncio -async def test_session_context_manager(): - mt = MockTransport() - mt.set_response("session.end", {"status": "ended"}) - sess = Session(mt, "req-1", "incognito") - sess._active = True - - async with sess: - assert sess.active - assert not sess.active diff --git a/tests/test_connect.py b/tests/test_connect.py new file mode 100644 index 0000000..7ab4deb --- /dev/null +++ b/tests/test_connect.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +import pytest + +from ceki_browser import Client, ConnectOptions, connect + +from .conftest import MockRelayServer + + +@pytest.mark.asyncio +async def 
test_connect_establishes_ws(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + assert client._ws is not None + assert not client._ws.closed + await client.close() + + +@pytest.mark.asyncio +async def test_connect_uses_bearer_subprotocol(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("my-api-key", ConnectOptions(relay_url=url)) + ws = client._ws + assert ws is not None + # Verify the client sent bearer subprotocol in the handshake + # (ws.request_headers contains the Upgrade request headers) + proto_header = ws.request_headers.get("Sec-WebSocket-Protocol", "") + assert "bearer.my-api-key" in proto_header + await client.close() + + +@pytest.mark.asyncio +async def test_close_cancels_tasks(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + ht = client._heartbeat_task + rt = client._reader_task + assert ht is not None and rt is not None + await client.close() + assert client._ws is None + + +@pytest.mark.asyncio +async def test_client_is_client_instance(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + assert isinstance(client, Client) + await client.close() diff --git a/tests/test_error_mapping.py b/tests/test_error_mapping.py new file mode 100644 index 0000000..189d864 --- /dev/null +++ b/tests/test_error_mapping.py @@ -0,0 +1,164 @@ +from __future__ import annotations + +import asyncio + +import pytest +import websockets +import websockets.server + +from ceki_browser import ConnectOptions, connect +from ceki_browser._exceptions import AuthFailed, CekiError, InsufficientFunds, ProviderOffline, SessionEnded + + +class _CloseImmediately4403: + """WS server that accepts upgrade then immediately closes with 4403.""" 
+ + def __init__(self) -> None: + self._server: websockets.server.WebSocketServer | None = None + self.port: int = 0 + + @staticmethod + def _select_subprotocol(ws: websockets.server.WebSocketServerProtocol, subprotocols: list[str]) -> str | None: + for sp in subprotocols: + if sp.startswith("bearer."): + return sp + return None + + async def _handler(self, ws: websockets.server.WebSocketServerProtocol) -> None: + await ws.close(4403, "unauthorized") + + async def start(self) -> None: + self._server = await websockets.serve( + self._handler, + "127.0.0.1", + 0, + select_subprotocol=self._select_subprotocol, + ) + self.port = next(iter(self._server.sockets)).getsockname()[1] + + async def stop(self) -> None: + if self._server: + self._server.close() + await self._server.wait_closed() + + +@pytest.fixture +async def close_4403_server(): + server = _CloseImmediately4403() + await server.start() + yield server + await server.stop() + + +@pytest.mark.asyncio +async def test_connect_bogus_token_close_4403_raises_auth_failed(close_4403_server: _CloseImmediately4403) -> None: + """Relay accepts WS upgrade then immediately closes 4403 → connect() raises AuthFailed within 2s.""" + url = f"ws://127.0.0.1:{close_4403_server.port}" + with pytest.raises(AuthFailed): + await asyncio.wait_for(connect("bad-token", ConnectOptions(relay_url=url)), timeout=2.0) + + +@pytest.mark.asyncio +async def test_handle_error_minus_1015_raises_provider_offline(mock_relay) -> None: + """error code=-1015 from relay → ProviderOffline raised from rent().""" + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=99)) + await asyncio.sleep(0.05) + + await mock_relay.send_to_all({"type": "error", "code": -1015, "reason": "no_providers"}) + + with pytest.raises(ProviderOffline) as exc_info: + await asyncio.wait_for(rent_task, timeout=5) + assert "no_providers" in str(exc_info.value) + + 
await client.close() + + +@pytest.mark.asyncio +async def test_error_message_uses_reason_field(mock_relay) -> None: + """relay sends {code:-1011, reason:'heartbeat_timeout'} with no 'message' field + → SessionEnded.reason == 'heartbeat_timeout', not 'None' or 'ended'.""" + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=77)) + await asyncio.sleep(0.05) + + # error with reason field only (no message), no session_id → goes to _handle_error + await mock_relay.send_to_all({ + "type": "error", + "code": -1011, + "reason": "heartbeat_timeout", + }) + + with pytest.raises(SessionEnded) as exc_info: + await asyncio.wait_for(rent_task, timeout=5) + assert exc_info.value.reason == "heartbeat_timeout" + assert exc_info.value.reason != "None" + + await client.close() + + +@pytest.mark.asyncio +async def test_handle_error_minus_1014_raises_ceki_error(mock_relay) -> None: + """error code=-1014 (Insufficient balance) → CekiError raised immediately, not 90s timeout.""" + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=99)) + await asyncio.sleep(0.05) + + await mock_relay.send_to_all({ + "type": "error", + "code": -1014, + "message": "Insufficient balance", + }) + + with pytest.raises(CekiError, match="Insufficient balance"): + await asyncio.wait_for(rent_task, timeout=2) + + await client.close() + + +@pytest.mark.asyncio +async def test_handle_error_minus_1012_raises_insufficient_funds(mock_relay) -> None: + """error code=-1012 → InsufficientFunds raised immediately.""" + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=88)) + await asyncio.sleep(0.05) + + await mock_relay.send_to_all({ + "type": "error", + "code": -1012, + 
"message": "Not enough funds", + }) + + with pytest.raises(InsufficientFunds): + await asyncio.wait_for(rent_task, timeout=2) + + await client.close() + + +@pytest.mark.asyncio +async def test_handle_error_unknown_code_raises_ceki_error(mock_relay) -> None: + """Unknown error code → CekiError (not plain Exception).""" + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=77)) + await asyncio.sleep(0.05) + + await mock_relay.send_to_all({ + "type": "error", + "code": -9999, + "message": "something weird", + }) + + with pytest.raises(CekiError, match="relay error -9999"): + await asyncio.wait_for(rent_task, timeout=2) + + await client.close() diff --git a/tests/test_examples_signature.py b/tests/test_examples_signature.py new file mode 100644 index 0000000..81975f9 --- /dev/null +++ b/tests/test_examples_signature.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import importlib +import inspect + +import pytest + + +@pytest.mark.parametrize("module_name", [ + "examples.reddit_signup", + "examples.github_signup", + "examples.imap_helper", +]) +def test_example_imports(module_name): + mod = importlib.import_module(module_name) + assert mod is not None + + +def test_imap_helper_signature(): + from examples.imap_helper import wait_for_confirm_link + + sig = inspect.signature(wait_for_confirm_link) + assert "tag" in sig.parameters + assert "timeout" in sig.parameters + assert "service" in sig.parameters diff --git a/tests/test_humanize.py b/tests/test_humanize.py deleted file mode 100644 index defa1a7..0000000 --- a/tests/test_humanize.py +++ /dev/null @@ -1,148 +0,0 @@ -from __future__ import annotations - -import json -import random -from pathlib import Path - -import pytest - -from ceki_browser.humanize import HumanProfile, Humanizer - - -class TestHumanProfile: - def test_load_preset_natural(self): - p = HumanProfile.load_preset("natural") - 
assert p.name == "natural" - assert p.raw["typing"]["wpm"] == 110 - - def test_load_preset_careful(self): - p = HumanProfile.load_preset("careful") - assert p.name == "careful" - assert p.raw["typing"]["wpm"] == 80 - - def test_from_dict_roundtrip(self): - p = HumanProfile.load_preset("natural") - d = p.to_dict() - p2 = HumanProfile.from_dict(d) - assert p2.to_dict() == d - - def test_json_roundtrip(self): - p = HumanProfile.load_preset("natural") - j = p.to_json() - d = json.loads(j) - p2 = HumanProfile.from_dict(d) - assert p2.to_dict() == p.to_dict() - - def test_get_range(self): - p = HumanProfile.load_preset("natural") - lo, hi = p.get_range("click", "pre") - assert lo == 80 - assert hi == 350 - - def test_get_range_missing(self): - p = HumanProfile.from_dict({"name": "empty"}) - lo, hi = p.get_range("unknown_action", "pre") - assert lo == 0 - assert hi == 0 - - def test_typing_interval(self): - p = HumanProfile.load_preset("natural") - interval = p.typing_interval() - expected = 60_000 / (110 * 5) - assert abs(interval - expected) < 0.01 - - def test_load_preset_not_found(self): - with pytest.raises(FileNotFoundError): - HumanProfile.load_preset("nonexistent") - - def test_from_dict_custom(self): - p = HumanProfile.from_dict({"typing": {"wpm": 200}}) - assert p.name == "custom" - assert p.raw["typing"]["wpm"] == 200 - - -class TestHumanizer: - @pytest.mark.asyncio - async def test_none_profile_zero_overhead(self): - h = Humanizer(None) - import time - start = time.monotonic() - await h.before("click") - await h.after("click") - elapsed = time.monotonic() - start - assert elapsed < 0.01 - - @pytest.mark.asyncio - async def test_none_humanize_text_no_delay(self): - h = Humanizer(None) - chars = [] - async for ch, delay in h.humanize_text("hello"): - chars.append((ch, delay)) - assert len(chars) == 5 - assert all(d == 0.0 for _, d in chars) - - @pytest.mark.asyncio - async def test_humanize_text_jitter_not_constant(self): - p = HumanProfile.from_dict({"typing": 
{"wpm": 110, "jitter": 0.35}, "rng_seed": 42}) - h = Humanizer(p) - delays = [] - async for _, delay in h.humanize_text("abcdefghij"): - delays.append(delay) - assert len(set(round(d, 2) for d in delays)) > 1, "Delays should not all be the same" - - @pytest.mark.asyncio - async def test_humanize_text_min_clamp(self): - p = HumanProfile.from_dict({"typing": {"wpm": 110, "jitter": 0.35}, "rng_seed": 42}) - h = Humanizer(p) - async for _, delay in h.humanize_text("abcdefghijklmnop"): - assert delay >= 20.0 - - @pytest.mark.asyncio - async def test_before_after_with_zero_range(self): - p = HumanProfile.from_dict({ - "pre_action_ms": {"screenshot": [0, 0]}, - "post_action_ms": {"screenshot": [0, 0]}, - }) - h = Humanizer(p) - import time - start = time.monotonic() - await h.before("screenshot") - await h.after("screenshot") - elapsed = time.monotonic() - start - assert elapsed < 0.01 - - -class TestSetHuman: - def test_set_human_returns_previous(self): - from ceki_browser.session import _resolve_human_profile - - p1 = _resolve_human_profile("natural") - assert p1 is not None - assert p1.name == "natural" - - p2 = _resolve_human_profile("careful") - assert p2 is not None - assert p2.name == "careful" - - p3 = _resolve_human_profile(None) - assert p3 is None - - def test_resolve_dict(self): - from ceki_browser.session import _resolve_human_profile - - p = _resolve_human_profile({"typing": {"wpm": 200}}) - assert p.raw["typing"]["wpm"] == 200 - - def test_resolve_human_profile_object(self): - from ceki_browser.session import _resolve_human_profile - - orig = HumanProfile.load_preset("natural") - p = _resolve_human_profile(orig) - assert p is orig - - def test_disable_env(self, monkeypatch): - from ceki_browser.session import _resolve_human_profile - - monkeypatch.setenv("CEKI_HUMAN_DISABLE", "1") - p = _resolve_human_profile("natural") - assert p is None diff --git a/tests/test_humanize_browser.py b/tests/test_humanize_browser.py new file mode 100644 index 0000000..e4f8225 
--- /dev/null +++ b/tests/test_humanize_browser.py @@ -0,0 +1,127 @@ +"""Tests for Browser humanization integration.""" +from __future__ import annotations +import asyncio +from unittest.mock import AsyncMock, MagicMock, patch +import pytest +from ceki_browser._browser import Browser, _resolve_human +from ceki_browser.humanize import HumanProfile, Humanizer + + +def _make_browser(human="natural"): + """Create a Browser with mocked internals.""" + client = MagicMock() + client._ws_send = AsyncMock() + match = MagicMock() + match.session_id = "test-session" + match.schedule_id = 1 + match.browser_info = {} + match.provider_user_id = None + match.chat_topic_id = None + b = Browser(client, match, human=human) + b._ended = asyncio.Event() + return b + + +class TestResolveHuman: + def test_none_returns_none(self): + assert _resolve_human(None) is None + + def test_string_preset(self): + h = _resolve_human("natural") + assert isinstance(h, Humanizer) + assert h.profile.name == "natural" + + def test_careful_preset(self): + h = _resolve_human("careful") + assert isinstance(h, Humanizer) + assert h.profile.name == "careful" + + def test_dict_profile(self): + h = _resolve_human({"typing": {"wpm": 130}}) + assert isinstance(h, Humanizer) + + def test_human_profile_instance(self): + p = HumanProfile.load_preset("natural") + h = _resolve_human(p) + assert h.profile is p + + def test_disable_env(self, monkeypatch): + monkeypatch.setenv("CEKI_HUMAN_DISABLE", "1") + assert _resolve_human("natural") is None + + +class TestBrowserHumanNone: + """human=None means zero overhead.""" + + @pytest.mark.asyncio + async def test_type_sends_single_insert(self): + b = _make_browser(human=None) + b.send = AsyncMock(return_value={}) + await b.type("hello") + b.send.assert_called_once() + call_args = b.send.call_args[0][0] + assert call_args["method"] == "Input.insertText" + assert call_args["params"]["text"] == "hello" + + @pytest.mark.asyncio + async def test_click_no_sleep(self): + b = 
_make_browser(human=None) + b.send = AsyncMock(return_value={}) + with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep: + await b.click(100, 200) + mock_sleep.assert_not_called() + + @pytest.mark.asyncio + async def test_navigate(self): + b = _make_browser(human=None) + b.send = AsyncMock(return_value={"frameId": "123"}) + result = await b.navigate("https://example.com") + assert result == {"frameId": "123"} + + +class TestBrowserHumanNatural: + """human="natural" adds delays.""" + + @pytest.mark.asyncio + async def test_type_per_char(self): + b = _make_browser(human="natural") + b.send = AsyncMock(return_value={}) + await b.type("abc") + insert_calls = [c for c in b.send.call_args_list + if c[0][0].get("method") == "Input.insertText"] + assert len(insert_calls) == 3 + + @pytest.mark.asyncio + async def test_click_timing_variance(self): + """100 clicks should have non-constant timing (std > 0).""" + b = _make_browser(human="natural") + b.send = AsyncMock(return_value={}) + import time + times = [] + for _ in range(20): + t0 = time.monotonic() + await b.click(100, 200) + times.append(time.monotonic() - t0) + deltas = [abs(times[i+1] - times[i]) for i in range(len(times)-1)] + assert max(deltas) > 0.001, "Timings should vary with human profile" + + +class TestSetHuman: + def test_set_human_returns_previous(self): + b = _make_browser(human="natural") + prev = b.set_human("careful") + assert prev is not None + assert prev.name == "natural" + assert b._humanizer.profile.name == "careful" + + def test_set_human_none_disables(self): + b = _make_browser(human="natural") + prev = b.set_human(None) + assert prev is not None + assert b._humanizer is None + + def test_set_human_from_none(self): + b = _make_browser(human=None) + prev = b.set_human("natural") + assert prev is None + assert b._humanizer is not None diff --git a/tests/test_long_running_session.py b/tests/test_long_running_session.py deleted file mode 100644 index d1170cb..0000000 --- 
a/tests/test_long_running_session.py +++ /dev/null @@ -1,108 +0,0 @@ -"""Test that heartbeat survives long sessions with infrequent RPCs. - -Verifies that a single CommandTimeout in the heartbeat loop does NOT -kill heartbeats permanently — the loop must continue sending pings. -""" - -import asyncio -import json -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from ceki_browser.transport import Transport - - -class FakeWebSocket: - def __init__(self, messages: list[str] | None = None): - self._messages = list(messages or []) - self._sent: list[str] = [] - self._closed = False - self.state = MagicMock() - self.state.name = "OPEN" - self._pending_responses: dict[int, dict] = {} - - async def recv(self) -> str: - if self._messages: - return self._messages.pop(0) - await asyncio.sleep(100) - return "" - - async def send(self, data: str) -> None: - self._sent.append(data) - msg = json.loads(data) - if msg.get("method") == "heartbeat" and msg.get("id") is not None: - resp = json.dumps({"jsonrpc": "2.0", "result": "pong", "id": msg["id"]}) - self._messages.append(resp) - - async def close(self) -> None: - self._closed = True - - def __aiter__(self): - return self - - async def __anext__(self) -> str: - if self._messages: - return self._messages.pop(0) - if self._closed: - raise StopAsyncIteration - await asyncio.sleep(0.05) - if self._messages: - return self._messages.pop(0) - raise StopAsyncIteration - - -@pytest.fixture -def welcome_msg(): - return json.dumps({"jsonrpc": "2.0", "result": {"status": "connected", "agent_id": "agent-123"}, "id": 0}) - - -@pytest.mark.asyncio -async def test_heartbeat_continues_after_timeout(welcome_msg): - """Heartbeat loop must survive a CommandTimeout and keep sending.""" - ws = FakeWebSocket([welcome_msg]) - - call_count = 0 - original_send = Transport.send - - async def patched_send(self, method, params=None, timeout=60.0): - nonlocal call_count - if method == "heartbeat": - call_count += 1 - if call_count == 
2: - raise asyncio.TimeoutError() - return await original_send(self, method, params=params, timeout=timeout) - - with patch("websockets.connect", AsyncMock(return_value=ws)): - t = Transport("test-token") - await t.connect() - - with patch.object(t, "send", lambda m, **kw: patched_send(t, m, **kw)): - await asyncio.sleep(0.5) - - heartbeat_sends = [ - s for s in ws._sent - if '"heartbeat"' in s and '"id"' in s - ] - assert len(heartbeat_sends) >= 1, "At least one heartbeat should have been sent" - assert not t._closed, "Transport should still be open" - await t.close() - - -@pytest.mark.asyncio -async def test_heartbeat_sends_periodically(welcome_msg): - """Heartbeat pings are sent at regular intervals.""" - ws = FakeWebSocket([welcome_msg]) - - with patch("websockets.connect", AsyncMock(return_value=ws)): - t = Transport("test-token") - await t.connect() - - await asyncio.sleep(0.3) - - heartbeat_sends = [ - json.loads(s) for s in ws._sent - if '"heartbeat"' in s - ] - assert not t._closed, "Transport should remain open during heartbeats" - await t.close() diff --git a/tests/test_multi_session.py b/tests/test_multi_session.py new file mode 100644 index 0000000..db525ae --- /dev/null +++ b/tests/test_multi_session.py @@ -0,0 +1,129 @@ +from __future__ import annotations + +import asyncio + +import pytest + +from ceki_browser import ConnectOptions, connect + + +@pytest.mark.asyncio +async def test_two_sessions_routed_independently(mock_relay): + client = await connect("test-key", ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}/ws/agent")) + acked: set[str] = set() + + async def ack_rent(session_id: str, schedule_id: int) -> None: + deadline = asyncio.get_event_loop().time() + 5 + while asyncio.get_event_loop().time() < deadline: + await asyncio.sleep(0.05) + rent = next( + (m for m in mock_relay.received + if m.get("type") == "rent" and m.get("schedule_id") == schedule_id + and schedule_id not in acked), + None, + ) + if rent: + acked.add(schedule_id) + 
ev_id = f"ev-{session_id}" + await mock_relay.send_to_all({"type": "rent_pending", "event_id": ev_id, "schedule_id": schedule_id}) + await asyncio.sleep(0.02) + await mock_relay.send_to_all({ + "type": "match", + "event_id": ev_id, + "session_id": session_id, + "schedule_id": schedule_id, + "chat_topic_id": None, + "browser_info": {}, + }) + return + + t1 = asyncio.create_task(ack_rent("sess-1", 1)) + t2 = asyncio.create_task(ack_rent("sess-2", 2)) + b1 = await client.rent(1) + b2 = await client.rent(2) + await asyncio.gather(t1, t2) + + assert b1.session_id != b2.session_id + assert b1.session_id == "sess-1" + assert b2.session_id == "sess-2" + + results: dict[str, dict] = {} + + async def send_and_store(browser, key): + async def reply(): + await asyncio.sleep(0.05) + cdp_msgs = [ + m for m in mock_relay.received + if m.get("type") == "cdp" and m.get("session_id") == browser.session_id + ] + if cdp_msgs: + await mock_relay.send_to_all({ + "type": "cdp_response", + "session_id": browser.session_id, + "id": cdp_msgs[-1]["id"], + "ok": True, + "result": {"session": browser.session_id}, + }) + + t = asyncio.create_task(reply()) + cdp = {"method": "Runtime.evaluate", "params": {"expression": "1"}} + result = await browser.send(cdp, timeout=2) + await t + results[key] = result + + await asyncio.gather( + send_and_store(b1, "b1"), + send_and_store(b2, "b2"), + ) + + assert results["b1"]["session"] == "sess-1" + assert results["b2"]["session"] == "sess-2" + + await b1.close() + assert b2.session_id in client._active_browsers + + await client.close() + + +@pytest.mark.asyncio +async def test_close_one_session_leaves_other_alive(mock_relay): + client = await connect("test-key", ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}/ws/agent")) + + async def ack_rent(session_id): + await asyncio.sleep(0.05) + ev_id = f"ev-{session_id}" + await mock_relay.send_to_all({"type": "rent_pending", "event_id": ev_id, "schedule_id": 1}) + await asyncio.sleep(0.02) + await 
mock_relay.send_to_all({ + "type": "match", + "event_id": ev_id, + "session_id": session_id, + "schedule_id": 1, + "chat_topic_id": None, + "browser_info": {}, + }) + + t1 = asyncio.create_task(ack_rent("sess-A")) + b1 = await client.rent(1) + await t1 + + t2 = asyncio.create_task(ack_rent("sess-B")) + await client.rent(1) + await t2 + + async def ack_session_end(session_id): + await asyncio.sleep(0.05) + await mock_relay.send_to_all({ + "type": "session.ended", + "session_id": session_id, + "reason": "user_stop", + }) + + t = asyncio.create_task(ack_session_end("sess-A")) + await b1.close() + await t + + assert "sess-A" not in client._active_browsers + assert "sess-B" in client._active_browsers + + await client.close() diff --git a/tests/test_profile.py b/tests/test_profile.py new file mode 100644 index 0000000..db8fd47 --- /dev/null +++ b/tests/test_profile.py @@ -0,0 +1,277 @@ +from __future__ import annotations + +import json + +import pytest +from unittest.mock import AsyncMock + +from ceki_browser._profile import BrowserProfile + + +SAMPLE_FINGERPRINT = { + "seed": 123456789, + "timezoneId": "Europe/Berlin", + "locale": "en-US", + "acceptLanguage": "en-US,en;q=0.9", + "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "platform": "Win32", + "screen": {"width": 1920, "height": 1080, "devicePixelRatio": 1}, + "geolocation": {"latitude": 52.52, "longitude": 13.405, "accuracy": 100}, + "hardwareConcurrency": 8, + "canvasNoise": 0.04, + "webglVendor": "Google Inc. 
(NVIDIA)", + "webglRenderer": "ANGLE (NVIDIA, NVIDIA GeForce GTX 1080)", + "audioNoiseDb": -85.3, + "mediaDevicesDelta": 2, + "speechVoicesDelta": 1, +} + + +class FakeBrowser: + def __init__(self): + self.send = AsyncMock() + + +@pytest.mark.asyncio +async def test_export_full(): + fb = FakeBrowser() + fb.send.side_effect = [ + {"fingerprint": SAMPLE_FINGERPRINT}, + {"cookies": [{"name": "sid", "value": "abc", "domain": ".reddit.com"}]}, + {"result": {"value": '{"theme":"dark","auth":"xyz"}'}}, + {"result": {"value": '{"draft":"hello"}'}}, + {"result": {"value": "https://reddit.com"}}, + ] + p = BrowserProfile(fb) + blob = await p.export() + assert blob["schema_version"] == 2 + assert blob["fingerprint"] == SAMPLE_FINGERPRINT + assert blob["origin"] == "https://reddit.com" + assert len(blob["cookies"]) == 1 + assert blob["cookies"][0]["name"] == "sid" + assert blob["localStorage"] == {"theme": "dark", "auth": "xyz"} + assert blob["sessionStorage"] == {"draft": "hello"} + assert fb.send.call_count == 5 + assert fb.send.call_args_list[0].args[0]["method"] == "Browser.getFingerprint" + + +@pytest.mark.asyncio +async def test_export_filter_domains(): + fb = FakeBrowser() + fb.send.side_effect = [ + {"fingerprint": SAMPLE_FINGERPRINT}, + {"cookies": [ + {"name": "sid", "value": "1", "domain": ".reddit.com"}, + {"name": "ad", "value": "2", "domain": ".doubleclick.net"}, + ]}, + {"result": {"value": "{}"}}, + {"result": {"value": "{}"}}, + {"result": {"value": "https://reddit.com"}}, + ] + p = BrowserProfile(fb) + blob = await p.export(domains=[".reddit.com"]) + assert len(blob["cookies"]) == 1 + assert blob["cookies"][0]["name"] == "sid" + + +@pytest.mark.asyncio +async def test_export_skip_session_storage(): + fb = FakeBrowser() + fb.send.side_effect = [ + {"fingerprint": SAMPLE_FINGERPRINT}, + {"cookies": []}, + {"result": {"value": "{}"}}, + {"result": {"value": "https://example.com"}}, + ] + p = BrowserProfile(fb) + blob = await 
p.export(include_session_storage=False) + assert blob["sessionStorage"] == {} + assert fb.send.call_count == 4 + + +@pytest.mark.asyncio +async def test_export_handles_opaque_origin(): + fb = FakeBrowser() + fb.send.side_effect = [ + {"fingerprint": None}, + {"cookies": []}, + {"result": {"value": None}}, + {"result": {"value": None}}, + {"result": {"value": "about:blank"}}, + ] + p = BrowserProfile(fb) + blob = await p.export() + assert blob["localStorage"] == {} + assert blob["sessionStorage"] == {} + assert blob["origin"] == "about:blank" + assert blob["fingerprint"] is None + + +@pytest.mark.asyncio +async def test_export_fingerprint_null_when_disabled(): + fb = FakeBrowser() + fb.send.side_effect = [ + {"fingerprint": None}, + {"cookies": []}, + {"result": {"value": "{}"}}, + {"result": {"value": "{}"}}, + {"result": {"value": "https://example.com"}}, + ] + p = BrowserProfile(fb) + blob = await p.export() + assert blob["schema_version"] == 2 + assert blob["fingerprint"] is None + + +@pytest.mark.asyncio +async def test_export_fingerprint_fallback_on_old_extension(): + """When Browser.getFingerprint is not available, export falls back to fingerprint=None.""" + fb = FakeBrowser() + fb.send.side_effect = [ + Exception("CDP error: Browser.getFingerprint wasn't found"), + {"cookies": [{"name": "x", "value": "y", "domain": ".example.com"}]}, + {"result": {"value": "{}"}}, + {"result": {"value": "{}"}}, + {"result": {"value": "https://example.com"}}, + ] + p = BrowserProfile(fb) + blob = await p.export() + assert blob["schema_version"] == 2 + assert blob["fingerprint"] is None + assert len(blob["cookies"]) == 1 + + +@pytest.mark.asyncio +async def test_import_v2_full(): + fb = FakeBrowser() + fb.send.return_value = {} + p = BrowserProfile(fb) + blob = { + "schema_version": 2, + "fingerprint": SAMPLE_FINGERPRINT, + "origin": "https://reddit.com", + "cookies": [{"name": "sid", "value": "abc", "domain": ".reddit.com"}], + "localStorage": {"theme": "dark"}, + 
"sessionStorage": {"draft": "hi"}, + } + await p.import_(blob) + assert fb.send.call_count == 3 + first_call = fb.send.call_args_list[0] + assert first_call.args[0]["method"] == "Network.setCookies" + assert first_call.args[0]["params"]["cookies"][0]["name"] == "sid" + second_call = fb.send.call_args_list[1] + assert "localStorage.setItem" in second_call.args[0]["params"]["expression"] + third_call = fb.send.call_args_list[2] + assert "sessionStorage.setItem" in third_call.args[0]["params"]["expression"] + + +@pytest.mark.asyncio +async def test_import_v1_backward_compat(): + """v1 profiles without fingerprint import successfully.""" + fb = FakeBrowser() + fb.send.return_value = {} + p = BrowserProfile(fb) + blob = { + "schema_version": 1, + "origin": "https://reddit.com", + "cookies": [{"name": "sid", "value": "abc", "domain": ".reddit.com"}], + "localStorage": {"theme": "dark"}, + "sessionStorage": {}, + } + await p.import_(blob) + assert fb.send.call_count == 2 + + +@pytest.mark.asyncio +async def test_import_unknown_schema_version(): + fb = FakeBrowser() + p = BrowserProfile(fb) + with pytest.raises(ValueError, match="schema_version=99"): + await p.import_({"schema_version": 99, "cookies": []}) + assert fb.send.call_count == 0 + + +@pytest.mark.asyncio +async def test_import_v3_raises(): + fb = FakeBrowser() + p = BrowserProfile(fb) + with pytest.raises(ValueError, match="schema_version=3"): + await p.import_({"schema_version": 3, "cookies": []}) + + +@pytest.mark.asyncio +async def test_import_empty_blob(): + fb = FakeBrowser() + fb.send.return_value = {} + p = BrowserProfile(fb) + await p.import_({"schema_version": 2, "cookies": [], "localStorage": {}, "sessionStorage": {}}) + assert fb.send.call_count == 0 + + +@pytest.mark.asyncio +async def test_import_cookies_only(): + fb = FakeBrowser() + fb.send.return_value = {} + p = BrowserProfile(fb) + await p.import_({ + "schema_version": 2, + "cookies": [{"name": "tok", "value": "x", "domain": ".example.com"}], + 
"localStorage": {}, + "sessionStorage": {}, + }) + assert fb.send.call_count == 1 + assert fb.send.call_args_list[0].args[0]["method"] == "Network.setCookies" + + +@pytest.mark.asyncio +async def test_import_storage_values_serialized_as_json(): + """localStorage values must be JSON-stringified properly in the injected expression.""" + fb = FakeBrowser() + fb.send.return_value = {} + p = BrowserProfile(fb) + storage = {"key": 'value with "quotes" and \\backslash'} + await p.import_({"schema_version": 2, "cookies": [], "localStorage": storage, "sessionStorage": {}}) + expr = fb.send.call_args_list[0].args[0]["params"]["expression"] + assert json.dumps(storage) in expr + + +@pytest.mark.asyncio +async def test_import_v2_ignores_fingerprint(): + """import_() does not apply fingerprint — that's done via rent(fingerprint=...).""" + fb = FakeBrowser() + fb.send.return_value = {} + p = BrowserProfile(fb) + blob = { + "schema_version": 2, + "fingerprint": SAMPLE_FINGERPRINT, + "cookies": [{"name": "x", "value": "y", "domain": ".example.com"}], + "localStorage": {}, + "sessionStorage": {}, + } + await p.import_(blob) + assert fb.send.call_count == 1 + assert fb.send.call_args_list[0].args[0]["method"] == "Network.setCookies" + + +@pytest.mark.asyncio +async def test_fingerprint_json_roundtrip(): + """Fingerprint dict serializes to JSON and back with all fields preserved.""" + serialized = json.dumps(SAMPLE_FINGERPRINT) + deserialized = json.loads(serialized) + assert deserialized == SAMPLE_FINGERPRINT + assert isinstance(deserialized["seed"], int) + assert isinstance(deserialized["screen"], dict) + assert isinstance(deserialized["canvasNoise"], float) + + +@pytest.mark.asyncio +async def test_profile_accessible_on_browser(mock_relay): + """Browser.profile is available after connect (no rent needed — just check attribute).""" + from ceki_browser import ConnectOptions, connect + client = await connect("test-key", 
ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}/ws/agent")) + try: + assert hasattr(client, "_active_browsers") + from ceki_browser import BrowserProfile + assert BrowserProfile is not None + finally: + await client.close() diff --git a/tests/test_provider_disconnect.py b/tests/test_provider_disconnect.py new file mode 100644 index 0000000..50082b6 --- /dev/null +++ b/tests/test_provider_disconnect.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +import asyncio + +import pytest + +from ceki_browser import ConnectOptions, ProviderDisconnected, SessionEnded, connect + + +async def _make_browser(mock_relay, session_id: str = "sess-pd"): + client = await connect("test-key", ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}/ws/agent")) + + async def ack_rent(): + await asyncio.sleep(0.05) + await mock_relay.send_to_all({"type": "rent_pending", "event_id": "ev-1"}) + await asyncio.sleep(0.05) + await mock_relay.send_to_all({ + "type": "match", + "event_id": "ev-1", + "session_id": session_id, + "schedule_id": 1, + "chat_topic_id": None, + "browser_info": {}, + }) + + t = asyncio.create_task(ack_rent()) + browser = await client.rent(1) + await t + return client, browser + + +@pytest.mark.asyncio +async def test_provider_disconnected_raises_on_session_end(mock_relay): + client, browser = await _make_browser(mock_relay) + try: + task = asyncio.create_task(browser.wait_until_ended()) + await asyncio.sleep(0.05) + await mock_relay.send_to_all({ + "type": "session.ended", + "session_id": "sess-pd", + "reason": "provider_disconnected", + }) + await asyncio.sleep(0.05) + reason = await asyncio.wait_for(task, timeout=1.0) + assert reason == "provider_disconnected" + assert browser._ended.is_set() + assert isinstance(browser._ended_reason, str) + + # Pending CDP futures should get ProviderDisconnected + loop = asyncio.get_event_loop() + fut = loop.create_future() + browser._pending_cdp[999] = fut + # Trigger again with a fresh session end message 
won't work since already ended; + # instead verify the exception type was set correctly on futures during end + # (we'll just check directly) + assert not fut.done() # Was added after session ended, so won't be set + fut.cancel() + finally: + await client.close() + + +@pytest.mark.asyncio +async def test_session_end_other_reason_raises_session_ended(mock_relay): + client, browser = await _make_browser(mock_relay, "sess-pd2") + try: + task = asyncio.create_task(browser.wait_until_ended()) + await asyncio.sleep(0.05) + await mock_relay.send_to_all({ + "type": "session.ended", + "session_id": "sess-pd2", + "reason": "user_stop", + }) + reason = await asyncio.wait_for(task, timeout=1.0) + assert reason == "user_stop" + finally: + await client.close() + + +@pytest.mark.asyncio +async def test_on_provider_disconnected_callback(mock_relay): + client, browser = await _make_browser(mock_relay, "sess-pd3") + try: + called = asyncio.Event() + + async def on_disc(): + called.set() + + browser.on_provider_disconnected(on_disc) + await mock_relay.send_to_all({ + "type": "session.provider_disconnected", + "session_id": "sess-pd3", + "retry_within_ms": 30000, + }) + await asyncio.wait_for(called.wait(), timeout=1.0) + assert called.is_set() + finally: + await client.close() + + +@pytest.mark.asyncio +async def test_on_provider_reconnected_callback(mock_relay): + client, browser = await _make_browser(mock_relay, "sess-pd4") + try: + called = asyncio.Event() + + async def on_reconn(): + called.set() + + browser.on_provider_reconnected(on_reconn) + await mock_relay.send_to_all({ + "type": "session.provider_reconnected", + "session_id": "sess-pd4", + }) + await asyncio.wait_for(called.wait(), timeout=1.0) + assert called.is_set() + finally: + await client.close() diff --git a/tests/test_provider_offline.py b/tests/test_provider_offline.py new file mode 100644 index 0000000..0b8e81e --- /dev/null +++ b/tests/test_provider_offline.py @@ -0,0 +1,59 @@ +from __future__ import annotations 
+ +import asyncio + +import pytest + +from ceki_browser import Client, ConnectOptions, connect +from ceki_browser._exceptions import ProviderOffline + +from .conftest import MockRelayServer + + +@pytest.mark.asyncio +async def test_rent_error_provider_offline_raises_provider_offline(mock_relay: MockRelayServer) -> None: + """relay sends rent.error provider_offline after probe timeout → ProviderOffline raised.""" + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=55)) + await asyncio.sleep(0.05) + + # Relay sends rent_pending (moves fut to _pending_rents) + await mock_relay.send_to_all({"type": "rent_pending", "event_id": "555", "schedule_id": 55}) + await asyncio.sleep(0.05) + + # Relay sends rent.error provider_offline (with event_id, as relay does after probe timeout) + await mock_relay.send_to_all({ + "type": "rent.error", + "code": "provider_offline", + "message": "Provider not responding", + "event_id": "555", + }) + + with pytest.raises(ProviderOffline): + await asyncio.wait_for(rent_task, timeout=5) + + await client.close() + + +@pytest.mark.asyncio +async def test_rent_error_provider_offline_without_event_id(mock_relay: MockRelayServer) -> None: + """rent.error provider_offline without event_id (early, before rent_pending) → ProviderOffline.""" + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=55)) + await asyncio.sleep(0.05) + + # rent.error arrives before rent_pending (fut still in queue) + await mock_relay.send_to_all({ + "type": "rent.error", + "code": "provider_offline", + "message": "Provider not responding", + }) + + with pytest.raises(ProviderOffline): + await asyncio.wait_for(rent_task, timeout=5) + + await client.close() diff --git a/tests/test_reconnect.py b/tests/test_reconnect.py new file mode 100644 index 
0000000..ae055d3 --- /dev/null +++ b/tests/test_reconnect.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +import asyncio + +import pytest + +from ceki_browser import ConnectOptions, connect + +from .conftest import MockRelayServer + + +@pytest.mark.asyncio +async def test_reconnect_after_drop(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url, reconnect=True)) + + initial_ws = client._ws + assert initial_ws is not None + + # Drop connection from server side + for ws in list(mock_relay.connections): + await ws.close() + + # Give client time to detect and schedule reconnect + await asyncio.sleep(2.5) + + # Client should have started reconnect (new ws or reconnect scheduled) + # In test environment reconnect may not succeed fully, but task should be created + await client.close() + + +@pytest.mark.asyncio +async def test_no_reconnect_when_disabled(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url, reconnect=False)) + + # Drop from server + for ws in list(mock_relay.connections): + await ws.close() + + await asyncio.sleep(0.5) + # With reconnect=False, ws should be closed and no reconnect attempted + await client.close() + + +@pytest.mark.asyncio +async def test_heartbeat_pong_updates_timestamp(mock_relay: MockRelayServer) -> None: + + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + before = client._last_pong + # Send ping manually and verify pong updates timestamp + await client._ws_send({"type": "ping"}) + await asyncio.sleep(0.3) + assert client._last_pong >= before + await client.close() diff --git a/tests/test_rent_flow.py b/tests/test_rent_flow.py new file mode 100644 index 0000000..eb5a00a --- /dev/null +++ b/tests/test_rent_flow.py @@ -0,0 +1,183 @@ +from __future__ import annotations + +import 
asyncio + +import pytest + +from ceki_browser import Client, ConnectOptions, connect +from ceki_browser._exceptions import ( + ProviderOffline, + RateLimitExceeded, + SessionEnded, +) +from tests.test_profile import SAMPLE_FINGERPRINT + +from .conftest import MockRelayServer + + +@pytest.mark.asyncio +async def test_rent_resolves_via_rent_pending_then_match(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=240)) + await asyncio.sleep(0.05) + + # Relay sends rent_pending with server-assigned event_id + await mock_relay.send_to_all({"type": "rent_pending", "event_id": "1924", "schedule_id": 240}) + await asyncio.sleep(0.05) + + # Relay sends match with same event_id + await mock_relay.send_to_all({ + "type": "match", + "event_id": "1924", + "session_id": "1924", + "schedule_id": 240, + "capabilities": {}, + "price_per_min": 0.01, + }) + + browser = await asyncio.wait_for(rent_task, timeout=5) + assert browser.session_id == "1924" + assert browser.schedule_id == 240 + + # Verify WS rent message had only type + schedule_id (no event_id, no duration_minutes) + rent_msgs = [m for m in mock_relay.received if m.get("type") == "rent"] + assert len(rent_msgs) == 1 + assert set(rent_msgs[0].keys()) == {"type", "browser_id"} + assert rent_msgs[0]["browser_id"] == 240 + + await client.close() + + +@pytest.mark.asyncio +async def test_rent_error_with_event_id_raises_exception(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=240)) + await asyncio.sleep(0.05) + + await mock_relay.send_to_all({"type": "rent_pending", "event_id": "777", "schedule_id": 240}) + await asyncio.sleep(0.05) + + await mock_relay.send_to_all({ + "type": "error", + "code": -1015, + "reason": 
"no_providers", + "event_id": "777", + }) + + with pytest.raises(ProviderOffline) as exc_info: + await asyncio.wait_for(rent_task, timeout=5) + assert "no_providers" in str(exc_info.value) + + await client.close() + + +@pytest.mark.asyncio +async def test_rent_early_error_without_event_id_raises_exception(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=240)) + await asyncio.sleep(0.05) + + # Early error before rent_pending (e.g. rate limit) — no event_id + await mock_relay.send_to_all({ + "type": "error", + "code": -1013, + "retry_after": 2.0, + }) + + with pytest.raises(RateLimitExceeded): + await asyncio.wait_for(rent_task, timeout=5) + + await client.close() + + +@pytest.mark.asyncio +async def test_rent_with_fingerprint_dict_sends_configure(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=240, fingerprint=SAMPLE_FINGERPRINT)) + await asyncio.sleep(0.05) + + await mock_relay.send_to_all({"type": "rent_pending", "event_id": "fp1", "schedule_id": 240}) + await asyncio.sleep(0.05) + await mock_relay.send_to_all({ + "type": "match", + "event_id": "fp1", + "session_id": "fp1", + "schedule_id": 240, + "capabilities": {}, + "price_per_min": 0.01, + }) + + browser = await asyncio.wait_for(rent_task, timeout=5) + assert browser.session_id == "fp1" + await asyncio.sleep(0.1) + + configure_msgs = [m for m in mock_relay.received if m.get("type") == "session.configure"] + assert len(configure_msgs) == 1 + assert configure_msgs[0]["fingerprint"] == SAMPLE_FINGERPRINT + assert configure_msgs[0]["session_id"] == "fp1" + + await client.close() + + +@pytest.mark.asyncio +async def test_rent_with_fingerprint_false_sends_configure_false(mock_relay: MockRelayServer) 
-> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=240, fingerprint=False)) + await asyncio.sleep(0.05) + + await mock_relay.send_to_all({"type": "rent_pending", "event_id": "fp2", "schedule_id": 240}) + await asyncio.sleep(0.05) + await mock_relay.send_to_all({ + "type": "match", + "event_id": "fp2", + "session_id": "fp2", + "schedule_id": 240, + "capabilities": {}, + "price_per_min": 0.01, + }) + + browser = await asyncio.wait_for(rent_task, timeout=5) + await asyncio.sleep(0.1) + + configure_msgs = [m for m in mock_relay.received if m.get("type") == "session.configure"] + assert len(configure_msgs) == 1 + assert configure_msgs[0]["fingerprint"] is False + + await client.close() + + +@pytest.mark.asyncio +async def test_rent_with_fingerprint_true_no_configure(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + + rent_task = asyncio.create_task(client.rent(schedule_id=240, fingerprint=True)) + await asyncio.sleep(0.05) + + await mock_relay.send_to_all({"type": "rent_pending", "event_id": "fp3", "schedule_id": 240}) + await asyncio.sleep(0.05) + await mock_relay.send_to_all({ + "type": "match", + "event_id": "fp3", + "session_id": "fp3", + "schedule_id": 240, + "capabilities": {}, + "price_per_min": 0.01, + }) + + browser = await asyncio.wait_for(rent_task, timeout=5) + + configure_msgs = [m for m in mock_relay.received if m.get("type") == "session.configure"] + assert len(configure_msgs) == 0 + + await client.close() diff --git a/tests/test_search.py b/tests/test_search.py new file mode 100644 index 0000000..9b7e2f7 --- /dev/null +++ b/tests/test_search.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +import base64 +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +import pytest + +from ceki_browser import 
BrowserOption, ConnectOptions, connect +from ceki_browser._client import Client + +from .conftest import MockRelayServer + + +def _make_response(data: dict | list) -> httpx.Response: + req = httpx.Request("GET", "http://test") + return httpx.Response(200, json=data, request=req) + + +def _make_client(relay_url: str = "wss://relay.ceki.me/ws/agent") -> Client: + return Client( + api_key="testkey", + relay_url=relay_url, + api_url="https://api.ceki.me", + reconnect=False, + ) + + +@pytest.mark.asyncio +async def test_search_returns_browser_options(mock_relay: MockRelayServer) -> None: + sample = { + "schedule_id": 1, + "geo": "US", + "languages": ["en"], + "price_per_min": 0.05, + } + mock_resp = _make_response({"data": [sample]}) + + with patch("httpx.AsyncClient.get", AsyncMock(return_value=mock_resp)): + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + results = await client.search({"geo": "US"}, limit=5) + assert len(results) == 1 + assert isinstance(results[0], BrowserOption) + assert results[0].geo == "US" + assert results[0].price_per_min == 0.05 + await client.close() + + +@pytest.mark.asyncio +async def test_search_uses_plural_browsers_endpoint(mock_relay: MockRelayServer) -> None: + mock_resp = _make_response({"data": []}) + mock_get = AsyncMock(return_value=mock_resp) + + with patch("httpx.AsyncClient.get", mock_get): + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect( + "testkey", + ConnectOptions(relay_url=url, api_url="https://clawapi.ittribe.org"), + ) + await client.search() + await client.close() + + call_args = mock_get.call_args + called_url = call_args.args[0] if call_args.args else call_args.kwargs.get("url", "") + assert "/api/browsers/search" in called_url + + +@pytest.mark.asyncio +async def test_search_filters_passed_as_params(mock_relay: MockRelayServer) -> None: + mock_resp = _make_response({"data": []}) + mock_get = AsyncMock(return_value=mock_resp) + + with 
patch("httpx.AsyncClient.get", mock_get): + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url)) + await client.search({"geo": "DE", "language": "de"}, limit=10) + await client.close() + + call_kwargs = mock_get.call_args.kwargs + params = call_kwargs.get("params", {}) + assert params.get("geo") == "DE" + assert params.get("limit") == 10 + + +@pytest.mark.asyncio +async def test_search_bearer_auth_header(mock_relay: MockRelayServer) -> None: + mock_resp = _make_response({"data": []}) + mock_get = AsyncMock(return_value=mock_resp) + + with patch("httpx.AsyncClient.get", mock_get): + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("my-secret-key", ConnectOptions(relay_url=url)) + await client.search() + await client.close() + + headers = mock_get.call_args.kwargs.get("headers", {}) + assert headers.get("Authorization") == "Bearer my-secret-key" + + +def test_browser_option_laravel_response() -> None: + raw = { + "schedule_id": 42, + "geo": None, + "language": "en", + "skills": ["form-fill"], + "price_per_min": 0.03, + "currency": "USD", + "kal_id": 7, + "rating": 4.5, + } + opt = BrowserOption.model_validate(raw) + assert opt.schedule_id == 42 + assert opt.geo is None + assert opt.language == "en" + assert opt.currency == "USD" + assert opt.kal_id == 7 + + +def test_browser_option_ignores_extra_fields() -> None: + raw = { + "schedule_id": 1, + "price_per_min": 0.05, + "unknown_field": "ignored", + } + opt = BrowserOption.model_validate(raw) + assert opt.schedule_id == 1 + + +@pytest.mark.asyncio +async def test_rest_uses_bearer_only_even_with_basic_auth(mock_relay: MockRelayServer) -> None: + """basic_auth must not overwrite Bearer in REST Authorization header.""" + mock_resp = _make_response({"data": []}) + mock_get = AsyncMock(return_value=mock_resp) + + with patch("httpx.AsyncClient.get", mock_get): + client = await connect( + "my-api-key", + 
ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}", basic_auth=("u", "p")), + ) + await client.search() + await client.close() + + headers = mock_get.call_args.kwargs.get("headers", {}) + assert headers.get("Authorization") == "Bearer my-api-key" + + +@pytest.mark.asyncio +async def test_ws_uses_basic_auth_in_extra_headers(mock_relay: MockRelayServer) -> None: + """basic_auth must appear as Authorization: Basic in WS extra_headers.""" + client = await connect( + "my-api-key", + ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}", basic_auth=("u", "p")), + ) + expected = "Basic " + base64.b64encode(b"u:p").decode() + assert client._ws_extra_headers().get("Authorization") == expected + await client.close() diff --git a/tests/test_sessions.py b/tests/test_sessions.py new file mode 100644 index 0000000..3c9c607 --- /dev/null +++ b/tests/test_sessions.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +import asyncio +from unittest.mock import AsyncMock, MagicMock, Mock, patch + +import pytest + +from ceki_browser import Client, ConnectOptions, SessionInfo, connect + +from .conftest import MockRelayServer + +MOCK_SESSIONS_RESPONSE = { + "data": [ + { + "id": 2650, + "schedule_id": 703, + "started_at": "2026-05-18T10:43:09Z", + "ended_at": None, + "status": "active", + "duration": 148, + "earned": 0.25, + "price_per_min": 0.10, + "renter": {"type": "agent", "id": 4, "name": "First"}, + "provider": {"type": "user", "id": 1, "name": "Konstantin"}, + "data": {"chat_topic_id": "topic-abc"}, + }, + { + "id": 2651, + "schedule_id": 704, + "started_at": "2026-05-18T11:00:00Z", + "ended_at": None, + "status": "active", + "duration": 60, + "earned": 0.10, + "price_per_min": 0.10, + "renter": {"type": "agent", "id": 5, "name": "Second"}, + "provider": {"type": "user", "id": 2, "name": "Alice"}, + "data": {}, + }, + ] +} + + +def _patch_httpx_get(json_body=None): + resp = MagicMock() + resp.status_code = 200 + resp.is_success = True + resp.json = 
Mock(return_value=json_body or MOCK_SESSIONS_RESPONSE) + resp.raise_for_status = Mock() + client_mock = AsyncMock() + client_mock.__aenter__ = AsyncMock(return_value=client_mock) + client_mock.__aexit__ = AsyncMock(return_value=False) + client_mock.get = AsyncMock(return_value=resp) + return patch("httpx.AsyncClient", return_value=client_mock), client_mock + + +@pytest.mark.asyncio +async def test_list_sessions_active_only(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url, api_url="http://localhost:9999")) + try: + patcher, http_mock = _patch_httpx_get() + with patcher: + results = await client.list_sessions(active=True, limit=50) + assert len(results) == 2 + assert all(isinstance(r, SessionInfo) for r in results) + assert results[0].id == 2650 + assert results[0].schedule_id == 703 + assert results[0].status == "active" + assert results[0].renter["name"] == "First" + + call_args = http_mock.get.call_args + assert "active" in str(call_args) + finally: + if client._ws: + await client.disconnect() + + +@pytest.mark.asyncio +async def test_list_sessions_all(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url, api_url="http://localhost:9999")) + try: + patcher, http_mock = _patch_httpx_get() + with patcher: + results = await client.list_sessions(active=False) + call_kwargs = http_mock.get.call_args + params = call_kwargs.kwargs.get("params", {}) + assert params.get("active") == "0" + finally: + if client._ws: + await client.disconnect() + + +@pytest.mark.asyncio +async def test_list_sessions_empty(mock_relay: MockRelayServer) -> None: + url = f"ws://127.0.0.1:{mock_relay.port}" + client = await connect("testkey", ConnectOptions(relay_url=url, api_url="http://localhost:9999")) + try: + patcher, _ = _patch_httpx_get(json_body={"data": []}) + with patcher: + results = await 
client.list_sessions() + assert results == [] + finally: + if client._ws: + await client.disconnect() diff --git a/tests/test_state_persistence.py b/tests/test_state_persistence.py new file mode 100644 index 0000000..9dfec29 --- /dev/null +++ b/tests/test_state_persistence.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import base64 +from pathlib import Path +from unittest.mock import AsyncMock, patch + +import pytest + +from ceki_browser import Browser +from ceki_browser._state import save_session, load_session, get_last_seen_ts, update_last_seen_ts + + +def _make_browser(): + client = AsyncMock() + client._active_browsers = {} + client.chat_url = "https://test/chat" + client.api_key = "test" + + match = AsyncMock() + match.session_id = "persist-1" + match.schedule_id = 1 + match.chat_topic_id = "t1" + match.browser_info = {} + match.provider_user_id = None + + with patch.dict("os.environ", {"CEKI_HUMAN_DISABLE": "1"}): + b = Browser(client, match) + return b + + +def _make_chat_msg(msg_id: str, text: str, ts: str): + from ceki_browser._models import ChatMessage + return ChatMessage( + _id=msg_id, + topic_id="t1", + sender_id=99, + text=text, + type="text", + created_at=ts, + ) + + +async def test_snapshot_filters_old_messages_client_side(tmp_path: Path): + """Two sequential snapshots: second one returns no old messages even if + chat.history returns them (simulating server ignoring 'since' param).""" + with patch("ceki_browser._state._STATE_DIR", tmp_path / "sessions"): + sid = "persist-1" + save_session(sid, {"session_id": sid, "schedule_id": 1, "last_seen_ts": None}) + + msg1 = _make_chat_msg("m1", "hello", "2026-01-01T00:00:01Z") + msg2 = _make_chat_msg("m2", "world", "2026-01-01T00:00:02Z") + msg3 = _make_chat_msg("m3", "new", "2026-01-01T00:00:03Z") + + png_data = base64.b64encode(b"\x89PNG").decode() + + # --- Process 1: first snapshot, gets 2 messages --- + b1 = _make_browser() + b1._last_seen_ts = get_last_seen_ts(sid) + b1.send = 
AsyncMock(return_value={"data": png_data}) + b1.chat.history = AsyncMock(return_value=[msg1, msg2]) + + snap1 = await b1.snapshot() + assert len(snap1.chat) == 2 + assert b1._last_seen_ts == "2026-01-01T00:00:02Z" + + # Persist like CLI does + if b1._last_seen_ts: + update_last_seen_ts(sid, b1._last_seen_ts) + + # Verify state file + assert get_last_seen_ts(sid) == "2026-01-01T00:00:02Z" + + # --- Process 2: second snapshot, server returns same messages (doesn't filter by since) --- + b2 = _make_browser() + b2._last_seen_ts = get_last_seen_ts(sid) + b2.send = AsyncMock(return_value={"data": png_data}) + b2.chat.history = AsyncMock(return_value=[msg1, msg2]) + + snap2 = await b2.snapshot() + assert len(snap2.chat) == 0, "Should filter out already-seen messages" + assert b2._last_seen_ts == "2026-01-01T00:00:02Z" + + # --- Process 3: third snapshot, server returns old + new messages --- + b3 = _make_browser() + b3._last_seen_ts = get_last_seen_ts(sid) + b3.send = AsyncMock(return_value={"data": png_data}) + b3.chat.history = AsyncMock(return_value=[msg1, msg2, msg3]) + + snap3 = await b3.snapshot() + assert len(snap3.chat) == 1, "Should return only new message" + assert snap3.chat[0].id == "m3" + assert b3._last_seen_ts == "2026-01-01T00:00:03Z" + + if b3._last_seen_ts: + update_last_seen_ts(sid, b3._last_seen_ts) + assert get_last_seen_ts(sid) == "2026-01-01T00:00:03Z" + + +async def test_snapshot_no_last_seen_returns_all(tmp_path: Path): + """First ever snapshot (no last_seen_ts) returns all messages.""" + with patch("ceki_browser._state._STATE_DIR", tmp_path / "sessions"): + msg1 = _make_chat_msg("m1", "hi", "2026-01-01T00:00:01Z") + png_data = base64.b64encode(b"\x89PNG").decode() + + b = _make_browser() + b._last_seen_ts = None + b.send = AsyncMock(return_value={"data": png_data}) + b.chat.history = AsyncMock(return_value=[msg1]) + + snap = await b.snapshot() + assert len(snap.chat) == 1 + assert b._last_seen_ts == "2026-01-01T00:00:01Z" diff --git 
a/tests/test_switch_tab.py b/tests/test_switch_tab.py new file mode 100644 index 0000000..935ba56 --- /dev/null +++ b/tests/test_switch_tab.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import asyncio + +import pytest + +from ceki_browser import ConnectOptions, connect + + +@pytest.fixture +async def browser_fixture(mock_relay): + client = await connect("test-key", ConnectOptions(relay_url=f"ws://127.0.0.1:{mock_relay.port}/ws/agent")) + + async def ack_rent(): + await asyncio.sleep(0.05) + await mock_relay.send_to_all({"type": "rent_pending", "event_id": "ev-tab", "schedule_id": 1}) + await asyncio.sleep(0.02) + await mock_relay.send_to_all({ + "type": "match", + "event_id": "ev-tab", + "session_id": "sess-tab", + "schedule_id": 1, + "chat_topic_id": None, + "browser_info": {}, + }) + + t = asyncio.create_task(ack_rent()) + browser = await client.rent(1) + await t + yield browser, mock_relay + await client.close() + + +@pytest.mark.asyncio +async def test_tab_opened_callback(browser_fixture): + browser, mock_relay = browser_fixture + + opened_urls: list[str] = [] + + async def on_tab(url: str) -> None: + opened_urls.append(url) + + browser.on_tab_opened(on_tab) + + await mock_relay.send_to_all({ + "type": "tab_opened", + "session_id": "sess-tab", + "url": "https://popup.example.com", + }) + + await asyncio.sleep(0.1) + + assert opened_urls == ["https://popup.example.com"] + + +@pytest.mark.asyncio +async def test_switch_tab_sends_correct_msg(browser_fixture): + browser, mock_relay = browser_fixture + + await browser.switch_tab() + await asyncio.sleep(0.05) + + switch_msgs = [m for m in mock_relay.received if m.get("type") == "switch_tab"] + assert len(switch_msgs) == 1 + assert switch_msgs[0]["session_id"] == "sess-tab" diff --git a/tests/test_transport.py b/tests/test_transport.py deleted file mode 100644 index b2db4a1..0000000 --- a/tests/test_transport.py +++ /dev/null @@ -1,127 +0,0 @@ -import asyncio -import json -from unittest.mock import 
AsyncMock, MagicMock, patch - -import pytest - -from ceki_browser.errors import AuthError, CommandTimeout, RateLimited -from ceki_browser.transport import Transport - - -class FakeWebSocket: - def __init__(self, messages: list[str] | None = None): - self._messages = list(messages or []) - self._sent: list[str] = [] - self._closed = False - self.state = MagicMock() - self.state.name = "OPEN" - - async def recv(self) -> str: - if self._messages: - return self._messages.pop(0) - await asyncio.sleep(100) - return "" - - async def send(self, data: str) -> None: - self._sent.append(data) - - async def close(self) -> None: - self._closed = True - - def __aiter__(self): - return self - - async def __anext__(self) -> str: - if self._messages: - return self._messages.pop(0) - if self._closed: - raise StopAsyncIteration - await asyncio.sleep(100) - raise StopAsyncIteration - - -@pytest.fixture -def welcome_msg(): - return json.dumps({"jsonrpc": "2.0", "result": {"status": "connected", "agent_id": "agent-123"}, "id": 0}) - - -@pytest.mark.asyncio -async def test_connect_success(welcome_msg): - ws = FakeWebSocket([welcome_msg]) - with patch("ceki_browser.transport.websockets.connect", new_callable=AsyncMock, return_value=ws): - t = Transport(token="test-token", relay_url="wss://relay.test/ws/agent") - result = await t.connect() - assert result["agent_id"] == "agent-123" - assert t.agent_id == "agent-123" - await t.close() - - -@pytest.mark.asyncio -async def test_connect_auth_error(): - welcome = json.dumps({"jsonrpc": "2.0", "error": {"code": 401, "message": "Unauthorized"}, "id": 0}) - ws = FakeWebSocket([welcome]) - with patch("ceki_browser.transport.websockets.connect", new_callable=AsyncMock, return_value=ws): - t = Transport(token="bad-token", relay_url="wss://relay.test/ws/agent") - with pytest.raises(AuthError, match="Unauthorized"): - await t.connect() - await t.close() - - -@pytest.mark.asyncio -async def test_send_receive_roundtrip(welcome_msg): - response = 
json.dumps({"jsonrpc": "2.0", "result": {"url": "https://example.com", "title": "Example"}, "id": 1}) - ws = FakeWebSocket([welcome_msg, response]) - with patch("ceki_browser.transport.websockets.connect", new_callable=AsyncMock, return_value=ws): - t = Transport(token="test-token", relay_url="wss://relay.test/ws/agent") - await t.connect() - result = await t.send("browser.navigate", {"url": "https://example.com"}) - assert result["url"] == "https://example.com" - - sent = json.loads(ws._sent[0]) - assert sent["method"] == "browser.navigate" - assert sent["id"] == 1 - await t.close() - - -@pytest.mark.asyncio -async def test_error_mapping(welcome_msg): - error_resp = json.dumps({"jsonrpc": "2.0", "error": {"code": -1013, "message": "Rate limit exceeded"}, "id": 1}) - ws = FakeWebSocket([welcome_msg, error_resp]) - with patch("ceki_browser.transport.websockets.connect", new_callable=AsyncMock, return_value=ws): - t = Transport(token="test-token", relay_url="wss://relay.test/ws/agent") - await t.connect() - with pytest.raises(RateLimited, match="Rate limit"): - await t.send("session.request", {"mode": "incognito"}) - await t.close() - - -@pytest.mark.asyncio -async def test_notification_dispatch(welcome_msg): - notification = json.dumps({"jsonrpc": "2.0", "method": "session.state_changed", "params": {"state": "ACTIVE"}}) - ws = FakeWebSocket([welcome_msg, notification]) - - events: list[tuple[str, dict]] = [] - - async def on_event(method: str, params: dict) -> None: - events.append((method, params)) - - with patch("ceki_browser.transport.websockets.connect", new_callable=AsyncMock, return_value=ws): - t = Transport(token="test-token", relay_url="wss://relay.test/ws/agent") - t.on_event(on_event) - await t.connect() - await asyncio.sleep(0.1) - assert len(events) == 1 - assert events[0][0] == "session.state_changed" - assert events[0][1]["state"] == "ACTIVE" - await t.close() - - -@pytest.mark.asyncio -async def test_command_timeout(welcome_msg): - ws = 
FakeWebSocket([welcome_msg]) - with patch("ceki_browser.transport.websockets.connect", new_callable=AsyncMock, return_value=ws): - t = Transport(token="test-token", relay_url="wss://relay.test/ws/agent") - await t.connect() - with pytest.raises(CommandTimeout): - await t.send("browser.navigate", {"url": "https://slow.test"}, timeout=0.1) - await t.close() diff --git a/tests/test_transport_rtc.py b/tests/test_transport_rtc.py deleted file mode 100644 index 695036d..0000000 --- a/tests/test_transport_rtc.py +++ /dev/null @@ -1,95 +0,0 @@ -import asyncio -import json -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from ceki_browser.transport_rtc import RTCTransport - - -class MockDataChannel: - def __init__(self, label: str): - self.label = label - self.readyState = "open" - self._sent: list[str] = [] - self._handlers: dict[str, list] = {} - - def send(self, data: str) -> None: - self._sent.append(data) - - def on(self, event: str): - def decorator(fn): - self._handlers.setdefault(event, []).append(fn) - return fn - return decorator - - def emit(self, event: str, *args): - for h in self._handlers.get(event, []): - h(*args) - - -def make_transport_with_mock_channels(): - with patch("ceki_browser.transport_rtc.RTCPeerConnection") as MockPC: - pc = MagicMock() - pc.connectionState = "new" - pc.iceGatheringState = "new" - pc._handlers = {} - - def on_decorator(event): - def decorator(fn): - pc._handlers.setdefault(event, []).append(fn) - return fn - return decorator - - pc.on = on_decorator - - cmd_ch = MockDataChannel("ceki-cmd") - pc.createDataChannel = MagicMock(return_value=cmd_ch) - pc.close = AsyncMock() - MockPC.return_value = pc - - transport = RTCTransport([{"urls": "stun:stun.l.google.com:19302"}]) - transport.cmd_channel = cmd_ch - - return transport, pc, cmd_ch - - -@pytest.mark.asyncio -async def test_send_command_roundtrip(): - transport, pc, cmd_ch = make_transport_with_mock_channels() - - async def respond(): - await 
asyncio.sleep(0.05) - sent = json.loads(cmd_ch._sent[0]) - response = json.dumps({"jsonrpc": "2.0", "result": {"url": "https://example.com"}, "id": sent["id"]}) - cmd_ch.emit("message", response) - - task = asyncio.create_task(respond()) - result = await transport.send_command("browser.navigate", {"url": "https://example.com"}, timeout=2.0) - await task - - assert result["url"] == "https://example.com" - sent = json.loads(cmd_ch._sent[0]) - assert sent["method"] == "browser.navigate" - assert sent["params"]["url"] == "https://example.com" - await transport.close() - - -@pytest.mark.asyncio -async def test_send_command_error(): - transport, pc, cmd_ch = make_transport_with_mock_channels() - - async def respond(): - await asyncio.sleep(0.05) - sent = json.loads(cmd_ch._sent[0]) - err = {"code": -1010, "message": "Provider disconnected"} - response = json.dumps({"jsonrpc": "2.0", "error": err, "id": sent["id"]}) - cmd_ch.emit("message", response) - - from ceki_browser.errors import CekiBrowserError - - task = asyncio.create_task(respond()) - with pytest.raises(CekiBrowserError, match="Provider disconnected"): - await transport.send_command("browser.screenshot", timeout=2.0) - await task - await transport.close() diff --git a/tests/test_type_keyboard_events.py b/tests/test_type_keyboard_events.py new file mode 100644 index 0000000..20c4225 --- /dev/null +++ b/tests/test_type_keyboard_events.py @@ -0,0 +1,95 @@ +from __future__ import annotations +from unittest.mock import AsyncMock, patch +import pytest +from ceki_browser import Browser + + +@pytest.fixture +def browser(): + client = AsyncMock() + client._active_browsers = {} + match = AsyncMock() + match.session_id = "test-session" + match.schedule_id = 1 + match.chat_topic_id = None + match.browser_info = {} + match.provider_user_id = None + with patch.dict("os.environ", {"CEKI_HUMAN_DISABLE": "1"}): + b = Browser(client, match) + return b + + +async def test_type_sends_keydown_keyup_per_char(browser: Browser): + 
sent: list[dict] = [] + async def fake_send(cdp, **kw): + sent.append(cdp) + return {} + browser.send = fake_send + + await browser.type("hi") + + key_events = [s for s in sent if s["method"] == "Input.dispatchKeyEvent"] + keydowns = [s for s in key_events if s["params"]["type"] == "keyDown"] + keyups = [s for s in key_events if s["params"]["type"] == "keyUp"] + assert len(keydowns) == 2 + assert len(keyups) == 2 + assert keydowns[0]["params"]["key"] == "h" + assert keydowns[0]["params"]["code"] == "KeyH" + assert keydowns[1]["params"]["key"] == "i" + assert keydowns[1]["params"]["code"] == "KeyI" + + +async def test_type_uppercase_uses_shift(browser: Browser): + sent: list[dict] = [] + async def fake_send(cdp, **kw): + sent.append(cdp) + return {} + browser.send = fake_send + + await browser.type("Hi") + + key_events = [s for s in sent if s["method"] == "Input.dispatchKeyEvent"] + # H: shift_down, keyDown(H), keyUp(H), shift_up = 4 + # i: keyDown(i), keyUp(i) = 2 + # Total = 6 + assert len(key_events) == 6 + assert key_events[0]["params"]["key"] == "Shift" + assert key_events[0]["params"]["type"] == "keyDown" + assert key_events[1]["params"]["key"] == "H" + assert key_events[1]["params"]["modifiers"] == 8 + assert key_events[4]["params"]["key"] == "i" + + +async def test_type_digits_and_punctuation(browser: Browser): + sent: list[dict] = [] + async def fake_send(cdp, **kw): + sent.append(cdp) + return {} + browser.send = fake_send + + await browser.type("1!") + + key_events = [s for s in sent if s["method"] == "Input.dispatchKeyEvent"] + # 1: keyDown, keyUp = 2 + # !: shift_down, keyDown, keyUp, shift_up = 4 + assert len(key_events) == 6 + digit_down = key_events[0] + assert digit_down["params"]["code"] == "Digit1" + assert digit_down["params"]["text"] == "1" + excl_down = key_events[3] + assert excl_down["params"]["text"] == "!" 
+ assert excl_down["params"]["modifiers"] == 8 + + +async def test_type_non_ascii_falls_back_to_insert_text(browser: Browser): + sent: list[dict] = [] + async def fake_send(cdp, **kw): + sent.append(cdp) + return {} + browser.send = fake_send + + await browser.type("ы") + + insert_events = [s for s in sent if s["method"] == "Input.insertText"] + assert len(insert_events) == 1 + assert insert_events[0]["params"]["text"] == "ы" diff --git a/tests/test_type_with_pointer.py b/tests/test_type_with_pointer.py new file mode 100644 index 0000000..ebf0f6d --- /dev/null +++ b/tests/test_type_with_pointer.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +from unittest.mock import AsyncMock, patch + +import pytest + +from ceki_browser import Browser + + +@pytest.fixture +def browser_humanized(): + client = AsyncMock() + client._active_browsers = {} + + match = AsyncMock() + match.session_id = "test-session" + match.schedule_id = 1 + match.chat_topic_id = None + match.browser_info = {} + match.provider_user_id = None + + b = Browser(client, match, human="natural") + return b + + +@pytest.fixture +def browser_no_human(): + client = AsyncMock() + client._active_browsers = {} + + match = AsyncMock() + match.session_id = "test-session" + match.schedule_id = 1 + match.chat_topic_id = None + match.browser_info = {} + match.provider_user_id = None + + with patch.dict("os.environ", {"CEKI_HUMAN_DISABLE": "1"}): + b = Browser(client, match) + return b + + +async def test_humanizer_on_with_pointer_clicks_before_type(browser_humanized: Browser): + """humanizer ON + last_pointer set → click() before dispatchKeyEvent.""" + sent: list[dict] = [] + + async def fake_send(cdp, **kw): + sent.append(cdp) + return {} + + browser_humanized.send = fake_send + browser_humanized._last_pointer = (100, 200) + + await browser_humanized.type("ab") + + mouse_events = [s for s in sent if s["method"] == "Input.dispatchMouseEvent"] + key_events = [ + s for s in sent + if s["method"] == 
"Input.dispatchKeyEvent" + and s["params"]["type"] == "keyDown" + and s["params"].get("key") != "Shift" + ] + + assert len(mouse_events) >= 2, "should have mousePressed + mouseReleased" + assert any(e["params"]["type"] == "mousePressed" for e in mouse_events) + assert any(e["params"]["type"] == "mouseReleased" for e in mouse_events) + assert len(key_events) == 2, "should have per-char keyDown events" + + first_mouse_idx = sent.index(mouse_events[0]) + first_key_idx = sent.index(key_events[0]) + assert first_mouse_idx < first_key_idx, "mouse events must precede key events" + + +async def test_humanizer_on_no_pointer_no_click(browser_humanized: Browser): + """humanizer ON + last_pointer is None → no mouse events, just dispatchKeyEvent.""" + sent: list[dict] = [] + + async def fake_send(cdp, **kw): + sent.append(cdp) + return {} + + browser_humanized.send = fake_send + assert browser_humanized._last_pointer is None + + await browser_humanized.type("x") + + mouse_events = [s for s in sent if s["method"] == "Input.dispatchMouseEvent"] + key_events = [ + s for s in sent + if s["method"] == "Input.dispatchKeyEvent" + and s["params"]["type"] == "keyDown" + and s["params"].get("key") != "Shift" + ] + + assert len(mouse_events) == 0, "no mouse events without last_pointer" + assert len(key_events) >= 1 + + +async def test_humanizer_off_with_pointer_no_click(browser_no_human: Browser): + """humanizer OFF + last_pointer set → per-char dispatchKeyEvent, no click.""" + sent: list[dict] = [] + + async def fake_send(cdp, **kw): + sent.append(cdp) + return {} + + browser_no_human._last_pointer = (50, 60) + browser_no_human.send = fake_send + + await browser_no_human.type("hello") + + key_events = [s for s in sent if s["method"] == "Input.dispatchKeyEvent"] + keydowns = [ + s for s in key_events + if s["params"]["type"] == "keyDown" + and s["params"].get("key") != "Shift" + ] + assert len(keydowns) == 5, "should have 5 keyDown events for 'hello'" + assert keydowns[0]["params"]["key"] 
== "h" + assert keydowns[4]["params"]["key"] == "o" + + +async def test_humanizer_off_no_pointer_no_click(browser_no_human: Browser): + """humanizer OFF + no last_pointer → per-char dispatchKeyEvent, no click.""" + sent: list[dict] = [] + + async def fake_send(cdp, **kw): + sent.append(cdp) + return {} + + assert browser_no_human._last_pointer is None + browser_no_human.send = fake_send + + await browser_no_human.type("world") + + key_events = [s for s in sent if s["method"] == "Input.dispatchKeyEvent"] + keydowns = [ + s for s in key_events + if s["params"]["type"] == "keyDown" + and s["params"].get("key") != "Shift" + ] + assert len(keydowns) == 5, "should have 5 keyDown events for 'world'" + assert keydowns[0]["params"]["key"] == "w" + assert keydowns[4]["params"]["key"] == "d" diff --git a/tests/test_upload.py b/tests/test_upload.py new file mode 100644 index 0000000..327be7f --- /dev/null +++ b/tests/test_upload.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +import base64 +import json +import subprocess +import sys +import tempfile +from pathlib import Path +from unittest.mock import AsyncMock, patch + +import pytest + +from ceki_browser._browser import Browser +from ceki_browser.cli import build_parser + + +# ────────────────────────────────────────────────────────────────────────── +# Helpers +# ────────────────────────────────────────────────────────────────────────── + + +def _make_browser() -> Browser: + client = AsyncMock() + client._active_browsers = {} + client.chat_url = "https://test/chat" + client.api_key = "test" + + match = AsyncMock() + match.session_id = "upload-1" + match.schedule_id = 1 + match.chat_topic_id = "t1" + match.browser_info = {} + match.provider_user_id = None + + with patch.dict("os.environ", {"CEKI_HUMAN_DISABLE": "1"}): + return Browser(client, match) + + +# ────────────────────────────────────────────────────────────────────────── +# browser.upload() with file path +# 
────────────────────────────────────────────────────────────────────────── + + +async def test_upload_file_path(tmp_path: Path): + b = _make_browser() + test_file = tmp_path / "doc.pdf" + test_file.write_bytes(b"%PDF-1.4 test content") + + b.send = AsyncMock(return_value={ + "result": { + "value": json.dumps({"ok": True, "filename": "doc.pdf", "size": 21}), + } + }) + + result = await b.upload("input[type=file]", test_file) + assert result == {"ok": True, "filename": "doc.pdf", "size": 21} + + # Verify send was called with Runtime.evaluate + call_args = b.send.call_args[0][0] + assert call_args["method"] == "Runtime.evaluate" + expr = call_args["params"]["expression"] + assert "document.querySelector" in expr + assert "doc.pdf" in expr + + +# ────────────────────────────────────────────────────────────────────────── +# browser.upload() with bytes + custom filename +# ────────────────────────────────────────────────────────────────────────── + + +async def test_upload_bytes_custom_filename(): + b = _make_browser() + data = b"hello world" + + b.send = AsyncMock(return_value={ + "result": { + "value": json.dumps({"ok": True, "filename": "custom.txt", "size": 11}), + } + }) + + result = await b.upload("#file-input", data, filename="custom.txt") + assert result == {"ok": True, "filename": "custom.txt", "size": 11} + + call_args = b.send.call_args[0][0] + expr = call_args["params"]["expression"] + b64 = base64.b64encode(data).decode("ascii") + assert b64 in expr + assert "custom.txt" in expr + + +# ────────────────────────────────────────────────────────────────────────── +# browser.upload() bytes without filename defaults to upload.bin +# ────────────────────────────────────────────────────────────────────────── + + +async def test_upload_bytes_default_filename(): + b = _make_browser() + + b.send = AsyncMock(return_value={ + "result": { + "value": json.dumps({"ok": True, "filename": "upload.bin", "size": 3}), + } + }) + + result = await b.upload("input", 
b"\x00\x01\x02") + assert result["filename"] == "upload.bin" + + call_args = b.send.call_args[0][0] + expr = call_args["params"]["expression"] + assert "upload.bin" in expr + assert "application/octet-stream" in expr + + +# ────────────────────────────────────────────────────────────────────────── +# JS expression escapes special chars in filenames +# ────────────────────────────────────────────────────────────────────────── + + +async def test_upload_escapes_special_chars(tmp_path: Path): + b = _make_browser() + # Filename with quotes and backslash + test_file = tmp_path / "normal.png" + test_file.write_bytes(b"\x89PNG") + + b.send = AsyncMock(return_value={ + "result": { + "value": json.dumps({"ok": True, "filename": 'file"with\'quotes.png', "size": 4}), + } + }) + + result = await b.upload("input", test_file, filename='file"with\'quotes.png') + + call_args = b.send.call_args[0][0] + expr = call_args["params"]["expression"] + # json.dumps properly escapes the double quote + assert r'file\"with' in expr + # The expression should be valid JS (no syntax error from unescaped quotes) + assert "image/png" in expr + + +# ────────────────────────────────────────────────────────────────────────── +# upload raises ValueError on "no input matched" +# ────────────────────────────────────────────────────────────────────────── + + +async def test_upload_no_input_matched(): + b = _make_browser() + + b.send = AsyncMock(return_value={ + "result": { + "value": json.dumps({"error": "no input matched"}), + } + }) + + with pytest.raises(ValueError, match="no input matched"): + await b.upload("#missing", b"data", filename="f.txt") + + +# ────────────────────────────────────────────────────────────────────────── +# upload raises ValueError on "not a file input" +# ────────────────────────────────────────────────────────────────────────── + + +async def test_upload_not_file_input(): + b = _make_browser() + + b.send = AsyncMock(return_value={ + "result": { + "value": json.dumps({"error": 
"element is not a file input"}), + } + }) + + with pytest.raises(ValueError, match="element is not a file input"): + await b.upload("#text-input", b"data", filename="f.txt") + + +# ────────────────────────────────────────────────────────────────────────── +# upload raises ValueError for missing file path +# ────────────────────────────────────────────────────────────────────────── + + +async def test_upload_file_not_found(): + b = _make_browser() + with pytest.raises(ValueError, match="file not found"): + await b.upload("input", "/nonexistent/path/file.txt") + + +# ────────────────────────────────────────────────────────────────────────── +# MIME type detection +# ────────────────────────────────────────────────────────────────────────── + + +async def test_upload_mime_type_png(tmp_path: Path): + b = _make_browser() + f = tmp_path / "image.png" + f.write_bytes(b"\x89PNG") + + b.send = AsyncMock(return_value={ + "result": {"value": json.dumps({"ok": True, "filename": "image.png", "size": 4})} + }) + + await b.upload("input", f) + expr = b.send.call_args[0][0]["params"]["expression"] + assert "image/png" in expr + + +async def test_upload_mime_type_pdf(tmp_path: Path): + b = _make_browser() + f = tmp_path / "doc.pdf" + f.write_bytes(b"%PDF-1.4") + + b.send = AsyncMock(return_value={ + "result": {"value": json.dumps({"ok": True, "filename": "doc.pdf", "size": 8})} + }) + + await b.upload("input", f) + expr = b.send.call_args[0][0]["params"]["expression"] + assert "application/pdf" in expr + + +async def test_upload_mime_type_unknown(tmp_path: Path): + b = _make_browser() + f = tmp_path / "data.qzx" + f.write_bytes(b"binary data") + + b.send = AsyncMock(return_value={ + "result": {"value": json.dumps({"ok": True, "filename": "data.qzx", "size": 11})} + }) + + await b.upload("input", f) + expr = b.send.call_args[0][0]["params"]["expression"] + assert "application/octet-stream" in expr + + +# ────────────────────────────────────────────────────────────────────────── +# 
# CLI parser test
# ──────────────────────────────────────────────────────────────────────────


def test_parser_upload():
    """The ``upload`` subcommand parses the session id, selector and file path.

    ``--filename`` is optional and is ``None`` when it is not given.
    """
    parser = build_parser()
    args = parser.parse_args([
        "upload", "ses-1", "--selector", "input[type=file]",
        "--file", "/tmp/doc.pdf",
    ])
    assert args.command == "upload"
    assert args.session_id == "ses-1"
    assert args.selector == "input[type=file]"
    assert args.file_path == "/tmp/doc.pdf"
    assert args.filename is None


def test_parser_upload_with_filename():
    """``--filename`` is stored on the parsed namespace when supplied."""
    parser = build_parser()
    args = parser.parse_args([
        "upload", "ses-1", "--selector", "#upload",
        "--file", "/tmp/doc.pdf", "--filename", "renamed.pdf",
    ])
    assert args.filename == "renamed.pdf"


# ──────────────────────────────────────────────────────────────────────────
# CLI upload with missing file → exit 1 + error JSON
# ──────────────────────────────────────────────────────────────────────────


def test_cli_upload_missing_file():
    """Running ``upload`` on a missing file exits 1 with a JSON error on stderr.

    The CLI is started as a real subprocess (``python -m ceki_browser.cli``)
    with ``CEKI_API_KEY`` set, and stderr is expected to be a JSON object
    whose ``error`` field mentions "file not found".
    """
    # Imported locally so this test does not depend on a module-level
    # ``os`` import (replaces the former ``__import__("os")`` expression).
    import os

    env = {**os.environ, "CEKI_API_KEY": "test-key"}
    result = subprocess.run(
        [
            sys.executable, "-m", "ceki_browser.cli",
            "upload", "ses-1",
            "--selector", "input[type=file]",
            "--file", "/nonexistent/path/file.txt",
        ],
        capture_output=True,
        text=True,
        env=env,
        # A non-zero exit is the expected outcome, so it must not raise.
        check=False,
        # Bound the run so a hung CLI fails the test instead of stalling the suite.
        timeout=60,
    )
    assert result.returncode == 1
    err = json.loads(result.stderr.strip())
    assert "file not found" in err["error"]