From 054c6a37ed2dd8ae37a3ea4c359449d93f89d07e Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Tue, 7 Apr 2026 16:58:22 +0200 Subject: [PATCH 01/35] 1st iter --- pvc/model-test-refs-pvc.yaml | 12 + pyproject.toml | 2 +- tests/README-model-snapshots.md | 85 +++++ tests/__init__.py | 0 tests/benchmark/benchmark_batch_size.py | 333 ++++++++++++++++++ tests/benchmark/load_test.py | 286 +++++++++++++++ tests/model_snapshots/__init__.py | 0 tests/model_snapshots/_shared.py | 144 ++++++++ tests/model_snapshots/generate_references.py | 103 ++++++ .../test_binary_classifier_model_snapshot.py | 36 ++ ...st_semantic_segmentation_model_snapshot.py | 35 ++ 11 files changed, 1035 insertions(+), 1 deletion(-) create mode 100644 pvc/model-test-refs-pvc.yaml create mode 100644 tests/README-model-snapshots.md create mode 100644 tests/__init__.py create mode 100644 tests/benchmark/benchmark_batch_size.py create mode 100644 tests/benchmark/load_test.py create mode 100644 tests/model_snapshots/__init__.py create mode 100644 tests/model_snapshots/_shared.py create mode 100644 tests/model_snapshots/generate_references.py create mode 100644 tests/model_snapshots/test_binary_classifier_model_snapshot.py create mode 100644 tests/model_snapshots/test_semantic_segmentation_model_snapshot.py diff --git a/pvc/model-test-refs-pvc.yaml b/pvc/model-test-refs-pvc.yaml new file mode 100644 index 0000000..4b3390a --- /dev/null +++ b/pvc/model-test-refs-pvc.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: model-test-refs-pvc + labels: + app: model-service-tests +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 5Gi diff --git a/pyproject.toml b/pyproject.toml index fbb3a9f..df49f5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,4 +18,4 @@ dependencies = [ ] [dependency-groups] -dev = ["mypy>=1.18.2", "ruff>=0.14.6"] +dev = ["mypy>=1.18.2", "ruff>=0.14.6", "pytest>=8.4.2"] diff --git a/tests/README-model-snapshots.md b/tests/README-model-snapshots.md new file mode 100644 index 0000000..3d2fb7b --- /dev/null +++ b/tests/README-model-snapshots.md @@ -0,0 +1,85 @@ +# Model snapshot tests + +This repository contains end-to-end snapshot tests in `tests/model_snapshots/`. + +Per-model test files: + +- `tests/model_snapshots/test_binary_classifier_model_snapshot.py` +- `tests/model_snapshots/test_semantic_segmentation_model_snapshot.py` + +Shared files: + +- `tests/model_snapshots/_shared.py` +- `tests/model_snapshots/run_all_model_snapshot_tests.py` + +These tests are meant as post-deploy use-case checks (not only liveness checks): + +- they execute a real request path through Ray Serve deployment +- they verify request processing success (timeouts/errors fail the test) +- they verify result correctness for each deployment (`binary_classifier`, `semantic_segmentation`) +- they touch real slide paths, helping catch mount/filesystem issues + +Each test calls its deployment-specific endpoint: + +- binary classifier: SDK call `client.models.classify_image("prostate-classifier-1", tile)` +- semantic segmentation: SDK call `client.models.segment_image("episeg-1", tile)` + +Input tile is read directly from a real WSI using `ratiopath.openslide.OpenSlide`. + +## Adding a new model test + +Přidání nového modelu do testů je nyní velmi jednoduché: + +1. Vytvořte nový soubor v `tests/model_snapshots/`, např. `test_novy_model_snapshot.py`. +2. Importujte a zavolejte příslušnou case funkci z `_shared.py` a předejte jí konfiguraci napřímo parametrem: + +```python +from pathlib import Path +from tests.model_snapshots._shared import run_binary_classifier_case + +def test_novy_model_snapshot() -> None: + # Parametry si rovnou zadefinujte v testovacím souboru + run_binary_classifier_case( + model_id="my-new-endpoint-id", + slide_path="/mnt/bioptic_tree/.../slide.mrxs", + expected_score=0.987, + tile_size=512, + level=0, + ) +``` + +Tím se stane automaticky součástí sady `pytest tests/model_snapshots`. + +## Global environment variables + +Common (pro celý cluster a všechny testy): + +- `MODEL_SERVICE_MODELS_BASE_URL` (default: `http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000`) + +Očekávané skóre/pole a cesty k datasetům pro stávající modely (`episeg-1` a `prostate-classifier-1`) se tahají z těchto proměnných ve stávajících testovacích souborech, pokud chcete zachovat původní CI chování (případně se dají časem snadno zahardkódit do testovacího souboru): + +- `MODEL_TEST_BINARY_EXPECTED_SCORE` +- `MODEL_TEST_SEMANTIC_EXPECTED_ARRAY_PATH` + +## Example (PowerShell) + +```powershell +$env:MODEL_TEST_BINARY_EXPECTED_SCORE = "0.9732" +$env:MODEL_TEST_SEMANTIC_EXPECTED_ARRAY_PATH = "/mnt/path/to/reference/semantic_expected.npy" + +# Models base URL is resolved directly from SDK fallback inside kubernetes: +# http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000 + +python tests/model_snapshots/run_all_model_snapshot_tests.py + +# Alternative: +python -m pytest tests/model_snapshots -q +``` + +## SDK dependency + +Install SDK package so that `import rationai` works in tests, e.g.: + +```powershell +python -m pip install git+https://github.com/RationAI/rationai-sdk-python.git +``` diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/benchmark/benchmark_batch_size.py b/tests/benchmark/benchmark_batch_size.py new file mode 100644 index 0000000..7ca0f1f --- /dev/null +++ b/tests/benchmark/benchmark_batch_size.py @@ -0,0 +1,333 @@ +# kubectl apply -n rationai-jobs-ns -f c:\Users\jiris\muni-dp\dp\model-service\ray-service.yaml +# kubectl get pods -n rationai-jobs-ns | Select-String "episeg" (model name) +# kubectl cp tests/benchmark_batch_size.py rationai-jobs-ns/rayservice-model-optimized-7zwlk-head-fbzr5:/tmp/benchmark_batch_size.py +# kubectl exec -n rationai-jobs-ns rayservice-model-optimized-7zwlk-head-fbzr5 -- bash -c "python3 -u /tmp/benchmark_batch_size.py --url http://localhost:8000/episeg-1/ --batch-size 128" + +# kubectl exec -n rationai-jobs-ns rayservice-model-optimized-7zwlk-head-fbzr5 -- bash -c "pip install httpx -q && python3 -u /tmp/benchmark_batch_size.py --url http://localhost:8000/episeg-1/ --batch-size 8 --concurrency-values 4,8,16,24,32,48,64 --tile-size 1024 --n 500 --warmup 100" + +from __future__ import annotations + +import argparse +import asyncio +import csv +import sys +import time +from pathlib import Path + +import lz4.frame +import numpy as np + + +try: + import httpx +except ImportError: + print("pip install httpx") + sys.exit(1) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +TILE_SIZE_DEFAULT = 224 +POOL_SIZE = 64 +OUTPUT_CSV = "results.csv" + + +def make_pool(tile_size: int, n: int = POOL_SIZE) -> list[bytes]: + rng = np.random.default_rng(seed=42) + pool = [] + for _ in range(n): + img = rng.integers(0, 255, (tile_size, tile_size, 3), dtype=np.uint8) + pool.append(lz4.frame.compress(img.tobytes())) + return pool + + +async def run_batch( + url: str, + pool: list[bytes], + total: int, + concurrency: int, + timeout: float, +) -> tuple[float, int, int]: + """Pošle `total` requestů s `concurrency` souběžnými workery.""" + remaining = total + ok = 0 + fail = 0 + pool_len = len(pool) + counter = 0 + lock = asyncio.Lock() + + limits = httpx.Limits( + max_connections=concurrency + 8, + max_keepalive_connections=concurrency + 8, + ) + + async def worker(client: httpx.AsyncClient) -> None: + nonlocal remaining, ok, fail, counter + while True: + async with lock: + if remaining <= 0: + return + remaining -= 1 + idx = counter % pool_len + counter += 1 + payload = pool[idx] + try: + r = await client.post( + url, + content=payload, + headers={"Content-Type": "application/octet-stream"}, + timeout=timeout, + ) + if r.status_code == 200: + ok += 1 + else: + fail += 1 + print(f" [WARN] HTTP {r.status_code}: {r.text[:120]}") + except Exception as e: + fail += 1 + print(f" [ERR] {type(e).__name__}: {e!r}") + + t0 = time.perf_counter() + async with httpx.AsyncClient(limits=limits, follow_redirects=True) as client: + await asyncio.gather(*[worker(client) for _ in range(concurrency)]) + return time.perf_counter() - t0, ok, fail + + +def append_csv(path: str, row: dict) -> None: + fieldnames = [ + "url", + "batch_size", + "concurrency", + "n", + "elapsed_s", + "throughput_img_s", + "ok", + "fail", + ] + write_header = not Path(path).exists() + with open(path, "a", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + if write_header: + writer.writeheader() + writer.writerow(row) + + +def load_csv(path: str, url: str) -> list[dict]: + if not Path(path).exists(): + return [] + with open(path) as f: + reader = csv.DictReader(f) + return [r for r in reader if r["url"] == url] + + +def concurrency_sweep_values(batch_size: int) -> list[int]: + """Pro MIG-2g.20gb: testujeme rozsah od batch_size/2 do batch_size*4. + Jemnější kroky kolem batch_size kde bývá knee. + """ + half = max(1, batch_size // 2) + candidates = sorted( + set( + [ + half, + batch_size, + batch_size + batch_size // 2, + batch_size * 2, + batch_size * 3, + batch_size * 4, + ] + ) + ) + # Přidej mezikroky kolem batch_size + extras = [batch_size - batch_size // 4, batch_size + batch_size // 4] + candidates = sorted(set(candidates + [e for e in extras if e > 0])) + return candidates + + +def print_summary(rows: list[dict], batch_size: int | None = None) -> None: + if not rows: + return + if batch_size is not None: + rows = [r for r in rows if int(r["batch_size"]) == batch_size] + if not rows: + return + + best = max(rows, key=lambda r: float(r["throughput_img_s"])) + + header = f"{'batch_size':>12} {'concurrency':>12} {'throughput img/s':>18} {'ok':>8} {'fail':>8}" + print(header) + print("-" * len(header)) + for row in sorted( + rows, key=lambda r: (int(r["batch_size"]), int(r["concurrency"])) + ): + marker = " ← BEST" if row is best else "" + fail_val = int(row["fail"]) + fail_str = f"[!]{fail_val}" if fail_val > 0 else str(fail_val) + print( + f"{row['batch_size']:>12} {row['concurrency']:>12}" + f" {row['throughput_img_s']:>18} {row['ok']:>8} {fail_str:>8}{marker}" + ) + print() + print("Doporučené YAML hodnoty pro batch_size =", best["batch_size"]) + tor = int(best["concurrency"]) + mor = int(tor * 1.25) + 8 + print(f" max_batch_size: {best['batch_size']}") + print(f" target_ongoing_requests: {tor} # = nejlepší concurrency") + print(f" max_ongoing_requests: {mor} # target * 1.25 + buffer") + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +async def main() -> None: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "--url", + default="http://localhost:8000/virchow2/", + help="Endpointová URL (default: http://localhost:8000/virchow2/)", + ) + parser.add_argument( + "--batch-size", + type=int, + required=True, + help="max_batch_size nastavený v user_config (shodný s YAML)", + ) + parser.add_argument( + "--concurrency", + type=int, + default=None, + help="Pevná hodnota concurrency – přeskočí sweep a naměří jen tuto", + ) + parser.add_argument( + "--concurrency-values", + type=str, + default=None, + help="Čárkami oddělený seznam concurrency hodnot k otestování, " + "např. '32,64,128,256' (přepíše výchozí sweep)", + ) + parser.add_argument( + "--n", + type=int, + default=1000, + help="Počet měřených requestů na jeden bod (default: 1000)", + ) + parser.add_argument( + "--warmup", + type=int, + default=100, + help="Warmup requesty před měřením (default: 100)", + ) + parser.add_argument("--tile-size", type=int, default=TILE_SIZE_DEFAULT) + parser.add_argument("--timeout", type=float, default=300.0) + parser.add_argument( + "--output", + default=OUTPUT_CSV, + help=f"Výstupní CSV soubor (default: {OUTPUT_CSV})", + ) + parser.add_argument( + "--skip-existing", + action="store_true", + help="Přeskočí (batch_size, concurrency) kombinace už změřené v CSV", + ) + args = parser.parse_args() + + url = args.url.rstrip("/") + "/" + pool = make_pool(args.tile_size) + + # Determine sweep values + if args.concurrency is not None: + sweep = [args.concurrency] + elif args.concurrency_values: + sweep = [int(v.strip()) for v in args.concurrency_values.split(",")] + else: + sweep = concurrency_sweep_values(args.batch_size) + + # Already measured (for --skip-existing) + existing: set[int] = set() + if args.skip_existing: + for row in load_csv(args.output, url): + if int(row["batch_size"]) == args.batch_size: + existing.add(int(row["concurrency"])) + + print("=" * 60) + print("Virchow2 Benchmark Sweep") + print("=" * 60) + print(f"URL: {url}") + print(f"max_batch_size: {args.batch_size} (musí odpovídat YAML!)") + print(f"concurrency sweep:{sweep}") + print(f"n per point: {args.n}") + print(f"warmup: {args.warmup}") + print(f"output: {args.output}") + print() + + # Warmup – jednou, s prostředním concurrency + warmup_conc = sweep[len(sweep) // 2] + print(f"Warmup ({args.warmup} img, concurrency={warmup_conc})...") + await run_batch(url, pool, args.warmup, warmup_conc, args.timeout) + print("Warmup done.\n") + + results_this_run: list[dict] = [] + + for conc in sweep: + if conc in existing: + print(f"[SKIP] concurrency={conc} (already in CSV)") + continue + + print(f"▶ batch_size={args.batch_size} concurrency={conc} ({args.n} img)...") + elapsed, ok, fail = await run_batch(url, pool, args.n, conc, args.timeout) + rps = ok / elapsed if elapsed > 0 else 0.0 + + row = { + "url": url, + "batch_size": args.batch_size, + "concurrency": conc, + "n": ok + fail, + "elapsed_s": f"{elapsed:.2f}", + "throughput_img_s": f"{rps:.1f}", + "ok": ok, + "fail": fail, + } + append_csv(args.output, row) + results_this_run.append(row) + + status = f" → {rps:.1f} img/s" + if fail: + status += f" [{fail} failures!]" + print(status) + + # Kratká pauza mezi body aby se server stabilizoval + await asyncio.sleep(2) + + # Summary – jen aktuální batch_size + print() + print("=" * 60) + print(f"Výsledky pro batch_size = {args.batch_size}") + print("=" * 60) + all_rows = load_csv(args.output, url) + print_summary(all_rows, batch_size=args.batch_size) + + # Pokud existují data pro více batch_size, ukaž i celkové porovnání + all_batch_sizes = sorted(set(int(r["batch_size"]) for r in all_rows)) + if len(all_batch_sizes) > 1: + print() + print("=" * 60) + print("Celkové porovnání všech batch_size (best concurrency per batch)") + print("=" * 60) + # Pro každý batch_size vyber jen nejlepší concurrency + best_per_batch = [] + for bs in all_batch_sizes: + candidates = [r for r in all_rows if int(r["batch_size"]) == bs] + if candidates: + best_per_batch.append( + max(candidates, key=lambda r: float(r["throughput_img_s"])) + ) + print_summary(best_per_batch) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/tests/benchmark/load_test.py b/tests/benchmark/load_test.py new file mode 100644 index 0000000..ba5b8ef --- /dev/null +++ b/tests/benchmark/load_test.py @@ -0,0 +1,286 @@ +# kubectl cp tests/load_test.py rationai-jobs-ns/rayservice-model-virchow2-5qfmz-head-98tbv:/tmp/load_test.py +# kubectl exec -n rationai-jobs-ns rayservice-model-virchow2-5qfmz-head-98tbv -- bash -c "python3 -u /tmp/load_test.py --url http://localhost:8000/virchow2/ --tiles 5000 --concurrency 128" +from __future__ import annotations + +import argparse +import asyncio +import sys +import time +from dataclasses import dataclass, field + +import lz4.frame +import numpy as np + + +try: + import httpx +except ImportError: + print("pip install httpx") + sys.exit(1) + + +TILE_SIZE_DEFAULT = 224 +POOL_SIZE = 64 + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def make_pool(tile_size: int, n: int = POOL_SIZE) -> list[bytes]: + rng = np.random.default_rng(seed=42) + pool = [] + for _ in range(n): + img = rng.integers(0, 255, (tile_size, tile_size, 3), dtype=np.uint8) + pool.append(lz4.frame.compress(img.tobytes())) + return pool + + +@dataclass +class Stats: + ok: int = 0 + fail_503: int = 0 + fail_other: int = 0 + latencies: list[float] = field(default_factory=list) + lock: asyncio.Lock = field(default_factory=asyncio.Lock) + + @property + def total(self) -> int: + return self.ok + self.fail_503 + self.fail_other + + def percentile(self, p: float) -> float: + if not self.latencies: + return 0.0 + s = sorted(self.latencies) + idx = int(len(s) * p / 100) + return s[min(idx, len(s) - 1)] + + +async def send_tile( + client: httpx.AsyncClient, + url: str, + payload: bytes, + stats: Stats, + timeout: float, + progress_every: int, +) -> None: + t0 = time.perf_counter() + try: + r = await client.post( + url, + content=payload, + headers={"Content-Type": "application/octet-stream"}, + timeout=timeout, + ) + latency = time.perf_counter() - t0 + async with stats.lock: + if r.status_code == 200: + stats.ok += 1 + stats.latencies.append(latency) + if stats.ok % progress_every == 0: + print( + f" ✓ {stats.ok} OK | 503: {stats.fail_503} | other: {stats.fail_other}" + ) + elif r.status_code == 503: + stats.fail_503 += 1 + else: + stats.fail_other += 1 + print(f" [WARN] HTTP {r.status_code}: {r.text[:120]}") + except Exception as e: + async with stats.lock: + stats.fail_other += 1 + print(f" [ERR] {e}") + + +async def run_wsi( + url: str, + pool: list[bytes], + tiles: int, + concurrency: int, + timeout: float, + wsi_id: int, + stats: Stats, +) -> float: + """Simuluje jeden WSI — pošle `tiles` requestů s max `concurrency` souběžně.""" + semaphore = asyncio.Semaphore(concurrency) + pool_len = len(pool) + + limits = httpx.Limits( + max_connections=concurrency + 8, + max_keepalive_connections=concurrency + 8, + ) + + async def bounded_send(client: httpx.AsyncClient, idx: int) -> None: + async with semaphore: + await send_tile( + client, + url, + pool[idx % pool_len], + stats, + timeout, + progress_every=max(tiles // 10, 100), + ) + + t0 = time.perf_counter() + async with httpx.AsyncClient(limits=limits, follow_redirects=True) as client: + tasks = [bounded_send(client, i) for i in range(tiles)] + await asyncio.gather(*tasks) + return time.perf_counter() - t0 + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +async def main() -> None: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--url", default="http://localhost:8000/virchow2/", help="Endpoint URL" + ) + parser.add_argument( + "--tiles", + type=int, + default=5000, + help="Počet dlaždic na jeden WSI (default: 5000)", + ) + parser.add_argument( + "--wsi-count", + type=int, + default=1, + help="Počet paralelních WSI slidů (default: 1)", + ) + parser.add_argument( + "--concurrency", + type=int, + default=128, + help="Max souběžných requestů na WSI (default: 128, " + "mělo by odpovídat target_ongoing_requests)", + ) + parser.add_argument("--tile-size", type=int, default=TILE_SIZE_DEFAULT) + parser.add_argument( + "--timeout", + type=float, + default=120.0, + help="Timeout na jeden request v sekundách (default: 120)", + ) + parser.add_argument( + "--warmup", + type=int, + default=50, + help="Warmup requestů před testem (default: 50)", + ) + parser.add_argument("--no-warmup", action="store_true", help="Přeskočit warmup") + args = parser.parse_args() + + url = args.url.rstrip("/") + "/" + pool = make_pool(args.tile_size) + total_tiles = args.tiles * args.wsi_count + + print("=" * 60) + print("Virchow2 WSI Load Test") + print("=" * 60) + print(f"URL: {url}") + print(f"Tiles per WSI: {args.tiles:,}") + print(f"WSI count: {args.wsi_count}") + print(f"Total tiles: {total_tiles:,}") + print(f"Concurrency/WSI: {args.concurrency}") + print(f"Total concurrent: {args.concurrency * args.wsi_count}") + print(f"Request timeout: {args.timeout}s") + print() + + # Warmup + if not args.no_warmup: + print(f"Warmup ({args.warmup} tiles)...") + warmup_stats = Stats() + await run_wsi( + url, + pool, + args.warmup, + min(args.concurrency, 32), + args.timeout, + wsi_id=0, + stats=warmup_stats, + ) + print( + f"Warmup done (ok={warmup_stats.ok}, fail={warmup_stats.fail_503 + warmup_stats.fail_other}).\n" + ) + + # Actual test + stats = Stats() + print( + f"▶ Spouštím {'paralelně ' + str(args.wsi_count) + ' WSI' if args.wsi_count > 1 else '1 WSI'} " + f"({total_tiles:,} tiles celkem)...\n" + ) + + t0 = time.perf_counter() + + if args.wsi_count == 1: + await run_wsi( + url, pool, args.tiles, args.concurrency, args.timeout, wsi_id=0, stats=stats + ) + else: + # Všechny WSI slidy spustit paralelně — simulace více scannerů najednou + await asyncio.gather( + *[ + run_wsi( + url, + pool, + args.tiles, + args.concurrency, + args.timeout, + wsi_id=i, + stats=stats, + ) + for i in range(args.wsi_count) + ] + ) + + elapsed = time.perf_counter() - t0 + rps = stats.ok / elapsed if elapsed > 0 else 0.0 + + # Report + print() + print("=" * 60) + print("Výsledky") + print("=" * 60) + print(f"Celkový čas: {elapsed:.1f}s ({elapsed / 60:.1f} min)") + print(f"Throughput: {rps:.1f} img/s") + print() + print( + f"Úspěšné: {stats.ok:,} / {total_tiles:,} ({100 * stats.ok / total_tiles:.1f}%)" + ) + print( + f"503 backpressure: {stats.fail_503:,} ({100 * stats.fail_503 / total_tiles:.1f}%)" + ) + print(f"Jiné chyby: {stats.fail_other:,}") + print() + if stats.latencies: + print("Latence (úspěšné requesty):") + print(f" p50: {stats.percentile(50) * 1000:.0f} ms") + print(f" p90: {stats.percentile(90) * 1000:.0f} ms") + print(f" p99: {stats.percentile(99) * 1000:.0f} ms") + print(f" max: {max(stats.latencies) * 1000:.0f} ms") + print() + + # Verdict + fail_rate = (stats.fail_503 + stats.fail_other) / total_tiles + if fail_rate == 0: + print("✅ PASS — žádné chyby, nastavení je v pořádku pro WSI.") + elif fail_rate < 0.01: + print( + f"⚠️ WARN — {fail_rate * 100:.2f}% chyb. Zvažte zvýšení max_queued_requests." + ) + else: + print( + f"❌ FAIL — {fail_rate * 100:.1f}% chyb. Nastavení nestačí pro tento objem." + ) + print(" → Zvyšte max_queued_requests nebo snižte --concurrency klientů.") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/tests/model_snapshots/__init__.py b/tests/model_snapshots/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py new file mode 100644 index 0000000..e0bc161 --- /dev/null +++ b/tests/model_snapshots/_shared.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import hashlib +import importlib +import os +from pathlib import Path + +import numpy as np +import pytest + + +def _required_env(var_name: str) -> str: + value = os.environ.get(var_name) + if not value: + pytest.skip(f"Missing env var `{var_name}`.") + return value + + +def _sha256(path: Path) -> str: + digest = hashlib.sha256() + with path.open("rb") as file_handle: + for chunk in iter(lambda: file_handle.read(1024 * 1024), b""): + digest.update(chunk) + return digest.hexdigest() + + +def _read_tile_from_slide( + slide_path: str, + tile_size: int, + level: int, +) -> np.ndarray: + try: + from ratiopath.openslide import OpenSlide + except ImportError: + pytest.skip("Python package `ratiopath` is not installed.") + + with OpenSlide(slide_path) as slide: + extent_x, extent_y = slide.level_dimensions[level] + x = max(0, (extent_x - tile_size) // 2) + y = max(0, (extent_y - tile_size) // 2) + tile = slide.read_region_relative( + (x, y), level, (tile_size, tile_size) + ).convert("RGB") + + return np.asarray(tile, dtype=np.uint8) + + +def _client( + models_base_url: str, + timeout_s: float, +): + try: + rationai = importlib.import_module("rationai") + except ImportError: + pytest.skip("Python package `rationai` is not installed.") + + return rationai.Client(models_base_url=models_base_url, timeout=timeout_s) + + +def run_binary_classifier_case( + model_id: str, + slide_path: str, + expected_score: float, + tile_size: int = 512, + level: int = 0, + timeout_s: float = 600.0, + tolerance: float = 0.00001, +) -> None: + models_base_url = os.environ.get( + "MODEL_SERVICE_MODELS_BASE_URL", + "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + ) + + tile = _read_tile_from_slide( + slide_path=slide_path, tile_size=tile_size, level=level + ) + + with _client(models_base_url=models_base_url, timeout_s=timeout_s) as client: + prediction = client.models.classify_image( + model=model_id, image=tile, timeout=timeout_s + ) + + if not isinstance(prediction, int | float): + pytest.fail( + "Expected binary classifier to return scalar score, " + f"got {type(prediction)}: {prediction}" + ) + + actual_score = float(prediction) + assert abs(actual_score - expected_score) <= tolerance, ( + f"Binary score mismatch: expected={expected_score}, actual={actual_score}, " + f"tolerance={tolerance}" + ) + + +def run_semantic_segmentation_case( + model_id: str, + slide_path: str, + expected_array_path: Path | str, + tile_size: int = 1024, + level: int = 0, + timeout_s: float = 600.0, + atol: float = 0.0, + rtol: float = 0.0, +) -> None: + models_base_url = os.environ.get( + "MODEL_SERVICE_MODELS_BASE_URL", + "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + ) + expected_array_path = Path(expected_array_path) + + if not expected_array_path.exists(): + pytest.fail(f"Expected array file does not exist: {expected_array_path}") + + tile = _read_tile_from_slide( + slide_path=slide_path, tile_size=tile_size, level=level + ) + expected = np.load(expected_array_path) + + with _client(models_base_url=models_base_url, timeout_s=timeout_s) as client: + prediction = client.models.segment_image( + model=model_id, image=tile, timeout=timeout_s + ) + + actual = np.asarray(prediction) + + if actual.shape != expected.shape: + pytest.fail( + f"Semantic shape mismatch: expected={expected.shape}, actual={actual.shape}" + ) + + if not np.allclose(actual, expected, rtol=rtol, atol=atol): + mismatch = np.abs(actual.astype(np.float32) - expected.astype(np.float32)).max() + pytest.fail( + "Semantic output mismatch: arrays differ beyond tolerance " + f"(atol={atol}, rtol={rtol}, max_abs_diff={mismatch})" + ) + + +def verify_file_hash(path: Path, expected_hash: str) -> None: + actual_hash = _sha256(path) + assert actual_hash == expected_hash, ( + f"Hash mismatch for {path}: expected={expected_hash}, actual={actual_hash}" + ) diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py new file mode 100644 index 0000000..f5ea20d --- /dev/null +++ b/tests/model_snapshots/generate_references.py @@ -0,0 +1,103 @@ +import json +import os +from pathlib import Path + +import numpy as np + +from tests.model_snapshots._shared import _client, _read_tile_from_slide + + +OUT_DIR = Path("/mnt/test_refs") + +CASES = [ + { + "label": "breast", + "slide_path": "/mnt/bioptic_tree/2019/08/728/2019_08728-01-T/2019_08728-01-T.mrxs", + "model_id": "episeg-1", + "type": "semantic", + "tile_size": 1024, + "level": 0, + }, + { + "label": "breast", + "slide_path": "/mnt/bioptic_tree/2019/08/728/2019_08728-01-T/2019_08728-01-T.mrxs", + "model_id": "prostate-classifier-1", + "type": "binary", + "tile_size": 512, + "level": 0, + }, + { + "label": "colorectum", + "slide_path": "/mnt/data/MOU/colorectum/colorectal_cancer_2020-2024-06/2020_00106-01-N.mrxs", + "model_id": "episeg-1", + "type": "semantic", + "tile_size": 1024, + "level": 0, + }, + { + "label": "colon", + "slide_path": "/mnt/data/MOU/colon/comparison_of_scanners/FLASH2021_5638-02-T.mrxs", + "model_id": "episeg-1", + "type": "semantic", + "tile_size": 1024, + "level": 0, + }, +] + + +def generate_references() -> None: + models_base_url = os.environ.get( + "MODEL_SERVICE_MODELS_BASE_URL", + "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + ) + + OUT_DIR.mkdir(parents=True, exist_ok=True) + print(f"== Generating references to {OUT_DIR} via {models_base_url} ==") + + with _client(models_base_url=models_base_url, timeout_s=600) as client: + for case in CASES: + label = case["label"] + model_id = case["model_id"] + mtype = case["type"] + slide_path = case["slide_path"] + tile_size = case["tile_size"] + level = case["level"] + + print(f"\nProcessing [{label}] => Model: {model_id} ({mtype})") + print(f"Slide path: {slide_path}") + + try: + tile = _read_tile_from_slide( + slide_path=slide_path, tile_size=tile_size, level=level + ) + except Exception as e: + print(f" -> Failed to read tile: {e}") + continue + + try: + if mtype == "binary": + prediction = client.models.classify_image( + model=model_id, image=tile, timeout=600 + ) + out_file = OUT_DIR / f"{label}_{model_id}_expected.json" + with out_file.open("w") as f: + json.dump({"expected_score": float(prediction)}, f, indent=2) + print(f" -> SUCCESS! Saved binary score to {out_file}") + + elif mtype == "semantic": + prediction = client.models.segment_image( + model=model_id, image=tile, timeout=600 + ) + arr = np.asarray(prediction) + out_file = OUT_DIR / f"{label}_{model_id}_expected.npy" + np.save(out_file, arr) + print( + f" -> SUCCESS! Saved semantic array {arr.shape} to {out_file}" + ) + + except Exception as e: + print(f" -> ERROR during prediction/saving: {e}") + + +if __name__ == "__main__": + generate_references() diff --git a/tests/model_snapshots/test_binary_classifier_model_snapshot.py b/tests/model_snapshots/test_binary_classifier_model_snapshot.py new file mode 100644 index 0000000..cd533f8 --- /dev/null +++ b/tests/model_snapshots/test_binary_classifier_model_snapshot.py @@ -0,0 +1,36 @@ +import json +from pathlib import Path + +import pytest + +from tests.model_snapshots._shared import run_binary_classifier_case + + +@pytest.mark.parametrize( + "label, slide_path", + [ + ( + "breast", + "/mnt/bioptic_tree/2019/08/728/2019_08728-01-T/2019_08728-01-T.mrxs", + ), + ], +) +def test_binary_classifier(label: str, slide_path: str) -> None: + model_id = "prostate-classifier-1" + json_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.json") + + if json_path.exists(): + with json_path.open() as f: + expected_score = json.load(f)["expected_score"] + else: + pytest.skip( + f"Reference file {json_path} missing. Run generate_references.py first." + ) + + run_binary_classifier_case( + model_id=model_id, + slide_path=slide_path, + expected_score=expected_score, + tile_size=512, + level=0, + ) diff --git a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py new file mode 100644 index 0000000..a6301a8 --- /dev/null +++ b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py @@ -0,0 +1,35 @@ +from pathlib import Path + +import pytest + +from tests.model_snapshots._shared import run_semantic_segmentation_case + + +@pytest.mark.parametrize( + "label, slide_path", + [ + ( + "breast", + "/mnt/bioptic_tree/2019/08/728/2019_08728-01-T/2019_08728-01-T.mrxs", + ), + ( + "colorectum", + "/mnt/data/MOU/colorectum/colorectal_cancer_2020-2024-06/2020_00106-01-N.mrxs", + ), + ( + "colon", + "/mnt/data/MOU/colon/comparison_of_scanners/FLASH2021_5638-02-T.mrxs", + ), + ], +) +def test_semantic_episeg(label: str, slide_path: str) -> None: + model_id = "episeg-1" + expected_array_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.npy") + + run_semantic_segmentation_case( + model_id=model_id, + slide_path=slide_path, + expected_array_path=expected_array_path, + tile_size=1024, + level=0, + ) From 5f80c05c1089d1cbfa28539efab96aab43e02a30 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 26 Apr 2026 14:50:30 +0200 Subject: [PATCH 02/35] test runner --- builders/test_runner.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 builders/test_runner.py diff --git a/builders/test_runner.py b/builders/test_runner.py new file mode 100644 index 0000000..e8a8cef --- /dev/null +++ b/builders/test_runner.py @@ -0,0 +1,36 @@ +import subprocess +import sys + +from fastapi import FastAPI +from ray import serve + + +fastapi = FastAPI() + + +@serve.deployment(num_replicas=1) +@serve.ingress(fastapi) +class TestRunner: + @fastapi.post("/") + def run(self) -> dict: + result = subprocess.run( + [ + sys.executable, + "-m", + "pytest", + "tests/model_snapshots/", + "-v", + "--tb=short", + ], + capture_output=True, + text=True, + ) + return { + "returncode": result.returncode, + "stdout": result.stdout, + "stderr": result.stderr, + "passed": result.returncode == 0, + } + + +app = TestRunner.bind() From 4aed7548abb573a5115fb3439ce05fd5fb61f4d7 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 26 Apr 2026 14:54:43 +0200 Subject: [PATCH 03/35] fix --- builders/test_runner.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/builders/test_runner.py b/builders/test_runner.py index e8a8cef..3a0a578 100644 --- a/builders/test_runner.py +++ b/builders/test_runner.py @@ -11,6 +11,12 @@ @serve.deployment(num_replicas=1) @serve.ingress(fastapi) class TestRunner: + def __init__(self) -> None: + subprocess.run( + [sys.executable, "-m", "pip", "install", "pytest", "-q"], + check=True, + ) + @fastapi.post("/") def run(self) -> dict: result = subprocess.run( From 85db95c693f400bf76cbe2d3ef8a84a2c999cfc4 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 26 Apr 2026 15:10:07 +0200 Subject: [PATCH 04/35] tests --- tests/model_snapshots/_shared.py | 95 ++++++-------- tests/model_snapshots/generate_references.py | 119 +++++++----------- .../test_binary_classifier_model_snapshot.py | 7 +- ...st_semantic_segmentation_model_snapshot.py | 11 +- 4 files changed, 87 insertions(+), 145 deletions(-) diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index e0bc161..5edd6c6 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -1,34 +1,36 @@ from __future__ import annotations import hashlib -import importlib import os from pathlib import Path +import httpx +import lz4.frame import numpy as np import pytest - - -def _required_env(var_name: str) -> str: - value = os.environ.get(var_name) - if not value: - pytest.skip(f"Missing env var `{var_name}`.") - return value +from numpy.typing import NDArray def _sha256(path: Path) -> str: digest = hashlib.sha256() - with path.open("rb") as file_handle: - for chunk in iter(lambda: file_handle.read(1024 * 1024), b""): + with path.open("rb") as fh: + for chunk in iter(lambda: fh.read(1024 * 1024), b""): digest.update(chunk) return digest.hexdigest() +def _models_base_url() -> str: + return os.environ.get( + "MODEL_SERVICE_MODELS_BASE_URL", + "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + ) + + def _read_tile_from_slide( slide_path: str, tile_size: int, level: int, -) -> np.ndarray: +) -> NDArray[np.uint8]: try: from ratiopath.openslide import OpenSlide except ImportError: @@ -45,16 +47,31 @@ def _read_tile_from_slide( return np.asarray(tile, dtype=np.uint8) -def _client( - models_base_url: str, +def _classify( + model_id: str, + tile: NDArray[np.uint8], timeout_s: float, -): - try: - rationai = importlib.import_module("rationai") - except ImportError: - pytest.skip("Python package `rationai` is not installed.") +) -> float: + url = f"{_models_base_url()}/{model_id}/" + data = lz4.frame.compress(tile.tobytes()) + response = httpx.post(url, content=data, timeout=httpx.Timeout(timeout_s)) + response.raise_for_status() + return float(response.json()) - return rationai.Client(models_base_url=models_base_url, timeout=timeout_s) + +def _segment( + model_id: str, + tile: NDArray[np.uint8], + timeout_s: float, +) -> NDArray[np.float16]: + h, w = tile.shape[:2] + url = f"{_models_base_url()}/{model_id}/" + data = lz4.frame.compress(tile.tobytes()) + response = httpx.post(url, content=data, timeout=httpx.Timeout(timeout_s)) + response.raise_for_status() + return np.frombuffer( + lz4.frame.decompress(response.content), dtype=np.float16 + ).reshape(-1, h, w) def run_binary_classifier_case( @@ -66,27 +83,11 @@ def run_binary_classifier_case( timeout_s: float = 600.0, tolerance: float = 0.00001, ) -> None: - models_base_url = os.environ.get( - "MODEL_SERVICE_MODELS_BASE_URL", - "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", - ) - tile = _read_tile_from_slide( slide_path=slide_path, tile_size=tile_size, level=level ) + actual_score = _classify(model_id=model_id, tile=tile, timeout_s=timeout_s) - with _client(models_base_url=models_base_url, timeout_s=timeout_s) as client: - prediction = client.models.classify_image( - model=model_id, image=tile, timeout=timeout_s - ) - - if not isinstance(prediction, int | float): - pytest.fail( - "Expected binary classifier to return scalar score, " - f"got {type(prediction)}: {prediction}" - ) - - actual_score = float(prediction) assert abs(actual_score - expected_score) <= tolerance, ( f"Binary score mismatch: expected={expected_score}, actual={actual_score}, " f"tolerance={tolerance}" @@ -103,37 +104,23 @@ def run_semantic_segmentation_case( atol: float = 0.0, rtol: float = 0.0, ) -> None: - models_base_url = os.environ.get( - "MODEL_SERVICE_MODELS_BASE_URL", - "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", - ) expected_array_path = Path(expected_array_path) - if not expected_array_path.exists(): - pytest.fail(f"Expected array file does not exist: {expected_array_path}") + pytest.fail(f"Reference file does not exist: {expected_array_path}") tile = _read_tile_from_slide( slide_path=slide_path, tile_size=tile_size, level=level ) expected = np.load(expected_array_path) - - with _client(models_base_url=models_base_url, timeout_s=timeout_s) as client: - prediction = client.models.segment_image( - model=model_id, image=tile, timeout=timeout_s - ) - - actual = np.asarray(prediction) + actual = _segment(model_id=model_id, tile=tile, timeout_s=timeout_s) if actual.shape != expected.shape: - pytest.fail( - f"Semantic shape mismatch: expected={expected.shape}, actual={actual.shape}" - ) + pytest.fail(f"Shape mismatch: expected={expected.shape}, actual={actual.shape}") if not np.allclose(actual, expected, rtol=rtol, atol=atol): mismatch = np.abs(actual.astype(np.float32) - expected.astype(np.float32)).max() pytest.fail( - "Semantic output mismatch: arrays differ beyond tolerance " - f"(atol={atol}, rtol={rtol}, max_abs_diff={mismatch})" + f"Output mismatch beyond tolerance (atol={atol}, rtol={rtol}, max_abs_diff={mismatch})" ) diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index f5ea20d..04f831f 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -1,10 +1,10 @@ import json -import os from pathlib import Path +import httpx +import lz4.frame import numpy as np - -from tests.model_snapshots._shared import _client, _read_tile_from_slide +from _shared import _models_base_url, _read_tile_from_slide OUT_DIR = Path("/mnt/test_refs") @@ -18,85 +18,50 @@ "tile_size": 1024, "level": 0, }, - { - "label": "breast", - "slide_path": "/mnt/bioptic_tree/2019/08/728/2019_08728-01-T/2019_08728-01-T.mrxs", - "model_id": "prostate-classifier-1", - "type": "binary", - "tile_size": 512, - "level": 0, - }, - { - "label": "colorectum", - "slide_path": "/mnt/data/MOU/colorectum/colorectal_cancer_2020-2024-06/2020_00106-01-N.mrxs", - "model_id": "episeg-1", - "type": "semantic", - "tile_size": 1024, - "level": 0, - }, - { - "label": "colon", - "slide_path": "/mnt/data/MOU/colon/comparison_of_scanners/FLASH2021_5638-02-T.mrxs", - "model_id": "episeg-1", - "type": "semantic", - "tile_size": 1024, - "level": 0, - }, ] def generate_references() -> None: - models_base_url = os.environ.get( - "MODEL_SERVICE_MODELS_BASE_URL", - "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", - ) - + base_url = _models_base_url() OUT_DIR.mkdir(parents=True, exist_ok=True) - print(f"== Generating references to {OUT_DIR} via {models_base_url} ==") - - with _client(models_base_url=models_base_url, timeout_s=600) as client: - for case in CASES: - label = case["label"] - model_id = case["model_id"] - mtype = case["type"] - slide_path = case["slide_path"] - tile_size = case["tile_size"] - level = case["level"] - - print(f"\nProcessing [{label}] => Model: {model_id} ({mtype})") - print(f"Slide path: {slide_path}") - - try: - tile = _read_tile_from_slide( - slide_path=slide_path, tile_size=tile_size, level=level - ) - except Exception as e: - print(f" -> Failed to read tile: {e}") - continue - - try: - if mtype == "binary": - prediction = client.models.classify_image( - model=model_id, image=tile, timeout=600 - ) - out_file = OUT_DIR / f"{label}_{model_id}_expected.json" - with out_file.open("w") as f: - json.dump({"expected_score": float(prediction)}, f, indent=2) - print(f" -> SUCCESS! Saved binary score to {out_file}") - - elif mtype == "semantic": - prediction = client.models.segment_image( - model=model_id, image=tile, timeout=600 - ) - arr = np.asarray(prediction) - out_file = OUT_DIR / f"{label}_{model_id}_expected.npy" - np.save(out_file, arr) - print( - f" -> SUCCESS! Saved semantic array {arr.shape} to {out_file}" - ) - - except Exception as e: - print(f" -> ERROR during prediction/saving: {e}") + print(f"== Generating references to {OUT_DIR} via {base_url} ==") + + for case in CASES: + label, model_id, mtype = case["label"], case["model_id"], case["type"] + print(f"\n[{label}] {model_id} ({mtype})") + + try: + tile = _read_tile_from_slide( + case["slide_path"], case["tile_size"], case["level"] + ) + except Exception as e: + print(f" -> Failed to read tile: {e}") + continue + + url = f"{base_url}/{model_id}/" + data = lz4.frame.compress(tile.tobytes()) + + try: + response = httpx.post(url, content=data, timeout=600) + response.raise_for_status() + + if mtype == "binary": + out_file = OUT_DIR / f"{label}_{model_id}_expected.json" + with out_file.open("w") as f: + json.dump({"expected_score": float(response.json())}, f, indent=2) + print(f" -> Saved {out_file}") + + elif mtype == "semantic": + h, w = tile.shape[:2] + arr = np.frombuffer( + lz4.frame.decompress(response.content), dtype=np.float16 + ).reshape(-1, h, w) + out_file = OUT_DIR / f"{label}_{model_id}_expected.npy" + np.save(out_file, arr) + print(f" -> Saved {out_file} shape={arr.shape}") + + except Exception as e: + print(f" -> ERROR: {e}") if __name__ == "__main__": diff --git a/tests/model_snapshots/test_binary_classifier_model_snapshot.py b/tests/model_snapshots/test_binary_classifier_model_snapshot.py index cd533f8..87709a7 100644 --- a/tests/model_snapshots/test_binary_classifier_model_snapshot.py +++ b/tests/model_snapshots/test_binary_classifier_model_snapshot.py @@ -2,16 +2,15 @@ from pathlib import Path import pytest - -from tests.model_snapshots._shared import run_binary_classifier_case +from _shared import run_binary_classifier_case @pytest.mark.parametrize( "label, slide_path", [ ( - "breast", - "/mnt/bioptic_tree/2019/08/728/2019_08728-01-T/2019_08728-01-T.mrxs", + "colon", + "/mnt/data/MOU/colon/comparison_of_scanners/FLASH2021_5638-02-T.mrxs", ), ], ) diff --git a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py index a6301a8..294f682 100644 --- a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py +++ b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py @@ -1,8 +1,7 @@ from pathlib import Path import pytest - -from tests.model_snapshots._shared import run_semantic_segmentation_case +from _shared import run_semantic_segmentation_case @pytest.mark.parametrize( @@ -12,14 +11,6 @@ "breast", "/mnt/bioptic_tree/2019/08/728/2019_08728-01-T/2019_08728-01-T.mrxs", ), - ( - "colorectum", - "/mnt/data/MOU/colorectum/colorectal_cancer_2020-2024-06/2020_00106-01-N.mrxs", - ), - ( - "colon", - "/mnt/data/MOU/colon/comparison_of_scanners/FLASH2021_5638-02-T.mrxs", - ), ], ) def test_semantic_episeg(label: str, slide_path: str) -> None: From 115e8a2f32775296b101cafa98859f24d79dd670 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 26 Apr 2026 15:14:40 +0200 Subject: [PATCH 05/35] pyproj --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index df49f5d..57ce2e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,3 +19,6 @@ dependencies = [ [dependency-groups] dev = ["mypy>=1.18.2", "ruff>=0.14.6", "pytest>=8.4.2"] + +[tool.pytest.ini_options] +pythonpath = ["tests/model_snapshots"] From 8352582e3c20b867c9ad9454a9ffe8c96d9b4631 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 26 Apr 2026 16:23:23 +0200 Subject: [PATCH 06/35] better output --- builders/test_runner.py | 5 ++-- tests/model_snapshots/_shared.py | 42 ++++++++++---------------------- 2 files changed, 15 insertions(+), 32 deletions(-) diff --git a/builders/test_runner.py b/builders/test_runner.py index 3a0a578..97f125c 100644 --- a/builders/test_runner.py +++ b/builders/test_runner.py @@ -27,15 +27,14 @@ def run(self) -> dict: "tests/model_snapshots/", "-v", "--tb=short", + "--no-header", ], capture_output=True, text=True, ) return { - "returncode": result.returncode, - "stdout": result.stdout, - "stderr": result.stderr, "passed": result.returncode == 0, + "output": result.stdout, } diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index 5edd6c6..6b2b1bc 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -4,8 +4,6 @@ import os from pathlib import Path -import httpx -import lz4.frame import numpy as np import pytest from numpy.typing import NDArray @@ -22,7 +20,7 @@ def _sha256(path: Path) -> str: def _models_base_url() -> str: return os.environ.get( "MODEL_SERVICE_MODELS_BASE_URL", - "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", ) @@ -47,31 +45,13 @@ def _read_tile_from_slide( return np.asarray(tile, dtype=np.uint8) -def _classify( - model_id: str, - tile: NDArray[np.uint8], - timeout_s: float, -) -> float: - url = f"{_models_base_url()}/{model_id}/" - data = lz4.frame.compress(tile.tobytes()) - response = httpx.post(url, content=data, timeout=httpx.Timeout(timeout_s)) - response.raise_for_status() - return float(response.json()) +def _client(timeout_s: float = 600.0): + try: + from rationai import Client + except ImportError: + pytest.skip("Python package `rationai` is not installed.") - -def _segment( - model_id: str, - tile: NDArray[np.uint8], - timeout_s: float, -) -> NDArray[np.float16]: - h, w = tile.shape[:2] - url = f"{_models_base_url()}/{model_id}/" - data = lz4.frame.compress(tile.tobytes()) - response = httpx.post(url, content=data, timeout=httpx.Timeout(timeout_s)) - response.raise_for_status() - return np.frombuffer( - lz4.frame.decompress(response.content), dtype=np.float16 - ).reshape(-1, h, w) + return Client(models_base_url=_models_base_url(), timeout=timeout_s) def run_binary_classifier_case( @@ -86,7 +66,9 @@ def run_binary_classifier_case( tile = _read_tile_from_slide( slide_path=slide_path, tile_size=tile_size, level=level ) - actual_score = _classify(model_id=model_id, tile=tile, timeout_s=timeout_s) + + with _client(timeout_s=timeout_s) as client: + actual_score = float(client.models.classify_image(model=model_id, image=tile)) assert abs(actual_score - expected_score) <= tolerance, ( f"Binary score mismatch: expected={expected_score}, actual={actual_score}, " @@ -112,7 +94,9 @@ def run_semantic_segmentation_case( slide_path=slide_path, tile_size=tile_size, level=level ) expected = np.load(expected_array_path) - actual = _segment(model_id=model_id, tile=tile, timeout_s=timeout_s) + + with _client(timeout_s=timeout_s) as client: + actual = np.asarray(client.models.segment_image(model=model_id, image=tile)) if actual.shape != expected.shape: pytest.fail(f"Shape mismatch: expected={expected.shape}, actual={actual.shape}") From b020ebcc1dd7e57a54c37c1a513032feb661751f Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 26 Apr 2026 17:37:48 +0200 Subject: [PATCH 07/35] fixes --- helm/rayservice/applications/test-runner.yaml | 16 ++++++++++++++++ helm/rayservice/values.yaml | 1 + helm/rayservice/workers/cpu-workers.yaml | 5 +++++ helm/rayservice/workers/mig20-workers.yaml | 5 +++++ pvc/model-test-refs-pvc.yaml | 4 ++-- tests/model_snapshots/_shared.py | 6 +++--- 6 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 helm/rayservice/applications/test-runner.yaml diff --git a/helm/rayservice/applications/test-runner.yaml b/helm/rayservice/applications/test-runner.yaml new file mode 100644 index 0000000..45f5b68 --- /dev/null +++ b/helm/rayservice/applications/test-runner.yaml @@ -0,0 +1,16 @@ +- name: test-runner + import_path: builders.test_runner:app + route_prefix: /run-tests + runtime_env: + working_dir: https://github.com/RationAI/model-service/archive/refs/heads/feature/tests.zip?v5 + pip: + - git+https://github.com/RationAI/rationai-sdk-python.git + deployments: + - name: TestRunner + autoscaling_config: + min_replicas: 0 + max_replicas: 1 + target_ongoing_requests: 1 + ray_actor_options: + num_cpus: 4 + memory: 8589934592 diff --git a/helm/rayservice/values.yaml b/helm/rayservice/values.yaml index b6e24b7..7b90191 100644 --- a/helm/rayservice/values.yaml +++ b/helm/rayservice/values.yaml @@ -7,3 +7,4 @@ applications: - heatmap-builder - prostate-classifier-1 - virchow2 + - test-runner diff --git a/helm/rayservice/workers/cpu-workers.yaml b/helm/rayservice/workers/cpu-workers.yaml index 0c98d85..eda5986 100644 --- a/helm/rayservice/workers/cpu-workers.yaml +++ b/helm/rayservice/workers/cpu-workers.yaml @@ -45,6 +45,8 @@ template: mountPath: /mnt/cache - name: huggingface-cache mountPath: /mnt/huggingface_cache + - name: test-refs + mountPath: /mnt/test_refs volumes: - name: data persistentVolumeClaim: @@ -64,3 +66,6 @@ template: - name: huggingface-cache persistentVolumeClaim: claimName: huggingface-cache-pvc + - name: test-refs + persistentVolumeClaim: + claimName: model-test-refs-pvc diff --git a/helm/rayservice/workers/mig20-workers.yaml b/helm/rayservice/workers/mig20-workers.yaml index 77032d9..86571c7 100644 --- a/helm/rayservice/workers/mig20-workers.yaml +++ b/helm/rayservice/workers/mig20-workers.yaml @@ -55,6 +55,8 @@ template: mountPath: /mnt/cache - name: huggingface-cache mountPath: /mnt/huggingface_cache + - name: test-refs + mountPath: /mnt/test_refs volumes: - name: data persistentVolumeClaim: @@ -74,3 +76,6 @@ template: - name: huggingface-cache persistentVolumeClaim: claimName: huggingface-cache-pvc + - name: test-refs + persistentVolumeClaim: + claimName: model-test-refs-pvc diff --git a/pvc/model-test-refs-pvc.yaml b/pvc/model-test-refs-pvc.yaml index 4b3390a..ae08c4e 100644 --- a/pvc/model-test-refs-pvc.yaml +++ b/pvc/model-test-refs-pvc.yaml @@ -2,11 +2,11 @@ apiVersion: v1 kind: PersistentVolumeClaim metadata: name: model-test-refs-pvc - labels: - app: model-service-tests + namespace: rationai-jobs-ns spec: accessModes: - ReadWriteMany resources: requests: storage: 5Gi + storageClassName: nfs-csi diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index 6b2b1bc..99347b4 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -20,7 +20,7 @@ def _sha256(path: Path) -> str: def _models_base_url() -> str: return os.environ.get( "MODEL_SERVICE_MODELS_BASE_URL", - "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", ) @@ -45,7 +45,7 @@ def _read_tile_from_slide( return np.asarray(tile, dtype=np.uint8) -def _client(timeout_s: float = 600.0): +def _client(timeout_s: float = 1600.0): try: from rationai import Client except ImportError: @@ -82,7 +82,7 @@ def run_semantic_segmentation_case( expected_array_path: Path | str, tile_size: int = 1024, level: int = 0, - timeout_s: float = 600.0, + timeout_s: float = 1200.0, atol: float = 0.0, rtol: float = 0.0, ) -> None: From b74de01ba2fffb7f19dc1ff5f4bd80ac8b23a0ed Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 26 Apr 2026 18:51:21 +0200 Subject: [PATCH 08/35] sdk use --- tests/model_snapshots/_shared.py | 77 ++++++++----------- tests/model_snapshots/generate_references.py | 78 ++++++++++---------- 2 files changed, 73 insertions(+), 82 deletions(-) diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index 99347b4..187aaa6 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -1,22 +1,14 @@ from __future__ import annotations -import hashlib import os from pathlib import Path +from time import perf_counter import numpy as np import pytest from numpy.typing import NDArray -def _sha256(path: Path) -> str: - digest = hashlib.sha256() - with path.open("rb") as fh: - for chunk in iter(lambda: fh.read(1024 * 1024), b""): - digest.update(chunk) - return digest.hexdigest() - - def _models_base_url() -> str: return os.environ.get( "MODEL_SERVICE_MODELS_BASE_URL", @@ -24,20 +16,24 @@ def _models_base_url() -> str: ) -def _read_tile_from_slide( - slide_path: str, - tile_size: int, - level: int, -) -> NDArray[np.uint8]: +def _client(timeout_s: float = 1200.0): + try: + from rationai import Client + except ImportError: + pytest.skip("Python package `rationai` is not installed.") + return Client(models_base_url=_models_base_url(), timeout=timeout_s) + + +def _read_tile(slide_path: str, tile_size: int, level: int) -> NDArray[np.uint8]: try: from ratiopath.openslide import OpenSlide except ImportError: pytest.skip("Python package `ratiopath` is not installed.") with OpenSlide(slide_path) as slide: - extent_x, extent_y = slide.level_dimensions[level] - x = max(0, (extent_x - tile_size) // 2) - y = max(0, (extent_y - tile_size) // 2) + w, h = slide.level_dimensions[level] + x = max(0, (w - tile_size) // 2) + y = max(0, (h - tile_size) // 2) tile = slide.read_region_relative( (x, y), level, (tile_size, tile_size) ).convert("RGB") @@ -45,15 +41,6 @@ def _read_tile_from_slide( return np.asarray(tile, dtype=np.uint8) -def _client(timeout_s: float = 1600.0): - try: - from rationai import Client - except ImportError: - pytest.skip("Python package `rationai` is not installed.") - - return Client(models_base_url=_models_base_url(), timeout=timeout_s) - - def run_binary_classifier_case( model_id: str, slide_path: str, @@ -63,16 +50,19 @@ def run_binary_classifier_case( timeout_s: float = 600.0, tolerance: float = 0.00001, ) -> None: - tile = _read_tile_from_slide( - slide_path=slide_path, tile_size=tile_size, level=level - ) + tile = _read_tile(slide_path, tile_size, level) - with _client(timeout_s=timeout_s) as client: + with _client(timeout_s) as client: + t0 = perf_counter() actual_score = float(client.models.classify_image(model=model_id, image=tile)) + elapsed = perf_counter() - t0 + + print( + f"\n model={model_id} | tile={tile_size}px | time={elapsed:.2f}s | score={actual_score:.6f} | expected={expected_score:.6f}" + ) assert abs(actual_score - expected_score) <= tolerance, ( - f"Binary score mismatch: expected={expected_score}, actual={actual_score}, " - f"tolerance={tolerance}" + f"Binary score mismatch: expected={expected_score}, actual={actual_score}, tolerance={tolerance}" ) @@ -90,26 +80,23 @@ def run_semantic_segmentation_case( if not expected_array_path.exists(): pytest.fail(f"Reference file does not exist: {expected_array_path}") - tile = _read_tile_from_slide( - slide_path=slide_path, tile_size=tile_size, level=level - ) + tile = _read_tile(slide_path, tile_size, level) expected = np.load(expected_array_path) - with _client(timeout_s=timeout_s) as client: + with _client(timeout_s) as client: + t0 = perf_counter() actual = np.asarray(client.models.segment_image(model=model_id, image=tile)) + elapsed = perf_counter() - t0 + + max_diff = np.abs(actual.astype(np.float32) - expected.astype(np.float32)).max() + print( + f"\n model={model_id} | tile={tile_size}px | time={elapsed:.2f}s | shape={actual.shape} | max_diff={max_diff:.6f}" + ) if actual.shape != expected.shape: pytest.fail(f"Shape mismatch: expected={expected.shape}, actual={actual.shape}") if not np.allclose(actual, expected, rtol=rtol, atol=atol): - mismatch = np.abs(actual.astype(np.float32) - expected.astype(np.float32)).max() pytest.fail( - f"Output mismatch beyond tolerance (atol={atol}, rtol={rtol}, max_abs_diff={mismatch})" + f"Output mismatch beyond tolerance (atol={atol}, rtol={rtol}, max_abs_diff={max_diff})" ) - - -def verify_file_hash(path: Path, expected_hash: str) -> None: - actual_hash = _sha256(path) - assert actual_hash == expected_hash, ( - f"Hash mismatch for {path}: expected={expected_hash}, actual={actual_hash}" - ) diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index 04f831f..d85cec5 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -1,10 +1,8 @@ import json from pathlib import Path -import httpx -import lz4.frame import numpy as np -from _shared import _models_base_url, _read_tile_from_slide +from _shared import _client, _models_base_url, _read_tile OUT_DIR = Path("/mnt/test_refs") @@ -18,50 +16,56 @@ "tile_size": 1024, "level": 0, }, + { + "label": "colon", + "slide_path": "/mnt/data/MOU/colon/comparison_of_scanners/FLASH2021_5638-02-T.mrxs", + "model_id": "prostate-classifier-1", + "type": "binary", + "tile_size": 512, + "level": 0, + }, ] def generate_references() -> None: - base_url = _models_base_url() OUT_DIR.mkdir(parents=True, exist_ok=True) - print(f"== Generating references to {OUT_DIR} via {base_url} ==") - - for case in CASES: - label, model_id, mtype = case["label"], case["model_id"], case["type"] - print(f"\n[{label}] {model_id} ({mtype})") - - try: - tile = _read_tile_from_slide( - case["slide_path"], case["tile_size"], case["level"] - ) - except Exception as e: - print(f" -> Failed to read tile: {e}") - continue + print(f"== Generating references to {OUT_DIR} via {_models_base_url()} ==") - url = f"{base_url}/{model_id}/" - data = lz4.frame.compress(tile.tobytes()) + with _client(timeout_s=1200) as client: + for case in CASES: + label, model_id, mtype = case["label"], case["model_id"], case["type"] + print(f"\n[{label}] {model_id} ({mtype})") - try: - response = httpx.post(url, content=data, timeout=600) - response.raise_for_status() + try: + tile = _read_tile(case["slide_path"], case["tile_size"], case["level"]) + except Exception as e: + print(f" -> Failed to read tile: {e}") + continue - if mtype == "binary": - out_file = OUT_DIR / f"{label}_{model_id}_expected.json" - with out_file.open("w") as f: - json.dump({"expected_score": float(response.json())}, f, indent=2) - print(f" -> Saved {out_file}") + try: + if mtype == "binary": + score = float( + client.models.classify_image( + model=model_id, image=tile, timeout=600 + ) + ) + out_file = OUT_DIR / f"{label}_{model_id}_expected.json" + with out_file.open("w") as f: + json.dump({"expected_score": score}, f, indent=2) + print(f" -> Saved {out_file}") - elif mtype == "semantic": - h, w = tile.shape[:2] - arr = np.frombuffer( - lz4.frame.decompress(response.content), dtype=np.float16 - ).reshape(-1, h, w) - out_file = OUT_DIR / f"{label}_{model_id}_expected.npy" - np.save(out_file, arr) - print(f" -> Saved {out_file} shape={arr.shape}") + elif mtype == "semantic": + arr = np.asarray( + client.models.segment_image( + model=model_id, image=tile, timeout=1200 + ) + ) + out_file = OUT_DIR / f"{label}_{model_id}_expected.npy" + np.save(out_file, arr) + print(f" -> Saved {out_file} shape={arr.shape}") - except Exception as e: - print(f" -> ERROR: {e}") + except Exception as e: + print(f" -> ERROR: {e}") if __name__ == "__main__": From 50eb79bb763ab5f75afc99412de2e266f0f11f63 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 26 Apr 2026 18:54:13 +0200 Subject: [PATCH 09/35] better print --- builders/test_runner.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/builders/test_runner.py b/builders/test_runner.py index 97f125c..b818eb4 100644 --- a/builders/test_runner.py +++ b/builders/test_runner.py @@ -18,7 +18,7 @@ def __init__(self) -> None: ) @fastapi.post("/") - def run(self) -> dict: + def run(self) -> str: result = subprocess.run( [ sys.executable, @@ -28,14 +28,15 @@ def run(self) -> dict: "-v", "--tb=short", "--no-header", + "-s", + "--color=no", ], capture_output=True, text=True, ) - return { - "passed": result.returncode == 0, - "output": result.stdout, - } + return result.stdout + ( + f"\nSTDERR:\n{result.stderr}" if result.returncode != 0 else "" + ) app = TestRunner.bind() From e855481492699133ad1c04f2804d2b2944b5d709 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Tue, 28 Apr 2026 21:21:31 +0200 Subject: [PATCH 10/35] better print --- builders/test_runner.py | 25 +++++++++++++++++--- tests/model_snapshots/_shared.py | 20 ++++------------ tests/model_snapshots/generate_references.py | 12 +++++++--- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/builders/test_runner.py b/builders/test_runner.py index b818eb4..96f947c 100644 --- a/builders/test_runner.py +++ b/builders/test_runner.py @@ -1,6 +1,7 @@ import subprocess import sys +import requests from fastapi import FastAPI from ray import serve @@ -17,8 +18,24 @@ def __init__(self) -> None: check=True, ) + def _model_statuses(self) -> str: + try: + resp = requests.get("http://localhost:52365/api/serve/applications/") + resp.raise_for_status() + data = resp.json() + lines = ["Model statuses:"] + for app_name, app_info in data.get("applications", {}).items(): + for dep_name, dep_info in app_info.get("deployments", {}).items(): + status = dep_info.get("status", "UNKNOWN") + lines.append(f" {app_name} ({dep_name}): {status}") + return "\n".join(lines) + except Exception as e: + return f"Could not fetch model statuses: {e}" + @fastapi.post("/") def run(self) -> str: + statuses = self._model_statuses() + result = subprocess.run( [ sys.executable, @@ -34,9 +51,11 @@ def run(self) -> str: capture_output=True, text=True, ) - return result.stdout + ( - f"\nSTDERR:\n{result.stderr}" if result.returncode != 0 else "" - ) + + output = statuses + "\n\n" + result.stdout + if result.returncode != 0: + output += f"\nSTDERR:\n{result.stderr}" + return output app = TestRunner.bind() diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index 187aaa6..8f560fd 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -7,6 +7,8 @@ import numpy as np import pytest from numpy.typing import NDArray +from rationai import Client +from ratiopath.openslide import OpenSlide def _models_base_url() -> str: @@ -16,20 +18,7 @@ def _models_base_url() -> str: ) -def _client(timeout_s: float = 1200.0): - try: - from rationai import Client - except ImportError: - pytest.skip("Python package `rationai` is not installed.") - return Client(models_base_url=_models_base_url(), timeout=timeout_s) - - def _read_tile(slide_path: str, tile_size: int, level: int) -> NDArray[np.uint8]: - try: - from ratiopath.openslide import OpenSlide - except ImportError: - pytest.skip("Python package `ratiopath` is not installed.") - with OpenSlide(slide_path) as slide: w, h = slide.level_dimensions[level] x = max(0, (w - tile_size) // 2) @@ -37,7 +26,6 @@ def _read_tile(slide_path: str, tile_size: int, level: int) -> NDArray[np.uint8] tile = slide.read_region_relative( (x, y), level, (tile_size, tile_size) ).convert("RGB") - return np.asarray(tile, dtype=np.uint8) @@ -52,7 +40,7 @@ def run_binary_classifier_case( ) -> None: tile = _read_tile(slide_path, tile_size, level) - with _client(timeout_s) as client: + with Client(models_base_url=_models_base_url(), timeout=timeout_s) as client: t0 = perf_counter() actual_score = float(client.models.classify_image(model=model_id, image=tile)) elapsed = perf_counter() - t0 @@ -83,7 +71,7 @@ def run_semantic_segmentation_case( tile = _read_tile(slide_path, tile_size, level) expected = np.load(expected_array_path) - with _client(timeout_s) as client: + with Client(models_base_url=_models_base_url(), timeout=timeout_s) as client: t0 = perf_counter() actual = np.asarray(client.models.segment_image(model=model_id, image=tile)) elapsed = perf_counter() - t0 diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index d85cec5..35b3840 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -1,11 +1,17 @@ import json +import os from pathlib import Path import numpy as np -from _shared import _client, _models_base_url, _read_tile +from _shared import _read_tile +from rationai import Client OUT_DIR = Path("/mnt/test_refs") +MODELS_BASE_URL = os.environ.get( + "MODEL_SERVICE_MODELS_BASE_URL", + "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", +) CASES = [ { @@ -29,9 +35,9 @@ def generate_references() -> None: OUT_DIR.mkdir(parents=True, exist_ok=True) - print(f"== Generating references to {OUT_DIR} via {_models_base_url()} ==") + print(f"== Generating references to {OUT_DIR} via {MODELS_BASE_URL} ==") - with _client(timeout_s=1200) as client: + with Client(models_base_url=MODELS_BASE_URL, timeout=1200) as client: for case in CASES: label, model_id, mtype = case["label"], case["model_id"], case["type"] print(f"\n[{label}] {model_id} ({mtype})") From 5e7654d234765421d4ad423ad2811e5666132fd6 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Tue, 28 Apr 2026 21:49:46 +0200 Subject: [PATCH 11/35] fix port --- builders/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builders/test_runner.py b/builders/test_runner.py index 96f947c..5d6ef2b 100644 --- a/builders/test_runner.py +++ b/builders/test_runner.py @@ -20,7 +20,7 @@ def __init__(self) -> None: def _model_statuses(self) -> str: try: - resp = requests.get("http://localhost:52365/api/serve/applications/") + resp = requests.get("http://localhost:8265/api/serve/applications/") resp.raise_for_status() data = resp.json() lines = ["Model statuses:"] From e7a7987684702469dc2087d9cd3624b1f644006f Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Mon, 4 May 2026 19:58:17 +0200 Subject: [PATCH 12/35] fix: change middle to x,y points --- tests/model_snapshots/_shared.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index 8f560fd..a0581a7 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -18,13 +18,15 @@ def _models_base_url() -> str: ) -def _read_tile(slide_path: str, tile_size: int, level: int) -> NDArray[np.uint8]: +def _read_tile_at( + slide_path: str, x: int, y: int, tile_size: int, level: int +) -> NDArray[np.uint8]: with OpenSlide(slide_path) as slide: - w, h = slide.level_dimensions[level] - x = max(0, (w - tile_size) // 2) - y = max(0, (h - tile_size) // 2) + downsample = slide.level_downsamples[level] + x_rel = int(x / downsample) + y_rel = int(y / downsample) tile = slide.read_region_relative( - (x, y), level, (tile_size, tile_size) + (x_rel, y_rel), level, (tile_size, tile_size) ).convert("RGB") return np.asarray(tile, dtype=np.uint8) @@ -32,13 +34,15 @@ def _read_tile(slide_path: str, tile_size: int, level: int) -> NDArray[np.uint8] def run_binary_classifier_case( model_id: str, slide_path: str, + x: int, + y: int, expected_score: float, tile_size: int = 512, level: int = 0, timeout_s: float = 600.0, tolerance: float = 0.00001, ) -> None: - tile = _read_tile(slide_path, tile_size, level) + tile = _read_tile_at(slide_path, x, y, tile_size, level) with Client(models_base_url=_models_base_url(), timeout=timeout_s) as client: t0 = perf_counter() @@ -57,6 +61,8 @@ def run_binary_classifier_case( def run_semantic_segmentation_case( model_id: str, slide_path: str, + x: int, + y: int, expected_array_path: Path | str, tile_size: int = 1024, level: int = 0, @@ -68,7 +74,7 @@ def run_semantic_segmentation_case( if not expected_array_path.exists(): pytest.fail(f"Reference file does not exist: {expected_array_path}") - tile = _read_tile(slide_path, tile_size, level) + tile = _read_tile_at(slide_path, x, y, tile_size, level) expected = np.load(expected_array_path) with Client(models_base_url=_models_base_url(), timeout=timeout_s) as client: From c055043a1144eabbe5336489c7e54bbf3c773704 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Mon, 4 May 2026 19:58:36 +0200 Subject: [PATCH 13/35] generate new refs --- tests/model_snapshots/generate_references.py | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index 35b3840..5b6822b 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -3,7 +3,7 @@ from pathlib import Path import numpy as np -from _shared import _read_tile +from _shared import _read_tile_at from rationai import Client @@ -15,20 +15,14 @@ CASES = [ { - "label": "breast", - "slide_path": "/mnt/bioptic_tree/2019/08/728/2019_08728-01-T/2019_08728-01-T.mrxs", - "model_id": "episeg-1", - "type": "semantic", - "tile_size": 1024, - "level": 0, - }, - { - "label": "colon", - "slide_path": "/mnt/data/MOU/colon/comparison_of_scanners/FLASH2021_5638-02-T.mrxs", + "label": "prostate_positive", + "slide_path": "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_2386-06-1.mrxs", "model_id": "prostate-classifier-1", "type": "binary", "tile_size": 512, "level": 0, + "x": 43390, + "y": 45865, }, ] @@ -43,7 +37,13 @@ def generate_references() -> None: print(f"\n[{label}] {model_id} ({mtype})") try: - tile = _read_tile(case["slide_path"], case["tile_size"], case["level"]) + tile = _read_tile_at( + case["slide_path"], + case["x"], + case["y"], + case["tile_size"], + case["level"], + ) except Exception as e: print(f" -> Failed to read tile: {e}") continue From fa43f530ecddc05ed67e0154104ce42cb148f721 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Mon, 4 May 2026 19:58:56 +0200 Subject: [PATCH 14/35] test: new test --- .../test_binary_classifier_model_snapshot.py | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/tests/model_snapshots/test_binary_classifier_model_snapshot.py b/tests/model_snapshots/test_binary_classifier_model_snapshot.py index 87709a7..7060927 100644 --- a/tests/model_snapshots/test_binary_classifier_model_snapshot.py +++ b/tests/model_snapshots/test_binary_classifier_model_snapshot.py @@ -6,29 +6,40 @@ @pytest.mark.parametrize( - "label, slide_path", + "label, slide_path, x, y", [ ( - "colon", - "/mnt/data/MOU/colon/comparison_of_scanners/FLASH2021_5638-02-T.mrxs", + "prostate_positive", + "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_2386-06-1.mrxs", + 43390, + 45865, ), ], ) -def test_binary_classifier(label: str, slide_path: str) -> None: +def test_prostate_classifier_positive( + label: str, slide_path: str, x: int, y: int +) -> None: model_id = "prostate-classifier-1" json_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.json") - if json_path.exists(): - with json_path.open() as f: - expected_score = json.load(f)["expected_score"] - else: + if not json_path.exists(): pytest.skip( f"Reference file {json_path} missing. Run generate_references.py first." ) + with json_path.open() as f: + expected_score = json.load(f)["expected_score"] + + assert expected_score >= 0.5, ( + f"Reference score {expected_score:.4f} is below positive threshold 0.5 — " + "was the reference generated on the correct tile?" + ) + run_binary_classifier_case( model_id=model_id, slide_path=slide_path, + x=x, + y=y, expected_score=expected_score, tile_size=512, level=0, From 968af4f5376758de6b7d32aa7f89ca671424a8f9 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Mon, 4 May 2026 21:45:40 +0200 Subject: [PATCH 15/35] test fixes Co-authored-by: Copilot --- builders/test_runner.py | 18 +--- helm/rayservice/applications/test-runner.yaml | 6 +- tests/README-model-snapshots.md | 85 ------------------- tests/model_snapshots/_shared.py | 46 ++++++++++ tests/model_snapshots/generate_references.py | 38 ++++++++- .../test_binary_classifier_model_snapshot.py | 49 +++++++++-- 6 files changed, 128 insertions(+), 114 deletions(-) delete mode 100644 tests/README-model-snapshots.md diff --git a/builders/test_runner.py b/builders/test_runner.py index 5d6ef2b..01d7bdb 100644 --- a/builders/test_runner.py +++ b/builders/test_runner.py @@ -1,7 +1,6 @@ import subprocess import sys -import requests from fastapi import FastAPI from ray import serve @@ -18,23 +17,8 @@ def __init__(self) -> None: check=True, ) - def _model_statuses(self) -> str: - try: - resp = requests.get("http://localhost:8265/api/serve/applications/") - resp.raise_for_status() - data = resp.json() - lines = ["Model statuses:"] - for app_name, app_info in data.get("applications", {}).items(): - for dep_name, dep_info in app_info.get("deployments", {}).items(): - status = dep_info.get("status", "UNKNOWN") - lines.append(f" {app_name} ({dep_name}): {status}") - return "\n".join(lines) - except Exception as e: - return f"Could not fetch model statuses: {e}" - @fastapi.post("/") def run(self) -> str: - statuses = self._model_statuses() result = subprocess.run( [ @@ -52,7 +36,7 @@ def run(self) -> str: text=True, ) - output = statuses + "\n\n" + result.stdout + output = result.stdout if result.returncode != 0: output += f"\nSTDERR:\n{result.stderr}" return output diff --git a/helm/rayservice/applications/test-runner.yaml b/helm/rayservice/applications/test-runner.yaml index 45f5b68..3aad245 100644 --- a/helm/rayservice/applications/test-runner.yaml +++ b/helm/rayservice/applications/test-runner.yaml @@ -2,7 +2,7 @@ import_path: builders.test_runner:app route_prefix: /run-tests runtime_env: - working_dir: https://github.com/RationAI/model-service/archive/refs/heads/feature/tests.zip?v5 + working_dir: https://github.com/RationAI/model-service/archive/refs/heads/feature/tests.zip?v10 pip: - git+https://github.com/RationAI/rationai-sdk-python.git deployments: @@ -12,5 +12,5 @@ max_replicas: 1 target_ongoing_requests: 1 ray_actor_options: - num_cpus: 4 - memory: 8589934592 + num_cpus: 1 + memory: 2147483648 diff --git a/tests/README-model-snapshots.md b/tests/README-model-snapshots.md deleted file mode 100644 index 3d2fb7b..0000000 --- a/tests/README-model-snapshots.md +++ /dev/null @@ -1,85 +0,0 @@ -# Model snapshot tests - -This repository contains end-to-end snapshot tests in `tests/model_snapshots/`. - -Per-model test files: - -- `tests/model_snapshots/test_binary_classifier_model_snapshot.py` -- `tests/model_snapshots/test_semantic_segmentation_model_snapshot.py` - -Shared files: - -- `tests/model_snapshots/_shared.py` -- `tests/model_snapshots/run_all_model_snapshot_tests.py` - -These tests are meant as post-deploy use-case checks (not only liveness checks): - -- they execute a real request path through Ray Serve deployment -- they verify request processing success (timeouts/errors fail the test) -- they verify result correctness for each deployment (`binary_classifier`, `semantic_segmentation`) -- they touch real slide paths, helping catch mount/filesystem issues - -Each test calls its deployment-specific endpoint: - -- binary classifier: SDK call `client.models.classify_image("prostate-classifier-1", tile)` -- semantic segmentation: SDK call `client.models.segment_image("episeg-1", tile)` - -Input tile is read directly from a real WSI using `ratiopath.openslide.OpenSlide`. - -## Adding a new model test - -Přidání nového modelu do testů je nyní velmi jednoduché: - -1. Vytvořte nový soubor v `tests/model_snapshots/`, např. `test_novy_model_snapshot.py`. -2. Importujte a zavolejte příslušnou case funkci z `_shared.py` a předejte jí konfiguraci napřímo parametrem: - -```python -from pathlib import Path -from tests.model_snapshots._shared import run_binary_classifier_case - -def test_novy_model_snapshot() -> None: - # Parametry si rovnou zadefinujte v testovacím souboru - run_binary_classifier_case( - model_id="my-new-endpoint-id", - slide_path="/mnt/bioptic_tree/.../slide.mrxs", - expected_score=0.987, - tile_size=512, - level=0, - ) -``` - -Tím se stane automaticky součástí sady `pytest tests/model_snapshots`. - -## Global environment variables - -Common (pro celý cluster a všechny testy): - -- `MODEL_SERVICE_MODELS_BASE_URL` (default: `http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000`) - -Očekávané skóre/pole a cesty k datasetům pro stávající modely (`episeg-1` a `prostate-classifier-1`) se tahají z těchto proměnných ve stávajících testovacích souborech, pokud chcete zachovat původní CI chování (případně se dají časem snadno zahardkódit do testovacího souboru): - -- `MODEL_TEST_BINARY_EXPECTED_SCORE` -- `MODEL_TEST_SEMANTIC_EXPECTED_ARRAY_PATH` - -## Example (PowerShell) - -```powershell -$env:MODEL_TEST_BINARY_EXPECTED_SCORE = "0.9732" -$env:MODEL_TEST_SEMANTIC_EXPECTED_ARRAY_PATH = "/mnt/path/to/reference/semantic_expected.npy" - -# Models base URL is resolved directly from SDK fallback inside kubernetes: -# http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000 - -python tests/model_snapshots/run_all_model_snapshot_tests.py - -# Alternative: -python -m pytest tests/model_snapshots -q -``` - -## SDK dependency - -Install SDK package so that `import rationai` works in tests, e.g.: - -```powershell -python -m pip install git+https://github.com/RationAI/rationai-sdk-python.git -``` diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index a0581a7..260c42e 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -41,6 +41,8 @@ def run_binary_classifier_case( level: int = 0, timeout_s: float = 600.0, tolerance: float = 0.00001, + expected_is_positive: bool | None = None, + threshold: float = 0.5, ) -> None: tile = _read_tile_at(slide_path, x, y, tile_size, level) @@ -53,6 +55,14 @@ def run_binary_classifier_case( f"\n model={model_id} | tile={tile_size}px | time={elapsed:.2f}s | score={actual_score:.6f} | expected={expected_score:.6f}" ) + if expected_is_positive is not None: + actual_is_positive = actual_score >= threshold + assert actual_is_positive == expected_is_positive, ( + "Binary class mismatch: " + f"expected_is_positive={expected_is_positive}, " + f"actual_score={actual_score:.6f}, threshold={threshold:.3f}" + ) + assert abs(actual_score - expected_score) <= tolerance, ( f"Binary score mismatch: expected={expected_score}, actual={actual_score}, tolerance={tolerance}" ) @@ -94,3 +104,39 @@ def run_semantic_segmentation_case( pytest.fail( f"Output mismatch beyond tolerance (atol={atol}, rtol={rtol}, max_abs_diff={max_diff})" ) + + +def run_embed_case( + model_id: str, + slide_path: str, + expected_array_path: Path | str, + tile_size: int = 224, + level: int = 0, + timeout_s: float = 1200.0, + atol: float = 0.0, + rtol: float = 0.0, +) -> None: + expected_array_path = Path(expected_array_path) + if not expected_array_path.exists(): + pytest.fail(f"Reference file does not exist: {expected_array_path}") + + tile = _read_tile(slide_path, tile_size, level) + expected = np.load(expected_array_path) + + with Client(models_base_url=_models_base_url(), timeout=timeout_s) as client: + t0 = perf_counter() + actual = np.asarray(client.models.embed_image(model=model_id, image=tile)) + elapsed = perf_counter() - t0 + + max_diff = np.abs(actual.astype(np.float32) - expected.astype(np.float32)).max() + print( + f"\n model={model_id} | tile={tile_size}px | time={elapsed:.2f}s | shape={actual.shape} | max_diff={max_diff:.6f}" + ) + + if actual.shape != expected.shape: + pytest.fail(f"Shape mismatch: expected={expected.shape}, actual={actual.shape}") + + if not np.allclose(actual, expected, rtol=rtol, atol=atol): + pytest.fail( + f"Output mismatch beyond tolerance (atol={atol}, rtol={rtol}, max_abs_diff={max_diff})" + ) diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index 5b6822b..c7251a8 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -12,6 +12,7 @@ "MODEL_SERVICE_MODELS_BASE_URL", "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", ) +BINARY_POSITIVE_THRESHOLD = 0.5 CASES = [ { @@ -24,6 +25,16 @@ "x": 43390, "y": 45865, }, + { + "label": "prostate_negative", + "slide_path": "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_0845-02-0.mrxs", + "model_id": "prostate-classifier-1", + "type": "binary", + "tile_size": 512, + "level": 0, + "x": 34467, + "y": 104964, + }, ] @@ -57,7 +68,23 @@ def generate_references() -> None: ) out_file = OUT_DIR / f"{label}_{model_id}_expected.json" with out_file.open("w") as f: - json.dump({"expected_score": score}, f, indent=2) + json.dump( + { + "label": label, + "model_id": model_id, + "slide_path": case["slide_path"], + "x": case["x"], + "y": case["y"], + "tile_size": case["tile_size"], + "level": case["level"], + "threshold": BINARY_POSITIVE_THRESHOLD, + "expected_is_positive": score + >= BINARY_POSITIVE_THRESHOLD, + "expected_score": score, + }, + f, + indent=2, + ) print(f" -> Saved {out_file}") elif mtype == "semantic": @@ -70,6 +97,15 @@ def generate_references() -> None: np.save(out_file, arr) print(f" -> Saved {out_file} shape={arr.shape}") + elif mtype == "embed": + arr = np.asarray( + client.models.embed_image( + model=model_id, image=tile, timeout=1200 + ) + ) + out_file = OUT_DIR / f"{label}_{model_id}_expected.npy" + np.save(out_file, arr) + print(f" -> Saved {out_file} shape={arr.shape}") except Exception as e: print(f" -> ERROR: {e}") diff --git a/tests/model_snapshots/test_binary_classifier_model_snapshot.py b/tests/model_snapshots/test_binary_classifier_model_snapshot.py index 7060927..f46b3f6 100644 --- a/tests/model_snapshots/test_binary_classifier_model_snapshot.py +++ b/tests/model_snapshots/test_binary_classifier_model_snapshot.py @@ -5,19 +5,30 @@ from _shared import run_binary_classifier_case +BINARY_POSITIVE_THRESHOLD = 0.5 + + @pytest.mark.parametrize( - "label, slide_path, x, y", + "label, slide_path, x, y, is_positive", [ ( "prostate_positive", "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_2386-06-1.mrxs", 43390, 45865, + True, + ), + ( + "prostate_negative", + "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_0845-02-0.mrxs", + 34467, + 104964, + False, ), ], ) -def test_prostate_classifier_positive( - label: str, slide_path: str, x: int, y: int +def test_prostate_classifier_snapshot( + label: str, slide_path: str, x: int, y: int, is_positive: bool ) -> None: model_id = "prostate-classifier-1" json_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.json") @@ -28,12 +39,32 @@ def test_prostate_classifier_positive( ) with json_path.open() as f: - expected_score = json.load(f)["expected_score"] + reference = json.load(f) - assert expected_score >= 0.5, ( - f"Reference score {expected_score:.4f} is below positive threshold 0.5 — " - "was the reference generated on the correct tile?" - ) + assert reference.get("label") == label + assert reference.get("model_id") == model_id + assert reference.get("slide_path") == slide_path + assert reference.get("x") == x + assert reference.get("y") == y + assert reference.get("tile_size") == 512 + assert reference.get("level") == 0 + + expected_score = reference["expected_score"] + threshold = reference.get("threshold", BINARY_POSITIVE_THRESHOLD) + expected_is_positive = reference.get("expected_is_positive") + assert expected_is_positive is not None + assert expected_is_positive == is_positive + + if is_positive: + assert expected_score >= threshold, ( + f"Reference score {expected_score:.4f} is below positive threshold {threshold:.3f} — " + "was the reference generated on the correct tile?" + ) + else: + assert expected_score < threshold, ( + f"Reference score {expected_score:.4f} is above negative threshold {threshold:.3f} — " + "was the reference generated on the correct tile?" + ) run_binary_classifier_case( model_id=model_id, @@ -43,4 +74,6 @@ def test_prostate_classifier_positive( expected_score=expected_score, tile_size=512, level=0, + expected_is_positive=expected_is_positive, + threshold=threshold, ) From d78cddb409f25ebfbd35cacfeb5692426ce8538f Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Mon, 4 May 2026 21:54:28 +0200 Subject: [PATCH 16/35] fix: different coordinates Co-authored-by: Copilot --- tests/model_snapshots/generate_references.py | 4 ++-- .../model_snapshots/test_binary_classifier_model_snapshot.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index c7251a8..01aeda1 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -32,8 +32,8 @@ "type": "binary", "tile_size": 512, "level": 0, - "x": 34467, - "y": 104964, + "x": 31017, + "y": 113220, }, ] diff --git a/tests/model_snapshots/test_binary_classifier_model_snapshot.py b/tests/model_snapshots/test_binary_classifier_model_snapshot.py index f46b3f6..be433d1 100644 --- a/tests/model_snapshots/test_binary_classifier_model_snapshot.py +++ b/tests/model_snapshots/test_binary_classifier_model_snapshot.py @@ -21,8 +21,8 @@ ( "prostate_negative", "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_0845-02-0.mrxs", - 34467, - 104964, + 31017, + 113220, False, ), ], From 324a71319758154aa6260f197a359d6d7702ae4f Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Mon, 4 May 2026 22:07:00 +0200 Subject: [PATCH 17/35] fix coors Co-authored-by: Copilot --- tests/model_snapshots/generate_references.py | 4 ++-- .../model_snapshots/test_binary_classifier_model_snapshot.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index 01aeda1..18dedfb 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -32,8 +32,8 @@ "type": "binary", "tile_size": 512, "level": 0, - "x": 31017, - "y": 113220, + "x": 32950, + "y": 108990, }, ] diff --git a/tests/model_snapshots/test_binary_classifier_model_snapshot.py b/tests/model_snapshots/test_binary_classifier_model_snapshot.py index be433d1..e2c13e9 100644 --- a/tests/model_snapshots/test_binary_classifier_model_snapshot.py +++ b/tests/model_snapshots/test_binary_classifier_model_snapshot.py @@ -21,8 +21,8 @@ ( "prostate_negative", "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_0845-02-0.mrxs", - 31017, - 113220, + 32950, + 108990, False, ), ], From 09138635d62531f7b0c6e07c9ccc5ee5eaf3befc Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Tue, 5 May 2026 20:51:40 +0200 Subject: [PATCH 18/35] feat: add virchow2 test Co-authored-by: Copilot --- tests/model_snapshots/_shared.py | 4 ++- tests/model_snapshots/generate_references.py | 36 ++++++++----------- .../test_virchow2_model_snapshot.py | 30 ++++++++++++++++ 3 files changed, 47 insertions(+), 23 deletions(-) create mode 100644 tests/model_snapshots/test_virchow2_model_snapshot.py diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index 260c42e..285adcb 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -109,6 +109,8 @@ def run_semantic_segmentation_case( def run_embed_case( model_id: str, slide_path: str, + x: int, + y: int, expected_array_path: Path | str, tile_size: int = 224, level: int = 0, @@ -120,7 +122,7 @@ def run_embed_case( if not expected_array_path.exists(): pytest.fail(f"Reference file does not exist: {expected_array_path}") - tile = _read_tile(slide_path, tile_size, level) + tile = _read_tile_at(slide_path, x, y, tile_size, level) expected = np.load(expected_array_path) with Client(models_base_url=_models_base_url(), timeout=timeout_s) as client: diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index 18dedfb..3bba3e0 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -14,28 +14,20 @@ ) BINARY_POSITIVE_THRESHOLD = 0.5 -CASES = [ - { - "label": "prostate_positive", - "slide_path": "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_2386-06-1.mrxs", - "model_id": "prostate-classifier-1", - "type": "binary", - "tile_size": 512, - "level": 0, - "x": 43390, - "y": 45865, - }, - { - "label": "prostate_negative", - "slide_path": "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_0845-02-0.mrxs", - "model_id": "prostate-classifier-1", - "type": "binary", - "tile_size": 512, - "level": 0, - "x": 32950, - "y": 108990, - }, -] +# Keep only one active case here. Store other candidate slides in new_images.txt +# and swap them in when you want to regenerate a different reference. +ACTIVE_CASE = { + "label": "virchow2", + "slide_path": "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_1367-01-0.mrxs", + "model_id": "virchow2", + "type": "embed", + "tile_size": 224, + "level": 0, + "x": 40000, + "y": 70000, +} + +CASES = [ACTIVE_CASE] def generate_references() -> None: diff --git a/tests/model_snapshots/test_virchow2_model_snapshot.py b/tests/model_snapshots/test_virchow2_model_snapshot.py new file mode 100644 index 0000000..dd1ba92 --- /dev/null +++ b/tests/model_snapshots/test_virchow2_model_snapshot.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import pytest +from _shared import run_embed_case + + +@pytest.mark.parametrize( + "label, slide_path, x, y", + [ + ( + "prostate", + "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_1367-01-0.mrxs", + 40000, + 70000, + ), + ], +) +def test_virchow2(label: str, slide_path: str, x: int, y: int) -> None: + model_id = "virchow2" + expected_array_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.npy") + + run_embed_case( + model_id=model_id, + slide_path=slide_path, + x=x, + y=y, + expected_array_path=expected_array_path, + tile_size=224, + level=0, + ) From e72e9001df1240279d3a916c65aa1b471ec5624b Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Tue, 5 May 2026 20:54:00 +0200 Subject: [PATCH 19/35] fix: name --- tests/model_snapshots/test_virchow2_model_snapshot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/model_snapshots/test_virchow2_model_snapshot.py b/tests/model_snapshots/test_virchow2_model_snapshot.py index dd1ba92..38f6c1d 100644 --- a/tests/model_snapshots/test_virchow2_model_snapshot.py +++ b/tests/model_snapshots/test_virchow2_model_snapshot.py @@ -8,7 +8,7 @@ "label, slide_path, x, y", [ ( - "prostate", + "virchow2", "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_1367-01-0.mrxs", 40000, 70000, From 4d13c8384f0bf9f2e837d97788dfbfdaa52b55b2 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Tue, 5 May 2026 22:12:52 +0200 Subject: [PATCH 20/35] fix: tolerance --- tests/model_snapshots/_shared.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index 285adcb..6c36ff7 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -40,7 +40,6 @@ def run_binary_classifier_case( tile_size: int = 512, level: int = 0, timeout_s: float = 600.0, - tolerance: float = 0.00001, expected_is_positive: bool | None = None, threshold: float = 0.5, ) -> None: @@ -58,15 +57,10 @@ def run_binary_classifier_case( if expected_is_positive is not None: actual_is_positive = actual_score >= threshold assert actual_is_positive == expected_is_positive, ( - "Binary class mismatch: " - f"expected_is_positive={expected_is_positive}, " + f"Binary class mismatch: expected_is_positive={expected_is_positive}, " f"actual_score={actual_score:.6f}, threshold={threshold:.3f}" ) - assert abs(actual_score - expected_score) <= tolerance, ( - f"Binary score mismatch: expected={expected_score}, actual={actual_score}, tolerance={tolerance}" - ) - def run_semantic_segmentation_case( model_id: str, @@ -115,30 +109,35 @@ def run_embed_case( tile_size: int = 224, level: int = 0, timeout_s: float = 1200.0, - atol: float = 0.0, - rtol: float = 0.0, + min_cosine_similarity: float = 0.999, ) -> None: expected_array_path = Path(expected_array_path) if not expected_array_path.exists(): pytest.fail(f"Reference file does not exist: {expected_array_path}") tile = _read_tile_at(slide_path, x, y, tile_size, level) - expected = np.load(expected_array_path) + expected = np.load(expected_array_path).flatten().astype(np.float32) with Client(models_base_url=_models_base_url(), timeout=timeout_s) as client: t0 = perf_counter() - actual = np.asarray(client.models.embed_image(model=model_id, image=tile)) + actual = ( + np.asarray(client.models.embed_image(model=model_id, image=tile)) + .flatten() + .astype(np.float32) + ) elapsed = perf_counter() - t0 - max_diff = np.abs(actual.astype(np.float32) - expected.astype(np.float32)).max() + similarity = float( + np.dot(actual, expected) / (np.linalg.norm(actual) * np.linalg.norm(expected)) + ) print( - f"\n model={model_id} | tile={tile_size}px | time={elapsed:.2f}s | shape={actual.shape} | max_diff={max_diff:.6f}" + f"\n model={model_id} | tile={tile_size}px | time={elapsed:.2f}s | shape={actual.shape} | cosine_similarity={similarity:.6f}" ) if actual.shape != expected.shape: pytest.fail(f"Shape mismatch: expected={expected.shape}, actual={actual.shape}") - if not np.allclose(actual, expected, rtol=rtol, atol=atol): + if similarity < min_cosine_similarity: pytest.fail( - f"Output mismatch beyond tolerance (atol={atol}, rtol={rtol}, max_abs_diff={max_diff})" + f"Embedding similarity too low: {similarity:.6f} < {min_cosine_similarity}" ) From 7f2924a5a54e9be346316f18aa38009566bfd757 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Wed, 6 May 2026 13:57:46 +0200 Subject: [PATCH 21/35] test: semantic test Co-authored-by: Copilot --- .../test_semantic_segmentation_model_snapshot.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py index 294f682..54e9c56 100644 --- a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py +++ b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py @@ -5,21 +5,25 @@ @pytest.mark.parametrize( - "label, slide_path", + "label, slide_path, x, y", [ ( - "breast", - "/mnt/bioptic_tree/2019/08/728/2019_08728-01-T/2019_08728-01-T.mrxs", + "colorectum_kos04", + "/mnt/data/MOU/colorectum/tissue_microarray/he/KOS04.mrxs", + 46000, + 82400, ), ], ) -def test_semantic_episeg(label: str, slide_path: str) -> None: +def test_semantic_episeg(label: str, slide_path: str, x: int, y: int) -> None: model_id = "episeg-1" expected_array_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.npy") run_semantic_segmentation_case( model_id=model_id, slide_path=slide_path, + x=x, + y=y, expected_array_path=expected_array_path, tile_size=1024, level=0, From 1995689be56b638e5d926b84471a90164c03b1fa Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Wed, 6 May 2026 16:44:37 +0200 Subject: [PATCH 22/35] test: print Co-authored-by: Copilot --- tests/model_snapshots/_shared.py | 82 ++++++++++++++++--- .../test_binary_classifier_model_snapshot.py | 1 + ...st_semantic_segmentation_model_snapshot.py | 3 + .../test_virchow2_model_snapshot.py | 1 + 4 files changed, 76 insertions(+), 11 deletions(-) diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index 6c36ff7..c3e751e 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -42,6 +42,7 @@ def run_binary_classifier_case( timeout_s: float = 600.0, expected_is_positive: bool | None = None, threshold: float = 0.5, + case_name: str | None = None, ) -> None: tile = _read_tile_at(slide_path, x, y, tile_size, level) @@ -50,9 +51,8 @@ def run_binary_classifier_case( actual_score = float(client.models.classify_image(model=model_id, image=tile)) elapsed = perf_counter() - t0 - print( - f"\n model={model_id} | tile={tile_size}px | time={elapsed:.2f}s | score={actual_score:.6f} | expected={expected_score:.6f}" - ) + delta = actual_score - expected_score + name = case_name or "case" if expected_is_positive is not None: actual_is_positive = actual_score >= threshold @@ -61,6 +61,13 @@ def run_binary_classifier_case( f"actual_score={actual_score:.6f}, threshold={threshold:.3f}" ) + print(f"\n/{model_id}") + print("passed") + print( + f"{name} stats: score={actual_score:.6f} expected={expected_score:.6f} " + f"delta={delta:+.6f} threshold={threshold:.3f}" + ) + def run_semantic_segmentation_case( model_id: str, @@ -71,8 +78,12 @@ def run_semantic_segmentation_case( tile_size: int = 1024, level: int = 0, timeout_s: float = 1200.0, - atol: float = 0.0, - rtol: float = 0.0, + atol: float = 1e-6, + rtol: float = 1e-5, + epithelium_threshold: float | None = None, + min_epithelium_fraction: float | None = None, + epithelium_channel: int | None = None, + case_name: str | None = None, ) -> None: expected_array_path = Path(expected_array_path) if not expected_array_path.exists(): @@ -87,9 +98,20 @@ def run_semantic_segmentation_case( elapsed = perf_counter() - t0 max_diff = np.abs(actual.astype(np.float32) - expected.astype(np.float32)).max() - print( - f"\n model={model_id} | tile={tile_size}px | time={elapsed:.2f}s | shape={actual.shape} | max_diff={max_diff:.6f}" - ) + + if actual.ndim == 4: + stats_slice = actual[0, 0] + elif actual.ndim == 3: + stats_slice = actual[0] + else: + stats_slice = actual.squeeze() + + stats_slice = stats_slice.astype(np.float32) + min_val = float(stats_slice.min()) + mean_val = float(stats_slice.mean()) + max_val = float(stats_slice.max()) + frac_05 = float((stats_slice >= 0.5).mean()) + name = case_name or "case" if actual.shape != expected.shape: pytest.fail(f"Shape mismatch: expected={expected.shape}, actual={actual.shape}") @@ -99,6 +121,36 @@ def run_semantic_segmentation_case( f"Output mismatch beyond tolerance (atol={atol}, rtol={rtol}, max_abs_diff={max_diff})" ) + if epithelium_threshold is not None and min_epithelium_fraction is not None: + if actual.ndim == 4: + channel = 0 if epithelium_channel is None else epithelium_channel + epithelium = actual[0, channel] + elif actual.ndim == 3: + channel = 0 if epithelium_channel is None else epithelium_channel + epithelium = actual[channel] + else: + epithelium = actual.squeeze() + + if epithelium.ndim != 2: + pytest.fail( + "Cannot determine epithelium channel; provide epithelium_channel explicitly." + ) + + fraction = float((epithelium >= epithelium_threshold).mean()) + if fraction < min_epithelium_fraction: + pytest.fail( + "Epithelium coverage too low: " + f"fraction={fraction:.6f} < min_fraction={min_epithelium_fraction:.6f}" + ) + + print(f"\n/{model_id}") + print("passed") + print( + f"{name} stats: shape={actual.shape} max_diff={max_diff:.6f} " + f"min={min_val:.6f} mean={mean_val:.6f} max={max_val:.6f} " + f"frac>=0.5={frac_05:.6f}" + ) + def run_embed_case( model_id: str, @@ -110,6 +162,7 @@ def run_embed_case( level: int = 0, timeout_s: float = 1200.0, min_cosine_similarity: float = 0.999, + case_name: str | None = None, ) -> None: expected_array_path = Path(expected_array_path) if not expected_array_path.exists(): @@ -130,9 +183,9 @@ def run_embed_case( similarity = float( np.dot(actual, expected) / (np.linalg.norm(actual) * np.linalg.norm(expected)) ) - print( - f"\n model={model_id} | tile={tile_size}px | time={elapsed:.2f}s | shape={actual.shape} | cosine_similarity={similarity:.6f}" - ) + actual_norm = float(np.linalg.norm(actual)) + expected_norm = float(np.linalg.norm(expected)) + name = case_name or "case" if actual.shape != expected.shape: pytest.fail(f"Shape mismatch: expected={expected.shape}, actual={actual.shape}") @@ -141,3 +194,10 @@ def run_embed_case( pytest.fail( f"Embedding similarity too low: {similarity:.6f} < {min_cosine_similarity}" ) + + print(f"\n/{model_id}") + print("passed") + print( + f"{name} stats: shape={actual.shape} cosine_similarity={similarity:.6f} " + f"norm_actual={actual_norm:.6f} norm_expected={expected_norm:.6f}" + ) diff --git a/tests/model_snapshots/test_binary_classifier_model_snapshot.py b/tests/model_snapshots/test_binary_classifier_model_snapshot.py index e2c13e9..01e7162 100644 --- a/tests/model_snapshots/test_binary_classifier_model_snapshot.py +++ b/tests/model_snapshots/test_binary_classifier_model_snapshot.py @@ -76,4 +76,5 @@ def test_prostate_classifier_snapshot( level=0, expected_is_positive=expected_is_positive, threshold=threshold, + case_name=label, ) diff --git a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py index 54e9c56..865f50f 100644 --- a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py +++ b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py @@ -27,4 +27,7 @@ def test_semantic_episeg(label: str, slide_path: str, x: int, y: int) -> None: expected_array_path=expected_array_path, tile_size=1024, level=0, + epithelium_threshold=0.5, + min_epithelium_fraction=0.01, + case_name=label, ) diff --git a/tests/model_snapshots/test_virchow2_model_snapshot.py b/tests/model_snapshots/test_virchow2_model_snapshot.py index 38f6c1d..e94d097 100644 --- a/tests/model_snapshots/test_virchow2_model_snapshot.py +++ b/tests/model_snapshots/test_virchow2_model_snapshot.py @@ -27,4 +27,5 @@ def test_virchow2(label: str, slide_path: str, x: int, y: int) -> None: expected_array_path=expected_array_path, tile_size=224, level=0, + case_name=label, ) From be23dd5c92f72b33ffd14a12cb000ae9d7fe0d1f Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Wed, 6 May 2026 16:51:16 +0200 Subject: [PATCH 23/35] print --- builders/test_runner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/builders/test_runner.py b/builders/test_runner.py index 01d7bdb..3baf09f 100644 --- a/builders/test_runner.py +++ b/builders/test_runner.py @@ -1,7 +1,7 @@ import subprocess import sys -from fastapi import FastAPI +from fastapi import FastAPI, Response from ray import serve @@ -18,7 +18,7 @@ def __init__(self) -> None: ) @fastapi.post("/") - def run(self) -> str: + def run(self) -> Response: result = subprocess.run( [ @@ -39,7 +39,7 @@ def run(self) -> str: output = result.stdout if result.returncode != 0: output += f"\nSTDERR:\n{result.stderr}" - return output + return Response(content=output, media_type="text/plain") app = TestRunner.bind() From fbfba69037964f7e0a693fcfd9eeb93c873d6038 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Wed, 6 May 2026 19:10:09 +0200 Subject: [PATCH 24/35] fix: add isclose() Co-authored-by: Copilot --- tests/model_snapshots/_shared.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index c3e751e..0c696c1 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -42,6 +42,8 @@ def run_binary_classifier_case( timeout_s: float = 600.0, expected_is_positive: bool | None = None, threshold: float = 0.5, + atol: float = 1e-6, + rtol: float = 1e-5, case_name: str | None = None, ) -> None: tile = _read_tile_at(slide_path, x, y, tile_size, level) @@ -61,8 +63,13 @@ def run_binary_classifier_case( f"actual_score={actual_score:.6f}, threshold={threshold:.3f}" ) + if not np.isclose(actual_score, expected_score, rtol=rtol, atol=atol): + pytest.fail( + f"Binary score mismatch beyond tolerance (atol={atol}, rtol={rtol}, " + f"expected={expected_score:.6f}, actual={actual_score:.6f})" + ) + print(f"\n/{model_id}") - print("passed") print( f"{name} stats: score={actual_score:.6f} expected={expected_score:.6f} " f"delta={delta:+.6f} threshold={threshold:.3f}" @@ -116,9 +123,13 @@ def run_semantic_segmentation_case( if actual.shape != expected.shape: pytest.fail(f"Shape mismatch: expected={expected.shape}, actual={actual.shape}") - if not np.allclose(actual, expected, rtol=rtol, atol=atol): + close_mask = np.isclose(actual, expected, rtol=rtol, atol=atol) + if not close_mask.all(): + mismatch_fraction = float((~close_mask).mean()) pytest.fail( - f"Output mismatch beyond tolerance (atol={atol}, rtol={rtol}, max_abs_diff={max_diff})" + "Output mismatch beyond tolerance " + f"(atol={atol}, rtol={rtol}, max_abs_diff={max_diff}, " + f"mismatch_fraction={mismatch_fraction:.6f})" ) if epithelium_threshold is not None and min_epithelium_fraction is not None: @@ -144,7 +155,6 @@ def run_semantic_segmentation_case( ) print(f"\n/{model_id}") - print("passed") print( f"{name} stats: shape={actual.shape} max_diff={max_diff:.6f} " f"min={min_val:.6f} mean={mean_val:.6f} max={max_val:.6f} " @@ -196,7 +206,6 @@ def run_embed_case( ) print(f"\n/{model_id}") - print("passed") print( f"{name} stats: shape={actual.shape} cosine_similarity={similarity:.6f} " f"norm_actual={actual_norm:.6f} norm_expected={expected_norm:.6f}" From b1010b5f5976b6655e4739901e35fa14ff94f4f9 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Tue, 12 May 2026 21:17:09 +0200 Subject: [PATCH 25/35] new tests Co-authored-by: Copilot --- builders/throughput_runner.py | 46 ++++ .../applications/throughput-test.yaml | 16 ++ helm/rayservice/values.yaml | 1 + tests/benchmark/perf_throughput.py | 236 ++++++++++++++++++ .../test_prov_gigapath_model_snapshot.py | 31 +++ 5 files changed, 330 insertions(+) create mode 100644 builders/throughput_runner.py create mode 100644 helm/rayservice/applications/throughput-test.yaml create mode 100644 tests/benchmark/perf_throughput.py create mode 100644 tests/model_snapshots/test_prov_gigapath_model_snapshot.py diff --git a/builders/throughput_runner.py b/builders/throughput_runner.py new file mode 100644 index 0000000..87a568c --- /dev/null +++ b/builders/throughput_runner.py @@ -0,0 +1,46 @@ +import subprocess +import sys + +from fastapi import FastAPI +from ray import serve + + +fastapi = FastAPI() + + +@serve.deployment(num_replicas=1) +@serve.ingress(fastapi) +class ThroughputRunner: + def __init__(self) -> None: + subprocess.run( + [sys.executable, "-m", "pip", "install", "pytest", "-q"], + check=True, + ) + + @fastapi.post("/") + def run( + self, + duration_s: float = 60.0, + concurrency: int = 8, + timeout: float = 60.0, + ) -> str: + result = subprocess.run( + [ + sys.executable, + "misc/throughput_test.py", + "--duration-s", + str(duration_s), + "--concurrency", + str(concurrency), + "--timeout", + str(timeout), + ], + capture_output=True, + text=True, + ) + return result.stdout + ( + f"\nSTDERR:\n{result.stderr}" if result.returncode != 0 else "" + ) + + +app = ThroughputRunner.bind() diff --git a/helm/rayservice/applications/throughput-test.yaml b/helm/rayservice/applications/throughput-test.yaml new file mode 100644 index 0000000..6bb045a --- /dev/null +++ b/helm/rayservice/applications/throughput-test.yaml @@ -0,0 +1,16 @@ +- name: throughput-runner + import_path: builders.throughput_runner:app + route_prefix: /run-throughput + runtime_env: + working_dir: https://github.com/RationAI/model-service/archive/refs/heads/feature/tests.zip?v1 + pip: + - git+https://github.com/RationAI/rationai-sdk-python.git + deployments: + - name: ThroughputRunner + autoscaling_config: + min_replicas: 0 + max_replicas: 1 + target_ongoing_requests: 1 + ray_actor_options: + num_cpus: 1 + memory: 2147483648 \ No newline at end of file diff --git a/helm/rayservice/values.yaml b/helm/rayservice/values.yaml index 34f8128..2dbc443 100644 --- a/helm/rayservice/values.yaml +++ b/helm/rayservice/values.yaml @@ -9,3 +9,4 @@ applications: - prov-gigapath - virchow2 - test-runner + - throughput-test diff --git a/tests/benchmark/perf_throughput.py b/tests/benchmark/perf_throughput.py new file mode 100644 index 0000000..bb3fc36 --- /dev/null +++ b/tests/benchmark/perf_throughput.py @@ -0,0 +1,236 @@ +from __future__ import annotations + +import argparse +import os +import time +from concurrent.futures import ThreadPoolExecutor, as_completed +from dataclasses import dataclass, field +from threading import Lock + +import numpy as np +from rationai import Client + + +DEFAULT_MODELS = [ + ("prostate-classifier-1", "binary", 512), + ("episeg-1", "semantic", 1024), + ("virchow2", "embed", 224), +] +POOL_SIZE_DEFAULT = 64 + + +@dataclass +class Stats: + ok: int = 0 + fail_503: int = 0 + fail_other: int = 0 + latencies: list[float] = field(default_factory=list) + lock: Lock = field(default_factory=Lock) + + @property + def total(self) -> int: + return self.ok + self.fail_503 + self.fail_other + + def percentile(self, p: float) -> float: + if not self.latencies: + return 0.0 + s = sorted(self.latencies) + idx = int(len(s) * p / 100) + return s[min(idx, len(s) - 1)] + + +def _models_base_url() -> str: + return os.environ.get( + "MODEL_SERVICE_MODELS_BASE_URL", + "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + ) + + +def make_pool(tile_size: int, n: int) -> list[np.ndarray]: + rng = np.random.default_rng(seed=42) + pool = [] + for _ in range(n): + img = rng.integers(0, 255, (tile_size, tile_size, 3), dtype=np.uint8) + pool.append(img) + return pool + + +def _call_model( + client: Client, model_id: str, model_type: str, image: np.ndarray +) -> None: + if model_type == "binary": + client.models.classify_image(model=model_id, image=image) + elif model_type == "semantic": + client.models.segment_image(model=model_id, image=image) + elif model_type == "embed": + client.models.embed_image(model=model_id, image=image) + else: + raise ValueError(f"Unknown model type: {model_type}") + + +def send_loop( + model_id: str, + model_type: str, + pool: list[np.ndarray], + stats: Stats, + end_time: float, + timeout: float, + models_base_url: str, +) -> None: + pool_len = len(pool) + idx = 0 + with Client(models_base_url=models_base_url, timeout=timeout) as client: + while time.perf_counter() < end_time: + image = pool[idx % pool_len] + idx += 1 + t0 = time.perf_counter() + try: + _call_model(client, model_id, model_type, image) + latency = time.perf_counter() - t0 + with stats.lock: + stats.ok += 1 + stats.latencies.append(latency) + except Exception as exc: + status_code = getattr( + getattr(exc, "response", None), "status_code", None + ) + with stats.lock: + if status_code == 503: + stats.fail_503 += 1 + else: + stats.fail_other += 1 + + +def run_model( + name: str, + model_type: str, + tile_size: int, + duration_s: float, + concurrency: int, + timeout: float, + pool_size: int, + models_base_url: str, +) -> dict: + if pool_size <= 0: + raise ValueError("pool_size must be > 0") + + pool = make_pool(tile_size, pool_size) + stats = Stats() + + start = time.perf_counter() + end_time = start + duration_s + with ThreadPoolExecutor(max_workers=concurrency) as executor: + futures = [ + executor.submit( + send_loop, + name, + model_type, + pool, + stats, + end_time, + timeout, + models_base_url, + ) + for _ in range(concurrency) + ] + for future in as_completed(futures): + future.result() + elapsed = time.perf_counter() - start + + throughput = stats.ok / elapsed if elapsed > 0 else 0.0 + return { + "name": name, + "model_type": model_type, + "tile_size": tile_size, + "elapsed_s": elapsed, + "ok": stats.ok, + "fail_503": stats.fail_503, + "fail_other": stats.fail_other, + "throughput": throughput, + "p50": stats.percentile(50), + "p95": stats.percentile(95), + } + + +def parse_models(values: list[str]) -> list[tuple[str, str, int]]: + if not values: + return DEFAULT_MODELS + parsed: list[tuple[str, str, int]] = [] + for item in values: + parts = [p.strip() for p in item.split(",")] + if len(parts) != 3: + raise ValueError("--model expects: model_id,model_type,tile_size") + model_id, model_type, tile_size = parts[0], parts[1], int(parts[2]) + parsed.append((model_id, model_type, tile_size)) + return parsed + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Run per-model throughput tests and report img/s via SDK." + ) + parser.add_argument( + "--model", + action="append", + default=[], + help="Model spec: model_id,model_type,tile_size (repeatable)", + ) + parser.add_argument( + "--models-base-url", + default=_models_base_url(), + help="Base URL for the SDK (default: MODEL_SERVICE_MODELS_BASE_URL or http://localhost:8000)", + ) + parser.add_argument("--duration-s", type=float, default=300.0) + parser.add_argument("--concurrency", type=int, default=64) + parser.add_argument("--timeout", type=float, default=60.0) + parser.add_argument("--pool-size", type=int, default=POOL_SIZE_DEFAULT) + args = parser.parse_args() + + models = parse_models(args.model) + + print("=" * 72) + print("Throughput Test (img/s) - SDK") + print("=" * 72) + print(f"Models base URL: {args.models_base_url}") + print(f"Duration: {args.duration_s:.0f}s") + print(f"Concurrency: {args.concurrency}") + print(f"Timeout: {args.timeout}s") + print() + + results = [] + for name, model_type, tile_size in models: + print(f"/ {name} ({model_type}, tile={tile_size})") + result = run_model( + name, + model_type, + tile_size, + args.duration_s, + args.concurrency, + args.timeout, + args.pool_size, + args.models_base_url, + ) + results.append(result) + print( + f" ok={result['ok']} fail_503={result['fail_503']} " + f"fail_other={result['fail_other']} elapsed={result['elapsed_s']:.2f}s" + ) + print( + f" img/s={result['throughput']:.2f} p50={result['p50']:.3f}s " + f"p95={result['p95']:.3f}s" + ) + print() + + print("Summary") + print("name".ljust(28), "img/s".rjust(10), "p50".rjust(10), "p95".rjust(10)) + for r in results: + print( + r["name"].ljust(28), + f"{r['throughput']:.2f}".rjust(10), + f"{r['p50']:.3f}".rjust(10), + f"{r['p95']:.3f}".rjust(10), + ) + + +if __name__ == "__main__": + main() diff --git a/tests/model_snapshots/test_prov_gigapath_model_snapshot.py b/tests/model_snapshots/test_prov_gigapath_model_snapshot.py new file mode 100644 index 0000000..9d3f200 --- /dev/null +++ b/tests/model_snapshots/test_prov_gigapath_model_snapshot.py @@ -0,0 +1,31 @@ +from pathlib import Path + +import pytest +from _shared import run_embed_case + + +@pytest.mark.parametrize( + "label, slide_path, x, y", + [ + ( + "prov-gigapath", + "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_1367-01-0.mrxs", + 40000, + 70000, + ), + ], +) +def test_prov_gigapath(label: str, slide_path: str, x: int, y: int) -> None: + model_id = "prov-gigapath" + expected_array_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.npy") + + run_embed_case( + model_id=model_id, + slide_path=slide_path, + x=x, + y=y, + expected_array_path=expected_array_path, + tile_size=224, + level=0, + case_name=label, + ) From db4cd4fffe08cbc8311b5b3f28b79e8445ed756d Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Wed, 13 May 2026 14:43:07 +0200 Subject: [PATCH 26/35] fix path Co-authored-by: Copilot --- builders/throughput_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builders/throughput_runner.py b/builders/throughput_runner.py index 87a568c..300b35b 100644 --- a/builders/throughput_runner.py +++ b/builders/throughput_runner.py @@ -27,7 +27,7 @@ def run( result = subprocess.run( [ sys.executable, - "misc/throughput_test.py", + "tests/benchmark/perf_throughput.py", "--duration-s", str(duration_s), "--concurrency", From db27737070ce3f83486e5b46852685ce7183a6b6 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Wed, 13 May 2026 15:47:43 +0200 Subject: [PATCH 27/35] print Co-authored-by: Copilot --- builders/throughput_runner.py | 7 ++++--- tests/benchmark/perf_throughput.py | 10 +++------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/builders/throughput_runner.py b/builders/throughput_runner.py index 300b35b..e8126fb 100644 --- a/builders/throughput_runner.py +++ b/builders/throughput_runner.py @@ -1,7 +1,7 @@ import subprocess import sys -from fastapi import FastAPI +from fastapi import FastAPI, Response from ray import serve @@ -23,7 +23,7 @@ def run( duration_s: float = 60.0, concurrency: int = 8, timeout: float = 60.0, - ) -> str: + ) -> Response: result = subprocess.run( [ sys.executable, @@ -38,9 +38,10 @@ def run( capture_output=True, text=True, ) - return result.stdout + ( + output = result.stdout + ( f"\nSTDERR:\n{result.stderr}" if result.returncode != 0 else "" ) + return Response(content=output, media_type="text/plain") app = ThroughputRunner.bind() diff --git a/tests/benchmark/perf_throughput.py b/tests/benchmark/perf_throughput.py index bb3fc36..20bd4bf 100644 --- a/tests/benchmark/perf_throughput.py +++ b/tests/benchmark/perf_throughput.py @@ -199,7 +199,6 @@ def main() -> None: results = [] for name, model_type, tile_size in models: - print(f"/ {name} ({model_type}, tile={tile_size})") result = run_model( name, model_type, @@ -212,14 +211,11 @@ def main() -> None: ) results.append(result) print( - f" ok={result['ok']} fail_503={result['fail_503']} " - f"fail_other={result['fail_other']} elapsed={result['elapsed_s']:.2f}s" - ) - print( - f" img/s={result['throughput']:.2f} p50={result['p50']:.3f}s " + f"{name} stats: ok={result['ok']} fail_503={result['fail_503']} " + f"fail_other={result['fail_other']} elapsed={result['elapsed_s']:.2f}s " + f"img/s={result['throughput']:.2f} p50={result['p50']:.3f}s " f"p95={result['p95']:.3f}s" ) - print() print("Summary") print("name".ljust(28), "img/s".rjust(10), "p50".rjust(10), "p95".rjust(10)) From d557f5f1a8265372368bf70d4b98592de2185d0c Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Wed, 13 May 2026 15:52:30 +0200 Subject: [PATCH 28/35] test Co-authored-by: Copilot --- builders/throughput_runner.py | 41 ++++++++++++------- tests/benchmark/perf_throughput.py | 63 ++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 15 deletions(-) diff --git a/builders/throughput_runner.py b/builders/throughput_runner.py index e8126fb..d3ba549 100644 --- a/builders/throughput_runner.py +++ b/builders/throughput_runner.py @@ -20,24 +20,35 @@ def __init__(self) -> None: @fastapi.post("/") def run( self, - duration_s: float = 60.0, + duration_s: float = 300.0, concurrency: int = 8, timeout: float = 60.0, + wait_ready: bool = True, + wait_timeout_s: float = 0.0, + wait_interval_s: float = 10.0, ) -> Response: - result = subprocess.run( - [ - sys.executable, - "tests/benchmark/perf_throughput.py", - "--duration-s", - str(duration_s), - "--concurrency", - str(concurrency), - "--timeout", - str(timeout), - ], - capture_output=True, - text=True, - ) + cmd = [ + sys.executable, + "tests/benchmark/perf_throughput.py", + "--duration-s", + str(duration_s), + "--concurrency", + str(concurrency), + "--timeout", + str(timeout), + ] + if wait_ready: + cmd.extend( + [ + "--wait-ready", + "--wait-timeout-s", + str(wait_timeout_s), + "--wait-interval-s", + str(wait_interval_s), + ] + ) + + result = subprocess.run(cmd, capture_output=True, text=True) output = result.stdout + ( f"\nSTDERR:\n{result.stderr}" if result.returncode != 0 else "" ) diff --git a/tests/benchmark/perf_throughput.py b/tests/benchmark/perf_throughput.py index 20bd4bf..726923f 100644 --- a/tests/benchmark/perf_throughput.py +++ b/tests/benchmark/perf_throughput.py @@ -68,6 +68,42 @@ def _call_model( raise ValueError(f"Unknown model type: {model_type}") +def wait_for_ready( + model_id: str, + model_type: str, + tile_size: int, + timeout: float, + models_base_url: str, + wait_timeout_s: float, + wait_interval_s: float, +) -> None: + pool = make_pool(tile_size, 1) + image = pool[0] + start = time.perf_counter() + reported = False + + while True: + try: + with Client(models_base_url=models_base_url, timeout=timeout) as client: + _call_model(client, model_id, model_type, image) + if reported: + waited = time.perf_counter() - start + print(f"{model_id} ready after {waited:.1f}s") + return + except Exception as exc: + status_code = getattr(getattr(exc, "response", None), "status_code", None) + if status_code not in (None, 503, 504): + raise + if not reported: + print(f"{model_id} waiting for readiness...") + reported = True + if wait_timeout_s > 0 and (time.perf_counter() - start) >= wait_timeout_s: + raise RuntimeError( + f"{model_id} not ready after {wait_timeout_s:.1f}s" + ) from exc + time.sleep(wait_interval_s) + + def send_loop( model_id: str, model_type: str, @@ -184,6 +220,23 @@ def main() -> None: parser.add_argument("--concurrency", type=int, default=64) parser.add_argument("--timeout", type=float, default=60.0) parser.add_argument("--pool-size", type=int, default=POOL_SIZE_DEFAULT) + parser.add_argument( + "--wait-ready", + action="store_true", + help="Wait for each model to become ready before running the test", + ) + parser.add_argument( + "--wait-timeout-s", + type=float, + default=0.0, + help="Max time to wait for readiness (0 = wait forever)", + ) + parser.add_argument( + "--wait-interval-s", + type=float, + default=10.0, + help="Wait interval between readiness checks", + ) args = parser.parse_args() models = parse_models(args.model) @@ -199,6 +252,16 @@ def main() -> None: results = [] for name, model_type, tile_size in models: + if args.wait_ready: + wait_for_ready( + model_id=name, + model_type=model_type, + tile_size=tile_size, + timeout=args.timeout, + models_base_url=args.models_base_url, + wait_timeout_s=args.wait_timeout_s, + wait_interval_s=args.wait_interval_s, + ) result = run_model( name, model_type, From f14c82ce62d23bd8fc13e52e062f97003743f2db Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 31 May 2026 10:46:16 +0200 Subject: [PATCH 29/35] fixes Co-authored-by: Copilot --- tests/benchmark/benchmark_batch_size.py | 333 ------------------ tests/benchmark/load_test.py | 286 --------------- tests/model_snapshots/generate_references.py | 17 +- tests/model_snapshots/new_images.txt | 6 + .../test_binary_classifier_model_snapshot.py | 8 +- .../test_prov_gigapath_model_snapshot.py | 3 +- ...st_semantic_segmentation_model_snapshot.py | 3 +- .../test_virchow2_model_snapshot.py | 3 +- tests/{benchmark => }/perf_throughput.py | 2 +- 9 files changed, 24 insertions(+), 637 deletions(-) delete mode 100644 tests/benchmark/benchmark_batch_size.py delete mode 100644 tests/benchmark/load_test.py create mode 100644 tests/model_snapshots/new_images.txt rename tests/{benchmark => }/perf_throughput.py (98%) diff --git a/tests/benchmark/benchmark_batch_size.py b/tests/benchmark/benchmark_batch_size.py deleted file mode 100644 index 7ca0f1f..0000000 --- a/tests/benchmark/benchmark_batch_size.py +++ /dev/null @@ -1,333 +0,0 @@ -# kubectl apply -n rationai-jobs-ns -f c:\Users\jiris\muni-dp\dp\model-service\ray-service.yaml -# kubectl get pods -n rationai-jobs-ns | Select-String "episeg" (model name) -# kubectl cp tests/benchmark_batch_size.py rationai-jobs-ns/rayservice-model-optimized-7zwlk-head-fbzr5:/tmp/benchmark_batch_size.py -# kubectl exec -n rationai-jobs-ns rayservice-model-optimized-7zwlk-head-fbzr5 -- bash -c "python3 -u /tmp/benchmark_batch_size.py --url http://localhost:8000/episeg-1/ --batch-size 128" - -# kubectl exec -n rationai-jobs-ns rayservice-model-optimized-7zwlk-head-fbzr5 -- bash -c "pip install httpx -q && python3 -u /tmp/benchmark_batch_size.py --url http://localhost:8000/episeg-1/ --batch-size 8 --concurrency-values 4,8,16,24,32,48,64 --tile-size 1024 --n 500 --warmup 100" - -from __future__ import annotations - -import argparse -import asyncio -import csv -import sys -import time -from pathlib import Path - -import lz4.frame -import numpy as np - - -try: - import httpx -except ImportError: - print("pip install httpx") - sys.exit(1) - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -TILE_SIZE_DEFAULT = 224 -POOL_SIZE = 64 -OUTPUT_CSV = "results.csv" - - -def make_pool(tile_size: int, n: int = POOL_SIZE) -> list[bytes]: - rng = np.random.default_rng(seed=42) - pool = [] - for _ in range(n): - img = rng.integers(0, 255, (tile_size, tile_size, 3), dtype=np.uint8) - pool.append(lz4.frame.compress(img.tobytes())) - return pool - - -async def run_batch( - url: str, - pool: list[bytes], - total: int, - concurrency: int, - timeout: float, -) -> tuple[float, int, int]: - """Pošle `total` requestů s `concurrency` souběžnými workery.""" - remaining = total - ok = 0 - fail = 0 - pool_len = len(pool) - counter = 0 - lock = asyncio.Lock() - - limits = httpx.Limits( - max_connections=concurrency + 8, - max_keepalive_connections=concurrency + 8, - ) - - async def worker(client: httpx.AsyncClient) -> None: - nonlocal remaining, ok, fail, counter - while True: - async with lock: - if remaining <= 0: - return - remaining -= 1 - idx = counter % pool_len - counter += 1 - payload = pool[idx] - try: - r = await client.post( - url, - content=payload, - headers={"Content-Type": "application/octet-stream"}, - timeout=timeout, - ) - if r.status_code == 200: - ok += 1 - else: - fail += 1 - print(f" [WARN] HTTP {r.status_code}: {r.text[:120]}") - except Exception as e: - fail += 1 - print(f" [ERR] {type(e).__name__}: {e!r}") - - t0 = time.perf_counter() - async with httpx.AsyncClient(limits=limits, follow_redirects=True) as client: - await asyncio.gather(*[worker(client) for _ in range(concurrency)]) - return time.perf_counter() - t0, ok, fail - - -def append_csv(path: str, row: dict) -> None: - fieldnames = [ - "url", - "batch_size", - "concurrency", - "n", - "elapsed_s", - "throughput_img_s", - "ok", - "fail", - ] - write_header = not Path(path).exists() - with open(path, "a", newline="") as f: - writer = csv.DictWriter(f, fieldnames=fieldnames) - if write_header: - writer.writeheader() - writer.writerow(row) - - -def load_csv(path: str, url: str) -> list[dict]: - if not Path(path).exists(): - return [] - with open(path) as f: - reader = csv.DictReader(f) - return [r for r in reader if r["url"] == url] - - -def concurrency_sweep_values(batch_size: int) -> list[int]: - """Pro MIG-2g.20gb: testujeme rozsah od batch_size/2 do batch_size*4. - Jemnější kroky kolem batch_size kde bývá knee. - """ - half = max(1, batch_size // 2) - candidates = sorted( - set( - [ - half, - batch_size, - batch_size + batch_size // 2, - batch_size * 2, - batch_size * 3, - batch_size * 4, - ] - ) - ) - # Přidej mezikroky kolem batch_size - extras = [batch_size - batch_size // 4, batch_size + batch_size // 4] - candidates = sorted(set(candidates + [e for e in extras if e > 0])) - return candidates - - -def print_summary(rows: list[dict], batch_size: int | None = None) -> None: - if not rows: - return - if batch_size is not None: - rows = [r for r in rows if int(r["batch_size"]) == batch_size] - if not rows: - return - - best = max(rows, key=lambda r: float(r["throughput_img_s"])) - - header = f"{'batch_size':>12} {'concurrency':>12} {'throughput img/s':>18} {'ok':>8} {'fail':>8}" - print(header) - print("-" * len(header)) - for row in sorted( - rows, key=lambda r: (int(r["batch_size"]), int(r["concurrency"])) - ): - marker = " ← BEST" if row is best else "" - fail_val = int(row["fail"]) - fail_str = f"[!]{fail_val}" if fail_val > 0 else str(fail_val) - print( - f"{row['batch_size']:>12} {row['concurrency']:>12}" - f" {row['throughput_img_s']:>18} {row['ok']:>8} {fail_str:>8}{marker}" - ) - print() - print("Doporučené YAML hodnoty pro batch_size =", best["batch_size"]) - tor = int(best["concurrency"]) - mor = int(tor * 1.25) + 8 - print(f" max_batch_size: {best['batch_size']}") - print(f" target_ongoing_requests: {tor} # = nejlepší concurrency") - print(f" max_ongoing_requests: {mor} # target * 1.25 + buffer") - - -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- - - -async def main() -> None: - parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter - ) - parser.add_argument( - "--url", - default="http://localhost:8000/virchow2/", - help="Endpointová URL (default: http://localhost:8000/virchow2/)", - ) - parser.add_argument( - "--batch-size", - type=int, - required=True, - help="max_batch_size nastavený v user_config (shodný s YAML)", - ) - parser.add_argument( - "--concurrency", - type=int, - default=None, - help="Pevná hodnota concurrency – přeskočí sweep a naměří jen tuto", - ) - parser.add_argument( - "--concurrency-values", - type=str, - default=None, - help="Čárkami oddělený seznam concurrency hodnot k otestování, " - "např. '32,64,128,256' (přepíše výchozí sweep)", - ) - parser.add_argument( - "--n", - type=int, - default=1000, - help="Počet měřených requestů na jeden bod (default: 1000)", - ) - parser.add_argument( - "--warmup", - type=int, - default=100, - help="Warmup requesty před měřením (default: 100)", - ) - parser.add_argument("--tile-size", type=int, default=TILE_SIZE_DEFAULT) - parser.add_argument("--timeout", type=float, default=300.0) - parser.add_argument( - "--output", - default=OUTPUT_CSV, - help=f"Výstupní CSV soubor (default: {OUTPUT_CSV})", - ) - parser.add_argument( - "--skip-existing", - action="store_true", - help="Přeskočí (batch_size, concurrency) kombinace už změřené v CSV", - ) - args = parser.parse_args() - - url = args.url.rstrip("/") + "/" - pool = make_pool(args.tile_size) - - # Determine sweep values - if args.concurrency is not None: - sweep = [args.concurrency] - elif args.concurrency_values: - sweep = [int(v.strip()) for v in args.concurrency_values.split(",")] - else: - sweep = concurrency_sweep_values(args.batch_size) - - # Already measured (for --skip-existing) - existing: set[int] = set() - if args.skip_existing: - for row in load_csv(args.output, url): - if int(row["batch_size"]) == args.batch_size: - existing.add(int(row["concurrency"])) - - print("=" * 60) - print("Virchow2 Benchmark Sweep") - print("=" * 60) - print(f"URL: {url}") - print(f"max_batch_size: {args.batch_size} (musí odpovídat YAML!)") - print(f"concurrency sweep:{sweep}") - print(f"n per point: {args.n}") - print(f"warmup: {args.warmup}") - print(f"output: {args.output}") - print() - - # Warmup – jednou, s prostředním concurrency - warmup_conc = sweep[len(sweep) // 2] - print(f"Warmup ({args.warmup} img, concurrency={warmup_conc})...") - await run_batch(url, pool, args.warmup, warmup_conc, args.timeout) - print("Warmup done.\n") - - results_this_run: list[dict] = [] - - for conc in sweep: - if conc in existing: - print(f"[SKIP] concurrency={conc} (already in CSV)") - continue - - print(f"▶ batch_size={args.batch_size} concurrency={conc} ({args.n} img)...") - elapsed, ok, fail = await run_batch(url, pool, args.n, conc, args.timeout) - rps = ok / elapsed if elapsed > 0 else 0.0 - - row = { - "url": url, - "batch_size": args.batch_size, - "concurrency": conc, - "n": ok + fail, - "elapsed_s": f"{elapsed:.2f}", - "throughput_img_s": f"{rps:.1f}", - "ok": ok, - "fail": fail, - } - append_csv(args.output, row) - results_this_run.append(row) - - status = f" → {rps:.1f} img/s" - if fail: - status += f" [{fail} failures!]" - print(status) - - # Kratká pauza mezi body aby se server stabilizoval - await asyncio.sleep(2) - - # Summary – jen aktuální batch_size - print() - print("=" * 60) - print(f"Výsledky pro batch_size = {args.batch_size}") - print("=" * 60) - all_rows = load_csv(args.output, url) - print_summary(all_rows, batch_size=args.batch_size) - - # Pokud existují data pro více batch_size, ukaž i celkové porovnání - all_batch_sizes = sorted(set(int(r["batch_size"]) for r in all_rows)) - if len(all_batch_sizes) > 1: - print() - print("=" * 60) - print("Celkové porovnání všech batch_size (best concurrency per batch)") - print("=" * 60) - # Pro každý batch_size vyber jen nejlepší concurrency - best_per_batch = [] - for bs in all_batch_sizes: - candidates = [r for r in all_rows if int(r["batch_size"]) == bs] - if candidates: - best_per_batch.append( - max(candidates, key=lambda r: float(r["throughput_img_s"])) - ) - print_summary(best_per_batch) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/tests/benchmark/load_test.py b/tests/benchmark/load_test.py deleted file mode 100644 index ba5b8ef..0000000 --- a/tests/benchmark/load_test.py +++ /dev/null @@ -1,286 +0,0 @@ -# kubectl cp tests/load_test.py rationai-jobs-ns/rayservice-model-virchow2-5qfmz-head-98tbv:/tmp/load_test.py -# kubectl exec -n rationai-jobs-ns rayservice-model-virchow2-5qfmz-head-98tbv -- bash -c "python3 -u /tmp/load_test.py --url http://localhost:8000/virchow2/ --tiles 5000 --concurrency 128" -from __future__ import annotations - -import argparse -import asyncio -import sys -import time -from dataclasses import dataclass, field - -import lz4.frame -import numpy as np - - -try: - import httpx -except ImportError: - print("pip install httpx") - sys.exit(1) - - -TILE_SIZE_DEFAULT = 224 -POOL_SIZE = 64 - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def make_pool(tile_size: int, n: int = POOL_SIZE) -> list[bytes]: - rng = np.random.default_rng(seed=42) - pool = [] - for _ in range(n): - img = rng.integers(0, 255, (tile_size, tile_size, 3), dtype=np.uint8) - pool.append(lz4.frame.compress(img.tobytes())) - return pool - - -@dataclass -class Stats: - ok: int = 0 - fail_503: int = 0 - fail_other: int = 0 - latencies: list[float] = field(default_factory=list) - lock: asyncio.Lock = field(default_factory=asyncio.Lock) - - @property - def total(self) -> int: - return self.ok + self.fail_503 + self.fail_other - - def percentile(self, p: float) -> float: - if not self.latencies: - return 0.0 - s = sorted(self.latencies) - idx = int(len(s) * p / 100) - return s[min(idx, len(s) - 1)] - - -async def send_tile( - client: httpx.AsyncClient, - url: str, - payload: bytes, - stats: Stats, - timeout: float, - progress_every: int, -) -> None: - t0 = time.perf_counter() - try: - r = await client.post( - url, - content=payload, - headers={"Content-Type": "application/octet-stream"}, - timeout=timeout, - ) - latency = time.perf_counter() - t0 - async with stats.lock: - if r.status_code == 200: - stats.ok += 1 - stats.latencies.append(latency) - if stats.ok % progress_every == 0: - print( - f" ✓ {stats.ok} OK | 503: {stats.fail_503} | other: {stats.fail_other}" - ) - elif r.status_code == 503: - stats.fail_503 += 1 - else: - stats.fail_other += 1 - print(f" [WARN] HTTP {r.status_code}: {r.text[:120]}") - except Exception as e: - async with stats.lock: - stats.fail_other += 1 - print(f" [ERR] {e}") - - -async def run_wsi( - url: str, - pool: list[bytes], - tiles: int, - concurrency: int, - timeout: float, - wsi_id: int, - stats: Stats, -) -> float: - """Simuluje jeden WSI — pošle `tiles` requestů s max `concurrency` souběžně.""" - semaphore = asyncio.Semaphore(concurrency) - pool_len = len(pool) - - limits = httpx.Limits( - max_connections=concurrency + 8, - max_keepalive_connections=concurrency + 8, - ) - - async def bounded_send(client: httpx.AsyncClient, idx: int) -> None: - async with semaphore: - await send_tile( - client, - url, - pool[idx % pool_len], - stats, - timeout, - progress_every=max(tiles // 10, 100), - ) - - t0 = time.perf_counter() - async with httpx.AsyncClient(limits=limits, follow_redirects=True) as client: - tasks = [bounded_send(client, i) for i in range(tiles)] - await asyncio.gather(*tasks) - return time.perf_counter() - t0 - - -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- - - -async def main() -> None: - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument( - "--url", default="http://localhost:8000/virchow2/", help="Endpoint URL" - ) - parser.add_argument( - "--tiles", - type=int, - default=5000, - help="Počet dlaždic na jeden WSI (default: 5000)", - ) - parser.add_argument( - "--wsi-count", - type=int, - default=1, - help="Počet paralelních WSI slidů (default: 1)", - ) - parser.add_argument( - "--concurrency", - type=int, - default=128, - help="Max souběžných requestů na WSI (default: 128, " - "mělo by odpovídat target_ongoing_requests)", - ) - parser.add_argument("--tile-size", type=int, default=TILE_SIZE_DEFAULT) - parser.add_argument( - "--timeout", - type=float, - default=120.0, - help="Timeout na jeden request v sekundách (default: 120)", - ) - parser.add_argument( - "--warmup", - type=int, - default=50, - help="Warmup requestů před testem (default: 50)", - ) - parser.add_argument("--no-warmup", action="store_true", help="Přeskočit warmup") - args = parser.parse_args() - - url = args.url.rstrip("/") + "/" - pool = make_pool(args.tile_size) - total_tiles = args.tiles * args.wsi_count - - print("=" * 60) - print("Virchow2 WSI Load Test") - print("=" * 60) - print(f"URL: {url}") - print(f"Tiles per WSI: {args.tiles:,}") - print(f"WSI count: {args.wsi_count}") - print(f"Total tiles: {total_tiles:,}") - print(f"Concurrency/WSI: {args.concurrency}") - print(f"Total concurrent: {args.concurrency * args.wsi_count}") - print(f"Request timeout: {args.timeout}s") - print() - - # Warmup - if not args.no_warmup: - print(f"Warmup ({args.warmup} tiles)...") - warmup_stats = Stats() - await run_wsi( - url, - pool, - args.warmup, - min(args.concurrency, 32), - args.timeout, - wsi_id=0, - stats=warmup_stats, - ) - print( - f"Warmup done (ok={warmup_stats.ok}, fail={warmup_stats.fail_503 + warmup_stats.fail_other}).\n" - ) - - # Actual test - stats = Stats() - print( - f"▶ Spouštím {'paralelně ' + str(args.wsi_count) + ' WSI' if args.wsi_count > 1 else '1 WSI'} " - f"({total_tiles:,} tiles celkem)...\n" - ) - - t0 = time.perf_counter() - - if args.wsi_count == 1: - await run_wsi( - url, pool, args.tiles, args.concurrency, args.timeout, wsi_id=0, stats=stats - ) - else: - # Všechny WSI slidy spustit paralelně — simulace více scannerů najednou - await asyncio.gather( - *[ - run_wsi( - url, - pool, - args.tiles, - args.concurrency, - args.timeout, - wsi_id=i, - stats=stats, - ) - for i in range(args.wsi_count) - ] - ) - - elapsed = time.perf_counter() - t0 - rps = stats.ok / elapsed if elapsed > 0 else 0.0 - - # Report - print() - print("=" * 60) - print("Výsledky") - print("=" * 60) - print(f"Celkový čas: {elapsed:.1f}s ({elapsed / 60:.1f} min)") - print(f"Throughput: {rps:.1f} img/s") - print() - print( - f"Úspěšné: {stats.ok:,} / {total_tiles:,} ({100 * stats.ok / total_tiles:.1f}%)" - ) - print( - f"503 backpressure: {stats.fail_503:,} ({100 * stats.fail_503 / total_tiles:.1f}%)" - ) - print(f"Jiné chyby: {stats.fail_other:,}") - print() - if stats.latencies: - print("Latence (úspěšné requesty):") - print(f" p50: {stats.percentile(50) * 1000:.0f} ms") - print(f" p90: {stats.percentile(90) * 1000:.0f} ms") - print(f" p99: {stats.percentile(99) * 1000:.0f} ms") - print(f" max: {max(stats.latencies) * 1000:.0f} ms") - print() - - # Verdict - fail_rate = (stats.fail_503 + stats.fail_other) / total_tiles - if fail_rate == 0: - print("✅ PASS — žádné chyby, nastavení je v pořádku pro WSI.") - elif fail_rate < 0.01: - print( - f"⚠️ WARN — {fail_rate * 100:.2f}% chyb. Zvažte zvýšení max_queued_requests." - ) - else: - print( - f"❌ FAIL — {fail_rate * 100:.1f}% chyb. Nastavení nestačí pro tento objem." - ) - print(" → Zvyšte max_queued_requests nebo snižte --concurrency klientů.") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index 3bba3e0..8078bdf 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -3,9 +3,10 @@ from pathlib import Path import numpy as np -from _shared import _read_tile_at from rationai import Client +from tests.model_snapshots._shared import _read_tile_at + OUT_DIR = Path("/mnt/test_refs") MODELS_BASE_URL = os.environ.get( @@ -17,14 +18,14 @@ # Keep only one active case here. Store other candidate slides in new_images.txt # and swap them in when you want to regenerate a different reference. ACTIVE_CASE = { - "label": "virchow2", - "slide_path": "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_1367-01-0.mrxs", - "model_id": "virchow2", - "type": "embed", - "tile_size": 224, + "label": "colorectum_kos04", + "slide_path": "/mnt/data/MOU/colorectum/tissue_microarray/he/KOS04.mrxs", + "model_id": "episeg-1", + "type": "semantic", + "tile_size": 1024, "level": 0, - "x": 40000, - "y": 70000, + "x": 46000, + "y": 82400, } CASES = [ACTIVE_CASE] diff --git a/tests/model_snapshots/new_images.txt b/tests/model_snapshots/new_images.txt new file mode 100644 index 0000000..46c847e --- /dev/null +++ b/tests/model_snapshots/new_images.txt @@ -0,0 +1,6 @@ +# New images and coordinates +# Format: label | slide_path | x | y | tile_size | level | notes +prostate_positive | /mnt/data/MOU/prostate/tile_level_annotations/P-2016_2386-06-1.mrxs | 43390 | 45865 | 512 | 0 | prostate-classifier-1 positive +prostate_negative | /mnt/data/MOU/prostate/tile_level_annotations/P-2016_0845-02-0.mrxs | 32950 | 108990 | 512 | 0 | prostate-classifier-1 negative +prostate | /mnt/data/MOU/prostate/tile_level_annotations/P-2016_1367-01-0.mrxs | 40000 | 70000 | 224 | 0 | virchow2 embed +colorectum_kos04 | /mnt/data/MOU/colorectum/tissue_microarray/he/KOS04.mrxs | 46000 | 82400 | 1024 | 0 | episeg-1 semantic diff --git a/tests/model_snapshots/test_binary_classifier_model_snapshot.py b/tests/model_snapshots/test_binary_classifier_model_snapshot.py index 01e7162..ce3d71f 100644 --- a/tests/model_snapshots/test_binary_classifier_model_snapshot.py +++ b/tests/model_snapshots/test_binary_classifier_model_snapshot.py @@ -2,7 +2,8 @@ from pathlib import Path import pytest -from _shared import run_binary_classifier_case + +from tests.model_snapshots._shared import run_binary_classifier_case BINARY_POSITIVE_THRESHOLD = 0.5 @@ -33,11 +34,6 @@ def test_prostate_classifier_snapshot( model_id = "prostate-classifier-1" json_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.json") - if not json_path.exists(): - pytest.skip( - f"Reference file {json_path} missing. Run generate_references.py first." - ) - with json_path.open() as f: reference = json.load(f) diff --git a/tests/model_snapshots/test_prov_gigapath_model_snapshot.py b/tests/model_snapshots/test_prov_gigapath_model_snapshot.py index 9d3f200..9f6aad6 100644 --- a/tests/model_snapshots/test_prov_gigapath_model_snapshot.py +++ b/tests/model_snapshots/test_prov_gigapath_model_snapshot.py @@ -1,7 +1,8 @@ from pathlib import Path import pytest -from _shared import run_embed_case + +from tests.model_snapshots._shared import run_embed_case @pytest.mark.parametrize( diff --git a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py index 865f50f..8ff06cc 100644 --- a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py +++ b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py @@ -1,7 +1,8 @@ from pathlib import Path import pytest -from _shared import run_semantic_segmentation_case + +from tests.model_snapshots._shared import run_semantic_segmentation_case @pytest.mark.parametrize( diff --git a/tests/model_snapshots/test_virchow2_model_snapshot.py b/tests/model_snapshots/test_virchow2_model_snapshot.py index e94d097..4714005 100644 --- a/tests/model_snapshots/test_virchow2_model_snapshot.py +++ b/tests/model_snapshots/test_virchow2_model_snapshot.py @@ -1,7 +1,8 @@ from pathlib import Path import pytest -from _shared import run_embed_case + +from tests.model_snapshots._shared import run_embed_case @pytest.mark.parametrize( diff --git a/tests/benchmark/perf_throughput.py b/tests/perf_throughput.py similarity index 98% rename from tests/benchmark/perf_throughput.py rename to tests/perf_throughput.py index 726923f..e89f8d5 100644 --- a/tests/benchmark/perf_throughput.py +++ b/tests/perf_throughput.py @@ -42,7 +42,7 @@ def percentile(self, p: float) -> float: def _models_base_url() -> str: return os.environ.get( "MODEL_SERVICE_MODELS_BASE_URL", - "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", ) From 661225bdfb85d7c98c97eee77bda2f81ccfca8cf Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 31 May 2026 11:30:38 +0200 Subject: [PATCH 30/35] more fixes Co-authored-by: Copilot --- builders/throughput_runner.py | 2 +- tests/model_snapshots/new_images.txt | 1 + tests/perf_throughput.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/builders/throughput_runner.py b/builders/throughput_runner.py index d3ba549..acb79b9 100644 --- a/builders/throughput_runner.py +++ b/builders/throughput_runner.py @@ -29,7 +29,7 @@ def run( ) -> Response: cmd = [ sys.executable, - "tests/benchmark/perf_throughput.py", + "tests/perf_throughput.py", "--duration-s", str(duration_s), "--concurrency", diff --git a/tests/model_snapshots/new_images.txt b/tests/model_snapshots/new_images.txt index 46c847e..9a74517 100644 --- a/tests/model_snapshots/new_images.txt +++ b/tests/model_snapshots/new_images.txt @@ -3,4 +3,5 @@ prostate_positive | /mnt/data/MOU/prostate/tile_level_annotations/P-2016_2386-06-1.mrxs | 43390 | 45865 | 512 | 0 | prostate-classifier-1 positive prostate_negative | /mnt/data/MOU/prostate/tile_level_annotations/P-2016_0845-02-0.mrxs | 32950 | 108990 | 512 | 0 | prostate-classifier-1 negative prostate | /mnt/data/MOU/prostate/tile_level_annotations/P-2016_1367-01-0.mrxs | 40000 | 70000 | 224 | 0 | virchow2 embed +prov-gigapath | /mnt/data/MOU/prostate/tile_level_annotations/P-2016_1367-01-0.mrxs | 40000 | 70000 | 224 | 0 | prov-gigapath embed colorectum_kos04 | /mnt/data/MOU/colorectum/tissue_microarray/he/KOS04.mrxs | 46000 | 82400 | 1024 | 0 | episeg-1 semantic diff --git a/tests/perf_throughput.py b/tests/perf_throughput.py index e89f8d5..395da94 100644 --- a/tests/perf_throughput.py +++ b/tests/perf_throughput.py @@ -15,6 +15,7 @@ ("prostate-classifier-1", "binary", 512), ("episeg-1", "semantic", 1024), ("virchow2", "embed", 224), + ("prov-gigapath", "embed", 224), ] POOL_SIZE_DEFAULT = 64 From f629274455dbe6a7130ffe0786880134c620b58d Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 31 May 2026 11:56:05 +0200 Subject: [PATCH 31/35] fixes --- tests/model_snapshots/_shared.py | 7 ---- .../test_binary_classifier_model_snapshot.py | 26 ++------------ tests/perf_throughput.py | 36 ++++++++++--------- 3 files changed, 22 insertions(+), 47 deletions(-) diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index 0c696c1..c3bd2dd 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -2,7 +2,6 @@ import os from pathlib import Path -from time import perf_counter import numpy as np import pytest @@ -49,9 +48,7 @@ def run_binary_classifier_case( tile = _read_tile_at(slide_path, x, y, tile_size, level) with Client(models_base_url=_models_base_url(), timeout=timeout_s) as client: - t0 = perf_counter() actual_score = float(client.models.classify_image(model=model_id, image=tile)) - elapsed = perf_counter() - t0 delta = actual_score - expected_score name = case_name or "case" @@ -100,9 +97,7 @@ def run_semantic_segmentation_case( expected = np.load(expected_array_path) with Client(models_base_url=_models_base_url(), timeout=timeout_s) as client: - t0 = perf_counter() actual = np.asarray(client.models.segment_image(model=model_id, image=tile)) - elapsed = perf_counter() - t0 max_diff = np.abs(actual.astype(np.float32) - expected.astype(np.float32)).max() @@ -182,13 +177,11 @@ def run_embed_case( expected = np.load(expected_array_path).flatten().astype(np.float32) with Client(models_base_url=_models_base_url(), timeout=timeout_s) as client: - t0 = perf_counter() actual = ( np.asarray(client.models.embed_image(model=model_id, image=tile)) .flatten() .astype(np.float32) ) - elapsed = perf_counter() - t0 similarity = float( np.dot(actual, expected) / (np.linalg.norm(actual) * np.linalg.norm(expected)) diff --git a/tests/model_snapshots/test_binary_classifier_model_snapshot.py b/tests/model_snapshots/test_binary_classifier_model_snapshot.py index ce3d71f..ca9d8ea 100644 --- a/tests/model_snapshots/test_binary_classifier_model_snapshot.py +++ b/tests/model_snapshots/test_binary_classifier_model_snapshot.py @@ -10,26 +10,24 @@ @pytest.mark.parametrize( - "label, slide_path, x, y, is_positive", + "label, slide_path, x, y", [ ( "prostate_positive", "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_2386-06-1.mrxs", 43390, 45865, - True, ), ( "prostate_negative", "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_0845-02-0.mrxs", 32950, 108990, - False, ), ], ) def test_prostate_classifier_snapshot( - label: str, slide_path: str, x: int, y: int, is_positive: bool + label: str, slide_path: str, x: int, y: int ) -> None: model_id = "prostate-classifier-1" json_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.json") @@ -37,30 +35,10 @@ def test_prostate_classifier_snapshot( with json_path.open() as f: reference = json.load(f) - assert reference.get("label") == label - assert reference.get("model_id") == model_id - assert reference.get("slide_path") == slide_path - assert reference.get("x") == x - assert reference.get("y") == y - assert reference.get("tile_size") == 512 - assert reference.get("level") == 0 - expected_score = reference["expected_score"] threshold = reference.get("threshold", BINARY_POSITIVE_THRESHOLD) expected_is_positive = reference.get("expected_is_positive") assert expected_is_positive is not None - assert expected_is_positive == is_positive - - if is_positive: - assert expected_score >= threshold, ( - f"Reference score {expected_score:.4f} is below positive threshold {threshold:.3f} — " - "was the reference generated on the correct tile?" - ) - else: - assert expected_score < threshold, ( - f"Reference score {expected_score:.4f} is above negative threshold {threshold:.3f} — " - "was the reference generated on the correct tile?" - ) run_binary_classifier_case( model_id=model_id, diff --git a/tests/perf_throughput.py b/tests/perf_throughput.py index 395da94..be68c4a 100644 --- a/tests/perf_throughput.py +++ b/tests/perf_throughput.py @@ -35,9 +35,7 @@ def total(self) -> int: def percentile(self, p: float) -> float: if not self.latencies: return 0.0 - s = sorted(self.latencies) - idx = int(len(s) * p / 100) - return s[min(idx, len(s) - 1)] + return float(np.percentile(self.latencies, p)) def _models_base_url() -> str: @@ -49,11 +47,10 @@ def _models_base_url() -> str: def make_pool(tile_size: int, n: int) -> list[np.ndarray]: rng = np.random.default_rng(seed=42) - pool = [] - for _ in range(n): - img = rng.integers(0, 255, (tile_size, tile_size, 3), dtype=np.uint8) - pool.append(img) - return pool + return [ + rng.integers(0, 256, (tile_size, tile_size, 3), dtype=np.uint8) + for _ in range(n) + ] def _call_model( @@ -78,8 +75,7 @@ def wait_for_ready( wait_timeout_s: float, wait_interval_s: float, ) -> None: - pool = make_pool(tile_size, 1) - image = pool[0] + image = make_pool(tile_size, 1)[0] start = time.perf_counter() reported = False @@ -88,8 +84,7 @@ def wait_for_ready( with Client(models_base_url=models_base_url, timeout=timeout) as client: _call_model(client, model_id, model_type, image) if reported: - waited = time.perf_counter() - start - print(f"{model_id} ready after {waited:.1f}s") + print(f"{model_id} ready after {time.perf_counter() - start:.1f}s") return except Exception as exc: status_code = getattr(getattr(exc, "response", None), "status_code", None) @@ -98,7 +93,8 @@ def wait_for_ready( if not reported: print(f"{model_id} waiting for readiness...") reported = True - if wait_timeout_s > 0 and (time.perf_counter() - start) >= wait_timeout_s: + elapsed = time.perf_counter() - start + if wait_timeout_s > 0 and elapsed >= wait_timeout_s: raise RuntimeError( f"{model_id} not ready after {wait_timeout_s:.1f}s" ) from exc @@ -147,7 +143,7 @@ def run_model( timeout: float, pool_size: int, models_base_url: str, -) -> dict: +) -> dict[str, object]: if pool_size <= 0: raise ValueError("pool_size must be > 0") @@ -186,6 +182,7 @@ def run_model( "throughput": throughput, "p50": stats.percentile(50), "p95": stats.percentile(95), + "p99": stats.percentile(99), } @@ -278,17 +275,24 @@ def main() -> None: f"{name} stats: ok={result['ok']} fail_503={result['fail_503']} " f"fail_other={result['fail_other']} elapsed={result['elapsed_s']:.2f}s " f"img/s={result['throughput']:.2f} p50={result['p50']:.3f}s " - f"p95={result['p95']:.3f}s" + f"p95={result['p95']:.3f}s p99={result['p99']:.3f}s" ) print("Summary") - print("name".ljust(28), "img/s".rjust(10), "p50".rjust(10), "p95".rjust(10)) + print( + "name".ljust(28), + "img/s".rjust(10), + "p50".rjust(10), + "p95".rjust(10), + "p99".rjust(10), + ) for r in results: print( r["name"].ljust(28), f"{r['throughput']:.2f}".rjust(10), f"{r['p50']:.3f}".rjust(10), f"{r['p95']:.3f}".rjust(10), + f"{r['p99']:.3f}".rjust(10), ) From afbcab6e15b080ba969646b63f2054c595911bed Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 31 May 2026 13:41:28 +0200 Subject: [PATCH 32/35] url fix Co-authored-by: Copilot --- helm/rayservice/applications/test-runner.yaml | 2 +- .../rayservice/applications/throughput-test.yaml | 2 +- tests/model_snapshots/_shared.py | 2 +- tests/model_snapshots/generate_references.py | 16 ++++++++-------- tests/perf_throughput.py | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/helm/rayservice/applications/test-runner.yaml b/helm/rayservice/applications/test-runner.yaml index 3aad245..765c0b4 100644 --- a/helm/rayservice/applications/test-runner.yaml +++ b/helm/rayservice/applications/test-runner.yaml @@ -2,7 +2,7 @@ import_path: builders.test_runner:app route_prefix: /run-tests runtime_env: - working_dir: https://github.com/RationAI/model-service/archive/refs/heads/feature/tests.zip?v10 + working_dir: https://github.com/RationAI/model-service/archive/refs/heads/main.zip pip: - git+https://github.com/RationAI/rationai-sdk-python.git deployments: diff --git a/helm/rayservice/applications/throughput-test.yaml b/helm/rayservice/applications/throughput-test.yaml index 6bb045a..a056a56 100644 --- a/helm/rayservice/applications/throughput-test.yaml +++ b/helm/rayservice/applications/throughput-test.yaml @@ -2,7 +2,7 @@ import_path: builders.throughput_runner:app route_prefix: /run-throughput runtime_env: - working_dir: https://github.com/RationAI/model-service/archive/refs/heads/feature/tests.zip?v1 + working_dir: https://github.com/RationAI/model-service/archive/refs/heads/main.zip pip: - git+https://github.com/RationAI/rationai-sdk-python.git deployments: diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index c3bd2dd..122f2f7 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -13,7 +13,7 @@ def _models_base_url() -> str: return os.environ.get( "MODEL_SERVICE_MODELS_BASE_URL", - "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", ) diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index 8078bdf..4543302 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -11,21 +11,21 @@ OUT_DIR = Path("/mnt/test_refs") MODELS_BASE_URL = os.environ.get( "MODEL_SERVICE_MODELS_BASE_URL", - "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", ) BINARY_POSITIVE_THRESHOLD = 0.5 # Keep only one active case here. Store other candidate slides in new_images.txt # and swap them in when you want to regenerate a different reference. ACTIVE_CASE = { - "label": "colorectum_kos04", - "slide_path": "/mnt/data/MOU/colorectum/tissue_microarray/he/KOS04.mrxs", - "model_id": "episeg-1", - "type": "semantic", - "tile_size": 1024, + "label": "prov-gigapath", + "slide_path": "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_1367-01-0.mrxs", + "model_id": "prov-gigapath", + "type": "embed", + "tile_size": 224, "level": 0, - "x": 46000, - "y": 82400, + "x": 40000, + "y": 70000, } CASES = [ACTIVE_CASE] diff --git a/tests/perf_throughput.py b/tests/perf_throughput.py index be68c4a..06fa89e 100644 --- a/tests/perf_throughput.py +++ b/tests/perf_throughput.py @@ -41,7 +41,7 @@ def percentile(self, p: float) -> float: def _models_base_url() -> str: return os.environ.get( "MODEL_SERVICE_MODELS_BASE_URL", - "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", ) From 5bbc9b51edded9af6dab625604d9942fc4547fb6 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 31 May 2026 13:52:01 +0200 Subject: [PATCH 33/35] fix: uv lock and mypy --- builders/test_runner.py | 3 +- builders/throughput_runner.py | 2 +- tests/model_snapshots/generate_references.py | 31 +++++++++------- .../test_binary_classifier_model_snapshot.py | 20 +++++++++-- .../test_prov_gigapath_model_snapshot.py | 2 +- ...st_semantic_segmentation_model_snapshot.py | 2 +- .../test_virchow2_model_snapshot.py | 2 +- tests/perf_throughput.py | 21 +++++++++-- uv.lock | 36 +++++++++++++++++++ 9 files changed, 95 insertions(+), 24 deletions(-) diff --git a/builders/test_runner.py b/builders/test_runner.py index 3baf09f..d98c97a 100644 --- a/builders/test_runner.py +++ b/builders/test_runner.py @@ -19,7 +19,6 @@ def __init__(self) -> None: @fastapi.post("/") def run(self) -> Response: - result = subprocess.run( [ sys.executable, @@ -42,4 +41,4 @@ def run(self) -> Response: return Response(content=output, media_type="text/plain") -app = TestRunner.bind() +app = TestRunner.bind() # type: ignore[attr-defined] diff --git a/builders/throughput_runner.py b/builders/throughput_runner.py index acb79b9..976d596 100644 --- a/builders/throughput_runner.py +++ b/builders/throughput_runner.py @@ -55,4 +55,4 @@ def run( return Response(content=output, media_type="text/plain") -app = ThroughputRunner.bind() +app = ThroughputRunner.bind() # type: ignore[attr-defined] diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index 4543302..99ed0ee 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -1,6 +1,7 @@ import json import os from pathlib import Path +from typing import TypedDict import numpy as np from rationai import Client @@ -11,13 +12,25 @@ OUT_DIR = Path("/mnt/test_refs") MODELS_BASE_URL = os.environ.get( "MODEL_SERVICE_MODELS_BASE_URL", - "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", ) BINARY_POSITIVE_THRESHOLD = 0.5 + +class CaseConfig(TypedDict): + label: str + slide_path: str + model_id: str + type: str + tile_size: int + level: int + x: int + y: int + + # Keep only one active case here. Store other candidate slides in new_images.txt # and swap them in when you want to regenerate a different reference. -ACTIVE_CASE = { +ACTIVE_CASE: CaseConfig = { "label": "prov-gigapath", "slide_path": "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_1367-01-0.mrxs", "model_id": "prov-gigapath", @@ -28,7 +41,7 @@ "y": 70000, } -CASES = [ACTIVE_CASE] +CASES: list[CaseConfig] = [ACTIVE_CASE] def generate_references() -> None: @@ -55,9 +68,7 @@ def generate_references() -> None: try: if mtype == "binary": score = float( - client.models.classify_image( - model=model_id, image=tile, timeout=600 - ) + client.models.classify_image(model=model_id, image=tile) ) out_file = OUT_DIR / f"{label}_{model_id}_expected.json" with out_file.open("w") as f: @@ -82,9 +93,7 @@ def generate_references() -> None: elif mtype == "semantic": arr = np.asarray( - client.models.segment_image( - model=model_id, image=tile, timeout=1200 - ) + client.models.segment_image(model=model_id, image=tile) ) out_file = OUT_DIR / f"{label}_{model_id}_expected.npy" np.save(out_file, arr) @@ -92,9 +101,7 @@ def generate_references() -> None: elif mtype == "embed": arr = np.asarray( - client.models.embed_image( - model=model_id, image=tile, timeout=1200 - ) + client.models.embed_image(model=model_id, image=tile) ) out_file = OUT_DIR / f"{label}_{model_id}_expected.npy" np.save(out_file, arr) diff --git a/tests/model_snapshots/test_binary_classifier_model_snapshot.py b/tests/model_snapshots/test_binary_classifier_model_snapshot.py index ca9d8ea..dde86eb 100644 --- a/tests/model_snapshots/test_binary_classifier_model_snapshot.py +++ b/tests/model_snapshots/test_binary_classifier_model_snapshot.py @@ -9,25 +9,27 @@ BINARY_POSITIVE_THRESHOLD = 0.5 -@pytest.mark.parametrize( - "label, slide_path, x, y", +@pytest.mark.parametrize( # type: ignore[untyped-decorator] + "label, slide_path, x, y, is_positive", [ ( "prostate_positive", "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_2386-06-1.mrxs", 43390, 45865, + True, ), ( "prostate_negative", "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_0845-02-0.mrxs", 32950, 108990, + False, ), ], ) def test_prostate_classifier_snapshot( - label: str, slide_path: str, x: int, y: int + label: str, slide_path: str, x: int, y: int, is_positive: bool ) -> None: model_id = "prostate-classifier-1" json_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.json") @@ -39,6 +41,18 @@ def test_prostate_classifier_snapshot( threshold = reference.get("threshold", BINARY_POSITIVE_THRESHOLD) expected_is_positive = reference.get("expected_is_positive") assert expected_is_positive is not None + assert expected_is_positive == is_positive + + if is_positive: + assert expected_score >= threshold, ( + f"Reference score {expected_score:.4f} is below positive threshold {threshold:.3f} — " + "was the reference generated on the correct tile?" + ) + else: + assert expected_score < threshold, ( + f"Reference score {expected_score:.4f} is above negative threshold {threshold:.3f} — " + "was the reference generated on the correct tile?" + ) run_binary_classifier_case( model_id=model_id, diff --git a/tests/model_snapshots/test_prov_gigapath_model_snapshot.py b/tests/model_snapshots/test_prov_gigapath_model_snapshot.py index 9f6aad6..0df6f49 100644 --- a/tests/model_snapshots/test_prov_gigapath_model_snapshot.py +++ b/tests/model_snapshots/test_prov_gigapath_model_snapshot.py @@ -5,7 +5,7 @@ from tests.model_snapshots._shared import run_embed_case -@pytest.mark.parametrize( +@pytest.mark.parametrize( # type: ignore[untyped-decorator] "label, slide_path, x, y", [ ( diff --git a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py index 8ff06cc..50b638c 100644 --- a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py +++ b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py @@ -5,7 +5,7 @@ from tests.model_snapshots._shared import run_semantic_segmentation_case -@pytest.mark.parametrize( +@pytest.mark.parametrize( # type: ignore[untyped-decorator] "label, slide_path, x, y", [ ( diff --git a/tests/model_snapshots/test_virchow2_model_snapshot.py b/tests/model_snapshots/test_virchow2_model_snapshot.py index 4714005..005cad1 100644 --- a/tests/model_snapshots/test_virchow2_model_snapshot.py +++ b/tests/model_snapshots/test_virchow2_model_snapshot.py @@ -5,7 +5,7 @@ from tests.model_snapshots._shared import run_embed_case -@pytest.mark.parametrize( +@pytest.mark.parametrize( # type: ignore[untyped-decorator] "label, slide_path, x, y", [ ( diff --git a/tests/perf_throughput.py b/tests/perf_throughput.py index 06fa89e..e76ba04 100644 --- a/tests/perf_throughput.py +++ b/tests/perf_throughput.py @@ -6,6 +6,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass, field from threading import Lock +from typing import TypedDict import numpy as np from rationai import Client @@ -20,6 +21,20 @@ POOL_SIZE_DEFAULT = 64 +class ModelResult(TypedDict): + name: str + model_type: str + tile_size: int + elapsed_s: float + ok: int + fail_503: int + fail_other: int + throughput: float + p50: float + p95: float + p99: float + + @dataclass class Stats: ok: int = 0 @@ -41,7 +56,7 @@ def percentile(self, p: float) -> float: def _models_base_url() -> str: return os.environ.get( "MODEL_SERVICE_MODELS_BASE_URL", - "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", ) @@ -143,7 +158,7 @@ def run_model( timeout: float, pool_size: int, models_base_url: str, -) -> dict[str, object]: +) -> ModelResult: if pool_size <= 0: raise ValueError("pool_size must be > 0") @@ -248,7 +263,7 @@ def main() -> None: print(f"Timeout: {args.timeout}s") print() - results = [] + results: list[ModelResult] = [] for name, model_type, tile_size in models: if args.wait_ready: wait_for_ready( diff --git a/uv.lock b/uv.lock index 3ac8fef..ecad61f 100644 --- a/uv.lock +++ b/uv.lock @@ -1235,6 +1235,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = "2025-12-21T10:00:18.329Z" }, ] +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + [[package]] name = "itsdangerous" version = "2.2.0" @@ -1777,6 +1786,7 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "mypy" }, + { name = "pytest" }, { name = "ruff" }, ] docs = [ @@ -1798,6 +1808,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "mypy", specifier = ">=1.18.2" }, + { name = "pytest", specifier = ">=8.4.2" }, { name = "ruff", specifier = ">=0.14.6" }, ] docs = [ @@ -2570,6 +2581,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/75/a6/a0a304dc33b49145b21f4808d763822111e67d1c3a32b524a1baf947b6e1/platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917", size = 21348, upload-time = "2026-04-09T00:04:09.463Z" }, ] +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + [[package]] name = "prometheus-client" version = "0.25.0" @@ -2983,6 +3003,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/73/a7141a1a0559bf1a7aa42a11c879ceb19f02f5c6c371c6d57fd86cefd4d1/pyproj-3.7.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d9d25bae416a24397e0d85739f84d323b55f6511e45a522dd7d7eae70d10c7e4", size = 6391844, upload-time = "2025-08-14T12:05:40.745Z" }, ] +[[package]] +name = "pytest" +version = "9.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" From ab16f2379f2cc01fe3bc070d0f8f1357005f759c Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 31 May 2026 14:10:05 +0200 Subject: [PATCH 34/35] review fixes --- tests/model_snapshots/_shared.py | 18 ++++++---- tests/model_snapshots/generate_references.py | 34 +++++++------------ .../test_binary_classifier_model_snapshot.py | 25 +++----------- .../test_prov_gigapath_model_snapshot.py | 8 ++--- ...st_semantic_segmentation_model_snapshot.py | 8 ++--- .../test_virchow2_model_snapshot.py | 8 ++--- tests/perf_throughput.py | 21 +++--------- 7 files changed, 42 insertions(+), 80 deletions(-) diff --git a/tests/model_snapshots/_shared.py b/tests/model_snapshots/_shared.py index 122f2f7..ed40070 100644 --- a/tests/model_snapshots/_shared.py +++ b/tests/model_snapshots/_shared.py @@ -13,10 +13,14 @@ def _models_base_url() -> str: return os.environ.get( "MODEL_SERVICE_MODELS_BASE_URL", - "http://rayservice-model-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", + "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", ) +def test_refs_dir() -> Path: + return Path(os.environ.get("MODEL_SERVICE_TEST_REFS_DIR", "/mnt/test_refs")) + + def _read_tile_at( slide_path: str, x: int, y: int, tile_size: int, level: int ) -> NDArray[np.uint8]: @@ -99,6 +103,9 @@ def run_semantic_segmentation_case( with Client(models_base_url=_models_base_url(), timeout=timeout_s) as client: actual = np.asarray(client.models.segment_image(model=model_id, image=tile)) + if actual.shape != expected.shape: + pytest.fail(f"Shape mismatch: expected={expected.shape}, actual={actual.shape}") + max_diff = np.abs(actual.astype(np.float32) - expected.astype(np.float32)).max() if actual.ndim == 4: @@ -115,9 +122,6 @@ def run_semantic_segmentation_case( frac_05 = float((stats_slice >= 0.5).mean()) name = case_name or "case" - if actual.shape != expected.shape: - pytest.fail(f"Shape mismatch: expected={expected.shape}, actual={actual.shape}") - close_mask = np.isclose(actual, expected, rtol=rtol, atol=atol) if not close_mask.all(): mismatch_fraction = float((~close_mask).mean()) @@ -183,6 +187,9 @@ def run_embed_case( .astype(np.float32) ) + if actual.shape != expected.shape: + pytest.fail(f"Shape mismatch: expected={expected.shape}, actual={actual.shape}") + similarity = float( np.dot(actual, expected) / (np.linalg.norm(actual) * np.linalg.norm(expected)) ) @@ -190,9 +197,6 @@ def run_embed_case( expected_norm = float(np.linalg.norm(expected)) name = case_name or "case" - if actual.shape != expected.shape: - pytest.fail(f"Shape mismatch: expected={expected.shape}, actual={actual.shape}") - if similarity < min_cosine_similarity: pytest.fail( f"Embedding similarity too low: {similarity:.6f} < {min_cosine_similarity}" diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index 99ed0ee..001ca5b 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -1,36 +1,22 @@ import json import os -from pathlib import Path -from typing import TypedDict import numpy as np from rationai import Client -from tests.model_snapshots._shared import _read_tile_at +from tests.model_snapshots._shared import _read_tile_at, test_refs_dir -OUT_DIR = Path("/mnt/test_refs") +OUT_DIR = test_refs_dir() MODELS_BASE_URL = os.environ.get( "MODEL_SERVICE_MODELS_BASE_URL", "http://rayservice-model-tests-serve-svc.rationai-jobs-ns.svc.cluster.local:8000", ) BINARY_POSITIVE_THRESHOLD = 0.5 - -class CaseConfig(TypedDict): - label: str - slide_path: str - model_id: str - type: str - tile_size: int - level: int - x: int - y: int - - # Keep only one active case here. Store other candidate slides in new_images.txt # and swap them in when you want to regenerate a different reference. -ACTIVE_CASE: CaseConfig = { +ACTIVE_CASE = { "label": "prov-gigapath", "slide_path": "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_1367-01-0.mrxs", "model_id": "prov-gigapath", @@ -41,7 +27,7 @@ class CaseConfig(TypedDict): "y": 70000, } -CASES: list[CaseConfig] = [ACTIVE_CASE] +CASES = [ACTIVE_CASE] def generate_references() -> None: @@ -68,7 +54,9 @@ def generate_references() -> None: try: if mtype == "binary": score = float( - client.models.classify_image(model=model_id, image=tile) + client.models.classify_image( + model=model_id, image=tile, timeout=600 + ) ) out_file = OUT_DIR / f"{label}_{model_id}_expected.json" with out_file.open("w") as f: @@ -93,7 +81,9 @@ def generate_references() -> None: elif mtype == "semantic": arr = np.asarray( - client.models.segment_image(model=model_id, image=tile) + client.models.segment_image( + model=model_id, image=tile, timeout=1200 + ) ) out_file = OUT_DIR / f"{label}_{model_id}_expected.npy" np.save(out_file, arr) @@ -101,7 +91,9 @@ def generate_references() -> None: elif mtype == "embed": arr = np.asarray( - client.models.embed_image(model=model_id, image=tile) + client.models.embed_image( + model=model_id, image=tile, timeout=1200 + ) ) out_file = OUT_DIR / f"{label}_{model_id}_expected.npy" np.save(out_file, arr) diff --git a/tests/model_snapshots/test_binary_classifier_model_snapshot.py b/tests/model_snapshots/test_binary_classifier_model_snapshot.py index dde86eb..fcbef95 100644 --- a/tests/model_snapshots/test_binary_classifier_model_snapshot.py +++ b/tests/model_snapshots/test_binary_classifier_model_snapshot.py @@ -1,38 +1,35 @@ import json -from pathlib import Path import pytest -from tests.model_snapshots._shared import run_binary_classifier_case +from tests.model_snapshots._shared import run_binary_classifier_case, test_refs_dir BINARY_POSITIVE_THRESHOLD = 0.5 -@pytest.mark.parametrize( # type: ignore[untyped-decorator] - "label, slide_path, x, y, is_positive", +@pytest.mark.parametrize( + "label, slide_path, x, y", [ ( "prostate_positive", "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_2386-06-1.mrxs", 43390, 45865, - True, ), ( "prostate_negative", "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_0845-02-0.mrxs", 32950, 108990, - False, ), ], ) def test_prostate_classifier_snapshot( - label: str, slide_path: str, x: int, y: int, is_positive: bool + label: str, slide_path: str, x: int, y: int ) -> None: model_id = "prostate-classifier-1" - json_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.json") + json_path = test_refs_dir() / f"{label}_{model_id}_expected.json" with json_path.open() as f: reference = json.load(f) @@ -41,18 +38,6 @@ def test_prostate_classifier_snapshot( threshold = reference.get("threshold", BINARY_POSITIVE_THRESHOLD) expected_is_positive = reference.get("expected_is_positive") assert expected_is_positive is not None - assert expected_is_positive == is_positive - - if is_positive: - assert expected_score >= threshold, ( - f"Reference score {expected_score:.4f} is below positive threshold {threshold:.3f} — " - "was the reference generated on the correct tile?" - ) - else: - assert expected_score < threshold, ( - f"Reference score {expected_score:.4f} is above negative threshold {threshold:.3f} — " - "was the reference generated on the correct tile?" - ) run_binary_classifier_case( model_id=model_id, diff --git a/tests/model_snapshots/test_prov_gigapath_model_snapshot.py b/tests/model_snapshots/test_prov_gigapath_model_snapshot.py index 0df6f49..f206d19 100644 --- a/tests/model_snapshots/test_prov_gigapath_model_snapshot.py +++ b/tests/model_snapshots/test_prov_gigapath_model_snapshot.py @@ -1,11 +1,9 @@ -from pathlib import Path - import pytest -from tests.model_snapshots._shared import run_embed_case +from tests.model_snapshots._shared import run_embed_case, test_refs_dir -@pytest.mark.parametrize( # type: ignore[untyped-decorator] +@pytest.mark.parametrize( "label, slide_path, x, y", [ ( @@ -18,7 +16,7 @@ ) def test_prov_gigapath(label: str, slide_path: str, x: int, y: int) -> None: model_id = "prov-gigapath" - expected_array_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.npy") + expected_array_path = test_refs_dir() / f"{label}_{model_id}_expected.npy" run_embed_case( model_id=model_id, diff --git a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py index 50b638c..7022871 100644 --- a/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py +++ b/tests/model_snapshots/test_semantic_segmentation_model_snapshot.py @@ -1,11 +1,9 @@ -from pathlib import Path - import pytest -from tests.model_snapshots._shared import run_semantic_segmentation_case +from tests.model_snapshots._shared import run_semantic_segmentation_case, test_refs_dir -@pytest.mark.parametrize( # type: ignore[untyped-decorator] +@pytest.mark.parametrize( "label, slide_path, x, y", [ ( @@ -18,7 +16,7 @@ ) def test_semantic_episeg(label: str, slide_path: str, x: int, y: int) -> None: model_id = "episeg-1" - expected_array_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.npy") + expected_array_path = test_refs_dir() / f"{label}_{model_id}_expected.npy" run_semantic_segmentation_case( model_id=model_id, diff --git a/tests/model_snapshots/test_virchow2_model_snapshot.py b/tests/model_snapshots/test_virchow2_model_snapshot.py index 005cad1..8fa79d8 100644 --- a/tests/model_snapshots/test_virchow2_model_snapshot.py +++ b/tests/model_snapshots/test_virchow2_model_snapshot.py @@ -1,11 +1,9 @@ -from pathlib import Path - import pytest -from tests.model_snapshots._shared import run_embed_case +from tests.model_snapshots._shared import run_embed_case, test_refs_dir -@pytest.mark.parametrize( # type: ignore[untyped-decorator] +@pytest.mark.parametrize( "label, slide_path, x, y", [ ( @@ -18,7 +16,7 @@ ) def test_virchow2(label: str, slide_path: str, x: int, y: int) -> None: model_id = "virchow2" - expected_array_path = Path(f"/mnt/test_refs/{label}_{model_id}_expected.npy") + expected_array_path = test_refs_dir() / f"{label}_{model_id}_expected.npy" run_embed_case( model_id=model_id, diff --git a/tests/perf_throughput.py b/tests/perf_throughput.py index e76ba04..037f78d 100644 --- a/tests/perf_throughput.py +++ b/tests/perf_throughput.py @@ -6,7 +6,6 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass, field from threading import Lock -from typing import TypedDict import numpy as np from rationai import Client @@ -21,20 +20,6 @@ POOL_SIZE_DEFAULT = 64 -class ModelResult(TypedDict): - name: str - model_type: str - tile_size: int - elapsed_s: float - ok: int - fail_503: int - fail_other: int - throughput: float - p50: float - p95: float - p99: float - - @dataclass class Stats: ok: int = 0 @@ -102,6 +87,8 @@ def wait_for_ready( print(f"{model_id} ready after {time.perf_counter() - start:.1f}s") return except Exception as exc: + if isinstance(exc, ValueError): + raise status_code = getattr(getattr(exc, "response", None), "status_code", None) if status_code not in (None, 503, 504): raise @@ -158,7 +145,7 @@ def run_model( timeout: float, pool_size: int, models_base_url: str, -) -> ModelResult: +) -> dict[str, object]: if pool_size <= 0: raise ValueError("pool_size must be > 0") @@ -263,7 +250,7 @@ def main() -> None: print(f"Timeout: {args.timeout}s") print() - results: list[ModelResult] = [] + results = [] for name, model_type, tile_size in models: if args.wait_ready: wait_for_ready( From 11c1dcd36f286e9ddb0a6fa404f810091ad54da3 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 31 May 2026 14:13:30 +0200 Subject: [PATCH 35/35] new classes --- tests/model_snapshots/generate_references.py | 17 +++++++++++++++-- tests/perf_throughput.py | 19 +++++++++++++++++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/tests/model_snapshots/generate_references.py b/tests/model_snapshots/generate_references.py index 001ca5b..24c49b5 100644 --- a/tests/model_snapshots/generate_references.py +++ b/tests/model_snapshots/generate_references.py @@ -1,5 +1,6 @@ import json import os +from typing import TypedDict import numpy as np from rationai import Client @@ -14,9 +15,21 @@ ) BINARY_POSITIVE_THRESHOLD = 0.5 + +class CaseConfig(TypedDict): + label: str + slide_path: str + model_id: str + type: str + tile_size: int + level: int + x: int + y: int + + # Keep only one active case here. Store other candidate slides in new_images.txt # and swap them in when you want to regenerate a different reference. -ACTIVE_CASE = { +ACTIVE_CASE: CaseConfig = { "label": "prov-gigapath", "slide_path": "/mnt/data/MOU/prostate/tile_level_annotations/P-2016_1367-01-0.mrxs", "model_id": "prov-gigapath", @@ -27,7 +40,7 @@ "y": 70000, } -CASES = [ACTIVE_CASE] +CASES: list[CaseConfig] = [ACTIVE_CASE] def generate_references() -> None: diff --git a/tests/perf_throughput.py b/tests/perf_throughput.py index 037f78d..2be004d 100644 --- a/tests/perf_throughput.py +++ b/tests/perf_throughput.py @@ -6,6 +6,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass, field from threading import Lock +from typing import TypedDict import numpy as np from rationai import Client @@ -20,6 +21,20 @@ POOL_SIZE_DEFAULT = 64 +class ModelResult(TypedDict): + name: str + model_type: str + tile_size: int + elapsed_s: float + ok: int + fail_503: int + fail_other: int + throughput: float + p50: float + p95: float + p99: float + + @dataclass class Stats: ok: int = 0 @@ -145,7 +160,7 @@ def run_model( timeout: float, pool_size: int, models_base_url: str, -) -> dict[str, object]: +) -> ModelResult: if pool_size <= 0: raise ValueError("pool_size must be > 0") @@ -250,7 +265,7 @@ def main() -> None: print(f"Timeout: {args.timeout}s") print() - results = [] + results: list[ModelResult] = [] for name, model_type, tile_size in models: if args.wait_ready: wait_for_ready(