benevolentbandwidth · HassanDawy · Jun 27, 2026 · Jun 27, 2026 · Jun 27, 2026 · Jun 27, 2026
diff --git a/demos/pipeline_diagram.png b/demos/pipeline_diagram.png
diff --git a/demos/pipeline_diagram.py b/demos/pipeline_diagram.py
@@ -0,0 +1,102 @@
+"""Render an end-to-end architecture diagram of the Second Look pipeline.
+
+This is a factual flow of the system as it exists in the code today:
+GCS -> retriever -> manifest -> preprocess -> baseline model -> checkpoint ->
+evaluate / tier UX. The on-device TF Lite target is drawn as a future step.
+
+Usage:
+    python demos/pipeline_diagram.py [--out PATH]
+"""
+import argparse
+from pathlib import Path
+
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+from matplotlib.patches import FancyBboxPatch, FancyArrowPatch
+
+REPO = Path(__file__).resolve().parents[1]
+
+# Diagram content. Each lane is (lane heading, hex colour, stages) and each
+# stage is (title, subtitle, source label). main() draws the lanes as columns
+# from left to right and the stages of a lane from top to bottom, in the order
+# listed here; a "\n" in a subtitle is a manual line break inside the box.
+LANES = [
+    ("DATA", "#1565c0", [
+        ("Google Cloud Storage", "CBIS-DDSM (RSNA, VinDr wired)", "gs://b2-foundation"),
+        ("Retriever", "download CSV + PNG, local cache\n(skip-if-cached)", "retriever.py"),
+        ("Manifest builder", "label -> WORTH / NOT WORTH,\npatient-disjoint splits", "manifest.py -> manifest.csv"),
+    ]),
+    ("PREPROCESS", "#00838f", [
+        ("Quality gate", "reject blank / low-contrast /\nlow-resolution inputs", "quality.py"),
+        ("Preprocessor", "grayscale -> CLAHE -> breast mask\n-> pectoral removal -> orient -> 224x224", "preprocessor.py"),
+    ]),
+    ("MODEL", "#6a1b9a", [
+        ("Baseline classifier", "1x1 conv -> MobileNetV2 (frozen)\n-> GAP -> dropout -> sigmoid", "baseline_classifier.py"),
+        ("Training", "tf.data + class weighting,\nbest checkpoint by val AUC", "train.py -> best.keras"),
+    ]),
+    ("EVALUATE / UX", "#2e7d32", [
+        ("Evaluation", "sensitivity-first; WORTH floor 0.80;\nconfusion matrix", "evaluate.py"),
+        ("Result + tiers", "Worth / Not worth a second look;\nLow / Moderate / Elevated", "label_mapper.py"),
+        ("On-device (next)", "TF Lite, runs on phone/browser;\nstores & transmits nothing", "future"),
+    ]),
+]
+
+
+def _box(ax, x, y, w, h, title, subtitle, src, color):
+    """Draw one stage box: a rounded outline plus three centred text lines.
+
+    ``(x, y)`` is the box's lower-left corner and ``w``/``h`` its size. The
+    title and subtitle hang from the top edge; the source label sits just
+    above the bottom edge. The outline and title use ``color``.
+    """
+    outline = FancyBboxPatch(
+        (x, y), w, h,
+        boxstyle="round,pad=0.02,rounding_size=0.06",
+        linewidth=1.6, edgecolor=color, facecolor="white", zorder=2,
+    )
+    ax.add_patch(outline)
+
+    mid_x = x + w / 2
+    # (vertical position, text, vertical alignment, font styling) per line.
+    text_rows = [
+        (y + h - 0.16, title, "top",
+         dict(fontsize=10.5, fontweight="bold", color=color)),
+        (y + h - 0.40, subtitle, "top",
+         dict(fontsize=8.0, color="#333")),
+        (y + 0.07, src, "bottom",
+         dict(fontsize=7.0, style="italic", color="#888")),
+    ]
+    for text_y, text, valign, font in text_rows:
+        ax.text(mid_x, text_y, text, ha="center", va=valign, zorder=3, **font)
+
+
+def main() -> None:
+    """Lay out LANES as columns of boxes joined by arrows and save a PNG.
+
+    Command line:
+        --out PATH   where to write the image (default:
+                     demos/pipeline_diagram.png under the repository root).
+                     Missing parent directories are created.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--out", default=str(REPO / "demos" / "pipeline_diagram.png"))
+    args = ap.parse_args()
+
+    box_w, box_h, gap_y = 3.4, 1.15, 0.45
+    lane_gap = 0.7
+    max_rows = max(len(items) for _, _, items in LANES)
+
+    # The axes limits below reuse these figure dimensions, so every layout
+    # constant in this function is expressed in axes data units.
+    fig_w = len(LANES) * (box_w + lane_gap)
+    fig_h = max_rows * (box_h + gap_y) + 1.2
+
+    def lane_x(li: int) -> float:
+        """Left edge shared by every box in lane ``li``."""
+        return li * (box_w + lane_gap) + 0.35
+
+    def row_y(ri: int) -> float:
+        """Bottom edge of the box in row ``ri`` (row 0 is the top row)."""
+        return fig_h - 1.2 - (ri + 1) * (box_h + gap_y) + gap_y
+
+    fig, ax = plt.subplots(figsize=(fig_w, fig_h))
+    ax.set_xlim(0, fig_w)
+    ax.set_ylim(0, fig_h)
+    ax.axis("off")
+    fig.suptitle("Second Look - end-to-end pipeline", fontsize=15, fontweight="bold")
+
+    for li, (lane, color, items) in enumerate(LANES):
+        x = lane_x(li)
+        ax.text(x + box_w / 2, fig_h - 0.55, lane, ha="center", va="center",
+                fontsize=11, fontweight="bold", color=color)
+        for ri, (title, subtitle, src) in enumerate(items):
+            y = row_y(ri)
+            _box(ax, x, y, box_w, box_h, title, subtitle, src, color)
+            # Vertical arrow: bottom-centre of the box above -> top-centre of
+            # this box.
+            if ri > 0:
+                ax.add_patch(FancyArrowPatch(
+                    (x + box_w / 2, row_y(ri - 1)), (x + box_w / 2, y + box_h),
+                    arrowstyle="-|>", mutation_scale=14, color="#999", zorder=1))
+        # Horizontal arrow from the right edge of this lane's last box to the
+        # left edge of the next lane's first box. A lane without stages has no
+        # box to start from, so it is skipped.
+        if items and li < len(LANES) - 1:
+            last_y = row_y(len(items) - 1)
+            ax.add_patch(FancyArrowPatch(
+                (x + box_w, last_y + box_h / 2),
+                (lane_x(li + 1), row_y(0) + box_h / 2),
+                arrowstyle="-|>", mutation_scale=16, color="#555",
+                connectionstyle="arc3,rad=0.0", zorder=1))
+
+    # savefig does not create directories, so make sure the target exists.
+    out_path = Path(args.out)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    fig.savefig(out_path, dpi=140, bbox_inches="tight")
+    # Release the figure explicitly; pyplot otherwise keeps it registered.
+    plt.close(fig)
+    print(f"Wrote {args.out}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/demos/preprocessing_grid.png b/demos/preprocessing_grid.png
diff --git a/demos/preprocessing_grid.py b/demos/preprocessing_grid.py
@@ -0,0 +1,97 @@
+"""Generate a before/after preprocessing panel for the demo.
+
+Renders each stage of data_pipeline.preprocessor on a real CBIS-DDSM scan:
+raw -> grayscale -> CLAHE -> breast mask -> masked -> pectoral-removed ->
+orientation-normalized -> final 224x224 model input.
+
+Usage:
+    python demos/preprocessing_grid.py [--case CASE_FOLDER] [--out PATH]
+
+Reads from data/manifest.csv (cached images only). Writes a PNG suitable for
+a slide.
+"""
+import argparse
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+if str(REPO) not in sys.path:
+    sys.path.insert(0, str(REPO))
+
+import cv2
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import pandas as pd
+
+from config.constants import INPUT_SIZE
+from data_pipeline import preprocessor as pp
+from data_pipeline._imaging_utils import breast_mask, to_grayscale
+
+
+def pick_case(manifest_path: Path, case: str | None) -> pd.Series:
+    """Choose the manifest row whose image the panel is rendered from.
+
+    Only rows with a non-blank ``image_local_path`` are considered.
+
+    Args:
+        manifest_path: CSV providing at least the columns
+            ``image_local_path``, ``case_folder`` and ``canonical_label``.
+        case: Exact ``case_folder`` value to select. ``None`` or an empty
+            string lets the function pick a default row.
+
+    Returns:
+        The first matching row. Without ``case`` this is the first row whose
+        ``case_folder`` contains "MLO" (case-insensitive) and whose
+        ``canonical_label`` equals 1; if there is none, the first row that
+        has an image.
+
+    Raises:
+        SystemExit: If ``case`` is given but not found among the rows with an
+            image, or if no row has an image at all.
+    """
+    m = pd.read_csv(manifest_path)
+    has_img = ~(m["image_local_path"].isna()
+                | (m["image_local_path"].astype(str).str.strip() == ""))
+    m = m[has_img].copy()
+    if case:
+        sel = m[m["case_folder"] == case]
+        if sel.empty:
+            raise SystemExit(f"Case {case!r} not found among cached images.")
+        return sel.iloc[0]
+    if m.empty:
+        # Without this guard the fallback below would fail with a bare
+        # IndexError from .iloc[0] instead of a readable message.
+        raise SystemExit(f"No cached images listed in {manifest_path}.")
+    # Default: a positive MLO case (pectoral triangle visible).
+    # na=False makes a missing case_folder an explicit "no match" rather than
+    # leaving a NaN inside the boolean mask.
+    mlo_pos = m[m["case_folder"].str.contains("MLO", case=False, na=False)
+                & (m["canonical_label"] == 1)]
+    return (mlo_pos if not mlo_pos.empty else m).iloc[0]
+
+
+def build_stages(path: str):
+    """Run the preprocessing steps one by one, keeping every intermediate.
+
+    Returns a list of ``(panel title, image)`` pairs in pipeline order; the
+    last entry is the oriented image resized to ``INPUT_SIZE``.
+    """
+    stages = []
+
+    def keep(title, img):
+        # Record the stage for the panel and hand the image to the next step.
+        stages.append((title, img))
+        return img
+
+    source = keep("1. Raw scan", pp.load_image(path))
+    grey = keep("2. Grayscale", to_grayscale(source))
+    enhanced = keep("3. CLAHE contrast", pp._apply_clahe(grey))
+    # The mask is computed once and reused by the three steps that follow.
+    mask = keep("4. Breast mask", breast_mask(enhanced))
+    foreground = keep("5. Background removed",
+                      cv2.bitwise_and(enhanced, enhanced, mask=mask))
+    trimmed = keep("6. Pectoral removed", pp._remove_pectoral(foreground, mask))
+    upright = keep("7. Orientation normalized",
+                   pp._normalize_orientation(trimmed, mask))
+    keep(f"8. Model input {INPUT_SIZE[0]}x{INPUT_SIZE[1]}",
+         cv2.resize(upright, INPUT_SIZE, interpolation=cv2.INTER_AREA))
+    return stages
+
+
+def main() -> None:
+    """Parse the CLI options, pick a case and save the 2x4 stage panel.
+
+    Command line:
+        --case CASE_FOLDER   exact case to render (default: chosen by
+                             pick_case).
+        --manifest PATH      manifest CSV (default: data/manifest.csv).
+        --out PATH           output PNG (default:
+                             demos/preprocessing_grid.png).
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--case", default=None)
+    parser.add_argument("--manifest", default=str(REPO / "data" / "manifest.csv"))
+    parser.add_argument("--out", default=str(REPO / "demos" / "preprocessing_grid.png"))
+    opts = parser.parse_args()
+
+    row = pick_case(Path(opts.manifest), opts.case)
+    if int(row["canonical_label"]) == 1:
+        label = "WORTH_SECOND_LOOK"
+    else:
+        label = "NOT_WORTH_SECOND_LOOK"
+    case_name = row["case_folder"]
+    stages = build_stages(row["image_local_path"])
+
+    fig, axes = plt.subplots(2, 4, figsize=(16, 8.5))
+    heading = f"Second Look — preprocessing pipeline\n{case_name}   (label: {label})"
+    fig.suptitle(heading, fontsize=15, fontweight="bold")
+    # One panel per stage, filled row by row.
+    for panel, (title, img) in zip(axes.ravel(), stages):
+        panel.imshow(img, cmap="gray")
+        panel.set_title(title, fontsize=11)
+        panel.axis("off")
+    # Keep the top 6% of the figure free for the two-line heading.
+    fig.tight_layout(rect=(0, 0, 1, 0.94))
+    fig.savefig(opts.out, dpi=130, bbox_inches="tight")
+    print(f"Wrote {opts.out}")
+    print(f"Case: {case_name}  label: {label}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/demos/second_look_app.py b/demos/second_look_app.py
@@ -0,0 +1,140 @@
+"""Second Look — demo UI (plumbing prototype).
+
+Single-screen Gradio app that demonstrates the end-to-end on-device flow:
+    image -> preprocessing -> binary model -> confidence -> concern tier.
+
+IMPORTANT: the bundled checkpoint is a 1-epoch smoke model. Predictions are
+PLACEHOLDERS and carry no clinical meaning. The banner in the UI says so; do
+not remove it. This app exists to show the pipeline + tier UX, not performance.
+
+Run:
+    python demos/second_look_app.py
+then open the printed http://127.0.0.1:7860 URL.
+"""
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+if str(REPO) not in sys.path:
+    sys.path.insert(0, str(REPO))
+
+import numpy as np
+import pandas as pd
+import gradio as gr
+import tensorflow as tf
+
+from data_pipeline.preprocessor import preprocess, load_image
+from data_pipeline.label_mapper import confidence_to_tier, display_label
+
+# Prefer the larger overnight checkpoint if present, else the 1-epoch smoke
+# model. Either way the model is a frozen-head baseline — predictions remain
+# placeholders and the UI banner says so.
+# Checkpoint paths in order of preference (first existing one wins).
+_CKPT_CANDIDATES = [
+    REPO / "modeling" / "checkpoints" / "overnight" / "best.keras",
+    REPO / "modeling" / "checkpoints" / "smoke" / "best.keras",
+]
+# If no candidate exists this still holds the last candidate's path, which is
+# then a non-existent file; `_model` below becomes None in that case.
+CKPT = next((p for p in _CKPT_CANDIDATES if p.exists()), _CKPT_CANDIDATES[-1])
+# Manifest CSV that the sample dropdown is populated from.
+MANIFEST = REPO / "data" / "manifest.csv"
+
+# Colour used for the concern-tier card, keyed by the tier name that
+# confidence_to_tier() returns.
+TIER_COLORS = {"Low": "#2e7d32", "Moderate": "#f9a825", "Elevated": "#c62828"}
+
+# Binary decision threshold on P(WORTH_SECOND_LOOK). 0.5 is the default
+# operating point; the real threshold will be tuned to the sensitivity floor
+# once the evaluation protocol is locked.
+WORTH_THRESHOLD = 0.5
+
+# Loaded once, at import time. None when the checkpoint file is missing, in
+# which case analyze() shows "No checkpoint found." instead of predicting.
+_model = tf.keras.models.load_model(str(CKPT)) if CKPT.exists() else None
+
+
+def _sample_choices() -> dict[str, str]:
+    """Build the dropdown entries: display label -> cached image path.
+
+    Reads MANIFEST, keeps rows with a non-blank ``image_local_path`` and uses
+    the first 20 of them. Returns an empty dict when the manifest is missing.
+    """
+    if not MANIFEST.exists():
+        return {}
+    manifest = pd.read_csv(MANIFEST)
+    paths = manifest["image_local_path"]
+    missing = paths.isna() | (paths.astype(str).str.strip() == "")
+    cached = manifest[~missing]
+
+    def describe(row) -> str:
+        # The ground-truth label is shown next to the case name in the UI.
+        truth = "WORTH" if int(row["canonical_label"]) == 1 else "NOT WORTH"
+        return f"{row['case_folder']}  (truth: {truth})"
+
+    return {describe(row): row["image_local_path"]
+            for _, row in cached.head(20).iterrows()}
+
+
+SAMPLES = _sample_choices()
+
+
+def _result_html(prob: float) -> str:
+    """Return the result panel HTML for one model output.
+
+    The panel has two cards: the binary verdict (``prob`` compared against
+    WORTH_THRESHOLD) and, below it, the concern tier together with the raw
+    output rounded to two decimals.
+    """
+    if prob >= WORTH_THRESHOLD:
+        verdict, vcolor, vicon = "Worth a second look", "#c62828", "⚠️"
+    else:
+        verdict, vcolor, vicon = "Not worth a second look", "#2e7d32", "✅"
+
+    tier = confidence_to_tier(prob)
+    tier_color = TIER_COLORS[tier]
+
+    # Primary: the binary classification, stated plainly.
+    verdict_card = "".join([
+        "<div style='text-align:center;padding:22px;border-radius:14px;",
+        f"background:{vcolor};color:white;font-family:sans-serif;'>",
+        "<div style='font-size:13px;letter-spacing:1px;opacity:0.9;'>RESULT</div>",
+        "<div style='font-size:32px;font-weight:800;margin-top:6px;'>",
+        f"{vicon} {verdict}</div></div>",
+    ])
+    # Supporting: the UX concern tier + the (placeholder) raw confidence.
+    tier_card = "".join([
+        "<div style='text-align:center;padding:12px;margin-top:10px;",
+        f"border-radius:12px;border:2px solid {tier_color};color:{tier_color};",
+        "font-family:sans-serif;'>",
+        "<div style='font-size:13px;opacity:0.85;'>Concern tier</div>",
+        f"<div style='font-size:22px;font-weight:700;'>{display_label(tier)}</div>",
+        "<div style='font-size:12px;color:#777;margin-top:6px;'>",
+        f"model output (placeholder): {prob:.2f}</div></div>",
+    ])
+    return verdict_card + tier_card
+
+
+def analyze(sample_key: str, uploaded: np.ndarray | None):
+    """Button callback: preprocess one image and render the model's verdict.
+
+    An uploaded image takes precedence over the dropdown selection. Returns
+    ``(preview image, result HTML)``; the preview is None when there is
+    nothing to analyse.
+    """
+    if uploaded is None:
+        if not sample_key or sample_key not in SAMPLES:
+            return None, "<div style='padding:18px;'>Pick a sample or upload an image.</div>"
+        image = load_image(SAMPLES[sample_key])
+    else:
+        image = uploaded
+
+    # Per the original author's note: (224, 224, 1) float32 in [0, 1].
+    model_input = preprocess(image)
+    preview = (model_input[:, :, 0] * 255).astype(np.uint8)
+
+    if _model is None:
+        return preview, "<div style='padding:18px;color:#c62828;'>No checkpoint found.</div>"
+
+    # Add a leading batch axis; the first value of the flattened output is
+    # used as the probability.
+    batch = model_input[None, ...]
+    scores = _model.predict(batch, verbose=0)
+    return preview, _result_html(float(scores.ravel()[0]))
+
+
+# Markdown shown at the top of the page. It carries the "placeholder model"
+# warning that the module docstring says must not be removed.
+BANNER = (
+    "## 🔍 Second Look — pipeline prototype\n"
+    "**⚠️ Placeholder model — NOT yet trained.** The checkpoint behind this app "
+    "is a 1-epoch smoke model; tiers shown are *meaningless* and for plumbing "
+    "demonstration only. This screen shows the **preprocessing → binary model → "
+    "concern-tier UX**, not real performance. Nothing is uploaded or stored — "
+    "the target is fully on-device."
+)
+
+# Single-screen layout, built at import time: inputs in the left column
+# (sample dropdown, upload, run button), outputs in the right column
+# (preprocessed preview, result HTML).
+with gr.Blocks(title="Second Look (prototype)") as demo:
+    gr.Markdown(BANNER)
+    with gr.Row():
+        with gr.Column():
+            # Pre-select the first sample when any exist; otherwise start empty.
+            sample = gr.Dropdown(
+                choices=list(SAMPLES.keys()),
+                label="Sample mammogram (cached CBIS-DDSM)",
+                value=(list(SAMPLES.keys())[0] if SAMPLES else None),
+            )
+            # Uploads are requested as a numpy array in single-channel "L" mode.
+            upload = gr.Image(label="…or upload your own", type="numpy", image_mode="L")
+            run = gr.Button("Run Second Look", variant="primary")
+        with gr.Column():
+            out_img = gr.Image(label="Preprocessed model input (224×224)")
+            out_tier = gr.HTML()
+    # analyze(sample, upload) returns (preview image, result HTML) in this order.
+    run.click(analyze, inputs=[sample, upload], outputs=[out_img, out_tier])
+
+# Serve on the loopback address only, on the port named in the module docstring.
+if __name__ == "__main__":
+    demo.launch(server_name="127.0.0.1", server_port=7860, inbrowser=False)
diff --git a/modeling/evaluate.py b/modeling/evaluate.py
@@ -125,14 +125,14 @@ def _print_results(
     threshold: float,
 ) -> None:
     print("\n" + "=" * 60)
-    print("SECOND LOOK — BASELINE EVALUATION")
+    print("SECOND LOOK - BASELINE EVALUATION")
     print("=" * 60)
 
     print(f"\nDecision threshold: {threshold:.2f}")
 
     print("\nSensitivity (Recall) per class:")
     for i, name in enumerate(LABEL_ORDER):
-        marker = " ← PRIMARY METRIC" if i == POSITIVE_CLASS_INDEX else ""
+        marker = " <-- PRIMARY METRIC" if i == POSITIVE_CLASS_INDEX else ""
         print(f"  {name:25s}: {per_class_sensitivity[i]:.3f}{marker}")
 
     floor = WORTH_SENSITIVITY_FLOOR

diff --git a/requirements.txt b/requirements.txt
@@ -8,3 +8,4 @@ matplotlib>=3.7
 pytest>=7.4
 PyYAML>=6.0.2
 google-cloud-storage>=2.18
+gradio>=6.0  # demo UI (demos/second_look_app.py)