diff --git a/demos/pipeline_diagram.png b/demos/pipeline_diagram.png new file mode 100644 index 0000000..0e454d5 Binary files /dev/null and b/demos/pipeline_diagram.png differ diff --git a/demos/pipeline_diagram.py b/demos/pipeline_diagram.py new file mode 100644 index 0000000..0f2d3e3 --- /dev/null +++ b/demos/pipeline_diagram.py @@ -0,0 +1,102 @@ +"""Render an end-to-end architecture diagram of the Second Look pipeline. + +This is a factual flow of the system as it exists in the code today: +GCS -> retriever -> manifest -> preprocess -> baseline model -> checkpoint -> +evaluate / tier UX. The on-device TF Lite target is drawn as a future step. + +Usage: + python demos/pipeline_diagram.py [--out PATH] +""" +import argparse +from pathlib import Path + +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt +from matplotlib.patches import FancyBboxPatch, FancyArrowPatch + +REPO = Path(__file__).resolve().parents[1] + +# (title, subtitle, source file) per stage, grouped into lanes. +LANES = [ + ("DATA", "#1565c0", [ + ("Google Cloud Storage", "CBIS-DDSM (RSNA, VinDr wired)", "gs://b2-foundation"), + ("Retriever", "download CSV + PNG, local cache\n(skip-if-cached)", "retriever.py"), + ("Manifest builder", "label -> WORTH / NOT WORTH,\npatient-disjoint splits", "manifest.py -> manifest.csv"), + ]), + ("PREPROCESS", "#00838f", [ + ("Quality gate", "reject blank / low-contrast /\nlow-resolution inputs", "quality.py"), + ("Preprocessor", "grayscale -> CLAHE -> breast mask\n-> pectoral removal -> orient -> 224x224", "preprocessor.py"), + ]), + ("MODEL", "#6a1b9a", [ + ("Baseline classifier", "1x1 conv -> MobileNetV2 (frozen)\n-> GAP -> dropout -> sigmoid", "baseline_classifier.py"), + ("Training", "tf.data + class weighting,\nbest checkpoint by val AUC", "train.py -> best.keras"), + ]), + ("EVALUATE / UX", "#2e7d32", [ + ("Evaluation", "sensitivity-first; WORTH floor 0.80;\nconfusion matrix", "evaluate.py"), + ("Result + tiers", "Worth / Not worth a second look;\nLow / Moderate / Elevated", "label_mapper.py"), + ("On-device (next)", "TF Lite, runs on phone/browser;\nstores & transmits nothing", "future"), + ]), +] + + +def _box(ax, x, y, w, h, title, subtitle, src, color): + ax.add_patch(FancyBboxPatch( + (x, y), w, h, boxstyle="round,pad=0.02,rounding_size=0.06", + linewidth=1.6, edgecolor=color, facecolor="white", zorder=2)) + ax.text(x + w / 2, y + h - 0.16, title, ha="center", va="top", + fontsize=10.5, fontweight="bold", color=color, zorder=3) + ax.text(x + w / 2, y + h - 0.40, subtitle, ha="center", va="top", + fontsize=8.0, color="#333", zorder=3) + ax.text(x + w / 2, y + 0.07, src, ha="center", va="bottom", + fontsize=7.0, style="italic", color="#888", zorder=3) + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--out", default=str(REPO / "demos" / "pipeline_diagram.png")) + args = ap.parse_args() + + box_w, box_h, gap_y = 3.4, 1.15, 0.45 + lane_gap = 0.7 + max_rows = max(len(items) for _, _, items in LANES) + + fig_w = len(LANES) * (box_w + lane_gap) + fig_h = max_rows * (box_h + gap_y) + 1.2 + fig, ax = plt.subplots(figsize=(fig_w, fig_h)) + ax.set_xlim(0, fig_w) + ax.set_ylim(0, fig_h) + ax.axis("off") + fig.suptitle("Second Look - end-to-end pipeline", fontsize=15, fontweight="bold") + + centers = {} # (lane_idx, row_idx) -> (cx_top, cx_bottom anchors) + for li, (lane, color, items) in enumerate(LANES): + x = li * (box_w + lane_gap) + 0.35 + ax.text(x + box_w / 2, fig_h - 0.55, lane, ha="center", va="center", + fontsize=11, fontweight="bold", color=color) + for ri, (title, subtitle, src) in enumerate(items): + y = fig_h - 1.2 - (ri + 1) * (box_h + gap_y) + gap_y + _box(ax, x, y, box_w, box_h, title, subtitle, src, color) + centers[(li, ri)] = (x, y, box_w, box_h) + # Vertical arrow within a lane. + if ri > 0: + px, py, pw, ph = centers[(li, ri - 1)] + ax.add_patch(FancyArrowPatch( + (px + pw / 2, py), (x + box_w / 2, y + box_h), + arrowstyle="-|>", mutation_scale=14, color="#999", zorder=1)) + # Horizontal arrow to next lane (from last box of this lane to first of next). + if li < len(LANES) - 1: + lx, ly, lw, lh = centers[(li, len(items) - 1)] + nx = (li + 1) * (box_w + lane_gap) + 0.35 + ny0 = fig_h - 1.2 - (box_h + gap_y) + gap_y # first row y of next lane + ax.add_patch(FancyArrowPatch( + (lx + lw, ly + lh / 2), (nx, ny0 + box_h / 2), + arrowstyle="-|>", mutation_scale=16, color="#555", + connectionstyle="arc3,rad=0.0", zorder=1)) + + fig.savefig(args.out, dpi=140, bbox_inches="tight") + print(f"Wrote {args.out}") + + +if __name__ == "__main__": + main() diff --git a/demos/preprocessing_grid.png b/demos/preprocessing_grid.png new file mode 100644 index 0000000..b3e7079 Binary files /dev/null and b/demos/preprocessing_grid.png differ diff --git a/demos/preprocessing_grid.py b/demos/preprocessing_grid.py new file mode 100644 index 0000000..e16304c --- /dev/null +++ b/demos/preprocessing_grid.py @@ -0,0 +1,97 @@ +"""Generate a before/after preprocessing panel for the demo. + +Renders each stage of data_pipeline.preprocessor on a real CBIS-DDSM scan: +raw -> grayscale -> CLAHE -> breast mask -> masked -> pectoral-removed -> +orientation-normalized -> final 224x224 model input. + +Usage: + python demos/preprocessing_grid.py [--case CASE_FOLDER] [--out PATH] + +Reads from data/manifest.csv (cached images only). Writes a PNG suitable for +a slide. +""" +import argparse +import sys +from pathlib import Path + +REPO = Path(__file__).resolve().parents[1] +if str(REPO) not in sys.path: + sys.path.insert(0, str(REPO)) + +import cv2 +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt +import pandas as pd + +from config.constants import INPUT_SIZE +from data_pipeline import preprocessor as pp +from data_pipeline._imaging_utils import breast_mask, to_grayscale + + +def pick_case(manifest_path: Path, case: str | None) -> pd.Series: + m = pd.read_csv(manifest_path) + has_img = ~(m["image_local_path"].isna() + | (m["image_local_path"].astype(str).str.strip() == "")) + m = m[has_img].copy() + if case: + sel = m[m["case_folder"] == case] + if sel.empty: + raise SystemExit(f"Case {case!r} not found among cached images.") + return sel.iloc[0] + # Default: a positive MLO case (pectoral triangle visible). + mlo_pos = m[m["case_folder"].str.contains("MLO", case=False) + & (m["canonical_label"] == 1)] + return (mlo_pos if not mlo_pos.empty else m).iloc[0] + + +def build_stages(path: str): + raw = pp.load_image(path) + gray = to_grayscale(raw) + clahe = pp._apply_clahe(gray) + mask = breast_mask(clahe) + masked = cv2.bitwise_and(clahe, clahe, mask=mask) + no_pec = pp._remove_pectoral(masked, mask) + oriented = pp._normalize_orientation(no_pec, mask) + final = cv2.resize(oriented, INPUT_SIZE, interpolation=cv2.INTER_AREA) + return [ + ("1. Raw scan", raw), + ("2. Grayscale", gray), + ("3. CLAHE contrast", clahe), + ("4. Breast mask", mask), + ("5. Background removed", masked), + ("6. Pectoral removed", no_pec), + ("7. Orientation normalized", oriented), + (f"8. Model input {INPUT_SIZE[0]}x{INPUT_SIZE[1]}", final), + ] + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--case", default=None) + ap.add_argument("--manifest", default=str(REPO / "data" / "manifest.csv")) + ap.add_argument("--out", default=str(REPO / "demos" / "preprocessing_grid.png")) + args = ap.parse_args() + + row = pick_case(Path(args.manifest), args.case) + label = "WORTH_SECOND_LOOK" if int(row["canonical_label"]) == 1 else "NOT_WORTH_SECOND_LOOK" + stages = build_stages(row["image_local_path"]) + + fig, axes = plt.subplots(2, 4, figsize=(16, 8.5)) + fig.suptitle( + f"Second Look — preprocessing pipeline\n" + f"{row['case_folder']} (label: {label})", + fontsize=15, fontweight="bold", + ) + for ax, (title, img) in zip(axes.ravel(), stages): + ax.imshow(img, cmap="gray") + ax.set_title(title, fontsize=11) + ax.axis("off") + fig.tight_layout(rect=(0, 0, 1, 0.94)) + fig.savefig(args.out, dpi=130, bbox_inches="tight") + print(f"Wrote {args.out}") + print(f"Case: {row['case_folder']} label: {label}") + + +if __name__ == "__main__": + main() diff --git a/demos/second_look_app.py b/demos/second_look_app.py new file mode 100644 index 0000000..eaaf8a5 --- /dev/null +++ b/demos/second_look_app.py @@ -0,0 +1,140 @@ +"""Second Look — demo UI (plumbing prototype). + +Single-screen Gradio app that demonstrates the end-to-end on-device flow: + image -> preprocessing -> binary model -> confidence -> concern tier. + +IMPORTANT: the bundled checkpoint is a 1-epoch smoke model. Predictions are +PLACEHOLDERS and carry no clinical meaning. The banner in the UI says so; do +not remove it. This app exists to show the pipeline + tier UX, not performance. + +Run: + python demos/second_look_app.py +then open the printed http://127.0.0.1:7860 URL. +""" +import sys +from pathlib import Path + +REPO = Path(__file__).resolve().parents[1] +if str(REPO) not in sys.path: + sys.path.insert(0, str(REPO)) + +import numpy as np +import pandas as pd +import gradio as gr +import tensorflow as tf + +from data_pipeline.preprocessor import preprocess, load_image +from data_pipeline.label_mapper import confidence_to_tier, display_label + +# Prefer the larger overnight checkpoint if present, else the 1-epoch smoke +# model. Either way the model is a frozen-head baseline — predictions remain +# placeholders and the UI banner says so. +_CKPT_CANDIDATES = [ + REPO / "modeling" / "checkpoints" / "overnight" / "best.keras", + REPO / "modeling" / "checkpoints" / "smoke" / "best.keras", +] +CKPT = next((p for p in _CKPT_CANDIDATES if p.exists()), _CKPT_CANDIDATES[-1]) +MANIFEST = REPO / "data" / "manifest.csv" + +TIER_COLORS = {"Low": "#2e7d32", "Moderate": "#f9a825", "Elevated": "#c62828"} + +# Binary decision threshold on P(WORTH_SECOND_LOOK). 0.5 is the default +# operating point; the real threshold will be tuned to the sensitivity floor +# once the evaluation protocol is locked. +WORTH_THRESHOLD = 0.5 + +_model = tf.keras.models.load_model(str(CKPT)) if CKPT.exists() else None + + +def _sample_choices() -> dict[str, str]: + """Map a human label -> image path for cached manifest rows.""" + if not MANIFEST.exists(): + return {} + m = pd.read_csv(MANIFEST) + has = ~(m["image_local_path"].isna() + | (m["image_local_path"].astype(str).str.strip() == "")) + m = m[has] + out = {} + for _, r in m.head(20).iterrows(): + truth = "WORTH" if int(r["canonical_label"]) == 1 else "NOT WORTH" + out[f"{r['case_folder']} (truth: {truth})"] = r["image_local_path"] + return out + + +SAMPLES = _sample_choices() + + +def _result_html(prob: float) -> str: + """Render the binary verdict (primary) plus the concern tier (supporting).""" + worth = prob >= WORTH_THRESHOLD + if worth: + verdict, vcolor, vicon = "Worth a second look", "#c62828", "⚠️" + else: + verdict, vcolor, vicon = "Not worth a second look", "#2e7d32", "✅" + + tier = confidence_to_tier(prob) + tier_color = TIER_COLORS[tier] + + return ( + # Primary: the binary classification, stated plainly. + f"
" + f"
RESULT
" + f"
" + f"{vicon} {verdict}
" + # Supporting: the UX concern tier + the (placeholder) raw confidence. + f"
" + f"
Concern tier
" + f"
{display_label(tier)}
" + f"
" + f"model output (placeholder): {prob:.2f}
" + ) + + +def analyze(sample_key: str, uploaded: np.ndarray | None): + if uploaded is not None: + raw = uploaded + elif sample_key and sample_key in SAMPLES: + raw = load_image(SAMPLES[sample_key]) + else: + return None, "
Pick a sample or upload an image.
" + + proc = preprocess(raw) # (224, 224, 1) float32 [0,1] + disp = (proc[:, :, 0] * 255).astype(np.uint8) + + if _model is None: + return disp, "
No checkpoint found.
" + + prob = float(_model.predict(proc[None, ...], verbose=0).ravel()[0]) + return disp, _result_html(prob) + + +BANNER = ( + "## 🔍 Second Look — pipeline prototype\n" + "**⚠️ Placeholder model — NOT yet trained.** The checkpoint behind this app " + "is a 1-epoch smoke model; tiers shown are *meaningless* and for plumbing " + "demonstration only. This screen shows the **preprocessing → binary model → " + "concern-tier UX**, not real performance. Nothing is uploaded or stored — " + "the target is fully on-device." +) + +with gr.Blocks(title="Second Look (prototype)") as demo: + gr.Markdown(BANNER) + with gr.Row(): + with gr.Column(): + sample = gr.Dropdown( + choices=list(SAMPLES.keys()), + label="Sample mammogram (cached CBIS-DDSM)", + value=(list(SAMPLES.keys())[0] if SAMPLES else None), + ) + upload = gr.Image(label="…or upload your own", type="numpy", image_mode="L") + run = gr.Button("Run Second Look", variant="primary") + with gr.Column(): + out_img = gr.Image(label="Preprocessed model input (224×224)") + out_tier = gr.HTML() + run.click(analyze, inputs=[sample, upload], outputs=[out_img, out_tier]) + +if __name__ == "__main__": + demo.launch(server_name="127.0.0.1", server_port=7860, inbrowser=False) diff --git a/modeling/evaluate.py b/modeling/evaluate.py index 3b23a43..4962bc4 100644 --- a/modeling/evaluate.py +++ b/modeling/evaluate.py @@ -125,14 +125,14 @@ def _print_results( threshold: float, ) -> None: print("\n" + "=" * 60) - print("SECOND LOOK — BASELINE EVALUATION") + print("SECOND LOOK - BASELINE EVALUATION") print("=" * 60) print(f"\nDecision threshold: {threshold:.2f}") print("\nSensitivity (Recall) per class:") for i, name in enumerate(LABEL_ORDER): - marker = " ← PRIMARY METRIC" if i == POSITIVE_CLASS_INDEX else "" + marker = " <-- PRIMARY METRIC" if i == POSITIVE_CLASS_INDEX else "" print(f" {name:25s}: {per_class_sensitivity[i]:.3f}{marker}") floor = WORTH_SENSITIVITY_FLOOR diff --git a/requirements.txt b/requirements.txt index d79d241..0752893 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ matplotlib>=3.7 pytest>=7.4 PyYAML>=6.0.2 google-cloud-storage>=2.18 +gradio>=6.0 # demo UI (demos/second_look_app.py)