diff --git a/README.md b/README.md index 72003cd..df94b77 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,42 @@ # Tissue Classification -* repository for classification of various tissue types using WSIs -* detailed description can be found on [Youtrack](https://youtrack.rationai.cloud.e-infra.cz/articles/TC) \ No newline at end of file + +This repository contains a pipeline for tissue classification in whole-slide +images (WSIs). It includes preprocessing utilities, dataset splitting tools, +machine learning training and evaluation code, Hydra configurations, and +submission scripts for running individual pipeline stages. + +## Repository Structure + +- `preprocessing/` contains source code for WSI preprocessing, tiling, mask + generation, embedding extraction, quality control, and related statistics. +- `ml/` contains machine learning code for data loading, model training, + evaluation, callbacks, and prediction outputs. +- `split/` contains utilities for train/test and k-fold dataset splitting. +- `configs/` contains Hydra configuration files for preprocessing, splitting, + logging, datasets, and machine learning experiments. +- `scripts/` contains executable submission scripts for running pipeline stages. +- `pyproject.toml` and `uv.lock` define the Python project metadata and locked + dependencies. +- `LICENSE` contains the MIT License for this repository. + +## Setup + +The project uses Python 3.12 and `uv` for dependency management. + +```bash +uv sync +``` + +## Usage + +Pipeline stages can be run through the corresponding scripts in `scripts/`. +For example: + +```bash +uv run python scripts/submit_tiling.py +uv run python scripts/submit_embeddings.py +uv run python scripts/submit_train_linear_probe.py +``` + +Configuration is managed with Hydra. Base configurations are stored in +`configs/`, with task-specific configurations grouped by pipeline stage. diff --git a/configs/data/dataset.yaml b/configs/data/dataset.yaml index f061371..766849f 100644 --- a/configs/data/dataset.yaml +++ b/configs/data/dataset.yaml @@ -16,7 +16,9 @@ dataset: filter_tiles_run_id: "4e8f5d3c82124ea5a8f871a42d3ed9ba" stratified_kfold_run_id: "850c81506684450b9af92296acfd045a" stratified_group_kfold_run_id: "382b41d2fa894514908e8067949c4326" - embedding_run_id: "c325e3a5033b4077b6febb0e3e6b0bd6" + embedding_run_ids: + virchow2: "262d680f52374614baaf19f1e4d05940" + provgigapath: "5099ce57e886411dababfc1234a88b23" tissue_masks_run_id: "52bc0924f8624b259819c480c7cf213f" tissue_stats_run_id: "16ae2d003d88471b924e5f332415232a" diff --git a/configs/experiment/ml/final_linear_provgigapath_adamw.yaml b/configs/experiment/ml/final_linear_provgigapath_adamw.yaml index 2de2584..fb54dc6 100644 --- a/configs/experiment/ml/final_linear_provgigapath_adamw.yaml +++ b/configs/experiment/ml/final_linear_provgigapath_adamw.yaml @@ -6,7 +6,7 @@ defaults: embedding_model_name: ProvGigaPath embedding_dim: 1536 -embedding_run_id: 410c8672471348ceb4c58817f70fa097 +embedding_run_id: ${dataset.mlflow_artifacts.embedding_run_ids.provgigapath} kfold_strategy: stratified_group kfold_run_id: ${dataset.mlflow_artifacts.stratified_group_kfold_run_id} mlflow_artifact_path: linear_classifier_final_provgigapath diff --git a/configs/experiment/ml/final_linear_provgigapath_lbfgs.yaml b/configs/experiment/ml/final_linear_provgigapath_lbfgs.yaml index 49c71f3..85c8171 100644 --- a/configs/experiment/ml/final_linear_provgigapath_lbfgs.yaml +++ b/configs/experiment/ml/final_linear_provgigapath_lbfgs.yaml @@ -6,7 +6,7 @@ defaults: embedding_model_name: ProvGigaPath embedding_dim: 1536 -embedding_run_id: 410c8672471348ceb4c58817f70fa097 +embedding_run_id: ${dataset.mlflow_artifacts.embedding_run_ids.provgigapath} kfold_strategy: stratified_group kfold_run_id: ${dataset.mlflow_artifacts.stratified_group_kfold_run_id} mlflow_artifact_path: linear_classifier_final_provgigapath diff --git a/configs/experiment/ml/train_linear_provgigapath_adamw_group_kfold.yaml b/configs/experiment/ml/train_linear_provgigapath_adamw_group_kfold.yaml index 797f6e0..30b700a 100644 --- a/configs/experiment/ml/train_linear_provgigapath_adamw_group_kfold.yaml +++ b/configs/experiment/ml/train_linear_provgigapath_adamw_group_kfold.yaml @@ -6,7 +6,7 @@ defaults: embedding_model_name: ProvGigaPath embedding_dim: 1536 -embedding_run_id: 410c8672471348ceb4c58817f70fa097 +embedding_run_id: ${dataset.mlflow_artifacts.embedding_run_ids.provgigapath} mlflow_artifact_path: linear_classifier_provgigapath metadata: diff --git a/configs/ml/task/final_linear_classifier.yaml b/configs/ml/task/final_linear_classifier.yaml index 5e8f3b3..a33638e 100644 --- a/configs/ml/task/final_linear_classifier.yaml +++ b/configs/ml/task/final_linear_classifier.yaml @@ -12,7 +12,7 @@ mode: fit embedding_model_name: Virchow2 embedding_dim: 2560 -embedding_run_id: ${dataset.mlflow_artifacts.embedding_run_id} +embedding_run_id: ${dataset.mlflow_artifacts.embedding_run_ids.virchow2} kfold_strategy: stratified kfold_run_id: ${dataset.mlflow_artifacts.stratified_kfold_run_id} filter_tiles_run_id: ${dataset.mlflow_artifacts.filter_tiles_run_id} diff --git a/configs/ml/task/kfold_linear_classifier.yaml b/configs/ml/task/kfold_linear_classifier.yaml index 6b6951b..dd0a05d 100644 --- a/configs/ml/task/kfold_linear_classifier.yaml +++ b/configs/ml/task/kfold_linear_classifier.yaml @@ -12,7 +12,7 @@ mode: fit embedding_model_name: Virchow2 embedding_dim: 2560 -embedding_run_id: ${dataset.mlflow_artifacts.embedding_run_id} +embedding_run_id: ${dataset.mlflow_artifacts.embedding_run_ids.virchow2} kfold_strategy: stratified kfold_run_id: ${dataset.mlflow_artifacts.stratified_kfold_run_id} filter_tiles_run_id: ${dataset.mlflow_artifacts.filter_tiles_run_id}