From c42290abf947a1c06331093428c6af7598fda75e Mon Sep 17 00:00:00 2001
From: Dario <dario.righelli@gmail.com>
Date: Wed, 15 Apr 2026 10:24:59 +0200
Subject: [PATCH 1/8] fixing vignettes nomenclatures

---
 vignettes/SpaceTrooper_utilities.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/SpaceTrooper_utilities.Rmd b/vignettes/SpaceTrooper_utilities.Rmd
index ee68496..d44e414 100644
--- a/vignettes/SpaceTrooper_utilities.Rmd
+++ b/vignettes/SpaceTrooper_utilities.Rmd
@@ -9,7 +9,7 @@ output:
         toc: true
         toc_float: true
 vignette: >
-    %\VignetteIndexEntry{Imaging-based Spatial Transcriptomics Data Quality Control with SpaceTrooper}
+    %\VignetteIndexEntry{SpaceTrooper utilities overview}
     %\VignetteEncoding{UTF-8}
     %\VignetteEngine{knitr::rmarkdown}
 editor_options:

From ddff68f5a255b364a01f094b3e0ea74a13910adc Mon Sep 17 00:00:00 2001
From: Dario <dario.righelli@gmail.com>
Date: Wed, 6 May 2026 10:41:18 +0200
Subject: [PATCH 2/8] transfer/eval coeff between datasets

---
 inst/scripts/evaluate_qs_split_kfold.R  | 726 ++++++++++++++++++++++++
 inst/scripts/transfer_qs_coefficients.R | 665 ++++++++++++++++++++++
 2 files changed, 1391 insertions(+)
 create mode 100644 inst/scripts/evaluate_qs_split_kfold.R
 create mode 100644 inst/scripts/transfer_qs_coefficients.R

diff --git a/inst/scripts/evaluate_qs_split_kfold.R b/inst/scripts/evaluate_qs_split_kfold.R
new file mode 100644
index 0000000..faa4d1d
--- /dev/null
+++ b/inst/scripts/evaluate_qs_split_kfold.R
@@ -0,0 +1,726 @@
+library(SpaceTrooper)
+library(SummarizedExperiment)
+library(S4Vectors)
+
+# Apply a fitted ridge-logistic QS model to a new SpatialExperiment subset.
+#
+# The function rebuilds the design matrix from `colData`, using the same
+# formula employed during training, and stores predicted probabilities in the
+# `QC_score` column.
+score_subset <- function(spe_subset, fit, lambda, model_formula) {
+    # Convert per-cell metadata to a plain data.frame so model.matrix can use it.
+    df_subset <- as.data.frame(SummarizedExperiment::colData(spe_subset))
+
+    # Recreate the exact predictor matrix used by glmnet.
+    x_subset <- stats::model.matrix(stats::as.formula(model_formula),
+        data=df_subset)
+
+    # Predict the probability of being a good-quality cell.
+    spe_subset$QC_score <- as.vector(
+        stats::predict(fit, s=lambda, newx=x_subset, type="response")
+    )
+    spe_subset
+}
+
+# Compute AUROC from ranks, without requiring extra packages.
+#
+# `label_positive` must be a 0/1 vector, where 1 indicates the positive class.
+# In this script we use it both for bad-vs-good and for derived summaries.
+auc_rank <- function(score, label_positive) {
+    # Coerce labels to integer to avoid issues with logical/factor inputs.
+    label_positive <- as.integer(label_positive)
+    n_pos <- sum(label_positive == 1L)
+    n_neg <- sum(label_positive == 0L)
+
+    # AUROC is undefined if one of the two classes is missing.
+    if (n_pos == 0L || n_neg == 0L) {
+        return(NA_real_)
+    }
+
+    # Mann-Whitney / rank-based computation of AUROC.
+    ranks <- rank(score, ties.method="average")
+    (sum(ranks[label_positive == 1L]) - n_pos * (n_pos + 1) / 2) /
+        (n_pos * n_neg)
+}
+
+# Build pseudo-reference labels on a subset, following the same SpaceTrooper
+# logic used internally to define training examples.
+#
+# Returned labels are:
+# - `qcscore_train = 0`: low-quality examples
+# - `qcscore_train = 1`: good-quality examples
+build_reference_labels <- function(spe_subset, verbose=FALSE) {
+    # Detect outliers for the metrics used by QS.
+    spe_ref <- computeOutliersQCScore(spe_subset)
+
+    # Remove variables that do not have enough outliers to be informative.
+    spe_ref <- checkOutliers(spe_ref, verbose=verbose)
+
+    # Build a balanced table of bad and good examples.
+    df_ref <- computeTrainDF(
+        colData=SummarizedExperiment::colData(spe_ref),
+        formulaVars=S4Vectors::metadata(spe_ref)$formula_variables,
+        tech=S4Vectors::metadata(spe_ref)$technology,
+        verbose=verbose
+    )
+
+    # Keep only the columns needed for downstream evaluation.
+    df_ref[, c("cell_id", "qcscore_train")]
+}
+
+# Safe wrapper around `build_reference_labels()`.
+#
+# On small or unlucky splits/folds, SpaceTrooper may not find enough outliers
+# to define a reference set. Instead of stopping the whole script, this returns
+# the error message so the caller can inspect it.
+safe_reference_labels <- function(spe_subset, verbose=FALSE) {
+    tryCatch(
+        list(data=build_reference_labels(spe_subset, verbose=verbose),
+            error=NULL),
+        error=function(e) list(data=NULL, error=conditionMessage(e))
+    )
+}
+
+# Safely extract an optional scalar character field from a list.
+#
+# This is used when collecting fold-level error messages at the end of k-fold
+# evaluation. Missing, NULL, or NA entries are converted to `NA_character_`.
+extract_optional_chr1 <- function(x, name) {
+    value <- x[[name]]
+
+    if (is.null(value) || length(value) == 0L || all(is.na(value))) {
+        return(NA_character_)
+    }
+
+    as.character(value[[1]])
+}
+
+# Add good/bad/unlabelled labels to a SpatialExperiment subset for plotting.
+#
+# Labels are derived from the evaluation table:
+# - `qcscore_train = 0` -> bad
+# - `qcscore_train = 1` -> good
+# - missing label -> unlabelled
+append_eval_labels <- function(
+    spe_subset,
+    df_eval,
+    label_col="eval_label",
+    colour_col="eval_label_color"
+) {
+    labels <- rep("unlabelled", ncol(spe_subset))
+
+    if (!is.null(df_eval) && nrow(df_eval) > 0L) {
+        idx <- match(spe_subset$cell_id, df_eval$cell_id)
+        keep <- !is.na(idx)
+        labels[keep] <- ifelse(df_eval$qcscore_train[idx[keep]] == 0,
+            "bad", "good")
+    }
+
+    spe_subset[[label_col]] <- factor(labels,
+        levels=c("bad", "good", "unlabelled"))
+
+    palette <- c(
+        bad="#D55E00",
+        good="#009E73",
+        unlabelled="#BDBDBD"
+    )
+    spe_subset[[colour_col]] <- unname(palette[as.character(spe_subset[[label_col]])])
+    spe_subset
+}
+
+# Select either the whole result object (single split) or one fold result.
+#
+# This helper keeps the plotting functions compact and makes the API uniform
+# across single-split and k-fold outputs.
+get_result_level <- function(result, fold=NULL) {
+    if (identical(result$split_type, "k_fold_cv")) {
+        if (is.null(fold)) {
+            stop("Please provide `fold` when plotting a k-fold result")
+        }
+        if (length(fold) != 1L || is.na(fold) || fold < 1 || fold > length(result$folds)) {
+            stop("`fold` must be an integer between 1 and the number of folds")
+        }
+        return(result$folds[[fold]])
+    }
+
+    result
+}
+
+# Plot cells coloured as good, bad, or unlabelled.
+#
+# By default the function uses centroids because they are always available in
+# the current workflow. For k-fold results, choose which fold to display.
+plot_labelled_cells <- function(
+    result,
+    dataset=c("train", "test"),
+    fold=NULL,
+    size=0.05,
+    alpha=0.8,
+    scaleBar=TRUE
+) {
+    dataset <- match.arg(dataset)
+    level_result <- get_result_level(result, fold=fold)
+
+    spe_subset <- switch(dataset,
+        train=level_result$spe_train,
+        test=level_result$spe_test
+    )
+    df_eval <- switch(dataset,
+        train=level_result$train_eval,
+        test=level_result$test_eval
+    )
+
+    spe_subset <- append_eval_labels(spe_subset, df_eval)
+
+    title_suffix <- if (identical(result$split_type, "k_fold_cv")) {
+        paste0(" - fold ", fold)
+    } else {
+        ""
+    }
+
+    plotCentroids(
+        spe_subset,
+        colourBy="eval_label",
+        palette="eval_label_color",
+        size=size,
+        alpha=alpha,
+        scaleBar=scaleBar
+    ) +
+        ggplot2::labs(
+            title=paste0("Labelled cells (", dataset, ")", title_suffix),
+            colour="Reference label",
+            fill="Reference label"
+        )
+}
+
+# Compute the points of a ROC curve from evaluation labels and QS values.
+#
+# The positive class is the bad-quality group, so the curve is built using
+# `1 - QC_score` as the decision score.
+build_roc_df <- function(df_eval) {
+    if (is.null(df_eval) || nrow(df_eval) == 0L) {
+        return(data.frame())
+    }
+
+    label_positive <- as.integer(df_eval$qcscore_train == 0)
+    n_pos <- sum(label_positive == 1L)
+    n_neg <- sum(label_positive == 0L)
+
+    if (n_pos == 0L || n_neg == 0L) {
+        return(data.frame())
+    }
+
+    score_bad <- 1 - df_eval$QC_score
+    ord <- order(score_bad, decreasing=TRUE)
+    score_bad <- score_bad[ord]
+    label_positive <- label_positive[ord]
+
+    tp <- cumsum(label_positive == 1L)
+    fp <- cumsum(label_positive == 0L)
+    change <- c(score_bad[-1] != score_bad[-length(score_bad)], TRUE)
+
+    data.frame(
+        fpr=c(0, fp[change] / n_neg),
+        tpr=c(0, tp[change] / n_pos)
+    )
+}
+
+# Plot ROC curves for a single split or across folds.
+#
+# In k-fold mode the function overlays one ROC curve per fold and reports the
+# average AUC in the subtitle.
+plot_roc_eval <- function(result, dataset=c("train", "test")) {
+    dataset <- match.arg(dataset)
+
+    if (identical(result$split_type, "k_fold_cv")) {
+        roc_list <- lapply(seq_along(result$folds), function(i) {
+            df_eval <- switch(dataset,
+                train=result$folds[[i]]$train_eval,
+                test=result$folds[[i]]$test_eval
+            )
+            roc_df <- build_roc_df(df_eval)
+            if (nrow(roc_df) == 0L) {
+                return(NULL)
+            }
+            roc_df$fold <- i
+            roc_df
+        })
+        roc_list <- Filter(Negate(is.null), roc_list)
+
+        if (length(roc_list) == 0L) {
+            stop("No ROC curves available for the selected dataset")
+        }
+
+        roc_df <- do.call(rbind, roc_list)
+        auc_mean <- switch(dataset,
+            train=result$train_summary_average$auc_bad_vs_good,
+            test=result$test_summary_average$auc_bad_vs_good
+        )
+
+        return(
+            ggplot2::ggplot(roc_df,
+                ggplot2::aes(x=fpr, y=tpr, colour=factor(fold), group=fold)) +
+                ggplot2::geom_abline(intercept=0, slope=1, linetype=2,
+                    colour="grey60") +
+                ggplot2::geom_path(linewidth=0.7, alpha=0.8) +
+                ggplot2::coord_fixed() +
+                ggplot2::theme_bw() +
+                ggplot2::labs(
+                    title=paste0("ROC curves across folds (", dataset, ")"),
+                    subtitle=paste0("Mean AUC = ", round(auc_mean, 4)),
+                    x="False positive rate",
+                    y="True positive rate",
+                    colour="Fold"
+                )
+        )
+    }
+
+    df_eval <- switch(dataset,
+        train=result$train_eval,
+        test=result$test_eval
+    )
+    roc_df <- build_roc_df(df_eval)
+
+    if (nrow(roc_df) == 0L) {
+        stop("No ROC curve available for the selected dataset")
+    }
+
+    auc_value <- switch(dataset,
+        train=result$train_summary$auc_bad_vs_good,
+        test=result$test_summary$auc_bad_vs_good
+    )
+
+    ggplot2::ggplot(roc_df, ggplot2::aes(x=fpr, y=tpr)) +
+        ggplot2::geom_abline(intercept=0, slope=1, linetype=2,
+            colour="grey60") +
+        ggplot2::geom_path(linewidth=0.9, colour="#0072B2") +
+        ggplot2::coord_fixed() +
+        ggplot2::theme_bw() +
+        ggplot2::labs(
+            title=paste0("ROC curve (", dataset, ")"),
+            subtitle=paste0("AUC = ", round(auc_value, 4)),
+            x="False positive rate",
+            y="True positive rate"
+        )
+}
+
+# Plot AUC values for each fold.
+#
+# In single-split mode the function returns a one-point summary. In k-fold mode
+# it shows one point per fold and a dashed line for the average AUC.
+plot_auc_across_folds <- function(result, dataset=c("train", "test")) {
+    dataset <- match.arg(dataset)
+
+    if (identical(result$split_type, "k_fold_cv")) {
+        auc_df <- switch(dataset,
+            train=result$train_summary_per_fold,
+            test=result$test_summary_per_fold
+        )
+        auc_mean <- switch(dataset,
+            train=result$train_summary_average$auc_bad_vs_good,
+            test=result$test_summary_average$auc_bad_vs_good
+        )
+
+        return(
+            ggplot2::ggplot(auc_df,
+                ggplot2::aes(x=fold, y=auc_bad_vs_good)) +
+                ggplot2::geom_hline(yintercept=auc_mean, linetype=2,
+                    colour="#D55E00") +
+                ggplot2::geom_line(colour="#0072B2") +
+                ggplot2::geom_point(size=2.2, colour="#0072B2") +
+                ggplot2::theme_bw() +
+                ggplot2::labs(
+                    title=paste0("AUC across folds (", dataset, ")"),
+                    subtitle=paste0("Dashed line = mean AUC = ", round(auc_mean, 4)),
+                    x="Fold",
+                    y="AUC"
+                )
+        )
+    }
+
+    auc_value <- switch(dataset,
+        train=result$train_summary$auc_bad_vs_good,
+        test=result$test_summary$auc_bad_vs_good
+    )
+    auc_df <- data.frame(split=1, auc_bad_vs_good=auc_value)
+
+    ggplot2::ggplot(auc_df, ggplot2::aes(x=split, y=auc_bad_vs_good)) +
+        ggplot2::geom_point(size=3, colour="#0072B2") +
+        ggplot2::theme_bw() +
+        ggplot2::scale_x_continuous(breaks=1, labels="split") +
+        ggplot2::labs(
+            title=paste0("AUC summary (", dataset, ")"),
+            subtitle=paste0("AUC = ", round(auc_value, 4)),
+            x=NULL,
+            y="AUC"
+        )
+}
+
+# Summarise QS performance against pseudo-reference labels.
+#
+# The summary reports:
+# - number of labelled cells
+# - number of bad/good examples
+# - median QS in bad and good examples
+# - AUROC for separating bad from good
+# - sensitivity/specificity at the chosen threshold
+summarise_eval <- function(df_eval, threshold=0.5) {
+    # Return an empty data.frame if the evaluation table is missing.
+    if (is.null(df_eval) || nrow(df_eval) == 0L) {
+        return(data.frame())
+    }
+
+    # Derive boolean masks for the pseudo-reference classes.
+    is_bad <- df_eval$qcscore_train == 0
+    is_good <- df_eval$qcscore_train == 1
+
+    # A cell is predicted as bad if its QS is below the chosen threshold.
+    pred_bad <- df_eval$QC_score < threshold
+
+    data.frame(
+        n_labeled=nrow(df_eval),
+        n_bad=sum(is_bad),
+        n_good=sum(is_good),
+        median_qs_bad=if (sum(is_bad) > 0) {
+            stats::median(df_eval$QC_score[is_bad])
+        } else {
+            NA_real_
+        },
+        median_qs_good=if (sum(is_good) > 0) {
+            stats::median(df_eval$QC_score[is_good])
+        } else {
+            NA_real_
+        },
+        # `1 - QC_score` is used so that larger values correspond to worse cells.
+        auc_bad_vs_good=auc_rank(1 - df_eval$QC_score, as.integer(is_bad)),
+        sensitivity_bad_qs_lt_threshold=if (sum(is_bad) > 0) {
+            mean(pred_bad[is_bad])
+        } else {
+            NA_real_
+        },
+        specificity_good_qs_ge_threshold=if (sum(is_good) > 0) {
+            mean(!pred_bad[is_good])
+        } else {
+            NA_real_
+        }
+    )
+}
+
+# Create train/test indices for a single random split.
+#
+# `train_fraction` is only used when `k_folds = 1`. It controls the proportion
+# of cells assigned to the training set, allowing e.g. 50/50, 70/30, 80/20.
+make_random_split <- function(n_cells, train_fraction, seed) {
+    stopifnot(length(n_cells) == 1L, n_cells > 1L)
+    stopifnot(length(train_fraction) == 1L, !is.na(train_fraction))
+
+    if (train_fraction <= 0 || train_fraction >= 1) {
+        stop("train_fraction must be strictly between 0 and 1")
+    }
+
+    set.seed(seed)
+
+    # Sample the training cells at random.
+    n_train <- floor(n_cells * train_fraction)
+    if (n_train < 1L || n_train >= n_cells) {
+        stop("train_fraction produces an empty training or test set")
+    }
+
+    train_idx <- sample(seq_len(n_cells), size=n_train, replace=FALSE)
+    test_idx <- setdiff(seq_len(n_cells), train_idx)
+
+    list(train_idx=train_idx, test_idx=test_idx)
+}
+
+# Assign each cell to one of the k folds used in cross-validation.
+#
+# Each fold acts as test set once, while the remaining folds are used for
+# training. This function is only used when `k_folds > 1`.
+make_fold_ids <- function(n_cells, k_folds, seed) {
+    stopifnot(length(n_cells) == 1L, n_cells > 1L)
+    stopifnot(length(k_folds) == 1L, !is.na(k_folds), k_folds >= 1)
+
+    k_folds <- as.integer(k_folds)
+    if (k_folds < 1L) {
+        stop("k_folds must be >= 1")
+    }
+
+    set.seed(seed)
+
+    if (k_folds > n_cells) {
+        stop("k_folds cannot be greater than the number of cells")
+    }
+
+    # Shuffle cells and then distribute them approximately evenly across folds.
+    shuffled <- sample(seq_len(n_cells))
+    fold_id <- integer(n_cells)
+    fold_id[shuffled] <- rep(seq_len(k_folds), length.out=n_cells)
+    fold_id
+}
+
+# Train the QS model on one training split/fold and evaluate it on both train
+# and test cells.
+#
+# This is the core routine used by both single-split validation and k-fold CV.
+run_one_fold <- function(spe, test_idx, threshold=0.5, verbose=TRUE) {
+    # Partition the input SpatialExperiment into training and test subsets.
+    spe_train <- spe[, -test_idx]
+    spe_test <- spe[, test_idx]
+
+    # Compute outliers only on training cells, so the model is fitted without
+    # information leakage from the test set.
+    spe_train <- computeOutliersQCScore(spe_train)
+    spe_train <- checkOutliers(spe_train, verbose=verbose)
+
+    # Extract the training formula selected by SpaceTrooper.
+    formula_vars <- S4Vectors::metadata(spe_train)$formula_variables
+    model_formula <- getModelFormula(formula_vars, verbose=verbose)
+
+    # Create the balanced training table of pseudo-bad and pseudo-good cells.
+    df_train <- computeTrainDF(
+        colData=SummarizedExperiment::colData(spe_train),
+        formulaVars=formula_vars,
+        tech=S4Vectors::metadata(spe_train)$technology,
+        verbose=verbose
+    )
+
+    # Build the design matrix and estimate the ridge penalty.
+    x_train <- stats::model.matrix(stats::as.formula(model_formula), data=df_train)
+    best_lambda <- computeLambda(df_train, model_formula)
+
+    # Fit the ridge logistic model.
+    fit <- trainModel(x_train, df_train)
+
+    # Score both the training and test subsets using the trained model.
+    spe_train <- score_subset(spe_train, fit, best_lambda, model_formula)
+    spe_test <- score_subset(spe_test, fit, best_lambda, model_formula)
+
+    # Training evaluation uses the very same labelled examples used for fitting.
+    train_eval <- merge(
+        data.frame(cell_id=spe_train$cell_id, QC_score=spe_train$QC_score),
+        df_train[, c("cell_id", "qcscore_train")],
+        by="cell_id"
+    )
+
+    # On the test set, build pseudo-reference labels independently.
+    test_ref <- safe_reference_labels(spe_test, verbose=verbose)
+    test_eval <- if (is.null(test_ref$data)) {
+        NULL
+    } else {
+        merge(
+            data.frame(cell_id=spe_test$cell_id, QC_score=spe_test$QC_score),
+            test_ref$data,
+            by="cell_id"
+        )
+    }
+
+    # Return both summaries and raw objects/tables for further inspection.
+    list(
+        formula_variables=formula_vars,
+        model_formula=model_formula,
+        lambda=best_lambda,
+        train_class_balance=table(df_train$qcscore_train),
+        train_label_error=NA_character_,
+        train_summary=summarise_eval(train_eval, threshold=threshold),
+        test_summary=summarise_eval(test_eval, threshold=threshold),
+        test_label_error=test_ref$error,
+        spe_train=spe_train,
+        spe_test=spe_test,
+        train_eval=train_eval,
+        test_eval=test_eval
+    )
+}
+
+# Combine per-fold summaries into a single table and compute their mean.
+#
+# This is only used in k-fold mode. The output contains:
+# - `per_fold`: one row per fold
+# - `average`: average across folds for each metric
+combine_fold_summaries <- function(fold_results, summary_name) {
+    summaries <- lapply(seq_along(fold_results), function(i) {
+        x <- fold_results[[i]][[summary_name]]
+        if (is.null(x) || nrow(x) == 0L) {
+            return(NULL)
+        }
+        x$fold <- i
+        x
+    })
+    summaries <- Filter(Negate(is.null), summaries)
+
+    if (length(summaries) == 0L) {
+        return(list(per_fold=data.frame(), average=data.frame()))
+    }
+
+    per_fold <- do.call(rbind, summaries)
+    metric_cols <- setdiff(colnames(per_fold), "fold")
+
+    # Average each numeric summary metric across folds.
+    average <- as.data.frame(
+        lapply(per_fold[, metric_cols, drop=FALSE], function(x) mean(x, na.rm=TRUE))
+    )
+
+    list(per_fold=per_fold, average=average)
+}
+
+# Main entry point for evaluating SpaceTrooper QS on a CosMx dataset.
+#
+# Modes:
+# - `k_folds = 1`: single random split using `train_fraction`
+# - `k_folds > 1`: k-fold cross-validation
+#
+# Important notes:
+# - `train_fraction` is ignored when `k_folds > 1`
+# - `threshold` is used only for classification summaries, not for model fitting
+evaluate_qs_split <- function(
+    data_dir="/Users/inzirio/Downloads/CosMx_data/DBKero/CosMx_Breast/CosMx_data_Case2",
+    sample_name="CosMx_Case2",
+    seed=1713,
+    threshold=0.5,
+    k_folds=1,
+    train_fraction=0.5,
+    verbose=TRUE
+) {
+    # Validate the high-level input parameters.
+    stopifnot(length(k_folds) == 1L, !is.na(k_folds), k_folds >= 1)
+    k_folds <- as.integer(k_folds)
+    stopifnot(length(train_fraction) == 1L, !is.na(train_fraction))
+
+    if (train_fraction <= 0 || train_fraction >= 1) {
+        stop("train_fraction must be strictly between 0 and 1")
+    }
+
+    # Read the CosMx dataset from disk.
+    spe <- readCosmxSPE(data_dir, sampleName=sample_name)
+
+    # Compute the per-cell QC metrics required by the QS model.
+    spe <- spatialPerCellQC(
+        spe,
+        rmZeros=TRUE,
+        negProbList=c("NegPrb", "Negative", "SystemControl")
+    )
+
+    # Count the number of cells remaining after QC preprocessing.
+    n_cells <- ncol(spe)
+
+    if (k_folds == 1L) {
+        # Single random split: build train/test indices according to the
+        # requested training fraction.
+        split_idx <- make_random_split(
+            n_cells=n_cells,
+            train_fraction=train_fraction,
+            seed=seed
+        )
+
+        res <- run_one_fold(
+            spe=spe,
+            test_idx=split_idx$test_idx,
+            threshold=threshold,
+            verbose=verbose
+        )
+
+        return(c(
+            list(
+                split_type="random_split",
+                k_folds=k_folds,
+                seed=seed,
+                threshold=threshold,
+                train_fraction=train_fraction,
+                test_fraction=1 - train_fraction,
+                n_cells=n_cells,
+                n_train=length(split_idx$train_idx),
+                n_test=length(split_idx$test_idx)
+            ),
+            res
+        ))
+    }
+
+    # K-fold mode: assign each cell to a fold once.
+    fold_id <- make_fold_ids(n_cells=n_cells, k_folds=k_folds, seed=seed)
+
+    fold_results <- lapply(seq_len(k_folds), function(i) {
+        if (verbose) {
+            message("Running fold ", i, " of ", k_folds)
+        }
+
+        # Fold i is the test set; all other folds are the training set.
+        run_one_fold(
+            spe=spe,
+            test_idx=which(fold_id == i),
+            threshold=threshold,
+            verbose=verbose
+        )
+    })
+
+    train_combined <- combine_fold_summaries(fold_results, "train_summary")
+    test_combined <- combine_fold_summaries(fold_results, "test_summary")
+    test_label_errors <- vapply(
+        fold_results,
+        extract_optional_chr1,
+        character(1),
+        name = "test_label_error"
+    )
+
+    train_label_errors <- vapply(
+        fold_results,
+        extract_optional_chr1,
+        character(1),
+        name = "train_label_error"
+    )
+
+    # if you only want real errors later, drop missing values explicitly
+    test_label_errors <- stats::na.omit(test_label_errors)
+    train_label_errors <- stats::na.omit(train_label_errors)
+
+    list(
+        split_type="k_fold_cv",
+        k_folds=k_folds,
+        seed=seed,
+        threshold=threshold,
+        n_cells=n_cells,
+        fold_assignment=fold_id,
+        train_summary_per_fold=train_combined$per_fold,
+        train_summary_average=train_combined$average,
+        test_summary_per_fold=test_combined$per_fold,
+        test_summary_average=test_combined$average,
+        test_label_errors=test_label_errors,
+        train_label_errors=train_label_errors,
+        folds=fold_results
+    )
+}
+
+# Esempi di utilizzo:
+# source("inst/scripts/evaluate_qs_split_kfold.R")
+#
+# Split singolo 50/50
+# res_split_50_50 <- evaluate_qs_split(k_folds=1, train_fraction=0.50)
+# res_split_50_50$test_summary
+#
+# Split singolo 70/30
+# res_split_70_30 <- evaluate_qs_split(k_folds=1, train_fraction=0.70)
+# res_split_70_30$test_summary
+res_split <- evaluate_qs_split(k_folds=1, train_fraction=0.70)
+
+plot_labelled_cells(res_split, dataset="train", size=0.08)
+plot_labelled_cells(res_split, dataset="test", size=0.08)
+
+plot_roc_eval(res_split, dataset="test")
+plot_auc_across_folds(res_split, dataset="test")
+#
+# Split singolo 80/20
+# res_split_80_20 <- evaluate_qs_split(k_folds=1, train_fraction=0.80)
+# res_split_80_20$test_summary
+#
+# 5-fold cross-validation
+res_kfold_5 <- evaluate_qs_split(k_folds=5)
+res_kfold_5$test_summary_per_fold
+res_kfold_5$test_summary_average
+res_kfold_5$test_label_errors
+#
+# Celle colorate good/bad sul test set del fold 1
+plot_labelled_cells(res_kfold_5, dataset="test", fold=1, size=0.1)
+#
+# ROC curve across folds
+plot_roc_eval(res_kfold_5, dataset="test")
+#
+# AUC across folds
+plot_auc_across_folds(res_kfold_5, dataset="test")
diff --git a/inst/scripts/transfer_qs_coefficients.R b/inst/scripts/transfer_qs_coefficients.R
new file mode 100644
index 0000000..27b4c3a
--- /dev/null
+++ b/inst/scripts/transfer_qs_coefficients.R
@@ -0,0 +1,665 @@
+library(SpaceTrooper)
+library(SummarizedExperiment)
+library(S4Vectors)
+
+# Read a dataset with the appropriate SpaceTrooper reader according to the
+# selected technology.
+#
+# Supported values are:
+# - "cosmx"
+# - "cosmx_protein"
+# - "xenium"
+read_dataset_for_transfer <- function(
+    dir_name,
+    technology=c("cosmx", "cosmx_protein", "xenium"),
+    sample_name="sample01"
+) {
+    technology <- match.arg(technology)
+
+    switch(
+        technology,
+        cosmx=readCosmxSPE(dir_name, sampleName=sample_name),
+        cosmx_protein=readCosmxProteinSPE(dir_name, sampleName=sample_name),
+        xenium=readXeniumSPE(
+            dir_name,
+            sampleName=sample_name,
+            computeMissingMetrics=TRUE,
+            keepPolygons=FALSE
+        )
+    )
+}
+
+# Compute the per-cell QC metrics required before training or applying QS.
+#
+# The same preprocessing is applied to both source and target datasets so that
+# transferred coefficients operate on harmonised metric columns.
+prepare_dataset_for_transfer <- function(
+    spe,
+    rm_zeros=TRUE,
+    neg_prob_list=c("NegPrb", "Negative", "SystemControl")
+) {
+    spatialPerCellQC(
+        spe,
+        rmZeros=rm_zeros,
+        negProbList=neg_prob_list
+    )
+}
+
+# Identify the metric set that can be transferred from source to target.
+#
+# For heterogeneous transfers, the safest common metrics are:
+# - log2SignalDensity
+# - Area_um
+# - log2Ctrl_total_ratio
+#
+# The border-dependent aspect-ratio term is included only if both datasets have
+# `log2AspectRatio` and `dist_border`, which is typically the case for CosMx to
+# CosMx transfers.
+get_transferable_metrics <- function(source_spe, target_spe) {
+    source_cols <- colnames(SummarizedExperiment::colData(source_spe))
+    target_cols <- colnames(SummarizedExperiment::colData(target_spe))
+    shared_cols <- intersect(source_cols, target_cols)
+
+    metrics <- intersect(
+        c("log2SignalDensity", "Area_um", "log2Ctrl_total_ratio"),
+        shared_cols
+    )
+
+    if (all(c("log2AspectRatio", "dist_border") %in% source_cols) &&
+            all(c("log2AspectRatio", "dist_border") %in% target_cols)) {
+        metrics <- c(metrics, "log2AspectRatio")
+    }
+
+    unique(metrics)
+}
+
+# Extract a readable coefficient table from a fitted glmnet model.
+#
+# Coefficients are returned at the selected lambda, so they are exactly the ones
+# transferred to the target dataset.
+extract_qs_coefficients <- function(fit, lambda) {
+    coef_mat <- as.matrix(stats::predict(fit, s=lambda, type="coefficients"))
+    data.frame(
+        term=rownames(coef_mat),
+        coefficient=as.numeric(coef_mat[, 1]),
+        row.names=NULL
+    )
+}
+
+# Apply a fitted QS model to a new SpatialExperiment object.
+#
+# The design matrix is rebuilt from the target `colData` using the compatible
+# transfer formula trained on the source dataset.
+score_target_dataset <- function(spe, fit, lambda, model_formula) {
+    df <- as.data.frame(SummarizedExperiment::colData(spe))
+    x <- stats::model.matrix(stats::as.formula(model_formula), data=df)
+    spe$QC_score <- as.vector(
+        stats::predict(fit, s=lambda, newx=x, type="response")
+    )
+    spe
+}
+
+# Compute AUROC with a rank-based formula, avoiding extra package dependencies.
+auc_rank <- function(score, label_positive) {
+    label_positive <- as.integer(label_positive)
+    n_pos <- sum(label_positive == 1L)
+    n_neg <- sum(label_positive == 0L)
+
+    if (n_pos == 0L || n_neg == 0L) {
+        return(NA_real_)
+    }
+
+    ranks <- rank(score, ties.method="average")
+    (sum(ranks[label_positive == 1L]) - n_pos * (n_pos + 1) / 2) /
+        (n_pos * n_neg)
+}
+
+# Build pseudo-reference labels on a dataset using the same metric subset used
+# for transfer.
+#
+# This is useful for evaluating how well transferred coefficients separate
+# pseudo-bad from pseudo-good cells on the target dataset.
+build_reference_labels_transfer <- function(spe, metric_list, verbose=FALSE) {
+    spe_ref <- computeOutliersQCScore(spe, metricList=metric_list)
+    spe_ref <- checkOutliers(spe_ref, verbose=verbose)
+
+    df_ref <- computeTrainDF(
+        colData=SummarizedExperiment::colData(spe_ref),
+        formulaVars=S4Vectors::metadata(spe_ref)$formula_variables,
+        tech=S4Vectors::metadata(spe_ref)$technology,
+        verbose=verbose
+    )
+
+    df_ref[, c("cell_id", "qcscore_train")]
+}
+
+# Safe wrapper for pseudo-reference label generation.
+safe_reference_labels_transfer <- function(spe, metric_list, verbose=FALSE) {
+    tryCatch(
+        list(
+            data=build_reference_labels_transfer(
+                spe,
+                metric_list=metric_list,
+                verbose=verbose
+            ),
+            error=NULL
+        ),
+        error=function(e) list(data=NULL, error=conditionMessage(e))
+    )
+}
+
+# Summarise the separation between pseudo-bad and pseudo-good cells.
+#
+# The summary is computed on either source or target after scoring with the
+# transferred model.
+summarise_transfer_eval <- function(df_eval, threshold=0.5) {
+    if (is.null(df_eval) || nrow(df_eval) == 0L) {
+        return(data.frame())
+    }
+
+    is_bad <- df_eval$qcscore_train == 0
+    is_good <- df_eval$qcscore_train == 1
+    pred_bad <- df_eval$QC_score < threshold
+
+    data.frame(
+        n_labeled=nrow(df_eval),
+        n_bad=sum(is_bad),
+        n_good=sum(is_good),
+        median_qs_bad=if (sum(is_bad) > 0) {
+            stats::median(df_eval$QC_score[is_bad])
+        } else {
+            NA_real_
+        },
+        median_qs_good=if (sum(is_good) > 0) {
+            stats::median(df_eval$QC_score[is_good])
+        } else {
+            NA_real_
+        },
+        auc_bad_vs_good=auc_rank(1 - df_eval$QC_score, as.integer(is_bad)),
+        sensitivity_bad_qs_lt_threshold=if (sum(is_bad) > 0) {
+            mean(pred_bad[is_bad])
+        } else {
+            NA_real_
+        },
+        specificity_good_qs_ge_threshold=if (sum(is_good) > 0) {
+            mean(!pred_bad[is_good])
+        } else {
+            NA_real_
+        }
+    )
+}
+
+# Add good/bad/unlabelled labels to a SpatialExperiment object for plotting.
+#
+# Labels are derived from the evaluation table built for either the source or
+# the target dataset after scoring with transferred coefficients.
+append_transfer_labels <- function(
+    spe,
+    df_eval,
+    label_col="transfer_label",
+    colour_col="transfer_label_color"
+) {
+    labels <- rep("unlabelled", ncol(spe))
+
+    if (!is.null(df_eval) && nrow(df_eval) > 0L) {
+        idx <- match(spe$cell_id, df_eval$cell_id)
+        keep <- !is.na(idx)
+        labels[keep] <- ifelse(df_eval$qcscore_train[idx[keep]] == 0,
+            "bad", "good")
+    }
+
+    spe[[label_col]] <- factor(labels, levels=c("bad", "good", "unlabelled"))
+    palette <- c(bad="#D55E00", good="#009E73", unlabelled="#BDBDBD")
+    spe[[colour_col]] <- unname(palette[as.character(spe[[label_col]])])
+    spe
+}
+
+# Build a ROC data.frame from pseudo-reference labels and transferred QS.
+build_transfer_roc_df <- function(df_eval) {
+    if (is.null(df_eval) || nrow(df_eval) == 0L) {
+        return(data.frame())
+    }
+
+    label_positive <- as.integer(df_eval$qcscore_train == 0)
+    n_pos <- sum(label_positive == 1L)
+    n_neg <- sum(label_positive == 0L)
+
+    if (n_pos == 0L || n_neg == 0L) {
+        return(data.frame())
+    }
+
+    score_bad <- 1 - df_eval$QC_score
+    ord <- order(score_bad, decreasing=TRUE)
+    score_bad <- score_bad[ord]
+    label_positive <- label_positive[ord]
+
+    tp <- cumsum(label_positive == 1L)
+    fp <- cumsum(label_positive == 0L)
+    change <- c(score_bad[-1] != score_bad[-length(score_bad)], TRUE)
+
+    data.frame(
+        fpr=c(0, fp[change] / n_neg),
+        tpr=c(0, tp[change] / n_pos)
+    )
+}
+
+# Plot source or target cells coloured as pseudo-good / pseudo-bad after
+# scoring with transferred coefficients.
+plot_transfer_labelled_cells <- function(
+    result,
+    dataset=c("source", "target"),
+    size=0.05,
+    alpha=0.8,
+    scaleBar=TRUE
+) {
+    dataset <- match.arg(dataset)
+
+    spe <- switch(dataset,
+        source=result$source_spe,
+        target=result$target_spe
+    )
+    df_eval <- switch(dataset,
+        source=result$source_eval,
+        target=result$target_eval
+    )
+
+    spe <- append_transfer_labels(spe, df_eval)
+
+    plotCentroids(
+        spe,
+        colourBy="transfer_label",
+        palette="transfer_label_color",
+        size=size,
+        alpha=alpha,
+        scaleBar=scaleBar
+    ) +
+        ggplot2::labs(
+            title=paste0("Transferred QS labels on ", dataset, " dataset"),
+            colour="Reference label",
+            fill="Reference label"
+        )
+}
+
+# Plot the spatial distribution of transferred QS on source or target.
+plot_transfer_qs <- function(
+    result,
+    dataset=c("source", "target"),
+    size=0.05,
+    alpha=0.8,
+    scaleBar=TRUE
+) {
+    dataset <- match.arg(dataset)
+
+    spe <- switch(dataset,
+        source=result$source_spe,
+        target=result$target_spe
+    )
+
+    plotCentroids(
+        spe,
+        colourBy="QC_score",
+        size=size,
+        alpha=alpha,
+        scaleBar=scaleBar
+    ) +
+        ggplot2::labs(title=paste0("Transferred QC score on ", dataset, " dataset"))
+}
+
+# Plot ROC curves for the source or target transfer evaluation.
+plot_transfer_roc <- function(result, dataset=c("source", "target")) {
+    dataset <- match.arg(dataset)
+
+    df_eval <- switch(dataset,
+        source=result$source_eval,
+        target=result$target_eval
+    )
+    roc_df <- build_transfer_roc_df(df_eval)
+
+    if (nrow(roc_df) == 0L) {
+        stop("No ROC curve available for the selected dataset")
+    }
+
+    auc_value <- switch(dataset,
+        source=result$source_summary$auc_bad_vs_good,
+        target=result$target_summary$auc_bad_vs_good
+    )
+
+    ggplot2::ggplot(roc_df, ggplot2::aes(x=fpr, y=tpr)) +
+        ggplot2::geom_abline(intercept=0, slope=1, linetype=2,
+            colour="grey60") +
+        ggplot2::geom_path(linewidth=0.9, colour="#0072B2") +
+        ggplot2::coord_fixed() +
+        ggplot2::theme_bw() +
+        ggplot2::labs(
+            title=paste0("ROC curve for transferred QS (", dataset, ")"),
+            subtitle=paste0("AUC = ", round(auc_value, 4)),
+            x="False positive rate",
+            y="True positive rate"
+        )
+}
+
+# Compare AUC between source and target after transfer.
+plot_transfer_auc_summary <- function(result) {
+    auc_df <- data.frame(
+        dataset=c("source", "target"),
+        auc_bad_vs_good=c(
+            result$source_summary$auc_bad_vs_good,
+            if (nrow(result$target_summary) == 0L) NA_real_ else result$target_summary$auc_bad_vs_good
+        )
+    )
+
+    ggplot2::ggplot(auc_df, ggplot2::aes(x=dataset, y=auc_bad_vs_good, fill=dataset)) +
+        ggplot2::geom_col(width=0.65, alpha=0.85, show.legend=FALSE) +
+        ggplot2::geom_text(ggplot2::aes(label=round(auc_bad_vs_good, 4)),
+            vjust=-0.4, na.rm=TRUE) +
+        ggplot2::theme_bw() +
+        ggplot2::labs(
+            title="AUC summary for transferred QS",
+            x=NULL,
+            y="AUC"
+        )
+}
+
+# Train a native QS model directly on the target dataset, so it can be compared
+# against the transferred QS.
+#
+# By default it uses the full SpaceTrooper native workflow on the target.
+# If `use_transfer_metrics = TRUE`, it restricts the native training to the same
+# metric subset used by the transferred model.
+compute_native_target_qs <- function(
+    result,
+    use_transfer_metrics=FALSE,
+    verbose=TRUE
+) {
+    target_native <- result$target_spe
+    target_native$QC_score_transfer <- target_native$QC_score
+
+    if (isTRUE(use_transfer_metrics)) {
+        target_native <- computeOutliersQCScore(
+            target_native,
+            metricList=result$transfer_metrics
+        )
+        target_native <- checkOutliers(target_native, verbose=verbose)
+
+        formula_vars <- S4Vectors::metadata(target_native)$formula_variables
+        model_formula <- getModelFormula(formula_vars, verbose=verbose)
+        train_df <- computeTrainDF(
+            colData=SummarizedExperiment::colData(target_native),
+            formulaVars=formula_vars,
+            tech=S4Vectors::metadata(target_native)$technology,
+            verbose=verbose
+        )
+        x_target <- stats::model.matrix(stats::as.formula(model_formula), data=train_df)
+        best_lambda <- computeLambda(train_df, model_formula)
+        fit_native <- trainModel(x_target, train_df)
+        target_native$QC_score_native <- as.vector(
+            stats::predict(
+                fit_native,
+                s=best_lambda,
+                newx=stats::model.matrix(
+                    stats::as.formula(model_formula),
+                    data=as.data.frame(SummarizedExperiment::colData(target_native))
+                ),
+                type="response"
+            )
+        )
+        return(target_native)
+    }
+
+    target_native <- computeQCScore(target_native, verbose=verbose)
+    target_native$QC_score_native <- target_native$QC_score
+    target_native$QC_score <- target_native$QC_score_transfer
+    target_native
+}
+
+# Compare transferred QS with a native QS trained directly on the target.
+#
+# The function reports correlations and agreement on low-quality calls.
+compare_transferred_vs_native <- function(
+    result,
+    threshold=0.5,
+    low_fraction=0.1,
+    use_transfer_metrics=FALSE,
+    verbose=TRUE
+) {
+    stopifnot(length(low_fraction) == 1L, low_fraction > 0, low_fraction < 1)
+
+    target_native <- compute_native_target_qs(
+        result,
+        use_transfer_metrics=use_transfer_metrics,
+        verbose=verbose
+    )
+
+    df_compare <- data.frame(
+        cell_id=target_native$cell_id,
+        qc_transfer=target_native$QC_score_transfer,
+        qc_native=target_native$QC_score_native
+    )
+
+    transfer_cut <- stats::quantile(df_compare$qc_transfer, probs=low_fraction, na.rm=TRUE)
+    native_cut <- stats::quantile(df_compare$qc_native, probs=low_fraction, na.rm=TRUE)
+
+    comparison_summary <- data.frame(
+        pearson=stats::cor(df_compare$qc_transfer, df_compare$qc_native,
+            method="pearson", use="complete.obs"),
+        spearman=stats::cor(df_compare$qc_transfer, df_compare$qc_native,
+            method="spearman", use="complete.obs"),
+        low_qc_agreement_at_threshold=mean(
+            (df_compare$qc_transfer < threshold) == (df_compare$qc_native < threshold),
+            na.rm=TRUE
+        ),
+        overlap_lowest_fraction=mean(
+            (df_compare$qc_transfer <= transfer_cut) &
+                (df_compare$qc_native <= native_cut),
+            na.rm=TRUE
+        ) / low_fraction
+    )
+
+    list(
+        summary=comparison_summary,
+        df_compare=df_compare,
+        target_native_spe=target_native,
+        threshold=threshold,
+        low_fraction=low_fraction,
+        use_transfer_metrics=use_transfer_metrics
+    )
+}
+
+# Scatter plot of transferred QS vs native QS on the target dataset.
+plot_transfer_vs_native_scatter <- function(comparison_result) {
+    ggplot2::ggplot(
+        comparison_result$df_compare,
+        ggplot2::aes(x=qc_native, y=qc_transfer)
+    ) +
+        ggplot2::geom_point(alpha=0.4, size=0.6, colour="#0072B2") +
+        ggplot2::geom_abline(intercept=0, slope=1, linetype=2,
+            colour="grey60") +
+        ggplot2::theme_bw() +
+        ggplot2::labs(
+            title="Transferred QS vs native QS on target",
+            subtitle=paste0(
+                "Spearman = ",
+                round(comparison_result$summary$spearman, 4),
+                ", agreement@",
+                comparison_result$threshold,
+                " = ",
+                round(comparison_result$summary$low_qc_agreement_at_threshold, 4)
+            ),
+            x="Native target QS",
+            y="Transferred QS"
+        )
+}
+
+# Train a transferable QS model on one dataset and apply it to another.
+#
+# Workflow:
+# 1. read source and target datasets
+# 2. compute QC metrics on both
+# 3. identify the metric subset that exists in both datasets
+# 4. train the ridge-logistic QS model on the source dataset using only those metrics
+# 5. transfer the fitted coefficients to the target dataset
+# 6. optionally evaluate the transfer on pseudo-reference labels in the target
+transfer_qs_coefficients <- function(
+    source_dir,
+    target_dir,
+    source_technology=c("cosmx", "cosmx_protein", "xenium"),
+    target_technology=c("cosmx", "cosmx_protein", "xenium"),
+    source_sample_name="source_sample",
+    target_sample_name="target_sample",
+    threshold=0.5,
+    evaluate_target=TRUE,
+    verbose=TRUE
+) {
+    source_technology <- match.arg(source_technology)
+    target_technology <- match.arg(target_technology)
+
+    # Read the source dataset, from which coefficients will be learned.
+    source_spe <- read_dataset_for_transfer(
+        dir_name=source_dir,
+        technology=source_technology,
+        sample_name=source_sample_name
+    )
+
+    # Read the target dataset, on which coefficients will be transferred.
+    target_spe <- read_dataset_for_transfer(
+        dir_name=target_dir,
+        technology=target_technology,
+        sample_name=target_sample_name
+    )
+
+    # Harmonise per-cell QC metrics in both datasets.
+    source_spe <- prepare_dataset_for_transfer(source_spe)
+    target_spe <- prepare_dataset_for_transfer(target_spe)
+
+    # Keep only the subset of QS metrics that is available in both datasets.
+    transfer_metrics <- get_transferable_metrics(source_spe, target_spe)
+
+    if (!"log2SignalDensity" %in% transfer_metrics) {
+        stop("log2SignalDensity is required for transferable QS training")
+    }
+    if (length(transfer_metrics) < 2L) {
+        stop("At least two shared metrics are recommended for coefficient transfer")
+    }
+
+    if (verbose) {
+        message("Transferable metrics: ", paste(transfer_metrics, collapse=", "))
+    }
+
+    # Compute source outliers only on the transferable metric subset.
+    source_spe <- computeOutliersQCScore(source_spe, metricList=transfer_metrics)
+    source_spe <- checkOutliers(source_spe, verbose=verbose)
+
+    formula_vars <- S4Vectors::metadata(source_spe)$formula_variables
+    if (!"log2SignalDensity" %in% names(formula_vars)) {
+        stop("The transferable source formula lost log2SignalDensity after outlier filtering")
+    }
+
+    # Build the compatible model formula and the balanced source training table.
+    model_formula <- getModelFormula(formula_vars, verbose=verbose)
+    source_train_df <- computeTrainDF(
+        colData=SummarizedExperiment::colData(source_spe),
+        formulaVars=formula_vars,
+        tech=S4Vectors::metadata(source_spe)$technology,
+        verbose=verbose
+    )
+
+    # Fit the ridge-logistic QS model on the source dataset.
+    x_source <- stats::model.matrix(stats::as.formula(model_formula),
+        data=source_train_df)
+    best_lambda <- computeLambda(source_train_df, model_formula)
+    fit <- trainModel(x_source, source_train_df)
+    coefficient_table <- extract_qs_coefficients(fit, best_lambda)
+
+    # Score the full source and target datasets with the same transferred model.
+    source_spe <- score_target_dataset(source_spe, fit, best_lambda, model_formula)
+    target_spe <- score_target_dataset(target_spe, fit, best_lambda, model_formula)
+
+    # Build the labelled source evaluation set from the source training examples.
+    source_eval <- merge(
+        data.frame(cell_id=source_spe$cell_id, QC_score=source_spe$QC_score),
+        source_train_df[, c("cell_id", "qcscore_train")],
+        by="cell_id"
+    )
+    source_summary <- summarise_transfer_eval(source_eval, threshold=threshold)
+
+    target_eval <- NULL
+    target_summary <- data.frame()
+    target_label_error <- NA_character_
+
+    if (isTRUE(evaluate_target)) {
+        # Build pseudo-reference labels independently on the target dataset,
+        # using the same transferable metric subset.
+        target_ref <- safe_reference_labels_transfer(
+            target_spe,
+            metric_list=transfer_metrics,
+            verbose=verbose
+        )
+        target_label_error <- target_ref$error
+
+        if (!is.null(target_ref$data)) {
+            target_eval <- merge(
+                data.frame(cell_id=target_spe$cell_id, QC_score=target_spe$QC_score),
+                target_ref$data,
+                by="cell_id"
+            )
+            target_summary <- summarise_transfer_eval(target_eval,
+                threshold=threshold)
+        }
+    }
+
+    list(
+        source_technology=source_technology,
+        target_technology=target_technology,
+        transfer_metrics=transfer_metrics,
+        model_formula=model_formula,
+        lambda=best_lambda,
+        coefficient_table=coefficient_table,
+        source_summary=source_summary,
+        target_summary=target_summary,
+        target_label_error=target_label_error,
+        source_train_df=source_train_df,
+        source_eval=source_eval,
+        target_eval=target_eval,
+        source_spe=source_spe,
+        target_spe=target_spe,
+        fit=fit
+    )
+}
+
+# Esempi di utilizzo:
+# source("inst/scripts/transfer_qs_coefficients.R")
+#
+# Train su CosMx e applica a Xenium
+# res_transfer_cx <- transfer_qs_coefficients(
+#     source_dir="/path/to/cosmx_dataset",
+#     source_technology="cosmx",
+#     target_dir="/path/to/xenium_dataset",
+#     target_technology="xenium",
+#     source_sample_name="CosMx_source",
+#     target_sample_name="Xenium_target"
+# )
+# res_transfer_cx$coefficient_table
+# res_transfer_cx$target_summary
+# plot_transfer_labelled_cells(res_transfer_cx, dataset="target", size=0.08)
+# plot_transfer_qs(res_transfer_cx, dataset="target", size=0.08)
+# plot_transfer_roc(res_transfer_cx, dataset="target")
+# plot_transfer_auc_summary(res_transfer_cx)
+#
+# cmp_transfer_cx <- compare_transferred_vs_native(
+#     res_transfer_cx,
+#     threshold=0.5,
+#     low_fraction=0.1
+# )
+# cmp_transfer_cx$summary
+# plot_transfer_vs_native_scatter(cmp_transfer_cx)
+#
+# Train su Xenium e applica a CosMx
+# res_transfer_xc <- transfer_qs_coefficients(
+#     source_dir="/path/to/xenium_dataset",
+#     source_technology="xenium",
+#     target_dir="/path/to/cosmx_dataset",
+#     target_technology="cosmx"
+# )
+# res_transfer_xc$coefficient_table
+# res_transfer_xc$target_summary
+# res_transfer_xc$target_summary

From a4bdc6c997c8f8a43e83837357de424e6236b204 Mon Sep 17 00:00:00 2001
From: Dario <dario.righelli@gmail.com>
Date: Fri, 15 May 2026 16:13:02 +0200
Subject: [PATCH 3/8] adding functions for model transfer across datasets

---
 R/QC.R | 243 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 225 insertions(+), 18 deletions(-)

diff --git a/R/QC.R b/R/QC.R
index 14ef5ed..6d69efd 100644
--- a/R/QC.R
+++ b/R/QC.R
@@ -370,7 +370,17 @@ computeThresholdFlags <- function(spe, totalThreshold=0,
 #' @export
 computeLambda <- function(trainDF, modelFormula) {
     # model_formula <- getModelFormula(technology)
+    train_ok <- .filterCompleteModelCases(
+        df=trainDF,
+        modelFormula=modelFormula,
+        response=trainDF$qcscore_train,
+        context="training cells for lambda selection"
+    )
+
+    trainDF <- trainDF[train_ok, , drop=FALSE]
+
     model_matrix <- model.matrix(as.formula(modelFormula), data=trainDF)
+    model_matrix <- .dropModelIntercept(model_matrix)
     ridge_cv <- cv.glmnet(model_matrix, trainDF$qcscore_train,
                         family="binomial", alpha=0, lambda=NULL)
     bestLambda <- ridge_cv$lambda.min
@@ -442,7 +452,7 @@ computeLambda <- function(trainDF, modelFormula) {
 #' set.seed(1998)
 #' spe <- computeQCScore(spe)
 #' summary(spe$QC_score)
-computeQCScore <- function(spe, bestLambda=NULL, verbose=FALSE) {
+computeQCScore <- function(spe, bestLambda=NULL, modelFormula=NULL, verbose=FALSE) {
     stopifnot(is(spe, "SpatialExperiment"))
     if (dim(spe[,spe$total == 0])[2] != 0) {
         warning(paste0(dim(spe[,spe$total == 0])[2],
@@ -458,34 +468,71 @@ computeQCScore <- function(spe, bestLambda=NULL, verbose=FALSE) {
 
     train_df <- computeTrainDF(df, out_var, tech, verbose)
 
-    model_formula <- getModelFormula(out_var, verbose)
+    if (is.null(modelFormula)) {
+        model_formula <- getModelFormula(out_var, verbose)
+    } else {
+        model_formula <- modelFormula
+        if (verbose) {
+            message("Using user-supplied model formula:")
+            message(model_formula)
+        }
+    }
+    train_ok <- .filterCompleteModelCases(
+        df=train_df,
+        modelFormula=model_formula,
+        response=train_df$qcscore_train,
+        context="training cells"
+    )
+
+    train_df <- train_df[train_ok, , drop=FALSE]
 
     model_matrix <- model.matrix(as.formula(model_formula), data=train_df)
+    model_matrix <- .dropModelIntercept(model_matrix)
     model <- trainModel(model_matrix, train_df)
     if(is.null(bestLambda)) {
         bestLambda <- computeLambda(train_df, model_formula)
     }
 
+    coefs <- coef(model, s=bestLambda)
+
     if (verbose) {
         message("Model coefficients for every term used in the formula:")
-        message(paste(round(predict(model, s=bestLambda, type="coefficients"),
-                            2),
-                    collapse = " "))
+        message( paste( rownames(coefs), round(as.numeric(coefs), 2), sep="=",
+            collapse=" "))
     }
 
-    full_matrix <- model.matrix(as.formula(model_formula), data=df)
-    cd <- colData(spe)
-    cd$QC_score <- as.vector(predict(model, s=bestLambda,
-                                    newx = full_matrix,
-                                    type = "response"))
-    spe$QC_score <- cd$QC_score
-    # train_identity <- rep("TEST", dim(spe)[2])
-    # train_bad <- train_df$cell_id[train_df$qcscore_train==0]
-    # train_good <- train_df$cell_id[train_df$qcscore_train==1]
-    # spe$training_status <- dplyr::case_when(
-    #     spe$cell_id %in% train_bad ~ "BAD",
-    #     spe$cell_id %in% train_good ~ "GOOD",
-    #     TRUE ~ train_identity)
+    full_ok <- .filterCompleteModelCases(df=df, modelFormula=model_formula,
+        context="cells")
+
+    full_matrix <- model.matrix(as.formula(model_formula),
+        data=df[full_ok, , drop=FALSE])
+
+    full_matrix <- .dropModelIntercept(full_matrix)
+    full_matrix <- full_matrix[, colnames(model_matrix), drop=FALSE]
+
+    ## NAs may come from model variables used in `model_formula`, e.g. cells with
+    ## missing `log2AspectRatio` when aspect ratio cannot be computed from polygons.
+    ## Predict only complete cases; incomplete cells keep QC_score = NA.
+    qc_score <- rep(NA_real_, nrow(df))
+    qc_score[full_ok] <- as.vector(
+        predict(model, s=bestLambda, newx=full_matrix, type="response")
+    )
+
+    spe$QC_score <- qc_score
+
+    metadata(spe)$QCScore_model <- list(
+        model=model,
+        bestLambda=bestLambda,
+        model_formula=model_formula,
+        formula_variables=out_var,
+        model_matrix_colnames=colnames(model_matrix),
+        coefficients=coefs,
+        coefficients_table=data.frame(
+            term=rownames(coefs),
+            coefficient=as.numeric(coefs),
+            row.names=NULL
+        )
+    )
     return(spe)
 }
 
@@ -1148,3 +1195,163 @@ checkOutliers <- function(spe, verbose=FALSE) {
     return(list(df=df, out_var=out_var, tech=tech))
 }
 
+#' applyQCScoreModel
+#'
+#' @description
+#' Apply a previously trained QC score model to a new SpatialExperiment object.
+#'
+#' @param spe A `SpatialExperiment` object with QC metrics already computed.
+#' @param qcModel A QC score model object, usually stored in
+#'   `metadata(spe)$QCScore_model`.
+#' @param scoreName Name of the output column in `colData`.
+#'
+#' @return A `SpatialExperiment` object with added QC score in `colData`.
+#'
+#' @export
+#' @examples
+#'
+#' example(readCosmxSPE)
+#'
+#' ## Compute per-cell quality control metrics
+#' spe <- spatialPerCellQC(spe)
+#'
+#' ## Split the object only for example purposes
+#' set.seed(1998)
+#' idx <- sample(seq_len(ncol(spe)), floor(ncol(spe) / 2))
+#' spe_train <- spe[, idx]
+#' spe_test <- spe[, -idx]
+#'
+#' ## Train the Quality Control (QC) score model on one dataset
+#' spe_train <- computeQCScore(spe_train)
+#' qc_model <- metadata(spe_train)$QCScore_model
+#'
+#' ## Apply the trained model to another dataset
+#' spe_test <- applyQCScoreModel(
+#'     spe=spe_test,
+#'     qcModel=qc_model,
+#'     scoreName="QC_score_transferred"
+#' )
+#'
+#' summary(spe_test$QC_score_transferred)
+applyQCScoreModel <- function(spe, qcModel, scoreName="QC_score") {
+    stopifnot(is(spe, "SpatialExperiment"))
+
+    required <- c(
+        "model",
+        "bestLambda",
+        "model_formula",
+        "model_matrix_colnames"
+    )
+
+    stopifnot(
+        "qcModel is missing required fields" =
+            all(required %in% names(qcModel))
+    )
+
+    df <- as.data.frame(colData(spe))
+
+    ok <- .filterCompleteModelCases(
+        df=df,
+        modelFormula=qcModel$model_formula,
+        context="cells"
+    )
+
+    new_matrix <- model.matrix(
+        as.formula(qcModel$model_formula),
+        data=df[ok, , drop=FALSE]
+    )
+
+    new_matrix <- .dropModelIntercept(new_matrix)
+
+    missing_cols <- setdiff(
+        qcModel$model_matrix_colnames,
+        colnames(new_matrix)
+    )
+
+    if (length(missing_cols) > 0L) {
+        zero_matrix <- matrix(
+            0,
+            nrow=nrow(new_matrix),
+            ncol=length(missing_cols),
+            dimnames=list(rownames(new_matrix), missing_cols)
+        )
+
+        new_matrix <- cbind(new_matrix, zero_matrix)
+    }
+
+    extra_cols <- setdiff(
+        colnames(new_matrix),
+        qcModel$model_matrix_colnames
+    )
+
+    if (length(extra_cols) > 0L) {
+        warning(
+            "New model matrix contains columns not present in the ",
+            "training matrix. These columns will be ignored: ",
+            paste(extra_cols, collapse=", ")
+        )
+    }
+
+    new_matrix <- new_matrix[, qcModel$model_matrix_colnames, drop=FALSE]
+
+    score <- rep(NA_real_, nrow(df))
+
+    score[ok] <- as.vector(
+        predict(
+            qcModel$model,
+            s=qcModel$bestLambda,
+            newx=new_matrix,
+            type="response"
+        )
+    )
+
+    cd <- colData(spe)
+    cd[[scoreName]] <- score
+    colData(spe) <- cd
+
+    metadata(spe)$QCScore_model_applied <- qcModel
+
+    return(spe)
+}
+
+.dropModelIntercept <- function(modelMatrix) {
+    if ("(Intercept)" %in% colnames(modelMatrix)) {
+        modelMatrix <- modelMatrix[, colnames(modelMatrix)!="(Intercept)",
+            drop=FALSE]
+    }
+
+    return(modelMatrix)
+}
+
+.filterCompleteModelCases <- function(df, modelFormula, response=NULL,
+    context="cells") {
+
+    vars <- all.vars(as.formula(modelFormula))
+
+    missing_vars <- setdiff(vars, colnames(df))
+
+    if (length(missing_vars) > 0L) {
+        stop(
+            "Missing variables required by model formula: ",
+            paste(missing_vars, collapse=", ")
+        )
+    }
+    ## Missing values may come from variables used in `modelFormula`, for
+    ## example `log2AspectRatio` when aspect ratio cannot be computed from
+    ## polygons.
+    ok <- complete.cases(df[, vars, drop=FALSE])
+
+    if (!is.null(response)) {
+        ok <- ok & !is.na(response)
+    }
+
+    if (!all(ok)) {
+        warning(
+            sum(!ok),
+            " ", context,
+            " contain missing model variables."
+        )
+    }
+
+    return(ok)
+}

From 8aeee801dc2e480af7be5867e8499013ab06102b Mon Sep 17 00:00:00 2001
From: Dario <dario.righelli@gmail.com>
Date: Fri, 15 May 2026 16:13:13 +0200
Subject: [PATCH 4/8] script for model transfer

---
 inst/scripts/transfer_coeffs.R | 126 +++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 inst/scripts/transfer_coeffs.R

diff --git a/inst/scripts/transfer_coeffs.R b/inst/scripts/transfer_coeffs.R
new file mode 100644
index 0000000..19812b4
--- /dev/null
+++ b/inst/scripts/transfer_coeffs.R
@@ -0,0 +1,126 @@
+library(SpaceTrooper)
+library(SummarizedExperiment)
+library(S4Vectors)
+
+set.seed(1998)
+
+common_formula <- "~(log2SignalDensity + Area_um + log2AspectRatio)^2"
+
+## -----------------------------
+## Read Xenium data
+## -----------------------------
+
+dbkx_path <- "~/Downloads/Xenium_data/db_kero_xen"
+
+spexen <- readXeniumSPE(dbkx_path)
+
+spexen <- spatialPerCellQC(spexen)
+
+spexen <- computeQCScore(
+    spexen,
+    modelFormula=common_formula,
+    verbose=TRUE
+)
+
+xen_model <- metadata(spexen)$QCScore_model
+
+
+## -----------------------------
+## Read CosMx data
+## -----------------------------
+
+cosm_path <- "~/Downloads/CosMx_data/DBKero/CosMx_Breast/CosMx_data_Case2/"
+
+specosm <- readCosmxSPE(cosm_path)
+
+specosm <- spatialPerCellQC(specosm)
+
+specosm <- computeQCScore(
+    specosm,
+    modelFormula=common_formula,
+    verbose=TRUE
+)
+
+cosmx_model <- metadata(specosm)$QCScore_model
+
+
+## -----------------------------
+## Save native scores before transfer
+## -----------------------------
+
+spexen$QC_score_xenium_native <- spexen$QC_score
+specosm$QC_score_cosmx_native <- specosm$QC_score
+
+
+## -----------------------------
+## Apply transferred models
+## -----------------------------
+
+specosm <- applyQCScoreModel(
+    spe=specosm,
+    qcModel=xen_model,
+    scoreName="QC_score_xenium_model"
+)
+
+spexen <- applyQCScoreModel(
+    spe=spexen,
+    qcModel=cosmx_model,
+    scoreName="QC_score_cosmx_model"
+)
+
+
+## -----------------------------
+## Summaries
+## -----------------------------
+
+message("Xenium native score:")
+print(summary(spexen$QC_score_xenium_native))
+
+message("Xenium scored with CosMx model:")
+print(summary(spexen$QC_score_cosmx_model))
+
+message("CosMx native score:")
+print(summary(specosm$QC_score_cosmx_native))
+
+message("CosMx scored with Xenium model:")
+print(summary(specosm$QC_score_xenium_model))
+
+
+## -----------------------------
+## Correlations
+## -----------------------------
+
+message("Correlation in Xenium: native vs CosMx model")
+print(
+    cor(
+        spexen$QC_score_xenium_native,
+        spexen$QC_score_cosmx_model,
+        use="complete.obs"
+    )
+)
+
+message("Correlation in CosMx: native vs Xenium model")
+print(
+    cor(
+        specosm$QC_score_cosmx_native,
+        specosm$QC_score_xenium_model,
+        use="complete.obs"
+    )
+)
+
+
+## -----------------------------
+## Model details
+## -----------------------------
+
+message("Xenium model formula:")
+print(metadata(spexen)$QCScore_model$model_formula)
+
+message("CosMx model formula:")
+print(metadata(specosm)$QCScore_model$model_formula)
+
+message("Xenium model coefficients:")
+print(metadata(spexen)$QCScore_model$coefficients_table)
+
+message("CosMx model coefficients:")
+print(metadata(specosm)$QCScore_model$coefficients_table)

From c0cf042355c3f3cc610199b34350af8742aea875 Mon Sep 17 00:00:00 2001
From: Dario <dario.righelli@gmail.com>
Date: Wed, 20 May 2026 15:40:37 +0200
Subject: [PATCH 5/8] adding scripts for reviewing process

---
 R/plotUtils.R                                 |  54 ++
 inst/scripts/ablation_study.R                 | 484 ++++++++++++
 inst/scripts/compare_qs_general.R             |  79 ++
 inst/scripts/create_cosmx_rds.R               |  39 +
 inst/scripts/run_qs_general.R                 | 275 +++++++
 inst/scripts/transfer_coeffs_cosmx_cosmx.R    | 730 ++++++++++++++++++
 ...oeffs.R => transfer_coeffs_cosmx_xenium.R} |  82 +-
 inst/scripts/transfer_coeffs_xenium_xenium.R  | 348 +++++++++
 8 files changed, 2087 insertions(+), 4 deletions(-)
 create mode 100644 inst/scripts/ablation_study.R
 create mode 100644 inst/scripts/compare_qs_general.R
 create mode 100644 inst/scripts/create_cosmx_rds.R
 create mode 100644 inst/scripts/run_qs_general.R
 create mode 100644 inst/scripts/transfer_coeffs_cosmx_cosmx.R
 rename inst/scripts/{transfer_coeffs.R => transfer_coeffs_cosmx_xenium.R} (56%)
 create mode 100644 inst/scripts/transfer_coeffs_xenium_xenium.R

diff --git a/R/plotUtils.R b/R/plotUtils.R
index 21d6a8c..9fe56bd 100644
--- a/R/plotUtils.R
+++ b/R/plotUtils.R
@@ -200,3 +200,57 @@ firstFlagPalette <- c(
     "> logged aspect ratio higher thr." = "greenyellow"
 )
 
+# To be decided if this should be moved to a separate file or included in the main package
+# plot_qc_score_comparison <- function(x, y, x_label="x", y_label="y",
+#     title="QC score comparison", threshold=0.75,
+#     cor_method="spearman", output_file=NULL) {
+
+#     stopifnot(length(x) == length(y))
+
+#     df <- data.frame(x=x, y=y)
+
+#     df$quadrant <- with(df, ifelse(
+#         x < threshold & y < threshold, "low / low",
+#         ifelse(x >= threshold & y < threshold, "high x / low y",
+#         ifelse(x < threshold & y >= threshold, "low x / high y",
+#             "high / high"))
+#     ))
+
+#     cor_val <- cor(df$x, df$y, use="complete.obs", method=cor_method)
+
+#     p <- ggplot(df, aes(x=x, y=y, color=quadrant)) +
+#         geom_point(alpha=0.35, size=0.6) +
+#         geom_abline(slope=1, intercept=0, linetype="dashed", color="red") +
+#         geom_hline(yintercept=threshold) +
+#         geom_vline(xintercept=threshold) +
+#         scale_color_manual(values=c(
+#             "low / low"="grey60",
+#             "high x / low y"="orange",
+#             "low x / high y"="dodgerblue",
+#             "high / high"="black"
+#         )) +
+#         labs(x=x_label, y=y_label, title=title, color="Quadrant") +
+#         theme_minimal() +
+#         annotate(
+#             "text",
+#             x=quantile(df$x, 0.99, na.rm=TRUE),
+#             y=quantile(df$y, 0.01, na.rm=TRUE),
+#             label=paste0(cor_method, " r = ",
+#                 formatC(cor_val, digits=3, format="f")),
+#             hjust=1,
+#             vjust=0,
+#             color="black"
+#         )
+
+#     if (!is.null(output_file)) {
+#         ggsave(output_file, p, device="pdf",
+#             width=11.69, height=8.27, units="in")
+#     }
+
+#     return(list(
+#         plot=p,
+#         correlation=cor_val,
+#         quadrant_table=table(df$quadrant)
+#     ))
+# }
+
diff --git a/inst/scripts/ablation_study.R b/inst/scripts/ablation_study.R
new file mode 100644
index 0000000..9cd3777
--- /dev/null
+++ b/inst/scripts/ablation_study.R
@@ -0,0 +1,484 @@
+## ============================================================
+## 0. Libraries and output directory
+## ============================================================
+
+library(SpaceTrooper)
+library(ggplot2)
+
+out_dir <- "~/SpaceTrooper_qs_check"
+
+if (!dir.exists(out_dir)) {
+    dir.create(out_dir, recursive=TRUE)
+}
+
+
+## ============================================================
+## 1. Define ablation formulas
+## ============================================================
+## The full model contains all Quality Score (QS) components.
+## Each ablated model removes one component at a time.
+
+formulas <- list(
+    full="~(log2SignalDensity + Area_um + log2AspectRatio + log2Ctrl_total_ratio)^2",
+
+    no_log2SignalDensity=
+        "~(Area_um + log2AspectRatio + log2Ctrl_total_ratio)^2",
+
+    no_Area_um=
+        "~(log2SignalDensity + log2AspectRatio + log2Ctrl_total_ratio)^2",
+
+    no_log2AspectRatio=
+        "~(log2SignalDensity + Area_um + log2Ctrl_total_ratio)^2",
+
+    no_log2Ctrl_total_ratio=
+        "~(log2SignalDensity + Area_um + log2AspectRatio)^2"
+)
+
+
+## ============================================================
+## 2. Function to run QS ablation models
+## ============================================================
+## For each formula, computeQCScore() is run with a forced model formula.
+## The output stores:
+## - the computed QS vector
+## - the trained QS model stored in metadata(spe)$QCScore_model
+
+run_qs_ablation <- function(spe, formulas, verbose=FALSE) {
+    out <- list()
+
+    for (nm in names(formulas)) {
+        message("Running model: ", nm)
+
+        spe_i <- computeQCScore(
+            spe=spe,
+            modelFormula=formulas[[nm]],
+            verbose=verbose
+        )
+
+        out[[nm]] <- list(
+            spe=spe_i,
+            score=spe_i$QC_score,
+            model=metadata(spe_i)$QCScore_model
+        )
+    }
+
+    return(out)
+}
+
+
+## ============================================================
+## 3. Read and preprocess CosMx data
+## ============================================================
+## The object is read, then per-cell QC metrics are computed.
+## These metrics are required by computeQCScore().
+
+cosm_path <- "~/Downloads/CosMx_data/DBKero/CosMx_Breast/CosMx_data_Case2/"
+
+specosm <- readCosmxSPE(cosm_path)
+
+specosm <- spatialPerCellQC(specosm)
+
+
+## ============================================================
+## 4. Run ablation study on CosMx
+## ============================================================
+## This computes the full QS and one QS for each ablated formula.
+
+set.seed(1998)
+
+abl_cosmx <- run_qs_ablation(
+    spe=specosm,
+    formulas=formulas,
+    verbose=TRUE
+)
+
+
+## ============================================================
+## 5. Functions to summarize ablation results
+## ============================================================
+## bottom_overlap() compares the low-QS tails of two score vectors.
+## By default q=0.10 means the bottom 10% of cells.
+##
+## summarize_qs_ablation() compares every ablated QS against the full QS
+## using:
+## - Pearson correlation
+## - Spearman correlation
+## - absolute score differences
+## - overlap of bottom-tail low-quality cells
+
+bottom_overlap <- function(x, y, q=0.10) {
+    x_bad <- x <= quantile(x, probs=q, na.rm=TRUE)
+    y_bad <- y <= quantile(y, probs=q, na.rm=TRUE)
+
+    overlap <- sum(x_bad & y_bad, na.rm=TRUE)
+    union <- sum(x_bad | y_bad, na.rm=TRUE)
+
+    data.frame(
+        quantile=q,
+        overlap=overlap,
+        union=union,
+        jaccard=overlap / union,
+        prop_ref_recovered=overlap / sum(x_bad, na.rm=TRUE)
+    )
+}
+
+
+summarize_qs_ablation <- function(ablation_results, reference="full", q=0.10) {
+    ref_score <- ablation_results[[reference]]$score
+
+    res <- lapply(names(ablation_results), function(nm) {
+        score <- ablation_results[[nm]]$score
+
+        ov <- bottom_overlap(ref_score, score, q=q)
+
+        data.frame(
+            model=nm,
+            pearson=cor(
+                ref_score,
+                score,
+                use="complete.obs",
+                method="pearson"
+            ),
+            spearman=cor(
+                ref_score,
+                score,
+                use="complete.obs",
+                method="spearman"
+            ),
+            mean_abs_diff=mean(abs(ref_score - score), na.rm=TRUE),
+            median_abs_diff=median(abs(ref_score - score), na.rm=TRUE),
+            max_abs_diff=max(abs(ref_score - score), na.rm=TRUE),
+            bottom_jaccard=ov$jaccard,
+            bottom_ref_recovered=ov$prop_ref_recovered,
+            n_na=sum(is.na(score)),
+            stringsAsFactors=FALSE
+        )
+    })
+
+    do.call(rbind, res)
+}
+
+
+## ============================================================
+## 6. Build ablation summary table
+## ============================================================
+
+tab_cosmx <- summarize_qs_ablation(
+    ablation_results=abl_cosmx,
+    reference="full",
+    q=0.10
+)
+
+tab_cosmx
+
+
+## ============================================================
+## 7. Pairwise plots: full QS vs each ablated QS
+## ============================================================
+## This assumes plot_qc_score_comparison() is already available.
+## Each plot is also saved as an A4 landscape PDF.
+
+ablation_names <- setdiff(names(abl_cosmx), "full")
+
+plots_cosmx <- lapply(ablation_names, function(nm) {
+    plot_qc_score_comparison(
+        x=abl_cosmx$full$score,
+        y=abl_cosmx[[nm]]$score,
+        x_label="Full QS",
+        y_label=paste0("QS: ", nm),
+        title=paste0("CosMx: full QS vs ", nm),
+        threshold=0.75,
+        cor_method="spearman",
+        output_file=file.path(
+            out_dir,
+            paste0("cosmx_full_vs_", nm, "_A4_landscape.pdf")
+        )
+    )
+})
+
+names(plots_cosmx) <- ablation_names
+
+## Print plots
+lapply(plots_cosmx, function(x) print(x$plot))
+
+
+## ============================================================
+## 8. Heatmap: complete ablation summary
+## ============================================================
+## This heatmap includes correlations, score differences, and bottom-tail
+## overlap metrics.
+
+heat_df <- tab_cosmx[tab_cosmx$model != "full", ]
+
+heat_df <- heat_df[, c(
+    "model",
+    "pearson",
+    "spearman",
+    "mean_abs_diff",
+    "median_abs_diff",
+    "bottom_jaccard",
+    "bottom_ref_recovered"
+)]
+
+heat_long <- reshape(
+    heat_df,
+    varying=names(heat_df)[names(heat_df) != "model"],
+    v.names="value",
+    timevar="metric",
+    times=names(heat_df)[names(heat_df) != "model"],
+    direction="long"
+)
+
+rownames(heat_long) <- NULL
+
+p_heat <- ggplot(
+    heat_long,
+    aes(x=metric, y=model, fill=value)
+) +
+    geom_tile() +
+    geom_text(
+        aes(label=formatC(value, digits=3, format="f")),
+        color="white",
+        size=3
+    ) +
+    labs(
+        title="CosMx QS ablation summary",
+        x="Metric",
+        y="Ablation model",
+        fill="Value"
+    ) +
+    theme_minimal() +
+    theme(
+        axis.text.x=element_text(angle=45, hjust=1)
+    )
+
+p_heat
+
+ggsave(
+    filename=file.path(
+        out_dir,
+        "cosmx_ablation_summary_heatmap_A4_landscape.pdf"
+    ),
+    plot=p_heat,
+    device="pdf",
+    width=11.69,
+    height=8.27,
+    units="in"
+)
+
+
+## ============================================================
+## 9. Heatmap: correlation-only summary
+## ============================================================
+## Cleaner figure showing only Pearson and Spearman correlations
+## between the full QS and each ablated QS.
+
+cor_heat_df <- tab_cosmx[
+    tab_cosmx$model != "full",
+    c("model", "pearson", "spearman")
+]
+
+cor_heat_long <- reshape(
+    cor_heat_df,
+    varying=c("pearson", "spearman"),
+    v.names="correlation",
+    timevar="metric",
+    times=c("pearson", "spearman"),
+    direction="long"
+)
+
+rownames(cor_heat_long) <- NULL
+
+p_cor_heat <- ggplot(
+    cor_heat_long,
+    aes(x=metric, y=model, fill=correlation)
+) +
+    geom_tile() +
+    geom_text(
+        aes(label=formatC(correlation, digits=3, format="f")),
+        color="white",
+        size=4
+    ) +
+    labs(
+        title="CosMx QS ablation correlation with full model",
+        x="Correlation metric",
+        y="Ablation model",
+        fill="Correlation"
+    ) +
+    theme_minimal()
+
+p_cor_heat
+
+ggsave(
+    filename=file.path(
+        out_dir,
+        "cosmx_ablation_correlation_heatmap.pdf"
+    ),
+    plot=p_cor_heat,
+    device="pdf",
+    width=8,
+    height=5,
+    units="in"
+)
+
+
+## ============================================================
+## 10. Optional: save tabular results
+## ============================================================
+
+write.csv(
+    tab_cosmx,
+    file=file.path(out_dir, "cosmx_ablation_summary_table.csv"),
+    row.names=FALSE
+)
+
+
+plot_ablation_colored <- function(ablation_results, ablation_name,
+    reference="full", color_col=NULL,
+    x_label="Full QS", y_label=NULL, title=NULL,
+    cor_method="spearman", output_file=NULL) {
+
+    stopifnot(reference %in% names(ablation_results))
+    stopifnot(ablation_name %in% names(ablation_results))
+
+    ref_score <- ablation_results[[reference]]$score
+    abl_score <- ablation_results[[ablation_name]]$score
+
+    stopifnot(length(ref_score) == length(abl_score))
+
+    spe_ref <- ablation_results[[reference]]$spe
+    cd <- as.data.frame(colData(spe_ref))
+
+    df <- data.frame(
+        full_qs=ref_score,
+        ablated_qs=abl_score
+    )
+
+    if (!is.null(color_col)) {
+        stopifnot(color_col %in% colnames(cd))
+        df$color_var <- cd[[color_col]]
+    }
+
+    cor_val <- cor(
+        df$full_qs,
+        df$ablated_qs,
+        use="complete.obs",
+        method=cor_method
+    )
+
+    if (is.null(y_label)) {
+        y_label <- paste0("QS: ", ablation_name)
+    }
+
+    if (is.null(title)) {
+        title <- paste0("Full QS vs ", ablation_name)
+    }
+
+    if (is.null(color_col)) {
+        p <- ggplot(df, aes(x=full_qs, y=ablated_qs)) +
+            geom_point(alpha=0.35, size=0.6)
+    } else {
+        p <- ggplot(df, aes(x=full_qs, y=ablated_qs, color=color_var)) +
+            geom_point(alpha=0.35, size=0.6)
+    }
+
+    p <- p +
+        geom_abline(
+            slope=1,
+            intercept=0,
+            linetype="dashed",
+            color="red"
+        ) +
+        labs(
+            x=x_label,
+            y=y_label,
+            title=title,
+            color=color_col
+        ) +
+        theme_minimal() +
+        annotate(
+            "text",
+            x=quantile(df$full_qs, 0.99, na.rm=TRUE),
+            y=quantile(df$ablated_qs, 0.01, na.rm=TRUE),
+            label=paste0(
+                cor_method,
+                " r = ",
+                formatC(cor_val, digits=3, format="f")
+            ),
+            hjust=1,
+            vjust=0,
+            color="black"
+        )
+
+    if (!is.null(output_file)) {
+        ggsave(
+            filename=output_file,
+            plot=p,
+            device="pdf",
+            width=11.69,
+            height=8.27,
+            units="in"
+        )
+    }
+
+    return(list(
+        plot=p,
+        correlation=cor_val,
+        data=df
+    ))
+}
+
+p_no_signal_col <- plot_ablation_colored(
+    ablation_results=abl_cosmx,
+    ablation_name="no_log2SignalDensity",
+    color_col="log2Ctrl_total_ratio",
+    title="CosMx: full QS vs QS without log2SignalDensity",
+    y_label="QS without log2SignalDensity",
+    output_file="~/SpaceTrooper_qs_check/cosmx_full_vs_no_log2SignalDensity_log2Ctrl_total_ratiocolored.pdf"
+)
+p_no_signal_col_area <- plot_ablation_colored(
+    ablation_results=abl_cosmx,
+    ablation_name="no_log2SignalDensity",
+    color_col="Area_um",
+    title="CosMx: full QS vs QS without log2SignalDensity",
+    y_label="QS without log2SignalDensity",
+    output_file="~/SpaceTrooper_qs_check/cosmx_full_vs_no_log2SignalDensity_Area_um_colored.pdf"
+)
+
+p_no_signal_col_aspect <- plot_ablation_colored(
+    ablation_results=abl_cosmx,
+    ablation_name="no_log2SignalDensity",
+    color_col="log2AspectRatio",
+    title="CosMx: full QS vs QS without log2SignalDensity",
+    y_label="QS without log2SignalDensity",
+    output_file="~/SpaceTrooper_qs_check/cosmx_full_vs_no_log2SignalDensity_log2AspectRatio_colored.pdf"
+)
+
+p_no_signal_col_aspect$plot
+p_no_signal_col$plot
+
+plot_ablation_colored(
+    ablation_results=abl_cosmx,
+    ablation_name="no_log2Ctrl_total_ratio",
+    color_col="log2SignalDensity",
+    title="CosMx: full QS vs QS without log2Ctrl_total_ratio",
+    y_label="QS without log2Ctrl_total_ratio",
+    output_file="~/SpaceTrooper_qs_check/cosmx_full_vs_no_log2ctrl_total_ratio_log2SignalDensitycolored.pdf"
+)$plot
+
+plot_ablation_colored(
+    ablation_results=abl_cosmx,
+    ablation_name="no_log2Ctrl_total_ratio",
+    color_col="Area_um",
+    title="CosMx: full QS vs QS without log2Ctrl_total_ratio",
+    y_label="QS without log2Ctrl_total_ratio",
+    output_file="~/SpaceTrooper_qs_check/cosmx_full_vs_no_log2Ctrl_total_ratio_Area_um_colored.pdf"
+)$plot
+plot_ablation_colored(
+    ablation_results=abl_cosmx,
+    ablation_name="no_log2Ctrl_total_ratio",
+    color_col="log2AspectRatio",
+    title="CosMx: full QS vs QS without log2Ctrl_total_ratio",
+    y_label="QS without log2Ctrl_total_ratio",
+    output_file="~/SpaceTrooper_qs_check/cosmx_full_vs_no_log2Ctrl_total_ratio_log2AspectRatio_colored.pdf"
+)$plot
diff --git a/inst/scripts/compare_qs_general.R b/inst/scripts/compare_qs_general.R
new file mode 100644
index 0000000..60dd790
--- /dev/null
+++ b/inst/scripts/compare_qs_general.R
@@ -0,0 +1,79 @@
+args <- commandArgs(trailingOnly=TRUE)
+
+if (length(args) != 2) {
+    stop("Usage: Rscript compare_qs_general.R before.rds current.rds")
+}
+
+before <- readRDS(args[[1]])
+current <- readRDS(args[[2]])
+
+qs_before <- before$quality_score
+qs_current <- current$quality_score
+
+cat("BEFORE\n")
+cat("Branch:", before$git_branch, "\n")
+cat("Commit:", before$git_hash, "\n")
+cat("QScore column:", before$qscore_column, "\n")
+cat("Detection:", before$qscore_detection_method, "\n\n")
+
+cat("CURRENT\n")
+cat("Branch:", current$git_branch, "\n")
+cat("Commit:", current$git_hash, "\n")
+cat("QScore column:", current$qscore_column, "\n")
+cat("Detection:", current$qscore_detection_method, "\n\n")
+
+cat("Lengths:\n")
+cat("before:", length(qs_before), "\n")
+cat("current:", length(qs_current), "\n\n")
+
+if (length(qs_before) != length(qs_current)) {
+    stop("Different number of Quality Score values. Cannot compare directly.")
+}
+
+comparison <- data.frame(
+    index=seq_along(qs_before),
+    quality_score_before=qs_before,
+    quality_score_current=qs_current,
+    difference=qs_current - qs_before
+)
+
+cat("Summary before:\n")
+print(summary(qs_before))
+
+cat("\nSummary current:\n")
+print(summary(qs_current))
+
+cat("\nSummary difference current - before:\n")
+print(summary(comparison$difference))
+
+cat("\nall.equal:\n")
+print(all.equal(qs_before, qs_current))
+
+cat("\nExactly different values:\n")
+print(sum(qs_before != qs_current, na.rm=TRUE))
+
+cat("\nDifferent above tolerance 1e-8:\n")
+print(sum(abs(qs_current - qs_before) > 1e-8, na.rm=TRUE))
+
+cat("\nDifferent above tolerance 1e-6:\n")
+print(sum(abs(qs_current - qs_before) > 1e-6, na.rm=TRUE))
+
+cat("\nDifferent above tolerance 1e-4:\n")
+print(sum(abs(qs_current - qs_before) > 1e-4, na.rm=TRUE))
+
+out_csv <- file.path(
+    dirname(args[[2]]),
+    "quality_score_comparison.csv"
+)
+
+out_rds <- file.path(
+    dirname(args[[2]]),
+    "quality_score_comparison.rds"
+)
+
+write.csv(comparison, out_csv, row.names=FALSE)
+saveRDS(comparison, out_rds)
+
+cat("\nSaved:\n")
+cat(out_csv, "\n")
+cat(out_rds, "\n")
diff --git a/inst/scripts/create_cosmx_rds.R b/inst/scripts/create_cosmx_rds.R
new file mode 100644
index 0000000..152d9f8
--- /dev/null
+++ b/inst/scripts/create_cosmx_rds.R
@@ -0,0 +1,39 @@
+suppressPackageStartupMessages({
+    library(devtools)
+    library(SpatialExperiment)
+    library(SummarizedExperiment)
+})
+
+
+input_dir <- "/Users/inzirio/Downloads/CosMx_data/DBKero/CosMx_Breast/CosMx_data_Case2"
+
+output_rds <- "~/SpaceTrooper_qs_check/cosmx_case2_spe_raw.rds"
+output_rds <- path.expand(output_rds)
+
+
+library(SpaceTrooper)
+
+if (!dir.exists(input_dir)) {
+    stop("Input directory does not exist: ", input_dir)
+}
+
+message("Reading CosMx dataset from:")
+message(input_dir)
+
+spe <- readCosmxSPE(input_dir)
+
+message("Object class:")
+print(class(spe))
+
+message("Object dimensions:")
+print(dim(spe))
+
+message("colData columns:")
+print(colnames(SummarizedExperiment::colData(spe)))
+
+message("Saving RDS to:")
+message(output_rds)
+
+saveRDS(spe, output_rds)
+
+message("Done.")
diff --git a/inst/scripts/run_qs_general.R b/inst/scripts/run_qs_general.R
new file mode 100644
index 0000000..607cf52
--- /dev/null
+++ b/inst/scripts/run_qs_general.R
@@ -0,0 +1,275 @@
+
+package_path <- "/Users/inzirio/My Drive/works/coding/SpaceTrooper"
+input_rds <- "~/SpaceTrooper_qs_check/cosmx_case2_spe_raw.rds"
+output_rds <- "~/SpaceTrooper_qs_check/cosmx_case2_qs_result_interaction.rds"
+
+
+message("Package path: ", package_path)
+message("Input RDS: ", input_rds)
+message("Output RDS: ", output_rds)
+
+library(devtools)
+library(SpatialExperiment)
+
+devtools::load_all(package_path, quiet=FALSE)
+
+
+obj <- readRDS(input_rds)
+metadata(obj)$formula_variables <- c(log2CountArea="log2CountArea",
+                                        log2AspectRatio="log2AspectRatio")
+obj <- spatialPerCellQC(obj)
+
+metadata(obj)
+obj <- computeQCScore(obj)
+
+qc_nointeract <- obj$QC_score
+qc_interact <- obj$QC_score
+saveRDS(qc_nointeract, "~/SpaceTrooper_qs_check/qc_nointeract.rds")
+saveRDS(qc_interact, "~/SpaceTrooper_qs_check/qc_interact.rds")
+
+library(ggplot2)
+
+# assume qc_nointeract and qc_interact exist in the environment
+df <- data.frame(qc_nointeract = qc_nointeract, qc_interact = qc_interact)
+
+# correlation
+cor_val <- cor(df$qc_nointeract, df$qc_interact, use = "complete.obs", method = "pearson")
+
+# plot: use 2D binning for large datasets, points otherwise; add y=x line and annotation
+plot_qc <- ggplot(df, aes(x = qc_nointeract, y = qc_interact)) +
+    geom_point(alpha = 0.35, size = 0.6) +
+    geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
+#   scale_fill_gradient(low = "white", high = "steelblue") +
+  labs(
+    x = "QC score (no interaction)",
+    y = "QC score (with interaction)",
+    title = "QC: no-interaction vs interaction"
+  ) +
+  theme_minimal() +
+  geom_hline(yintercept = 0.75) +
+  geom_vline(xintercept = 0.75) +
+  annotate(
+    "text",
+    x = quantile(df$qc_nointeract, 0.99, na.rm = TRUE),
+    y = quantile(df$qc_interact, 0.01, na.rm = TRUE),
+    label = paste0("r = ", formatC(cor_val, digits = 3, format = "f")),
+    hjust = 1, vjust = 0
+  )
+
+# return the plot object
+plot_qc
+
+
+library(ggplot2)
+
+df <- data.frame(
+    qc_nointeract=qc_nointeract,
+    qc_interact=qc_interact
+)
+
+threshold <- 0.75
+
+df$quadrant <- with(
+    df,
+    ifelse(
+        qc_nointeract < threshold & qc_interact < threshold,
+        "low / low",
+        ifelse(
+            qc_nointeract >= threshold & qc_interact < threshold,
+            "high no-interaction / low interaction",
+            ifelse(
+                qc_nointeract < threshold & qc_interact >= threshold,
+                "low no-interaction / high interaction",
+                "high / high"
+            )
+        )
+    )
+)
+
+table(df$quadrant)
+
+cor_val <- cor(
+    df$qc_nointeract,
+    df$qc_interact,
+    use="complete.obs",
+    method="pearson"
+)
+
+plot_qc <- ggplot(df, aes(x=qc_nointeract, y=qc_interact, color=quadrant)) +
+    geom_point(alpha=0.35, size=0.6) +
+    geom_abline(
+        slope=1,
+        intercept=0,
+        linetype="dashed",
+        color="red"
+    ) +
+    geom_hline(yintercept=threshold) +
+    geom_vline(xintercept=threshold) +
+    labs(
+        x="QC score (no interaction)",
+        y="QC score (with interaction)",
+        title="QC: no-interaction vs interaction",
+        color="Quadrant"
+    ) +
+    theme_minimal() +
+    annotate(
+        "text",
+        x=quantile(df$qc_nointeract, 0.99, na.rm=TRUE),
+        y=quantile(df$qc_interact, 0.01, na.rm=TRUE),
+        label=paste0("r = ", formatC(cor_val, digits=3, format="f")),
+        hjust=1,
+        vjust=0,
+        color="black"
+    )
+
+plot_qc
+
+plot_qc <- plot_qc +
+    scale_color_manual(
+        values=c(
+            "low / low"="grey60",
+            "high no-interaction / low interaction"="orange",
+            "low no-interaction / high interaction"="dodgerblue",
+            "high / high"="black"
+        )
+    )
+
+ggsave(
+    filename="~/SpaceTrooper_qs_check/qc_nointeraction_vs_interaction_A4_landscape.pdf",
+    plot=plot_qc,
+    device="pdf",
+    width=11.69,
+    height=8.27,
+    units="in"
+)
+# get_coldata_df <- function(x) {
+#     cd <- SummarizedExperiment::colData(x)
+#     as.data.frame(cd)
+# }
+
+# get_qscore_from_coldata <- function(x) {
+#     cd <- get_coldata_df(x)
+
+#     candidate_names <- c(
+#         "quality_score",
+#         "QualityScore",
+#         "qualityScore",
+#         "qscore",
+#         "QScore",
+#         "Q_score",
+#         "q_score",
+#         "score",
+#         "flag_score",
+#         "qc_score",
+#         "QC_score",
+#         "QCScore",
+#         "cell_quality_score",
+#         "spatial_quality_score"
+#     )
+
+#     exact_hits <- intersect(candidate_names, colnames(cd))
+
+#     if (length(exact_hits) > 0) {
+#         selected <- exact_hits[1]
+#         return(list(
+#             values=cd[[selected]],
+#             column=selected,
+#             method="exact_name_match",
+#             all_candidates=exact_hits,
+#             coldata=cd
+#         ))
+#     }
+
+#     pattern_hits <- grep(
+#         "quality|qscore|q_score|qc_score|flag_score|score",
+#         colnames(cd),
+#         ignore.case=TRUE,
+#         value=TRUE
+#     )
+
+#     numeric_pattern_hits <- pattern_hits[
+#         vapply(cd[pattern_hits], is.numeric, logical(1))
+#     ]
+
+#     if (length(numeric_pattern_hits) > 0) {
+#         selected <- numeric_pattern_hits[1]
+#         return(list(
+#             values=cd[[selected]],
+#             column=selected,
+#             method="pattern_numeric_match",
+#             all_candidates=numeric_pattern_hits,
+#             coldata=cd
+#         ))
+#     }
+
+#     stop(
+#         "Could not automatically identify Quality Score column.\n",
+#         "Available colData columns are:\n",
+#         paste(colnames(cd), collapse=", ")
+#     )
+# }
+
+# call_function_if_exists <- function(fun_name, x) {
+#     if (!exists(fun_name, mode="function")) {
+#         stop("Function not found in loaded SpaceTrooper branch: ", fun_name)
+#     }
+
+#     fun <- get(fun_name, mode="function")
+#     fun(x)
+# }
+
+# obj_before <- obj
+
+# if (run_spatial_qc) {
+#     message("Running spatial QC function...")
+#     obj <- call_function_if_exists(spatial_qc_fun, obj)
+# }
+
+# message("Running Quality Score function...")
+# obj_after <- call_function_if_exists(qscore_fun, obj)
+
+# message("Extracting Quality Score...")
+# qs_info <- get_qscore_from_coldata(obj_after)
+
+# qs <- qs_info$values
+
+# if (!is.numeric(qs)) {
+#     warning("Detected Quality Score column is not numeric: ", qs_info$column)
+# }
+
+# out <- list(
+#     git_branch=git_branch,
+#     git_hash=git_hash,
+#     package_path=normalizePath(package_path),
+#     input_rds=normalizePath(input_rds),
+#     spatial_qc_fun=spatial_qc_fun,
+#     qscore_fun=qscore_fun,
+#     run_spatial_qc=run_spatial_qc,
+#     qscore_column=qs_info$column,
+#     qscore_detection_method=qs_info$method,
+#     qscore_candidates=qs_info$all_candidates,
+#     n=length(qs),
+#     object_class=class(obj_after),
+#     object_dim=tryCatch(dim(obj_after), error=function(e) NULL),
+#     object_colnames=tryCatch(colnames(obj_after), error=function(e) NULL),
+#     quality_score=qs,
+#     quality_score_summary=summary(qs),
+#     colData=qs_info$coldata,
+#     session_info=sessionInfo()
+# )
+
+# saveRDS(out, output_rds)
+
+# message("Saved result to: ", output_rds)
+# message("Detected Quality Score column: ", qs_info$column)
+# message("Detection method: ", qs_info$method)
+# message("Number of values: ", length(qs))
+# print(summary(qs))
+
+
+# computeQCScore(obj)
+
+# # model_formula <- paste0("~ log2CountArea + I(abs(log2AspectRatio) ",
+# #                                 "* as.numeric(dist_border<50)) + ",
+# #                                 " log2CountArea:I(abs(log2AspectRatio)",
+# #                                 "* as.numeric(dist_border<50))") #for cosmx
diff --git a/inst/scripts/transfer_coeffs_cosmx_cosmx.R b/inst/scripts/transfer_coeffs_cosmx_cosmx.R
new file mode 100644
index 0000000..d6d0aea
--- /dev/null
+++ b/inst/scripts/transfer_coeffs_cosmx_cosmx.R
@@ -0,0 +1,730 @@
+## ============================================================
+## 0. Libraries and output directory
+## ============================================================
+
+devtools::load_all()
+library(ggplot2)
+
+out_dir <- "~/SpaceTrooper_qs_check"
+
+set.seed(1998)
+
+
+## ============================================================
+## 1. Define common CosMx model formula
+## ============================================================
+## Since both datasets are CosMx, we can use the CosMx-specific formula,
+## including the border-adjusted aspect ratio term and control ratio.
+
+cosmx_formula <- paste0(
+    "~(",
+    "log2SignalDensity + ",
+    "Area_um + ",
+    "I(abs(log2AspectRatio) * as.numeric(dist_border < 50)) + ",
+    "log2Ctrl_total_ratio",
+    ")^2"
+)
+
+
+## ============================================================
+## 2. Read and preprocess CosMx Breast dataset
+## ============================================================
+
+cosmx_breast_path <- "~/Downloads/CosMx_data/DBKero/CosMx_Breast/CosMx_data_Case2/"
+
+specosm_breast <- readCosmxSPE(cosmx_breast_path)
+
+specosm_breast <- spatialPerCellQC(specosm_breast)
+
+
+## ============================================================
+## 3. Read and preprocess CosMx Pancreas dataset
+## ============================================================
+
+cosmx_pancreas_path <- "/Users/inzirio/Downloads/CosMx_data/Pancreas-CosMx-WTx"
+
+specosm_pancreas <- readCosmxSPE(cosmx_pancreas_path)
+
+specosm_pancreas <- spatialPerCellQC(specosm_pancreas)
+
+
+## ============================================================
+## 4. Train native QS models on both datasets
+## ============================================================
+
+specosm_breast <- computeQCScore(
+    spe=specosm_breast,
+    modelFormula=cosmx_formula,
+    verbose=TRUE
+)
+
+specosm_pancreas <- computeQCScore(
+    spe=specosm_pancreas,
+    modelFormula=cosmx_formula,
+    verbose=TRUE
+)
+
+
+## ============================================================
+## 5. Store native QS scores and trained models
+## ============================================================
+
+specosm_breast$QC_score_breast_native <- specosm_breast$QC_score
+specosm_pancreas$QC_score_pancreas_native <- specosm_pancreas$QC_score
+
+breast_model <- metadata(specosm_breast)$QCScore_model
+pancreas_model <- metadata(specosm_pancreas)$QCScore_model
+
+
+## ============================================================
+## 6. Apply transferred models
+## ============================================================
+## Apply Pancreas-trained model to Breast.
+## Apply Breast-trained model to Pancreas.
+
+specosm_breast <- applyQCScoreModel(
+    spe=specosm_breast,
+    qcModel=pancreas_model,
+    scoreName="QC_score_pancreas_model"
+)
+
+specosm_pancreas <- applyQCScoreModel(
+    spe=specosm_pancreas,
+    qcModel=breast_model,
+    scoreName="QC_score_breast_model"
+)
+
+
+## ============================================================
+## 7. Basic checks
+## ============================================================
+
+summary(specosm_breast$QC_score_breast_native)
+summary(specosm_breast$QC_score_pancreas_model)
+
+summary(specosm_pancreas$QC_score_pancreas_native)
+summary(specosm_pancreas$QC_score_breast_model)
+
+range(specosm_breast$QC_score_breast_native, na.rm=TRUE)
+range(specosm_breast$QC_score_pancreas_model, na.rm=TRUE)
+
+range(specosm_pancreas$QC_score_pancreas_native, na.rm=TRUE)
+range(specosm_pancreas$QC_score_breast_model, na.rm=TRUE)
+
+sum(is.na(specosm_breast$QC_score_breast_native))
+sum(is.na(specosm_breast$QC_score_pancreas_model))
+
+sum(is.na(specosm_pancreas$QC_score_pancreas_native))
+sum(is.na(specosm_pancreas$QC_score_breast_model))
+
+
+## ============================================================
+## 8. Correlations: native vs transferred
+## ============================================================
+
+cor_breast_spearman <- cor(
+    specosm_breast$QC_score_breast_native,
+    specosm_breast$QC_score_pancreas_model,
+    use="complete.obs",
+    method="spearman"
+)
+
+cor_breast_pearson <- cor(
+    specosm_breast$QC_score_breast_native,
+    specosm_breast$QC_score_pancreas_model,
+    use="complete.obs",
+    method="pearson"
+)
+
+cor_pancreas_spearman <- cor(
+    specosm_pancreas$QC_score_pancreas_native,
+    specosm_pancreas$QC_score_breast_model,
+    use="complete.obs",
+    method="spearman"
+)
+
+cor_pancreas_pearson <- cor(
+    specosm_pancreas$QC_score_pancreas_native,
+    specosm_pancreas$QC_score_breast_model,
+    use="complete.obs",
+    method="pearson"
+)
+
+cor_breast_spearman
+cor_breast_pearson
+
+cor_pancreas_spearman
+cor_pancreas_pearson
+
+
+## ============================================================
+## 9. Plot: Breast native QS vs Pancreas-trained QS
+## ============================================================
+
+res_breast <- plot_qc_score_comparison(
+    x=specosm_breast$QC_score_breast_native,
+    y=specosm_breast$QC_score_pancreas_model,
+    x_label="Breast native QS",
+    y_label="Pancreas-trained QS",
+    title="CosMx Breast: native QS vs Pancreas-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file=file.path(
+        out_dir,
+        "cosmx_breast_native_vs_pancreas_model_A4_landscape.pdf"
+    )
+)
+
+res_breast$plot
+res_breast$correlation
+res_breast$quadrant_table
+
+
+## ============================================================
+## 10. Plot: Pancreas native QS vs Breast-trained QS
+## ============================================================
+
+res_pancreas <- plot_qc_score_comparison(
+    x=specosm_pancreas$QC_score_pancreas_native,
+    y=specosm_pancreas$QC_score_breast_model,
+    x_label="Pancreas native QS",
+    y_label="Breast-trained QS",
+    title="CosMx Pancreas: native QS vs Breast-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file=file.path(
+        out_dir,
+        "cosmx_pancreas_native_vs_breast_model_A4_landscape.pdf"
+    )
+)
+
+res_pancreas$plot
+res_pancreas$correlation
+res_pancreas$quadrant_table
+
+
+## ============================================================
+## 11. Compare model formulas and coefficients
+## ============================================================
+
+metadata(specosm_breast)$QCScore_model$model_formula
+metadata(specosm_pancreas)$QCScore_model$model_formula
+
+metadata(specosm_breast)$QCScore_model$model_matrix_colnames
+metadata(specosm_pancreas)$QCScore_model$model_matrix_colnames
+
+metadata(specosm_breast)$QCScore_model$coefficients_table
+metadata(specosm_pancreas)$QCScore_model$coefficients_table
+
+
+## ============================================================
+## 12. Save summary table
+## ============================================================
+
+cosmx_transfer_summary <- data.frame(
+    comparison=c(
+        "Breast native vs Pancreas-trained",
+        "Pancreas native vs Breast-trained"
+    ),
+    spearman=c(
+        cor_breast_spearman,
+        cor_pancreas_spearman
+    ),
+    pearson=c(
+        cor_breast_pearson,
+        cor_pancreas_pearson
+    ),
+    stringsAsFactors=FALSE
+)
+
+cosmx_transfer_summary
+
+write.csv(
+    cosmx_transfer_summary,
+    file=file.path(out_dir, "cosmx_breast_pancreas_transfer_summary.csv"),
+    row.names=FALSE
+)
+
+plot_qc_score_coldata <- function(x, y, spe, color_col,
+    x_label="x", y_label="y",
+    title="QC score comparison",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file=NULL) {
+
+    stopifnot(length(x) == length(y))
+    stopifnot(ncol(spe) == length(x))
+    stopifnot(color_col %in% colnames(colData(spe)))
+
+    df <- data.frame(
+        x=x,
+        y=y,
+        color_var=as.data.frame(colData(spe))[[color_col]]
+    )
+
+    cor_val <- cor(
+        df$x,
+        df$y,
+        use="complete.obs",
+        method=cor_method
+    )
+
+    p <- ggplot(df, aes(x=x, y=y, color=color_var)) +
+        geom_point(alpha=0.35, size=0.6) +
+        geom_abline(
+            slope=1,
+            intercept=0,
+            linetype="dashed",
+            color="red"
+        ) +
+        geom_hline(yintercept=threshold) +
+        geom_vline(xintercept=threshold) +
+        labs(
+            x=x_label,
+            y=y_label,
+            title=title,
+            color=color_col
+        ) +
+        theme_minimal() +
+        annotate(
+            "text",
+            x=quantile(df$x, 0.99, na.rm=TRUE),
+            y=quantile(df$y, 0.01, na.rm=TRUE),
+            label=paste0(
+                cor_method,
+                " r = ",
+                formatC(cor_val, digits=3, format="f")
+            ),
+            hjust=1,
+            vjust=0,
+            color="black"
+        )
+
+    if (!is.null(output_file)) {
+        ggsave(
+            filename=output_file,
+            plot=p,
+            device="pdf",
+            width=11.69,
+            height=8.27,
+            units="in"
+        )
+    }
+
+    return(list(
+        plot=p,
+        correlation=cor_val,
+        data=df
+    ))
+}
+
+
+res_breast_signal <- plot_qc_score_coldata(
+    x=specosm_breast$QC_score_breast_native,
+    y=specosm_breast$QC_score_pancreas_model,
+    spe=specosm_breast,
+    color_col="log2SignalDensity",
+    x_label="Breast native QS",
+    y_label="Pancreas-trained QS",
+    title="CosMx Breast: native QS vs Pancreas-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file="~/SpaceTrooper_qs_check/cosmx_breast_native_vs_pancreas_model_colored_by_log2SignalDensity.pdf"
+)
+
+res_breast_signal$plot
+
+res_breast_area <- plot_qc_score_coldata(
+    x=specosm_breast$QC_score_breast_native,
+    y=specosm_breast$QC_score_pancreas_model,
+    spe=specosm_breast,
+    color_col="Area_um",
+    x_label="Breast native QS",
+    y_label="Pancreas-trained QS",
+    title="CosMx Breast: native QS vs Pancreas-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file="~/SpaceTrooper_qs_check/cosmx_breast_native_vs_pancreas_model_colored_by_Area_um.pdf"
+)
+
+res_breast_area$plot
+
+res_breast_ctrl <- plot_qc_score_coldata(
+    x=specosm_breast$QC_score_breast_native,
+    y=specosm_breast$QC_score_pancreas_model,
+    spe=specosm_breast,
+    color_col="log2Ctrl_total_ratio",
+    x_label="Breast native QS",
+    y_label="Pancreas-trained QS",
+    title="CosMx Breast: native QS vs Pancreas-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file="~/SpaceTrooper_qs_check/cosmx_breast_native_vs_pancreas_model_colored_by_log2Ctrl_total_ratio.pdf"
+)
+
+res_breast_ctrl$plot
+
+res_breast_aspect <- plot_qc_score_coldata(
+    x=specosm_breast$QC_score_breast_native,
+    y=specosm_breast$QC_score_pancreas_model,
+    spe=specosm_breast,
+    color_col="log2AspectRatio",
+    x_label="Breast native QS",
+    y_label="Pancreas-trained QS",
+    title="CosMx Breast: native QS vs Pancreas-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file="~/SpaceTrooper_qs_check/cosmx_breast_native_vs_pancreas_model_colored_by_log2AspectRatio.pdf"
+)
+
+res_breast_aspect$plot
+
+SpaceTrooper::plotMetricHist(spe = specosm_breast, metric="log2Ctrl_total_ratio")
+SpaceTrooper::plotMetricHist(spe = specosm_pancreas, metric="log2Ctrl_total_ratio")
+
+############################################################
+specosm_mb1 <- readCosmxSPE("/Users/inzirio/Downloads/CosMx_data/CosMx1k_MouseBrain1")
+specosm_mb2 <- readCosmxSPE("/Users/inzirio/Downloads/CosMx_data/CosMx1k_MouseBrain2")
+
+
+specosm_mb1 <- spatialPerCellQC(specosm_mb1)
+specosm_mb2 <- spatialPerCellQC(specosm_mb2)
+
+cosmx_formula <- paste0(
+    "~(",
+    "log2SignalDensity + ",
+    "Area_um + ",
+    "I(abs(log2AspectRatio) * as.numeric(dist_border < 50)) + ",
+    "log2Ctrl_total_ratio",
+    ")^2"
+)
+
+specosm_mb1 <- computeQCScore(
+    spe=specosm_mb1,
+    modelFormula=cosmx_formula,
+    verbose=TRUE
+)
+
+specosm_mb2 <- computeQCScore(
+    spe=specosm_mb2,
+    modelFormula=cosmx_formula,
+    verbose=TRUE
+)
+
+specosm_mb1$QC_score_mb1_native <- specosm_mb1$QC_score
+specosm_mb2$QC_score_mb2_native <- specosm_mb2$QC_score
+
+mb1_model <- metadata(specosm_mb1)$QCScore_model
+mb2_model <- metadata(specosm_mb2)$QCScore_model
+
+specosm_mb1 <- applyQCScoreModel(
+    spe=specosm_mb1,
+    qcModel=mb2_model,
+    scoreName="QC_score_mb2_model"
+)
+
+specosm_mb2 <- applyQCScoreModel(
+    spe=specosm_mb2,
+    qcModel=mb1_model,
+    scoreName="QC_score_mb1_model"
+)
+
+cor_mb1_spearman <- cor(
+    specosm_mb1$QC_score_mb1_native,
+    specosm_mb1$QC_score_mb2_model,
+    use="complete.obs",
+    method="spearman"
+)
+
+cor_mb1_pearson <- cor(
+    specosm_mb1$QC_score_mb1_native,
+    specosm_mb1$QC_score_mb2_model,
+    use="complete.obs",
+    method="pearson"
+)
+
+cor_mb2_spearman <- cor(
+    specosm_mb2$QC_score_mb2_native,
+    specosm_mb2$QC_score_mb1_model,
+    use="complete.obs",
+    method="spearman"
+)
+
+cor_mb2_pearson <- cor(
+    specosm_mb2$QC_score_mb2_native,
+    specosm_mb2$QC_score_mb1_model,
+    use="complete.obs",
+    method="pearson"
+)
+
+res_mb1 <- plot_qc_score_comparison(
+    x=specosm_mb1$QC_score_mb1_native,
+    y=specosm_mb1$QC_score_mb2_model,
+    x_label="MouseBrain1 native QS",
+    y_label="MouseBrain2-trained QS",
+    title="CosMx MouseBrain1: native QS vs MouseBrain2-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file=file.path(
+        out_dir,
+        "cosmx_mousebrain1_native_vs_mousebrain2_model_A4_landscape.pdf"
+    )
+)
+
+res_mb2 <- plot_qc_score_comparison(
+    x=specosm_mb2$QC_score_mb2_native,
+    y=specosm_mb2$QC_score_mb1_model,
+    x_label="MouseBrain2 native QS",
+    y_label="MouseBrain1-trained QS",
+    title="CosMx MouseBrain2: native QS vs MouseBrain1-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file=file.path(
+        out_dir,
+        "cosmx_mousebrain2_native_vs_mousebrain1_model_A4_landscape.pdf"
+    )
+)
+
+res_mb1$plot
+res_mb2$plot
+
+color_vars <- c(
+    "log2SignalDensity",
+    "Area_um",
+    "log2AspectRatio",
+    "log2Ctrl_total_ratio"
+)
+
+mb1_colored_plots <- lapply(color_vars, function(v) {
+    plot_qc_score_coldata(
+        x=specosm_mb1$QC_score_mb1_native,
+        y=specosm_mb1$QC_score_mb2_model,
+        spe=specosm_mb1,
+        color_col=v,
+        x_label="MouseBrain1 native QS",
+        y_label="MouseBrain2-trained QS",
+        title=paste0("MouseBrain1 native vs MouseBrain2-trained QS: ", v),
+        threshold=0.75,
+        cor_method="spearman",
+        output_file=file.path(
+            out_dir,
+            paste0("cosmx_mousebrain1_native_vs_mousebrain2_model_", v, ".pdf")
+        )
+    )
+})
+names(mb1_colored_plots) <- color_vars
+
+mb2_colored_plots <- lapply(color_vars, function(v) {
+    plot_qc_score_coldata(
+        x=specosm_mb2$QC_score_mb2_native,
+        y=specosm_mb2$QC_score_mb1_model,
+        spe=specosm_mb2,
+        color_col=v,
+        x_label="MouseBrain2 native QS",
+        y_label="MouseBrain1-trained QS",
+        title=paste0("MouseBrain2 native vs MouseBrain1-trained QS: ", v),
+        threshold=0.75,
+        cor_method="spearman",
+        output_file=file.path(
+            out_dir,
+            paste0("cosmx_mousebrain2_native_vs_mousebrain1_model_", v, ".pdf")
+        )
+    )
+})
+names(mb2_colored_plots) <- color_vars
+mb2_colored_plots
+cosmx_mousebrain_transfer_summary <- data.frame(
+    comparison=c(
+        "MouseBrain1 native vs MouseBrain2-trained",
+        "MouseBrain2 native vs MouseBrain1-trained"
+    ),
+    spearman=c(cor_mb1_spearman, cor_mb2_spearman),
+    pearson=c(cor_mb1_pearson, cor_mb2_pearson),
+    stringsAsFactors=FALSE
+)
+
+cosmx_mousebrain_transfer_summary
+
+write.csv(
+    cosmx_mousebrain_transfer_summary,
+    file=file.path(out_dir, "cosmx_mousebrain1_mousebrain2_transfer_summary.csv"),
+    row.names=FALSE
+)
+
+metadata(specosm_mb1)$QCScore_model$model_matrix_colnames
+metadata(specosm_mb2)$QCScore_model$model_matrix_colnames
+
+####################
+
+spexen_bc1 <- readXeniumSPE("/Users/inzirio/Downloads/Xenium_data/Xenium_HumanBreast1_Janesick/")
+spexen_bc2 <- readXeniumSPE("/Users/inzirio/Downloads/Xenium_data/Xenium_HumanBreast2_Janesick")
+
+
+spexen_bc1 <- spatialPerCellQC(spexen_bc1)
+spexen_bc2 <- spatialPerCellQC(spexen_bc2)
+
+xenium_formula <- paste0(
+    "~(",
+    "log2SignalDensity + ",
+    "Area_um + ",
+    "log2AspectRatio + ",
+    "log2Ctrl_total_ratio",
+    ")^2"
+)
+
+spexen_bc1 <- computeQCScore(
+    spe=spexen_bc1,
+    modelFormula=xenium_formula,
+    verbose=TRUE
+)
+
+spexen_bc2 <- computeQCScore(
+    spe=spexen_bc2,
+    modelFormula=xenium_formula,
+    verbose=TRUE
+)
+
+spexen_bc1$QC_score_bc1_native <- spexen_bc1$QC_score
+spexen_bc2$QC_score_bc2_native <- spexen_bc2$QC_score
+
+bc1_model <- metadata(spexen_bc1)$QCScore_model
+bc2_model <- metadata(spexen_bc2)$QCScore_model
+
+spexen_bc1 <- applyQCScoreModel(
+    spe=spexen_bc1,
+    qcModel=bc2_model,
+    scoreName="QC_score_bc2_model"
+)
+
+spexen_bc2 <- applyQCScoreModel(
+    spe=spexen_bc2,
+    qcModel=bc1_model,
+    scoreName="QC_score_bc1_model"
+)
+
+cor_bc1_spearman <- cor(
+    spexen_bc1$QC_score_bc1_native,
+    spexen_bc1$QC_score_bc2_model,
+    use="complete.obs",
+    method="spearman"
+)
+
+cor_bc1_pearson <- cor(
+    spexen_bc1$QC_score_bc1_native,
+    spexen_bc1$QC_score_bc2_model,
+    use="complete.obs",
+    method="pearson"
+)
+
+cor_bc2_spearman <- cor(
+    spexen_bc2$QC_score_bc2_native,
+    spexen_bc2$QC_score_bc1_model,
+    use="complete.obs",
+    method="spearman"
+)
+
+cor_bc2_pearson <- cor(
+    spexen_bc2$QC_score_bc2_native,
+    spexen_bc2$QC_score_bc1_model,
+    use="complete.obs",
+    method="pearson"
+)
+
+res_bc1 <- plot_qc_score_comparison(
+    x=spexen_bc1$QC_score_bc1_native,
+    y=spexen_bc1$QC_score_bc2_model,
+    x_label="Breast Cancer 1 native QS",
+    y_label="Breast Cancer 2-trained QS",
+    title="Xenium Breast Cancer 1: native QS vs Breast Cancer 2-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file=file.path(
+        out_dir,
+        "xenium_breast1_native_vs_breast2_model_A4_landscape.pdf"
+    )
+)
+
+res_bc2 <- plot_qc_score_comparison(
+    x=spexen_bc2$QC_score_bc2_native,
+    y=spexen_bc2$QC_score_bc1_model,
+    x_label="Breast Cancer 2 native QS",
+    y_label="Breast Cancer 1-trained QS",
+    title="Xenium Breast Cancer 2: native QS vs Breast Cancer 1-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file=file.path(
+        out_dir,
+        "xenium_breast2_native_vs_breast1_model_A4_landscape.pdf"
+    )
+)
+
+res_bc1$plot
+res_bc2$plot
+
+color_vars <- c(
+    "log2SignalDensity",
+    "Area_um",
+    "log2AspectRatio",
+    "log2Ctrl_total_ratio"
+)
+
+bc1_colored_plots <- lapply(color_vars, function(v) {
+    plot_qc_score_coldata(
+        x=spexen_bc1$QC_score_bc1_native,
+        y=spexen_bc1$QC_score_bc2_model,
+        spe=spexen_bc1,
+        color_col=v,
+        x_label="Breast Cancer 1 native QS",
+        y_label="Breast Cancer 2-trained QS",
+        title=paste0("Breast Cancer 1 native vs Breast Cancer 2-trained QS: ", v),
+        threshold=0.75,
+        cor_method="spearman",
+        output_file=file.path(
+            out_dir,
+            paste0("xenium_breast1_native_vs_breast2_model_", v, ".pdf")
+        )
+    )
+})
+names(bc1_colored_plots) <- color_vars
+
+bc2_colored_plots <- lapply(color_vars, function(v) {
+    plot_qc_score_coldata(
+        x=spexen_bc2$QC_score_bc2_native,
+        y=spexen_bc2$QC_score_bc1_model,
+        spe=spexen_bc2,
+        color_col=v,
+        x_label="Breast Cancer 2 native QS",
+        y_label="Breast Cancer 1-trained QS",
+        title=paste0("Breast Cancer 2 native vs Breast Cancer 1-trained QS: ", v),
+        threshold=0.75,
+        cor_method="spearman",
+        output_file=file.path(
+            out_dir,
+            paste0("xenium_breast2_native_vs_breast1_model_", v, ".pdf")
+        )
+    )
+})
+names(bc2_colored_plots) <- color_vars
+
+xenium_breast_transfer_summary <- data.frame(
+    comparison=c(
+        "Breast Cancer 1 native vs Breast Cancer 2-trained",
+        "Breast Cancer 2 native vs Breast Cancer 1-trained"
+    ),
+    spearman=c(cor_bc1_spearman, cor_bc2_spearman),
+    pearson=c(cor_bc1_pearson, cor_bc2_pearson),
+    stringsAsFactors=FALSE
+)
+
+xenium_breast_transfer_summary
+
+write.csv(
+    xenium_breast_transfer_summary,
+    file=file.path(out_dir, "xenium_breast1_breast2_transfer_summary.csv"),
+    row.names=FALSE
+)
+
+metadata(spexen_bc1)$QCScore_model$model_matrix_colnames
+metadata(spexen_bc2)$QCScore_model$model_matrix_colnames
+
+########################
diff --git a/inst/scripts/transfer_coeffs.R b/inst/scripts/transfer_coeffs_cosmx_xenium.R
similarity index 56%
rename from inst/scripts/transfer_coeffs.R
rename to inst/scripts/transfer_coeffs_cosmx_xenium.R
index 19812b4..ae260f5 100644
--- a/inst/scripts/transfer_coeffs.R
+++ b/inst/scripts/transfer_coeffs_cosmx_xenium.R
@@ -1,6 +1,6 @@
 library(SpaceTrooper)
-library(SummarizedExperiment)
-library(S4Vectors)
+# library(SummarizedExperiment)
+# library(S4Vectors)
 
 set.seed(1998)
 
@@ -22,7 +22,19 @@ spexen <- computeQCScore(
     verbose=TRUE
 )
 
-xen_model <- metadata(spexen)$QCScore_model
+sum(is.na(spexen$QC_score))
+colSums(is.na(as.data.frame(colData(spexen))[
+    , c("log2SignalDensity", "Area_um", "log2AspectRatio")
+]))
+
+length(spexen$QC_score) == ncol(spexen)
+
+sum(is.na(spexen$QC_score))
+summary(spexen$QC_score)
+
+metadata(spexen)$QCScore_model$model_formula
+metadata(spexen)$QCScore_model$model_matrix_colnames
+metadata(spexen)$QCScore_model$coefficients_table
 
 
 ## -----------------------------
@@ -41,8 +53,14 @@ specosm <- computeQCScore(
     verbose=TRUE
 )
 
-cosmx_model <- metadata(specosm)$QCScore_model
+length(specosm$QC_score) == ncol(specosm)
 
+sum(is.na(specosm$QC_score))
+summary(specosm$QC_score)
+
+metadata(specosm)$QCScore_model$model_formula
+metadata(specosm)$QCScore_model$model_matrix_colnames
+metadata(specosm)$QCScore_model$coefficients_table
 
 ## -----------------------------
 ## Save native scores before transfer
@@ -51,6 +69,8 @@ cosmx_model <- metadata(specosm)$QCScore_model
 spexen$QC_score_xenium_native <- spexen$QC_score
 specosm$QC_score_cosmx_native <- specosm$QC_score
 
+xen_model <- metadata(spexen)$QCScore_model
+cosmx_model <- metadata(specosm)$QCScore_model
 
 ## -----------------------------
 ## Apply transferred models
@@ -124,3 +144,57 @@ print(metadata(spexen)$QCScore_model$coefficients_table)
 
 message("CosMx model coefficients:")
 print(metadata(specosm)$QCScore_model$coefficients_table)
+
+
+## -----------------------------
+## Correlations of scores with model variables
+## -----------------------------
+cor(
+    spexen$QC_score_xenium_native,
+    spexen$QC_score_cosmx_model,
+    use="complete.obs",
+    method="spearman"
+)
+
+cor(
+    specosm$QC_score_cosmx_native,
+    specosm$QC_score_xenium_model,
+    use="complete.obs",
+    method="spearman"
+)
+
+range(spexen$QC_score_cosmx_model, na.rm=TRUE)
+sum(is.na(spexen$QC_score_cosmx_model))
+
+range(specosm$QC_score_xenium_model, na.rm=TRUE)
+sum(is.na(specosm$QC_score_xenium_model))
+
+res_xen <- plot_qc_score_comparison(
+    x=spexen$QC_score_xenium_native,
+    y=spexen$QC_score_cosmx_model,
+    x_label="Native Xenium QC score",
+    y_label="CosMx-trained QC score",
+    title="Xenium: native vs CosMx-trained QC score",
+    cor_method = "pearson",
+    threshold=0.25,
+    output_file="~/SpaceTrooper_qs_check/xenium_native_vs_cosmx_model.pdf"
+)
+
+res_xen$plot
+res_xen$correlation
+res_xen$quadrant_table
+
+res_cosmx <- plot_qc_score_comparison(
+    x=specosm$QC_score_cosmx_native,
+    y=specosm$QC_score_xenium_model,
+    x_label="Native CosMx QC score",
+    y_label="Xenium-trained QC score",
+    title="CosMx: native vs Xenium-trained QC score",
+    cor_method = "pearson",
+    threshold=0.75,
+    output_file="~/SpaceTrooper_qs_check/cosmx_native_vs_xenium_model.pdf"
+)
+
+res_cosmx$plot
+res_cosmx$correlation
+res_cosmx$quadrant_table
diff --git a/inst/scripts/transfer_coeffs_xenium_xenium.R b/inst/scripts/transfer_coeffs_xenium_xenium.R
new file mode 100644
index 0000000..7ed919e
--- /dev/null
+++ b/inst/scripts/transfer_coeffs_xenium_xenium.R
@@ -0,0 +1,348 @@
+spexen_bc1 <- readXeniumSPE("/Users/inzirio/Downloads/Xenium_data/Xenium_HumanBreast1_Janesick/")
+spexen_bc2 <- readXeniumSPE("/Users/inzirio/Downloads/Xenium_data/Xenium_HumanBreast2_Janesick")
+
+
+spexen_bc1 <- spatialPerCellQC(spexen_bc1)
+spexen_bc2 <- spatialPerCellQC(spexen_bc2)
+
+xenium_formula <- paste0(
+    "~(",
+    "log2SignalDensity + ",
+    "Area_um + ",
+    "log2AspectRatio + ",
+    "log2Ctrl_total_ratio",
+    ")^2"
+)
+
+spexen_bc1 <- computeQCScore(
+    spe=spexen_bc1,
+    modelFormula=xenium_formula,
+    verbose=TRUE
+)
+
+spexen_bc2 <- computeQCScore(
+    spe=spexen_bc2,
+    modelFormula=xenium_formula,
+    verbose=TRUE
+)
+
+spexen_bc1$QC_score_bc1_native <- spexen_bc1$QC_score
+spexen_bc2$QC_score_bc2_native <- spexen_bc2$QC_score
+
+bc1_model <- metadata(spexen_bc1)$QCScore_model
+bc2_model <- metadata(spexen_bc2)$QCScore_model
+
+spexen_bc1 <- applyQCScoreModel(
+    spe=spexen_bc1,
+    qcModel=bc2_model,
+    scoreName="QC_score_bc2_model"
+)
+
+spexen_bc2 <- applyQCScoreModel(
+    spe=spexen_bc2,
+    qcModel=bc1_model,
+    scoreName="QC_score_bc1_model"
+)
+
+cor_bc1_spearman <- cor(
+    spexen_bc1$QC_score_bc1_native,
+    spexen_bc1$QC_score_bc2_model,
+    use="complete.obs",
+    method="spearman"
+)
+
+cor_bc1_pearson <- cor(
+    spexen_bc1$QC_score_bc1_native,
+    spexen_bc1$QC_score_bc2_model,
+    use="complete.obs",
+    method="pearson"
+)
+
+cor_bc2_spearman <- cor(
+    spexen_bc2$QC_score_bc2_native,
+    spexen_bc2$QC_score_bc1_model,
+    use="complete.obs",
+    method="spearman"
+)
+
+cor_bc2_pearson <- cor(
+    spexen_bc2$QC_score_bc2_native,
+    spexen_bc2$QC_score_bc1_model,
+    use="complete.obs",
+    method="pearson"
+)
+
+res_bc1 <- plot_qc_score_comparison(
+    x=spexen_bc1$QC_score_bc1_native,
+    y=spexen_bc1$QC_score_bc2_model,
+    x_label="Breast Cancer 1 native QS",
+    y_label="Breast Cancer 2-trained QS",
+    title="Xenium Breast Cancer 1: native QS vs Breast Cancer 2-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file=file.path(
+        out_dir,
+        "xenium_breast1_native_vs_breast2_model_A4_landscape.pdf"
+    )
+)
+
+res_bc2 <- plot_qc_score_comparison(
+    x=spexen_bc2$QC_score_bc2_native,
+    y=spexen_bc2$QC_score_bc1_model,
+    x_label="Breast Cancer 2 native QS",
+    y_label="Breast Cancer 1-trained QS",
+    title="Xenium Breast Cancer 2: native QS vs Breast Cancer 1-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file=file.path(
+        out_dir,
+        "xenium_breast2_native_vs_breast1_model_A4_landscape.pdf"
+    )
+)
+
+res_bc1$plot
+res_bc2$plot
+
+color_vars <- c(
+    "log2SignalDensity",
+    "Area_um",
+    "log2AspectRatio",
+    "log2Ctrl_total_ratio"
+)
+
+bc1_colored_plots <- lapply(color_vars, function(v) {
+    plot_qc_score_coldata(
+        x=spexen_bc1$QC_score_bc1_native,
+        y=spexen_bc1$QC_score_bc2_model,
+        spe=spexen_bc1,
+        color_col=v,
+        x_label="Breast Cancer 1 native QS",
+        y_label="Breast Cancer 2-trained QS",
+        title=paste0("Breast Cancer 1 native vs Breast Cancer 2-trained QS: ", v),
+        threshold=0.75,
+        cor_method="spearman",
+        output_file=file.path(
+            out_dir,
+            paste0("xenium_breast1_native_vs_breast2_model_", v, ".pdf")
+        )
+    )
+})
+names(bc1_colored_plots) <- color_vars
+
+bc2_colored_plots <- lapply(color_vars, function(v) {
+    plot_qc_score_coldata(
+        x=spexen_bc2$QC_score_bc2_native,
+        y=spexen_bc2$QC_score_bc1_model,
+        spe=spexen_bc2,
+        color_col=v,
+        x_label="Breast Cancer 2 native QS",
+        y_label="Breast Cancer 1-trained QS",
+        title=paste0("Breast Cancer 2 native vs Breast Cancer 1-trained QS: ", v),
+        threshold=0.75,
+        cor_method="spearman",
+        output_file=file.path(
+            out_dir,
+            paste0("xenium_breast2_native_vs_breast1_model_", v, ".pdf")
+        )
+    )
+})
+names(bc2_colored_plots) <- color_vars
+
+xenium_breast_transfer_summary <- data.frame(
+    comparison=c(
+        "Breast Cancer 1 native vs Breast Cancer 2-trained",
+        "Breast Cancer 2 native vs Breast Cancer 1-trained"
+    ),
+    spearman=c(cor_bc1_spearman, cor_bc2_spearman),
+    pearson=c(cor_bc1_pearson, cor_bc2_pearson),
+    stringsAsFactors=FALSE
+)
+
+xenium_breast_transfer_summary
+
+write.csv(
+    xenium_breast_transfer_summary,
+    file=file.path(out_dir, "xenium_breast1_breast2_transfer_summary.csv"),
+    row.names=FALSE
+)
+
+metadata(spexen_bc1)$QCScore_model$model_matrix_colnames
+metadata(spexen_bc2)$QCScore_model$model_matrix_colnames
+
+########################
+
+dbkx_path <- "~/Downloads/Xenium_data/db_kero_xen"
+
+spexen <- readXeniumSPE(dbkx_path)
+spexen_bc1 <- readXeniumSPE("/Users/inzirio/Downloads/Xenium_data/Xenium_HumanBreast1_Janesick/")
+
+
+
+spexen <- spatialPerCellQC(spexen)
+spexen_bc1 <- spatialPerCellQC(spexen_bc1)
+
+xenium_formula <- paste0(
+    "~(",
+    "log2SignalDensity + ",
+    "Area_um + ",
+    "log2AspectRatio + ",
+    "log2Ctrl_total_ratio",
+    ")^2"
+)
+
+spexen <- computeQCScore(
+    spe=spexen,
+    modelFormula=xenium_formula,
+    verbose=TRUE
+)
+
+spexen_bc1 <- computeQCScore(
+    spe=spexen_bc1,
+    modelFormula=xenium_formula,
+    verbose=TRUE
+)
+
+spexen$QC_score_dbk_native <- spexen$QC_score
+spexen_bc1$QC_score_bc1_native <- spexen_bc1$QC_score
+
+dbk_model <- metadata(spexen)$QCScore_model
+bc1_model <- metadata(spexen_bc1)$QCScore_model
+
+spexen <- applyQCScoreModel(
+    spe=spexen,
+    qcModel=bc1_model,
+    scoreName="QC_score_bc1_model"
+)
+
+spexen_bc1 <- applyQCScoreModel(
+    spe=spexen_bc1,
+    qcModel=dbk_model,
+    scoreName="QC_score_dbk_model"
+)
+
+cor_dbk_spearman <- cor(
+    spexen$QC_score_dbk_native,
+    spexen$QC_score_bc1_model,
+    use="complete.obs",
+    method="spearman"
+)
+
+cor_dbk_pearson <- cor(
+    spexen$QC_score_dbk_native,
+    spexen$QC_score_bc1_model,
+    use="complete.obs",
+    method="pearson"
+)
+
+cor_bc1_spearman <- cor(
+    spexen_bc1$QC_score_bc1_native,
+    spexen_bc1$QC_score_dbk_model,
+    use="complete.obs",
+    method="spearman"
+)
+
+cor_bc1_pearson <- cor(
+    spexen_bc1$QC_score_bc1_native,
+    spexen_bc1$QC_score_dbk_model,
+    use="complete.obs",
+    method="pearson"
+)
+
+res_dbk <- plot_qc_score_comparison(
+    x=spexen$QC_score_dbk_native,
+    y=spexen$QC_score_bc1_model,
+    x_label="DBK Xenium native QS",
+    y_label="Breast 1-trained QS",
+    title="Xenium DBK: native QS vs Breast 1-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file=file.path(
+        out_dir,
+        "xenium_dbk_native_vs_breast1_model_A4_landscape.pdf"
+    )
+)
+
+res_bc1 <- plot_qc_score_comparison(
+    x=spexen_bc1$QC_score_bc1_native,
+    y=spexen_bc1$QC_score_dbk_model,
+    x_label="Breast 1 native QS",
+    y_label="DBK-trained QS",
+    title="Xenium Breast 1: native QS vs DBK-trained QS",
+    threshold=0.75,
+    cor_method="spearman",
+    output_file=file.path(
+        out_dir,
+        "xenium_breast1_native_vs_dbk_model_A4_landscape.pdf"
+    )
+)
+
+res_dbk$plot
+res_bc1$plot
+
+color_vars <- c(
+    "log2SignalDensity",
+    "Area_um",
+    "log2AspectRatio",
+    "log2Ctrl_total_ratio"
+)
+
+dbk_colored_plots <- lapply(color_vars, function(v) {
+    plot_qc_score_coldata(
+        x=spexen$QC_score_dbk_native,
+        y=spexen$QC_score_bc1_model,
+        spe=spexen,
+        color_col=v,
+        x_label="DBK Xenium native QS",
+        y_label="Breast 1-trained QS",
+        title=paste0("DBK Xenium native vs Breast 1-trained QS: ", v),
+        threshold=0.75,
+        cor_method="spearman",
+        output_file=file.path(
+            out_dir,
+            paste0("xenium_dbk_native_vs_breast1_model_", v, ".pdf")
+        )
+    )
+})
+names(dbk_colored_plots) <- color_vars
+
+bc1_colored_plots <- lapply(color_vars, function(v) {
+    plot_qc_score_coldata(
+        x=spexen_bc1$QC_score_bc1_native,
+        y=spexen_bc1$QC_score_dbk_model,
+        spe=spexen_bc1,
+        color_col=v,
+        x_label="Breast 1 native QS",
+        y_label="DBK-trained QS",
+        title=paste0("Breast 1 native vs DBK-trained QS: ", v),
+        threshold=0.75,
+        cor_method="spearman",
+        output_file=file.path(
+            out_dir,
+            paste0("xenium_breast1_native_vs_dbk_model_", v, ".pdf")
+        )
+    )
+})
+names(bc1_colored_plots) <- color_vars
+
+xenium_dbk_breast1_transfer_summary <- data.frame(
+    comparison=c(
+        "DBK Xenium native vs Breast 1-trained",
+        "Breast 1 native vs DBK-trained"
+    ),
+    spearman=c(cor_dbk_spearman, cor_bc1_spearman),
+    pearson=c(cor_dbk_pearson, cor_bc1_pearson),
+    stringsAsFactors=FALSE
+)
+
+xenium_dbk_breast1_transfer_summary
+
+write.csv(
+    xenium_dbk_breast1_transfer_summary,
+    file=file.path(out_dir, "xenium_dbk_breast1_transfer_summary.csv"),
+    row.names=FALSE
+)
+
+metadata(spexen)$QCScore_model$model_matrix_colnames
+metadata(spexen_bc1)$QCScore_model$model_matrix_colnames
+
+#########################

From faa1af5f52a0c2a49be3800372ec1c9b76d3550c Mon Sep 17 00:00:00 2001
From: Dario <dario.righelli@gmail.com>
Date: Mon, 25 May 2026 11:25:32 +0200
Subject: [PATCH 6/8] adding citation file

---
 inst/CITATION | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 inst/CITATION

diff --git a/inst/CITATION b/inst/CITATION
new file mode 100644
index 0000000..e0a7e38
--- /dev/null
+++ b/inst/CITATION
@@ -0,0 +1,23 @@
+bibentry(
+    bibtype="Article",
+    title="SpaceTrooper: a quality control framework for imaging-based spatial omics data",
+    author=c(
+        person("Benedetta", "Banzi"),
+        person("Dario", "Righelli"),
+        person("Matteo", "Marchionni"),
+        person("Oriana", "Romano"),
+        person("Mattia", "Forcato"),
+        person("Davide", "Risso"),
+        person("Silvio", "Bicciato")
+    ),
+    journal="bioRxiv",
+    year="2025",
+    doi="10.64898/2025.12.24.696336",
+    url="https://doi.org/10.64898/2025.12.24.696336",
+    textVersion=paste(
+        "Banzi B, Righelli D, Marchionni M, Romano O, Forcato M,",
+        "Risso D, Bicciato S. SpaceTrooper: a quality control framework",
+        "for imaging-based spatial omics data. bioRxiv, 2025.",
+        "doi:10.64898/2025.12.24.696336"
+    )
+)

From c732c22608ef5f364488da5602db50f2dd277fe7 Mon Sep 17 00:00:00 2001
From: Dario <dario.righelli@gmail.com>
Date: Mon, 25 May 2026 11:35:10 +0200
Subject: [PATCH 7/8] news version 1.1.8

---
 NEWS.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index b2c76b3..27c1dce 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,10 @@
+# Changes in version 1.1.8
+
+* fixing naming of of spacetrooper utilities vignette
+* adding functions for QC model transfer across datasets
+* several new scripts for QC tranferring and comparison
+* adding citation file with biorxiv paper
+
 # Changes in version 1.1.7
 
 * fixing author name typo

From 031dc0cdd5d1a5c0f42f95ac50703161188c864f Mon Sep 17 00:00:00 2001
From: Dario <dario.righelli@gmail.com>
Date: Mon, 25 May 2026 15:04:37 +0200
Subject: [PATCH 8/8] fixing errors on check

---
 NAMESPACE                |  3 +++
 R/QC.R                   | 11 ++++++++-
 man/applyQCScoreModel.Rd | 48 ++++++++++++++++++++++++++++++++++++++++
 man/computeQCScore.Rd    | 11 ++++++++-
 4 files changed, 71 insertions(+), 2 deletions(-)
 create mode 100644 man/applyQCScoreModel.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 4e9db72..cc92c05 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,6 +4,7 @@ export(.getActiveGeometryName)
 export(.renameGeometry)
 export(.setActiveGeometry)
 export(addPolygonsToSPE)
+export(applyQCScoreModel)
 export(checkOutliers)
 export(computeAreaFromPolygons)
 export(computeAspectRatioFromPolygons)
@@ -126,6 +127,8 @@ importFrom(sf,st_sf)
 importFrom(sf,st_sfc)
 importFrom(sfheaders,sf_polygon)
 importFrom(stats,as.formula)
+importFrom(stats,coef)
+importFrom(stats,complete.cases)
 importFrom(stats,model.matrix)
 importFrom(stats,predict)
 importFrom(stats,quantile)
diff --git a/R/QC.R b/R/QC.R
index 6d69efd..78f5798 100644
--- a/R/QC.R
+++ b/R/QC.R
@@ -441,12 +441,20 @@ computeLambda <- function(trainDF, modelFormula) {
 #' @param spe A `SpatialExperiment` object with spatial transcriptomics data.
 #' @param verbose logical for having a verbose output. Default is FALSE.
 #' @param bestLambda the best lambda typically computed using `computeLambda`.
+#' @param modelFormula a character string representing the model formula to be
+#' used for training the model. If NULL, the formula is automatically generated
+#' based on the available metrics and outliers in the dataset.
+#' See Details for more information.
+#' Note that the automatically generated formula will include interaction
+#' terms between the metrics, and will exclude metrics with insufficient
+#' outliers (< 0.1% of the dataset). If a custom `modelFormula` is provided,
+#' it will be used as is without modification or checks for outlier counts.
 #'
 #' @return The `SpatialExperiment` object with added QC score in `colData`.
 #' @export
 #' @importFrom dplyr case_when filter mutate distinct pull
 #' @importFrom glmnet glmnet cv.glmnet
-#' @importFrom stats as.formula model.matrix quantile predict
+#' @importFrom stats as.formula model.matrix quantile predict coef
 #' @examples
 #' example(spatialPerCellQC)
 #' set.seed(1998)
@@ -1323,6 +1331,7 @@ applyQCScoreModel <- function(spe, qcModel, scoreName="QC_score") {
     return(modelMatrix)
 }
 
+#' @importFrom stats complete.cases
 .filterCompleteModelCases <- function(df, modelFormula, response=NULL,
     context="cells") {
 
diff --git a/man/applyQCScoreModel.Rd b/man/applyQCScoreModel.Rd
new file mode 100644
index 0000000..9bea5d2
--- /dev/null
+++ b/man/applyQCScoreModel.Rd
@@ -0,0 +1,48 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/QC.R
+\name{applyQCScoreModel}
+\alias{applyQCScoreModel}
+\title{applyQCScoreModel}
+\usage{
+applyQCScoreModel(spe, qcModel, scoreName = "QC_score")
+}
+\arguments{
+\item{spe}{A `SpatialExperiment` object with QC metrics already computed.}
+
+\item{qcModel}{A QC score model object, usually stored in
+`metadata(spe)$QCScore_model`.}
+
+\item{scoreName}{Name of the output column in `colData`.}
+}
+\value{
+A `SpatialExperiment` object with added QC score in `colData`.
+}
+\description{
+Apply a previously trained QC score model to a new SpatialExperiment object.
+}
+\examples{
+
+example(readCosmxSPE)
+
+## Compute per-cell quality control metrics
+spe <- spatialPerCellQC(spe)
+
+## Split the object only for example purposes
+set.seed(1998)
+idx <- sample(seq_len(ncol(spe)), floor(ncol(spe) / 2))
+spe_train <- spe[, idx]
+spe_test <- spe[, -idx]
+
+## Train the Quality Control (QC) score model on one dataset
+spe_train <- computeQCScore(spe_train)
+qc_model <- metadata(spe_train)$QCScore_model
+
+## Apply the trained model to another dataset
+spe_test <- applyQCScoreModel(
+    spe=spe_test,
+    qcModel=qc_model,
+    scoreName="QC_score_transferred"
+)
+
+summary(spe_test$QC_score_transferred)
+}
diff --git a/man/computeQCScore.Rd b/man/computeQCScore.Rd
index a9fce89..0af8503 100644
--- a/man/computeQCScore.Rd
+++ b/man/computeQCScore.Rd
@@ -4,13 +4,22 @@
 \alias{computeQCScore}
 \title{computeQCScore}
 \usage{
-computeQCScore(spe, bestLambda = NULL, verbose = FALSE)
+computeQCScore(spe, bestLambda = NULL, modelFormula = NULL, verbose = FALSE)
 }
 \arguments{
 \item{spe}{A `SpatialExperiment` object with spatial transcriptomics data.}
 
 \item{bestLambda}{the best lambda typically computed using `computeLambda`.}
 
+\item{modelFormula}{a character string representing the model formula to be
+used for training the model. If NULL, the formula is automatically generated
+based on the available metrics and outliers in the dataset.
+See Details for more information.
+Note that the automatically generated formula will include interaction
+terms between the metrics, and will exclude metrics with insufficient
+outliers (< 0.1% of the dataset). If a custom `modelFormula` is provided,
+it will be used as is without modification or checks for outlier counts.}
+
 \item{verbose}{logical for having a verbose output. Default is FALSE.}
 }
 \value{