diff --git a/DESCRIPTION b/DESCRIPTION index d7dbf76..93dd25f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: gedi2 Type: Package Title: Gene Expression Decomposition and Integration -Version: 2.3.4 -Date: 2026-05-09 +Version: 2.3.5 +Date: 2026-06-09 Authors@R: c( person("Arsham", "Mikaeili Namini", email = "arsham.mikaeilinamini@mail.mcgill.ca", role = c("aut", "cre")), person("Hamed", "S.Najafabadi", email = "hamed.najafabadi@mcgill.ca", role = c("aut")) diff --git a/NEWS.md b/NEWS.md index c1b437f..20218b9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,18 @@ +# gedi 2.3.5 + +## New features + +* `plot_features()` gains two differential projection types (#26): + * `projection = "diffexp"` plots the per-cell differential expression for + selected genes, given a `contrast` (optionally adding the global offset via + `include_O = TRUE`). The full J x N matrix is never materialised. + * `projection = "diffadb"` plots the per-cell differential pathway activity + for selected pathways (requires a gene-level prior `C`). +* New method `model$diffADB(contrast)` returns the differential pathway + activity (num_pathways x N). It is the exact differential of `ADB` + (i.e. `ADB(Z + dQ) - ADB(Z)`), applying the same `solve_A` shrinkage so it + stays on the same scale as `model$projections$ADB`. + # gedi 2.3.1 ## CRAN Compliance diff --git a/R/RcppExports.R b/R/RcppExports.R index d99e13c..32bdc66 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -208,6 +208,10 @@ getDiffExp_cpp <- function(Rk_list, H_rotation, contrast, D, Bi_list, verbose = .Call(`_gedi2_getDiffExp_cpp`, Rk_list, H_rotation, contrast, D, Bi_list, verbose) } +getDiffADB_cpp <- function(Rk_list, H_rotation, contrast, C_rotation, inputC, D, Bi_list, S_A, verbose = 0L) { + .Call(`_gedi2_getDiffADB_cpp`, Rk_list, H_rotation, contrast, C_rotation, inputC, D, Bi_list, S_A, verbose) +} + compute_svd_factorized_cpp <- function(Z, D, Bi_list, verbose = 0L) { .Call(`_gedi2_compute_svd_factorized_cpp`, Z, D, Bi_list, verbose) } diff --git a/R/gedi_class.R b/R/gedi_class.R index ee00a72..19a04d9 100644 --- a/R/gedi_class.R +++ b/R/gedi_class.R @@ -697,6 +697,9 @@ GEDI <- R6Class( diffExp = function(contrast, include_O = FALSE) { compute_diffExp(self, private, contrast, include_O) }, + diffADB = function(contrast) { + compute_diffADB(self, private, contrast) + }, # ========================================================================= # Dimensionality Reduction Methods diff --git a/R/gedi_plots.R b/R/gedi_plots.R index 13ba033..31ddd88 100644 --- a/R/gedi_plots.R +++ b/R/gedi_plots.R @@ -145,23 +145,44 @@ utils::globalVariables(c( #' Plot Multiple Features on Embedding #' -#' Efficiently plots multiple gene features on a 2D embedding using faceting. -#' Computes projections on-the-fly without storing full ZDB matrix. +#' Efficiently plots multiple gene or pathway features on a 2D embedding using +#' faceting. For standard projections (\code{"zdb"}, \code{"db"}, +#' \code{"adb"}) the values are computed on-the-fly without materialising the +#' full J x N matrix. For differential projections (\code{"diffexp"}, +#' \code{"diffadb"}) a \code{contrast} vector must be supplied. #' -#' @param model GEDI model object -#' @param features Character vector of gene names or integer indices -#' @param embedding Character specifying embedding type ("umap", "pca") or -#' a custom N x 2 matrix -#' @param projection Character, type of projection to compute ("zdb" or "db") -#' @param color_limits Character ("global" for shared scale, "individual" for -#' per-facet scale) or numeric vector c(low, high) -#' @param ncol Integer, number of columns in facet layout -#' @param randomize Logical, whether to randomize point order -#' @param point_size Numeric, size of points -#' @param alpha Numeric, transparency of points -#' @param title Character, plot title +#' @param model GEDI model object (trained). +#' @param features Character vector of gene/pathway names or integer indices. +#' For \code{"zdb"}, \code{"db"}, and \code{"diffexp"} these are gene +#' identifiers. For \code{"adb"} and \code{"diffadb"} these are pathway +#' names (column names of the C matrix supplied at setup). +#' @param embedding Character specifying embedding type (\code{"umap"}, +#' \code{"pca"}) or a custom N x 2 matrix. +#' @param projection Character, type of projection to compute. One of +#' \code{"zdb"} (shared manifold), \code{"db"} (latent factor embedding), +#' \code{"adb"} (pathway activity), \code{"diffexp"} (differential +#' expression), or \code{"diffadb"} (differential pathway activity). +#' \code{"diffexp"} and \code{"diffadb"} require \code{contrast} to be set. +#' \code{"diffexp"} optionally accepts \code{include_O = TRUE}. +#' @param contrast Numeric vector of length equal to the number of original H +#' covariates (\code{ncol(model$priors$H.rotation)}). Required when +#' \code{projection} is \code{"diffexp"} or \code{"diffadb"}; ignored +#' otherwise. The vector encodes the linear contrast in the user-facing +#' covariate space. +#' @param include_O Logical (default \code{FALSE}). When \code{projection = +#' "diffexp"}, adds the global offset differential effect (diffO) to each +#' gene's projection value. Ignored for all other projection types. +#' @param color_limits Character (\code{"global"} for shared scale, +#' \code{"individual"} for per-facet scale) or numeric vector +#' \code{c(low, high)}. +#' @param ncol Integer, number of columns in facet layout. +#' @param randomize Logical, whether to randomize point order before plotting. +#' @param point_size Numeric, size of points. +#' @param alpha Numeric, transparency of points (0-1). +#' @param title Character, plot title (auto-derived from projection type if +#' \code{NULL}). #' -#' @return ggplot2 object with faceted features +#' @return ggplot2 object with faceted features. #' #' @examples #' \donttest{ @@ -177,6 +198,22 @@ utils::globalVariables(c( #' model$train(iterations = 5) #' plot_features(model, c(1, 2), embedding = "pca") #' } +#' # Differential expression projection (requires an H design matrix at setup) +#' set.seed(42) +#' J <- 40; N <- 60; n_samples <- 2 +#' M <- Matrix::Matrix(matrix(rpois(J * N, 5), J, N), sparse = TRUE) +#' rownames(M) <- paste0("Gene_", seq_len(J)) +#' colnames(M) <- paste0("Cell_", seq_len(N)) +#' samples <- factor(rep(paste0("S", seq_len(n_samples)), each = N / n_samples)) +#' H <- matrix(c(0, 1), nrow = 1, +#' dimnames = list("condition", paste0("S", seq_len(n_samples)))) +#' model <- CreateGEDIObject(Samples = samples, M = M, K = 3, H = H, verbose = 0) +#' model$train(iterations = 10) +#' contrast <- c(condition = 1) # length == ncol(model$priors$H.rotation) == 1 +#' p <- plot_features(model, c("Gene_1", "Gene_2"), +#' embedding = "pca", +#' projection = "diffexp", +#' contrast = contrast) #' } #' #' @export @@ -184,12 +221,15 @@ plot_features <- function(model, features, embedding = "umap", projection = "zdb", + contrast = NULL, + include_O = FALSE, color_limits = "global", ncol = NULL, randomize = TRUE, point_size = 0.2, alpha = 0.9, title = NULL) { + # Validate model if (is.null(model$params)) { stop("Model not trained. Run $train() first.", call. = FALSE) @@ -217,27 +257,59 @@ plot_features <- function(model, N <- nrow(emb_mat) # Check projection type - if (!projection %in% c("zdb", "db", "adb")) { - stop("projection must be 'zdb', 'db', or 'adb'", call. = FALSE) + valid_projections <- c("zdb", "db", "adb", "diffexp", "diffadb") + if (!projection %in% valid_projections) { + stop("projection must be one of: ", + paste(paste0("'", valid_projections, "'"), collapse = ", "), + call. = FALSE) } - # For adb, feature names refer to pathway names, not genes - if (projection == "adb") { - if (model$aux$P == 0) { - stop("projection 'adb' requires a model with gene-level prior (C matrix). This model has P=0.", call. = FALSE) + # ---- Differential projection pre-flight checks ---------------------------- + + if (projection %in% c("diffexp", "diffadb")) { + if (is.null(contrast)) { + stop("projection '", projection, "' requires a `contrast` vector. ", + "Supply a numeric vector of length ncol(model$priors$H.rotation).", + call. = FALSE) + } + if (model$aux$L == 0) { + stop("projection '", projection, "' requires a model with sample-level ", + "prior (H matrix). This model has L=0.", call. = FALSE) + } + num_cov <- ncol(model$priors$H.rotation) + if (!is.numeric(contrast) || length(contrast) != num_cov) { + stop("contrast must be a numeric vector of length ", num_cov, + " (ncol(model$priors$H.rotation))", call. = FALSE) + } + } + + if (projection == "diffadb" && model$aux$P == 0) { + stop("projection 'diffadb' requires a model with gene-level prior (C matrix). ", + "This model has P=0.", call. = FALSE) + } + + # ---- Feature resolution --------------------------------------------------- + + # pathway-based projections: adb, diffadb + if (projection %in% c("adb", "diffadb")) { + + if (projection == "adb" && model$aux$P == 0) { + stop("projection 'adb' requires a model with gene-level prior (C matrix). ", + "This model has P=0.", call. = FALSE) } # Get the pathway names pathway_names <- colnames(model$.__enclos_env__$private$.aux_static$inputC) if (is.null(pathway_names)) { - pathway_names <- paste0("Pathway", 1:model$aux$P) + pathway_names <- paste0("Pathway", seq_len(model$aux$P)) } if (is.character(features)) { feature_idx <- match(features, pathway_names) if (any(is.na(feature_idx))) { - missing <- features[is.na(feature_idx)] - stop("Features (pathways) not found: ", paste(missing, collapse = ", "), call. = FALSE) + missing_feats <- features[is.na(feature_idx)] + stop("Features (pathways) not found: ", + paste(missing_feats, collapse = ", "), call. = FALSE) } feature_names <- features } else { @@ -250,23 +322,33 @@ plot_features <- function(model, F_count <- length(feature_idx) - # Evaluate ADB projection specifically for chosen pathways - ADB <- model$projections$ADB # P x N - if (!is.null(rownames(ADB))) { - # If ADB has row names (from C matrix columns), we can extract directly - projections <- t(ADB[feature_names, , drop = FALSE]) # N x F_count + # Compute pathway projections + if (projection == "adb") { + ADB <- model$projections$ADB # P x N + if (!is.null(rownames(ADB))) { + projections <- t(ADB[feature_names, , drop = FALSE]) # N x F_count + } else { + projections <- t(ADB[feature_idx, , drop = FALSE]) # N x F_count + } } else { - # Otherwise fall back to integer indices - projections <- t(ADB[feature_idx, , drop = FALSE]) # N x F_count + # diffadb: materialise full diffADB (num_pathways x N), subset rows + DIFFADB <- model$diffADB(contrast) # num_pathways x N + if (!is.null(rownames(DIFFADB))) { + projections <- t(DIFFADB[feature_names, , drop = FALSE]) # N x F_count + } else { + projections <- t(DIFFADB[feature_idx, , drop = FALSE]) # N x F_count + } } + } else { - # Convert feature names to indices (genes for zdb/db) + # gene-based projections: zdb, db, diffexp geneIDs <- model$metadata$geneIDs if (is.character(features)) { feature_idx <- match(features, geneIDs) if (any(is.na(feature_idx))) { - missing <- features[is.na(feature_idx)] - stop("Features not found: ", paste(missing, collapse = ", "), call. = FALSE) + missing_feats <- features[is.na(feature_idx)] + stop("Features not found: ", + paste(missing_feats, collapse = ", "), call. = FALSE) } feature_names <- features } else { @@ -279,33 +361,68 @@ plot_features <- function(model, F_count <- length(feature_idx) - # Extract feature weights from Z - Z <- model$params$Z - feature_weights <- Z[feature_idx, , drop = FALSE] # F x K - feature_weights <- t(feature_weights) # K x F for C++ - - # Compute projections using C++ if (projection == "zdb") { + # Extract gene loadings from Z; transpose for C++ (K x F) + Z <- model$params$Z + feature_weights <- t(Z[feature_idx, , drop = FALSE]) # K x F + + # compute_multi_feature_projection is the efficient path: avoids full ZDB projections <- compute_multi_feature_projection( feature_weights = feature_weights, - D = model$params$D, - Bi_list = model$params$Bi, - verbose = 0 - ) # Returns N x F matrix + D = model$params$D, + Bi_list = model$params$Bi, + verbose = 0 + ) # N x F + } else if (projection == "db") { - # For DB: just use factor weights directly - DB <- model$projections$DB # K x N - projections <- t(DB) %*% feature_weights # N x F + Z <- model$params$Z + feature_weights <- t(Z[feature_idx, , drop = FALSE]) # K x F + DB <- model$projections$DB # K x N + projections <- t(DB) %*% feature_weights # N x F + + } else { + # diffexp: efficient path — avoids materialising the full J x N diffExp + # matrix. This is the differential analogue of the compute_multi_feature_projection + # path used by "zdb" above. + H_c <- as.vector(model$priors$H.rotation %*% contrast) # length L + + DB <- model$projections$DB # K x N + + # Compute F x K matrix of differential gene loadings for selected genes + # only. vapply iterates over Rk (each K x J), slices to the F selected + # rows, and multiplies by H_c (length L -> scalar per gene per factor). + # Each call returns a numeric vector of length F, so vapply builds + # an F x K matrix directly (rows = features, cols = factors). + diffQ_Z_sub <- vapply( + model$params$Rk, + FUN = function(Rk) as.vector(Rk[feature_idx, , drop = FALSE] %*% H_c), + FUN.VALUE = numeric(F_count) + ) # F x K (vapply stacks results as columns when FUN.VALUE has length F_count) + + # Handle F=1 edge case: vapply with FUN.VALUE=numeric(1) gives a named + # vector of length K rather than a 1 x K matrix. Ensure F x K shape. + if (!is.matrix(diffQ_Z_sub)) { + diffQ_Z_sub <- matrix(diffQ_Z_sub, nrow = F_count) # 1 x K + } + # At this point diffQ_Z_sub is F x K; DB is K x N. + projections <- t(diffQ_Z_sub %*% DB) # N x F + + if (isTRUE(include_O)) { + Ro <- model$params$Ro # J x L + diffO_sub <- as.vector(Ro[feature_idx, , drop = FALSE] %*% H_c) # length F + projections <- sweep(projections, 2, diffO_sub, `+`) + } } } - # Build long-format data frame + # ---- Build long-format data frame ----------------------------------------- + df_list <- vector("list", F_count) - for (f in 1:F_count) { + for (f in seq_len(F_count)) { df_list[[f]] <- data.frame( - Dim1 = emb_mat[, 1], - Dim2 = emb_mat[, 2], - Value = projections[, f], + Dim1 = emb_mat[, 1], + Dim2 = emb_mat[, 2], + Value = projections[, f], Feature = feature_names[f] ) } @@ -326,7 +443,8 @@ plot_features <- function(model, lim <- NULL use_free_scale <- TRUE } else { - stop("color_limits must be 'global', 'individual', or numeric vector", call. = FALSE) + stop("color_limits must be 'global', 'individual', or numeric vector", + call. = FALSE) } } else { lim <- color_limits @@ -335,18 +453,22 @@ plot_features <- function(model, # Determine legend label based on projection type legend_label <- switch(projection, - "zdb" = "Expression", - "db" = "Factor Activity", - "adb" = "Pathway Activity", - "Expression" # default fallback + "zdb" = "Expression", + "db" = "Factor Activity", + "adb" = "Pathway Activity", + "diffexp" = "Diff. Expression", + "diffadb" = "Diff. Pathway Activity", + "Expression" # default fallback ) # Determine default title based on projection type default_title <- switch(projection, - "zdb" = "Gene Expression", - "db" = "Latent Factor Activity", - "adb" = "Pathway Activity", - "Feature Expression" # default fallback + "zdb" = "Gene Expression", + "db" = "Latent Factor Activity", + "adb" = "Pathway Activity", + "diffexp" = "Differential Expression", + "diffadb" = "Differential Pathway Activity", + "Feature Expression" # default fallback ) # Create plot @@ -356,7 +478,6 @@ plot_features <- function(model, # Add color scale if (use_free_scale) { - # Per-facet limits - need to compute per feature p <- p + scale_color_gedi_diverging(name = legend_label) } else { p <- p + scale_color_gedi_diverging(limits = lim, name = legend_label) @@ -366,8 +487,8 @@ plot_features <- function(model, emb_name <- if (is.character(embedding)) toupper(embedding) else "Embedding" p <- p + ggplot2::labs( - x = paste(emb_name, "1"), - y = paste(emb_name, "2"), + x = paste(emb_name, "1"), + y = paste(emb_name, "2"), title = if (is.null(title)) default_title else title ) + theme_gedi() diff --git a/R/gedi_projections.R b/R/gedi_projections.R index dcc3a64..02f880e 100644 --- a/R/gedi_projections.R +++ b/R/gedi_projections.R @@ -24,6 +24,12 @@ ProjectionsAccessor <- R6Class( cat(" $ADB - Pathway activity projection (P x N)\n") } cat("\nAccess with: model$projections$ZDB\n") + cat("\nDifferential projections (require a contrast):\n") + cat(" model$diffExp(contrast) - Differential expression (J x N)\n") + if (private$.gedi_self$aux$P > 0) { + cat(" model$diffADB(contrast) - Differential pathway activity (P x N)\n") + } + cat("\nSee ?plot_features for contrast-based visualization.\n") invisible(self) } ), @@ -377,6 +383,97 @@ compute_diffExp <- function(self, private, contrast, include_O = FALSE) { } +#' Compute Differential Pathway Activity (num_pathways x N) +#' +#' @description +#' Computes the cell-specific differential pathway activity effect +#' (num_pathways x N). This is the R wrapper for the C++ function +#' \code{getDiffADB_cpp()}, which mirrors the ADB computation but uses the +#' H-contrast-projected gene loadings instead of the posterior mean A. +#' +#' @param self Reference to GEDI R6 object +#' @param private Reference to private environment +#' @param contrast Numeric vector of length \code{ncol(H.rotation)} (the number +#' of original H covariates) specifying the contrast in the user-facing +#' covariate space; internally projected into the compressed L-space via +#' \code{H.rotation \%*\% contrast}. +#' +#' @return Dense matrix (num_pathways x N) with rownames = pathway names and +#' colnames = cellIDs. +#' +#' @keywords internal +#' @noRd +compute_diffADB <- function(self, private, contrast) { + + # Validate model state + if (is.null(private$.lastResult)) { + stop("No results yet. Run $train() first.", call. = FALSE) + } + + # Check if H prior was provided + if (self$aux$L == 0) { + stop("Cannot compute diffADB: no sample-level prior (H) was provided during setup.", + call. = FALSE) + } + + # Check if C prior was provided + if (self$aux$P == 0) { + stop("Cannot compute diffADB: no gene-level prior (C) was provided during setup.", + call. = FALSE) + } + + # Check if aux_static exists + if (is.null(private$.aux_static)) { + stop("Missing auxiliary data. Model may not be properly initialized.", + call. = FALSE) + } + + # Validate contrast: must live in the original covariate space + # (= ncol(H.rotation) = number of rows of the user-supplied H matrix). + # H.rotation projects num_covariates -> L, so contrast is num_covariates-long. + num_cov <- ncol(private$.aux_static$H.rotation) + if (!is.numeric(contrast) || length(contrast) != num_cov) { + stop("contrast must be a numeric vector of length ", num_cov, + " (number of original H covariates)", + call. = FALSE) + } + + # solve_A's pathway-loading shrinkage; passed through so diffADB applies the + # exact same operator as ADB and remains the true differential of ADB. + S_A <- private$.lastResult$hyperparams$S_A + + # Coerce the prior matrices to dense double before mapping into Eigen. + # inputC in particular may be stored as an integer base matrix (e.g. a 0/1 + # membership matrix) or a sparse Matrix, neither of which can bind to a + # C++ Eigen::Map ("Wrong R type for mapped matrix"). + inputC <- as.matrix(private$.aux_static$inputC) + storage.mode(inputC) <- "double" + C_rotation <- as.matrix(private$.aux_static$C.rotation) + storage.mode(C_rotation) <- "double" + H_rotation <- as.matrix(private$.aux_static$H.rotation) + storage.mode(H_rotation) <- "double" + + # Call C++ function — no caching because result depends on contrast + diffADB <- getDiffADB_cpp( + Rk_list = self$params$Rk, + H_rotation = H_rotation, + contrast = as.numeric(contrast), + C_rotation = C_rotation, + inputC = inputC, + D = as.numeric(self$params$D), + Bi_list = self$params$Bi, + S_A = as.numeric(S_A), + verbose = private$.verbose + ) + + # Add row/column names + rownames(diffADB) <- colnames(private$.aux_static$inputC) # pathway names + colnames(diffADB) <- private$.cellIDs + + return(diffADB) +} + + #' Clear Projection Cache #' #' @description diff --git a/man/plot_features.Rd b/man/plot_features.Rd index ab1fc1e..b6dd7c9 100644 --- a/man/plot_features.Rd +++ b/man/plot_features.Rd @@ -9,6 +9,8 @@ plot_features( features, embedding = "umap", projection = "zdb", + contrast = NULL, + include_O = FALSE, color_limits = "global", ncol = NULL, randomize = TRUE, @@ -18,34 +20,57 @@ plot_features( ) } \arguments{ -\item{model}{GEDI model object} +\item{model}{GEDI model object (trained).} -\item{features}{Character vector of gene names or integer indices} +\item{features}{Character vector of gene/pathway names or integer indices. +For \code{"zdb"}, \code{"db"}, and \code{"diffexp"} these are gene +identifiers. For \code{"adb"} and \code{"diffadb"} these are pathway +names (column names of the C matrix supplied at setup).} -\item{embedding}{Character specifying embedding type ("umap", "pca") or -a custom N x 2 matrix} +\item{embedding}{Character specifying embedding type (\code{"umap"}, +\code{"pca"}) or a custom N x 2 matrix.} -\item{projection}{Character, type of projection to compute ("zdb" or "db")} +\item{projection}{Character, type of projection to compute. One of +\code{"zdb"} (shared manifold), \code{"db"} (latent factor embedding), +\code{"adb"} (pathway activity), \code{"diffexp"} (differential +expression), or \code{"diffadb"} (differential pathway activity). +\code{"diffexp"} and \code{"diffadb"} require \code{contrast} to be set. +\code{"diffexp"} optionally accepts \code{include_O = TRUE}.} -\item{color_limits}{Character ("global" for shared scale, "individual" for -per-facet scale) or numeric vector c(low, high)} +\item{contrast}{Numeric vector of length equal to the number of original H +covariates (\code{ncol(model$priors$H.rotation)}). Required when +\code{projection} is \code{"diffexp"} or \code{"diffadb"}; ignored +otherwise. The vector encodes the linear contrast in the user-facing +covariate space.} -\item{ncol}{Integer, number of columns in facet layout} +\item{include_O}{Logical (default \code{FALSE}). When \code{projection = + "diffexp"}, adds the global offset differential effect (diffO) to each +gene's projection value. Ignored for all other projection types.} -\item{randomize}{Logical, whether to randomize point order} +\item{color_limits}{Character (\code{"global"} for shared scale, +\code{"individual"} for per-facet scale) or numeric vector +\code{c(low, high)}.} -\item{point_size}{Numeric, size of points} +\item{ncol}{Integer, number of columns in facet layout.} -\item{alpha}{Numeric, transparency of points} +\item{randomize}{Logical, whether to randomize point order before plotting.} -\item{title}{Character, plot title} +\item{point_size}{Numeric, size of points.} + +\item{alpha}{Numeric, transparency of points (0-1).} + +\item{title}{Character, plot title (auto-derived from projection type if +\code{NULL}).} } \value{ -ggplot2 object with faceted features +ggplot2 object with faceted features. } \description{ -Efficiently plots multiple gene features on a 2D embedding using faceting. -Computes projections on-the-fly without storing full ZDB matrix. +Efficiently plots multiple gene or pathway features on a 2D embedding using +faceting. For standard projections (\code{"zdb"}, \code{"db"}, +\code{"adb"}) the values are computed on-the-fly without materialising the +full J x N matrix. For differential projections (\code{"diffexp"}, +\code{"diffadb"}) a \code{contrast} vector must be supplied. } \examples{ \donttest{ @@ -61,6 +86,22 @@ if (requireNamespace("SeuratObject", quietly = TRUE) && model$train(iterations = 5) plot_features(model, c(1, 2), embedding = "pca") } +# Differential expression projection (requires an H design matrix at setup) +set.seed(42) +J <- 40; N <- 60; n_samples <- 2 +M <- Matrix::Matrix(matrix(rpois(J * N, 5), J, N), sparse = TRUE) +rownames(M) <- paste0("Gene_", seq_len(J)) +colnames(M) <- paste0("Cell_", seq_len(N)) +samples <- factor(rep(paste0("S", seq_len(n_samples)), each = N / n_samples)) +H <- matrix(c(0, 1), nrow = 1, + dimnames = list("condition", paste0("S", seq_len(n_samples)))) +model <- CreateGEDIObject(Samples = samples, M = M, K = 3, H = H, verbose = 0) +model$train(iterations = 10) +contrast <- c(condition = 1) # length == ncol(model$priors$H.rotation) == 1 +p <- plot_features(model, c("Gene_1", "Gene_2"), + embedding = "pca", + projection = "diffexp", + contrast = contrast) } } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 325d540..f3c43e4 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -121,6 +121,25 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// getDiffADB_cpp +Eigen::MatrixXd getDiffADB_cpp(const Rcpp::List& Rk_list, const Eigen::Map& H_rotation, const Eigen::Map& contrast, const Eigen::Map& C_rotation, const Eigen::Map& inputC, const Eigen::Map& D, const Rcpp::List& Bi_list, double S_A, int verbose); +RcppExport SEXP _gedi2_getDiffADB_cpp(SEXP Rk_listSEXP, SEXP H_rotationSEXP, SEXP contrastSEXP, SEXP C_rotationSEXP, SEXP inputCSEXP, SEXP DSEXP, SEXP Bi_listSEXP, SEXP S_ASEXP, SEXP verboseSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const Rcpp::List& >::type Rk_list(Rk_listSEXP); + Rcpp::traits::input_parameter< const Eigen::Map& >::type H_rotation(H_rotationSEXP); + Rcpp::traits::input_parameter< const Eigen::Map& >::type contrast(contrastSEXP); + Rcpp::traits::input_parameter< const Eigen::Map& >::type C_rotation(C_rotationSEXP); + Rcpp::traits::input_parameter< const Eigen::Map& >::type inputC(inputCSEXP); + Rcpp::traits::input_parameter< const Eigen::Map& >::type D(DSEXP); + Rcpp::traits::input_parameter< const Rcpp::List& >::type Bi_list(Bi_listSEXP); + Rcpp::traits::input_parameter< double >::type S_A(S_ASEXP); + Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP); + rcpp_result_gen = Rcpp::wrap(getDiffADB_cpp(Rk_list, H_rotation, contrast, C_rotation, inputC, D, Bi_list, S_A, verbose)); + return rcpp_result_gen; +END_RCPP +} // compute_svd_factorized_cpp Rcpp::List compute_svd_factorized_cpp(const Eigen::Map& Z, const Eigen::Map& D, const Rcpp::List& Bi_list, int verbose); RcppExport SEXP _gedi2_compute_svd_factorized_cpp(SEXP ZSEXP, SEXP DSEXP, SEXP Bi_listSEXP, SEXP verboseSEXP) { @@ -461,6 +480,7 @@ static const R_CallMethodDef CallEntries[] = { {"_gedi2_getDiffO_cpp", (DL_FUNC) &_gedi2_getDiffO_cpp, 4}, {"_gedi2_getDiffQ_cpp", (DL_FUNC) &_gedi2_getDiffQ_cpp, 4}, {"_gedi2_getDiffExp_cpp", (DL_FUNC) &_gedi2_getDiffExp_cpp, 6}, + {"_gedi2_getDiffADB_cpp", (DL_FUNC) &_gedi2_getDiffADB_cpp, 9}, {"_gedi2_compute_svd_factorized_cpp", (DL_FUNC) &_gedi2_compute_svd_factorized_cpp, 4}, {"_gedi2_run_factorized_svd_cpp", (DL_FUNC) &_gedi2_run_factorized_svd_cpp, 3}, {"_gedi2_Yi_resZ", (DL_FUNC) &_gedi2_Yi_resZ, 5}, diff --git a/src/gedi_differential.cpp b/src/gedi_differential.cpp index 04122a3..2427e96 100644 --- a/src/gedi_differential.cpp +++ b/src/gedi_differential.cpp @@ -283,9 +283,205 @@ Eigen::MatrixXd getDiffExp_cpp( if (verbose >= 1) { double mean_val = diffExp.mean(); double std_val = std::sqrt((diffExp.array() - mean_val).square().mean()); - Rcout << "[OK] diffExp computed: mean = " << mean_val + Rcout << "[OK] diffExp computed: mean = " << mean_val << ", std = " << std_val << std::endl; } - + return diffExp; -} \ No newline at end of file +} + + +/** + * Compute Differential Pathway Activity (Internal C++) + * + * Computes the cell-specific differential pathway activity effect + * (num_pathways x N). This is the differential analogue of compute_ADB_cpp, + * exactly as getDiffExp_cpp is the differential analogue of compute_ZDB_cpp. + * + * Mathematical derivation: + * ADB = C.rotation * A * diag(D) * B, where solve_A() fits A linearly from Z: + * A = (Cmat^T Cmat + lambda I)^{-1} Cmat^T Z, Cmat = inputC * C_rotation, + * lambda = 1 / S_A (Cmat is the orthonormal SVD basis aux_C). + * Because solve_A is linear, the differential of ADB under the contrast is + * diffADB = ADB(Z + diffQ_Z) - ADB(Z) + * = C.rotation * solve_A(diffQ_Z) * diag(D) * B, + * i.e. apply the SAME shrinkage operator to the differential gene loadings + * diffQ_Z. Omitting the shrinkage would mis-scale diffADB relative to ADB by a + * factor of (1 + 1/lambda); we therefore replicate solve_A exactly. + * + * Computation order: + * 1. H_contrast = H_rotation * contrast (L x 1) + * 2. diffQ_Z = [Rk * H_contrast for each k] (J x K) + * 3. Cmat = inputC * C_rotation (J x P, = aux_C) + * 4. A_diff = (Cmat^T Cmat + (1/S_A) I)^{-1} Cmat^T diffQ_Z (P x K) + * 5. CA_diff = C_rotation * A_diff (num_pathways x K) + * 6. DB = diag(D) * B (K x N) + * 7. diffADB = CA_diff * DB (num_pathways x N) + * + * The largest intermediates are J x K (diffQ_Z) and J x P (Cmat, bounded by the + * size of the prior inputC). No J x N intermediate is ever materialised. + * + * @param Rk_list List of K matrices (each J x L), effect of sample variables + * on each latent factor k + * @param H_rotation Rotation matrix (L x num_covariates) + * @param contrast Vector of length num_covariates (= ncol(H_rotation)) + * specifying the contrast in the user-facing covariate space + * @param C_rotation Rotation matrix (num_pathways x P) that maps from the + * P-dimensional reduced SVD space to the original pathway space + * @param inputC Gene-by-pathway input prior matrix (J x num_pathways) + * @param D Scaling vector (length K) representing factor importance + * @param Bi_list List of sample-specific cell projection matrices (K x Ni each) + * @param S_A Pathway-loading shrinkage hyperparameter (= model$hyperparams$S_A); + * defines lambda = 1 / S_A in the solve_A operator + * @param verbose Integer verbosity level (0 = silent, 1 = summary, 2 = detailed) + * + * @return Dense matrix diffADB of dimensions num_pathways x N representing the + * predicted differential pathway activity for each pathway in each cell. + * + * @keywords internal + * @noRd + */ +// [[Rcpp::export]] +Eigen::MatrixXd getDiffADB_cpp( + const Rcpp::List& Rk_list, + const Eigen::Map& H_rotation, + const Eigen::Map& contrast, + const Eigen::Map& C_rotation, + const Eigen::Map& inputC, + const Eigen::Map& D, + const Rcpp::List& Bi_list, + double S_A, + int verbose = 0 +) { + + // ---- Dimension validation ---- + int K = Rk_list.size(); + int L = H_rotation.rows(); + int num_pathways = C_rotation.rows(); + int P = C_rotation.cols(); + int numSamples = Bi_list.size(); + + if (K == 0 || L == 0) { + stop("Cannot compute diffADB: no sample-level prior (H) was provided"); + } + + if (P == 0) { + stop("Cannot compute diffADB: no gene-level prior (C) was provided"); + } + + // contrast lives in the original num_covariates space; H_rotation projects + // num_covariates -> L. So contrast length must equal H_rotation.cols(). + if (contrast.size() != H_rotation.cols()) { + stop("Dimension mismatch: contrast must have length ncol(H_rotation) " + "(= number of original H covariates)"); + } + + if (D.size() != K) { + stop("Dimension mismatch: D must have length K"); + } + + if (S_A <= 0) { + stop("Invalid hyperparameter: S_A must be positive"); + } + + // Validate first Rk and extract J + Eigen::MatrixXd Rk_first = as(Rk_list[0]); + int J = Rk_first.rows(); + if (Rk_first.cols() != L) { + stop("Dimension mismatch: Rk[1] must have L columns"); + } + + // inputC must be J x num_pathways + if (inputC.rows() != J) { + stop("Dimension mismatch: inputC must have J rows (got %d, expected %d)", + inputC.rows(), J); + } + if (inputC.cols() != num_pathways) { + stop("Dimension mismatch: inputC must have ncol(C_rotation) = %d columns " + "(got %d)", num_pathways, inputC.cols()); + } + + if (numSamples == 0) { + stop("Bi_list cannot be empty"); + } + + // Count total cells and validate Bi dimensions + int N = 0; + for (int i = 0; i < numSamples; ++i) { + Eigen::MatrixXd Bi = as(Bi_list[i]); + if (Bi.rows() != K) { + stop("Dimension mismatch: Bi[%d] must have K rows", i + 1); + } + N += Bi.cols(); + } + + if (verbose >= 1) { + Rcout << "Computing diffADB (pathway-space): " + << num_pathways << " pathways x " << N << " cells" << std::endl; + if (verbose >= 2) { + Rcout << " Genes: " << J << ", Factors: " << K + << ", Contrast dim: " << L + << ", SVD components: " << P << std::endl; + } + } + + // ---- Step 1: H_contrast = H_rotation * contrast (L x 1) ---- + VectorXd H_contrast = H_rotation * contrast; + + // ---- Step 2: diffQ_Z = [Rk * H_contrast for k=1..K] (J x K) ---- + if (verbose >= 2) Rcout << " Building diffQ_Z (J x K)..." << std::endl; + + MatrixXd diffQ_Z(J, K); + diffQ_Z.col(0) = Rk_first * H_contrast; + + for (int k = 1; k < K; ++k) { + Eigen::MatrixXd Rk = as(Rk_list[k]); // J x L + if (Rk.rows() != J || Rk.cols() != L) { + stop("Dimension mismatch: all Rk matrices must have the same JxL dimensions"); + } + diffQ_Z.col(k) = Rk * H_contrast; + } + + // ---- Steps 3-5: differential pathway loadings via the EXACT solve_A operator ---- + // diffADB must be the differential of ADB, i.e. ADB(Z + diffQ_Z) - ADB(Z). + // Since ADB = C.rotation * A * DB and solve_A() is linear in Z with + // A = (Cmat^T Cmat + lambda I)^{-1} Cmat^T Z, Cmat = inputC * C_rotation, + // lambda = 1 / S_A (see gedi_core.cpp::solve_A / workspace_CtC_inv), + // the differential pathway loadings are A_diff = solve_A(diffQ_Z), applying + // the SAME shrinkage so diffADB lands on the same scale as ADB. (Cmat is the + // orthonormal SVD basis aux_C, so Cmat^T Cmat = I and the shrinkage reduces to + // the scalar gamma = S_A/(S_A+1); we form CtC explicitly to mirror solve_A + // exactly and stay robust to any deviation from orthonormality.) + if (verbose >= 2) Rcout << " Projecting through pathway space (solve_A operator)..." << std::endl; + + MatrixXd Cmat = inputC * C_rotation; // J x P (= aux_C, orthonormal) + MatrixXd CtC = Cmat.transpose() * Cmat; // P x P (~ I_P) + CtC.diagonal().array() += 1.0 / S_A; // + lambda I, lambda = 1 / S_A + MatrixXd CtdQ = Cmat.transpose() * diffQ_Z; // P x K (= Cmat^T * diffQ_Z) + MatrixXd A_diff = CtC.ldlt().solve(CtdQ); // P x K (= workspace_CtC_inv * CtdQ) + MatrixXd CA_diff = C_rotation * A_diff; // num_pathways x K + + // ---- Steps 6-7: concatenate B, compute DB, then diffADB ---- + if (verbose >= 2) Rcout << " Concatenating B and computing DB..." << std::endl; + + MatrixXd B(K, N); + int col_offset = 0; + for (int i = 0; i < numSamples; ++i) { + Eigen::MatrixXd Bi = as(Bi_list[i]); + int Ni_current = Bi.cols(); + B.block(0, col_offset, K, Ni_current) = Bi; + col_offset += Ni_current; + } + + MatrixXd DB = D.asDiagonal() * B; // K x N + MatrixXd diffADB = CA_diff * DB; // num_pathways x N + + if (verbose >= 1) { + double mean_val = diffADB.mean(); + double std_val = std::sqrt((diffADB.array() - mean_val).square().mean()); + Rcout << "[OK] diffADB computed: mean = " << mean_val + << ", std = " << std_val << std::endl; + } + + return diffADB; +} diff --git a/tests/testthat/test-diffexp-projection-math.R b/tests/testthat/test-diffexp-projection-math.R new file mode 100644 index 0000000..5e25aeb --- /dev/null +++ b/tests/testthat/test-diffexp-projection-math.R @@ -0,0 +1,246 @@ +# tests/testthat/test-diffexp-projection-math.R +# ----------------------------------------------------------------------------- +# Numerical-correctness ("math invariant") tests for the differential +# projections wired in for issue #26: +# * model$diffADB(contrast) (differential pathway activity, P x N) +# * plot_features(..., projection = "diffexp"/"diffadb") +# +# These go beyond "is it a ggplot": they validate that the values produced by +# the C++ kernel getDiffADB_cpp and the efficient plot_features paths match an +# independent pure-R reconstruction of the documented formulae. +# +# Core identities being checked (derivation, with Cmat = inputC %*% C.rotation): +# ZDB = Z %*% DB (shared manifold) +# ADB = C.rotation %*% A %*% DB (pathway activity) +# A = solveA(Z) (solve_A is linear in Z) +# diffExp = diffQ_Z %*% DB (differential expression, J x N) +# diffADB = C.rotation %*% solveA(diffQ_Z) %*% DB +# = ADB(Z + diffQ_Z) - ADB(Z) (TRUE differential of ADB) +# where solveA(X) = (Cmat^T Cmat + (1/S_A) I)^{-1} Cmat^T X applies the same +# shrinkage solve_A() uses (gedi_core.cpp). diffQ_Z[, k] = Rk[[k]] %*% (H.rot %*% contrast). +# ----------------------------------------------------------------------------- + +make_diff_math_fixture <- function() { + set.seed(202606) + n_genes <- 50 + n_cells <- 80 + n_samples <- 4 + n_pathways <- 6 + K <- 4 + + M <- Matrix::Matrix( + matrix(stats::rpois(n_genes * n_cells, lambda = 5), n_genes, n_cells), + sparse = TRUE + ) + rownames(M) <- paste0("Gene_", seq_len(n_genes)) + colnames(M) <- paste0("Cell_", seq_len(n_cells)) + + samples <- factor(rep(paste0("S", seq_len(n_samples)), + each = n_cells / n_samples)) + + # 3 covariates, linearly dependent (cov3 = cov1 + cov2) -> rank-deficient H, + # so L < num_covariates and we also exercise the H.rotation projection. + H <- matrix( + c(1, 1, 0, 0, + 0, 0, 1, 1, + 1, 1, 1, 1), + nrow = 3, byrow = TRUE, + dimnames = list(c("batch", "condition", "combined"), + paste0("S", seq_len(n_samples))) + ) + + # C pathway membership (J x n_pathways), 0/1, each gene in >= 1 pathway. + set.seed(11) + C <- matrix(0L, nrow = n_genes, ncol = n_pathways, + dimnames = list(rownames(M), paste0("PW", seq_len(n_pathways)))) + for (i in seq_len(n_genes)) { + C[i, sample(seq_len(n_pathways), 1)] <- 1L + } + # guarantee every pathway has at least one gene + for (p in seq_len(n_pathways)) if (sum(C[, p]) == 0) C[p, p] <- 1L + + model <- CreateGEDIObject( + Samples = samples, M = M, K = K, H = H, C = C, verbose = 0 + ) + model$train(iterations = 20, track_interval = 5) + + list(model = model, H = H, C = C, num_cov = nrow(H), + n_pathways = n_pathways, n_genes = n_genes, n_cells = n_cells, K = K) +} + +# Pure-R helpers rebuilt from stored model state ------------------------------ + +.rebuild <- function(m) { + Z <- m$params$Z # J x K + D <- m$params$D # K + Bi <- m$params$Bi # list of K x Ni + Rk <- m$params$Rk # list of J x L + A <- m$params$A # P x K + Cin <- as.matrix(m$priors$inputC) # J x num_pathways + Cr <- as.matrix(m$priors$C.rotation) # num_pathways x P + Hr <- as.matrix(m$priors$H.rotation) # L x num_cov + S_A <- m$hyperparams$S_A + K <- m$aux$K + P <- m$aux$P + + B <- do.call(cbind, Bi) # K x N + DB <- diag(D, nrow = K) %*% B # K x N + Cmat <- Cin %*% Cr # J x P (= aux_C) + CtC <- t(Cmat) %*% Cmat + (1 / S_A) * diag(P) # P x P + solveA <- function(X) solve(CtC, t(Cmat) %*% X) # P x ncol(X) + + list(Z = Z, D = D, DB = DB, Rk = Rk, A = A, Cr = Cr, Hr = Hr, + S_A = S_A, K = K, P = P, Cmat = Cmat, solveA = solveA) +} + +.diffQ_Z <- function(r, contrast) { + H_c <- as.vector(r$Hr %*% contrast) # length L + vapply(r$Rk, function(Rk_k) as.vector(Rk_k %*% H_c), + numeric(nrow(r$Rk[[1]]))) # J x K +} + +# ----------------------------------------------------------------------------- +test_that("solve_A identity: stored A == (Cmat^T Cmat + (1/S_A) I)^{-1} Cmat^T Z", { + fx <- make_diff_math_fixture(); m <- fx$model + r <- .rebuild(m) + A_from_Z <- r$solveA(r$Z) # P x K + expect_equal(unname(r$A), unname(A_from_Z), tolerance = 1e-8) +}) + +# ----------------------------------------------------------------------------- +test_that("ADB == C.rotation %*% A %*% DB (stored pathway activity)", { + fx <- make_diff_math_fixture(); m <- fx$model + r <- .rebuild(m) + ADB_R <- r$Cr %*% r$A %*% r$DB # num_pathways x N + ADB_m <- m$projections$ADB + expect_equal(dim(ADB_m), c(fx$n_pathways, fx$n_cells)) + expect_equal(unname(as.matrix(ADB_m)), unname(ADB_R), tolerance = 1e-8) +}) + +# ----------------------------------------------------------------------------- +test_that("diffADB == C.rotation %*% solveA(diffQ_Z) %*% DB (cross-impl check)", { + fx <- make_diff_math_fixture(); m <- fx$model + r <- .rebuild(m) + contrast <- c(0.7, -0.4, 0.2) + + dQZ <- .diffQ_Z(r, contrast) # J x K + diffADB_R <- r$Cr %*% r$solveA(dQZ) %*% r$DB # num_pathways x N + diffADB_m <- m$diffADB(contrast) + + expect_equal(unname(as.matrix(diffADB_m)), unname(diffADB_R), tolerance = 1e-7) +}) + +# ----------------------------------------------------------------------------- +test_that("diffADB is the TRUE differential of ADB: ADB(Z+dQ) - ADB(Z) == diffADB", { + fx <- make_diff_math_fixture(); m <- fx$model + r <- .rebuild(m) + contrast <- c(0.7, -0.4, 0.2) + + dQZ <- .diffQ_Z(r, contrast) # J x K + ADB_base <- r$Cr %*% r$A %*% r$DB # = model$projections$ADB + ADB_pert <- r$Cr %*% r$solveA(r$Z + dQZ) %*% r$DB # perturb gene loadings + diff_via_perturb <- ADB_pert - ADB_base + + expect_equal(unname(as.matrix(m$diffADB(contrast))), + unname(diff_via_perturb), tolerance = 1e-7) +}) + +# ----------------------------------------------------------------------------- +test_that("diffADB carries the solve_A shrinkage: diffADB == gamma * M * diffExp", { + # Regression guard: an implementation that forgot the solve_A shrinkage would + # produce diffADB = M %*% diffExp (too large by 1/gamma = 1 + 1/S_A). + fx <- make_diff_math_fixture(); m <- fx$model + r <- .rebuild(m) + contrast <- c(0.5, 0.3, -0.2) + + # Cmat is orthonormal (SVD basis) => CtC = (1 + 1/S_A) I => solveA = gamma * Cmat^T + gamma <- 1 / (1 + 1 / r$S_A) # = S_A / (S_A + 1) + Mop <- r$Cr %*% t(r$Cmat) # num_pathways x J + diffExp_noO <- m$diffExp(contrast, include_O = FALSE) # J x N + + diffADB_scaled <- gamma * (Mop %*% as.matrix(diffExp_noO)) + expect_equal(unname(as.matrix(m$diffADB(contrast))), + unname(diffADB_scaled), tolerance = 1e-7) + + # And explicitly confirm the shrinkage is NOT unity by default (A_shrinkage=1 => gamma=0.5) + expect_lt(gamma, 0.9) +}) + +# ----------------------------------------------------------------------------- +test_that("diffADB is linear in the contrast", { + fx <- make_diff_math_fixture(); m <- fx$model + c1 <- c(1, 0, 0); c2 <- c(0, 1, 0) + + d1 <- m$diffADB(c1) + d2 <- m$diffADB(c2) + d12 <- m$diffADB(c1 + c2) + d2x <- m$diffADB(2 * c1) + + expect_equal(unname(as.matrix(d12)), unname(as.matrix(d1 + d2)), tolerance = 1e-8) + expect_equal(unname(as.matrix(d2x)), unname(as.matrix(2 * d1)), tolerance = 1e-8) +}) + +# ----------------------------------------------------------------------------- +test_that("diffExp == diffQ_Z %*% DB and relates to ZDB by swapping Z -> diffQ_Z", { + fx <- make_diff_math_fixture(); m <- fx$model + r <- .rebuild(m) + contrast <- c(0.5, -0.3, 0.7) + + dQZ <- .diffQ_Z(r, contrast) + diffExp_R <- dQZ %*% r$DB # J x N + expect_equal(unname(as.matrix(m$diffExp(contrast, include_O = FALSE))), + unname(diffExp_R), tolerance = 1e-8) + + # ZDB = Z %*% DB (same DB map applied to Z instead of diffQ_Z) + ZDB_R <- r$Z %*% r$DB + expect_equal(unname(as.matrix(m$projections$ZDB)), unname(ZDB_R), tolerance = 1e-8) +}) + +# ----------------------------------------------------------------------------- +test_that("plot_features(projection='diffexp') plotted values == full diffExp rows", { + fx <- make_diff_math_fixture(); m <- fx$model + contrast <- c(0.5, -0.3, 0.7) + genes <- m$metadata$geneIDs[c(2, 7, 23)] + emb <- cbind(seq_len(fx$n_cells), rev(seq_len(fx$n_cells))) # N x 2, cell order + + full <- m$diffExp(contrast, include_O = FALSE) # J x N + p <- plot_features(m, features = genes, embedding = emb, + projection = "diffexp", contrast = contrast, + randomize = FALSE) + expect_s3_class(p, "ggplot") + pd <- p$data + for (g in genes) { + vals <- pd$Value[as.character(pd$Feature) == g] # N values, cell order + expect_equal(unname(vals), unname(as.vector(full[g, ])), tolerance = 1e-7) + } + + # include_O path matches the offset-augmented full matrix + full_O <- m$diffExp(contrast, include_O = TRUE) + pO <- plot_features(m, features = genes, embedding = emb, + projection = "diffexp", contrast = contrast, + include_O = TRUE, randomize = FALSE) + pdO <- pO$data + for (g in genes) { + vals <- pdO$Value[as.character(pdO$Feature) == g] + expect_equal(unname(vals), unname(as.vector(full_O[g, ])), tolerance = 1e-7) + } +}) + +# ----------------------------------------------------------------------------- +test_that("plot_features(projection='diffadb') plotted values == diffADB rows", { + fx <- make_diff_math_fixture(); m <- fx$model + contrast <- c(0.5, -0.3, 0.7) + pws <- c("PW1", "PW4", "PW6") + emb <- cbind(seq_len(fx$n_cells), rev(seq_len(fx$n_cells))) + + full <- m$diffADB(contrast) # num_pathways x N + p <- plot_features(m, features = pws, embedding = emb, + projection = "diffadb", contrast = contrast, + randomize = FALSE) + expect_s3_class(p, "ggplot") + pd <- p$data + for (pw in pws) { + vals <- pd$Value[as.character(pd$Feature) == pw] + expect_equal(unname(vals), unname(as.vector(full[pw, ])), tolerance = 1e-7) + } +}) diff --git a/tests/testthat/test-diffexp-projection.R b/tests/testthat/test-diffexp-projection.R new file mode 100644 index 0000000..8233887 --- /dev/null +++ b/tests/testthat/test-diffexp-projection.R @@ -0,0 +1,280 @@ +# tests/testthat/test-diffexp-projection.R +# ----------------------------------------------------------------------------- +# Tests for the "diffexp" and "diffadb" projection types wired into +# model$diffADB() and plot_features(). +# +# Fixture: 40 genes, 60 cells, 2 samples, K=3 latent factors. +# - H design matrix: 1 covariate (condition), 2 samples => +# ncol(H.rotation) == 1 (full rank, so L == num_cov == 1) +# - C pathway matrix: 5 pathways, 0/1 gene membership +# ----------------------------------------------------------------------------- + +make_diff_proj_fixture <- function() { + set.seed(777) + n_genes <- 40 + n_cells <- 60 + n_samples <- 2 + n_pathways <- 5 + K <- 3 + + M <- Matrix::Matrix( + matrix(stats::rpois(n_genes * n_cells, lambda = 4), n_genes, n_cells), + sparse = TRUE + ) + rownames(M) <- paste0("Gene_", seq_len(n_genes)) + colnames(M) <- paste0("Cell_", seq_len(n_cells)) + + samples <- factor(rep(paste0("S", seq_len(n_samples)), + each = n_cells / n_samples)) + + # Single-covariate H: condition (0 = ctrl, 1 = treated) per sample + H <- matrix(c(0, 1), nrow = 1, + dimnames = list("condition", + paste0("S", seq_len(n_samples)))) + + # C pathway matrix: 0/1 membership, each gene in exactly one pathway + set.seed(42) + C <- matrix(0L, nrow = n_genes, ncol = n_pathways, + dimnames = list(rownames(M), + paste0("PW", seq_len(n_pathways)))) + membership <- sample(seq_len(n_pathways), n_genes, replace = TRUE) + for (i in seq_len(n_genes)) C[i, membership[i]] <- 1L + + model <- CreateGEDIObject( + Samples = samples, + M = M, + K = K, + H = H, + C = C, + verbose = 0 + ) + model$train(iterations = 10, track_interval = 5) + + list( + model = model, + H = H, + C = C, + num_cov = nrow(H), # == 1 + n_pathways = n_pathways, + n_genes = n_genes, + n_cells = n_cells + ) +} + +# ---- model$diffADB() return value ------------------------------------------ + +test_that("diffADB returns a num_pathways x N matrix with correct dimnames", { + fx <- make_diff_proj_fixture() + m <- fx$model + contrast <- c(condition = 1) # length 1 == ncol(H.rotation) + + result <- m$diffADB(contrast) + + expect_true(is.matrix(result)) + expect_equal(dim(result), c(fx$n_pathways, fx$n_cells)) + expect_equal(rownames(result), paste0("PW", seq_len(fx$n_pathways))) + expect_equal(colnames(result), m$metadata$cellIDs) +}) + +test_that("diffADB values are numeric and finite", { + fx <- make_diff_proj_fixture() + m <- fx$model + contrast <- c(condition = 1) + + result <- m$diffADB(contrast) + expect_true(all(is.finite(result))) +}) + +# ---- plot_features: projection = "diffexp" ---------------------------------- + +test_that("plot_features returns ggplot for projection='diffexp'", { + fx <- make_diff_proj_fixture() + m <- fx$model + contrast <- c(condition = 1) + genes <- m$metadata$geneIDs[1:3] + set.seed(1) + emb <- matrix(rnorm(fx$n_cells * 2), ncol = 2) + + p <- plot_features(m, + features = genes, + embedding = emb, + projection = "diffexp", + contrast = contrast) + expect_s3_class(p, "ggplot") +}) + +test_that("plot_features diffexp with include_O=TRUE returns ggplot", { + fx <- make_diff_proj_fixture() + m <- fx$model + contrast <- c(condition = 1) + genes <- m$metadata$geneIDs[1:2] + set.seed(1) + emb <- matrix(rnorm(fx$n_cells * 2), ncol = 2) + + p <- plot_features(m, + features = genes, + embedding = emb, + projection = "diffexp", + contrast = contrast, + include_O = TRUE) + expect_s3_class(p, "ggplot") +}) + +test_that("plot_features diffexp works for a single gene (F=1 edge case)", { + fx <- make_diff_proj_fixture() + m <- fx$model + contrast <- c(condition = 1) + set.seed(1) + emb <- matrix(rnorm(fx$n_cells * 2), ncol = 2) + + p <- plot_features(m, + features = m$metadata$geneIDs[1], + embedding = emb, + projection = "diffexp", + contrast = contrast) + expect_s3_class(p, "ggplot") +}) + +# ---- plot_features: projection = "diffadb" ---------------------------------- + +test_that("plot_features returns ggplot for projection='diffadb'", { + fx <- make_diff_proj_fixture() + m <- fx$model + contrast <- c(condition = 1) + pathways <- paste0("PW", 1:3) + set.seed(1) + emb <- matrix(rnorm(fx$n_cells * 2), ncol = 2) + + p <- plot_features(m, + features = pathways, + embedding = emb, + projection = "diffadb", + contrast = contrast) + expect_s3_class(p, "ggplot") +}) + +test_that("plot_features diffadb works for a single pathway (F=1 edge case)", { + fx <- make_diff_proj_fixture() + m <- fx$model + contrast <- c(condition = 1) + set.seed(1) + emb <- matrix(rnorm(fx$n_cells * 2), ncol = 2) + + p <- plot_features(m, + features = "PW1", + embedding = emb, + projection = "diffadb", + contrast = contrast) + expect_s3_class(p, "ggplot") +}) + +# ---- Error: contrast = NULL ------------------------------------------------ + +test_that("plot_features errors when contrast=NULL for projection='diffexp'", { + fx <- make_diff_proj_fixture() + m <- fx$model + set.seed(1) + emb <- matrix(rnorm(fx$n_cells * 2), ncol = 2) + + expect_error( + plot_features(m, features = m$metadata$geneIDs[1:2], + embedding = emb, + projection = "diffexp", + contrast = NULL), + regexp = "requires a `contrast`" + ) +}) + +test_that("plot_features errors when contrast=NULL for projection='diffadb'", { + fx <- make_diff_proj_fixture() + m <- fx$model + set.seed(1) + emb <- matrix(rnorm(fx$n_cells * 2), ncol = 2) + + expect_error( + plot_features(m, features = "PW1", + embedding = emb, + projection = "diffadb", + contrast = NULL), + regexp = "requires a `contrast`" + ) +}) + +# ---- Error: wrong-length contrast ------------------------------------------ + +test_that("plot_features errors on wrong-length contrast for diffexp", { + fx <- make_diff_proj_fixture() + m <- fx$model + set.seed(1) + emb <- matrix(rnorm(fx$n_cells * 2), ncol = 2) + + # correct length is 1 (ncol(H.rotation)); supply length 3 + expect_error( + plot_features(m, features = m$metadata$geneIDs[1:2], + embedding = emb, + projection = "diffexp", + contrast = c(1, 0, 0)), + regexp = "ncol\\(model\\$priors\\$H\\.rotation\\)" + ) +}) + +test_that("model$diffADB errors on wrong-length contrast", { + fx <- make_diff_proj_fixture() + m <- fx$model + + expect_error(m$diffADB(c(1, 0)), + regexp = "number of original H covariates") +}) + +# ---- Error: diffadb on model with no C prior (P=0) ------------------------- + +test_that("plot_features errors for diffadb when model has P=0", { + # Build a model WITHOUT a C matrix (P=0) + set.seed(99) + n_genes <- 20 + n_cells <- 30 + M <- Matrix::Matrix( + matrix(stats::rpois(n_genes * n_cells, lambda = 3), n_genes, n_cells), + sparse = TRUE + ) + rownames(M) <- paste0("G_", seq_len(n_genes)) + colnames(M) <- paste0("C_", seq_len(n_cells)) + samples <- factor(rep(c("S1", "S2"), each = n_cells / 2)) + H <- matrix(c(0, 1), nrow = 1, + dimnames = list("cond", c("S1", "S2"))) + + m_noC <- CreateGEDIObject(Samples = samples, M = M, K = 2, H = H, verbose = 0) + m_noC$train(iterations = 5) + + set.seed(1) + emb <- matrix(rnorm(n_cells * 2), ncol = 2) + + expect_error( + plot_features(m_noC, features = 1L, + embedding = emb, + projection = "diffadb", + contrast = c(1)), + regexp = "P=0" + ) +}) + +test_that("model$diffADB errors when model has P=0", { + set.seed(99) + n_genes <- 20 + n_cells <- 30 + M <- Matrix::Matrix( + matrix(stats::rpois(n_genes * n_cells, lambda = 3), n_genes, n_cells), + sparse = TRUE + ) + rownames(M) <- paste0("G_", seq_len(n_genes)) + colnames(M) <- paste0("C_", seq_len(n_cells)) + samples <- factor(rep(c("S1", "S2"), each = n_cells / 2)) + H <- matrix(c(0, 1), nrow = 1, + dimnames = list("cond", c("S1", "S2"))) + + m_noC <- CreateGEDIObject(Samples = samples, M = M, K = 2, H = H, verbose = 0) + m_noC$train(iterations = 5) + + expect_error(m_noC$diffADB(c(1)), + regexp = "gene-level prior") +})