Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions R/FineMappingEntry.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ setClass("FineMappingEntry",
#' the pipeline's \code{trim} parameter).
#' @param topLoci Per-variant \code{data.frame} in canonical schema:
#' identity columns (\code{variant_id, chrom, pos, A1, A2}), context
#' (\code{N, MAF}), marginal columns (\code{marginal_beta,
#' (\code{N, af}; effect-allele frequency, never MAF), marginal columns (\code{marginal_beta,
#' marginal_se, marginal_z, marginal_p}), posterior columns
#' (\code{pip, posterior_mean, posterior_sd, cs_*, cs_*_purity}),
#' pipeline stamps (\code{method, gene, event, grange_start,
Expand Down Expand Up @@ -268,16 +268,17 @@ setMethod("show", "FineMappingEntry", function(object) {
}

# Project the canonical wide topLoci to the posterior view: identity +
# N/MAF + (beta=posterior_mean, se=posterior_sd) + pip + cs_* + signal_cluster
# N/af + (beta=posterior_mean, se=posterior_sd) + pip + cs_* + signal_cluster
# + pipeline stamps. Renames `posterior_mean`/`posterior_sd` to `beta`/`se`.
# Missing optional columns are NA-filled.
# Exports effect-allele frequency as `af` (never MAF). Missing optional
# columns are NA-filled.
# @noRd
.projectPosteriorView <- function(tl) {
if (nrow(tl) == 0L) {
return(data.frame(
variant_id = character(0), chrom = character(0), pos = integer(0),
A1 = character(0), A2 = character(0),
N = numeric(0), MAF = numeric(0),
N = numeric(0), af = numeric(0),
beta = numeric(0), se = numeric(0), pip = numeric(0),
stringsAsFactors = FALSE))
}
Expand All @@ -288,7 +289,7 @@ setMethod("show", "FineMappingEntry", function(object) {
A1 = .tlCol(tl, "A1", "character"),
A2 = .tlCol(tl, "A2", "character"),
N = .tlCol(tl, "N", "numeric"),
MAF = .tlCol(tl, "MAF", "numeric"),
af = .tlCol(tl, "af", "numeric"),
beta = .tlCol(tl, "posterior_mean", "numeric"),
se = .tlCol(tl, "posterior_sd", "numeric"),
pip = .tlCol(tl, "pip", "numeric"),
Expand All @@ -304,16 +305,17 @@ setMethod("show", "FineMappingEntry", function(object) {
out
}

# Project to the marginal view: identity + N/MAF + (beta, se, z, p) where
# Project to the marginal view: identity + N/af + (beta, se, z, p) where
# beta/se/z/p are the marginal univariate columns renamed from their
# `marginal_*` storage names. Missing optional columns are NA-filled.
# `marginal_*` storage names. Exports effect-allele frequency as `af`
# (never MAF). Missing optional columns are NA-filled.
# @noRd
.projectMarginalView <- function(tl) {
if (nrow(tl) == 0L) {
return(data.frame(
variant_id = character(0), chrom = character(0), pos = integer(0),
A1 = character(0), A2 = character(0),
N = numeric(0), MAF = numeric(0),
N = numeric(0), af = numeric(0),
beta = numeric(0), se = numeric(0), z = numeric(0), p = numeric(0),
stringsAsFactors = FALSE))
}
Expand All @@ -324,7 +326,7 @@ setMethod("show", "FineMappingEntry", function(object) {
A1 = .tlCol(tl, "A1", "character"),
A2 = .tlCol(tl, "A2", "character"),
N = .tlCol(tl, "N", "numeric"),
MAF = .tlCol(tl, "MAF", "numeric"),
af = .tlCol(tl, "af", "numeric"),
beta = .tlCol(tl, "marginal_beta", "numeric"),
se = .tlCol(tl, "marginal_se", "numeric"),
z = .tlCol(tl, "marginal_z", "numeric"),
Expand Down
31 changes: 30 additions & 1 deletion R/fineMappingPipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@
#' \code{0.025}.
#' @param minAbsCorr Minimum absolute correlation for credible-set
#' purity. Default \code{0.8}.
#' @param medianAbsCorr Optional median absolute correlation for
#' credible-set purity, routed to \code{susieR::susie_get_cs}. A set is
#' kept if it passes either \code{minAbsCorr} or \code{medianAbsCorr}
#' (OR-logic). Default \code{NULL} (off).
#' @param fineMappingResult Optional existing \code{FineMappingResult}
#' to use as a resume cache; tuples already present are not refit.
#' @param jointSpecification Optional joint-fit specification (NULL by
Expand Down Expand Up @@ -584,7 +588,8 @@ setGeneric("fineMappingPipeline",
.fmPostprocessOne <- function(fit, method, dataX, dataY,
coverage, secondaryCoverage, signalCutoff,
minAbsCorr, csInput = NULL, af = NULL,
region = NULL, trim = NULL) {
region = NULL, trim = NULL,
medianAbsCorr = NULL) {
# Inherit `trim` from the calling method's frame if not passed in
# explicitly. The 10 internal call sites don't currently forward it
# (they predate the trim knob) so we look it up from the caller. This
Expand All @@ -594,12 +599,19 @@ setGeneric("fineMappingPipeline",
trim <- tryCatch(get("trim", envir = parent.frame()),
error = function(e) TRUE)
}
# `medianAbsCorr` is inherited the same way (each public setMethod gains a
# `medianAbsCorr = NULL` parameter); NULL is a no-op (OR-logic purity off).
if (is.null(medianAbsCorr)) {
medianAbsCorr <- tryCatch(get("medianAbsCorr", envir = parent.frame()),
error = function(e) NULL)
}
fits <- setNames(list(fit), method)
post <- postprocessFinemappingFits(
fits = fits, dataX = dataX, dataY = dataY,
af = af, coverage = coverage,
secondaryCoverage = secondaryCoverage,
signalCutoff = signalCutoff, minAbsCorr = minAbsCorr,
medianAbsCorr = medianAbsCorr,
region = region,
csInput = csInput, trim = isTRUE(trim))
out <- formatFinemappingOutput(post, primaryMethod = method)
Expand Down Expand Up @@ -738,6 +750,7 @@ setMethod("fineMappingPipeline", "QtlDataset",
secondaryCoverage = c(0.7, 0.5),
signalCutoff = 0.025,
minAbsCorr = 0.8,
medianAbsCorr = NULL,
fineMappingResult = NULL,
naAction = c("drop", "impute"),
verbose = 1,
Expand Down Expand Up @@ -1181,6 +1194,7 @@ setMethod("fineMappingPipeline", "MultiStudyQtlDataset",
secondaryCoverage = c(0.7, 0.5),
signalCutoff = 0.025,
minAbsCorr = 0.8,
medianAbsCorr = NULL,
fineMappingResult = NULL,
naAction = c("drop", "impute"),
verbose = 1,
Expand Down Expand Up @@ -1313,6 +1327,7 @@ setMethod("fineMappingPipeline", "QtlSumStats",
secondaryCoverage = c(0.7, 0.5),
signalCutoff = 0.025,
minAbsCorr = 0.8,
medianAbsCorr = NULL,
fineMappingResult = NULL,
verbose = 1,
trim = TRUE,
Expand Down Expand Up @@ -1406,6 +1421,11 @@ setMethod("fineMappingPipeline", "QtlSumStats",
variantIds <- zn$variantIds
z <- zn$z
n <- zn$n
# Effect-allele frequency for export as `af` (entry MAF mcol, post-QC
# harmonized/complemented); aligned to variantIds, NULL -> af NA.
.qmc <- S4Vectors::mcols(entry)
afByVar <- if ("MAF" %in% colnames(.qmc))
setNames(as.numeric(.qmc$MAF), as.character(.qmc$SNP))[variantIds] else NULL
ldMat <- .fmLdFromSketch(ldSketch, variantIds)
names(z) <- variantIds

Expand Down Expand Up @@ -1441,6 +1461,7 @@ setMethod("fineMappingPipeline", "QtlSumStats",
secondaryCoverage = secondaryCoverage,
signalCutoff = signalCutoff,
minAbsCorr = minAbsCorr,
af = afByVar,
csInput = "Xcorr")
# The method column on the FineMappingResult carries the bare
# token (susie / susieInf / susieAsh), independent of which
Expand Down Expand Up @@ -1552,6 +1573,7 @@ setMethod("fineMappingPipeline", "GwasSumStats",
secondaryCoverage = c(0.7, 0.5),
signalCutoff = 0.025,
minAbsCorr = 0.8,
medianAbsCorr = NULL,
fineMappingResult = NULL,
verbose = 1,
trim = TRUE,
Expand Down Expand Up @@ -1590,6 +1612,12 @@ setMethod("fineMappingPipeline", "GwasSumStats",
variantIds <- zn$variantIds
z <- zn$z
n <- zn$n
# Effect-allele frequency for export as the `af` column (post-QC the
# entry carries the harmonized, complemented frequency in its MAF mcol).
# Aligned to `variantIds`; NULL when absent -> af exported as NA.
.gmc <- S4Vectors::mcols(gr)
afByVar <- if ("MAF" %in% colnames(.gmc))
setNames(as.numeric(.gmc$MAF), as.character(.gmc$SNP))[variantIds] else NULL
# Derive a region_id from the entry's GRanges so multi-block
# genome-wide GWAS sweeps can carry one row per block without
# tripping (study, method, region_id) uniqueness. Format:
Expand Down Expand Up @@ -1651,6 +1679,7 @@ setMethod("fineMappingPipeline", "GwasSumStats",
secondaryCoverage = secondaryCoverage,
signalCutoff = signalCutoff,
minAbsCorr = minAbsCorr,
af = afByVar,
csInput = "Xcorr")
pushRow(st, tk, region_id, ent)
}
Expand Down
4 changes: 2 additions & 2 deletions R/fineMappingWrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,7 @@ buildTopLoci <- function(fit, csTables, variantNames, sumstats = NULL,
A1 = parsed$A1,
A2 = parsed$A2,
N = rep(fitN, nV),
MAF = if (is.null(af)) rep(NA_real_, nV) else as.numeric(af),
af = if (is.null(af)) rep(NA_real_, nV) else as.numeric(af),
marginal_beta = marginalBeta,
marginal_se = marginalSe,
marginal_z = marginalZ,
Expand Down Expand Up @@ -742,7 +742,7 @@ buildTopLoci <- function(fit, csTables, variantNames, sumstats = NULL,
A1 = character(),
A2 = character(),
N = numeric(),
MAF = numeric(),
af = numeric(),
marginal_beta = numeric(),
marginal_se = numeric(),
marginal_z = numeric(),
Expand Down
77 changes: 45 additions & 32 deletions R/sumstatsQc.R
Original file line number Diff line number Diff line change
Expand Up @@ -2331,45 +2331,56 @@ ldMismatchQc <- function(zScore, R = NULL, X = NULL, nSample = NULL,
#' Kriging-style LD-consistency outlier QC
#'
#' Flags variants whose observed z-score is inconsistent with the value
#' predicted from its LD neighbours. For \code{z ~ N(0, R)} the leave-one-out
#' conditional distribution of \code{z_i} given the rest has mean
#' \code{-(1/Omega_ii) * Omega_{i,-i} z_{-i}} and variance \code{1/Omega_ii},
#' where \code{Omega = R^{-1}}. The standardized residual is ~\code{N(0,1)} when
#' the z-scores and LD are mutually consistent, so a large residual marks an
#' allele-flip / LD-mismatch outlier. RSS-only helper, opt-in via
#' \code{alleleFlipKriging}; never wired into \code{alleleQc()} /
#' \code{matchRefPanel()}.
#' predicted from its LD neighbours, using susieR's kriging diagnostic.
#' \code{susieR::kriging_rss()} computes the leave-one-out conditional
#' distribution of each \code{z_i} given the rest, with the LD-mismatch scale
#' \code{s} estimated by \code{susieR::estimate_s_rss()}; its standardized
#' residual (\code{z_std_diff}) is ~\code{N(0,1)} when z-scores and LD agree, so
#' a large residual marks an allele-flip / LD-mismatch outlier. RSS-only helper,
#' opt-in via \code{alleleFlipKriging}; never wired into \code{alleleQc()} /
#' \code{matchRefPanel()}. Requires a susieR that provides \code{kriging_rss()}
#' and \code{estimate_s_rss()}.
#'
#' @param zScore Numeric vector of harmonized z-scores.
#' @param R Square LD correlation matrix aligned to \code{zScore}.
#' @param n Sample size, forwarded to \code{susieR::estimate_s_rss()} and
#' \code{susieR::kriging_rss()}.
#' @param variantIds Optional variant IDs for the diagnostics table.
#' @param pThreshold Two-sided p-value cutoff for flagging an outlier
#' (default \code{5e-8}).
#' @param ridge Small diagonal added to \code{R} before inversion for numerical
#' stability (default \code{1e-3}).
#' @return A list with \code{outlier} (logical vector) and \code{diagnostics}
#' (data frame of per-variant predicted z, residual, statistic, p-value, and
#' outlier flag).
#' @importFrom stats pnorm
#' @export
krigingOutlierQc <- function(zScore, R, variantIds = NULL,
pThreshold = 5e-8, ridge = 1e-3) {
krigingOutlierQc <- function(zScore, R, n, variantIds = NULL,
pThreshold = 5e-8) {
zScore <- as.numeric(zScore)
m <- length(zScore)
if (is.null(R) || !is.matrix(R) || nrow(R) != m || ncol(R) != m) {
stop("krigingOutlierQc requires a square LD matrix aligned to zScore.")
}
if (missing(n) || length(n) != 1L || is.na(n) || !is.finite(n) || n <= 0) {
stop("krigingOutlierQc requires a single positive sample size 'n'.")
}
if (!requireNamespace("susieR", quietly = TRUE) ||
!all(c("estimate_s_rss", "kriging_rss") %in% getNamespaceExports("susieR"))) {
stop("krigingOutlierQc requires a susieR that provides estimate_s_rss() and ",
"kriging_rss(); the installed susieR does not. Install a susieR with the ",
"kriging RSS diagnostic, or disable alleleFlipKriging.")
}
if (is.null(variantIds)) variantIds <- rownames(R)
# Regularize so the precision matrix is well-defined for collinear panels.
Omega <- solve(R + diag(ridge, m))
d <- diag(Omega)
omegaZ <- as.numeric(Omega %*% zScore)
condMean <- -(omegaZ - d * zScore) / d
condVar <- 1 / d
residual <- zScore - condMean
statistic <- residual / sqrt(condVar)
pValue <- 2 * pnorm(-abs(statistic))
outlier <- !is.na(pValue) & pValue < pThreshold
# susieR's kriging RSS diagnostic: estimate the LD-mismatch scale, then take
# the per-variant conditional distribution. `z_std_diff` is the standardized
# residual we threshold (same p-value rule as before).
s <- tryCatch(susieR::estimate_s_rss(z = zScore, R = R, n = n),
error = function(e) 0)
cd <- susieR::kriging_rss(z = zScore, R = R, n = n, s = s)$conditional_dist
condMean <- as.numeric(cd$condmean)
statistic <- as.numeric(cd$z_std_diff)
residual <- zScore - condMean
pValue <- 2 * stats::pnorm(-abs(statistic))
outlier <- !is.na(pValue) & pValue < pThreshold
list(
outlier = outlier,
diagnostics = data.frame(
Expand Down Expand Up @@ -2989,20 +3000,12 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
entryAudit$skipRegionDropped <- before - nrow(df)
}

# 4. Optional PIP screen.
if (opts$pipCutoffToSkip != 0) {
pip <- .applyPipScreen(df, n = opts$nForPip, cutoff = opts$pipCutoffToSkip)
df <- pip$df
entryAudit$pipScreenSkipped <- isTRUE(pip$skipped)
if (isTRUE(pip$skipped)) entryAudit$pipScreenReason <- pip$reason
}

if (nrow(df) < 2L) {
entryAudit$earlyExit <- "fewer than two variants after pre-harmonization QC"
return(list(gr = .dfToEntryGranges(df), audit = entryAudit))
}

# 5. Panel-vs-sumstats allele harmonization.
# 4. Panel-vs-sumstats allele harmonization.
nHarmIn <- nrow(df)
df <- .matchAgainstSketch(
df, ldSketch,
Expand All @@ -3027,6 +3030,14 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
" variant(s).")
}

# 5. Optional PIP screen (after harmonization, on panel-aligned variants).
if (opts$pipCutoffToSkip != 0) {
pip <- .applyPipScreen(df, n = opts$nForPip, cutoff = opts$pipCutoffToSkip)
df <- pip$df
entryAudit$pipScreenSkipped <- isTRUE(pip$skipped)
if (isTRUE(pip$skipped)) entryAudit$pipScreenReason <- pip$reason
}

# 6. Optional kriging prefilter.
if (isTRUE(opts$alleleFlipKriging) && nrow(df) >= 2L) {
nKrIn <- nrow(df)
Expand All @@ -3035,7 +3046,9 @@ krigingOutlierQc <- function(zScore, R, variantIds = NULL,
dosage <- t(SummarizedExperiment::assay(block, "dosage"))
colnames(dosage) <- df$SNP
R <- computeLd(dosage, method = "sample")
kr <- krigingOutlierQc(df$Z, R, variantIds = df$SNP)
nKrig <- if (!is.null(opts$nForPip) && is.finite(opts$nForPip)) opts$nForPip
else stats::median(as.numeric(df$N), na.rm = TRUE)
kr <- krigingOutlierQc(df$Z, R, n = nKrig, variantIds = df$SNP)
nKr <- sum(kr$outlier)
if (nKr > 0L) df <- df[!kr$outlier, , drop = FALSE]
entryAudit$krigingOutliersDropped <- nKr
Expand Down
Loading
Loading