From b0bd7815c0ce33c44033963bfd564e6864b43110 Mon Sep 17 00:00:00 2001 From: Daniel Nachun Date: Tue, 23 Jun 2026 15:20:27 -0700 Subject: [PATCH 1/2] add trans support --- R/GenotypeHandle.R | 191 +++++++- R/QtlDataset.R | 39 +- R/fineMappingPipeline.R | 409 ++++++++++++------ R/genotypeIo.R | 64 +++ R/jointSpecification.R | 191 ++++++-- R/twasWeightsPipeline.R | 311 +++++++++---- man/GenotypeHandle-class.Rd | 15 +- man/GenotypeHandle.Rd | 15 + man/fineMappingPipeline.Rd | 11 +- man/twasWeightsPipeline.Rd | 11 +- tests/testthat/test_QtlDataset.R | 94 +++- .../test_data/test_variants_chr22.bed | 22 + .../test_data/test_variants_chr22.bim | 349 +++++++++++++++ .../test_data/test_variants_chr22.fam | 100 +++++ .../test_data/test_variants_chr22.gds | Bin 0 -> 14009 bytes .../test_data/test_variants_chr22.pgen | Bin 0 -> 6364 bytes .../test_data/test_variants_chr22.psam | 101 +++++ .../test_data/test_variants_chr22.pvar | 350 +++++++++++++++ .../test_data/test_variants_chr22.vcf.gz | Bin 0 -> 12248 bytes .../test_data/test_variants_chr22.vcf.gz.tbi | Bin 0 -> 207 bytes tests/testthat/test_fineMappingPipeline.R | 181 ++++++++ tests/testthat/test_genotypeHandle.R | 66 +++ tests/testthat/test_genotypeIo.R | 104 +++++ tests/testthat/test_twasWeightsPipeline.R | 113 +++++ 24 files changed, 2455 insertions(+), 282 deletions(-) create mode 100644 tests/testthat/test_data/test_variants_chr22.bed create mode 100644 tests/testthat/test_data/test_variants_chr22.bim create mode 100644 tests/testthat/test_data/test_variants_chr22.fam create mode 100644 tests/testthat/test_data/test_variants_chr22.gds create mode 100644 tests/testthat/test_data/test_variants_chr22.pgen create mode 100644 tests/testthat/test_data/test_variants_chr22.psam create mode 100644 tests/testthat/test_data/test_variants_chr22.pvar create mode 100644 tests/testthat/test_data/test_variants_chr22.vcf.gz create mode 100644 tests/testthat/test_data/test_variants_chr22.vcf.gz.tbi diff --git a/R/GenotypeHandle.R b/R/GenotypeHandle.R index a184ffbf..9aeb80cf 100644 --- a/R/GenotypeHandle.R +++ b/R/GenotypeHandle.R @@ -13,13 +13,23 @@ NULL #' @description S4 container holding a path + format + metadata for lazy #' genotype access. Supports PLINK1 (.bed/.bim/.fam), PLINK2 #' (.pgen/.pvar/.psam), VCF/BCF, and GDS. -#' @slot path Character, file path. +#' @slot path Character, file path. For a one-file-per-chromosome handle +#' this is the chrom-meta file path (a display/provenance value); the +#' per-chromosome payload files live in \code{chromPaths}. #' @slot format Character, one of \code{"plink1"}, \code{"plink2"}, #' \code{"vcf"}, \code{"gds"}. -#' @slot snpInfo data.frame, SNP metadata read from the index/sidecar. +#' @slot snpInfo data.frame, SNP metadata read from the index/sidecar. For a +#' sharded handle this is the union across chromosomes (row-bound in the +#' order the shards were supplied), so \code{snpIdx} stays a single global +#' index space. #' @slot nSamples Integer, number of samples. #' @slot sampleIds Character vector of sample identifiers. #' @slot pgenPtr Opaque pointer for PLINK2 reader state (NULL otherwise). +#' @slot chromPaths Named character vector mapping canonical chromosome +#' (e.g. \code{"21"}, \code{"X"}) to the per-chromosome payload path/prefix. +#' Empty (\code{character(0)}) for a single-file handle; non-empty marks a +#' one-file-per-chromosome (sharded) handle whose extraction is routed by +#' chromosome. #' @export setClass("GenotypeHandle", representation( @@ -28,7 +38,11 @@ setClass("GenotypeHandle", snpInfo = "data.frame", nSamples = "integer", sampleIds = "character", - pgenPtr = "ANY" + pgenPtr = "ANY", + chromPaths = "character" + ), + prototype = prototype( + chromPaths = character(0) ), validity = function(object) { errors <- character() @@ -38,6 +52,12 @@ setClass("GenotypeHandle", if (!object@format %in% valid_formats) errors <- c(errors, paste("'format' must be one of:", paste(valid_formats, collapse = ", "))) + if (length(object@chromPaths) > 0L) { + nm <- names(object@chromPaths) + if (is.null(nm) || any(!nzchar(nm)) || anyDuplicated(nm)) + errors <- c(errors, paste("'chromPaths' must be a uniquely-named", + "character vector (names = chromosomes)")) + } if (length(errors) == 0) TRUE else errors } ) @@ -45,7 +65,14 @@ setClass("GenotypeHandle", #' @export setMethod("show", "GenotypeHandle", function(object) { cat(sprintf("GenotypeHandle [%s]\n", object@format)) - cat(sprintf(" Path: %s\n", object@path)) + chromPaths <- .genotypeChromPaths(object) + if (length(chromPaths) > 0L) { + cat(sprintf(" Chrom-meta: %s\n", object@path)) + cat(sprintf(" %d per-chromosome files: %s\n", + length(chromPaths), paste(names(chromPaths), collapse = ", "))) + } else { + cat(sprintf(" Path: %s\n", object@path)) + } cat(sprintf(" %d samples, %d SNPs\n", object@nSamples, nrow(object@snpInfo))) }) @@ -67,6 +94,15 @@ setMethod("show", "GenotypeHandle", function(object) { #' \code{plink1Prefix} or arrange symlinks at a common stem.} #' \item{\code{pgen} + \code{pvar} + \code{psam}}{Explicit PLINK2 #' triplet. Same shared-stem requirement.} +#' \item{\code{genoMeta}}{One genotype file per chromosome. Either a path +#' to a whitespace/TSV meta file whose first column is the chromosome +#' (\code{#chr}) and second column the per-chromosome payload +#' (a \code{.bed}/\code{.pgen}/\code{.vcf[.gz]}/\code{.bcf}/\code{.gds} +#' file or a PLINK prefix; relative paths resolve against the meta +#' file's directory), or a named character vector mapping chromosome to +#' payload. All shards must share one format and identical sample IDs in +#' the same order. Extraction is routed to the correct file by the +#' requested region's chromosome.} #' } #' The constructor opens the file for metadata only (sample IDs and SNP #' info); dosage extraction is deferred until \code{extractBlockGenotypes()} @@ -90,6 +126,10 @@ setMethod("show", "GenotypeHandle", function(object) { #' @param region Region specification for \code{ldMeta} lookup: #' \code{"chr:start-end"} string or a one-row data.frame with #' \code{chrom}, \code{start}, \code{end}. +#' @param genoMeta One-file-per-chromosome specification: a path to a +#' \code{#chr,path} meta file or a named character vector +#' (names = chromosomes, values = payload paths/prefixes). Optionally pass +#' \code{format} via \code{...} to force a single backend for every shard. #' @param ... Additional arguments forwarded to the format-specific reader. #' @return A \code{GenotypeHandle} object. #' @export @@ -98,6 +138,7 @@ GenotypeHandle <- function(path = NULL, bed = NULL, bim = NULL, fam = NULL, pgen = NULL, pvar = NULL, psam = NULL, ldMeta = NULL, region = NULL, + genoMeta = NULL, ...) { bedTrioGiven <- !is.null(bed) || !is.null(bim) || !is.null(fam) bedTrioComplete <- !is.null(bed) && !is.null(bim) && !is.null(fam) @@ -123,13 +164,14 @@ GenotypeHandle <- function(path = NULL, plink2Prefix = !is.null(plink2Prefix), plink1Triplet = bedTrioComplete, plink2Triplet = pgenTrioComplete, - ldMeta = !is.null(ldMeta) + ldMeta = !is.null(ldMeta), + genoMeta = !is.null(genoMeta) ) nSources <- sum(sources) if (nSources != 1L) { stop("Exactly one of `path`, `plink1Prefix`, `plink2Prefix`, the ", - "bed/bim/fam triplet, the pgen/pvar/psam triplet, or `ldMeta` ", - "must be specified (got ", nSources, ").") + "bed/bim/fam triplet, the pgen/pvar/psam triplet, `ldMeta`, or ", + "`genoMeta` must be specified (got ", nSources, ").") } if (sources[["path"]]) { @@ -150,6 +192,9 @@ GenotypeHandle <- function(path = NULL, if (sources[["ldMeta"]]) { return(.genotypeHandleFromLdMeta(ldMeta, region, ...)) } + if (sources[["genoMeta"]]) { + return(.genotypeHandleFromChromMeta(genoMeta, ...)) + } } .genotypeHandleFromLdMeta <- function(ldMeta, region, ...) { @@ -236,6 +281,138 @@ GenotypeHandle <- function(path = NULL, .makePlink2Handle(unname(stems[1L]), ...) } +# --------------------------------------------------------------------------- +# One-file-per-chromosome (sharded) handle support +# --------------------------------------------------------------------------- + +# Canonicalize a chromosome label to a bare token (strip a leading "chr"). +# Used as the routing key in @chromPaths and when matching @snpInfo$CHR. +#' @keywords internal +.canonChr <- function(x) sub("^chr", "", as.character(x), ignore.case = TRUE) + +# Per-chromosome shard map, tolerant of GenotypeHandle objects deserialized +# from before the `chromPaths` slot existed (e.g. an RDS saved by an older +# pecotmr). Such objects have no `chromPaths` slot, so a direct `@` access +# errors; treat them as single-file handles. +#' @keywords internal +.genotypeChromPaths <- function(handle) { + tryCatch(handle@chromPaths, error = function(e) character(0)) +} + +# Parse the genoMeta input into a data.frame(chrom, path). Accepts either a +# path to a #chr/path meta file (whitespace- or tab-delimited, with header) +# or a named character vector (names = chromosomes). Relative payload paths +# in a meta file are resolved against the meta file's own directory. +#' @keywords internal +.parseChromMeta <- function(genoMeta) { + isMetaFile <- is.character(genoMeta) && length(genoMeta) == 1L && + is.null(names(genoMeta)) && file.exists(genoMeta) + if (isMetaFile) { + meta <- utils::read.table(genoMeta, header = TRUE, sep = "", + comment.char = "", stringsAsFactors = FALSE, + check.names = FALSE) + if (ncol(meta) < 2L) + stop("GenotypeHandle(genoMeta): meta file '", genoMeta, + "' must have at least 2 columns (chromosome, path).") + chrom <- as.character(meta[[1L]]) + pth <- as.character(meta[[2L]]) + base <- dirname(normalizePath(genoMeta)) + pth <- vapply(pth, function(p) { + if (grepl("^(/|[A-Za-z]:)", p) || file.exists(p)) p else file.path(base, p) + }, character(1), USE.NAMES = FALSE) + return(data.frame(chrom = chrom, path = pth, stringsAsFactors = FALSE)) + } + if (is.character(genoMeta) && length(genoMeta) >= 1L && + !is.null(names(genoMeta))) { + return(data.frame(chrom = names(genoMeta), path = unname(genoMeta), + stringsAsFactors = FALSE)) + } + stop("GenotypeHandle(genoMeta): expected a path to a `#chr,path` meta file ", + "or a named character vector (names = chromosomes, values = paths).") +} + +# Build a single-file GenotypeHandle for one shard payload, dispatching to the +# right reader. `format` (optional) forces a backend; otherwise it is detected +# from the file extension, falling back to PLINK prefix probing. +#' @keywords internal +.resolveGenotypeShard <- function(p, format = NULL) { + lower <- tolower(p) + if (!is.null(format)) { + if (format == "plink1") return(.makePlink1Handle(p)) + if (format == "plink2") return(.makePlink2Handle(p)) + return(readGenotypes(p, format = format)) + } + if (grepl("\\.vcf(\\.b?gz)?$", lower) || endsWith(lower, ".bcf")) + return(readGenotypes(p, format = "vcf")) + if (endsWith(lower, ".gds")) return(readGenotypes(p, format = "gds")) + if (endsWith(lower, ".bed")) + return(.makePlink1Handle(sub("\\.bed$", "", p, ignore.case = TRUE))) + if (endsWith(lower, ".pgen")) + return(.makePlink2Handle(sub("\\.pgen$", "", p, ignore.case = TRUE))) + # No recognized extension: treat as a PLINK prefix, probe for the sidecar. + if (file.exists(paste0(p, ".bed"))) return(.makePlink1Handle(p)) + if (file.exists(paste0(p, ".pgen"))) return(.makePlink2Handle(p)) + stop("GenotypeHandle(genoMeta): cannot determine genotype format for '", p, + "'. Use a recognized extension (.bed/.pgen/.vcf[.gz]/.bcf/.gds), a ", + "PLINK prefix, or pass `format=`.") +} + +# Assemble a sharded handle from a per-chromosome meta. Reads each shard's +# metadata via the existing single-file readers, validates a single shared +# format and identical sample IDs (same order, required for cross-shard +# cbind), and row-binds the per-shard snpInfo into one global index space. +#' @keywords internal +.genotypeHandleFromChromMeta <- function(genoMeta, ...) { + dots <- list(...) + format <- dots$format + parsed <- .parseChromMeta(genoMeta) + if (nrow(parsed) == 0L) + stop("GenotypeHandle(genoMeta): no chromosomes found in the meta input.") + + shards <- lapply(parsed$path, .resolveGenotypeShard, format = format) + + formats <- vapply(shards, function(h) h@format, character(1)) + if (length(unique(formats)) != 1L) + stop("GenotypeHandle(genoMeta): all per-chromosome files must share one ", + "format; got: ", paste(unique(formats), collapse = ", "), ".") + + sample0 <- shards[[1L]]@sampleIds + for (i in seq_along(shards)[-1L]) { + if (!identical(shards[[i]]@sampleIds, sample0)) + stop("GenotypeHandle(genoMeta): all per-chromosome files must have ", + "identical sample IDs in the same order (mismatch at '", + parsed$path[[i]], "').") + } + + unifiedSnpInfo <- do.call(rbind, lapply(shards, function(h) h@snpInfo)) + rownames(unifiedSnpInfo) <- NULL + + chromPaths <- character(0) + for (i in seq_along(shards)) { + chs <- unique(.canonChr(shards[[i]]@snpInfo$CHR)) + for (ch in chs) { + if (ch %in% names(chromPaths)) + stop("GenotypeHandle(genoMeta): chromosome '", ch, "' appears in ", + "more than one per-chromosome file.") + chromPaths[[ch]] <- shards[[i]]@path + } + } + + metaPath <- if (is.character(genoMeta) && length(genoMeta) == 1L && + is.null(names(genoMeta)) && file.exists(genoMeta)) + normalizePath(genoMeta) else "" + + new("GenotypeHandle", + path = metaPath, + format = formats[[1L]], + snpInfo = unifiedSnpInfo, + nSamples = shards[[1L]]@nSamples, + sampleIds = sample0, + pgenPtr = NULL, + chromPaths = chromPaths + ) +} + #' @rdname getSnpInfo #' @export setMethod("getSnpInfo", "GenotypeHandle", function(x) x@snpInfo) diff --git a/R/QtlDataset.R b/R/QtlDataset.R index 048c0911..6fe18cf4 100644 --- a/R/QtlDataset.R +++ b/R/QtlDataset.R @@ -177,11 +177,13 @@ setMethod("getGenotypeCovariates", "QtlDataset", setMethod("getScaleResiduals", "QtlDataset", function(x) x@scaleResiduals) # --- Internal: resolve the variant-selection region for the genotype handle. -# Returns a single GRanges. When `traitId` is supplied, expand each trait's -# rowRange by `cisWindow` bp and take the union span (per the multi-trait rule: -# `[min(start) - cisWindow, max(end) + cisWindow]`). When `region` is supplied, -# extend by `cisWindow` if given. Exactly one of (traitId, region) may be -# supplied; if neither is, return NULL meaning "all variants in handle". +# Returns a GRanges (one or more ranges). When `traitId` is supplied, expand +# each trait's rowRange by `cisWindow` bp and take the union span (per the +# multi-trait rule: `[min(start) - cisWindow, max(end) + cisWindow]`). When +# `region` is supplied it is taken literally and may contain multiple ranges +# (e.g. for joint multi-region extraction), optionally extended per-range by +# `cisWindow`. Exactly one of (traitId, region) may be supplied; if neither is, +# return NULL meaning "all variants in handle". .qtlResolveVariantRegion <- function(x, traitId = NULL, region = NULL, cisWindow = NULL) { if (!is.null(traitId) && !is.null(region)) { @@ -224,21 +226,22 @@ setMethod("getScaleResiduals", "QtlDataset", function(x) x@scaleResiduals) ranges = IRanges::IRanges(start = spanStart, end = spanEnd) )) } - # region path + # region path (one or more ranges, taken literally) if (!methods::is(region, "GRanges")) { stop("`region` must be a GRanges object.") } - if (length(region) != 1L) { - stop("`region` must be a single range.") + if (length(region) == 0L) { + stop("`region` must contain at least one range.") } if (!is.null(cisWindow)) { if (length(cisWindow) != 1L || cisWindow < 0) { stop("`cisWindow` must be a single non-negative value.") } + # Extend every range by cisWindow (vectorised over a multi-range region). region <- GenomicRanges::GRanges( seqnames = GenomicRanges::seqnames(region), ranges = IRanges::IRanges( - start = max(1L, GenomicRanges::start(region) - cisWindow), + start = pmax(1L, GenomicRanges::start(region) - cisWindow), end = GenomicRanges::end(region) + cisWindow ) ) @@ -246,20 +249,26 @@ setMethod("getScaleResiduals", "QtlDataset", function(x) x@scaleResiduals) region } -# Internal: map a GRanges region into 1-based snpIdx into handle@snpInfo. +# Internal: map a GRanges region (one or more ranges) into 1-based snpIdx into +# handle@snpInfo. Indices are unioned across ranges in range order (first +# occurrence wins), so overlapping ranges contribute each variant once. .qtlVariantIndices <- function(x, region = NULL) { handle <- x@genotypes if (is.null(region)) { return(seq_len(nrow(handle@snpInfo))) } - chr <- as.character(GenomicRanges::seqnames(region))[[1L]] - chrCanon <- sub("^chr", "", chr, ignore.case = TRUE) snpInfo <- handle@snpInfo siChr <- sub("^chr", "", as.character(snpInfo$CHR), ignore.case = TRUE) bp <- as.integer(snpInfo$BP) - start <- GenomicRanges::start(region) - end <- GenomicRanges::end(region) - which(siChr == chrCanon & bp >= start & bp <= end) + rChr <- sub("^chr", "", as.character(GenomicRanges::seqnames(region)), + ignore.case = TRUE) + rStart <- GenomicRanges::start(region) + rEnd <- GenomicRanges::end(region) + idx <- integer(0) + for (i in seq_along(region)) { + idx <- c(idx, which(siChr == rChr[i] & bp >= rStart[i] & bp <= rEnd[i])) + } + unique(idx) } # Internal: extract the panel dosage block (samples x variants) for the diff --git a/R/fineMappingPipeline.R b/R/fineMappingPipeline.R index 928b4a06..06ba7ce3 100644 --- a/R/fineMappingPipeline.R +++ b/R/fineMappingPipeline.R @@ -131,7 +131,14 @@ #' selection. Mutually exclusive with \code{traitId}. #' @param cisWindow For QtlDataset: cis-window (bp) around each trait's #' genomic position when extracting variants. Required when -#' \code{traitId} is supplied. +#' \code{traitId} is supplied. Mutually exclusive with \code{region}. +#' @param jointRegions For QtlDataset with a multi-range \code{region}: +#' \code{FALSE} (default) fits each range independently and merges the +#' per-range results into one entry per (study, context, trait, method) — +#' the merged \code{susieFit} is a named list of per-region fits and +#' credible-set labels are renumbered to stay unique. \code{TRUE} +#' concatenates the ranges' genotypes into one joint fit. Ignored for a +#' single-range / cis (\code{traitId} + \code{cisWindow}) request. #' @param addSusieInf Logical. When \code{susieInf} is in #' \code{methods} alongside \code{susie} and/or \code{susieAsh}, #' controls whether the SuSiE-inf fit initialises the chained @@ -624,6 +631,128 @@ setGeneric("fineMappingPipeline", out$finemappingEntry } +# --- Multi-region (jointRegions) helpers ------------------------------------ + +# Resolve the per-trait X windows from a (region, jointRegions) pair. The cis +# path (region NULL) is a single trait-derived block; an explicit `region` is +# taken literally as one joint block (jointRegions=TRUE -> concatenated +# genotypes) or one block per range (jointRegions=FALSE -> independent fits +# merged downstream). Shared by the QtlDataset / MultiStudyQtlDataset +# fineMapping & twas methods. +#' @keywords internal +.makeXRegions <- function(region, jointRegions) { + if (is.null(region)) { + list(NULL) + } else if (isTRUE(jointRegions)) { + list(region) + } else { + lapply(seq_along(region), function(i) region[i]) + } +} + +# Fit every requested univariate token on one residualized (X, y) block, +# returning a named list (token -> FineMappingEntry). Extracted from the +# univariate dispatch so the same logic serves the cis path (one block), the +# jointRegions=TRUE path (one concatenated block) and the jointRegions=FALSE +# path (one block per region, merged afterwards via .fmMergeEntries). +.fmFitXBlock <- function(X, y, toRun, addSusieInf, coverage, + secondaryCoverage, signalCutoff, minAbsCorr, + methodArgs, verbose, ctx, tid) { + chainLocal <- .fmResolveSusieChain(toRun, addSusieInf) + infFit <- NULL + if (chainLocal$runInf) { + if (verbose >= 1) + message(sprintf("Fitting susieInf for (context='%s', trait='%s') ...", + ctx, tid)) + infFit <- .fmFitSusieIndiv(X, y, "susieInf", coverage = coverage, + userArgs = methodArgs[["susieInf"]]) + } + out <- list() + for (tk in toRun) { + if (tk == "susieInf") { + if (!chainLocal$keepInf) next + fit <- infFit + } else { + chainFrom <- if ((tk == "susie" && chainLocal$chainSusie) || + (tk == "susieAsh" && chainLocal$chainAsh)) + infFit else NULL + if (verbose >= 1) + message(sprintf("Fitting %s for (context='%s', trait='%s') ...", + tk, ctx, tid)) + fit <- .fmFitSusieIndiv(X, y, tk, chainFromInf = chainFrom, + coverage = coverage, userArgs = methodArgs[[tk]]) + } + out[[tk]] <- .fmPostprocessOne( + fit = fit, method = tk, dataX = X, dataY = y, coverage = coverage, + secondaryCoverage = secondaryCoverage, signalCutoff = signalCutoff, + minAbsCorr = minAbsCorr, csInput = "X") + } + out +} + +# Extract integer credible-set indices from a "_" vector. +.fmCsIdx <- function(csVec) { + suppressWarnings(as.integer(sub("^.*_([0-9]+)$", "\\1", as.character(csVec)))) +} + +# Re-number credible-set membership labels by `offset`, preserving the +# "_0" (not-in-any-CS) sentinel. +.fmRelabelCs <- function(csVec, offset) { + csVec <- as.character(csVec) + if (offset == 0L) return(csVec) + parts <- regmatches(csVec, regexec("^(.*)_([0-9]+)$", csVec)) + vapply(seq_along(csVec), function(j) { + p <- parts[[j]] + if (length(p) != 3L) return(csVec[[j]]) + idx <- as.integer(p[[3L]]) + if (idx == 0L) csVec[[j]] else paste0(p[[2L]], "_", idx + offset) + }, character(1)) +} + +# Merge per-region FineMappingEntry payloads (same study/context/trait/method, +# independent fits) into one entry: concatenate variants and topLoci rows, +# renumber credible sets so per-region indices do not collide, and keep the +# per-region SuSiE fits as a named list in `susieFit` (consumers needing a +# single fit must iterate the list). +.fmMergeEntries <- function(entries) { + entries <- entries[!vapply(entries, is.null, logical(1))] + if (length(entries) == 0L) return(NULL) + if (length(entries) == 1L) return(entries[[1L]]) + variantIds <- unlist(lapply(entries, function(e) e@variantIds), + use.names = FALSE) + tls <- lapply(entries, function(e) e@topLoci) + csCols <- grep("^cs_[0-9]+$", + unique(unlist(lapply(tls, names))), value = TRUE) + offsets <- setNames(integer(length(csCols)), csCols) + for (i in seq_along(tls)) { + tl <- tls[[i]] + for (cc in csCols) { + if (!cc %in% names(tl)) next + idx <- .fmCsIdx(tl[[cc]]) + tl[[cc]] <- .fmRelabelCs(tl[[cc]], offsets[[cc]]) + offsets[[cc]] <- offsets[[cc]] + max(c(0L, idx), na.rm = TRUE) + } + tls[[i]] <- tl + } + topLoci <- do.call(rbind, tls) + rownames(topLoci) <- NULL + susieFit <- setNames(lapply(entries, function(e) e@susieFit), + paste0("region", seq_along(entries))) + FineMappingEntry(variantIds = variantIds, susieFit = susieFit, + topLoci = topLoci) +} + +# Run a joint-method fit (mvsusie / fsusie) once per region block via the +# method-specific `fitOneRegion(rg)` closure (returns one FineMappingEntry per +# region), then merge across regions into a single shared entry. A single block +# (cis or jointRegions=TRUE) returns its entry unchanged. +.fmJointBlocks <- function(xRegions, fitOneRegion) { + ents <- lapply(seq_along(xRegions), function(i) fitOneRegion(xRegions[[i]])) + ents <- ents[!vapply(ents, is.null, logical(1))] + if (length(ents) == 0L) return(NULL) + if (length(ents) == 1L) ents[[1L]] else .fmMergeEntries(ents) +} + # Merge per-method user kwargs onto a base arg list. `userArgs` is the # per-token kwargs supplied by the caller (e.g. `list(L = 1, refine = @@ -744,6 +873,7 @@ setMethod("fineMappingPipeline", "QtlDataset", traitId = NULL, region = NULL, cisWindow = NULL, + jointRegions = FALSE, jointSpecification = NULL, addSusieInf = TRUE, coverage = 0.95, @@ -761,6 +891,14 @@ setMethod("fineMappingPipeline", "QtlDataset", residualizeGenotypeCovariates = TRUE, ...) { naAction <- match.arg(naAction) + # `cisWindow` expands a trait's own coordinates; `region` is taken + # literally. Supplying both signals a misunderstanding -> reject. + if (!is.null(region) && !is.null(cisWindow)) { + stop("fineMappingPipeline(QtlDataset): specify either `region` or ", + "`cisWindow`, not both. `cisWindow` expands each trait's own ", + "coordinates, whereas `region` is the literal variant window.") + } + xRegions <- .makeXRegions(region, jointRegions) parsedJointSpec <- parseJointSpecification(jointSpecification, data) norm <- .fmNormalizeMethods(methods) tokens <- norm$tokens @@ -778,7 +916,7 @@ setMethod("fineMappingPipeline", "QtlDataset", parsedJointSpec, data, intersect(tokens, c("mvsusie", "fsusie")), contexts, traitId, cisWindow, coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs) + methodArgs = methodArgs, xRegions = xRegions) tokens <- setdiff(tokens, c("mvsusie", "fsusie")) methodArgs <- methodArgs[tokens] if (length(tokens) == 0L) { @@ -861,6 +999,10 @@ setMethod("fineMappingPipeline", "QtlDataset", } # ---- Univariate dispatch: per (context, trait), per method. + # X is drawn from each window in `xRegions` (cis = one trait-derived block; + # multi-region = one block per range), fitted independently, then merged + # per token via .fmMergeEntries so every (study, context, trait, method) + # produces a single entry. if (length(univTokens) > 0L) { for (ctx in useCtx) { for (tid in perCtxTraits[[ctx]]) { @@ -878,53 +1020,33 @@ setMethod("fineMappingPipeline", "QtlDataset", Y <- .fmResidPheno( data, contexts = ctx, traitId = tid, naAction = naAction) - X <- .fmResidGeno( - data, contexts = ctx, traitId = tid, - cisWindow = cisWindow, samples = rownames(Y)) - common <- intersect(rownames(X), rownames(Y)) - if (length(common) < 2L) { - stop(sprintf( - "fineMappingPipeline: too few shared samples between residualized X and Y for (context='%s', trait='%s').", - ctx, tid)) - } - X <- X[common, , drop = FALSE] - y <- Y[common, , drop = FALSE] - if (ncol(y) > 1L) y <- y[, 1L, drop = TRUE] else y <- drop(y) - chainLocal <- .fmResolveSusieChain(toRun, addSusieInf) - infFit <- NULL - if (chainLocal$runInf) { - if (verbose >= 1) - message(sprintf("Fitting susieInf for (context='%s', trait='%s') ...", ctx, tid)) - infFit <- .fmFitSusieIndiv(X, y, "susieInf", - coverage = coverage, - userArgs = methodArgs[["susieInf"]]) - } - - for (tk in toRun) { - if (tk == "susieInf") { - if (!chainLocal$keepInf) next - fit <- infFit + blockEntries <- lapply(xRegions, function(rg) { + X <- if (is.null(rg)) { + .fmResidGeno(data, contexts = ctx, traitId = tid, + cisWindow = cisWindow, samples = rownames(Y)) } else { - chainFrom <- if ((tk == "susie" && chainLocal$chainSusie) || - (tk == "susieAsh" && chainLocal$chainAsh)) - infFit else NULL - if (verbose >= 1) - message(sprintf("Fitting %s for (context='%s', trait='%s') ...", - tk, ctx, tid)) - fit <- .fmFitSusieIndiv(X, y, tk, - chainFromInf = chainFrom, - coverage = coverage, - userArgs = methodArgs[[tk]]) + .fmResidGeno(data, contexts = ctx, region = rg, + samples = rownames(Y)) } - entry <- .fmPostprocessOne( - fit = fit, method = tk, - dataX = X, dataY = y, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "X") + common <- intersect(rownames(X), rownames(Y)) + if (length(common) < 2L) { + stop(sprintf( + "fineMappingPipeline: too few shared samples between residualized X and Y for (context='%s', trait='%s').", + ctx, tid)) + } + X <- X[common, , drop = FALSE] + y <- Y[common, , drop = FALSE] + if (ncol(y) > 1L) y <- y[, 1L, drop = TRUE] else y <- drop(y) + .fmFitXBlock(X, y, toRun, addSusieInf, coverage, + secondaryCoverage, signalCutoff, minAbsCorr, + methodArgs, verbose, ctx, tid) + }) + + for (tk in toRun) { + ents <- lapply(blockEntries, function(be) be[[tk]]) + if (any(vapply(ents, is.null, logical(1)))) next + entry <- if (length(ents) == 1L) ents[[1L]] else .fmMergeEntries(ents) pushRow(study, ctx, tid, tk, entry) } } @@ -965,30 +1087,14 @@ setMethod("fineMappingPipeline", "QtlDataset", for (job in mvJobs) { if (identical(job$mode, "multiContext")) { tid <- job$trait - # Build Y matrix per context for this single trait. Sample - # intersection across contexts; phenotypeCovariates differ per - # context but getResidualizedPhenotypes already residualises. + # Joint Y across contexts for this single trait. X is drawn from each + # region block (cis or explicit region) and merged across regions. contextsHere <- job$contexts - # Use the union of per-context cis-windows for variant extraction. Yres <- .fmResidPheno( data, contexts = contextsHere, traitId = tid, naAction = naAction) if (length(contextsHere) == 1L) Yres <- setNames(list(Yres), contextsHere) - commonSamples <- Reduce(intersect, lapply(Yres, rownames)) - X <- .fmResidGeno( - data, contexts = contextsHere, traitId = tid, - cisWindow = cisWindow, samples = commonSamples) - commonSamples <- intersect(commonSamples, rownames(X)) - if (length(commonSamples) < 2L) { - stop("fineMappingPipeline(QtlDataset, mvsusie multi-context): ", - "insufficient shared samples across selected contexts.") - } - X <- X[commonSamples, , drop = FALSE] - Y <- do.call(cbind, lapply(contextsHere, function(ctx) { - ym <- Yres[[ctx]][commonSamples, , drop = FALSE] - colnames(ym) <- ctx - ym - })) + baseSamples <- Reduce(intersect, lapply(Yres, rownames)) # Resume cache: every (study, ctx, tid, mvsusie) row. allCached <- TRUE @@ -1007,23 +1113,41 @@ setMethod("fineMappingPipeline", "QtlDataset", if (verbose >= 1) message(sprintf("Fitting mvsusie (multi-context) for trait='%s' ...", tid)) - mvBaseArgs <- list( - X = X, Y = Y, - prior_variance = mvsusieR::create_mixture_prior(R = ncol(Y)), - coverage = coverage) - fit <- do.call(fitMvsusie, - .fmMergeUserArgs(mvBaseArgs, "mvsusie", - methodArgs[["mvsusie"]])) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = X, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "X") - # Share the joint fit across contexts via copy-on-modify. + fitOneRegion <- function(rg) { + X <- if (is.null(rg)) { + .fmResidGeno(data, contexts = contextsHere, traitId = tid, + cisWindow = cisWindow, samples = baseSamples) + } else { + .fmResidGeno(data, contexts = contextsHere, region = rg, + samples = baseSamples) + } + cs <- intersect(baseSamples, rownames(X)) + if (length(cs) < 2L) { + stop("fineMappingPipeline(QtlDataset, mvsusie multi-context): ", + "insufficient shared samples across selected contexts.") + } + Xc <- X[cs, , drop = FALSE] + Yc <- do.call(cbind, lapply(contextsHere, function(ctx) { + ym <- Yres[[ctx]][cs, , drop = FALSE] + colnames(ym) <- ctx + ym + })) + mvBaseArgs <- list( + X = Xc, Y = Yc, + prior_variance = mvsusieR::create_mixture_prior(R = ncol(Yc)), + coverage = coverage) + fit <- do.call(fitMvsusie, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + methodArgs[["mvsusie"]])) + fit <- .setFinemappingFitClass(fit, "mvsusie") + .fmPostprocessOne( + fit = fit, method = "mvsusie", dataX = Xc, dataY = NULL, + coverage = coverage, secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, minAbsCorr = minAbsCorr, + csInput = "X") + } + entry <- .fmJointBlocks(xRegions, fitOneRegion) + # Share the joint (merged) entry across contexts via copy-on-modify. for (ctx in contextsHere) { pushRow(study, ctx, tid, "mvsusie", entry) } @@ -1048,36 +1172,40 @@ setMethod("fineMappingPipeline", "QtlDataset", Y <- .fmResidPheno( data, contexts = ctx, traitId = traits, naAction = naAction) - X <- .fmResidGeno( - data, contexts = ctx, traitId = traits, - cisWindow = cisWindow, samples = rownames(Y)) - common <- intersect(rownames(X), rownames(Y)) - if (length(common) < 2L) { - stop(sprintf( - "fineMappingPipeline(QtlDataset, mvsusie multi-trait): too few shared samples in context '%s'.", - ctx)) - } - X <- X[common, , drop = FALSE] - Y <- Y[common, , drop = FALSE] if (verbose >= 1) message(sprintf("Fitting mvsusie (multi-trait) for context='%s' ...", ctx)) - mvBaseArgs <- list( - X = X, Y = Y, - prior_variance = mvsusieR::create_mixture_prior(R = ncol(Y)), - coverage = coverage) - fit <- do.call(fitMvsusie, - .fmMergeUserArgs(mvBaseArgs, "mvsusie", - methodArgs[["mvsusie"]])) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = X, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "X") + fitOneRegion <- function(rg) { + X <- if (is.null(rg)) { + .fmResidGeno(data, contexts = ctx, traitId = traits, + cisWindow = cisWindow, samples = rownames(Y)) + } else { + .fmResidGeno(data, contexts = ctx, region = rg, + samples = rownames(Y)) + } + common <- intersect(rownames(X), rownames(Y)) + if (length(common) < 2L) { + stop(sprintf( + "fineMappingPipeline(QtlDataset, mvsusie multi-trait): too few shared samples in context '%s'.", + ctx)) + } + Xc <- X[common, , drop = FALSE] + Yc <- Y[common, , drop = FALSE] + mvBaseArgs <- list( + X = Xc, Y = Yc, + prior_variance = mvsusieR::create_mixture_prior(R = ncol(Yc)), + coverage = coverage) + fit <- do.call(fitMvsusie, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + methodArgs[["mvsusie"]])) + fit <- .setFinemappingFitClass(fit, "mvsusie") + .fmPostprocessOne( + fit = fit, method = "mvsusie", dataX = Xc, dataY = NULL, + coverage = coverage, secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, minAbsCorr = minAbsCorr, + csInput = "X") + } + entry <- .fmJointBlocks(xRegions, fitOneRegion) for (tid in traits) { pushRow(study, ctx, tid, "mvsusie", entry) } @@ -1115,44 +1243,46 @@ setMethod("fineMappingPipeline", "QtlDataset", Y <- .fmResidPheno( data, contexts = ctx, traitId = traits, naAction = naAction) - X <- .fmResidGeno( - data, contexts = ctx, traitId = traits, - cisWindow = cisWindow, samples = rownames(Y)) - common <- intersect(rownames(X), rownames(Y)) - if (length(common) < 2L) { - stop(sprintf("fineMappingPipeline(QtlDataset, fsusie): too few shared samples in context '%s'.", ctx)) - } - X <- X[common, , drop = FALSE] - Y <- Y[common, , drop = FALSE] - # Per-trait genomic positions for the wavelet model. Use the - # midpoint of each trait's rowRanges in this context. + # Per-trait genomic positions for the wavelet model. Region-independent + # (depends on the trait set / Y columns): midpoint of each trait range. se <- getPhenotypes(data, contexts = ctx, traitId = traits) - rr <- SummarizedExperiment::rowRanges(se) - # Reorder rr to the column order of Y. rrIds <- rownames(se) ord <- match(colnames(Y), rrIds) if (anyNA(ord)) { stop("fineMappingPipeline(QtlDataset, fsusie): unable to align trait positions to Y columns.") } - rr <- rr[ord] + rr <- SummarizedExperiment::rowRanges(se)[ord] pos <- (GenomicRanges::start(rr) + GenomicRanges::end(rr)) / 2 if (verbose >= 1) message(sprintf("Fitting fsusie for context='%s' (multi-trait, %d traits) ...", ctx, length(traits))) - fit <- do.call(fitFsusie, - .fmMergeUserArgs(list(X = X, Y = Y, pos = pos), - "fsusie", methodArgs[["fsusie"]])) - fit <- .setFinemappingFitClass(fit, "fsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "fsusie", - dataX = X, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "fsusie") + fitOneRegion <- function(rg) { + X <- if (is.null(rg)) { + .fmResidGeno(data, contexts = ctx, traitId = traits, + cisWindow = cisWindow, samples = rownames(Y)) + } else { + .fmResidGeno(data, contexts = ctx, region = rg, + samples = rownames(Y)) + } + common <- intersect(rownames(X), rownames(Y)) + if (length(common) < 2L) { + stop(sprintf("fineMappingPipeline(QtlDataset, fsusie): too few shared samples in context '%s'.", ctx)) + } + Xc <- X[common, , drop = FALSE] + Yc <- Y[common, , drop = FALSE] + fit <- do.call(fitFsusie, + .fmMergeUserArgs(list(X = Xc, Y = Yc, pos = pos), + "fsusie", methodArgs[["fsusie"]])) + fit <- .setFinemappingFitClass(fit, "fsusie") + .fmPostprocessOne( + fit = fit, method = "fsusie", dataX = Xc, dataY = NULL, + coverage = coverage, secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, minAbsCorr = minAbsCorr, + csInput = "fsusie") + } + entry <- .fmJointBlocks(xRegions, fitOneRegion) for (tid in traits) { pushRow(study, ctx, tid, "fsusie", entry) } @@ -1188,6 +1318,7 @@ setMethod("fineMappingPipeline", "MultiStudyQtlDataset", traitId = NULL, region = NULL, cisWindow = NULL, + jointRegions = FALSE, jointSpecification = NULL, addSusieInf = TRUE, coverage = 0.95, @@ -1205,6 +1336,11 @@ setMethod("fineMappingPipeline", "MultiStudyQtlDataset", residualizeGenotypeCovariates = TRUE, ...) { naAction <- match.arg(naAction) + if (!is.null(region) && !is.null(cisWindow)) { + stop("fineMappingPipeline(MultiStudyQtlDataset): specify either ", + "`region` or `cisWindow`, not both.") + } + xRegions <- .makeXRegions(region, jointRegions) parsedJointSpec <- parseJointSpecification(jointSpecification, data) norm <- .fmNormalizeMethods(methods) tokens <- norm$tokens @@ -1220,7 +1356,7 @@ setMethod("fineMappingPipeline", "MultiStudyQtlDataset", parsedJointSpec, data, intersect(tokens, c("mvsusie", "fsusie")), contexts, traitId, cisWindow, coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs) + methodArgs = methodArgs, xRegions = xRegions) # Forward the still-pending (non-joint) tokens + their kwargs to the # per-QtlDataset recursion below, preserving the list shape so # methodArgs land on the right tokens. @@ -1249,6 +1385,7 @@ setMethod("fineMappingPipeline", "MultiStudyQtlDataset", traitId = traitId, region = region, cisWindow = cisWindow, + jointRegions = jointRegions, jointSpecification = NULL, addSusieInf = addSusieInf, coverage = coverage, diff --git a/R/genotypeIo.R b/R/genotypeIo.R index 8b656a7c..a9cd187a 100644 --- a/R/genotypeIo.R +++ b/R/genotypeIo.R @@ -208,6 +208,12 @@ setMethod("readGenotypes", #' } #' @export extractBlockGenotypes <- function(handle, snpIdx, meanImpute = TRUE) { + # One-file-per-chromosome handle: route by chromosome to the right file. + # `.genotypeChromPaths` tolerates handles deserialized before the slot + # existed (treated as single-file). + if (length(.genotypeChromPaths(handle)) > 0L) { + return(.extractBlockSharded(handle, snpIdx, meanImpute = meanImpute)) + } fmt <- getFormat(handle) geno <- switch(fmt, "gds" = .extractBlockGds(handle, snpIdx), @@ -251,6 +257,60 @@ extractBlockGenotypes <- function(handle, snpIdx, meanImpute = TRUE) { ) } +# Extract a block from a one-file-per-chromosome (sharded) handle. The global +# snpIdx index the unified @snpInfo; we group them by chromosome, route each +# group to its per-chromosome payload via a transient single-file view (with +# the chromosome-local snpInfo so positional backends like PLINK2 stay valid), +# then row-bind across chromosomes (samples identical by construction) and +# restore the requested order. A single-chromosome request — the common cis +# case — returns its one SE directly. +#' @keywords internal +.extractBlockSharded <- function(handle, snpIdx, meanImpute = TRUE) { + sampleIds <- handle@sampleIds + if (length(snpIdx) == 0L) { + empty <- matrix(numeric(0), nrow = 0L, ncol = length(sampleIds), + dimnames = list(character(0), sampleIds)) + return(SummarizedExperiment( + assays = list(dosage = empty), + rowRanges = GRanges(), + colData = DataFrame(sampleId = sampleIds, row.names = sampleIds))) + } + + unifiedChr <- .canonChr(handle@snpInfo$CHR) + reqChr <- unifiedChr[snpIdx] + groups <- split(seq_along(snpIdx), reqChr) + + buildForChrom <- function(chrom, posInReq) { + if (!chrom %in% names(handle@chromPaths)) + stop("extractBlockGenotypes: no per-chromosome file for chromosome '", + chrom, "' (have: ", + paste(names(handle@chromPaths), collapse = ", "), ").") + blockGlobal <- which(unifiedChr == chrom) # file-order global indices + localIdx <- match(snpIdx[posInReq], blockGlobal) + th <- handle + th@path <- handle@chromPaths[[chrom]] + th@snpInfo <- handle@snpInfo[blockGlobal, , drop = FALSE] + th@pgenPtr <- NULL + th@chromPaths <- character(0) # treat as single-file + extractBlockGenotypes(th, localIdx, meanImpute = meanImpute) + } + + ses <- Map(buildForChrom, names(groups), groups) + if (length(ses) == 1L) return(ses[[1L]]) + + # Combine at the assay/rowRanges level (rather than rbind-ing the SEs, which + # trips on disjoint seqlevels) and restore the requested snpIdx order. + ord <- order(unlist(groups, use.names = FALSE)) + combinedDos <- do.call(rbind, lapply(ses, function(se) + SummarizedExperiment::assay(se, "dosage")))[ord, , drop = FALSE] + combinedGr <- suppressWarnings( + do.call(c, unname(lapply(ses, SummarizedExperiment::rowRanges))))[ord] + SummarizedExperiment( + assays = list(dosage = combinedDos), + rowRanges = combinedGr, + colData = SummarizedExperiment::colData(ses[[1L]])) +} + #' @keywords internal .extractBlockGds <- function(handle, snpIdx) { gds <- SNPRelate::snpgdsOpen(getPath(handle), readonly = TRUE, @@ -320,6 +380,10 @@ extractBlockGenotypes <- function(handle, snpIdx, meanImpute = TRUE) { # pointer errors out. Opening is cheap relative to dosage extraction. ptr <- getPgenPtr(handle) paths <- resolvePlink2Paths(getPath(handle)) + # A sharded handle routes through a transient view with pgenPtr = NULL (one + # pgen per chromosome), and a deserialized pointer is stale; open a fresh + # pgen up front in those cases rather than provoking a caught read error. + if (is.null(ptr)) ptr <- pgenlibr::NewPgen(paths$pgen) geno <- tryCatch( pgenlibr::ReadList(ptr, variant_subset = snpIdx, meanimpute = FALSE), error = function(e) { diff --git a/R/jointSpecification.R b/R/jointSpecification.R index f4e67230..500b069e 100644 --- a/R/jointSpecification.R +++ b/R/jointSpecification.R @@ -636,7 +636,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { # subset is too small to fit. # @noRd .buildIndividualCrossContextXY <- function(data, tid, scopedContexts, - cisWindow, verbose, label) { + cisWindow, verbose, label, + region = NULL) { perTraitContexts <- character(0) for (cx in scopedContexts) { se <- getPhenotypes(data, contexts = cx) @@ -650,9 +651,12 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { label, tid, length(perTraitContexts))) return(NULL) } - X <- .fmResidGeno( - data, contexts = perTraitContexts, traitId = tid, - cisWindow = cisWindow) + X <- if (is.null(region)) { + .fmResidGeno(data, contexts = perTraitContexts, traitId = tid, + cisWindow = cisWindow) + } else { + .fmResidGeno(data, contexts = perTraitContexts, region = region) + } Yres <- .fmResidPheno( data, contexts = perTraitContexts, traitId = tid) commonSamples <- Reduce(intersect, @@ -689,7 +693,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { # subset is too small. # @noRd .buildIndividualCrossTraitXY <- function(data, cx, scopedTraits, - cisWindow, verbose, label, study) { + cisWindow, verbose, label, study, + region = NULL) { se <- getPhenotypes(data, contexts = cx) traitsHere <- intersect(scopedTraits, rownames(se)) if (length(traitsHere) < 2L) { @@ -699,8 +704,12 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { label, cx, study, length(traitsHere))) return(NULL) } - X <- .fmResidGeno( - data, contexts = cx, traitId = traitsHere, cisWindow = cisWindow) + X <- if (is.null(region)) { + .fmResidGeno(data, contexts = cx, traitId = traitsHere, + cisWindow = cisWindow) + } else { + .fmResidGeno(data, contexts = cx, region = region) + } Y <- .fmResidPheno( data, contexts = cx, traitId = traitsHere) common <- intersect(rownames(X), rownames(Y)) @@ -717,7 +726,7 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { # QtlDataset. Returns list(X, Y, tuples) or NULL. # @noRd .buildComposedIndividualXY <- function(data, scope, study, cisWindow, - verbose, label) { + verbose, label, region = NULL) { scopedContexts <- scope$contexts[[study]] scopedTraits <- scope$traits[[study]] tuples <- list() @@ -736,8 +745,12 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { } allContexts <- unique(vapply(tuples, function(t) t$context, character(1L))) allTraits <- unique(vapply(tuples, function(t) t$trait, character(1L))) - X <- .fmResidGeno( - data, contexts = allContexts, traitId = allTraits, cisWindow = cisWindow) + X <- if (is.null(region)) { + .fmResidGeno(data, contexts = allContexts, traitId = allTraits, + cisWindow = cisWindow) + } else { + .fmResidGeno(data, contexts = allContexts, region = region) + } YresList <- .fmResidPheno( data, contexts = allContexts, traitId = allTraits) if (length(allContexts) == 1L) YresList <- setNames(list(YresList), allContexts) @@ -812,7 +825,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = list()) { + methodArgs = list(), + region = NULL) { jointMethods <- intersect(methods, "mvsusie") if (length(jointMethods) == 0L) return(NULL) @@ -837,7 +851,7 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { for (tid in scopedTraits) { xy <- .buildIndividualCrossContextXY( data, tid, scopedContexts, cisWindow, verbose, - label = "jointCrossContext") + label = "jointCrossContext", region = region) if (is.null(xy)) next if (verbose >= 1) @@ -892,7 +906,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = list()) { + methodArgs = list(), + region = NULL) { jointMethods <- intersect(methods, c("mvsusie", "fsusie")) if (length(jointMethods) == 0L) return(NULL) @@ -910,7 +925,7 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { for (cx in scopedContexts) { xy <- .buildIndividualCrossTraitXY( data, cx, scopedTraits, cisWindow, verbose, - label = "jointCrossTrait", study = study) + label = "jointCrossTrait", study = study, region = region) if (is.null(xy)) next for (mm in jointMethods) { @@ -1257,7 +1272,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = list()) { + methodArgs = list(), + region = NULL) { axes <- spec$axes if ("study" %in% axes) stop("composed jointSpecification (QtlDataset): axes including 'study' require sumstats input.") @@ -1274,7 +1290,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { if (!(study %in% scope$studies)) return(NULL) xy <- .buildComposedIndividualXY(data, scope, study, cisWindow, verbose, - label = "composed joint (QtlDataset)") + label = "composed joint (QtlDataset)", + region = region) if (is.null(xy)) return(NULL) if (verbose >= 1) @@ -1418,13 +1435,66 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { # Top-level joint dispatcher for fineMappingPipeline(QtlDataset). # @noRd +# Merge per-region QtlFineMappingResult collections (same keys across regions) +# into one by merging each (study, context, trait, method) row's entry via +# .fmMergeEntries (per-region susieFit list + renumbered credible sets). +# @noRd +.fmMergeResultsByKey <- function(results) { + base <- results[[1L]] + n <- nrow(base) + if (n == 0L) return(base) + keyOf <- function(r) paste(as.character(r$study), as.character(r$context), + as.character(r$trait), as.character(r$method), + sep = "\r") + baseKeys <- keyOf(base) + mergedEntries <- lapply(seq_len(n), function(i) { + perRegion <- lapply(results, function(r) { + hit <- which(keyOf(r) == baseKeys[[i]]) + if (length(hit)) r$entry[[hit[[1L]]]] else NULL + }) + .fmMergeEntries(Filter(Negate(is.null), perRegion)) + }) + QtlFineMappingResult( + study = as.character(base$study), context = as.character(base$context), + trait = as.character(base$trait), method = as.character(base$method), + entry = mergedEntries, + jointStudies = if ("jointStudies" %in% names(base)) base$jointStudies else NULL, + jointContexts = if ("jointContexts" %in% names(base)) base$jointContexts else NULL, + jointTraits = if ("jointTraits" %in% names(base)) base$jointTraits else NULL, + ldSketch = NULL) +} + .fmDispatchJointSpecsQtlDataset <- function(parsedJointSpec, data, methods, contexts, traitIds, cisWindow, coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = list()) { + methodArgs = list(), + xRegions = list(NULL)) { + # Run the joint dispatch once per region block, then merge per + # (study, context, trait, method) across regions. A single block (cis or + # jointRegions=TRUE concatenated) returns its result directly. + perRegion <- lapply(xRegions, function(rg) { + .fmDispatchJointSpecsQtlDatasetOneRegion( + parsedJointSpec, data, methods, contexts, traitIds, cisWindow, + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs, region = rg) + }) + perRegion <- Filter(Negate(is.null), perRegion) + if (length(perRegion) == 0L) return(NULL) + if (length(perRegion) == 1L) return(perRegion[[1L]]) + .fmMergeResultsByKey(perRegion) +} + +.fmDispatchJointSpecsQtlDatasetOneRegion <- function(parsedJointSpec, data, + methods, contexts, traitIds, + cisWindow, + coverage, secondaryCoverage, + signalCutoff, minAbsCorr, + verbose, + methodArgs = list(), + region = NULL) { out <- NULL for (i in seq_along(parsedJointSpec)) { spec <- parsedJointSpec[[i]] @@ -1433,7 +1503,7 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { res <- .fmDispatchComposedQtlDataset( spec, data, methods, contexts, traitIds, cisWindow, coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs) + methodArgs = methodArgs, region = region) if (!is.null(res)) out <- if (is.null(out)) res else .rbindFineMappingResult(out, res, ldSketch = NULL) @@ -1444,11 +1514,11 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { context = .fmDispatchCrossContextQtlDataset( spec, data, methods, contexts, traitIds, cisWindow, coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs), + methodArgs = methodArgs, region = region), trait = .fmDispatchCrossTraitQtlDataset( spec, data, methods, contexts, traitIds, cisWindow, coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs), + methodArgs = methodArgs, region = region), study = stop( "fineMappingPipeline(QtlDataset): jointSpecification with axes = 'study' requires sumstats input. ", "QtlDataset represents a single individual-level study; cross-study joints operate on the sumstats slot of MultiStudyQtlDataset or on QtlSumStats directly."), @@ -1518,7 +1588,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = list()) { + methodArgs = list(), + xRegions = list(NULL)) { out <- NULL embeddedLd <- NULL qtlDatasets <- getQtlDatasets(data) @@ -1541,7 +1612,7 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { qdRes <- .fmDispatchJointSpecsQtlDataset( nonStudyAxisSpecs, qd, methods, contexts, traitIds, cisWindow, coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs) + methodArgs = methodArgs, xRegions = xRegions) if (!is.null(qdRes)) out <- if (is.null(out)) qdRes else .rbindFineMappingResult(out, qdRes, ldSketch = NULL) @@ -1576,7 +1647,7 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { .twasDispatchCrossContextQtlDataset <- function(spec, data, methods, contexts, traitIds, cisWindow, dataType, - verbose) { + verbose, region = NULL) { jointMethods <- intersect(methods, "mrmash") if (length(jointMethods) == 0L) return(NULL) @@ -1601,7 +1672,7 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { for (tid in scopedTraits) { xy <- .buildIndividualCrossContextXY( data, tid, scopedContexts, cisWindow, verbose, - label = "jointCrossContext (twas QtlDataset)") + label = "jointCrossContext (twas QtlDataset)", region = region) if (is.null(xy)) next if (verbose >= 1) @@ -1642,7 +1713,7 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { .twasDispatchCrossTraitQtlDataset <- function(spec, data, methods, contexts, traitIds, cisWindow, dataType, - verbose) { + verbose, region = NULL) { jointMethods <- intersect(methods, "mrmash") if (length(jointMethods) == 0L) return(NULL) @@ -1660,7 +1731,7 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { for (cx in scopedContexts) { xy <- .buildIndividualCrossTraitXY( data, cx, scopedTraits, cisWindow, verbose, - label = "jointCrossTrait (twas)", study = study) + label = "jointCrossTrait (twas)", study = study, region = region) if (is.null(xy)) next if (verbose >= 1) @@ -2015,7 +2086,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { # @noRd .twasDispatchComposedQtlDataset <- function(spec, data, methods, contexts, traitIds, cisWindow, - dataType, verbose) { + dataType, verbose, + region = NULL) { axes <- spec$axes if ("study" %in% axes) stop("composed jointSpecification (twas QtlDataset): axes including 'study' require sumstats input.") @@ -2031,7 +2103,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { if (!(study %in% scope$studies)) return(NULL) xy <- .buildComposedIndividualXY(data, scope, study, cisWindow, verbose, - label = "composed joint (twas QtlDataset)") + label = "composed joint (twas QtlDataset)", + region = region) if (is.null(xy)) return(NULL) if (verbose >= 1) @@ -2061,17 +2134,64 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { # Top-level joint dispatcher for twasWeightsPipeline(QtlDataset). # @noRd +# Merge per-region TwasWeights collections (same keys across regions) into one +# by concatenating each (study, context, trait, method) row's entry via +# .twasMergeRegionEntries (stacked weights + flat per-region cvPerformance). +# @noRd +.twasMergeResultsByKey <- function(results, regionLabels) { + base <- results[[1L]] + n <- length(base$method) + if (n == 0L) return(base) + keyOf <- function(r) paste(as.character(r$study), as.character(r$context), + as.character(r$trait), as.character(r$method), + sep = "\r") + baseKeys <- keyOf(base) + mergedEntries <- lapply(seq_len(n), function(i) { + perRegion <- lapply(results, function(r) { + hit <- which(keyOf(r) == baseKeys[[i]]) + if (length(hit)) r$entry[[hit[[1L]]]] else NULL + }) + keep <- !vapply(perRegion, is.null, logical(1)) + .twasMergeRegionEntries(perRegion[keep], regionLabels[keep]) + }) + TwasWeights( + study = as.character(base$study), context = as.character(base$context), + trait = as.character(base$trait), method = as.character(base$method), + entry = mergedEntries) +} + .twasDispatchJointSpecsQtlDataset <- function(parsedJointSpec, data, methods, contexts, traitIds, cisWindow, dataType, - verbose) { + verbose, xRegions = list(NULL)) { + # Run the joint dispatch once per region block, then merge per + # (study, context, trait, method) across regions. A single block (cis or + # jointRegions=TRUE concatenated) returns its result directly. + perRegion <- lapply(xRegions, function(rg) { + .twasDispatchJointSpecsQtlDatasetOneRegion( + parsedJointSpec, data, methods, contexts, traitIds, cisWindow, dataType, + verbose, region = rg) + }) + labs <- vapply(xRegions, .twasRegionLabel, character(1)) + keep <- !vapply(perRegion, is.null, logical(1)) + perRegion <- perRegion[keep]; labs <- labs[keep] + if (length(perRegion) == 0L) return(NULL) + if (length(perRegion) == 1L) return(perRegion[[1L]]) + .twasMergeResultsByKey(perRegion, labs) +} + +.twasDispatchJointSpecsQtlDatasetOneRegion <- function(parsedJointSpec, data, + methods, contexts, traitIds, + cisWindow, dataType, + verbose, region = NULL) { out <- NULL for (i in seq_along(parsedJointSpec)) { spec <- parsedJointSpec[[i]] axes <- spec$axes if (length(axes) > 1L) { res <- .twasDispatchComposedQtlDataset( - spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose) + spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose, + region = region) if (!is.null(res)) out <- if (is.null(out)) res else .rbindTwasWeights(out, res, ldSketch = NULL) @@ -2080,9 +2200,11 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { axis <- axes[[1L]] res <- switch(axis, context = .twasDispatchCrossContextQtlDataset( - spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose), + spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose, + region = region), trait = .twasDispatchCrossTraitQtlDataset( - spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose), + spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose, + region = region), study = stop( "twasWeightsPipeline(QtlDataset): jointSpecification with axes = 'study' requires sumstats input."), stop(sprintf("Unsupported axis: %s", axis))) @@ -2132,7 +2254,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { # @noRd .twasDispatchJointSpecsMultiStudy <- function(parsedJointSpec, data, methods, contexts, traitIds, - cisWindow, dataType, verbose) { + cisWindow, dataType, verbose, + xRegions = list(NULL)) { out <- NULL embeddedLd <- NULL qtlDatasets <- getQtlDatasets(data) @@ -2154,7 +2277,7 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { qd <- qtlDatasets[[qdName]] qdRes <- .twasDispatchJointSpecsQtlDataset( nonStudyAxisSpecs, qd, methods, contexts, traitIds, cisWindow, - dataType, verbose) + dataType, verbose, xRegions = xRegions) if (!is.null(qdRes)) out <- if (is.null(out)) qdRes else .rbindTwasWeights(out, qdRes, ldSketch = NULL) diff --git a/R/twasWeightsPipeline.R b/R/twasWeightsPipeline.R index e1cbbd24..a31b617d 100644 --- a/R/twasWeightsPipeline.R +++ b/R/twasWeightsPipeline.R @@ -25,6 +25,98 @@ ldSketch = ldSketch) } +# --- Multi-region (jointRegions) helpers for the QtlDataset method ---------- + +# Label a region block for per-region reporting: the genomic coordinate of a +# single-range window, or "cis" for the trait-derived (region = NULL) block. +.twasRegionLabel <- function(rg) { + if (is.null(rg)) return("cis") + paste0(as.character(GenomicRanges::seqnames(rg))[[1L]], ":", + GenomicRanges::start(rg)[[1L]], "-", GenomicRanges::end(rg)[[1L]]) +} + +# Select the per-region fine-mapping fits for region block `i`. A +# jointRegions=FALSE multi-region fine-mapping stores its per-region SuSiE fits +# as a named list (region1, region2, ...); pick the matching element. With a +# single block the fits are returned unchanged; a non-region-list fit under +# multiple blocks cannot be aligned and is dropped (the method learns fresh). +.twasFitsForRegion <- function(fits, i, nBlocks) { + if (length(fits) == 0L || nBlocks == 1L) return(fits) + out <- lapply(fits, function(f) { + if (is.list(f) && !is.null(names(f)) && length(f) > 0L && + all(nzchar(names(f))) && all(startsWith(names(f), "region"))) { + if (i <= length(f)) f[[i]] else NULL + } else { + NULL + } + }) + out[!vapply(out, is.null, logical(1))] +} + +# Flat per-region cvPerformance reporting table: one row per region carrying the +# region label plus that region's CV metric columns. Per-sample predictions are +# intentionally omitted — this is a summary-reporting structure. +.twasRegionCvDf <- function(entries, regionLabels) { + rows <- Map(function(e, lab) { + cv <- getCvPerformance(e) + if (is.null(cv) || is.null(cv$metrics)) return(NULL) + cbind(data.frame(region = lab, stringsAsFactors = FALSE), + as.data.frame(as.list(cv$metrics), check.names = FALSE)) + }, entries, regionLabels) + rows <- rows[!vapply(rows, is.null, logical(1))] + if (length(rows) == 0L) return(NULL) + do.call(rbind, rows) +} + +# Concatenate one method's per-region TwasWeightsEntry payloads into a single +# entry. Variants/weights are stacked (regions are disjoint), the per-region +# fits are kept as a named list, and cvPerformance becomes the flat per-region +# reporting data.frame. +.twasMergeRegionEntries <- function(entries, regionLabels) { + keep <- !vapply(entries, is.null, logical(1)) + entries <- entries[keep]; regionLabels <- regionLabels[keep] + if (length(entries) == 0L) return(NULL) + if (length(entries) == 1L) return(entries[[1L]]) + wList <- lapply(entries, getWeights) + weights <- if (is.matrix(wList[[1L]])) do.call(rbind, wList) + else unlist(wList, use.names = FALSE) + TwasWeightsEntry( + variantIds = unlist(lapply(entries, getVariantIds), use.names = FALSE), + weights = weights, + fits = setNames(lapply(entries, getFits), regionLabels), + cvPerformance = .twasRegionCvDf(entries, regionLabels), + standardized = getStandardized(entries[[1L]]), + dataType = getDataType(entries[[1L]])) +} + +# Merge per-region TwasWeights collections (same study/context/trait, same +# methods) into one collection by concatenating each method's entry. +.twasMergeRegions <- function(twList, regionLabels) { + keep <- !vapply(twList, is.null, logical(1)) + twList <- twList[keep]; regionLabels <- regionLabels[keep] + if (length(twList) == 0L) return(NULL) + if (length(twList) == 1L) return(twList[[1L]]) + base <- twList[[1L]] + mergedEntries <- lapply(seq_along(base$method), function(r) { + key <- c(as.character(base$study[[r]]), as.character(base$context[[r]]), + as.character(base$trait[[r]]), as.character(base$method[[r]])) + perRegion <- lapply(twList, function(tw) { + hit <- which(as.character(tw$study) == key[[1L]] & + as.character(tw$context) == key[[2L]] & + as.character(tw$trait) == key[[3L]] & + as.character(tw$method) == key[[4L]]) + if (length(hit)) tw$entry[[hit[[1L]]]] else NULL + }) + .twasMergeRegionEntries(perRegion, regionLabels) + }) + TwasWeights( + study = as.character(base$study), + context = as.character(base$context), + trait = as.character(base$trait), + method = as.character(base$method), + entry = mergedEntries) +} + # Splice per-(method, outcome) cross-validated predictions and the 6-metric # performance row from a `twasWeightsCv()` result into the matching # `TwasWeightsEntry$cvPerformance` slot of every row in a TwasWeights @@ -576,7 +668,14 @@ #' Mutually exclusive with \code{traitId}. #' @param cisWindow For QtlDataset: cis-window (bp) around each trait's #' genomic position when extracting variants. Required when -#' \code{traitId} is supplied. +#' \code{traitId} is supplied. Mutually exclusive with \code{region}. +#' @param jointRegions For QtlDataset with a multi-range \code{region}: +#' \code{FALSE} (default) learns weights for each range independently and +#' concatenates them into one entry per (study, context, trait, method); +#' the per-region fits are kept as a named list and per-region CV is +#' recorded as a flat \code{cvPerformance} data frame (one row per region). +#' \code{TRUE} concatenates the ranges' genotypes into one joint fit. +#' Ignored for a single-range / cis request. #' @param jointSpecification Optional joint-fit specification (NULL by #' default). When NULL, the pipeline runs the implicit multi-trait / #' multi-context mr.mash branches as before. When non-NULL, the @@ -655,6 +754,7 @@ setMethod("twasWeightsPipeline", "QtlDataset", traitId = NULL, region = NULL, cisWindow = NULL, + jointRegions = FALSE, jointSpecification = NULL, fineMappingResult = NULL, twasWeights = NULL, @@ -677,6 +777,14 @@ setMethod("twasWeightsPipeline", "QtlDataset", verbose = 1, ...) { naAction <- match.arg(naAction) + # `cisWindow` expands a trait's own coordinates; `region` is literal. + # Supplying both signals a misunderstanding -> reject. + if (!is.null(region) && !is.null(cisWindow)) { + stop("twasWeightsPipeline(QtlDataset): specify either `region` or ", + "`cisWindow`, not both. `cisWindow` expands each trait's own ", + "coordinates, whereas `region` is the literal variant window.") + } + xRegions <- .makeXRegions(region, jointRegions) parsedJointSpec <- parseJointSpecification(jointSpecification, data) norm <- .twasNormalizeMethods(methods) .twasCheckMethodCapabilities(norm$tokens, "QtlDataset") @@ -689,7 +797,7 @@ setMethod("twasWeightsPipeline", "QtlDataset", if (length(parsedJointSpec) > 0L) { jointResult <- .twasDispatchJointSpecsQtlDataset( parsedJointSpec, data, intersect(norm$tokens, "mrmash"), - contexts, traitId, cisWindow, dataType, verbose) + contexts, traitId, cisWindow, dataType, verbose, xRegions = xRegions) drop <- intersect(norm$tokens, "mrmash") keep <- setdiff(norm$tokens, drop) if (length(keep) == 0L) { @@ -770,44 +878,57 @@ setMethod("twasWeightsPipeline", "QtlDataset", phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, genotypeCovariatesToResidualize = genotypeCovariatesToResidualize, naAction = naAction) - X <- .fmResidGeno( - data, contexts = ctx, traitId = tid, - cisWindow = cisWindow, - phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, - genotypeCovariatesToResidualize = genotypeCovariatesToResidualize, - samples = rownames(Y)) - common <- intersect(rownames(X), rownames(Y)) - if (length(common) < 2L) { - stop(sprintf( - "twasWeightsPipeline: too few shared samples between residualized X and Y for (context='%s', trait='%s').", - ctx, tid)) - } - X <- X[common, , drop = FALSE] - Y <- Y[common, , drop = FALSE] - - fittedModels <- .twasFineMappingFits(fineMappingResult, - study = study, - context = ctx, - trait = tid) - freshTw <- .twasWeightsPipelineMatrix( - X = X, y = Y, - study = study, context = ctx, trait = tid, - fittedModels = fittedModels, - cvFolds = cvFolds, - samplePartition = samplePartition, - weightMethods = remaining, - maxCvVariants = maxCvVariants, - cvThreads = cvThreads, - cvWeightMethods = cvWeightMethods, - ensemble = ensemble, - ensembleR2Threshold = ensembleR2Threshold, - ensembleSolver = ensembleSolver, - ensembleAlpha = ensembleAlpha, - estimatePi = estimatePi, - standardized = FALSE, - dataType = dataType, - ldSketch = NULL, - verbose = verbose)$twasWeights + + # Fine-mapping fits for this (study, ctx, trait); a multi-region fit is a + # per-region list and is selected blockwise inside the loop. + allFits <- .twasFineMappingFits(fineMappingResult, + study = study, context = ctx, trait = tid) + nBlocks <- length(xRegions) + perBlockTw <- lapply(seq_len(nBlocks), function(bi) { + rg <- xRegions[[bi]] + X <- if (is.null(rg)) { + .fmResidGeno( + data, contexts = ctx, traitId = tid, cisWindow = cisWindow, + phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, + genotypeCovariatesToResidualize = genotypeCovariatesToResidualize, + samples = rownames(Y)) + } else { + .fmResidGeno( + data, contexts = ctx, region = rg, + phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, + genotypeCovariatesToResidualize = genotypeCovariatesToResidualize, + samples = rownames(Y)) + } + common <- intersect(rownames(X), rownames(Y)) + if (length(common) < 2L) { + stop(sprintf( + "twasWeightsPipeline: too few shared samples between residualized X and Y for (context='%s', trait='%s').", + ctx, tid)) + } + .twasWeightsPipelineMatrix( + X = X[common, , drop = FALSE], y = Y[common, , drop = FALSE], + study = study, context = ctx, trait = tid, + fittedModels = .twasFitsForRegion(allFits, bi, nBlocks), + cvFolds = cvFolds, + samplePartition = samplePartition, + weightMethods = remaining, + maxCvVariants = maxCvVariants, + cvThreads = cvThreads, + cvWeightMethods = cvWeightMethods, + ensemble = ensemble, + ensembleR2Threshold = ensembleR2Threshold, + ensembleSolver = ensembleSolver, + ensembleAlpha = ensembleAlpha, + estimatePi = estimatePi, + standardized = FALSE, + dataType = dataType, + ldSketch = NULL, + verbose = verbose)$twasWeights + }) + # Single block (cis or jointRegions=TRUE) returns unchanged; multiple + # blocks (jointRegions=FALSE) concatenate per method into one entry. + freshTw <- .twasMergeRegions( + perBlockTw, vapply(xRegions, .twasRegionLabel, character(1))) if (is.null(cachedTw)) freshTw else .rbindTwasWeights(freshTw, cachedTw, ldSketch = NULL) } @@ -815,19 +936,27 @@ setMethod("twasWeightsPipeline", "QtlDataset", runMultivariate <- function(traits) { # Joint over selected (contexts, traits): residualize, intersect # samples across contexts, drop subjects with any-NA in Y. + # Sample basis for Y construction (residualized genotypes are + # region-independent in their sample set): use the cis window when no + # explicit region is given, otherwise the first range. Xlist <- lapply(useCtx, function(ctx) { - .fmResidGeno( - data, contexts = ctx, traitId = traits, - cisWindow = cisWindow, - phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, - genotypeCovariatesToResidualize = genotypeCovariatesToResidualize) + if (is.null(region)) { + .fmResidGeno( + data, contexts = ctx, traitId = traits, cisWindow = cisWindow, + phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, + genotypeCovariatesToResidualize = genotypeCovariatesToResidualize) + } else { + .fmResidGeno( + data, contexts = ctx, region = region[1L], + phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, + genotypeCovariatesToResidualize = genotypeCovariatesToResidualize) + } }) # Intersect samples across contexts. commonSamples <- Reduce(intersect, lapply(Xlist, rownames)) if (length(commonSamples) < 2L) { stop("twasWeightsPipeline(QtlDataset, multivariate): insufficient samples shared across selected contexts.") } - X <- Xlist[[1L]][commonSamples, , drop = FALSE] Yres <- .fmResidPheno( data, contexts = useCtx, traitId = traits, @@ -867,40 +996,53 @@ setMethod("twasWeightsPipeline", "QtlDataset", stop("twasWeightsPipeline(QtlDataset, multivariate): too few subjects with complete Y across selected (context, trait) columns.") } Y <- Y[keep, , drop = FALSE] - X <- X[rownames(Y), , drop = FALSE] - - # mvsusie joint fits are stored once per (context, trait) row in - # the FineMappingResult; all rows of a single joint fit point at - # the same fit object. Pull the fit using the first (context, - # trait) of the joint group and thread it through. - jointFits <- .twasFineMappingFits(fineMappingResult, - study = study, - context = meta$context[[1L]], - trait = meta$trait[[1L]]) - - # Build per-column identity tuples for learnTwasWeights so multi- - # outcome methods emit one row per (context, trait). - .twasWeightsPipelineMatrix( - X = X, y = Y, - study = study, - context = meta$context, - trait = meta$trait, - fittedModels = jointFits, - cvFolds = cvFolds, - samplePartition = samplePartition, - weightMethods = norm$methodList, - maxCvVariants = maxCvVariants, - cvThreads = cvThreads, - cvWeightMethods = cvWeightMethods, - ensemble = ensemble, - ensembleR2Threshold = ensembleR2Threshold, - ensembleSolver = ensembleSolver, - ensembleAlpha = ensembleAlpha, - estimatePi = estimatePi, - standardized = FALSE, - dataType = dataType, - ldSketch = NULL, - verbose = verbose)$twasWeights + + # Per-region fit + merge. The cis block reuses the already-extracted + # genotypes; an explicit region re-extracts that window (genotype + # residualization is context-independent, so one context suffices). + # mvsusie/mr.mash joint fits are stored once per (context, trait) row in + # the FineMappingResult; pull via the first (context, trait) of the group + # and (for a multi-region fit) select the per-region element. + perBlockTw <- lapply(seq_along(xRegions), function(bi) { + rg <- xRegions[[bi]] + Xr <- if (is.null(rg)) { + Xlist[[1L]] + } else { + .fmResidGeno( + data, contexts = useCtx[[1L]], region = rg, + phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, + genotypeCovariatesToResidualize = genotypeCovariatesToResidualize) + } + Xr <- Xr[rownames(Y), , drop = FALSE] + jointFits <- .twasFitsForRegion( + .twasFineMappingFits(fineMappingResult, study = study, + context = meta$context[[1L]], + trait = meta$trait[[1L]]), + bi, length(xRegions)) + .twasWeightsPipelineMatrix( + X = Xr, y = Y, + study = study, + context = meta$context, + trait = meta$trait, + fittedModels = jointFits, + cvFolds = cvFolds, + samplePartition = samplePartition, + weightMethods = norm$methodList, + maxCvVariants = maxCvVariants, + cvThreads = cvThreads, + cvWeightMethods = cvWeightMethods, + ensemble = ensemble, + ensembleR2Threshold = ensembleR2Threshold, + ensembleSolver = ensembleSolver, + ensembleAlpha = ensembleAlpha, + estimatePi = estimatePi, + standardized = FALSE, + dataType = dataType, + ldSketch = NULL, + verbose = verbose)$twasWeights + }) + .twasMergeRegions( + perBlockTw, vapply(xRegions, .twasRegionLabel, character(1))) } # Top-level dispatch within the QtlDataset method body. @@ -1231,6 +1373,7 @@ setMethod("twasWeightsPipeline", "MultiStudyQtlDataset", traitId = NULL, region = NULL, cisWindow = NULL, + jointRegions = FALSE, jointSpecification = NULL, fineMappingResult = NULL, twasWeights = NULL, @@ -1242,6 +1385,11 @@ setMethod("twasWeightsPipeline", "MultiStudyQtlDataset", residualizeGenotypeCovariates = TRUE, ...) { naAction <- match.arg(naAction) + if (!is.null(region) && !is.null(cisWindow)) { + stop("twasWeightsPipeline(MultiStudyQtlDataset): specify either ", + "`region` or `cisWindow`, not both.") + } + xRegions <- .makeXRegions(region, jointRegions) parsedJointSpec <- parseJointSpecification(jointSpecification, data) # Gate fine-mapping methods early so the recursion into the embedded @@ -1264,7 +1412,7 @@ setMethod("twasWeightsPipeline", "MultiStudyQtlDataset", names(methods)), "mrmash") jointResult <- .twasDispatchJointSpecsMultiStudy( parsedJointSpec, data, jointMethods, - contexts, traitId, cisWindow, NULL, verbose) + contexts, traitId, cisWindow, NULL, verbose, xRegions = xRegions) # Strip mrmash from the methods passed to the per-component recursion. if (is.character(methods)) methods <- setdiff(methods, "mrmash") else if (is.list(methods)) { @@ -1293,6 +1441,7 @@ setMethod("twasWeightsPipeline", "MultiStudyQtlDataset", traitId = traitId, region = region, cisWindow = cisWindow, + jointRegions = jointRegions, jointSpecification = NULL, fineMappingResult = fineMappingResult, twasWeights = twasWeights, diff --git a/man/GenotypeHandle-class.Rd b/man/GenotypeHandle-class.Rd index 4414cd83..dc42c255 100644 --- a/man/GenotypeHandle-class.Rd +++ b/man/GenotypeHandle-class.Rd @@ -12,17 +12,28 @@ S4 container holding a path + format + metadata for lazy \section{Slots}{ \describe{ -\item{\code{path}}{Character, file path.} +\item{\code{path}}{Character, file path. For a one-file-per-chromosome handle +this is the chrom-meta file path (a display/provenance value); the +per-chromosome payload files live in \code{chromPaths}.} \item{\code{format}}{Character, one of \code{"plink1"}, \code{"plink2"}, \code{"vcf"}, \code{"gds"}.} -\item{\code{snpInfo}}{data.frame, SNP metadata read from the index/sidecar.} +\item{\code{snpInfo}}{data.frame, SNP metadata read from the index/sidecar. For a +sharded handle this is the union across chromosomes (row-bound in the +order the shards were supplied), so \code{snpIdx} stays a single global +index space.} \item{\code{nSamples}}{Integer, number of samples.} \item{\code{sampleIds}}{Character vector of sample identifiers.} \item{\code{pgenPtr}}{Opaque pointer for PLINK2 reader state (NULL otherwise).} + +\item{\code{chromPaths}}{Named character vector mapping canonical chromosome +(e.g. \code{"21"}, \code{"X"}) to the per-chromosome payload path/prefix. +Empty (\code{character(0)}) for a single-file handle; non-empty marks a +one-file-per-chromosome (sharded) handle whose extraction is routed by +chromosome.} }} diff --git a/man/GenotypeHandle.Rd b/man/GenotypeHandle.Rd index b814375a..c6fe0d8f 100644 --- a/man/GenotypeHandle.Rd +++ b/man/GenotypeHandle.Rd @@ -16,6 +16,7 @@ GenotypeHandle( psam = NULL, ldMeta = NULL, region = NULL, + genoMeta = NULL, ... ) } @@ -45,6 +46,11 @@ supported here — use \code{\link{loadLdMatrix}} for that case.} \code{"chr:start-end"} string or a one-row data.frame with \code{chrom}, \code{start}, \code{end}.} +\item{genoMeta}{One-file-per-chromosome specification: a path to a +\code{#chr,path} meta file or a named character vector +(names = chromosomes, values = payload paths/prefixes). Optionally pass +\code{format} via \code{...} to force a single backend for every shard.} + \item{...}{Additional arguments forwarded to the format-specific reader.} } \value{ @@ -67,6 +73,15 @@ Construct a \code{GenotypeHandle} from one of several input \code{plink1Prefix} or arrange symlinks at a common stem.} \item{\code{pgen} + \code{pvar} + \code{psam}}{Explicit PLINK2 triplet. Same shared-stem requirement.} + \item{\code{genoMeta}}{One genotype file per chromosome. Either a path + to a whitespace/TSV meta file whose first column is the chromosome + (\code{#chr}) and second column the per-chromosome payload + (a \code{.bed}/\code{.pgen}/\code{.vcf[.gz]}/\code{.bcf}/\code{.gds} + file or a PLINK prefix; relative paths resolve against the meta + file's directory), or a named character vector mapping chromosome to + payload. All shards must share one format and identical sample IDs in + the same order. Extraction is routed to the correct file by the + requested region's chromosome.} } The constructor opens the file for metadata only (sample IDs and SNP info); dosage extraction is deferred until \code{extractBlockGenotypes()} diff --git a/man/fineMappingPipeline.Rd b/man/fineMappingPipeline.Rd index f0938799..fdd53ef0 100644 --- a/man/fineMappingPipeline.Rd +++ b/man/fineMappingPipeline.Rd @@ -18,6 +18,7 @@ fineMappingPipeline(data, ...) traitId = NULL, region = NULL, cisWindow = NULL, + jointRegions = FALSE, jointSpecification = NULL, addSusieInf = TRUE, coverage = 0.95, @@ -127,7 +128,15 @@ selection. Mutually exclusive with \code{traitId}.} \item{cisWindow}{For QtlDataset: cis-window (bp) around each trait's genomic position when extracting variants. Required when -\code{traitId} is supplied.} +\code{traitId} is supplied. Mutually exclusive with \code{region}.} + +\item{jointRegions}{For QtlDataset with a multi-range \code{region}: +\code{FALSE} (default) fits each range independently and merges the +per-range results into one entry per (study, context, trait, method) — +the merged \code{susieFit} is a named list of per-region fits and +credible-set labels are renumbered to stay unique. \code{TRUE} +concatenates the ranges' genotypes into one joint fit. Ignored for a +single-range / cis (\code{traitId} + \code{cisWindow}) request.} \item{jointSpecification}{Optional joint-fit specification (NULL by default). When NULL, the pipeline runs the implicit multi-context / diff --git a/man/twasWeightsPipeline.Rd b/man/twasWeightsPipeline.Rd index cbabba63..a777a8d8 100644 --- a/man/twasWeightsPipeline.Rd +++ b/man/twasWeightsPipeline.Rd @@ -17,6 +17,7 @@ twasWeightsPipeline(data, ...) traitId = NULL, region = NULL, cisWindow = NULL, + jointRegions = FALSE, jointSpecification = NULL, fineMappingResult = NULL, twasWeights = NULL, @@ -101,7 +102,15 @@ Mutually exclusive with \code{traitId}.} \item{cisWindow}{For QtlDataset: cis-window (bp) around each trait's genomic position when extracting variants. Required when -\code{traitId} is supplied.} +\code{traitId} is supplied. Mutually exclusive with \code{region}.} + +\item{jointRegions}{For QtlDataset with a multi-range \code{region}: +\code{FALSE} (default) learns weights for each range independently and +concatenates them into one entry per (study, context, trait, method); +the per-region fits are kept as a named list and per-region CV is +recorded as a flat \code{cvPerformance} data frame (one row per region). +\code{TRUE} concatenates the ranges' genotypes into one joint fit. +Ignored for a single-range / cis request.} \item{jointSpecification}{Optional joint-fit specification (NULL by default). When NULL, the pipeline runs the implicit multi-trait / diff --git a/tests/testthat/test_QtlDataset.R b/tests/testthat/test_QtlDataset.R index be642beb..28df4d96 100644 --- a/tests/testthat/test_QtlDataset.R +++ b/tests/testthat/test_QtlDataset.R @@ -189,18 +189,22 @@ test_that(".qtlResolveVariantRegion: traits across chromosomes error", { ) }) -test_that(".qtlResolveVariantRegion: region path requires single-range GRanges", { +test_that(".qtlResolveVariantRegion: region must be a GRanges; multi-range is allowed", { qd <- .qh_makeDataset() expect_error( pecotmr:::.qtlResolveVariantRegion(qd, region = "chr1:100-200"), "must be a GRanges object" ) - multi <- GenomicRanges::GRanges(c("chr1", "chr1"), - IRanges::IRanges(c(1, 100), c(50, 200))) expect_error( - pecotmr:::.qtlResolveVariantRegion(qd, region = multi), - "single range" + pecotmr:::.qtlResolveVariantRegion(qd, region = GenomicRanges::GRanges()), + "at least one range" ) + # A multi-range region is now taken literally (joint multi-region extraction). + multi <- GenomicRanges::GRanges(c("chr1", "chr1"), + IRanges::IRanges(c(1, 100), c(50, 200))) + gr <- pecotmr:::.qtlResolveVariantRegion(qd, region = multi) + expect_s4_class(gr, "GRanges") + expect_equal(length(gr), 2L) }) test_that(".qtlResolveVariantRegion: region path expands by cisWindow", { @@ -1498,4 +1502,84 @@ test_that("show.MultiStudyQtlDataset reports sumstats studies when present", { expect_true(any(grepl("Sumstats studies: s2", out))) }) +# =========================================================================== +# Multi-region variant extraction: getGenotypes() with a multi-range region +# (the mechanism behind jointRegions). Single-file (chr21) and sharded +# (chr21+chr22) handles wrapped in a QtlDataset with a minimal phenotype SE. +# =========================================================================== +.mr_makeSE <- function(samples, chrom = "chr21", traits = c("g1", "g2")) { + rng <- GenomicRanges::GRanges( + chrom, IRanges::IRanges( + start = seq(1e6L, by = 1e5L, length.out = length(traits)), width = 1000L)) + names(rng) <- traits + expr <- matrix(0, nrow = length(traits), ncol = length(samples), + dimnames = list(traits, samples)) + SummarizedExperiment::SummarizedExperiment( + assays = list(expression = expr), rowRanges = rng, + colData = S4Vectors::DataFrame(row.names = samples)) +} +.mr_ncol <- function(qd, region) ncol(getGenotypes(qd, region = region)) +.mr_vids <- function(qd, region) colnames(getGenotypes(qd, region = region)) + +test_that("multi-range region unions disjoint sub-ranges on one chromosome", { + skip_if_not_installed("snpStats") + h <- GenotypeHandle(plink1Prefix = file.path(test_data_dir, "test_variants")) + qd <- QtlDataset(study = "S", genotypes = h, + phenotypes = list(ctx = .mr_makeSE(h@sampleIds))) + bp <- h@snpInfo$BP + lo <- min(bp); hi <- max(bp); mid <- lo + (hi - lo) %/% 2L + rA <- GenomicRanges::GRanges("chr21", IRanges::IRanges(lo, mid)) + rB <- GenomicRanges::GRanges("chr21", IRanges::IRanges(mid + 1L, hi)) + rAB <- GenomicRanges::GRanges("chr21", IRanges::IRanges(c(lo, mid + 1L), c(mid, hi))) + + nA <- .mr_ncol(qd, rA); nB <- .mr_ncol(qd, rB) + expect_gt(nA, 0L); expect_gt(nB, 0L) + expect_equal(.mr_ncol(qd, rAB), nA + nB) + expect_equal(.mr_vids(qd, rAB), c(.mr_vids(qd, rA), .mr_vids(qd, rB))) +}) + +test_that("multi-range region spans chromosomes on a sharded handle", { + skip_if_not_installed("snpStats") + hs <- GenotypeHandle(genoMeta = c( + "21" = file.path(test_data_dir, "test_variants"), + "22" = file.path(test_data_dir, "test_variants_chr22"))) + qd <- QtlDataset(study = "S", genotypes = hs, + phenotypes = list(ctx = .mr_makeSE(hs@sampleIds))) + bp <- GenotypeHandle(plink1Prefix = file.path(test_data_dir, "test_variants"))@snpInfo$BP + lo <- min(bp); hi <- max(bp) + r21 <- GenomicRanges::GRanges("chr21", IRanges::IRanges(lo, hi)) + r22 <- GenomicRanges::GRanges("chr22", IRanges::IRanges(lo, hi)) + rBoth <- GenomicRanges::GRanges(c("chr21", "chr22"), + IRanges::IRanges(c(lo, lo), c(hi, hi))) + + n21 <- .mr_ncol(qd, r21); n22 <- .mr_ncol(qd, r22) + expect_gt(n21, 0L); expect_gt(n22, 0L) + expect_equal(.mr_ncol(qd, rBoth), n21 + n22) + + gBoth <- getGenotypes(qd, region = rBoth) + g21 <- getGenotypes(qd, region = r21) + expect_equal(unname(gBoth[, seq_len(n21)]), unname(g21)) + expect_true(all(grepl("_c22$", colnames(gBoth)[(n21 + 1):(n21 + n22)]))) +}) + +test_that("multi-region: single-range extraction is unchanged (regression)", { + skip_if_not_installed("snpStats") + h <- GenotypeHandle(plink1Prefix = file.path(test_data_dir, "test_variants")) + qd <- QtlDataset(study = "S", genotypes = h, + phenotypes = list(ctx = .mr_makeSE(h@sampleIds))) + bp <- h@snpInfo$BP + r <- GenomicRanges::GRanges("chr21", IRanges::IRanges(min(bp), max(bp))) + expect_equal(.mr_ncol(qd, r), nrow(h@snpInfo)) +}) + +test_that(".qtlResolveVariantRegion rejects a non-GRanges / empty region", { + skip_if_not_installed("snpStats") + h <- GenotypeHandle(plink1Prefix = file.path(test_data_dir, "test_variants")) + qd <- QtlDataset(study = "S", genotypes = h, + phenotypes = list(ctx = .mr_makeSE(h@sampleIds))) + expect_error(getGenotypes(qd, region = "chr21:1-2"), "must be a GRanges") + expect_error(getGenotypes(qd, region = GenomicRanges::GRanges()), + "at least one range") +}) + diff --git a/tests/testthat/test_data/test_variants_chr22.bed b/tests/testthat/test_data/test_variants_chr22.bed new file mode 100644 index 00000000..b00cff68 --- /dev/null +++ b/tests/testthat/test_data/test_variants_chr22.bed @@ -0,0 +1,22 @@ +l>...?/Ͽ믿￿ᆵ>믿?/Ͽ.ƒ⣯쯲".."믿.Ϯ>#(*>>ο;첊뺨.(>*€#(+>꿯> ʎ>￯꿯?:躯+3뿿̿."( +".ʺ.뿿뿿絛絛:몳:몳뿿:몳:몳?333뿿뿯3請ᄐ"뿿3뿿3뿿뿿뿿?뿿뿿밻￿밻￿뿿?밻￿밻￿뿿.<ό3̿밻￿밻￿몳밻￿⪌.ð뿿?*갻>뿿̿⪬ꮮð돺밻￿밻￿̿뿿돻/<, +ú껯(̺??;?./ +*ʨ+?/ +*ʨ*2몳*2몳*2몳*﮺2몳*﮺2몳*﮺2몳*﮺2몳̿*﮺2몳몳?̿*﮺2몳??*﮺2몳?*﮾몳*﮾몳̿??>;..<,./ + +(*﮺2몳+/:,.*0ꪮώ*﮺2몳?./ꪊ.˺?./ꪊ.˺ꫯ. +:몾ώ̿̿*﮺2몳;..<,./ + +(;..<,./ + +(ꮿ./ꪮ뿿*﮺2몳;..<,./ + +(*﮺2몳*﮺2몳*﮺2몳;..<,./ + +(*﮺2몳??./ꪊ/˺;..<,./ + (*﮺2몳̿ﻯ. +뻿θ̿ﻯ. +뻿θ̿ﻯ. +뿫Ψ̿"..껿*﮾*몳ȿȿ*﮿ȿ?隷.,..꫺.+ꈮ?꫏. +:꺫ώȿκ+#ȿ(/㺪ϼ*0(/㺪ϼ*0??뺣뺣뺣뺣Ⱦ뺣Ⱦ뺣Ⱦ뮿?뺣Ⱦȿȿȿ⿯⊏ +:꺫舿;絛?˻뿿?+;﮾>,ʏ󼯯󼯯󼯯+誯.:(+󼯯>,ʏ󼯯;;???→Ͽ; \ No newline at end of file diff --git a/tests/testthat/test_data/test_variants_chr22.bim b/tests/testthat/test_data/test_variants_chr22.bim new file mode 100644 index 00000000..1d0fce51 --- /dev/null +++ b/tests/testthat/test_data/test_variants_chr22.bim @@ -0,0 +1,349 @@ +22 chr21_17513228_C_G_c22 0 17513228 G C +22 chr21_17513580_C_T_c22 0 17513580 T C +22 chr21_17513624_T_C_c22 0 17513624 C T +22 chr21_17514447_G_A_c22 0 17514447 A G +22 chr21_17514685_G_A_c22 0 17514685 A G +22 chr21_17514813_T_C_c22 0 17514813 C T +22 chr21_17514883_C_G_c22 0 17514883 G C +22 chr21_17514886_A_T_c22 0 17514886 T A +22 chr21_17515165_C_T_c22 0 17515165 T C +22 chr21_17515283_A_G_c22 0 17515283 G A +22 chr21_17515360_T_A_c22 0 17515360 T A +22 chr21_17515408_G_A_c22 0 17515408 A G +22 chr21_17515432_A_T_c22 0 17515432 T A +22 chr21_17515602_G_A_c22 0 17515602 A G +22 chr21_17515636_C_A_c22 0 17515636 A C +22 chr21_17515808_A_C_c22 0 17515808 C A +22 chr21_17516162_C_G_c22 0 17516162 G C +22 chr21_17516223_G_A_c22 0 17516223 A G +22 chr21_17516546_T_C_c22 0 17516546 C T +22 chr21_17516815_AGT_A_c22 0 17516815 A AGT +22 chr21_17516952_GA_G_c22 0 17516952 G GA +22 chr21_17517680_A_C_c22 0 17517680 C A +22 chr21_17517683_T_TTGAAGA_c22 0 17517683 TTGAAGA T +22 chr21_17517734_A_G_c22 0 17517734 G A +22 chr21_17518122_G_A_c22 0 17518122 A G +22 chr21_17518266_C_A_c22 0 17518266 C A +22 chr21_17519634_C_T_c22 0 17519634 T C +22 chr21_17519707_T_C_c22 0 17519707 C T +22 chr21_17519734_G_C_c22 0 17519734 C G +22 chr21_17520407_T_C_c22 0 17520407 C T +22 chr21_17520648_G_A_c22 0 17520648 A G +22 chr21_17520779_T_C_c22 0 17520779 C T +22 chr21_17521022_T_G_c22 0 17521022 G T +22 chr21_17521131_A_G_c22 0 17521131 G A +22 chr21_17521375_T_A_c22 0 17521375 A T +22 chr21_17521464_A_G_c22 0 17521464 G A +22 chr21_17521559_GTGA_G_c22 0 17521559 G GTGA +22 chr21_17522133_G_C_c22 0 17522133 G C +22 chr21_17522315_AT_A_c22 0 17522315 A AT +22 chr21_17522348_A_G_c22 0 17522348 A G +22 chr21_17522480_A_G_c22 0 17522480 G A +22 chr21_17522958_A_G_c22 0 17522958 G A +22 chr21_17523579_G_A_c22 0 17523579 A G +22 chr21_17523652_T_A_c22 0 17523652 A T +22 chr21_17523753_G_A_c22 0 17523753 A G +22 chr21_17523894_T_C_c22 0 17523894 C T +22 chr21_17523928_G_A_c22 0 17523928 A G +22 chr21_17524195_A_T_c22 0 17524195 T A +22 chr21_17524490_C_T_c22 0 17524490 T C +22 chr21_17525474_C_T_c22 0 17525474 T C +22 chr21_17525608_T_A_c22 0 17525608 T A +22 chr21_17526014_T_C_c22 0 17526014 C T +22 chr21_17526279_T_C_c22 0 17526279 C T +22 chr21_17526364_A_T_c22 0 17526364 T A +22 chr21_17526577_G_T_c22 0 17526577 T G +22 chr21_17526911_A_C_c22 0 17526911 C A +22 chr21_17527800_T_TTTTA_c22 0 17527800 TTTTA T +22 chr21_17527957_A_G_c22 0 17527957 G A +22 chr21_17528363_C_T_c22 0 17528363 T C +22 chr21_17529100_G_A_c22 0 17529100 A G +22 chr21_17529502_A_G_c22 0 17529502 G A +22 chr21_17529651_G_A_c22 0 17529651 G A +22 chr21_17529955_T_C_c22 0 17529955 C T +22 chr21_17530220_C_T_c22 0 17530220 T C +22 chr21_17530224_C_G_c22 0 17530224 G C +22 chr21_17530294_AT_A_c22 0 17530294 A AT +22 chr21_17530453_G_C_c22 0 17530453 C G +22 chr21_17530755_G_A_c22 0 17530755 A G +22 chr21_17530762_C_T_c22 0 17530762 T C +22 chr21_17530820_A_C_c22 0 17530820 C A +22 chr21_17531415_A_G_c22 0 17531415 G A +22 chr21_17531536_A_G_c22 0 17531536 A G +22 chr21_17531721_A_G_c22 0 17531721 G A +22 chr21_17531779_CTT_C_c22 0 17531779 C CTT +22 chr21_17532183_C_T_c22 0 17532183 C T +22 chr21_17532261_G_C_c22 0 17532261 C G +22 chr21_17532402_ACTGT_A_c22 0 17532402 A ACTGT +22 chr21_17532474_C_T_c22 0 17532474 T C +22 chr21_17533041_G_A_c22 0 17533041 A G +22 chr21_17533080_T_A_c22 0 17533080 A T +22 chr21_17533581_G_A_c22 0 17533581 A G +22 chr21_17533587_A_G_c22 0 17533587 G A +22 chr21_17533873_G_T_c22 0 17533873 T G +22 chr21_17534211_G_A_c22 0 17534211 A G +22 chr21_17534215_C_T_c22 0 17534215 T C +22 chr21_17534571_G_T_c22 0 17534571 T G +22 chr21_17534583_G_C_c22 0 17534583 C G +22 chr21_17535659_A_C_c22 0 17535659 C A +22 chr21_17536186_A_C_c22 0 17536186 A C +22 chr21_17536232_A_G_c22 0 17536232 G A +22 chr21_17536260_T_C_c22 0 17536260 C T +22 chr21_17536503_C_A_c22 0 17536503 C A +22 chr21_17536949_G_A_c22 0 17536949 A G +22 chr21_17536995_T_A_c22 0 17536995 A T +22 chr21_17537118_A_G_c22 0 17537118 G A +22 chr21_17537728_A_G_c22 0 17537728 A G +22 chr21_17538189_T_C_c22 0 17538189 T C +22 chr21_17539918_G_GT_c22 0 17539918 GT G +22 chr21_17540894_C_T_c22 0 17540894 T C +22 chr21_17541380_G_A_c22 0 17541380 A G +22 chr21_17541396_C_CA_c22 0 17541396 CA C +22 chr21_17541499_A_G_c22 0 17541499 G A +22 chr21_17541596_C_A_c22 0 17541596 A C +22 chr21_17541774_G_C_c22 0 17541774 C G +22 chr21_17542024_ACT_A_c22 0 17542024 A ACT +22 chr21_17542052_T_A_c22 0 17542052 A T +22 chr21_17542068_A_G_c22 0 17542068 G A +22 chr21_17542147_A_G_c22 0 17542147 G A +22 chr21_17542741_A_G_c22 0 17542741 G A +22 chr21_17543019_T_C_c22 0 17543019 C T +22 chr21_17543312_C_T_c22 0 17543312 T C +22 chr21_17543494_C_T_c22 0 17543494 T C +22 chr21_17543820_G_A_c22 0 17543820 A G +22 chr21_17544714_A_G_c22 0 17544714 G A +22 chr21_17544780_C_T_c22 0 17544780 T C +22 chr21_17544865_A_G_c22 0 17544865 G A +22 chr21_17544962_CCT_C_c22 0 17544962 C CCT +22 chr21_17545007_C_T_c22 0 17545007 T C +22 chr21_17545391_T_A_c22 0 17545391 A T +22 chr21_17545666_G_A_c22 0 17545666 A G +22 chr21_17545937_A_G_c22 0 17545937 G A +22 chr21_17546068_C_T_c22 0 17546068 T C +22 chr21_17546736_C_G_c22 0 17546736 G C +22 chr21_17546737_T_G_c22 0 17546737 G T +22 chr21_17546936_A_G_c22 0 17546936 G A +22 chr21_17547434_G_A_c22 0 17547434 A G +22 chr21_17547937_C_G_c22 0 17547937 G C +22 chr21_17548085_A_C_c22 0 17548085 C A +22 chr21_17548112_A_G_c22 0 17548112 G A +22 chr21_17548319_T_C_c22 0 17548319 C T +22 chr21_17548368_C_G_c22 0 17548368 G C +22 chr21_17549420_C_A_c22 0 17549420 A C +22 chr21_17549503_G_T_c22 0 17549503 T G +22 chr21_17549944_C_T_c22 0 17549944 T C +22 chr21_17550109_C_G_c22 0 17550109 G C +22 chr21_17550454_C_T_c22 0 17550454 T C +22 chr21_17550613_A_G_c22 0 17550613 G A +22 chr21_17551097_T_TA_c22 0 17551097 TA T +22 chr21_17551331_T_C_c22 0 17551331 C T +22 chr21_17551392_CAAAAA_C_c22 0 17551392 C CAAAAA +22 chr21_17551547_A_G_c22 0 17551547 G A +22 chr21_17551702_T_G_c22 0 17551702 G T +22 chr21_17551871_A_G_c22 0 17551871 G A +22 chr21_17552019_T_C_c22 0 17552019 C T +22 chr21_17552481_T_G_c22 0 17552481 T G +22 chr21_17552573_T_TTA_c22 0 17552573 TTA T +22 chr21_17552741_G_A_c22 0 17552741 A G +22 chr21_17552956_T_A_c22 0 17552956 A T +22 chr21_17553007_C_T_c22 0 17553007 T C +22 chr21_17553033_T_C_c22 0 17553033 C T +22 chr21_17553036_C_T_c22 0 17553036 T C +22 chr21_17553048_A_G_c22 0 17553048 G A +22 chr21_17553054_C_T_c22 0 17553054 T C +22 chr21_17553066_GCA_G_c22 0 17553066 G GCA +22 chr21_17553071_C_CCT_c22 0 17553071 CCT C +22 chr21_17553072_A_G_c22 0 17553072 G A +22 chr21_17553081_C_A_c22 0 17553081 A C +22 chr21_17553184_G_A_c22 0 17553184 G A +22 chr21_17553223_G_A_c22 0 17553223 A G +22 chr21_17553605_G_C_c22 0 17553605 C G +22 chr21_17554263_T_C_c22 0 17554263 C T +22 chr21_17554291_AG_A_c22 0 17554291 A AG +22 chr21_17554488_G_C_c22 0 17554488 C G +22 chr21_17554491_G_C_c22 0 17554491 C G +22 chr21_17554492_G_C_c22 0 17554492 C G +22 chr21_17554543_C_T_c22 0 17554543 T C +22 chr21_17554567_A_G_c22 0 17554567 G A +22 chr21_17554616_T_C_c22 0 17554616 C T +22 chr21_17555515_C_T_c22 0 17555515 T C +22 chr21_17555598_A_G_c22 0 17555598 G A +22 chr21_17555632_TCTC_T_c22 0 17555632 T TCTC +22 chr21_17555658_A_G_c22 0 17555658 G A +22 chr21_17555970_C_T_c22 0 17555970 T C +22 chr21_17556326_A_G_c22 0 17556326 G A +22 chr21_17556414_T_A_c22 0 17556414 A T +22 chr21_17556432_G_A_c22 0 17556432 G A +22 chr21_17556468_GA_G_c22 0 17556468 G GA +22 chr21_17556741_T_A_c22 0 17556741 A T +22 chr21_17556778_A_T_c22 0 17556778 A T +22 chr21_17556841_A_G_c22 0 17556841 A G +22 chr21_17557016_G_A_c22 0 17557016 A G +22 chr21_17557426_C_A_c22 0 17557426 A C +22 chr21_17557522_G_A_c22 0 17557522 A G +22 chr21_17557671_G_A_c22 0 17557671 A G +22 chr21_17557752_GAA_G_c22 0 17557752 G GAA +22 chr21_17557911_G_A_c22 0 17557911 A G +22 chr21_17558115_A_G_c22 0 17558115 G A +22 chr21_17558685_ATT_A_c22 0 17558685 A ATT +22 chr21_17558747_T_G_c22 0 17558747 G T +22 chr21_17559176_C_T_c22 0 17559176 T C +22 chr21_17559641_T_A_c22 0 17559641 A T +22 chr21_17559865_C_G_c22 0 17559865 C G +22 chr21_17560034_G_A_c22 0 17560034 A G +22 chr21_17560291_A_C_c22 0 17560291 C A +22 chr21_17560332_G_T_c22 0 17560332 T G +22 chr21_17560437_G_C_c22 0 17560437 G C +22 chr21_17560616_T_A_c22 0 17560616 A T +22 chr21_17560678_G_C_c22 0 17560678 C G +22 chr21_17560918_G_T_c22 0 17560918 T G +22 chr21_17561510_A_G_c22 0 17561510 G A +22 chr21_17561586_A_G_c22 0 17561586 G A +22 chr21_17561678_C_G_c22 0 17561678 G C +22 chr21_17561761_G_A_c22 0 17561761 A G +22 chr21_17562194_CAGTG_C_c22 0 17562194 C CAGTG +22 chr21_17562221_G_A_c22 0 17562221 A G +22 chr21_17562230_G_T_c22 0 17562230 T G +22 chr21_17562335_TC_T_c22 0 17562335 T TC +22 chr21_17562965_C_T_c22 0 17562965 T C +22 chr21_17563079_C_T_c22 0 17563079 T C +22 chr21_17563155_GTTAA_G_c22 0 17563155 G GTTAA +22 chr21_17563260_G_C_c22 0 17563260 G C +22 chr21_17563353_G_C_c22 0 17563353 C G +22 chr21_17563664_G_A_c22 0 17563664 A G +22 chr21_17563757_C_T_c22 0 17563757 T C +22 chr21_17563801_T_C_c22 0 17563801 T C +22 chr21_17563829_C_T_c22 0 17563829 T C +22 chr21_17564141_T_A_c22 0 17564141 A T +22 chr21_17564224_G_A_c22 0 17564224 A G +22 chr21_17564536_A_G_c22 0 17564536 A G +22 chr21_17564596_G_C_c22 0 17564596 C G +22 chr21_17564957_A_G_c22 0 17564957 G A +22 chr21_17564994_C_G_c22 0 17564994 G C +22 chr21_17565239_A_G_c22 0 17565239 G A +22 chr21_17565440_A_G_c22 0 17565440 G A +22 chr21_17565725_T_C_c22 0 17565725 C T +22 chr21_17565762_G_T_c22 0 17565762 T G +22 chr21_17565800_ATAAGT_A_c22 0 17565800 A ATAAGT +22 chr21_17565942_T_G_c22 0 17565942 T G +22 chr21_17566167_C_T_c22 0 17566167 T C +22 chr21_17566359_T_C_c22 0 17566359 C T +22 chr21_17566449_G_A_c22 0 17566449 A G +22 chr21_17567111_T_C_c22 0 17567111 C T +22 chr21_17567128_C_T_c22 0 17567128 T C +22 chr21_17567657_G_A_c22 0 17567657 A G +22 chr21_17568012_T_C_c22 0 17568012 C T +22 chr21_17568161_G_C_c22 0 17568161 G C +22 chr21_17568206_G_T_c22 0 17568206 T G +22 chr21_17568330_G_A_c22 0 17568330 G A +22 chr21_17568406_G_A_c22 0 17568406 A G +22 chr21_17568498_CTT_C_c22 0 17568498 CTT C +22 chr21_17568582_A_G_c22 0 17568582 A G +22 chr21_17568605_G_C_c22 0 17568605 C G +22 chr21_17568743_G_C_c22 0 17568743 C G +22 chr21_17568769_G_A_c22 0 17568769 G A +22 chr21_17568797_C_G_c22 0 17568797 G C +22 chr21_17568945_A_G_c22 0 17568945 G A +22 chr21_17569383_C_T_c22 0 17569383 T C +22 chr21_17569657_G_C_c22 0 17569657 G C +22 chr21_17569787_T_C_c22 0 17569787 C T +22 chr21_17569931_G_A_c22 0 17569931 A G +22 chr21_17570136_A_T_c22 0 17570136 T A +22 chr21_17570190_G_C_c22 0 17570190 C G +22 chr21_17570414_G_A_c22 0 17570414 A G +22 chr21_17570499_T_C_c22 0 17570499 C T +22 chr21_17570510_G_C_c22 0 17570510 C G +22 chr21_17570831_A_C_c22 0 17570831 C A +22 chr21_17571153_C_G_c22 0 17571153 G C +22 chr21_17571311_A_G_c22 0 17571311 G A +22 chr21_17571318_T_C_c22 0 17571318 C T +22 chr21_17571369_G_A_c22 0 17571369 A G +22 chr21_17571794_T_A_c22 0 17571794 T A +22 chr21_17571869_G_A_c22 0 17571869 A G +22 chr21_17572000_G_A_c22 0 17572000 G A +22 chr21_17572184_A_C_c22 0 17572184 C A +22 chr21_17572242_A_G_c22 0 17572242 G A +22 chr21_17572383_A_G_c22 0 17572383 A G +22 chr21_17572543_A_G_c22 0 17572543 A G +22 chr21_17572554_CCTT_C_c22 0 17572554 C CCTT +22 chr21_17572563_G_A_c22 0 17572563 A G +22 chr21_17572571_G_A_c22 0 17572571 A G +22 chr21_17572576_A_G_c22 0 17572576 G A +22 chr21_17572717_T_C_c22 0 17572717 C T +22 chr21_17572852_G_A_c22 0 17572852 A G +22 chr21_17573059_A_G_c22 0 17573059 A G +22 chr21_17573157_C_A_c22 0 17573157 A C +22 chr21_17573190_C_CG_c22 0 17573190 CG C +22 chr21_17573533_A_G_c22 0 17573533 A G +22 chr21_17573592_C_T_c22 0 17573592 C T +22 chr21_17573619_G_A_c22 0 17573619 A G +22 chr21_17573759_A_T_c22 0 17573759 T A +22 chr21_17573916_ATTC_A_c22 0 17573916 A ATTC +22 chr21_17573921_T_G_c22 0 17573921 G T +22 chr21_17573923_A_T_c22 0 17573923 T A +22 chr21_17574064_G_C_c22 0 17574064 G C +22 chr21_17574310_T_A_c22 0 17574310 T A +22 chr21_17574383_AT_A_c22 0 17574383 AT A +22 chr21_17574409_A_T_c22 0 17574409 T A +22 chr21_17574719_G_C_c22 0 17574719 C G +22 chr21_17574752_A_G_c22 0 17574752 A G +22 chr21_17575127_C_CTCATTCTA_c22 0 17575127 C CTCATTCTA +22 chr21_17575256_C_G_c22 0 17575256 G C +22 chr21_17575514_G_A_c22 0 17575514 A G +22 chr21_17575543_T_G_c22 0 17575543 T G +22 chr21_17575643_A_G_c22 0 17575643 G A +22 chr21_17575667_A_G_c22 0 17575667 A G +22 chr21_17575695_G_A_c22 0 17575695 A G +22 chr21_17575714_T_C_c22 0 17575714 T C +22 chr21_17575787_T_G_c22 0 17575787 G T +22 chr21_17575947_G_A_c22 0 17575947 A G +22 chr21_17576576_C_T_c22 0 17576576 T C +22 chr21_17576915_C_CA_c22 0 17576915 CA C +22 chr21_17577143_C_A_c22 0 17577143 A C +22 chr21_17578144_A_G_c22 0 17578144 G A +22 chr21_17578173_A_G_c22 0 17578173 G A +22 chr21_17578198_A_G_c22 0 17578198 G A +22 chr21_17578245_T_G_c22 0 17578245 G T +22 chr21_17578336_G_A_c22 0 17578336 A G +22 chr21_17578690_A_G_c22 0 17578690 A G +22 chr21_17578700_G_A_c22 0 17578700 A G +22 chr21_17578815_C_A_c22 0 17578815 A C +22 chr21_17578893_A_G_c22 0 17578893 G A +22 chr21_17578994_T_C_c22 0 17578994 C T +22 chr21_17579244_G_A_c22 0 17579244 A G +22 chr21_17579344_G_T_c22 0 17579344 T G +22 chr21_17579604_T_C_c22 0 17579604 T C +22 chr21_17580257_G_A_c22 0 17580257 A G +22 chr21_17580756_A_G_c22 0 17580756 G A +22 chr21_17580802_G_A_c22 0 17580802 A G +22 chr21_17580832_C_T_c22 0 17580832 T C +22 chr21_17581092_T_C_c22 0 17581092 C T +22 chr21_17581880_C_T_c22 0 17581880 T C +22 chr21_17581976_C_T_c22 0 17581976 T C +22 chr21_17582085_A_G_c22 0 17582085 G A +22 chr21_17582606_C_T_c22 0 17582606 T C +22 chr21_17582730_T_C_c22 0 17582730 C T +22 chr21_17583168_A_G_c22 0 17583168 G A +22 chr21_17583287_A_G_c22 0 17583287 G A +22 chr21_17583375_C_T_c22 0 17583375 T C +22 chr21_17584190_C_T_c22 0 17584190 T C +22 chr21_17584262_G_C_c22 0 17584262 C G +22 chr21_17584440_A_T_c22 0 17584440 T A +22 chr21_17584502_A_G_c22 0 17584502 G A +22 chr21_17584577_G_A_c22 0 17584577 A G +22 chr21_17584717_G_C_c22 0 17584717 C G +22 chr21_17585152_G_T_c22 0 17585152 T G +22 chr21_17585313_A_G_c22 0 17585313 G A +22 chr21_17585496_C_T_c22 0 17585496 T C +22 chr21_17585621_G_A_c22 0 17585621 A G +22 chr21_17586078_T_G_c22 0 17586078 G T +22 chr21_17586363_G_A_c22 0 17586363 G A +22 chr21_17586771_G_A_c22 0 17586771 G A +22 chr21_17586862_G_A_c22 0 17586862 A G +22 chr21_17587003_C_T_c22 0 17587003 T C +22 chr21_17587397_C_T_c22 0 17587397 T C +22 chr21_17588743_G_A_c22 0 17588743 A G +22 chr21_17589032_TAATA_T_c22 0 17589032 T TAATA +22 chr21_17591636_A_G_c22 0 17591636 G A +22 chr21_17591962_C_T_c22 0 17591962 T C +22 chr21_17592874_G_GACTTT_c22 0 17592874 G GACTTT diff --git a/tests/testthat/test_data/test_variants_chr22.fam b/tests/testthat/test_data/test_variants_chr22.fam new file mode 100644 index 00000000..fcc168c8 --- /dev/null +++ b/tests/testthat/test_data/test_variants_chr22.fam @@ -0,0 +1,100 @@ +HG02461 HG02461 0 0 0 -9 +HG02462 HG02462 0 0 0 -9 +HG02464 HG02464 0 0 0 -9 +HG02465 HG02465 0 0 0 -9 +HG02561 HG02561 0 0 0 -9 +HG02562 HG02562 0 0 0 -9 +HG02568 HG02568 0 0 0 -9 +HG02570 HG02570 0 0 0 -9 +HG02571 HG02571 0 0 0 -9 +HG02573 HG02573 0 0 0 -9 +HG02574 HG02574 0 0 0 -9 +HG02582 HG02582 0 0 0 -9 +HG02583 HG02583 0 0 0 -9 +HG02585 HG02585 0 0 0 -9 +HG02586 HG02586 0 0 0 -9 +HG02588 HG02588 0 0 0 -9 +HG02589 HG02589 0 0 0 -9 +HG02594 HG02594 0 0 0 -9 +HG02595 HG02595 0 0 0 -9 +HG02610 HG02610 0 0 0 -9 +HG02611 HG02611 0 0 0 -9 +HG02613 HG02613 0 0 0 -9 +HG02614 HG02614 0 0 0 -9 +HG02620 HG02620 0 0 0 -9 +HG02621 HG02621 0 0 0 -9 +HG02623 HG02623 0 0 0 -9 +HG02624 HG02624 0 0 0 -9 +HG02628 HG02628 0 0 0 -9 +HG02629 HG02629 0 0 0 -9 +HG02634 HG02634 0 0 0 -9 +HG02635 HG02635 0 0 0 -9 +HG02642 HG02642 0 0 0 -9 +HG02643 HG02643 0 0 0 -9 +HG02645 HG02645 0 0 0 -9 +HG02646 HG02646 0 0 0 -9 +HG02666 HG02666 0 0 0 -9 +HG02667 HG02667 0 0 0 -9 +HG02675 HG02675 0 0 0 -9 +HG02676 HG02676 0 0 0 -9 +HG02678 HG02678 0 0 0 -9 +HG02679 HG02679 0 0 0 -9 +HG02702 HG02702 0 0 0 -9 +HG02703 HG02703 0 0 0 -9 +HG02715 HG02715 0 0 0 -9 +HG02716 HG02716 0 0 0 -9 +HG02721 HG02721 0 0 0 -9 +HG02722 HG02722 0 0 0 -9 +HG02757 HG02757 0 0 0 -9 +HG02759 HG02759 0 0 0 -9 +HG02760 HG02760 0 0 0 -9 +HG02763 HG02763 0 0 0 -9 +HG02768 HG02768 0 0 0 -9 +HG02769 HG02769 0 0 0 -9 +HG02771 HG02771 0 0 0 -9 +HG02772 HG02772 0 0 0 -9 +HG02798 HG02798 0 0 0 -9 +HG02799 HG02799 0 0 0 -9 +HG02804 HG02804 0 0 0 -9 +HG02805 HG02805 0 0 0 -9 +HG02807 HG02807 0 0 0 -9 +HG02808 HG02808 0 0 0 -9 +HG02810 HG02810 0 0 0 -9 +HG02811 HG02811 0 0 0 -9 +HG02813 HG02813 0 0 0 -9 +HG02814 HG02814 0 0 0 -9 +HG02816 HG02816 0 0 0 -9 +HG02817 HG02817 0 0 0 -9 +HG02819 HG02819 0 0 0 -9 +HG02820 HG02820 0 0 0 -9 +HG02836 HG02836 0 0 0 -9 +HG02837 HG02837 0 0 0 -9 +HG02839 HG02839 0 0 0 -9 +HG02840 HG02840 0 0 0 -9 +HG02851 HG02851 0 0 0 -9 +HG02852 HG02852 0 0 0 -9 +HG02854 HG02854 0 0 0 -9 +HG02855 HG02855 0 0 0 -9 +HG02860 HG02860 0 0 0 -9 +HG02861 HG02861 0 0 0 -9 +HG02870 HG02870 0 0 0 -9 +HG02878 HG02878 0 0 0 -9 +HG02879 HG02879 0 0 0 -9 +HG02881 HG02881 0 0 0 -9 +HG02882 HG02882 0 0 0 -9 +HG02884 HG02884 0 0 0 -9 +HG02885 HG02885 0 0 0 -9 +HG02887 HG02887 0 0 0 -9 +HG02888 HG02888 0 0 0 -9 +HG02890 HG02890 0 0 0 -9 +HG02891 HG02891 0 0 0 -9 +HG02895 HG02895 0 0 0 -9 +HG02896 HG02896 0 0 0 -9 +HG02922 HG02922 0 0 0 -9 +HG02923 HG02923 0 0 0 -9 +HG02938 HG02938 0 0 0 -9 +HG02941 HG02941 0 0 0 -9 +HG02943 HG02943 0 0 0 -9 +HG02944 HG02944 0 0 0 -9 +HG02946 HG02946 0 0 0 -9 +HG02947 HG02947 0 0 0 -9 diff --git a/tests/testthat/test_data/test_variants_chr22.gds b/tests/testthat/test_data/test_variants_chr22.gds new file mode 100644 index 0000000000000000000000000000000000000000..b2846be2f05c8a1cac7d9118c95cadf20ebccf97 GIT binary patch literal 14009 zcmdTrcU%-#w|8I$78RVqSp*aW1QmNUB35Q_2D{GS4gwKnkwq+k0)jQJ1zU_1K}AJ_ zicvAv#NJ|yC3a)fL}N`fYV4XQ>N|IK(G{J1pTGD1c>{Z=oO@3{=bn3Kw?ofxkq-~& zqee945kVk?=HSK*A2{&kc-&Z966p?f7~bP4;fR4hbQoU!#@IS4ydLg^?2(h*6J!Th zPD1)8uK1B&I1o!ar5Tfk4ly+z7$1P2S^JT2^=?0hwB(`Csew%=hxa;F!JUStqzz0T zn3CMcrmOC~t}eK1|HRakq?ELjBvYVG=SuH&u7o=qhYT?dG1ax{SmnKrx&fx-l=P89 zO}2cxly}3@t3p??3T;e=-a5xeV~b{u#Xv&$XzWFCWUa<~+HH%4s!*Rm%O9dw6Xb}w((skKqiPhaOx-HqP0areX{ z?ZrurcK6H3NSfEzdplP;h)pi_cWBU&P(n&dYX+|0a;D$2(WPx(73|n5?mh0FJG6aK zm9(KD3v1~6{_VUob&heG-RYtSp*v;qQplQrhV<>{_rsVNGI{c`m2;`9!yFP$+!@?{ zW3LknPQ418I1^a!dkhh)h_ArsdUkd`-@d8y7uaBkh>8ZOuo7*pfZg|_tgyKjMuw0Z z_Gvi&vlU`Mp{f-q=x$?$<`{z+K5$@=mx|HW3Z(i((+agVQ9s0>${vT$FqbFQ_|mZR zkEiR(W^Meu-MzRA`m)=**SuV%&z|#qx4&5OP`_wdS=X5B8%_t$p6r-^ddl-KJ|)Dn z7Aiab?be)vvgD}?H@mN>?0tMqzfJo`u60_-#Vja(vR_a29JeH=!|6@CmMt6P;q9N9 z)y-pYePw`C_Q9yUoGqu_R?h5rw?A?B!UCo0@JcykO2Ej~S#6p-mpJdbdeZK@L@p}` z&3-mXH*i8{!_ocA$L_c|u3%8t^Rpd~c|EItT5Ri9bJ)-8TP_Rg{Qca5#~s}VgdWqm zZy{zMX?AYJs%~L_FZMiKc=UDipt^BFy#Vt0=G(LEPW9jT^`)IlTc-rut%;cN9M`m=)86{`s%<{~fSNqw#nda?i;j1%nsfE~j@!>idhaEFzkQ|qc`xr34NB3S zYPaWm4qDOEu8w|UQBY~+$f|#5B_FAM?vqpBWK~Nk?bK?gr_a^7krPs<58f0y^BViT zep1`Jvlfi*b$MoAW@^ieX?=753@P3E@=*S;)Zo1%Hb1z$uHPB`lZhGZ#6?x)g)5zs z2e0fn=lChVgvTw6r-#z|pC{Cvc7vKcZOWv$j_GT?Z*4pkzoui|k_nsFciujCk@9uO z%zO94*k<23nxabFTl?;LJ>PHf6y)+-cJ>o`6S=K>!=QtiRj;@7{xa_Mx<8pukM*e5 zWpw;A`*saKB{n}U8JpSd+RHZQPwf5CJbrY{=~clr!QSgUzuvZ=GRG)S=JzqTEZVb+ zSktL+(JTM7mXE(H_TJFy#;cXY#0G|?JI@Ck8#-{HAtI|%{@2M--k9olVK)cszKDRHZ2&E4c}-nQ-&clv}^X5gn?UoIbUamSx+zqbEn`Cke3 zYt-CO_D9vxbw`Gy^%7uhSe!W8vMT=tti~L

WoAF;~j*)9n;T+RBct%YTu0H)CpULto&^4)u`?1w+gyUxmo9@p-WtX zqPq(I{BM5utF7*EF8oPwV|tiwSTm}}l9avVxhoGH*>gmxYx}W9L49Y8igXWdxc>tK?cvVT;ws~cRt^Y=MZzcjMF^Zi%LZ(lmPq3^ykRod3R9CS@LqUzRvoqvMf5Q!|f0D%_K?_+oDDQAL|x_~-u?nO)`TxU0^8W_P)KY{as=&7ag0 ztA*|8a_^9v*Pb!cPw>&JUSu@C)5=}EdDyLEkMKi3)k*)nca1!@P1%WlwWnn?jazDe zY)hMF)0>RinD)i2i0u7KBPPsQcf-B+Q4;j0+GudIS z{4#Fnd&{Zl=>{wHfQPxkM>GRfpMb3n8Fpcz@M`ybxu z@W}bg{GrAq|C|?5t#!A3BbM3G}sOt?e+s9LLj7&Gj4 z>(gP6em(e5{I=e#)55;zbBS;EXU~f>En6;S_6?j^-_L#M{;poxEvL;|wd`JLVsn~{K|Ibf|&qMJX7&pVa5$Ycy35luPiOD z9~`bPo!@>Ip;17j<5>odtLl9CQBxN#6>X-j*BFBtK5$^rEfu5f)OA9AqB(U@X?D3W z$f4?{t-I_euAcqbi)KUGHEz`L#ZQyTK8c=RRCjBB^^aNod{R$*Ub~Celn%>R&f8J6 ztjDpJg%gybHBB~M2^<~e=di~;Wpv-8YYW-LFK@~dpUu19EPTXSukXXYE!>v!!pr&f zq9JXS&O_@icsTy({FdvU?D!|H<`4oN8KoHqs^LeBLw1)~!P-51HhD`S*>%S?6bed+}NG z--ov>Dc(Hx(#j~Wtn2OOP#Z%$?FU8qR==O#_gN;&FY9%?_N)ofBgCG5Lf)bEOY0Jq zi*7$zcel~abAipey~t~PUnmQz{W8P(V&br`$Bgf~;E&Erev~ekEeyZE)Rghlylv(( z>iC?q-TNQpp2m!yIWTcuBS%q~lf3h^oKebb9x!&v<-^$%a?FSO|4>&-DOrDEshHIu zJHPA!RkhpM#trK9YjrGm?fSH*2Wo`x&L&fq1yiSh&(&8t}Y+!tE)4wb;JXa600~!tI4%mpQq?!rxYOzEtZ%Z|D0h_SEYW;~3OB zW!Gj_Dc!f*C2GTz7d>?&?msPw`*iQdCeC+TY#;9VB4z7=q_kp}*<+ZA)18uzJUf4- zZ}6=hX}hEM(fDu0KF71U!`Tzpfj#V5952wshY z=nC#<#tlRTe2%DBmexrP!$GII|DY9yxE{nugyr-x6(k@p4?`Sw<%Ma~O0H9%Xi83j zE*G|SyS9E~Y}?ye#h1tQU-NfJ^T>h&#EL_|J6}92P5Y%;;=t+lGp-0;M_vV=vj1|C zXQMkS=UwodHT{RbcRq~jTJ2)3{#j$Ycs9zLKHyyYNs?pCRuUDVdt7Z&bHn(*`!-E$JUbLG!FY~7mkJ5>m z3I1Ok{=zHo^#%Kd)7H~|ef~INt2~nUjrzg$ zm(ofJ3s3#Evuf$4F{_vCIAqE{l})Yg{OncM(6i67iyrhGY4A;HTqnNP!~VIQcJIFN=EgIz!J~;pXPoQ90BDriiaMpY7psG=1F7qsC>gYlrN6&9|_R|FPDSvJ00h zWwt)qb^p!BDTy7fOrL+`aPWfFJ~RJZ6qHi?LIHL0`q7h{SCuT9l#|iYzjX4RAG426 ztrC#uJ@OOP4{kN-ScZrz_9ww1TK1NX*m^H`19rYaxT9I^jqqNBLA|uJ04At-e}2+U7PE$&&NYAb!F%b7gi_4QCl9@tcs<43nC>o-js?O_`yT73P5<69^b;UVHk4mtu58cY(wg-*gJnw1b|2jk(IB;f>S z4i=~ci~vz28hWZD7~m5mlmMv~06GS+6eUTdfH)l`vMfz7xs;$uoYWjq0YH_bVklq@ zaRhQRlR1hH<`np484Fk32iTQZiNgYh3bdr7I0h?00xXOsS6Ez5{l}UjBt%%X%S(vn zI3(anfS}=y5D3sVr)a7~3l+Arocoek4VW=ag{nd6~;&9+9|nCg%Ac{4s0uM z0%Z5rPy|ULfN+2j4jX`~t59$TQL=Fch!x6?_a#|@rP*l2NQg2pd@d5eOg!KR0vsCWD7v0=q1*IIKV)F`g3yGD4GvCV+#W1~$|>g`nXAIKqb> z)D9H=UtkCvl#s9tvV(?z7J&YZt!M+%4aMBBA2FERO$A-$%yNQw;l9_z49M4e5pBAY|0x~>95)~we@PHfwc=fEP z(!pT^4``TQtsx~y$k)lhu;VxD3k8RaXc=TJ`Fh2l6gi-Op-CI;L>A7tZEOWWi9_IXA#HK0wS>;&@FOngCjnaA_c83$m2JgX@zBL?Sc+Q3k=y1FM8` z(udMUN{pnkL{{kpDpBPCjo8S+5DW*7iMh7sD@e9hwNoFD%Ia{um0c)kKslIQO#wA& zXaay!_{WAqlB#_W2v6iNGhnNmRWLZ8Wh%mV?{S4qhPeVPRxhcblJ~sC)#@dhsQwQ} zXzB_quX-6Gbr6PFG6Gx*&M<<9GpK~I_^t%L3#&*x1(%OdwF60$$35Q-0|mmMTtLMj ziXH+(>Znc>xDUa1qC}Ba1XBgB3~%Ct(JM3`S|}2Vj11AD%(zO7zzOPX%vrcX??%)+ zy0Rtymd5{j#9PVP3X&rV@`4T$0`MjxJl|RZpRfghlE8H>08Oqm0pL}jV!#hh2-Pth z*x?|`-qHXlCeB!CfIT^aG^@6J$9FYpXaY1{lwlQ`3-u5Qy^Po{0>Lmel+q9engC52 z@5up?&ViWkhgx|8&>W#DIarVYmBEt8L$#m)yfn3|sVhZE_==16P~!Bta=k7>lu?!qv7&$|ThkiAlgl$4izf;K z;oyXS+lfWb7BE7nheCKH5c)2HonmC<@vvyHndmDdGLlzqE?aFbu!uFNHkTEX1i`6< z>t)Q90$i~wSDJEY0w4^_3l{8GFabdAgZ;`@z;{XiysKR(NXyfb zCUMOXnp|l*gXV~LV^Ai_V{o8mrv7*u6)Js<0J0(MBxy>n=_Q&fv2yi(z56dTG$vt9 zU1_RBDNqxcza^2$Ja}Sf?1=+4+5Pwtnrhdi0TE@!Rff!QGAzd8IoQ|ohyAGv)3%K# zN_yj@T({f_dAT_yiAt17kV}|=*|3z+Rtd1hL4^f>0(>ml2XA@EY(jocghy7Ml3P%3 zdZb>FO+Lm5kut2yGW&0#DqypXkE;gQ|| z`%q$TJW;}Ck*vZiz!fA*%@J1Ocs=-Tdet29Z@T*M`a@IgZ)t!MupH%um*xmsFLPLl z1fPQppmf+;gMDRCq9!V>X##1=p-EhGgeGy#5t;x^;vY{#GsysrcRUfvITlZ>NSu_H z{)hO`49<$|!kXVwQ)@b0MML3*Al|Y7G0N9C7B6{&3PuPL>}BLisD$NUvz1e5-!MyQ zpaJ035wdlJf#nRWFOCl;B0}QB3Kc@hg=L?TYjl`%7aArO1Hx}y$yjw4hqOd32}P07 zwGPxrh>caNaLQ7JQ&5Ft07j*M9~I#t7myUtU@;AzmN}WEU^!{v0kkswbT~Rf;)>mJ^Y~oFU{IoRS+{sSRUDDPGkSw)31l0)S_J(wvU}j*O z%n&O%K_a8nM5Y%FKE?!)BQ{Y&BJV3Y*~z3{pHC2a7M50XVXGOA4|2t@5(QI*x{G6= z;t7t%FBt@miJC3Sm7;R|-9?28^$9J2HP3Vr_SvH}0U)4dT?@3{DV^T{xC#IqA73#{ zC@(7pR*WP_y+2J9N4W9-XMj3dsQv{2(?k^FoJjCb*&xy^vKyC((lKhZPjO; zej!4j>VtpSFj2KD0lPG2SHg)*QzatuFvZ3@iN&h7Q|{pHv8DvK1P%rPOCUqV7_7+d zr1I?qbtjduY_Wp3!xk$8@zDw1N4*u$suS35!y6E?fznfEcPE`5l8h{IjL7-J7B@}+ z<#`vl09JqzYYNBjh~a~s1=11L8vJvJ0IRZ`9twZFQ-r_wfj6FUPEy}0t0ORNNOS54 zFgqxbKs?^9h4h25Ln#A!2*K-ASb1W!ET#jAk3+IRAc2EKhs-o7(D|gmO#(P%K%Rk@ zui$7j?G8ziPzc%$Y6*eipgMpJas+RWfhv{(%LJ%sc;O2Cs&%@$bp`;q0|3y(-SEq9 z4!aKCSA!RR@EG$9o$v|XTtPmjp%3iOgS3L89(W)g0Vn*z3osBQ zG%!L3V<>nF2)eRb7$A1IzEo-0sWc!WX?RcJ1BL`I;xXeu1V;K`egqn_x&}a#2IEFk z5-s!NIj1~Hru9^;LWaAF0kOyx#xRKIIbEnEf$zYpXxWv}`)C3{Z3Yr1Uu;D%`djT{ zAvm>khPPXA0h5YqC}c&&y`^D5m_E+?^Nm87Wdv!%l>xY- z^ggN_jA)TVqV;5~0p#dhF2@H(U?#1;3m34n3*0C{p0Nhngg$4 zXh;x~;B||dCcuQ60FDC;GOb1cNlSngJd5Pvl@QM&Mv-_O;`JnB=43lcK%z{Bfk}aq z8Wx}>d~oUxE3P6qi3T~$I$ok#Uy?JB3ImdZIsq^A0wJKg>I^v1!97X1A`apRh#}rg zIDkyD50uwhBMh{i4@WViF9@ip0`m9`7eRglh>;mL2;pD0%u%l_?VBF~*}+5WkAN^E z2>Ge4YIM%1(Ef#dz%9!^6Ot-q}Kz&BddX&yO-H|{^gn3m>JDc+cF zY@Cp4OfpTuNX@uG2$LPAURhd*cH&=u8WXQ}Mkw5R{?9+kz$4VJ-hkc8zj}iQr{Knn z8;dVRsaLk&|Kijqn&1CI{gDHFT`u`6aRlyo_`uXcM0m%4*Tj8iPH3Ss7#)O(ismfe z{+Wtz&A73o&Q$fv(qgF{{`CvJIVv`U;5Rzz@eyLX!T_7Ey=CI@X52t{!^fyz*^1ps ueWEEgk&PNG*J&a8N literal 0 HcmV?d00001 diff --git a/tests/testthat/test_data/test_variants_chr22.pgen b/tests/testthat/test_data/test_variants_chr22.pgen new file mode 100644 index 0000000000000000000000000000000000000000..c50569041f2d6974428cb10a2924c13ee86032b4 GIT binary patch literal 6364 zcmbVQU2No56+Y+sdSXw0#@Df@Dzr#c@WuiX;(^B^5KkO?GP}t* z0~JojV~@|zcYg1=e*RAFvk1@tfW&5Wrb8Hx=Lk>G;pmR8=>owK0(6Zo1qufp`lbUN zM?-<_8-bWI5TQ;Vx)PeuM@L5)acD?CF~_!bTNfJ86CpH>&VWE{n0^2mB9PEgE#;(V zAZ*7T9Tj#;?a_~^0^u0+A$h5~R5N{*=sa8+mJBgmqU#dFQu-LJlAwQKVJdQ%=!R2;M4R#qlNK)0M?`A0 zg$Y82M*5h3Ae+*IBp4P6AwDB>RFPy`MN5)MaLQ)sgiI%r%#2hx-HI{+E)ixzlHO+D zq&~I6!i)I-`(Cz9dP*!Wtse=$Td0OM=dseRM@e=|UM2 zrIJoQ(5*7+CBrIPW%41}R5HjO1Mw$H0v_yzm6-(aA%-xw1i)llp#UlPB7+q9i}w(^obsUr#tb$|%O`YhqxuqHu+Z z)?n3|zsRe;QNa;o)Q*Cx8LWWs@|fJKjvI0zbioH#;j&?9aQ8-HfH=&#lu;Z8+%%(*RhG_k9!x)D9faK&dT~Vz z=y$0yq2$JH!D6$nex_*zwbmQ%CBC9I%&qF=`Q_!;t3848k%i-mkwewS9~1ub8UBlA zypO*dE$`)moL!Vz-G+hd+v`^nC_M3iD5Ax3JUL)_iX)v&A}*8hoFjn zi$rNQTo|)6!S88*YWl--kG#F>+}AG;?J`L#J&53(P1fH?7@Lg*Rk=95V&BBSU6bdF z2}sryn?&UTz~Ju+^>uX`7@nhEXrJAR?Qi0Q4Z$iwZMqy znyK1kJyRP+g!Fo<3OqG6S0ZyiG9v|Ii&T!dysa{as;49hIO!bRE4YdBNvcZ};82y$ zQBNf>cxT!o+OnY|y9;#3y}@nw=2-qunp~Ke!YCpFl$JtE)+9saB7y&)P`|Efz}UH> zgc*s3+ImZEleBgJ>)LI6aWx1mUvNd9lPMnP&B;Na#eTEnwws;25N51-GnWI5Bd9(V zL_!aR@A@vbDQ_IepuC#@k0!%k-p5Zf)#=2p!DhHyPx%L2l$j7<)t92iz%u5F_BUTf0M!g_@2|E&n{D7J3EaU9H!_$@;T7UWfXDGC;X*)vAG z-pngt^%EQK-%Li?uc;|W<*8YAl!n`$>sSo!Bd+=d%N2$qTUnY>*#}3Kea=Bg*b&Fe z+wN0VykxebiO2kf#VD%Q1D|HGwt+*9G-nHGFVm>u9;{%~*b@GX6{EF`=3_pK7e%x} zEA)QvI*};U94_K;m$-bXZm4Nb1ucBLC!{SQCx_O#A|K1JJnEb_}-QnuZbnU z4|hh|(iXO|Hau2o$0La68v_=Te>qUXI%F}*6qq1zTRlMn?t;nRnb_xiXxAqb|#J5 zb2$(noigw8oG`4&4!rz_C$79A9!x+k?XrQ7QyO*0Jv0|Dw92j3&DO@wv++XHk9NfP zC7KI*8wfC$w1*+j1&EC0iyJj-Z8L6dO}sR3#!=H=tOcUC08*KFg_d{D+lZbuHYT%Bs#*NsE34<6Vk^8Gphw=!=b(8!T!w(HR zJw2C8zuuHzeR*=L#lQW>@UGRD=q@^1TxK0m^;5Y~%#JY@ta^b7z32)xEEgcc|#|l5-K& zYN30j+}iY7akUyRw2X4Yl&{p5Jz-XZwC2$Z;i9=5xgtWv^%LPz>DEJ)N>|QG*9@xm z;s_1sj@mGQ5&w9wBKnjzkr)=eUJ8P+kBgV z?K}G~2|M+f17SqT7eSXeVkFAMW9a4Gw#tkuaM?W=EZJ*bU_>xE%a~so^3y znBK7Hu6=BOXoWPJ#tvmuzZBO@3ALgr>q9c_7~UrRdvCdGbB_>4s;I+81YkAm4K=uu zMN+O%BbW`*{fkG|@UCyLBY6_2%{l@K0lP1p)VRoRr2 zimI5vF$!G~xMMzq?j%Ci-V@-R*C;JfRzGdxM7W%^u$ zGCePn5!ba1I?pH}*lN08MQNA0b&k&Jjgf8I&(5NIwQ+TVA0Ju>nKV3Q&`ajCKDUlF z!YgavLLNoQ*}SwcKinbm#86~ zwP8Bj@}xmZy#%TbAz9(vN;ef=aCO(DZ)1WHXz$W^~ z6r3d+b8Mol$)Of`E3Ud3Mw%(tV}4E##_QdA|UL{Ot+L-d>}EDLds_flK0S9 z;5fzva*g4q$qDOlgCoq`1*`TXreS@h@Q>Hvo9B|jAHI2SkvqF1>RqUK_Z8sx>@sr4Yx!emLe@yO+h`aJ2NGYYC;n(SSVmh-Sw{*t%T6X|C>3UX zfzKA6Z`xF5AjR5lHjOK)ZFoI@&RFs*urQ}@nvLw0r-j*jd~`+L!yV6pVykz~-nYzg zNsRb6wWhzlxt(?MM&9q%Z7(ilwLRRN@y-wSdYvE@g_^t#e8t-aGiihdxaT*#1%2G> zbp!Opxu;9I*31+9`h{k9>C!l;X{vkneNSqx#C1#S6X3J!YpWu+R#isILb^j|u6246 z7Ud_+I`x-Fx!-yznVSaHEbl88ICP@_IM?Ypwgs@JZ#f=0E_r>dr)l0(Jh~rBB3)ou z%nEq@d~xbr`DAl(fOI42X>&bqjyT$@&)f8-655TbS`*d2qR}tZ-@2u!XB|5Jw(vv%7vGGw%ZI~-F3HMkZ zn^8P31xX1+12K|pm_OZ0_-M@+2qzwseI?!?-Z!RVLbJzG8Kb`1{4HGEG>`>5$(-3dPEftrERy!6u0cM=MJ>A#T&kTQ=$9YP&-%GfjWF znwxyMRYXt7MNhuE30V804hcI)O-)nd{$-?ohivCj;cykoYQ57FN(zKA6hGRIsInU3 z8x!r$7-EILlEcj!5+*`Mml=tFD<0L@{mg^W#9vznx<09z;AVuL;(7WdkPD0a(>~d8 z_plu!3bX79&+elU&lPY{nRQrwM5X52i}P1_G?-8q}o2Wq&TvwJ7u8&uGiBdMD72 zr0ItYi@xKVT#4316F6d1hjoelQdR;CT2MC)_spPY%9GFh=U@fLViz+oP-%wr=?C%^ zR)H58(ixr3mCh@5J>pJF(f3J#;&H`~Cb7=@%Y!zs>$l)0*^u673`D*bmiedV!Uy!U6r;E3oT=a` zJ~qlBoA(3S^~KK6y4j|g#OtrWm(}g{{Wa`IuV6AUV0MtmcZ^;PDhD*`@$T^FsPK+e z>NT!Jej}pTW49RaFaI>~9&ait|W8UnZMnjYzwHbkw4c?eTO`jG!w@Ho^hF|@zvz`_Yo3)ypoGJSr+EAUXtk$g-?HF?AFUk>JQuyFp&PCihVutN6k6W*hk3l|(zl4FMK z#X=z?3^@@UlyANdE<KXDCLTk{{u%9g5toS@QFM>+2$-I#0~DCqp2w|k2pqA^d&!rTtxXHBKM^= z8X=lGV_Y&aDJmiXtKRt7N+r?BxESoRbRbL8_-n5x`&gHg(0Ws=YKT<*CPdI|&nwl@ zUP;JOyx1@;powJkbAAcMyOWx3c2Eeq)}uwgtdN(SMmYar{7rX(^lR^@!d^3nW=Xj( zV4%P9!{unS3Bo&x=rG2{eNs@qG#b)WZ>%M0l8uRu{ln4p>pFt=O|qonwDhWV`6LEG{+Iv-cEvjozqeNOF@zetR=}5w40n8vB|#Z2d-eMe-dqdAizLHqB~;NdbMXt zq=>73Xo+k1=cd{3W127lq};Tbkmo@b`Z-Yic^g(O#@JML-qGq3V@+ha1I`WEXg|`} zXF8sNy(GBD>Uk>oSw{@{8LF+(?NV2Ch2DA`_vGQ} zR!qEfCLOLdn1_p0vHDr>n(HHnpdoo#K8t3lSs*xmf?%#N=xN>B0ad> zz?|Di5c7d0Sfl8T3Fd(Evg5PS5L@}7kd#Th*qZK9^cy)z_g@4y=MlYA?Kll+NngL$@}6JpodHM?axM!>gJ!?@Dr zCSHP7e7OEelRPs~Nqik`AEPIih?&ZB_S5dW<{sk7gv`snqVqK#N;B4tbB?^T#eYgX9q_3J7K z6qkC>=)P3ZhTlX4Y$b_gt2w}#S%)*EiD#aCf^5hl(;d);JU;9xEo+u`4kr9{N+TOHH*N0yY0w7X=X0nk=5$ z0jFFq9lOpny#}5={doxsVtaP~xV7?o-u~}KPcovCWQ=sk7w9nP6#b6OU%JL9lWm{^ ztJo3WB*sBzw0#VEF=H#@Ew9}Dni1dM__fIfKT(P_3x*P1`rOP$zxVPbrDy*kDGayZ zdkTa~iKxDmAq!LXQx$Z!|MGrs?APqp3pC7&O9t)(=VLkX%4;zN;rs6xXM!-JyWH(} ze=p1@+PF0{8>gpZkxno@$}i3x<8M8zdxOWYkF-*_Pb-*4XrsQMOQH9Rh|hYe56X4!zRfq=%A|MS8YEeq3~R?Dvk=M1gHo6N zgEV_+ZqEtvXX*~**Mah9Jc$9gt2_<5z8V#RZbGsAebS+|`$f3-lIiCC&~6Mx@LF&I zPCk1=t;{-fHA71yaUH7$I^qp&76$F3U$m1iSn%HjWZDT!n^~F)CLEgtKh~m(9bmNf1 zZ|+w&s$*JaWNf0t#+8K>78B<%$xHY3`S1&zZx7`{Gw5#10~+5U@MO*s7@dhqH(?PY zTuF@OM4M8xCA91F+Y>GPcmBSeBJU*ch%?tE_bi@eON{R4?7hfW73g(I5kS_pcCqZ# z@V;AH{kxfnfvqSA?g;9D3&V0g5LcL!M&Gku)4$SRwb}5b@g!5UsHf%m4eeHR-2J)C z*{;CHGLyGn6H1{{OG*lu z|F09`UFKKkG!g@ubd0qAZSWZ}iqN0UW#t2Vfh@X%C&J+<(Z_B$gLBQExMy_Q^gOR9{dcgj-Jiqs z8`BZC%9CMT-xX~+0Zh?i$?Y`nL%G>U87oz(VkO93sUYaP-8d_*ibn7x)0i7oZ!TP? zNs2ujK!+O=)0+6XT$%ok8Fro#u_d8tYd4bRv`?Shbt%};$l3c*EqEePP(nfy@;7!o z^6XtwE>F1iQxSd2jIe~Xd;UvUJQe5C7d!Dc8{fQf)3iTK=?{2-t%}goS0;O8In34i z3c|t;6vbKgb-J9BsI2F_BtlRh(+4R1??dJ#tLEddZKuRvec3RS#HkjevkCbuih8X* zYJBy}uF3uhwY}a?El19I45gfBJSfL6TT7*IE2vp;Q5XcV@i)G_D6Om|6Q?(n~oDjc#Yf_egPAM-?Wm`5E}q)2wL{kTKscF&v7hB|m=K z8P~vxbqj%#jKs2qe|Wyir1sB@;t+|&8rBL8&Em#c^QPNdl~ZbL(fx`-PNUSBqbPZz zxZ4In8Ix%n9K}(@7-~k4L!c$wdUYgEk}mAWmZ$c z?1yo879OFPZMlr^Y>)o9zBc)444+`J7crmCl`3U(-a(A5{mP7sNJ=G_g^%$r{6y~4tluP-<%YoUV zem+>G?+c%ys9e6`azA=;?NzZnkerH`i?VH3`=Q%*& zX}VxTKMMMC8HD@yYAMJGq}LBGn4|t2?b%R??Ot?MO=REs1;b4^3Hl-(w>;doMawZZ zt0i12&tKoMeP&>~O%`MS(e+lnaStQi@q(bS{!7u8Qge)}jBN+{V(O+Fvv~#DUAg%2Uoi5Oua3e#!d~c<+!l3U1MPtE3 zZ$EgN`zqjkO5zrn&EEZ{!NNIGkD@g^J81C|HdPQ0$GW?zPh{|-> z1gz2$9$_p7-tywYNZ-Loxfu z^%z|ae-mA$fQ@~tX@&!7YQ??{ACmrdXp!CeQA2FiJL_>zhhB_X0 zPUr*E2m;YIjse{-|N7bGx=dN3u}x{{E7peYy9m7pgEhp!8pphD1AD&>oX9KampNNy z?TumYp~|D!QiRCrEo~_3tfbZYh0g^C?{qmx3Gp>8e zN!LxS;%(9aRQvf!q`u?&j)Tu_{~<4;Xa(zj@+K$u(`6yl zHcd-1Y|%?L%44BuAj;LM9rcc+WC<|XOzc_zR*dL;wq(VyjF(MFdt|? zSr3xRfr!@vY~{uX<9~6gw=HJ|kMk+-Eb^v+p|g@epMAUXZq1zMe=_3gbD|Rx*!^Pn z#YL1OM!-CZ+Hu&c#h{1-?NsTlif^ML=8hYTe;sIx0vj9|?CW$hG7th<(1M-wtH-C; z_`MN-zt3qRJC?nP#o_g6I$u(|PT|Dzn(=j^of>)gsmy%cl)Mlpuaj_0tl18wmM#;I zv>*w+((d7%a(2|36^4PSAO#n}5O)(WK`YmLW^z9@`!_($zd;{(005f*>x8h9@W8Ms z{)Z@Tj1`S#fS&<&upyOOW9@5|zswXzw`lrZN)!WM%u_Q{`K|C^m>hfevnJNrJKW7< z#))c+Up&?7iiaf{4{;`hJ%Tq5jvnsnJ1*=$?0GERA2!yXAKYJz)CaU8_m&*tQv{ld z2rCw4T;`22GOYCLb(coG^q7#@XaJXA9SD0m-zeq*&^WR~@KwA&LUW>@S^w3Rk`ecfAQKY-?hfb^< z`eqAI=ZlAl5Si8rzh1UDHQRav84*IH^HN3`copnz7ML&l8#QU#fqY= z9tiKL03*oiz3qM6Y`tmGMt0ZA)Wn^wkN^Nd<0;}-<8uRD4S*;uQI~lf>*0zK^Yd%q zQK>dEgtG|{(;)~6;07QP!J!pmha)0a4aU`@!%~rJX;SG3$@Gea`dg$3-qEEUQ~W1? z!Eh?H%BJCvHqt=;tPuXX+a468VFP^vMI&%G1(;e!J{^>4&i^tZY5q##){lLmlgle{ zHbIjLc+4uxs z)_M$=Ut1?t-_*|7`j#&aM*^ti8UQ4Eer+vgkiLoDmB~|eJZ$j1BseV-z%`V!S{_9~ ze)&r5X#_PF6@%?%7WLE>y8;b?pazqsu7qWoD?@2!^ZCM;EQ%yNy7D@^)x)ADb&4-V zN$`U4?yG-+f->!C zhP=Z4ae-?}N|FZPTf(qk)s^&oz9^eeGi__}TJ*<4C?WjZG zkud|W9(nL;ihre~Zx`05V`7wL^?588%VKY;P!@spZHaASZVa*&kBif`p3TbdAZ7lkGEC382tn}h10Bs`s~Vxd`IuBY zWcH_jjj_D zHs(dQ)k#f{9f0F#V08i*ycNemM9e53MBq_*T?E5|E*=q0##3iqZck3sFH&=R22h$aZH-AT}!+t z;WDXq0^R@ZD5BQrdjJ8$CT+tb2iqDdGeI( zg27lX;KK;J)wx}z-W{QV`MlC41R*)e+%|MHw5Ki|@qPW_<|$ct3va{o`uU!Ox#w#Z zq5V7KVmCw_Re-1+=WW@cN*U@n9s{Q}I_FyS@g*t&C zPVGN3AuCSab!6A!r@IPybBd*nYIQ~}7C!0rRlt##_0xvmsN^5WjDi$2!f<{=2)OLH9Wj;F;i%W- zJXWYI;!hFM+~8UF9i{!oL+@tk$j1-|?zNZgMSx}|+!d8$&wKohdMJ)m2oS9_8o;gp zpZMon3lA=qBT{^x>izMnU%*vAC&j#C^{FHuG1&`Ut#gM*c4HuAnf;# zdimbTZ+4N*dgqTgTKqFdMqT!=sEN=RLxH0E?7C84x^AsJX)dsE^y8uns7%|Qb**}z zGfE-$qR)P8Yx9aP(VE+##X9E~a+}u2!-r2bw zBMyRX>wEK__khBgR;}~GO`1oqDt2t3(=QgV5m7hq-6R%s z1po5^e*WCYvL?wSQ5Rmhur&KMg*e9mRfse>Co+rH%i!Z{E$S*ywJSxlFl#uQxIyw6 zh_2-sHhRKd`ED~bBgN zw(_IDq3?sa=0+tw^xx#SlJl+Qwe`0RtH{U zPNmuEje1qQz{5mXJiPTaM{3qE(mouK^16;JK25(xX0)fBGzEVVrD0|0RS2nn<0JBU z=PPVyIUK%kWUYK5^Te|gLz?<^yU6opax7v`z@asp!xPBDN zE9lfb2>=Jex``2zYlY%@OsCm^!39m1>?0fX!SVkI@97ijCsY`k3yywt{5XD(l-}k} z%SUAhdZsZk0Q)eOgt?KgZOEcu{!flV3w-|`2;I<&_%G-7tj(@p|4tq}gdi+T0j`8w z4Fh~nlIxA*kUB*#O?%0mYTwMU&Ou}U7szw+{k z*Q<>*Y+afsIe$-QO51J!nFDqV`mz8&Q0V1?EK-yPNVp4qe_MzAxQ=tUz4cOW)Mz}o3rnTaj{uQ zEuHgkHv^ym7~WxXb1Z)Nuqhw4xQn6R_-*?H+D3yQ@l(!(T7IL8UFLeGQ2OfU%O@F&iU$;+~jyNdr=Ee@3C!Bgf zoi_QMa4*;`Z30Qq(c1GNuSq_~tWTV1M3ROowJzEPiJGj>T|IIgk{_ zXj-_Xh6?eUtPk}}aMkq|%tS3!p#l29lnEK@FfS)Vv^r>~*LY4rS)>9Dw#W!DaaNkb z5kNhEg?t zsC@y?`;8Vz{}L@`tzsd8l;Lpi2E$64bFw_X1IuH0)M{op=hH7MHw4+0D0a)9%e|4flwR=uy7)f%C^7%Vss1x<<)Uv2Yp`(o=ELoI{5C3|jRrIArlbWSk zV^&tcKAC{Nc5C8wP}S7>XFUnfJ}c68Zt;f!P62=r)8~-~3e28q4AG;NG`t z^`2)WNV~Uo+)F(~W6?}Mva|-HR`t}O%VFp3C!BRZf?t(K8t6T(A6wbW4+d~gtK+md zb~r8HnzF>fWf0aoCNK%e6ev`s~36(m?wS;6k5wFw9aM zn{QlfG_Wrc=yxv3#whh5fQ`4YET!#IoN5zlu28u>_EW`@ z48K7O6!X!s*_PVX$o=mZBgufdYA^OJ5g%GmJ=QN#e^%7t+Yjhydg|zCI47rR9 zRDFW|pQbS1gy1dIG2+3>uY=#ii8EV`X&AJ*oF%}*Fp%&8>Df7{`1f{^r|tbhqvlP5 zBgLZ7)>pY?(#W3EllFDHXkA)_ke&RTjs$d6Rg}*oC-O8?Q)!(Cg1} zxCSYymX%lVLuTl{VIy$ql;JzP;)iQjWJlbz$zum^0Zc{~Q)^%Kd#XSeH&a5zG^QP0 zn-|$LV}5~$`y7FxT$_)k0{{40{KE*I<~Ob$zj)XE$~IMDiKB>|-`sFqmKT_FV3RN* zvWqZq zZhhs&S2ANI*R(yf3%3_%(jFk5FF<#lu8`p65aXo6jS#?D+5E%zVL4tl^h0F8d@^!w zFTvi%Kj8cznn>iV{0W&E8xYm^Cn?h|t3)zlB!LA>4o<@`6U64z2o5U)oVF)@ve%o@ zV)Jm&+>KVyl6@&-+vdt^!5Tj!0v4ShvM_y%OaEuK>b1bMuI?_wuyBUC1TWLRX&y z{N^-f6l4SFhIRJL{v312Qlx|~w#c0JuRRy`5jSc}0yrbmjR>vPtbG>l*3bs57X9n! z^Z_MJASL_{yjm=j(@K#VN%_vKx^6|L+3gv#tv;|HndiII7IShVGV1bckCH1_u~{}g zSoySna!&Y{#;7(BRscAS!U-}FF4U!LZ*$-jc9OU8ZpaQ1PNR{TgAwpD4>8*H`*&?> zzK@((IT ziYYXd{&vbL3-jWMkP0~NR9I>HMmZE0DyC*1XMTiZqM9?%I_`%v^{IJX%^}v?i9MMYG;|p0yAupmG1}&Unj- z4G+ZuR8bfUnbC04$?h4Q?OX*@3D(7&AXra9KpG)}sllk&iNbWVIieBVP;$0UVXUk( z6i^q$HidpF~j=1M5{pr{BDBYo~jA0w=l<}19_g7LdsoM+w3 zW(>A+dNnv)4ir4{BL1{xbi`O%wrg!2$}v=aLo_ zUVm%$<{O2L1|$g0f6TOQibeXX&O4YkqAKH>eCobtKn()T()IDi7-JTon?4_buw+1L zD-#iZq6Y4VW~UjD_U+m3o4R^n2KP}Ds8;JUA+@bvddrUkD#2!wxF*=bcWYnfLL;}W zyhL>E+S6@>>FRrjfj0`*9s6?Mp9d^^O^)%$Oh}BH46X!Xi`+>h7Cx@r<|SgL)AGCI z5nP``HXk7-zj7mZQOb6c+iuZQwPR>)bw%=+suZpn1qy;`fG7UZTRx8(VU<$}&$;R0 z6jX8Mb7ETkob{<)YJy~%0BO`%8;(iE9~i;X7R;2}jkMB_ml~Mm1ac)oEPv_HNH!p%IIb0b%*~TaPfVJy+YMN z1DRZ2sx-S5k1kZB&^qah-7?iePL`dOHRRYP~QgHEkaRdvHO4j%uxW4f3NcY)BG>}V44{K literal 0 HcmV?d00001 diff --git a/tests/testthat/test_data/test_variants_chr22.vcf.gz.tbi b/tests/testthat/test_data/test_variants_chr22.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..3e2fc92eff66b57f7b2730d224edba63f3084400 GIT binary patch literal 207 zcmb2|=3rp}f&Xj_PR>jWn;72STFcAiDB}8X^9D{PZw&^aH)`3176mE_FPrusy}&%r zZv{i|a?vAaPPd6hAC&z3^j_s7(GO*-O9L606!idGx=RM*`#~G1@BbRyihWan(gZEdTE^`Y-1Ha$i gYbeRLxh38ArLN#iaAmy`1A{!8lcgD$!9D;H0JsuS_ labels", { + expect_equal(pecotmr:::.fmCsIdx(c("susie_0", "susie_1", "susie_2")), + c(0L, 1L, 2L)) + expect_equal(pecotmr:::.fmRelabelCs(c("susie_0", "susie_1", "susie_2"), 3L), + c("susie_0", "susie_4", "susie_5")) + # offset 0 is a no-op; the "_0" not-in-CS sentinel is always preserved. + expect_equal(pecotmr:::.fmRelabelCs(c("susie_0", "susie_1"), 0L), + c("susie_0", "susie_1")) +}) + +test_that(".fmMergeEntries concatenates variants, renumbers CS, lists susieFit", { + mk <- function(vids, cs95, fit) FineMappingEntry( + variantIds = vids, susieFit = fit, + topLoci = data.frame(variant_id = vids, pip = rep(0.5, length(vids)), + cs_95 = cs95, stringsAsFactors = FALSE)) + e1 <- mk(c("a", "b"), c("susie_1", "susie_0"), list(tag = "f1")) + e2 <- mk(c("c", "d"), c("susie_1", "susie_1"), list(tag = "f2")) + m <- pecotmr:::.fmMergeEntries(list(e1, e2)) + expect_s4_class(m, "FineMappingEntry") + expect_equal(m@variantIds, c("a", "b", "c", "d")) + expect_equal(as.character(m@topLoci$variant_id), c("a", "b", "c", "d")) + # e1's max CS index is 1, so e2's "susie_1" is renumbered to "susie_2". + expect_equal(m@topLoci$cs_95, c("susie_1", "susie_0", "susie_2", "susie_2")) + expect_true(is.list(m@susieFit)) + expect_equal(names(m@susieFit), c("region1", "region2")) +}) + +test_that(".fmMergeEntries returns a single entry unchanged", { + e <- FineMappingEntry(variantIds = "a", susieFit = list(), + topLoci = data.frame(variant_id = "a", pip = 0.5)) + expect_identical(pecotmr:::.fmMergeEntries(list(e)), e) +}) + +test_that("fineMappingPipeline(QtlDataset): region + cisWindow is rejected", { + qd <- .fmp_makeQtlDataset() + expect_error( + fineMappingPipeline( + qd, methods = "susie", + region = GenomicRanges::GRanges("chr1", IRanges::IRanges(1, 600)), + cisWindow = 1000L), + "either `region` or `cisWindow`") +}) + +test_that("fineMappingPipeline(QtlDataset): jointRegions=FALSE merges per-region fits", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmFitSusieIndiv = .fmp_mockFitIndiv(), + .fmPostprocessOne = .fmp_mockPostprocess(), + .package = "pecotmr") + regions <- GenomicRanges::GRanges("chr1", + IRanges::IRanges(c(50L, 350L), c(350L, 650L))) + res <- suppressMessages(fineMappingPipeline( + qd, methods = "susie", traitId = "ENSG_A", + region = regions, jointRegions = FALSE, addSusieInf = FALSE)) + expect_s4_class(res, "QtlFineMappingResult") + # 1 ctx x 1 trait x 1 method -> a single merged row, not one per region. + expect_equal(nrow(res), 1L) + fit <- getSusieFit(res, study = "study1", context = "brain", + trait = "ENSG_A", method = "susie") + expect_equal(names(fit), c("region1", "region2")) # per-region fit list +}) + +test_that("fineMappingPipeline(QtlDataset): jointRegions=TRUE fits one concatenated block", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmFitSusieIndiv = .fmp_mockFitIndiv(), + .fmPostprocessOne = .fmp_mockPostprocess(), + .package = "pecotmr") + regions <- GenomicRanges::GRanges("chr1", + IRanges::IRanges(c(50L, 350L), c(350L, 650L))) + res <- suppressMessages(fineMappingPipeline( + qd, methods = "susie", traitId = "ENSG_A", + region = regions, jointRegions = TRUE, addSusieInf = FALSE)) + expect_equal(nrow(res), 1L) + fit <- getSusieFit(res, study = "study1", context = "brain", + trait = "ENSG_A", method = "susie") + # one concatenated fit -> the mock fit object, not a per-region list. + expect_false(identical(names(fit), c("region1", "region2"))) +}) + +test_that("fineMappingPipeline(QtlDataset): mvsusie jointRegions=FALSE merges per-region fits", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmPostprocessOne = .fmp_mockPostprocess(), + .package = "pecotmr") + local_mocked_bindings( + mvsusie = .fmp_mockMvsusie(), + create_mixture_prior = .fmp_mockMixturePrior(), + .package = "mvsusieR") + regions <- GenomicRanges::GRanges("chr1", + IRanges::IRanges(c(50L, 350L), c(350L, 650L))) + res <- suppressMessages(fineMappingPipeline( + qd, methods = "mvsusie", traitId = c("ENSG_A", "ENSG_B"), + region = regions, jointRegions = FALSE)) + expect_equal(nrow(res), 2L) # joint fit fanned out to both traits + fit <- getSusieFit(res, study = "study1", context = "brain", + trait = "ENSG_A", method = "mvsusie") + expect_equal(names(fit), c("region1", "region2")) # per-region merged fit list +}) + +test_that("fineMappingPipeline(QtlDataset): fsusie jointRegions=FALSE merges per-region fits", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmPostprocessOne = .fmp_mockPostprocess(), + .package = "pecotmr") + local_mocked_bindings(susiF = .fmp_mockSusiF(), .package = "fsusieR") + regions <- GenomicRanges::GRanges("chr1", + IRanges::IRanges(c(50L, 350L), c(350L, 650L))) + res <- suppressMessages(fineMappingPipeline( + qd, methods = "fsusie", traitId = c("ENSG_A", "ENSG_B"), + region = regions, jointRegions = FALSE)) + expect_equal(nrow(res), 2L) + fit <- getSusieFit(res, study = "study1", context = "brain", + trait = "ENSG_A", method = "fsusie") + expect_equal(names(fit), c("region1", "region2")) +}) + +test_that("fineMappingPipeline(QtlDataset): jointSpec + jointRegions=FALSE merges per region", { + qd <- .fmp_makeQtlDataset(contexts = c("brain", "liver"), + traits = c("ENSG_A", "ENSG_B")) + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmPostprocessOne = .fmp_mockPostprocess(), + .package = "pecotmr") + local_mocked_bindings( + mvsusie = .fmp_mockMvsusie(), + create_mixture_prior = .fmp_mockMixturePrior(), + .package = "mvsusieR") + regions <- GenomicRanges::GRanges("chr1", + IRanges::IRanges(c(50L, 350L), c(350L, 650L))) + res <- suppressMessages(fineMappingPipeline( + qd, methods = "mvsusie", traitId = c("ENSG_A", "ENSG_B"), + region = regions, jointRegions = FALSE, jointSpecification = "context")) + expect_s4_class(res, "QtlFineMappingResult") + # cross-context joint -> one row per trait, context collapses to "joint". + expect_equal(nrow(res), 2L) + expect_true(all(as.character(res$context) == "joint")) + fit <- getSusieFit(res, study = "study1", context = "joint", + trait = "ENSG_A", method = "mvsusie") + expect_equal(names(fit), c("region1", "region2")) # merged across regions +}) diff --git a/tests/testthat/test_genotypeHandle.R b/tests/testthat/test_genotypeHandle.R index 35a3c2c5..5a318d00 100644 --- a/tests/testthat/test_genotypeHandle.R +++ b/tests/testthat/test_genotypeHandle.R @@ -328,4 +328,70 @@ test_that("GenotypeHandle rejects invalid format", { ) }) +# =========================================================================== +# One-file-per-chromosome (sharded) handle: constructor + validation. +# genoMeta accepts a #chr,path meta file or a named chrom->path vector. +# Extraction routing for sharded handles is covered in test_genotypeIo.R. +# =========================================================================== +test_that("genoMeta (named vector) builds a sharded handle", { + skip_if_not_installed("snpStats") + h <- GenotypeHandle(genoMeta = c( + "21" = file.path(test_data_dir, "test_variants"), + "22" = file.path(test_data_dir, "test_variants_chr22"))) + expect_s4_class(h, "GenotypeHandle") + expect_equal(h@format, "plink1") + expect_equal(sort(names(h@chromPaths)), c("21", "22")) + expect_equal(nrow(h@snpInfo), 2L * 349L) +}) + +test_that("genoMeta meta-file resolves payloads relative to its own directory", { + skip_if_not_installed("snpStats") + # A meta file living in test_data referencing payloads by basename only. + metafile <- file.path(test_data_dir, "tmp_chrom_meta_relative.tsv") + on.exit(unlink(metafile), add = TRUE) + writeLines(c("#chr\tpath", "21\ttest_variants", "22\ttest_variants_chr22"), + metafile) + h <- GenotypeHandle(genoMeta = metafile) + expect_equal(sort(names(h@chromPaths)), c("21", "22")) + expect_equal(nrow(h@snpInfo), 2L * 349L) +}) + +test_that("genoMeta errors on mismatched samples across shards", { + skip_if_not_installed("snpStats") + # protocol_example.genotype is a different sample panel. + expect_error( + GenotypeHandle(genoMeta = c( + "21" = file.path(test_data_dir, "test_variants"), + "22" = file.path(test_data_dir, "protocol_example.genotype"))), + "identical sample IDs") +}) + +test_that("genoMeta errors on mixed formats across shards", { + skip_if_not_installed("snpStats") + skip_if_not_installed("SNPRelate") + expect_error( + GenotypeHandle(genoMeta = c( + "21" = file.path(test_data_dir, "test_variants"), + "22" = file.path(test_data_dir, "test_variants_chr22.gds"))), + "share one") +}) + +test_that("genoMeta errors when a chromosome appears in two files", { + skip_if_not_installed("snpStats") + expect_error( + GenotypeHandle(genoMeta = c( + "a" = file.path(test_data_dir, "test_variants"), + "b" = file.path(test_data_dir, "test_variants"))), + "more than one") +}) + +test_that("genoMeta sharded handle show() reports the layout", { + skip_if_not_installed("snpStats") + sh <- GenotypeHandle(genoMeta = c( + "21" = file.path(test_data_dir, "test_variants"), + "22" = file.path(test_data_dir, "test_variants_chr22"))) + out <- paste(capture.output(show(sh)), collapse = "\n") + expect_match(out, "per-chromosome files") +}) + diff --git a/tests/testthat/test_genotypeIo.R b/tests/testthat/test_genotypeIo.R index 0212f721..ac1be263 100644 --- a/tests/testthat/test_genotypeIo.R +++ b/tests/testthat/test_genotypeIo.R @@ -1300,4 +1300,108 @@ test_that("extractBlockGenotypes returns SummarizedExperiment", { ) } +# =========================================================================== +# One-file-per-chromosome (sharded) handle: extraction routing. +# extractBlockGenotypes() routes each region to its per-chromosome payload and +# assembles cross-chromosome requests. Fixtures: chr21 test_variants.* + chr22 +# test_variants_chr22.* (same 100 samples; CHR relabelled 22, SNP ids "_c22"), +# available in all four backends. +# =========================================================================== +.shardDose <- function(h, idx) + unname(as.matrix(SummarizedExperiment::assay( + extractBlockGenotypes(h, idx), "dosage"))) + +# Per-backend single-file constructor + per-chromosome genoMeta payloads. +.shardBackends <- list( + plink1 = list(pkg = "snpStats", + ref = function(sfx) GenotypeHandle( + plink1Prefix = file.path(test_data_dir, paste0("test_variants", sfx))), + p21 = file.path(test_data_dir, "test_variants"), + p22 = file.path(test_data_dir, "test_variants_chr22")), + # PLINK2 payloads carry the .pgen extension because the fixtures also ship a + # .bed at the same stem; an explicit extension (or format=) disambiguates. + plink2 = list(pkg = "pgenlibr", + ref = function(sfx) GenotypeHandle( + plink2Prefix = file.path(test_data_dir, paste0("test_variants", sfx))), + p21 = file.path(test_data_dir, "test_variants.pgen"), + p22 = file.path(test_data_dir, "test_variants_chr22.pgen")), + vcf = list(pkg = "VariantAnnotation", + ref = function(sfx) GenotypeHandle( + path = file.path(test_data_dir, paste0("test_variants", sfx, ".vcf.gz"))), + p21 = file.path(test_data_dir, "test_variants.vcf.gz"), + p22 = file.path(test_data_dir, "test_variants_chr22.vcf.gz")), + gds = list(pkg = "SNPRelate", + ref = function(sfx) GenotypeHandle( + path = file.path(test_data_dir, paste0("test_variants", sfx, ".gds"))), + p21 = file.path(test_data_dir, "test_variants.gds"), + p22 = file.path(test_data_dir, "test_variants_chr22.gds")) +) + +# Register the per-backend routing tests for one backend spec. +.shardRoutingTests <- function(spec, label) { + test_that(sprintf("[%s] sharded handle routes per chromosome", label), { + skip_if_not_installed(spec$pkg) + ref21 <- spec$ref("") + ref22 <- spec$ref("_chr22") + shard <- GenotypeHandle(genoMeta = c("21" = spec$p21, "22" = spec$p22)) + expect_s4_class(shard, "GenotypeHandle") + expect_equal(shard@format, ref21@format) + expect_equal(sort(names(shard@chromPaths)), c("21", "22")) + n21 <- nrow(ref21@snpInfo) + n22 <- nrow(ref22@snpInfo) + expect_equal(nrow(shard@snpInfo), n21 + n22) + # A chr21 request routes to the chr21 payload (== single-file chr21). + expect_equal(.shardDose(shard, 1:5), .shardDose(ref21, 1:5)) + # A chr22 request routes to the chr22 payload, and the global->local index + # conversion lines up with the single-file chr22 handle. (PLINK2 is + # positional, so this is the key check for that backend.) + expect_equal(.shardDose(shard, (n21 + 1):(n21 + 5)), .shardDose(ref22, 1:5)) + }) + + test_that(sprintf("[%s] cross-shard request assembles both chromosomes", label), { + skip_if_not_installed(spec$pkg) + ref21 <- spec$ref("") + ref22 <- spec$ref("_chr22") + shard <- GenotypeHandle(genoMeta = c("21" = spec$p21, "22" = spec$p22)) + n21 <- nrow(ref21@snpInfo) + em <- extractBlockGenotypes(shard, c(1L, 2L, n21 + 1L, n21 + 2L)) + dm <- unname(as.matrix(SummarizedExperiment::assay(em, "dosage"))) + expect_equal(nrow(dm), 4L) + expect_equal( + as.character(GenomeInfoDb::seqnames( + SummarizedExperiment::rowRanges(em))), + c("chr21", "chr21", "chr22", "chr22")) + # Requested order preserved; each half matches its single-file source. + expect_equal(dm[1:2, ], .shardDose(ref21, 1:2)) + expect_equal(dm[3:4, ], .shardDose(ref22, 1:2)) + }) +} + +for (.bk in names(.shardBackends)) .shardRoutingTests(.shardBackends[[.bk]], .bk) + +test_that("single-shard sharded handle equals the single-file handle", { + skip_if_not_installed("snpStats") + ref <- GenotypeHandle(plink1Prefix = file.path(test_data_dir, "test_variants")) + sh <- GenotypeHandle(genoMeta = c("21" = file.path(test_data_dir, "test_variants"))) + expect_equal(length(sh@chromPaths), 1L) + expect_equal(nrow(sh@snpInfo), nrow(ref@snpInfo)) + expect_equal(.shardDose(sh, 1:10), .shardDose(ref, 1:10)) +}) + +test_that("genoMeta meta-file form matches the named-vector form", { + skip_if_not_installed("snpStats") + td_abs <- normalizePath(test_data_dir) + metafile <- tempfile(fileext = ".tsv") + writeLines(c("#chr\tpath", + paste0("21\t", file.path(td_abs, "test_variants")), + paste0("22\t", file.path(td_abs, "test_variants_chr22"))), + metafile) + hFile <- GenotypeHandle(genoMeta = metafile) + hVec <- GenotypeHandle(genoMeta = c("21" = file.path(td_abs, "test_variants"), + "22" = file.path(td_abs, "test_variants_chr22"))) + expect_equal(nrow(hFile@snpInfo), nrow(hVec@snpInfo)) + expect_equal(sort(names(hFile@chromPaths)), sort(names(hVec@chromPaths))) + expect_equal(.shardDose(hFile, 1:5), .shardDose(hVec, 1:5)) +}) + diff --git a/tests/testthat/test_twasWeightsPipeline.R b/tests/testthat/test_twasWeightsPipeline.R index c33c87b3..8cd5963d 100644 --- a/tests/testthat/test_twasWeightsPipeline.R +++ b/tests/testthat/test_twasWeightsPipeline.R @@ -2012,3 +2012,116 @@ test_that(".twasWeightsPipelineMatrix: empirical pi from mr.ash gets propagated" expect_true("empiricalPi" %in% names(res)) expect_equal(as.numeric(res$empiricalPi), 1 - 0.8, tolerance = 1e-12) }) + +# =========================================================================== +# Multi-region / jointRegions (P3) +# =========================================================================== + +test_that(".twasRegionLabel / .twasFitsForRegion select per-region fits", { + rg <- GenomicRanges::GRanges("chr1", IRanges::IRanges(100, 350)) + expect_equal(pecotmr:::.twasRegionLabel(rg), "chr1:100-350") + expect_equal(pecotmr:::.twasRegionLabel(NULL), "cis") + fits <- list(susie = list(region1 = "f1", region2 = "f2")) + expect_equal(pecotmr:::.twasFitsForRegion(fits, 2L, 2L), list(susie = "f2")) + # single block returns the fits unchanged + expect_identical(pecotmr:::.twasFitsForRegion(fits, 1L, 1L), fits) + # a non-region-list fit cannot align to a block and is dropped + expect_length( + pecotmr:::.twasFitsForRegion(list(susie = list(alpha = 1, pip = 2)), 1L, 2L), + 0L) +}) + +test_that(".twasMergeRegionEntries stacks weights and builds a flat per-region CV df", { + mk <- function(vids, w, rsq) TwasWeightsEntry( + variantIds = vids, weights = w, + cvPerformance = list(samplePartition = NULL, predictions = NULL, + metrics = c(rsq = rsq, pval = 0.01))) + e1 <- mk(c("v1", "v2"), c(0.1, 0.2), 0.3) + e2 <- mk(c("v3", "v4"), c(0.3, 0.4), 0.5) + m <- pecotmr:::.twasMergeRegionEntries( + list(e1, e2), c("chr1:1-100", "chr1:200-300")) + expect_s4_class(m, "TwasWeightsEntry") + expect_equal(getVariantIds(m), c("v1", "v2", "v3", "v4")) + expect_equal(unname(getWeights(m)), c(0.1, 0.2, 0.3, 0.4)) + cv <- getCvPerformance(m) + expect_s3_class(cv, "data.frame") + expect_equal(cv$region, c("chr1:1-100", "chr1:200-300")) + expect_equal(cv$rsq, c(0.3, 0.5)) + expect_equal(names(getFits(m)), c("chr1:1-100", "chr1:200-300")) +}) + +test_that(".twasMergeRegionEntries returns a single entry unchanged", { + e <- TwasWeightsEntry(variantIds = "v1", weights = 0.5) + expect_identical( + pecotmr:::.twasMergeRegionEntries(list(e), "chr1:1-100"), e) +}) + +test_that("twasWeightsPipeline(QtlDataset): region + cisWindow is rejected", { + qd <- .tp_makeQtlDataset(traits = "ENSG_A") + expect_error( + twasWeightsPipeline( + qd, methods = list(lasso_weights = list()), + region = GenomicRanges::GRanges("chr1", IRanges::IRanges(1, 2000)), + cisWindow = 1000L), + "either `region` or `cisWindow`") +}) + +test_that("twasWeightsPipeline(QtlDataset): jointRegions=FALSE concatenates per-region weights", { + qd <- .tp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor()), + .tp_mockIndividualWeights(), list(.package = "pecotmr"))) + regions <- GenomicRanges::GRanges("chr1", + IRanges::IRanges(c(50L, 1050L), c(1050L, 2050L))) + res <- suppressMessages(twasWeightsPipeline( + qd, methods = list(lasso_weights = list()), + traitId = "ENSG_A", region = regions, jointRegions = FALSE, + cvFolds = 0, ensemble = FALSE, estimatePi = FALSE, verbose = 0)) + expect_s4_class(res, "TwasWeights") + # 1 ctx x 1 trait x 1 method -> a single merged row. + expect_equal(nrow(res), 1L) + w <- getWeights(res, study = "study1", context = "brain", + trait = "ENSG_A", method = "lasso") + # union of both sub-ranges (v1..v10 + v11..v20). + expect_equal(length(w), 20L) +}) + +test_that("twasWeightsPipeline(QtlDataset): jointRegions=TRUE fits one concatenated block", { + qd <- .tp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor()), + .tp_mockIndividualWeights(), list(.package = "pecotmr"))) + regions <- GenomicRanges::GRanges("chr1", + IRanges::IRanges(c(50L, 1050L), c(1050L, 2050L))) + res <- suppressMessages(twasWeightsPipeline( + qd, methods = list(lasso_weights = list()), + traitId = "ENSG_A", region = regions, jointRegions = TRUE, + cvFolds = 0, ensemble = FALSE, estimatePi = FALSE, verbose = 0)) + expect_equal(nrow(res), 1L) + w <- getWeights(res, study = "study1", context = "brain", + trait = "ENSG_A", method = "lasso") + expect_equal(length(w), 20L) +}) + +test_that("twasWeightsPipeline(QtlDataset): mr.mash jointRegions=FALSE concatenates per-region weights", { + qd <- .tp_makeQtlDataset(contexts = c("brain", "liver"), + traits = c("ENSG_A", "ENSG_B")) + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor(), + mrmashWeights = function(X, Y, ...) + matrix(0, nrow = ncol(X), ncol = ncol(Y), + dimnames = list(colnames(X), colnames(Y)))), + list(.package = "pecotmr"))) + regions <- GenomicRanges::GRanges("chr1", + IRanges::IRanges(c(50L, 1050L), c(1050L, 2050L))) + res <- suppressMessages(suppressWarnings(twasWeightsPipeline( + qd, methods = "mrmash", traitId = c("ENSG_A", "ENSG_B"), + region = regions, jointRegions = FALSE, + cvFolds = 0, ensemble = FALSE, estimatePi = FALSE, verbose = 0))) + expect_s4_class(res, "TwasWeights") + # 2 contexts x 2 traits = 4 rows, each with weights concatenated over regions. + expect_equal(nrow(res), 4L) + w <- getWeights(res, study = "study1", context = "brain", + trait = "ENSG_A", method = "mrmash") + expect_equal(NROW(w), 20L) +}) From 7741ae40d9fe44c1879897a210d2c104db842200 Mon Sep 17 00:00:00 2001 From: danielnachun Date: Tue, 23 Jun 2026 22:26:10 +0000 Subject: [PATCH 2/2] Update documentation --- man/FineMappingEntry.Rd | 2 +- man/fineMappingPipeline.Rd | 10 ++++++++++ man/krigingOutlierQc.Rd | 31 +++++++++++++------------------ man/twasWeightsPipeline.Rd | 1 + 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/man/FineMappingEntry.Rd b/man/FineMappingEntry.Rd index acbf0e39..75069044 100644 --- a/man/FineMappingEntry.Rd +++ b/man/FineMappingEntry.Rd @@ -14,7 +14,7 @@ the pipeline's \code{trim} parameter).} \item{topLoci}{Per-variant \code{data.frame} in canonical schema: identity columns (\code{variant_id, chrom, pos, A1, A2}), context -(\code{N, MAF}), marginal columns (\code{marginal_beta, +(\code{N, af}; effect-allele frequency, never MAF), marginal columns (\code{marginal_beta, marginal_se, marginal_z, marginal_p}), posterior columns (\code{pip, posterior_mean, posterior_sd, cs_*, cs_*_purity}), pipeline stamps (\code{method, gene, event, grange_start, diff --git a/man/fineMappingPipeline.Rd b/man/fineMappingPipeline.Rd index fdd53ef0..379ba9c1 100644 --- a/man/fineMappingPipeline.Rd +++ b/man/fineMappingPipeline.Rd @@ -25,6 +25,7 @@ fineMappingPipeline(data, ...) secondaryCoverage = c(0.7, 0.5), signalCutoff = 0.025, minAbsCorr = 0.8, + medianAbsCorr = NULL, fineMappingResult = NULL, naAction = c("drop", "impute"), verbose = 1, @@ -43,12 +44,14 @@ fineMappingPipeline(data, ...) traitId = NULL, region = NULL, cisWindow = NULL, + jointRegions = FALSE, jointSpecification = NULL, addSusieInf = TRUE, coverage = 0.95, secondaryCoverage = c(0.7, 0.5), signalCutoff = 0.025, minAbsCorr = 0.8, + medianAbsCorr = NULL, fineMappingResult = NULL, naAction = c("drop", "impute"), verbose = 1, @@ -71,6 +74,7 @@ fineMappingPipeline(data, ...) secondaryCoverage = c(0.7, 0.5), signalCutoff = 0.025, minAbsCorr = 0.8, + medianAbsCorr = NULL, fineMappingResult = NULL, verbose = 1, trim = TRUE, @@ -85,6 +89,7 @@ fineMappingPipeline(data, ...) secondaryCoverage = c(0.7, 0.5), signalCutoff = 0.025, minAbsCorr = 0.8, + medianAbsCorr = NULL, fineMappingResult = NULL, verbose = 1, trim = TRUE, @@ -166,6 +171,11 @@ Default \code{0.95}.} \item{minAbsCorr}{Minimum absolute correlation for credible-set purity. Default \code{0.8}.} +\item{medianAbsCorr}{Optional median absolute correlation for +credible-set purity, routed to \code{susieR::susie_get_cs}. A set is +kept if it passes either \code{minAbsCorr} or \code{medianAbsCorr} +(OR-logic). Default \code{NULL} (off).} + \item{fineMappingResult}{Optional existing \code{FineMappingResult} to use as a resume cache; tuples already present are not refit.} diff --git a/man/krigingOutlierQc.Rd b/man/krigingOutlierQc.Rd index 4f464036..f389d508 100644 --- a/man/krigingOutlierQc.Rd +++ b/man/krigingOutlierQc.Rd @@ -4,26 +4,20 @@ \alias{krigingOutlierQc} \title{Kriging-style LD-consistency outlier QC} \usage{ -krigingOutlierQc( - zScore, - R, - variantIds = NULL, - pThreshold = 5e-08, - ridge = 0.001 -) +krigingOutlierQc(zScore, R, n, variantIds = NULL, pThreshold = 5e-08) } \arguments{ \item{zScore}{Numeric vector of harmonized z-scores.} \item{R}{Square LD correlation matrix aligned to \code{zScore}.} +\item{n}{Sample size, forwarded to \code{susieR::estimate_s_rss()} and +\code{susieR::kriging_rss()}.} + \item{variantIds}{Optional variant IDs for the diagnostics table.} \item{pThreshold}{Two-sided p-value cutoff for flagging an outlier (default \code{5e-8}).} - -\item{ridge}{Small diagonal added to \code{R} before inversion for numerical -stability (default \code{1e-3}).} } \value{ A list with \code{outlier} (logical vector) and \code{diagnostics} @@ -32,12 +26,13 @@ A list with \code{outlier} (logical vector) and \code{diagnostics} } \description{ Flags variants whose observed z-score is inconsistent with the value -predicted from its LD neighbours. For \code{z ~ N(0, R)} the leave-one-out -conditional distribution of \code{z_i} given the rest has mean -\code{-(1/Omega_ii) * Omega_{i,-i} z_{-i}} and variance \code{1/Omega_ii}, -where \code{Omega = R^{-1}}. The standardized residual is ~\code{N(0,1)} when -the z-scores and LD are mutually consistent, so a large residual marks an -allele-flip / LD-mismatch outlier. RSS-only helper, opt-in via -\code{alleleFlipKriging}; never wired into \code{alleleQc()} / -\code{matchRefPanel()}. +predicted from its LD neighbours, using susieR's kriging diagnostic. +\code{susieR::kriging_rss()} computes the leave-one-out conditional +distribution of each \code{z_i} given the rest, with the LD-mismatch scale +\code{s} estimated by \code{susieR::estimate_s_rss()}; its standardized +residual (\code{z_std_diff}) is ~\code{N(0,1)} when z-scores and LD agree, so +a large residual marks an allele-flip / LD-mismatch outlier. RSS-only helper, +opt-in via \code{alleleFlipKriging}; never wired into \code{alleleQc()} / +\code{matchRefPanel()}. Requires a susieR that provides \code{kriging_rss()} +and \code{estimate_s_rss()}. } diff --git a/man/twasWeightsPipeline.Rd b/man/twasWeightsPipeline.Rd index a777a8d8..2678b759 100644 --- a/man/twasWeightsPipeline.Rd +++ b/man/twasWeightsPipeline.Rd @@ -61,6 +61,7 @@ twasWeightsPipeline(data, ...) traitId = NULL, region = NULL, cisWindow = NULL, + jointRegions = FALSE, jointSpecification = NULL, fineMappingResult = NULL, twasWeights = NULL,