# Reviewer notes: lines beginning with "# " at column 0 sit between file diffs, outside every hunk.
# NOTE(review): patch tools are expected to skip such inter-diff text; confirm with `git apply --check`.
# This patch adds five new files under modules/nf-core/gcta/addgrms/ (every hunk is `-0,0 +1,N`).
#
# environment.yml: Conda environment listing channels conda-forge and bioconda and the single
# dependency bioconda::gcta=1.94.1.
diff --git a/modules/nf-core/gcta/addgrms/environment.yml b/modules/nf-core/gcta/addgrms/environment.yml
new file mode 100644
index 000000000000..3e22ea7b9f20
--- /dev/null
+++ b/modules/nf-core/gcta/addgrms/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
# main.nf: process GCTA_ADDGRMS.
#   Input : one tuple (meta, mgrm_file, grm_files).
#   Script: gcta --mgrm <mgrm_file> --make-grm --out <prefix> --thread-num <task.cpus> <args>
#           where prefix = task.ext.prefix, falling back to meta.id, and args = task.ext.args or ''.
#   Output: every file matching "*.grm.*" together with meta (emit: combined_grm), plus a
#           (process name, "gcta", version) tuple on the `versions` topic; the version is obtained
#           by piping `gcta --version` through the sed expression shown in the output block.
#   Stub  : only touches <prefix>.grm.id, <prefix>.grm.bin and <prefix>.grm.N.bin.
#   The container is chosen by engine: the https blob URL for singularity/apptainer (unless
#   task.ext.singularity_pull_docker_container is set), otherwise the community.wave.seqera.io image.
#   NOTE(review): grm_files is never referenced on the command line; presumably it is listed as an
#   input only so the GRM files named inside the MGRM manifest are staged next to it. Confirm.
diff --git a/modules/nf-core/gcta/addgrms/main.nf b/modules/nf-core/gcta/addgrms/main.nf
new file mode 100644
index 000000000000..9148ce4ef9f2
--- /dev/null
+++ b/modules/nf-core/gcta/addgrms/main.nf
@@ -0,0 +1,38 @@
+process GCTA_ADDGRMS {
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
+        : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
+
+    input:
+    tuple val(meta), path(mgrm_file), path(grm_files)
+
+    output:
+    tuple val(meta), path("*.grm.*"), emit: combined_grm
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    gcta \\
+        --mgrm ${mgrm_file} \\
+        --make-grm \\
+        --out ${prefix} \\
+        --thread-num ${task.cpus} \\
+        ${args}
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.grm.id
+    touch ${prefix}.grm.bin
+    touch ${prefix}.grm.N.bin
+    """
+}
# meta.yml: module metadata describing the tool, the three input elements (meta, mgrm_file,
# grm_files), the combined_grm and versions_gcta outputs, and the `versions` topic.
# The quoted version command under versions_gcta and topics repeats the eval string used in main.nf.
diff --git a/modules/nf-core/gcta/addgrms/meta.yml b/modules/nf-core/gcta/addgrms/meta.yml
new file mode 100644
index 000000000000..a39563ee1ec4
--- /dev/null
+++ b/modules/nf-core/gcta/addgrms/meta.yml
@@ -0,0 +1,75 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_addgrms"
+description: Combine multiple GRMs listed in an MGRM manifest into a single dense GRM
+keywords:
+  - gcta
+  - genome-wide complex trait analysis
+  - grm
+  - genetic relationship matrix
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+      licence: ["GPL-3.0-only"]
+      identifier: "biotools:gcta"
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy map containing combined GRM metadata
+          e.g. `[ id:'plink_simulated' ]`
+    - mgrm_file:
+        type: file
+        description: MGRM manifest listing the GRM prefixes to combine
+        pattern: "*.mgrm"
+        ontologies:
+          - edam: "http://edamontology.org/format_2330"
+    - grm_files:
+        type: file
+        description: GRM sidecar files referenced by `mgrm_file`
+        pattern: "*"
+        ontologies: []
+
+output:
+  combined_grm:
+    - - meta:
+          type: map
+          description: |
+            Groovy map containing combined GRM metadata
+            e.g. `[ id:'plink_simulated' ]`
+      - "*.grm.*":
+          type: file
+          description: Combined dense GRM sidecar files
+          pattern: "*.grm.{id,bin,N.bin}"
+          ontologies: []
+  versions_gcta:
+    - - "${task.process}":
+          type: string
+          description: The process the version was collected from
+      - "gcta":
+          type: string
+          description: The tool name
+      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
+          type: eval
+          description: The command used to retrieve the GCTA version
+
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the version was collected from
+      - gcta:
+          type: string
+          description: The tool name
+      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
+          type: eval
+          description: The command used to retrieve the GCTA version
+
+authors:
+  - "@lyh970817"
+maintainers:
+  - "@lyh970817"
# tests/main.nf.test: nf-test suite for GCTA_ADDGRMS.
#   setup: runs GCTA_MAKEGRMPART twice (aliases GCTA_MAKEGRMPART_LDMS1 / _LDMS2) on the
#          plink_simulated bed/bim/fam test data. Each run writes a SNP list taken from the second
#          whitespace-separated field of .bim rows: the first 10 rows for LDMS1 (take(10)) and the
#          following 10 rows for LDMS2 (drop(10).take(10)).
#   tests: both tests write plink_simulated_ldms.mgrm containing the two `.part_1_1` prefixes,
#          gather the grm_files outputs of both setup runs into one name-sorted list, and pass
#          [ [id:'plink_simulated_ldms'], mgrm, grm files ] as input[0].
#          The real run additionally asserts the exact set of three output file names; both runs
#          snapshot combined_grm and every output whose key starts with 'versions'.
diff --git a/modules/nf-core/gcta/addgrms/tests/main.nf.test b/modules/nf-core/gcta/addgrms/tests/main.nf.test
new file mode 100644
index 000000000000..cb5927caf816
--- /dev/null
+++ b/modules/nf-core/gcta/addgrms/tests/main.nf.test
@@ -0,0 +1,162 @@
+nextflow_process {
+
+    name "Test Process GCTA_ADDGRMS"
+    script "../main.nf"
+    process "GCTA_ADDGRMS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gcta"
+    tag "gcta/addgrms"
+    tag "gcta/makegrmpart"
+
+    setup {
+        run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") {
+            script "../../makegrmpart/main.nf"
+            process {
+                """
+                file('plink_simulated_ldms1.mbfile').text = 'plink_simulated\\n'
+
+                def ldms1BimFile = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                def ldms1ExtractSnps = ldms1BimFile.readLines()
+                    .take(10)
+                    .collect { row -> row.trim().split(/\\s+/)[1] }
+                    .join('\\n') + '\\n'
+                file('plink_simulated_ldms1.snps.txt').text = ldms1ExtractSnps
+
+                input[0] = [
+                    [ id:'plink_simulated_ldms1' ],
+                    1,
+                    1,
+                    file('plink_simulated_ldms1.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        ldms1BimFile
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                input[1] = [[ id:'plink_simulated_ldms1' ], file('plink_simulated_ldms1.snps.txt')]
+                """
+            }
+        }
+
+        run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2") {
+            script "../../makegrmpart/main.nf"
+            process {
+                """
+                file('plink_simulated_ldms2.mbfile').text = 'plink_simulated\\n'
+
+                def ldms2BimFile = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                def ldms2ExtractSnps = ldms2BimFile.readLines()
+                    .drop(10)
+                    .take(10)
+                    .collect { row -> row.trim().split(/\\s+/)[1] }
+                    .join('\\n') + '\\n'
+                file('plink_simulated_ldms2.snps.txt').text = ldms2ExtractSnps
+
+                input[0] = [
+                    [ id:'plink_simulated_ldms2' ],
+                    1,
+                    1,
+                    file('plink_simulated_ldms2.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        ldms2BimFile
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                input[1] = [[ id:'plink_simulated_ldms2' ], file('plink_simulated_ldms2.snps.txt')]
+                """
+            }
+        }
+    }
+
+    test("homo_sapiens popgen - merge dense GRMs from mgrm") {
+        when {
+            process {
+                """
+                mgrm_file = Channel
+                    .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
+                    .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
+
+                grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
+                    .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
+                    .map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
+                    .collect()
+                    .map { bundles -> bundles.flatten().sort { it.name } }
+
+                input[0] = mgrm_file
+                    .combine(grm_files)
+                    .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.combined_grm.size() == 1 },
+                { assert process.out.combined_grm.get(0).get(0).id == "plink_simulated_ldms" },
+                {
+                    assert process.out.combined_grm.get(0).get(1).collect { file(it).name }.toSet() == [
+                        'plink_simulated_ldms.grm.id',
+                        'plink_simulated_ldms.grm.bin',
+                        'plink_simulated_ldms.grm.N.bin'
+                    ] as Set
+                },
+                {
+                    assert snapshot(
+                        process.out.combined_grm,
+                        process.out.findAll { key, val -> key.startsWith('versions') }
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - merge dense GRMs from mgrm - stub") {
+        options "-stub"
+
+        when {
+            process {
+                """
+                mgrm_file = Channel
+                    .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
+                    .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
+
+                grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
+                    .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
+                    .map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
+                    .collect()
+                    .map { bundles -> bundles.flatten().sort { it.name } }
+
+                input[0] = mgrm_file
+                    .combine(grm_files)
+                    .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.combined_grm.size() == 1 },
+                { assert process.out.combined_grm.get(0).get(0).id == "plink_simulated_ldms" },
+                {
+                    assert snapshot(
+                        process.out.combined_grm,
+                        process.out.findAll { key, val -> key.startsWith('versions') }
+                    ).match()
+                }
+            )
+        }
+    }
+}
# tests/main.nf.test.snap: recorded snapshots for the two tests above.
# Both entries record gcta version 1.94.1 under versions_gcta. The three stub files share the
# md5 d41d8cd98f00b204e9800998ecf8427e (the MD5 of empty content, consistent with the stub's
# `touch`), while the real-run entry records a distinct md5 per file.
diff --git a/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap b/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap
new file mode 100644
index 000000000000..d93d9255ef2f
--- /dev/null
+++ b/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap
@@ -0,0 +1,62 @@
+{
+    "homo_sapiens popgen - merge dense GRMs from mgrm - stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "plink_simulated_ldms"
+                    },
+                    [
+                        "plink_simulated_ldms.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "plink_simulated_ldms.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "plink_simulated_ldms.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ]
+            ],
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_ADDGRMS",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-05-15T21:10:46.231316108"
+    },
+    "homo_sapiens popgen - merge dense GRMs from mgrm": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "plink_simulated_ldms"
+                    },
+                    [
+                        "plink_simulated_ldms.grm.N.bin:md5,804f8e1799c8b2d4d3df1b52a2a463c6",
+                        "plink_simulated_ldms.grm.bin:md5,850235911329bf9ab68f03e25bbc1ef1",
+                        "plink_simulated_ldms.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9"
+                    ]
+                ]
+            ],
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_ADDGRMS",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-05-15T22:52:43.953267272"
+    }
+}
\ No newline at end of file