From 5962c65702768ba7f3dcad04c19b9d976beff57e Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Thu, 21 May 2026 21:39:52 +0800 Subject: [PATCH 1/3] Add gcta/addgrms module --- modules/nf-core/gcta/addgrms/environment.yml | 7 + modules/nf-core/gcta/addgrms/main.nf | 40 +++++ modules/nf-core/gcta/addgrms/meta.yml | 75 ++++++++ .../nf-core/gcta/addgrms/tests/main.nf.test | 165 ++++++++++++++++++ .../gcta/addgrms/tests/main.nf.test.snap | 62 +++++++ 5 files changed, 349 insertions(+) create mode 100644 modules/nf-core/gcta/addgrms/environment.yml create mode 100644 modules/nf-core/gcta/addgrms/main.nf create mode 100644 modules/nf-core/gcta/addgrms/meta.yml create mode 100644 modules/nf-core/gcta/addgrms/tests/main.nf.test create mode 100644 modules/nf-core/gcta/addgrms/tests/main.nf.test.snap diff --git a/modules/nf-core/gcta/addgrms/environment.yml b/modules/nf-core/gcta/addgrms/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/addgrms/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/addgrms/main.nf b/modules/nf-core/gcta/addgrms/main.nf new file mode 100644 index 000000000000..1f4458c85597 --- /dev/null +++ b/modules/nf-core/gcta/addgrms/main.nf @@ -0,0 +1,40 @@ +process GCTA_ADDGRMS { + tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data' + : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" + + input: + tuple val(meta), path(mgrm_file), path(grm_files) + + output: + tuple val(meta), path("*.grm.*"), emit: combined_grm + tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def extra_args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + + gcta \\ + --mgrm ${mgrm_file} \\ + --make-grm \\ + --out ${prefix} \\ + --thread-num ${task.cpus} \\ + ${extra_args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.grm.id + touch ${prefix}.grm.bin + touch ${prefix}.grm.N.bin + """ +} diff --git a/modules/nf-core/gcta/addgrms/meta.yml b/modules/nf-core/gcta/addgrms/meta.yml new file mode 100644 index 000000000000..a39563ee1ec4 --- /dev/null +++ b/modules/nf-core/gcta/addgrms/meta.yml @@ -0,0 +1,75 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_addgrms" +description: Combine multiple GRMs listed in an MGRM manifest into a single dense GRM +keywords: + - gcta + - genome-wide complex trait analysis + - grm + - genetic relationship matrix + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." 
+ homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + licence: ["GPL-3.0-only"] + identifier: "biotools:gcta" + +input: + - - meta: + type: map + description: | + Groovy map containing combined GRM metadata + e.g. `[ id:'plink_simulated' ]` + - mgrm_file: + type: file + description: MGRM manifest listing the GRM prefixes to combine + pattern: "*.mgrm" + ontologies: + - edam: "http://edamontology.org/format_2330" + - grm_files: + type: file + description: GRM sidecar files referenced by `mgrm_file` + pattern: "*" + ontologies: [] + +output: + combined_grm: + - - meta: + type: map + description: | + Groovy map containing combined GRM metadata + e.g. `[ id:'plink_simulated' ]` + - "*.grm.*": + type: file + description: Combined dense GRM sidecar files + pattern: "*.grm.{id,bin,N.bin}" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" diff --git a/modules/nf-core/gcta/addgrms/tests/main.nf.test b/modules/nf-core/gcta/addgrms/tests/main.nf.test new file mode 100644 index 000000000000..dde6c8060e03 --- /dev/null +++ b/modules/nf-core/gcta/addgrms/tests/main.nf.test @@ -0,0 +1,165 @@ +nextflow_process { + + name "Test Process GCTA_ADDGRMS" + script "../main.nf" + 
process "GCTA_ADDGRMS" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/addgrms" + tag "gcta/makegrmpart" + + setup { + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated_ldms1.mbfile').text = 'plink_simulated\\n' + + def ldms1BimFile = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + def ldms1ExtractSnps = ldms1BimFile.readLines() + .take(10) + .collect { row -> row.trim().split(/\\s+/)[1] } + .join('\\n') + '\\n' + file('plink_simulated_ldms1.snps.txt').text = ldms1ExtractSnps + + input[0] = [ + [ id:'plink_simulated_ldms1' ], + 1, + 1, + file('plink_simulated_ldms1.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + ldms1BimFile + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'plink_simulated_ldms1' ], file('plink_simulated_ldms1.snps.txt')] + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated_ldms2.mbfile').text = 'plink_simulated\\n' + + def ldms2BimFile = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + def ldms2ExtractSnps = ldms2BimFile.readLines() + .drop(10) + .take(10) + .collect { row -> row.trim().split(/\\s+/)[1] } + .join('\\n') + '\\n' + file('plink_simulated_ldms2.snps.txt').text = ldms2ExtractSnps + + input[0] = [ + [ id:'plink_simulated_ldms2' ], + 1, + 1, + file('plink_simulated_ldms2.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + ldms2BimFile + ], + [ + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'plink_simulated_ldms2' ], file('plink_simulated_ldms2.snps.txt')] + """ + } + } + } + + test("homo_sapiens popgen - merge dense GRMs from mgrm") { + config "./nextflow.config" + + when { + process { + """ + mgrm_file = Channel + .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1') + .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true) + + grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files } + .collect() + .map { bundles -> bundles.flatten().sort { it.name } } + + input[0] = mgrm_file + .combine(grm_files) + .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.combined_grm.size() == 1 }, + { assert process.out.combined_grm.get(0).get(0).id == "plink_simulated_ldms" }, + { + assert process.out.combined_grm.get(0).get(1).collect { file(it).name }.toSet() == [ + 'plink_simulated_ldms.grm.id', + 'plink_simulated_ldms.grm.bin', + 'plink_simulated_ldms.grm.N.bin' + ] as Set + }, + { + assert snapshot( + process.out.combined_grm, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - merge dense GRMs from mgrm - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + mgrm_file = Channel + .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1') + .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true) + + grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files } + .collect() + .map { bundles -> bundles.flatten().sort { it.name } } + + input[0] = mgrm_file + .combine(grm_files) + .map { row -> [[ 
id:'plink_simulated_ldms' ], row[0], row[1..-1]] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.combined_grm.size() == 1 }, + { assert process.out.combined_grm.get(0).get(0).id == "plink_simulated_ldms" }, + { + assert snapshot( + process.out.combined_grm, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap b/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap new file mode 100644 index 000000000000..d93d9255ef2f --- /dev/null +++ b/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap @@ -0,0 +1,62 @@ +{ + "homo_sapiens popgen - merge dense GRMs from mgrm - stub": { + "content": [ + [ + [ + { + "id": "plink_simulated_ldms" + }, + [ + "plink_simulated_ldms.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_ldms.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_ldms.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + { + "versions_gcta": [ + [ + "GCTA_ADDGRMS", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:10:46.231316108" + }, + "homo_sapiens popgen - merge dense GRMs from mgrm": { + "content": [ + [ + [ + { + "id": "plink_simulated_ldms" + }, + [ + "plink_simulated_ldms.grm.N.bin:md5,804f8e1799c8b2d4d3df1b52a2a463c6", + "plink_simulated_ldms.grm.bin:md5,850235911329bf9ab68f03e25bbc1ef1", + "plink_simulated_ldms.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ] + ], + { + "versions_gcta": [ + [ + "GCTA_ADDGRMS", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T22:52:43.953267272" + } +} \ No newline at end of file From 0f36baf5fa076c63654bcbcba5b4d8aa9b06a27f Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 23 May 2026 23:51:27 +0800 Subject: [PATCH 2/3] Use apptainer-aware container syntax for 
gcta/addgrms --- modules/nf-core/gcta/addgrms/main.nf | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/modules/nf-core/gcta/addgrms/main.nf b/modules/nf-core/gcta/addgrms/main.nf index 1f4458c85597..9148ce4ef9f2 100644 --- a/modules/nf-core/gcta/addgrms/main.nf +++ b/modules/nf-core/gcta/addgrms/main.nf @@ -2,7 +2,7 @@ process GCTA_ADDGRMS { tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data' : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" @@ -17,17 +17,15 @@ process GCTA_ADDGRMS { task.ext.when == null || task.ext.when script: - def extra_args = task.ext.args ?: '' + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - """ - gcta \\ --mgrm ${mgrm_file} \\ --make-grm \\ --out ${prefix} \\ --thread-num ${task.cpus} \\ - ${extra_args} + ${args} """ stub: From 9ecb62d2c786437019f786f5432d4a46bf494fcc Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 23 May 2026 23:52:09 +0800 Subject: [PATCH 3/3] Drop nextflow.config references from gcta/addgrms tests --- modules/nf-core/gcta/addgrms/tests/main.nf.test | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/nf-core/gcta/addgrms/tests/main.nf.test b/modules/nf-core/gcta/addgrms/tests/main.nf.test index dde6c8060e03..cb5927caf816 100644 --- a/modules/nf-core/gcta/addgrms/tests/main.nf.test +++ b/modules/nf-core/gcta/addgrms/tests/main.nf.test @@ -80,8 +80,6 @@ nextflow_process { } test("homo_sapiens popgen - merge dense GRMs from mgrm") { - config "./nextflow.config" - when { process { """ @@ -126,7 +124,6 @@ nextflow_process { test("homo_sapiens popgen - merge dense GRMs
from mgrm - stub") { options "-stub" - config "./nextflow.config" when { process {