nf-core · juliaapolonio · Mar 26, 2025 · Mar 27, 2025 · Mar 27, 2025 · Mar 27, 2025
diff --git a/modules/nf-core/gcta/gsmr/environment.yml b/modules/nf-core/gcta/gsmr/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/gsmr/main.nf b/modules/nf-core/gcta/gsmr/main.nf
@@ -0,0 +1,54 @@
+// Runs GCTA's GSMR Mendelian Randomization analysis for one exposure/outcome
+// pair of GWAS summary statistics against a PLINK reference dataset.
+process GCTA_GSMR {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0':
+        'quay.io/biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input:
+    tuple val(meta) , path(exposure)
+    tuple val(meta2), path(outcome)
+    // Treated as a directory by the script: it is listed with ls and joined as <reference>/<prefix>
+    path(reference)
+
+    output:
+    tuple val(meta), val(meta2), path("*.log")          , emit: log
+    tuple val(meta), val(meta2), path("*.gsmr")         , emit: gsmr
+    tuple val(meta), val(meta2), path("*.eff_plot.gz")  , emit: eff_plot, optional: true
+    tuple val(meta), val(meta2), path("*.mono.badsnps"), emit: mono_badsnps, optional: true
+    tuple val("${task.process}"), val('gcta'), eval('gcta 2>&1 | grep -oE "v[0-9]+\\.[0-9]+\\.[0-9]+" | sed \'s/v//\''), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args   = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}_${meta2.id}"
+    """
+    # Each list file passed to --gsmr-file holds one line: the dataset id followed by its summary statistics file
+    echo "${meta.id} ${exposure}" > ${meta.id}.input.txt
+    echo "${meta2.id} ${outcome}" > ${meta2.id}.outcome.txt
+    # List the reference directory, drop every file extension and keep only the first resulting prefix
+    file=\$(ls $reference | sed 's/\\.[^.]*\$//')
+    echo "${reference}/\$file" | head -n1 > reference.txt
+
+    gcta  \\
+        $args \\
+        --mbfile reference.txt  \\
+        --gsmr-file ${meta.id}.input.txt ${meta2.id}.outcome.txt \\
+        --out "${prefix}"
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}_${meta2.id}"
+    """
+    touch ${prefix}.log
+    touch ${prefix}.gsmr
+    touch ${prefix}.mono.badsnps
+    echo | gzip > ${prefix}.eff_plot.gz
+    """
+}
diff --git a/modules/nf-core/gcta/gsmr/meta.yml b/modules/nf-core/gcta/gsmr/meta.yml
@@ -0,0 +1,116 @@
+name: "gcta_gsmr"
+description: Perform GSMR Mendelian Randomization analysis using GCTA with
+  exposure and outcome summary statistics.
+keywords:
+  - Mendelian Randomization
+  - GWAS
+  - GSMR
+  - causal inference
+tools:
+  - "gcta":
+      description: "GCTA implements GSMR for Mendelian Randomization using summary statistics."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/"
+      tool_dev_url: "https://github.com/jianyangqt/gcta"
+      doi: "10.1038/s41467-017-02317-2"
+      licence:
+        - "GPL v3"
+      identifier: biotools:gcta
+input:
+  - - meta:
+        type: map
+        description: Groovy Map containing exposure dataset metadata
+    - exposure:
+        type: file
+        description: Exposure GWAS summary statistics formatted for GSMR
+        pattern: "*.txt"
+        ontologies: []
+  - - meta2:
+        type: map
+        description: Groovy Map containing outcome dataset metadata
+    - outcome:
+        type: file
+        description: Outcome GWAS summary statistics formatted for GSMR
+        pattern: "*.txt"
+        ontologies: []
+  - reference:
+      type: directory  # main.nf lists this path with ls and builds <reference>/<prefix> from it
+      description: Directory containing the PLINK reference dataset files (BED/BIM/FAM)
+      pattern: "*.{bed,bim,fam}"
+      multiple: true
+      ontologies: []
+output:
+  log:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information for exposure
+      - meta2:
+          type: map
+          description: Groovy Map containing sample information for outcome
+      - "*.log":
+          type: file
+          description: Log file from GSMR analysis
+          pattern: "*.log"
+          ontologies: []
+  gsmr:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information for exposure
+      - meta2:
+          type: map
+          description: Groovy Map containing sample information for outcome
+      - "*.gsmr":
+          type: file
+          description: Result file from GSMR analysis
+          pattern: "*.gsmr"
+          ontologies: []
+  versions_gcta:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - gcta:
+          type: string
+          description: The name of the tool
+      - gcta 2>&1 | grep -oE "v[0-9]+\.[0-9]+\.[0-9]+" | sed 's/v//':
+          type: eval
+          description: The expression to obtain the version of the tool
+  eff_plot:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information for exposure
+      - meta2:
+          type: map
+          description: Groovy Map containing sample information for outcome
+      - "*.eff_plot.gz":
+          type: file
+          description: Result effect plot from GSMR analysis
+          pattern: "*.eff_plot.gz"
+          ontologies:
+            - edam: http://edamontology.org/format_3989
+  mono_badsnps:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information for exposure
+      - meta2:
+          type: map
+          description: Groovy Map containing sample information for outcome
+      - "*.mono.badsnps":
+          type: file
+          description: Result badsnps file from GSMR analysis
+          pattern: "*.mono.badsnps"
+          ontologies: []
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - gcta:
+          type: string
+          description: The name of the tool
+      - gcta 2>&1 | grep -oE "v[0-9]+\.[0-9]+\.[0-9]+" | sed 's/v//':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@juliaapolonio"
+maintainers:
+  - "@juliaapolonio"
diff --git a/modules/nf-core/gcta/gsmr/tests/main.nf.test b/modules/nf-core/gcta/gsmr/tests/main.nf.test
@@ -0,0 +1,86 @@
+nextflow_process {
+
+    name "Test Process GCTA_GSMR"
+    script "../main.nf"
+    process "GCTA_GSMR"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gcta"
+    tag "gcta/gsmr"
+
+    test("homo_sapiens - gsmr") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test_exposure' ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/gsmr/sumstats.tsv")
+                ]
+                input[1] = [
+                    [ id:'test_outcome' ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/gsmr/sumstats_copy.tsv")
+                ]
+
+                // GCTA_GSMR reads the reference as a directory, so the PLINK files are copied into one
+                def bedFile = file(params.modules_testdata_base_path + "genomics/homo_sapiens/gsmr/bfile/bfile.bed")
+                def bimFile = file(params.modules_testdata_base_path + "genomics/homo_sapiens/gsmr/bfile/bfile.bim")
+                def famFile = file(params.modules_testdata_base_path + "genomics/homo_sapiens/gsmr/bfile/bfile.fam")
+
+                def referenceDir = file("reference")
+                referenceDir.mkdirs()
+                bedFile.copyTo(referenceDir.resolve("reference.bed"))
+                bimFile.copyTo(referenceDir.resolve("reference.bim"))
+                famFile.copyTo(referenceDir.resolve("reference.fam"))
+
+                input[2] = referenceDir
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                // Unnamed match() stores the snapshot under this test's name: regenerate
+                // main.nf.test.snap (nf-test --update-snapshot) whenever this assertion changes
+                { assert snapshot(sanitizeOutput(process.out, unstableKeys:["log"])).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - gsmr - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test_exposure' ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/gsmr/sumstats.tsv")
+                ]
+                input[1] = [
+                    [ id:'test_outcome' ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/gsmr/sumstats_copy.tsv")
+                ]
+
+                // The stub block only touches its output files, so an empty reference directory is enough
+                def referenceDir = file("reference")
+                referenceDir.mkdirs()
+                input[2] = referenceDir
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.gsmr).match("gsmr_stub")},
+                { assert snapshot(process.out.versions_gcta).match("versions_gcta_stub")}
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/gcta/gsmr/tests/main.nf.test.snap b/modules/nf-core/gcta/gsmr/tests/main.nf.test.snap
@@ -0,0 +1,74 @@
+{
+    "gsmr": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test_exposure"
+                    },
+                    {
+                        "id": "test_outcome"
+                    },
+                    "test_exposure_test_outcome.gsmr:md5,f137c763773522ea37849d919f6de9aa"
+                ]
+            ]
+        ],
+        "timestamp": "2026-03-13T16:02:41.214728046",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "versions_gcta": {
+        "content": [
+            [
+                [
+                    "GCTA_GSMR",
+                    "gcta",
+                    "1.94.1"
+                ]
+            ]
+        ],
+        "timestamp": "2026-03-13T16:16:22.216759135",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "versions_gcta_stub": {
+        "content": [
+            [
+                [
+                    "GCTA_GSMR",
+                    "gcta",
+                    "1.94.1"
+                ]
+            ]
+        ],
+        "timestamp": "2026-03-13T16:16:27.849357278",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "gsmr_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test_exposure"
+                    },
+                    {
+                        "id": "test_outcome"
+                    },
+                    "test_exposure_test_outcome.gsmr:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "timestamp": "2026-03-13T16:04:41.062546751",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    }
+}