Skip to content

Commit

Permalink
Merge pull request #14 from EBI-Metagenomics/modules/checkm2
Browse files Browse the repository at this point in the history
Added Checkm2 module (tests only in stub mode)
  • Loading branch information
KateSakharova authored Oct 6, 2023
2 parents 840a08e + b23a5b9 commit e2204d4
Show file tree
Hide file tree
Showing 11 changed files with 233 additions and 0 deletions.
49 changes: 49 additions & 0 deletions modules/ebi-metagenomics/checkm2/checkm2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
 * CHECKM2
 *
 * Runs `checkm2 predict` on the supplied bins against a CheckM2 database and
 * emits the resulting `quality_report.tsv`.
 *
 * Inputs:
 *   meta      - Groovy map with sample information; `meta.id` is used as the
 *               task tag and as the default output prefix.
 *   bins      - path(s) handed to `checkm2 predict --input`.
 *   checkm_db - path handed to `checkm2 predict --database_path`.
 *
 * Outputs:
 *   checkm2_stats - tuple of meta and `<prefix>_checkm_output/quality_report.tsv`.
 *   versions      - `versions.yml` recording the output of `checkm2 --version`.
 *
 * Extra command-line options can be supplied through `task.ext.args`; the
 * output prefix can be overridden through `task.ext.prefix`.
 */
process CHECKM2 {
    tag "$meta.id"
    label 'process_single'

    conda "bioconda::checkm2=1.0.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/checkm2:1.0.1--pyh7cba7a3_0':
        'biocontainers/checkm2:1.0.1--pyh7cba7a3_0' }"

    input:
    tuple val(meta), path(bins)
    path checkm_db

    output:
    tuple val(meta), path("*_checkm_output/quality_report.tsv"), emit: checkm2_stats
    path "versions.yml"                                        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // User-supplied `args` go last on the command line, after the fixed options.
    """
    checkm2 predict --threads ${task.cpus} \
        --input ${bins} \
        --output-directory ${prefix}_checkm_output \
        --database_path ${checkm_db} \
        ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        CheckM2 : \$(checkm2 --version)
    END_VERSIONS
    """

    stub:
    // The stub only fabricates the report header, so `task.ext.args` is not read here.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    mkdir -p ${prefix}_checkm_output
    echo "genome\tcompleteness\tcontamination" > ${prefix}_checkm_output/quality_report.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        CheckM2 : \$(checkm2 --version)
    END_VERSIONS
    """
}
47 changes: 47 additions & 0 deletions modules/ebi-metagenomics/checkm2/checkm2/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
---
# Module metadata for CHECKM2 (modules/ebi-metagenomics/checkm2/checkm2/main.nf).
name: "checkm2"
description: Rapid assessment of genome bin quality using machine learning
keywords:
  - completeness
  - contamination
  - quality
tools:
  - "checkm2":
      description: "CheckM2 - Predicting the quality of metagenome-recovered bins"
      tool_dev_url: "https://github.com/chklovski/CheckM2/tree/main"
      doi: "10.1038/s41592-023-01940-w"
      # A real YAML list of licence names, not a quoted string holding a list repr.
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`
  - bins:
      type: list
      description: Bins list or folder with bins
  - checkm_db:
      type: file
      description: Checkm2 database
      pattern: "*.dmnd"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

  - checkm2_stats:
      type: file
      description: File with completeness and contamination of bins
      pattern: "*.tsv"

authors:
  - "@KateSakharova"
41 changes: 41 additions & 0 deletions modules/ebi-metagenomics/checkm2/download_db/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
 * CHECKM2_DOWNLOAD_DB
 *
 * Runs `checkm2 database --download --path out` and emits the `.dmnd` file(s)
 * found under `out/CheckM2_database/`.
 *
 * Takes no inputs.
 *
 * Outputs:
 *   checkm2_db - `out/CheckM2_database/*.dmnd`
 *   versions   - `versions.yml` recording the output of `checkm2 --version`.
 *
 * Extra command-line options can be supplied through `task.ext.args`; they are
 * appended to the `checkm2 database` command.
 */
process CHECKM2_DOWNLOAD_DB {

    label 'process_single'

    conda "bioconda::checkm2=1.0.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/checkm2:1.0.1--pyh7cba7a3_0':
        'biocontainers/checkm2:1.0.1--pyh7cba7a3_0' }"

    output:
    path("out/CheckM2_database/*.dmnd") , emit: checkm2_db
    path "versions.yml"                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Defaults to an empty string, so the command is unchanged unless ext.args is set.
    def args = task.ext.args ?: ''

    """
    checkm2 database --download --path out ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        CheckM2 : \$(checkm2 --version)
    END_VERSIONS
    """

    stub:
    // The stub creates an empty placeholder database file instead of downloading,
    // so `task.ext.args` is not read here.
    """
    mkdir -p out/CheckM2_database
    touch out/CheckM2_database/uniref100.KO.1.dmnd

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        CheckM2 : \$(checkm2 --version)
    END_VERSIONS
    """
}
27 changes: 27 additions & 0 deletions modules/ebi-metagenomics/checkm2/download_db/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
---
# Module metadata for CHECKM2_DOWNLOAD_DB
# (modules/ebi-metagenomics/checkm2/download_db/main.nf).
name: "checkm2_download_db"
description: Download DB for checkm2
keywords:
  - completeness
  - contamination
  - database
tools:
  - "checkm2":
      description: "CheckM2 - Predicting the quality of metagenome-recovered bins"
      tool_dev_url: "https://github.com/chklovski/CheckM2/tree/main"
      doi: "10.1038/s41592-023-01940-w"
      # A real YAML list of licence names, not a quoted string holding a list repr.
      licence: ["GPL v3"]

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

  - checkm2_db:
      type: file
      description: Diamond file with checkm2 database
      pattern: "*.dmnd"

authors:
  - "@KateSakharova"
8 changes: 8 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ bmtagger/bmtagger:
- modules/ebi-metagenomics/bmtagger/bmtagger/**
- tests/modules/ebi-metagenomics/bmtagger/bmtagger/**

# Paths associated with the `checkm2/checkm2` test tag: the module sources and
# its tests. (Presumably used by CI to select which tests to run when these
# files change; confirm against the CI workflow.)
checkm2/checkm2:
- modules/ebi-metagenomics/checkm2/checkm2/**
- tests/modules/ebi-metagenomics/checkm2/checkm2/**

# Paths associated with the `checkm2/download_db` test tag.
checkm2/download_db:
- modules/ebi-metagenomics/checkm2/download_db/**
- tests/modules/ebi-metagenomics/checkm2/download_db/**

bmtagger/indexreference:
- modules/ebi-metagenomics/bmtagger/indexreference/**
- tests/modules/ebi-metagenomics/bmtagger/indexreference/**
Expand Down
19 changes: 19 additions & 0 deletions tests/modules/ebi-metagenomics/checkm2/checkm2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { CHECKM2 } from '../../../../../modules/ebi-metagenomics/checkm2/checkm2/main.nf'
include { CHECKM2_DOWNLOAD_DB } from '../../../../../modules/ebi-metagenomics/checkm2/download_db/main.nf'

// Test entry point: obtains the CheckM2 database via CHECKM2_DOWNLOAD_DB, then
// runs CHECKM2 on a single test genome using that database.
workflow test_checkm2 {

    CHECKM2_DOWNLOAD_DB()

    // [ meta, [ bins ] ]: the SARS-CoV-2 genome FASTA from the shared test data
    // stands in for a bin.
    bins_input = [
        [ id:'test', single_end:false ],
        [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
    ]

    CHECKM2 ( bins_input, CHECKM2_DOWNLOAD_DB.out.checkm2_db )
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Publish every process's outputs under <params.outdir>/<tool>, where <tool> is
// derived from the process name: take the last ':'-separated component, keep
// the part before the first '_', and lower-case it
// (e.g. CHECKM2_DOWNLOAD_DB -> checkm2).
process.publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
9 changes: 9 additions & 0 deletions tests/modules/ebi-metagenomics/checkm2/checkm2/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Runs the test_checkm2 workflow with -stub and checks the published
# quality report (by md5sum) and versions.yml (existence only).
- name: "checkm2 stub"
  command: >-
    nextflow run ./tests/modules/ebi-metagenomics/checkm2/checkm2
    -stub -entry test_checkm2
    -c ./tests/config/nextflow.config
    -c ./tests/modules/ebi-metagenomics/checkm2/checkm2/nextflow.config
  tags:
    - "checkm2"
    - "checkm2/checkm2"
  files:
    - path: "output/checkm2/test_checkm_output/quality_report.tsv"
      md5sum: "32e53028a395debce647fa88da2261e5"
    - path: "output/checkm2/versions.yml"
11 changes: 11 additions & 0 deletions tests/modules/ebi-metagenomics/checkm2/download_db/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { CHECKM2_DOWNLOAD_DB } from '../../../../../modules/ebi-metagenomics/checkm2/download_db/main.nf'

// Test entry point: runs CHECKM2_DOWNLOAD_DB on its own (the process takes no inputs).
workflow test_checkm2_download_db {

    CHECKM2_DOWNLOAD_DB()

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Publish every process's outputs under <params.outdir>/<tool>, where <tool> is
// derived from the process name: take the last ':'-separated component, keep
// the part before the first '_', and lower-case it
// (e.g. CHECKM2_DOWNLOAD_DB -> checkm2).
process.publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
12 changes: 12 additions & 0 deletions tests/modules/ebi-metagenomics/checkm2/download_db/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Stub mode is used here because the CheckM2 database is 2.9 GB and downloading it takes longer than GitHub Actions (GA) will allow

# Runs the test_checkm2_download_db workflow with -stub and checks that the
# placeholder database file and versions.yml are published.
- name: "checkm2 download_db test_checkm2_download_db"
  command: >-
    nextflow run ./tests/modules/ebi-metagenomics/checkm2/download_db
    -stub -entry test_checkm2_download_db
    -c ./tests/config/nextflow.config
  tags:
    - "checkm2"
    - "checkm2/download_db"
  files:
    - path: "output/checkm2/out/CheckM2_database/uniref100.KO.1.dmnd"
      should_exist: true
      # md5sum: 80ddd6a8998e5ebbc79ebbaeaee667f7 REAL db
    - path: "output/checkm2/versions.yml"

0 comments on commit e2204d4

Please sign in to comment.