Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Checkm2 module #14

Merged
merged 13 commits into from
Oct 6, 2023
64 changes: 64 additions & 0 deletions modules/ebi-metagenomics/checkm2/checkm2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
 * CHECKM2: run `checkm2 predict` on a set of genome bins and emit the
 * resulting quality report.
 *
 * Inputs:
 *   - meta      : Groovy map with sample information (meta.id is used as tag/prefix)
 *   - bins      : bins to assess (file list or folder)
 *   - checkm_db : CheckM2 database handed to `--database_path`
 *
 * Outputs:
 *   - checkm2_stats : <prefix>_checkm_output/quality_report.tsv
 *   - versions      : versions.yml with the CheckM2 version
 */
process CHECKM2 {
    tag "$meta.id"
    label 'process_single'

    conda "bioconda::checkm2=1.0.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/checkm2:1.0.1--pyh7cba7a3_0':
        'biocontainers/checkm2:1.0.1--pyh7cba7a3_0' }"

    input:
    tuple val(meta), path(bins)
    path checkm_db

    output:
    tuple val(meta), path("*_checkm_output/quality_report.tsv"), emit: checkm2_stats
    path "versions.yml"                                        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Only call checkm2 when at least one bin was staged; otherwise write a
    // header-only report so the declared output still exists.
    def has_bins = bins.collect().size() > 0
    if (has_bins) {
        """
        checkm2 predict --threads ${task.cpus} \
            --input ${bins} \
            --output-directory ${prefix}_checkm_output \
            --database_path ${checkm_db} \
            ${args}

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            CheckM2 : \$(checkm2 --version)
        END_VERSIONS
        """
    } else {
        """
        mkdir -p ${prefix}_checkm_output
        echo "genome\tcompleteness\tcontamination" > ${prefix}_checkm_output/quality_report.tsv

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            CheckM2 : \$(checkm2 --version)
        END_VERSIONS
        """
    }

    stub:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // The stub produces the same header-only report as the "no bins" branch above.
    """
    mkdir -p ${prefix}_checkm_output
    echo "genome\tcompleteness\tcontamination" > ${prefix}_checkm_output/quality_report.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        CheckM2 : \$(checkm2 --version)
    END_VERSIONS
    """
}
47 changes: 47 additions & 0 deletions modules/ebi-metagenomics/checkm2/checkm2/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
---
# Module metadata for CHECKM2 (checkm2 predict).
name: "checkm2"
description: Rapid assessment of genome bin quality using machine learning
keywords:
  - completeness
  - contamination
  - quality
tools:
  - "checkm2":
      description: "CheckM2 - Predicting the quality of metagenome-recovered bins"
      tool_dev_url: "https://github.com/chklovski/CheckM2/tree/main"
      doi: "10.1038/s41592-023-01940-w"
      # A real list of licence names, not a string that merely looks like one.
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`
  - bins:
      type: list
      description: Bins list or folder with bins
  - checkm_db:
      type: file
      description: Checkm2 database
      pattern: "*.dmnd"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`

  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

  - checkm2_stats:
      type: file
      description: File with completeness and contamination of bins
      pattern: "*.tsv"

authors:
  - "@KateSakharova"
41 changes: 41 additions & 0 deletions modules/ebi-metagenomics/checkm2/download_db/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
 * CHECKM2_DOWNLOAD_DB: download the CheckM2 database with
 * `checkm2 database --download` into ./out.
 *
 * Outputs:
 *   - checkm2_db : out/CheckM2_database/*.dmnd
 *   - versions   : versions.yml with the CheckM2 version
 *
 * Extra command-line options can be supplied through `task.ext.args`;
 * they are appended to the download command (empty by default).
 */
process CHECKM2_DOWNLOAD_DB {

    label 'process_single'

    conda "bioconda::checkm2=1.0.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/checkm2:1.0.1--pyh7cba7a3_0':
        'biocontainers/checkm2:1.0.1--pyh7cba7a3_0' }"

    output:
    path("out/CheckM2_database/*.dmnd") , emit: checkm2_db
    path "versions.yml"                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''

    // `args` used to be read but never used, so any configured ext.args
    // were silently dropped; forward them to the tool.
    """
    checkm2 database --download --path out ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        CheckM2 : \$(checkm2 --version)
    END_VERSIONS
    """

    stub:
    // Create an empty placeholder where the real database file would be,
    // so the declared output glob matches without downloading anything.
    """
    mkdir -p out/CheckM2_database
    touch out/CheckM2_database/uniref100.KO.1.dmnd

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        CheckM2 : \$(checkm2 --version)
    END_VERSIONS
    """
}
27 changes: 27 additions & 0 deletions modules/ebi-metagenomics/checkm2/download_db/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
---
# Module metadata for CHECKM2_DOWNLOAD_DB (checkm2 database --download).
name: "checkm2_download_db"
description: Download DB for checkm2
keywords:
  - completeness
  - contamination
  - database
tools:
  - "checkm2":
      description: "CheckM2 - Predicting the quality of metagenome-recovered bins"
      tool_dev_url: "https://github.com/chklovski/CheckM2/tree/main"
      doi: "10.1038/s41592-023-01940-w"
      # A real list of licence names, not a string that merely looks like one.
      licence: ["GPL v3"]

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

  - checkm2_db:
      type: file
      description: Diamond file with checkm2 database
      pattern: "*.dmnd"

authors:
  - "@KateSakharova"
8 changes: 8 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ bmtagger/bmtagger:
- modules/ebi-metagenomics/bmtagger/bmtagger/**
- tests/modules/ebi-metagenomics/bmtagger/bmtagger/**

# Paths whose changes trigger the checkm2/checkm2 module tests.
checkm2/checkm2:
  - modules/ebi-metagenomics/checkm2/checkm2/**
  - tests/modules/ebi-metagenomics/checkm2/checkm2/**

# Paths whose changes trigger the checkm2/download_db module tests.
checkm2/download_db:
  - modules/ebi-metagenomics/checkm2/download_db/**
  - tests/modules/ebi-metagenomics/checkm2/download_db/**

cmsearchtbloutdeoverlap:
- modules/ebi-metagenomics/cmsearchtbloutdeoverlap/**
- tests/modules/ebi-metagenomics/cmsearchtbloutdeoverlap/**
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>NODE-1
AAA
33 changes: 33 additions & 0 deletions tests/modules/ebi-metagenomics/checkm2/checkm2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { CHECKM2 } from '../../../../../modules/ebi-metagenomics/checkm2/checkm2/main.nf'
include { CHECKM2_DOWNLOAD_DB } from '../../../../../modules/ebi-metagenomics/checkm2/download_db/main.nf'

// Runs CHECKM2 on the test bins folder, using the database produced by
// CHECKM2_DOWNLOAD_DB.
workflow test_checkm2 {

    CHECKM2_DOWNLOAD_DB()

    meta = [ id:'test', single_end:false ]
    input = [
        meta,
        // Fail early if the fixture folder is missing.
        file("./tests/modules/ebi-metagenomics/checkm2/checkm2/data/bins", checkIfExists: true)
    ]

    checkm_db = CHECKM2_DOWNLOAD_DB.out.checkm2_db

    CHECKM2 ( input, checkm_db )
}

// Runs CHECKM2 with a glob over the empty_bins fixture folder as the bins
// input and a placeholder (NO_FILE) path as the database.
workflow test_checkm2_empty_directory {

    sample_meta = [ id:'test', single_end:false ]
    bin_files   = file("./tests/modules/ebi-metagenomics/checkm2/checkm2/data/empty_bins/*")
    placeholder = file("./tests/modules/ebi-metagenomics/checkm2/checkm2/data/NO_FILE")

    CHECKM2 ( [ sample_meta, bin_files ], placeholder )
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
process {

    // Publish into <outdir>/<tool>, where <tool> is the lower-cased first
    // '_'-separated token of the process name with any 'WORKFLOW:' scope removed.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

}
21 changes: 21 additions & 0 deletions tests/modules/ebi-metagenomics/checkm2/checkm2/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Stub run of the full test workflow (database download + CHECKM2).
- name: "checkm2 stub"
  command: nextflow run ./tests/modules/ebi-metagenomics/checkm2/checkm2 -stub -entry test_checkm2 -c ./tests/config/nextflow.config -c ./tests/modules/ebi-metagenomics/checkm2/checkm2/nextflow.config
  tags:
    - "checkm2"
    - "checkm2/checkm2"
  files:
    - path: "output/checkm2/test_checkm_output/quality_report.tsv"
      md5sum: 32e53028a395debce647fa88da2261e5
    - path: "output/checkm2/versions.yml"
      md5sum: fcebefe734201aedddd0037fb274d572

# Real (non-stub) run with no bins: the module writes a header-only report.
- name: "checkm2 empty_directory"
  command: nextflow run ./tests/modules/ebi-metagenomics/checkm2/checkm2 -entry test_checkm2_empty_directory -c ./tests/config/nextflow.config -c ./tests/modules/ebi-metagenomics/checkm2/checkm2/nextflow.config
  tags:
    - "checkm2"
    - "checkm2/checkm2"
  files:
    - path: "output/checkm2/test_checkm_output/quality_report.tsv"
      md5sum: 32e53028a395debce647fa88da2261e5
    - path: "output/checkm2/versions.yml"
      md5sum: c1ac7bf61a1f5a47711141ca1d34fd8d
11 changes: 11 additions & 0 deletions tests/modules/ebi-metagenomics/checkm2/download_db/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { CHECKM2_DOWNLOAD_DB } from '../../../../../modules/ebi-metagenomics/checkm2/download_db/main.nf'

// Minimal entry point: invoke the database download process on its own.
workflow test_checkm2_download_db {
    CHECKM2_DOWNLOAD_DB()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
process {

    // Publish into <outdir>/<tool>, where <tool> is the lower-cased first
    // '_'-separated token of the process name with any 'WORKFLOW:' scope removed.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

}
12 changes: 12 additions & 0 deletions tests/modules/ebi-metagenomics/checkm2/download_db/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# A stub run is used here because the CheckM2 database is 2.9 GB and downloading it takes longer than GA will allow

- name: checkm2 download_db test_checkm2_download_db
  # Folded scalar: the lines below are joined with single spaces into one command.
  command: >-
    nextflow run ./tests/modules/ebi-metagenomics/checkm2/download_db
    -stub
    -entry test_checkm2_download_db
    -c ./tests/config/nextflow.config
  tags:
    - checkm2
    - checkm2/download_db
  files:
    # Only existence is checked for the database file; the md5sum kept below
    # is noted as belonging to the real database.
    - path: output/checkm2/out/CheckM2_database/uniref100.KO.1.dmnd
      should_exist: true
      # md5sum: 80ddd6a8998e5ebbc79ebbaeaee667f7 REAL db
    - path: output/checkm2/versions.yml
Loading