Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Checkm2 module #14

Merged
merged 13 commits into from
Oct 6, 2023
80 changes: 80 additions & 0 deletions modules/ebi-metagenomics/checkm2/checkm2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Runs `checkm2 predict` on a set of genome bins and emits the resulting
// quality report (`quality_report.tsv`).
//
// Inputs:
//   meta, bins  - sample map and the staged bins (list of files or a folder)
//   checkm_db   - CheckM2 database passed to `--database_path`
// Outputs:
//   checkm2_stats - tuple of meta and <meta.id>_checkm_output/quality_report.tsv
//   versions      - versions.yml with the CheckM2 version
//
// When no bins are staged, CheckM2 is not run; a header-only report is written
// instead so that the declared output still exists.
process CHECKM2 {
    tag "$meta.id"
    label 'process_single'

    conda "bioconda::checkm2=1.0.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/checkm2:1.0.1--pyh7cba7a3_0':
        'biocontainers/checkm2:1.0.1--pyh7cba7a3_0' }"

    input:
    tuple val(meta), path(bins)
    path checkm_db

    output:
    tuple val(meta), path("${meta.id}_checkm_output/quality_report.tsv"), emit: checkm2_stats
    path "versions.yml"                                                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''

    // An empty `bins` input yields an empty list here; anything else is non-empty.
    def bins_list = bins.collect()
    if (bins_list.size() == 0) {
        // Nothing to assess: emit a report containing only the column header.
        // Note: the `\t` escapes are expanded to tab characters by Groovy
        // before the script reaches the shell.
        """
        mkdir -p ${meta.id}_checkm_output
        echo "genome\tcompleteness\tcontamination" > ${meta.id}_checkm_output/quality_report.tsv

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            CheckM2 : \$(checkm2 --version )
        END_VERSIONS
        """
    } else {
        """
        checkm2 predict --threads ${task.cpus} \\
            --input ${bins} \\
            --output-directory ${meta.id}_checkm_output \\
            --database_path ${checkm_db} \\
            ${args}

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            CheckM2 : \$(checkm2 --version )
        END_VERSIONS
        """
    }

    stub:
    // create empty files for empty bins list
    // create files with headers for not empty bins list

    def bins_list = bins.collect()
    if (bins_list.size() == 0) {
        """
        mkdir -p ${meta.id}_checkm_output
        touch ${meta.id}_checkm_output/quality_report.tsv

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            CheckM2 : \$(checkm2 --version )
        END_VERSIONS
        """
    } else {
        """
        mkdir -p ${meta.id}_checkm_output
        echo "genome\tcompleteness\tcontamination" > ${meta.id}_checkm_output/quality_report.tsv

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            CheckM2 : \$(checkm2 --version )
        END_VERSIONS
        """
    }
}
47 changes: 47 additions & 0 deletions modules/ebi-metagenomics/checkm2/checkm2/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
---
# Module metadata for the CHECKM2 process (checkm2 predict).
name: "checkm2"
description: Rapid assessment of genome bin quality using machine learning
keywords:
  - completeness
  - contamination
  - quality
tools:
  - "checkm2":
      description: "CheckM2 - Predicting the quality of metagenome-recovered bins"
      tool_dev_url: "https://github.com/chklovski/CheckM2/tree/main"
      doi: "10.1038/s41592-023-01940-w"
      # A real YAML list, not a string that merely looks like one.
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`
  - bins:
      type: list
      description: Bins list or folder with bins
  - checkm_db:
      type: file
      description: Checkm2 database
      pattern: "*.dmnd"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`

  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

  - checkm2_stats:
      type: file
      description: File with completeness and contamination of bins
      pattern: "*.tsv"

authors:
  - "@KateSakharova"
46 changes: 46 additions & 0 deletions modules/ebi-metagenomics/checkm2/download_db/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Downloads the CheckM2 database with `checkm2 database --download` and emits
// the DIAMOND file(s) found under out/CheckM2_database/.
//
// Inputs:
//   meta - sample map, used for tagging and passed through to the output
// Outputs:
//   checkm2_db - tuple of meta and out/CheckM2_database/*.dmnd
//   versions   - versions.yml with the CheckM2 version
//
// NOTE(review): the module's test.yml records that a real (non-stub) run of
// this step fails inside the container with
//   OSError: [Errno 30] Read-only file system: '.../checkm2/version/diamond_path.json'
// so only the stub is currently tested.
process CHECKM2_DOWNLOAD_DB {
    tag "$meta.id"
    label 'process_single'

    conda "bioconda::checkm2=1.0.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/checkm2:1.0.1--pyh7cba7a3_0':
        'biocontainers/checkm2:1.0.1--pyh7cba7a3_0' }"

    input:
    val(meta)

    output:
    tuple val(meta), path("out/CheckM2_database/*.dmnd"), emit: checkm2_db
    path "versions.yml"                                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options supplied through `ext.args` (empty by default).
    def args = task.ext.args ?: ''

    """
    checkm2 database --download --path out ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        CheckM2 : \$(checkm2 --version )
    END_VERSIONS
    """

    stub:
    // Create a placeholder database file matching the declared output glob.
    """
    mkdir -p out/CheckM2_database
    touch out/CheckM2_database/fake.dmnd

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        CheckM2 : \$(checkm2 --version )
    END_VERSIONS
    """
}
40 changes: 40 additions & 0 deletions modules/ebi-metagenomics/checkm2/download_db/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
---
# Module metadata for the CHECKM2_DOWNLOAD_DB process (checkm2 database --download).
name: "checkm2_download_db"
description: Download DB for checkm2
keywords:
  - completeness
  - contamination
  - database
tools:
  - "checkm2":
      description: "CheckM2 - Predicting the quality of metagenome-recovered bins"
      tool_dev_url: "https://github.com/chklovski/CheckM2/tree/main"
      # Bare DOI, matching the form used in the checkm2/checkm2 meta.yml.
      doi: "10.1038/s41592-023-01940-w"
      # A real YAML list, not a string that merely looks like one.
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`

  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

  - checkm2_db:
      type: file
      description: Diamond file with checkm2 database
      pattern: "*.dmnd"

authors:
  - "@KateSakharova"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ bmtagger/bmtagger:
- modules/ebi-metagenomics/bmtagger/bmtagger/**
- tests/modules/ebi-metagenomics/bmtagger/bmtagger/**

checkm2/checkm2:
- modules/ebi-metagenomics/checkm2/checkm2/**
- tests/modules/ebi-metagenomics/checkm2/checkm2/**

cmsearchtbloutdeoverlap:
- modules/ebi-metagenomics/cmsearchtbloutdeoverlap/**
- tests/modules/ebi-metagenomics/cmsearchtbloutdeoverlap/**
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>NODE-1
AAA
34 changes: 34 additions & 0 deletions tests/modules/ebi-metagenomics/checkm2/checkm2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { CHECKM2 } from '../../../../../modules/ebi-metagenomics/checkm2/checkm2/main.nf'
include { CHECKM2_DOWNLOAD_DB } from '../../../../../modules/ebi-metagenomics/checkm2/download_db/main.nf'

// Downloads the CheckM2 database, then runs CHECKM2 on the test bins folder.
workflow test_checkm2 {
    meta = [ id:'test', single_end:false ]

    CHECKM2_DOWNLOAD_DB(meta)

    input = [
        meta,
        file("./tests/modules/ebi-metagenomics/checkm2/checkm2/data/bins", checkIfExists: true)
    ]

    // CHECKM2 takes the database as a bare path, so drop the meta element
    // from the (meta, db) tuple emitted by the download step.
    checkm_db = CHECKM2_DOWNLOAD_DB.out.checkm2_db.map { it -> it[1] }

    CHECKM2(input, checkm_db)
}

// Runs CHECKM2 with the result of globbing the `empty_bins` test folder
// (presumably nothing matches - confirm against the test data), to exercise
// the module's empty-input branch. That branch never reads the database,
// so a placeholder path is passed for it.
workflow test_checkm2_empty_directory {
    sample_meta    = [ id:'test', single_end:false ]
    globbed_bins   = file("./tests/modules/ebi-metagenomics/checkm2/checkm2/data/empty_bins/*")
    placeholder_db = file("./tests/modules/ebi-metagenomics/checkm2/checkm2/data/NO_FILE")

    CHECKM2([ sample_meta, globbed_bins ], placeholder_db)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Publish every process's outputs under <outdir>/<name>, where <name> is the
// last ':'-separated component of the process name, cut at its first '_' and
// lower-cased (e.g. a process named CHECKM2_DOWNLOAD_DB publishes to "checkm2").
process.publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
35 changes: 35 additions & 0 deletions tests/modules/ebi-metagenomics/checkm2/checkm2/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# TODO: fix the test_checkm2 test (only its -stub run is exercised below).
# A real run doesn't work because of a download db issue:
#   OSError: [Errno 30] Read-only file system: '/usr/local/lib/python3.8/site-packages/checkm2/version/diamond_path.json'

- name: "checkm2_stub"
  command: nextflow run ./tests/modules/ebi-metagenomics/checkm2/checkm2 -stub -entry test_checkm2 -c ./tests/config/nextflow.config -c ./tests/modules/ebi-metagenomics/checkm2/checkm2/nextflow.config
  tags:
    - "checkm2"
    - "checkm2/checkm2"
  files:
    - path: "output/checkm2/test_checkm_output/quality_report.tsv"
      md5sum: 32e53028a395debce647fa88da2261e5
    - path: "output/checkm2/versions.yml"
      md5sum: fcebefe734201aedddd0037fb274d572

- name: "checkm2_empty_directory"
  command: nextflow run ./tests/modules/ebi-metagenomics/checkm2/checkm2 -entry test_checkm2_empty_directory -c ./tests/config/nextflow.config -c ./tests/modules/ebi-metagenomics/checkm2/checkm2/nextflow.config
  tags:
    - "checkm2"
    - "checkm2/checkm2"
  files:
    - path: "output/checkm2/test_checkm_output/quality_report.tsv"
      md5sum: 32e53028a395debce647fa88da2261e5
    - path: "output/checkm2/versions.yml"
      md5sum: c1ac7bf61a1f5a47711141ca1d34fd8d

# The stub writes an empty report for an empty bins list, so only the
# existence of quality_report.tsv is checked here.
- name: "checkm2_empty_directory_stub"
  command: nextflow run ./tests/modules/ebi-metagenomics/checkm2/checkm2 -stub -entry test_checkm2_empty_directory -c ./tests/config/nextflow.config -c ./tests/modules/ebi-metagenomics/checkm2/checkm2/nextflow.config
  tags:
    - "checkm2"
    - "checkm2/checkm2"
  files:
    - path: "output/checkm2/test_checkm_output/quality_report.tsv"
    - path: "output/checkm2/versions.yml"
      md5sum: c1ac7bf61a1f5a47711141ca1d34fd8d