Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DSL2: Pull input files from reference sheet columns #1018

Merged
merged 11 commits into from
Oct 27, 2023
8 changes: 4 additions & 4 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ process {
// READ MAPPING
//
withName: BWA_ALN {
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
ext.args = { "-n ${params.mapping_bwaaln_n} -k ${params.mapping_bwaaln_k} -l ${params.mapping_bwaaln_l} -o ${params.mapping_bwaaln_o}" }
ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" }
publishDir = [
Expand All @@ -384,7 +384,7 @@ process {
}

withName: 'BWA_SAMSE|BWA_SAMPE' {
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
ext.args = { "-r '@RG\\tID:ILLUMINA-${meta.library_id}\\tSM:${meta.sample_id}\\tPL:illumina\\tPU:ILLUMINA-${meta.library_id}-${meta.strandedness}'" }
ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" }
publishDir = [
Expand All @@ -403,7 +403,7 @@ process {
}

withName: ".*MAP:FASTQ_ALIGN_BWAALN:SAMTOOLS_INDEX" {
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
ext.args = { params.fasta_largeref ? "-c" : "" }
ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" }
publishDir = [
Expand Down Expand Up @@ -828,7 +828,7 @@ process {
]
}

withName: "QUALIMAP_BAMQC" {
withName: 'QUALIMAP_BAMQC_WITHBED|QUALIMAP_BAMQC_NOBED' {
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
publishDir = [
// Qualimap results are published per reference and per sample.
// Fixed: a stray literal '}' after the trailing slash produced a directory named '}'.
path: { "${params.outdir}/mapstats/qualimap/${meta.reference}/${meta.sample_id}/" },
Expand Down
3 changes: 3 additions & 0 deletions conf/test_humanbam.config
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ params {
contamination_estimation_angsd_mapq = 0
contamination_estimation_angsd_minq = 0

// Qualimap
snpcapture_bed = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz'

// TODO Reactivate sexDet and genotyping params when those steps get implemented.
// //Sex Determination
// sexdeterrmine_bedfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz'
Expand Down
2 changes: 1 addition & 1 deletion conf/test_multiref.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ params {
input = 'https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/samplesheet_multilane_multilib.tsv'

// Genome references
fasta = 'https://github.com/jfy133/nf-core-test-datasets/raw/eager/reference/reference_sheet_multiref.csv'
scarlhoff marked this conversation as resolved.
Show resolved Hide resolved
fasta = 'https://github.com/nf-core/test-datasets/raw/eager/reference/reference_sheet_multiref.csv'

// BAM filtering
run_bamfiltering = true
Expand Down
7 changes: 6 additions & 1 deletion docs/development/manual_tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,11 @@ Tool Specific combinations
- with stricter threshold

- BAM trimming

- with default parameters
- different length by udg treatment

- All together
- All together

### Multi-reference tests

Expand Down Expand Up @@ -145,6 +146,10 @@ nextflow run ../main.nf -profile singularity,test --outdir ./results --input sam
## Test: (11) Broken path correctly fails pipeline ✅
## Expect: Expect fail
nextflow run ../main.nf -profile singularity,test --outdir ./results --input samplesheet.tsv --fasta reference_sheet_multiref_test11.csv -ansi-log false -dump-channels --save_reference

# Test: File input via reference sheet
# Expect: For hs37d5: Qualimap with BED file, mtnucratio and angsd successful; bedtools not run. For Mammoth_MT: Qualimap without BED file, mtnucratio and bedtools successful; angsd not run.
nextflow run main.nf -profile test_multiref,docker --outdir ./results --run_bedtools_coverage --run_contamination_estimation_angsd --run_mtnucratio
```

### AdapterRemoval
Expand Down
2 changes: 1 addition & 1 deletion modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@
},
"fastq_align_bwaaln": {
"branch": "master",
"git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653",
"git_sha": "e2c81fea3daeacfa190f78d2b82f82361b734507",
"installed_by": ["subworkflows"]
}
}
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ params {
fasta_dict = null
fasta_mapperindexdir = null
fasta_circular_target = null
fasta_mitochondrion_header = null
scarlhoff marked this conversation as resolved.
Show resolved Hide resolved
fasta_largeref = false

// References
Expand Down Expand Up @@ -219,6 +218,7 @@ try {
// Additional configs for subworkflows
includeConfig 'subworkflows/nf-core/bam_split_by_region/nextflow.config'
includeConfig 'subworkflows/nf-core/bam_docounts_contamination_angsd/nextflow.config'
includeConfig 'subworkflows/nf-core/fastq_align_bwaaln/nextflow.config'

profiles {
debug {
Expand Down
6 changes: 0 additions & 6 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,6 @@
"description": "Specify the FASTA header of the target chromosome to extend. Only applies when using `circularmapper`.",
"help_text": "The entry (chromosome, contig, etc.) in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`\n",
"fa_icon": "fas fa-bullseye"
},
"fasta_mitochondrion_header": {
"type": "string",
"fa_icon": "fas fa-tag",
"description": "Specify the name of the reference FASTA entry corresponding to the mitochondrial genome, up to the first space. Only applies when using `--run_mtnucratio`.",
"help_text": "Specify the FASTA entry in the reference file specified as `--fasta` that acts as the mitochondrial 'chromosome' to base a mitochondrial-to-nuclear ratio calculation on. \n\nThe tool only accepts the first section of the header before the first space. For example, mitochondrion chromosome name is `MT` for the hs37d5/GrCH37 human reference genome.\n"
}
}
},
Expand Down
51 changes: 34 additions & 17 deletions subworkflows/local/map.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
// Map reads against the indexed reference(s)
//

include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main'
include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main'
include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main'
include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES } from '../../modules/nf-core/samtools/merge/main'
include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES } from '../../modules/nf-core/samtools/sort/main'
include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main'
include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main'
include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main'
include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES } from '../../modules/nf-core/samtools/merge/main'
include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES } from '../../modules/nf-core/samtools/sort/main'
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MEM; SAMTOOLS_INDEX as SAMTOOLS_INDEX_BT2; SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LANES } from '../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED } from '../../modules/nf-core/samtools/flagstat/main'
include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED } from '../../modules/nf-core/samtools/flagstat/main'

workflow MAP {
take:
Expand All @@ -19,24 +19,32 @@ workflow MAP {
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()

ch_input_for_mapping = reads
.combine(index)
.multiMap {
meta, reads, meta2, index ->
new_meta = meta.clone()
new_meta.reference = meta2.id
reads: [ new_meta, reads ]
index: [ meta2, index]
}

if ( params.mapping_tool == 'bwaaln' ) {
FASTQ_ALIGN_BWAALN ( ch_input_for_mapping.reads, ch_input_for_mapping.index )
ch_index_for_mapping = index
ch_reads_for_mapping = reads

FASTQ_ALIGN_BWAALN ( ch_reads_for_mapping, ch_index_for_mapping )
ch_versions = ch_versions.mix ( FASTQ_ALIGN_BWAALN.out.versions.first() )
ch_mapped_lane_bam = FASTQ_ALIGN_BWAALN.out.bam
.map{
// create meta consistent with rest of workflow
meta, bam ->
new_meta = meta + [ reference: meta.id_index ]
[ new_meta, bam ]
scarlhoff marked this conversation as resolved.
Show resolved Hide resolved
}

ch_mapped_lane_bai = params.fasta_largeref ? FASTQ_ALIGN_BWAALN.out.csi : FASTQ_ALIGN_BWAALN.out.bai

} else if ( params.mapping_tool == 'bwamem' ) {
ch_input_for_mapping = reads
.combine( index )
.multiMap {
meta, reads, meta2, index ->
new_meta = meta + [ reference: meta2.id ]
reads: [ new_meta, reads ]
index: [ meta2, index ]
}

BWA_MEM ( ch_input_for_mapping.reads, ch_input_for_mapping.index, true )
ch_versions = ch_versions.mix ( BWA_MEM.out.versions.first() )
ch_mapped_lane_bam = BWA_MEM.out.bam
Expand All @@ -46,6 +54,15 @@ workflow MAP {
ch_mapped_lane_bai = params.fasta_largeref ? SAMTOOLS_INDEX_MEM.out.csi : SAMTOOLS_INDEX_MEM.out.bai

} else if ( params.mapping_tool == 'bowtie2' ) {
ch_input_for_mapping = reads
.combine( index )
.multiMap {
meta, reads, meta2, index ->
new_meta = meta + [ reference: meta2.id ]
reads: [ new_meta, reads ]
index: [ meta2, index ]
}

BOWTIE2_ALIGN ( ch_input_for_mapping.reads, ch_input_for_mapping.index, false, true )
ch_versions = ch_versions.mix ( BOWTIE2_ALIGN.out.versions.first() )
ch_mapped_lane_bam = BOWTIE2_ALIGN.out.bam
Expand Down
62 changes: 60 additions & 2 deletions subworkflows/local/reference_indexing.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

include { REFERENCE_INDEXING_SINGLE } from '../../subworkflows/local/reference_indexing_single.nf'
include { REFERENCE_INDEXING_MULTI } from '../../subworkflows/local/reference_indexing_multi.nf'
include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf'

workflow REFERENCE_INDEXING {
take:
Expand All @@ -17,20 +18,77 @@ workflow REFERENCE_INDEXING {

// Warn user if they've given a reference sheet that already includes fai/dict/mapper index etc.
if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (fasta_fai || fasta_dict || fasta_mapperindexdir)) log.warn("A TSV or CSV has been supplied to `--fasta` as well as e.g. `--fasta_fai`. --fasta CSV/TSV takes priority and --fasta_* parameters will be ignored.")
if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask || params.snpcapture_bed || params.mapstats_bedtools_featurefile )) log.warn("A TSV or CSV has been supplied to `--fasta` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta CSV/TSV take priority and other input parameters will be ignored.")
scarlhoff marked this conversation as resolved.
Show resolved Hide resolved

if ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) {
// If input (multi-)reference sheet supplied
REFERENCE_INDEXING_MULTI ( fasta )
ch_reference_for_mapping = REFERENCE_INDEXING_MULTI.out.reference
ch_mitochondrion_header = REFERENCE_INDEXING_MULTI.out.mitochondrion_header
ch_hapmap = REFERENCE_INDEXING_MULTI.out.hapmap
ch_pmd_mask = REFERENCE_INDEXING_MULTI.out.pmd_mask
ch_snp_capture_bed = REFERENCE_INDEXING_MULTI.out.snp_capture_bed
ch_pileupcaller_snp = REFERENCE_INDEXING_MULTI.out.pileupcaller_snp
ch_sexdeterrmine_bed = REFERENCE_INDEXING_MULTI.out.sexdeterrmine_bed
ch_bedtools_feature = REFERENCE_INDEXING_MULTI.out.bedtools_feature
ch_versions = ch_versions.mix( REFERENCE_INDEXING_MULTI.out.versions )
} else {
// If input FASTA and/or indices supplied
REFERENCE_INDEXING_SINGLE ( fasta, fasta_fai, fasta_dict, fasta_mapperindexdir )
ch_mitochondrion_header = REFERENCE_INDEXING_SINGLE.out.mitochondrion_header
ch_hapmap = REFERENCE_INDEXING_SINGLE.out.hapmap
ch_pmd_mask = REFERENCE_INDEXING_SINGLE.out.pmd_mask
ch_snp_capture_bed = REFERENCE_INDEXING_SINGLE.out.snp_capture_bed
ch_pileupcaller_snp = REFERENCE_INDEXING_SINGLE.out.pileupcaller_snp
ch_sexdeterrmine_bed = REFERENCE_INDEXING_SINGLE.out.sexdeterrmine_bed
ch_bedtools_feature = REFERENCE_INDEXING_SINGLE.out.bedtools_feature
ch_reference_for_mapping = REFERENCE_INDEXING_SINGLE.out.reference
ch_versions = ch_versions.mix( REFERENCE_INDEXING_SINGLE.out.versions )
}

// Filter out input options that are not provided
ch_mitochondrion_header = ch_mitochondrion_header
.filter{ it[1] != "" }

ch_hapmap = ch_hapmap
.filter{ it[1] != "" }

ch_pmd_mask = ch_pmd_mask
.filter{ it[1] != "" && it[2] != "" }

// Separate references that supplied a SNP capture BED (`input`) from those whose
// BED field is an empty string (`skip`). The `skip` entries are not dropped here;
// they are mixed back into the emitted channel after the gunzip step.
ch_capture_bed = ch_snp_capture_bed //optional
.branch {
meta, capture_bed ->
input: capture_bed != ""
skip: true
}
// Of the supplied BED files, only those whose extension is `gz` are routed to
// GUNZIP_SNPBED (`forgunzip`); every other file bypasses decompression (`skip`).
ch_capture_bed_gunzip = ch_capture_bed.input //unzip
.branch {
meta, capture_bed ->
forgunzip: capture_bed.extension == "gz"
skip: true
}
// Decompress only the gzipped capture BED files.
GUNZIP_SNPBED( ch_capture_bed_gunzip.forgunzip )
scarlhoff marked this conversation as resolved.
Show resolved Hide resolved
ch_capture_bed = GUNZIP_SNPBED.out.gunzip.mix( ch_capture_bed_gunzip.skip ).mix( ch_capture_bed.skip )

ch_pileupcaller_snp = ch_pileupcaller_snp
.filter{ it[1] != "" && it[2] != "" }

ch_sexdeterrmine_bed = ch_sexdeterrmine_bed
.filter{ it[1] != "" }

ch_bedtools_feature = ch_bedtools_feature
.filter{ it[1] != "" }

emit:
reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex ]
versions = ch_versions
reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ]
mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ]
hapmap = ch_hapmap // [ meta, hapmap ]
pmd_mask = ch_pmd_mask // [ meta, masked_fasta, capture_bed ]
snp_capture_bed = ch_capture_bed // [ meta, capture_bed ]
pileupcaller_snp = ch_pileupcaller_snp // [ meta, pileupcaller_bed, pileupcaller_snp ]
sexdeterrmine_bed = ch_sexdeterrmine_bed // [ meta, sexdet_bed ]
bedtools_feature = ch_bedtools_feature // [ meta, bedtools_feature ]
versions = ch_versions

}
Loading