diff --git a/nextflow_schema.json b/nextflow_schema.json index 708e20281..62da23522 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -478,16 +478,16 @@ "properties": { "run_fastq_sharding": { "type": "boolean", - "description": "Turn on sharding to split reads into smaller chunks before mapping", + "description": "Turn on FastQ sharding.", "fa_icon": "fas fa-power-off", - "help_text": "Sharding reads before mapping can help parallelise mapping for very large FASTQs. " + "help_text": "Sharding will split the FastQs into smaller chunks before mapping. These chunks are then mapped in parallel. This approach can speed up the mapping process for larger FastQ files." }, "fastq_shard_size": { "type": "integer", "default": 1000000, "description": "Specify the number of reads in each shard when splitting.", "fa_icon": "fas fa-arrows-alt-v", - "help_text": "Make sure to choose a value that makes sense for your dataset. Small values can create many files. Needs `--run_fastq_sharding`" + "help_text": "Make sure to choose a value that makes sense for your dataset. Small values can create many files, which can end up negatively affecting the overall speed of the mapping process." }, "mapping_tool": { "type": "string", diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index ccc978150..4f9b00927 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -27,7 +27,7 @@ workflow MAP { SEQKIT_SPLIT2( ch_input_for_sharding ) ch_versions = ch_versions.mix ( SEQKIT_SPLIT2.out.versions.first() ) - reads = SEQKIT_SPLIT2.out.reads + sharded_reads = SEQKIT_SPLIT2.out.reads .transpose() .map { meta, reads -> @@ -36,18 +36,30 @@ workflow MAP { [ new_meta, reads ] } .groupTuple() + + ch_input_for_mapping = sharded_reads + .combine(index) + .multiMap { + meta, reads, meta2, index -> + new_meta = meta.clone() + new_meta.reference = meta2.id + reads: [ new_meta, reads ] + index: [ meta2, index ] + } - } + } else { + + ch_input_for_mapping = reads + .combine(index) + .multiMap { + meta, reads, meta2, index -> + new_meta = meta.clone() + new_meta.reference = meta2.id + reads: [ new_meta, reads ] + index: [ meta2, index ] + } - ch_input_for_mapping = reads - .combine(index) - .multiMap { - meta, reads, meta2, index -> - new_meta = meta.clone() - new_meta.reference = meta2.id - reads: [ new_meta, reads ] - index: [ meta2, index ] - } + } if ( params.mapping_tool == 'bwaaln' ) { FASTQ_ALIGN_BWAALN ( ch_input_for_mapping.reads, ch_input_for_mapping.index )