Skip to content

Commit

Permalink
refine parameters / format
Browse files Browse the repository at this point in the history
  • Loading branch information
shaomingfu committed Jun 20, 2020
1 parent 47895ac commit 1bda1eb
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 42 deletions.
33 changes: 17 additions & 16 deletions meta/incubator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ See LICENSE for licensing.
incubator::incubator(vector<parameters> &v)
: params(v), tmerge("", params[DEFAULT].min_single_exon_clustering_overlap)
{
if(params[DEFAULT].output_gtf_file == "") return;
if(params[DEFAULT].profile_only == true) return;
meta_gtf.open(params[DEFAULT].output_gtf_file.c_str());
if(meta_gtf.fail())
{
Expand All @@ -40,7 +40,7 @@ incubator::incubator(vector<parameters> &v)

// Destructor: closes the meta_gtf stream unless one of the guards below returns
// first. The visible part of the constructor applies the same two guards before
// calling meta_gtf.open(), so the close is skipped whenever the open was skipped.
// NOTE(review): this text appears to be a rendered diff without +/- markers, so
// the two guards may be the old and new form of a single line -- confirm against
// the actual file before relying on both being present.
incubator::~incubator()
{
// No output GTF path configured: return without closing.
if(params[DEFAULT].output_gtf_file == "") return;
// profile_only is set (the "--profile" option): return without closing.
if(params[DEFAULT].profile_only == true) return;
meta_gtf.close();
}

Expand All @@ -49,7 +49,7 @@ int incubator::resolve()
read_bam_list();
init_samples();

if(params[DEFAULT].output_gtf_file == "") return 0;
if(params[DEFAULT].profile_only == true) return 0;

build_sample_index();

Expand Down Expand Up @@ -149,29 +149,30 @@ int incubator::init_samples()
boost::asio::post(pool, [this, &sp]
{
const parameters &cfg = this->params[sp.data_type];
if(cfg.profile_dir != "" && cfg.output_gtf_file != "")
{
sp.load_profile(cfg.profile_dir);
sp.read_index_iterators();
string bdir = cfg.output_bridged_bam_dir;
if(bdir != "") sp.init_bridged_bam(bdir);
}
else if(cfg.profile_dir != "" && cfg.output_gtf_file == "")

if(cfg.profile_only == true)
{
previewer pre(cfg, sp);
pre.infer_library_type();
if(sp.data_type == PAIRED_END) pre.infer_insertsize();
sp.save_profile(cfg.profile_dir);
if(cfg.profile_dir != "") sp.save_profile(cfg.profile_dir);
return;
}
else if(cfg.output_gtf_file != "")

if(cfg.profile_dir != "")
{
sp.load_profile(cfg.profile_dir);
}
else
{
sp.read_index_iterators();
previewer pre(cfg, sp);
pre.infer_library_type();
if(sp.data_type == PAIRED_END) pre.infer_insertsize();
string bdir = cfg.output_bridged_bam_dir;
if(bdir != "") sp.init_bridged_bam(bdir);
}

sp.read_index_iterators();
string bdir = cfg.output_bridged_bam_dir;
if(bdir != "") sp.init_bridged_bam(bdir);
});
}
pool.join();
Expand Down
82 changes: 56 additions & 26 deletions util/parameters.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ parameters::parameters()
algo = "meta-scallop";
version = "0.1.7";
max_threads = 10;
profile_only = false;

// for meta-assembly
max_group_size = 20;
Expand Down Expand Up @@ -107,22 +108,37 @@ int parameters::parse_arguments(int argc, const char ** argv, int data_type)
input_bam_list = string(argv[i + 1]);
i++;
}
else if(string(argv[i]) == "-o")
{
output_gtf_file = string(argv[i + 1]);
i++;
}
if(string(argv[i]) == "-l")
{
chrm_list_file = string(argv[i + 1]);
i++;
}
else if(string(argv[i]) == "-o")
if(string(argv[i]) == "--chrm_list_file")
{
output_gtf_file = string(argv[i + 1]);
chrm_list_file = string(argv[i + 1]);
i++;
}
else if(string(argv[i]) == "-d")
{
output_gtf_dir = string(argv[i + 1]);
i++;
}
else if(string(argv[i]) == "-a")
else if(string(argv[i]) == "--output_gtf_dir")
{
output_gtf_dir = string(argv[i + 1]);
i++;
}
else if(string(argv[i]) == "-b")
{
output_bridged_bam_dir = string(argv[i + 1]);
i++;
}
else if(string(argv[i]) == "--output_bridged_bam_dir")
{
output_bridged_bam_dir = string(argv[i + 1]);
i++;
Expand All @@ -132,11 +148,21 @@ int parameters::parse_arguments(int argc, const char ** argv, int data_type)
profile_dir = string(argv[i + 1]);
i++;
}
else if(string(argv[i]) == "--profile_dir")
{
profile_dir = string(argv[i + 1]);
i++;
}
else if(string(argv[i]) == "-t")
{
max_threads = atoi(argv[i + 1]);
i++;
}
else if(string(argv[i]) == "--max_threads")
{
max_threads = atoi(argv[i + 1]);
i++;
}
else if(string(argv[i]) == "-s")
{
min_grouping_similarity = atof(argv[i + 1]);
Expand All @@ -157,6 +183,10 @@ int parameters::parse_arguments(int argc, const char ** argv, int data_type)
max_group_size = atoi(argv[i + 1]);
i++;
}
else if(string(argv[i]) == "--profile")
{
profile_only = true;
}
else if(string(argv[i]) == "--version")
{
printf("%s\n", version.c_str());
Expand Down Expand Up @@ -456,29 +486,29 @@ int parameters::print_help()
printf("Usage: meta-scallop -i <input-bam-list> -o <output.gtf> [options]\n");
printf("\n");
printf("Options:\n");
printf(" %-42s %s\n", "--help", "print usage of meta-scallop and exit");
printf(" %-42s %s\n", "--version", "print current version of meta-scallop and exit");
printf(" %-42s %s\n", "-l <string>", "list of chromosomes that will be assembled, default: N/A (i.e., assemble all)");
printf(" %-42s %s\n", "-d <string>", "existing directory for individual transcripts, default: N/A");
printf(" %-42s %s\n", "-a <string>", "existing directory for individual bridged alignments, default: N/A");
printf(" %-42s %s\n", "-p <string>", "existing directory for saving/loading profiles of each samples, default: N/A");
printf(" %-42s %s\n", "-t/--max_threads <integer>", "maximized number of threads, default: 10");
printf(" %-42s %s\n", "-c/--max_group_size <integer>", "the maximized number of splice graphs that will be combined, default: 20");
printf(" %-42s %s\n", "-s/--min_grouping_similarity <float>", "the minimized similarity for two graphs to be combined, default: 0.2");
printf(" %-42s %s\n", "--min_bridging_score <float>", "the minimum score for bridging a paired-end reads, default: 1.5");
printf(" %-42s %s\n", "--min_splice_bundary_hits <integer>", "the minimum number of spliced reads required to support a junction, default: 1");
printf(" %-42s %s\n", "--min_transcript_coverage <float>", "minimum coverage required for a multi-exon transcript, default: 1.0");
printf(" %-42s %s\n", "--min_transcript_length_increase <integer>", "default: 50");
printf(" %-42s %s\n", "--min_transcript_length_base <integer>", "default: 150, minimum length of a transcript would be");
printf(" %-42s %s\n", "", "--min_transcript_length_base + --min_transcript_length_increase * num-of-exons");
printf(" %-42s %s\n", "--min_single_exon_coverage <float>", "minimum coverage required for a single-exon transcript, default: 20");
printf(" %-42s %s\n", "--min_single_exon_transcript_length <integer>", "minimum length of single-exon transcript, default: 250");
printf(" %-42s %s\n", "--min_single_exon_clustering_overlap <float>", "minimum overlaping ratio to merge two single-exon transcripts, default: 0.8");
printf(" %-42s %s\n", "--min_mapping_quality <integer>", "ignore reads with mapping quality less than this value, default: 1");
printf(" %-42s %s\n", "--max_num_cigar <integer>", "ignore reads with CIGAR size larger than this value, default: 1000");
printf(" %-42s %s\n", "--min_bundle_gap <integer>", "minimum distances required to start a new bundle, default: 50");
printf(" %-42s %s\n", "--min_num_hits_in_bundle <integer>", "minimum number of reads required in a bundle, default: 20");
printf(" %-42s %s\n", "--min_flank_length <integer>", "minimum match length in each side for a spliced read, default: 3");
printf(" %-46s %s\n", "--help", "print usage of meta-scallop and exit");
printf(" %-46s %s\n", "--version", "print current version of meta-scallop and exit");
printf(" %-46s %s\n", "--profile", "profiling individual samples and exit (will write to files if -p provided)");
printf(" %-46s %s\n", "-l/--chrm_list_file <string>", "list of chromosomes that will be assembled, default: N/A (i.e., assemble all)");
printf(" %-46s %s\n", "-d/--output_gtf_dir <string>", "existing directory for individual transcripts, default: N/A");
printf(" %-46s %s\n", "-b/--output_bridged_bam_dir <string>", "existing directory for individual bridged alignments, default: N/A");
printf(" %-46s %s\n", "-p/--profile_dir <string>", "existing directory for saving/loading profiles of each samples, default: N/A");
printf(" %-46s %s\n", "-t/--max_threads <integer>", "maximized number of threads, default: 10");
printf(" %-46s %s\n", "-c/--max_group_size <integer>", "the maximized number of splice graphs that will be combined, default: 20");
printf(" %-46s %s\n", "-s/--min_grouping_similarity <float>", "the minimized similarity for two graphs to be combined, default: 0.2");
printf(" %-46s %s\n", "--min_bridging_score <float>", "the minimum score for bridging a paired-end reads, default: 1.5");
printf(" %-46s %s\n", "--min_splice_bundary_hits <integer>", "the minimum number of spliced reads required to support a junction, default: 1");
printf(" %-46s %s\n", "--min_transcript_coverage <float>", "minimum coverage required for a multi-exon transcript, default: 1.0");
printf(" %-46s %s\n", "--min_transcript_length_base <integer>", "default: 150");
printf(" %-46s %s\n", "--min_transcript_length_increase <integer>", "default: 50, minimum length of a transcript: base + #exons * increase");
printf(" %-46s %s\n", "--min_single_exon_coverage <float>", "minimum coverage required for a single-exon transcript, default: 20");
printf(" %-46s %s\n", "--min_single_exon_transcript_length <integer>", "minimum length of single-exon transcript, default: 250");
printf(" %-46s %s\n", "--min_single_exon_clustering_overlap <float>", "minimum overlaping ratio to merge two single-exon transcripts, default: 0.8");
printf(" %-46s %s\n", "--min_mapping_quality <integer>", "ignore reads with mapping quality less than this value, default: 1");
printf(" %-46s %s\n", "--max_num_cigar <integer>", "ignore reads with CIGAR size larger than this value, default: 1000");
printf(" %-46s %s\n", "--min_bundle_gap <integer>", "minimum distances required to start a new bundle, default: 50");
printf(" %-46s %s\n", "--min_num_hits_in_bundle <integer>", "minimum number of reads required in a bundle, default: 20");
printf(" %-46s %s\n", "--min_flank_length <integer>", "minimum match length in each side for a spliced read, default: 3");
return 0;
}

Expand Down
1 change: 1 addition & 0 deletions util/parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class parameters
string algo;
string version;
int max_threads;
bool profile_only;

// for meta-assembly
int max_group_size;
Expand Down

0 comments on commit 1bda1eb

Please sign in to comment.