From 2bdf4e7461bfe8efc49cc3b3689c164e9fd4f785 Mon Sep 17 00:00:00 2001 From: Shifu Chen Date: Thu, 10 Dec 2015 10:55:11 +0800 Subject: [PATCH] release 0.1.0 --- README.md | 175 +++++++++++++++++++++++++------------------- after.py | 48 ++++++------ barcodeprocesser.py | 13 ---- bubbledetector.py | 3 - circledetector.py | 4 - debubble.py | 3 - preprocesser.py | 34 +++------ trimmer.py | 6 +- util.py | 4 - 9 files changed, 134 insertions(+), 156 deletions(-) diff --git a/README.md b/README.md index d112244..0f4898f 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,12 @@ # AFTER -Automatic Filtering, Trimming, and Error Removing for fastq data -Currently it supports Illumina 1.8 or newer format, see: -http://support.illumina.com/help/SequencingAnalysisWorkflow/Content/Vault/Informatics/Sequencing_Analysis/CASAVA/swSEQ_mCA_FASTQFiles.htm -AFTER can simply go through all fastq files in a folder and then output a good folder and a bad folder, which contains good reads and bad reads of each fastq file +Automatic Filtering, Trimming, and Error Removing for fastq data +AFTER can simply go through all fastq files in a folder and then output a good folder and a bad folder, which contains good reads and bad reads of each fastq file +Currently it supports Illumina 1.8 or newer format, see [here](http://support.illumina.com/help/SequencingAnalysisWorkflow/Content/Vault/Informatics/Sequencing_Analysis/CASAVA/swSEQ_mCA_FASTQFiles.htm) -# Version -1.0 +# Latest release +0.1.0 (Released in 2015-12-10) # Feedback/contact -infoteam@haplox.com chen@haplox.com # Features: @@ -16,92 +14,119 @@ AFTER does following tasks automatically: 1, Filter PolyA/PolyT/PolyC/PolyG reads 2, Trim reads at front and tail according to bad per base sequence content 3, Detect and eliminate bubble artifact caused by sequencer due to fluid dynamics issue -4, Filter low-quality reads +4, Filter low-quality reads +5, Barcode sequencing support: if all reads have a random barcode (see duplex sequencing), this 
program can detect and split the barcode into query name # Simple usage: -##### 1, cd to the folder contains all fastq files -##### 2, run: -##### python after.py +```shell +cd /path/to/fastq/folder +python after.py +``` # Debubble: If you want to eliminate bubble artifact, run: -##### python after.py --debubble=on +```shell +python after.py --debubble=on +``` # Full usage: -###### python after.py [-d input_dir][-1 read1_file] [-2 read1_file] [-7 index1_file] [-5 index2_file] [-g good_output_folder] [-b bad_output_folder] [-f trim_front] [-t trim_tail] [-m min_quality] [-q qualified_quality] [-l max_low_quality] [-p poly_max] [-a allow_poly_mismatch] [-n max_n_count] [--debubble=on/off] [--debubble_dir=xxx] [--draw=on/off] [--read1_flag=\_R1\_] [--read2_flag=\_R2\_] [--index1_flag=\_I1\_] [--index2_flag=\_I2\_] +```shell +python after.py [-d input_dir][-1 read1_file] [-2 read2_file] [-7 index1_file] [-5 index2_file] [-g good_output_folder] [-b bad_output_folder] [-f trim_front] [-t trim_tail] [-q qualified_quality_phred] [-u unqualified_base_limit] [-p poly_size_limit] [-a allow_mismatch_in_poly] [-n n_base_limit] [--debubble=on/off] [--debubble_dir=xxx] [--draw=on/off] [--read1_flag=_R1_] [--read2_flag=_R2_] [--index1_flag=_I1_] [--index2_flag=_I2_] +``` +Common options: +```shell + --version show program's version number and exit + -h, --help show this help message and exit +``` +File (name) options: +```shell -Options: - * --version show program's version number and exit - * -h, --help show this help message and exit - * -1 READ1_FILE, --read1_file=READ1_FILE + -1 READ1_FILE, --read1_file=READ1_FILE file name of read1, required. If input_dir is - specified, then this arg is ignored. - * -2 READ2_FILE, --read2_file=READ2_FILE + specified, then this arg is ignored. + -2 READ2_FILE, --read2_file=READ2_FILE file name of read2, if paired. If input_dir is - specified, then this arg is ignored. 
- * -7 INDEX1_FILE, --index1_file=INDEX1_FILE + specified, then this arg is ignored. + -7 INDEX1_FILE, --index1_file=INDEX1_FILE file name of 7' index. If input_dir is specified, then - this arg is ignored. - * -5 INDEX2_FILE, --index2_file=INDEX2_FILE + this arg is ignored. + -5 INDEX2_FILE, --index2_file=INDEX2_FILE file name of 5' index. If input_dir is specified, then - this arg is ignored. - * -g GOOD_OUTPUT_FOLDER, --good_output_folder=GOOD_OUTPUT_FOLDER + this arg is ignored. + -d INPUT_DIR, --input_dir=INPUT_DIR + the input dir to process automatically. If read1_file + and input_dir are not specified, then current dir (.) + is specified to input_dir + -g GOOD_OUTPUT_FOLDER, --good_output_folder=GOOD_OUTPUT_FOLDER the folder to store good reads, by default it is the - same folder contains read1 - * -b BAD_OUTPUT_FOLDER, --bad_output_folder=BAD_OUTPUT_FOLDER + same folder contains read1 + -b BAD_OUTPUT_FOLDER, --bad_output_folder=BAD_OUTPUT_FOLDER the folder to store bad reads, by default it is same - as good_output_folder - * -f TRIM_FRONT, --trim_front=TRIM_FRONT + as good_output_folder + --read1_flag=READ1_FLAG + specify the name flag of read1, default is _R1_, which + means a file with name *_R1_* is read1 file + --read2_flag=READ2_FLAG + specify the name flag of read2, default is _R2_, which + means a file with name *_R2_* is read2 file + --index1_flag=INDEX1_FLAG + specify the name flag of index1, default is _I1_, + which means a file with name *_I1_* is index1 file + --index2_flag=INDEX2_FLAG + specify the name flag of index2, default is _I2_, + which means a file with name *_I2_* is index2 file +``` +Filter options: +```shell + -f TRIM_FRONT, --trim_front=TRIM_FRONT number of bases to be trimmed in the head of read. -1 - means auto detect - * -t TRIM_TAIL, --trim_tail=TRIM_TAIL + means auto detect + -t TRIM_TAIL, --trim_tail=TRIM_TAIL number of bases to be trimmed in the tail of read. 
-1 - means auto detect - * -m MIN_QUALITY, --min_quality=MIN_QUALITY - if exists one base has quality < min_quality, then - this read/pair will be bad. Default 0 means do not - filter reads by the least quality - * -q QUALIFIED_QUALITY, --qualified_quality=QUALIFIED_QUALITY + means auto detect + -q QUALIFIED_QUALITY_PHRED, --qualified_quality_phred=QUALIFIED_QUALITY_PHRED the quality value that a base is qualifyed. Default 20 - means base quality >=Q20 is qualified. - * -l MAX_LOW_QUALITY, --max_low_quality=MAX_LOW_QUALITY - if exists more than maxlq bases that quality is lower - than qualified quality, then this read/pair is bad. - Default 0 means do not filter reads by low quality - base count - * -p POLY_MAX, --poly_max=POLY_MAX + means base quality >=Q20 is qualified. + -u UNQUALIFIED_BASE_LIMIT, --unqualified_base_limit=UNQUALIFIED_BASE_LIMIT + if exists more than unqualified_base_limit bases that + quality is lower than qualified quality, then this + read/pair is bad. Default 0 means do not filter reads + by low quality base count + -p POLY_SIZE_LIMIT, --poly_size_limit=POLY_SIZE_LIMIT if exists one polyX(polyG means GGGGGGGGG...), and its - length is >= poly_max, then this read/pair is bad. - Default is 40 - * -a ALLOW_POLY_MISMATCH, --allow_poly_mismatch=ALLOW_POLY_MISMATCH + length is >= poly_size_limit, then this read/pair is + bad. Default is 40 + -a ALLOW_MISMATCH_IN_POLY, --allow_mismatch_in_poly=ALLOW_MISMATCH_IN_POLY the count of allowed mismatches when evaluating - poly_X. Default 5 means disallow any mismatches - * -n MAX_N_COUNT, --max_n_count=MAX_N_COUNT + poly_X. Default 5 means disallow any mismatches + -n N_BASE_LIMIT, --n_base_limit=N_BASE_LIMIT if exists more than maxn bases have N, then this - read/pair is bad. Default is 5 - * -s MIN_SEQ_LEN, --min_seq_len=MIN_SEQ_LEN - if the trimmed read is shorter than min_seq_len, then - this read/pair is bad. 
Default is 35 - * -d INPUT_DIR, --input_dir=INPUT_DIR - the input dir to process automatically. If read1_file - are input_dir are not specified, then current dir (.) - is specified to input_dir - * --debubble=DEBUBBLE specify whether apply debubble algorithm to remove the - reads in the bubbles. Default is off - * --debubble_dir=DEBUBBLE_DIR + read/pair is bad. Default is 5 + -s SEQ_LEN_REQ, --seq_len_req=SEQ_LEN_REQ + if the trimmed read is shorter than seq_len_req, then + this read/pair is bad. Default is 35 +``` +Debubble options: +```shell + --debubble=DEBUBBLE specify whether apply debubble algorithm to remove the + reads in the bubbles. Default is off + --debubble_dir=DEBUBBLE_DIR specify the folder to store output of debubble - algorithm, default is debubble - * --draw=DRAW specify whether draw the pictures or not, when use - debubble or QC. Default is on - * --read1_flag=READ1_FLAG - specify the name flag of read1, default is _R1_, which - means a file with name *_R1_* is read1 file - * --read2_flag=READ2_FLAG - specify the name flag of read2, default is _R2_, which - means a file with name *_R2_* is read2 file - * --index1_flag=INDEX1_FLAG - specify the name flag of index1, default is _I1_, - which means a file with name *_I1_* is index2 file - * --index2_flag=INDEX2_FLAG - specify the name flag of index2, default is _I2_, - which means a file with name *_I2_* is index2 file + algorithm, default is debubble + --draw=DRAW specify whether draw the pictures or not, when use + debubble or QC. 
Default is on +``` +Barcoded sequencing options: +``` + --barcode=BARCODE specify whether deal with barcode sequencing files, default is on + --barcode_length=BARCODE_LENGTH + specify the designed length of barcode + --barcode_flag=BARCODE_FLAG + specify the name flag of a barcoded file, default is + barcode, which means a file with name *barcode* is a + barcoded file + --barcode=BARCODE specify whether deal with barcode sequencing files, + default is on, which means all files with barcode_flag + in filename will be treated as barcode sequencing + files +``` diff --git a/after.py b/after.py index cb7da5d..c3d50ad 100755 --- a/after.py +++ b/after.py @@ -10,8 +10,8 @@ from util import * def parseCommand(): - usage = "Automatic Filtering, Trimming, and Error Removing for Illumina fastq data(Illumina 1.8 or newer format, see http://support.illumina.com/help/SequencingAnalysisWorkflow/Content/Vault/Informatics/Sequencing_Analysis/CASAVA/swSEQ_mCA_FASTQFiles.htm)\n\nFull command:\n%prog [-d input_dir][-1 read1_file] [-2 read1_file] [-7 index1_file] [-5 index2_file] [-g good_output_folder] [-b bad_output_folder] [-f trim_front] [-t trim_tail] [-m min_quality] [-q qualified_quality] [-l max_low_quality] [-p poly_max] [-a allow_poly_mismatch] [-n max_n_count] [--debubble=on/off] [--debubble_dir=xxx] [--draw=on/off] [--read1_flag=_R1_] [--read2_flag=_R2_] [--index1_flag=_I1_] [--index2_flag=_I2_] \n\nSimplest usage:\ncd to the folder containing your fastq data, run " - version = "%prog 1.1" + usage = "Automatic Filtering, Trimming, and Error Removing for Illumina fastq data(Illumina 1.8 or newer format, see http://support.illumina.com/help/SequencingAnalysisWorkflow/Content/Vault/Informatics/Sequencing_Analysis/CASAVA/swSEQ_mCA_FASTQFiles.htm)\n\nFull command:\n%prog [-d input_dir][-1 read1_file] [-2 read1_file] [-7 index1_file] [-5 index2_file] [-g good_output_folder] [-b bad_output_folder] [-f trim_front] [-t trim_tail] [-q qualified_quality_phred] [-l 
unqualified_base_limit] [-p poly_size_limit] [-a allow_mismatch_in_poly] [-n n_base_limit] [--debubble=on/off] [--debubble_dir=xxx] [--draw=on/off] [--read1_flag=_R1_] [--read2_flag=_R2_] [--index1_flag=_I1_] [--index2_flag=_I2_] \n\nSimplest usage:\ncd to the folder containing your fastq data, run " + version = "%prog 0.1.0" parser = OptionParser(usage = usage, version = version) parser.add_option("-1", "--read1_file", dest = "read1_file", help = "file name of read1, required. If input_dir is specified, then this arg is ignored.") @@ -21,46 +21,44 @@ def parseCommand(): help = "file name of 7' index. If input_dir is specified, then this arg is ignored.") parser.add_option("-5", "--index2_file", dest = "index2_file", default = None, help = "file name of 5' index. If input_dir is specified, then this arg is ignored.") + parser.add_option("-d", "--input_dir", dest = "input_dir", default = None, + help = "the input dir to process automatically. If read1_file are input_dir are not specified, then current dir (.) 
is specified to input_dir") parser.add_option("-g", "--good_output_folder", dest = "good_output_folder", default = "good", help = "the folder to store good reads, by default it is the same folder contains read1") parser.add_option("-b", "--bad_output_folder", dest = "bad_output_folder", default = "bad", help = "the folder to store bad reads, by default it is same as good_output_folder") + parser.add_option("", "--read1_flag", dest = "read1_flag", default = "_R1_", + help = "specify the name flag of read1, default is _R1_, which means a file with name *_R1_* is read1 file") + parser.add_option("", "--read2_flag", dest = "read2_flag", default = "_R2_", + help = "specify the name flag of read2, default is _R2_, which means a file with name *_R2_* is read2 file") + parser.add_option("", "--index1_flag", dest = "index1_flag", default = "_I1_", + help = "specify the name flag of index1, default is _I1_, which means a file with name *_I1_* is index2 file") + parser.add_option("", "--index2_flag", dest = "index2_flag", default = "_I2_", + help = "specify the name flag of index2, default is _I2_, which means a file with name *_I2_* is index2 file") parser.add_option("-f", "--trim_front", dest = "trim_front", default = -1, type = "int", help = "number of bases to be trimmed in the head of read. -1 means auto detect") parser.add_option("-t", "--trim_tail", dest = "trim_tail", default = -1, type = "int", help = "number of bases to be trimmed in the tail of read. -1 means auto detect") - parser.add_option("-m", "--min_quality", dest = "min_quality", default = 0, type = "int", - help = "if exists one base has quality < min_quality, then this read/pair will be bad. Default 0 means do not filter reads by the least quality") - parser.add_option("-q", "--qualified_quality", dest = "qualified_quality", default = 20, type = "int", - help = "the quality value that a base is qualifyed. 
Default 20 means base quality >=Q20 is qualified.") - parser.add_option("-l", "--max_low_quality", dest = "max_low_quality", default = 0, type = "int", - help = "if exists more than maxlq bases that quality is lower than qualified quality, then this read/pair is bad. Default 0 means do not filter reads by low quality base count") - parser.add_option("-p", "--poly_max", dest = "poly_max", default = 40, type = "int", - help = "if exists one polyX(polyG means GGGGGGGGG...), and its length is >= poly_max, then this read/pair is bad. Default is 40") - parser.add_option("-a", "--allow_poly_mismatch", dest = "allow_poly_mismatch", default = 5, type = "int", + parser.add_option("-q", "--qualified_quality_phred", dest = "qualified_quality_phred", default = 20, type = "int", + help = "the quality value that a base is qualifyed. Default 20 means phred base quality >=Q20 is qualified.") + parser.add_option("-u", "--unqualified_base_limit", dest = "unqualified_base_limit", default = 0, type = "int", + help = "if exists more than unqualified_base_limit bases that quality is lower than qualified quality, then this read/pair is bad. Default 0 means do not filter reads by low quality base count") + parser.add_option("-p", "--poly_size_limit", dest = "poly_size_limit", default = 40, type = "int", + help = "if exists one polyX(polyG means GGGGGGGGG...), and its length is >= poly_size_limit, then this read/pair is bad. Default is 40") + parser.add_option("-a", "--allow_mismatch_in_poly", dest = "allow_mismatch_in_poly", default = 5, type = "int", help = "the count of allowed mismatches when evaluating poly_X. Default 5 means disallow any mismatches") - parser.add_option("-n", "--max_n_count", dest = "max_n_count", default = 5, type = "int", + parser.add_option("-n", "--n_base_limit", dest = "n_base_limit", default = 5, type = "int", help = "if exists more than maxn bases have N, then this read/pair is bad. 
Default is 5") - parser.add_option("-s", "--min_seq_len", dest = "min_seq_len", default = 35, type = "int", - help = "if the trimmed read is shorter than min_seq_len, then this read/pair is bad. Default is 35") - parser.add_option("-d", "--input_dir", dest = "input_dir", default = None, - help = "the input dir to process automatically. If read1_file are input_dir are not specified, then current dir (.) is specified to input_dir") + parser.add_option("-s", "--seq_len_req", dest = "seq_len_req", default = 35, type = "int", + help = "if the trimmed read is shorter than seq_len_req, then this read/pair is bad. Default is 35") parser.add_option("", "--debubble", dest = "debubble", default = "off", help = "specify whether apply debubble algorithm to remove the reads in the bubbles. Default is off") parser.add_option("", "--debubble_dir", dest = "debubble_dir", default = "debubble", help = "specify the folder to store output of debubble algorithm, default is debubble") parser.add_option("", "--draw", dest = "draw", default = "on", help = "specify whether draw the pictures or not, when use debubble or QC. 
Default is on") - parser.add_option("", "--read1_flag", dest = "read1_flag", default = "_R1_", - help = "specify the name flag of read1, default is _R1_, which means a file with name *_R1_* is read1 file") - parser.add_option("", "--read2_flag", dest = "read2_flag", default = "_R2_", - help = "specify the name flag of read2, default is _R2_, which means a file with name *_R2_* is read2 file") - parser.add_option("", "--index1_flag", dest = "index1_flag", default = "_I1_", - help = "specify the name flag of index1, default is _I1_, which means a file with name *_I1_* is index2 file") - parser.add_option("", "--index2_flag", dest = "index2_flag", default = "_I2_", - help = "specify the name flag of index2, default is _I2_, which means a file with name *_I2_* is index2 file") parser.add_option("", "--barcode", dest = "barcode", default = "on", - help = "specify whether deal with barcode sequencing files") + help = "specify whether deal with barcode sequencing files, default is on, which means all files with barcode_flag in filename will be treated as barcode sequencing files") parser.add_option("", "--barcode_length", dest = "barcode_length", default = 12, type="int", help = "specify the designed length of barcode") parser.add_option("", "--barcode_flag", dest = "barcode_flag", default = "barcode", diff --git a/barcodeprocesser.py b/barcodeprocesser.py index 5b21933..db00313 100644 --- a/barcodeprocesser.py +++ b/barcodeprocesser.py @@ -80,16 +80,3 @@ def moveAndTrimPair(read1, read2, barcode1len, barcode2len, verify): moveBarcodeToName(read1, barcode1len, verify) moveBarcodeToName(read2, barcode2len, verify) cleanBarcodeTail(read1, read2, readStart1, readStart2) - -#test -if __name__ == "__main__": - read1 = ['@NS500713:17:HFG2YBGXX:1:11101:10560:1202 1:N:0:CGAGTA','ATAAAAAAAACACAGTATGGCAAAACCCCATCTCTACTAAAAATACAAAAATTAGCTGGGTGTGGTGGCTACTGAAATTTCCCGGG','+','AAAAAEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEAEEEEEE######EEEEEEEEEEE'] - read2 = 
['@NS500713:17:HFG2YBGXX:1:11101:10560:1202 2:N:0:CGAGTA','CCCGGGAAATTTCAGTAGCCACCACACCCAGCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTTGCCATACTGTGTTTTTTTTAT','+','AAAAAEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEAEEEEEE######EEEEEEEEEEE'] - verify = 'CAGTA' - barcodeLen = 12 - barcode1len = detectBarcode(read1[1], barcodeLen, verify) - barcode2len = detectBarcode(read2[1], barcodeLen, verify) - print barcode1len, barcode2len - moveAndTrimPair(read1, read2, barcode1len, barcode2len, verify) - print(read1) - print(read2) \ No newline at end of file diff --git a/bubbledetector.py b/bubbledetector.py index d0d224e..ca90482 100644 --- a/bubbledetector.py +++ b/bubbledetector.py @@ -389,6 +389,3 @@ def main(): bd = BubbleDetector(xmax, ymax, xmin,ymin, True) bd.loadRecordsFromFile(argv) bd.detect() - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/circledetector.py b/circledetector.py index e1e9ed5..673a27b 100644 --- a/circledetector.py +++ b/circledetector.py @@ -213,7 +213,3 @@ def main(): cd = CircleDetector([], 10.0, 10.0) c = [5, -0.5, 1.0] print(cd.circleAreaInTile(c, 10.0, 10.0)) - -#test -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/debubble.py b/debubble.py index 7193701..1682f39 100755 --- a/debubble.py +++ b/debubble.py @@ -84,6 +84,3 @@ def main(): time2 = time.time() print('Time used: ' + str(time2-time1)) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/preprocesser.py b/preprocesser.py index d67b50b..f32613b 100755 --- a/preprocesser.py +++ b/preprocesser.py @@ -299,47 +299,37 @@ def run(self): continue #filter sequence length - if len(r1[1]) 0: - poly1 = hasPolyX(r1[1], self.options.poly_max, self.options.allow_poly_mismatch) + if self.options.poly_size_limit > 0: + poly1 = hasPolyX(r1[1], self.options.poly_size_limit, self.options.allow_mismatch_in_poly) poly2 = None if r2!=None: - poly2 = hasPolyX(r2[1], self.options.poly_max, 
self.options.allow_poly_mismatch) + poly2 = hasPolyX(r2[1], self.options.poly_size_limit, self.options.allow_mismatch_in_poly) if poly1!=None or poly2!=None: writeReads(r1, r2, i1, i2, bad_read1_file, bad_read2_file, bad_index1_file, bad_index2_file, "BADPOL") continue - #check min quality - if self.options.min_quality > 0: - minQual1 = minQuality(r1) - minQual2 = 255 - if r2!=None: - minQual2 = minQuality(r2) - if minQual1 < self.options.min_quality or minQual2 < self.options.min_quality: - writeReads(r1, r2, i1, i2, bad_read1_file, bad_read2_file, bad_index1_file, bad_index2_file, "BADMIN") - continue - #check low quality count - if self.options.max_low_quality > 0: - lowQual1 = lowQualityNum(r1, self.options.qualified_quality) + if self.options.unqualified_base_limit > 0: + lowQual1 = lowQualityNum(r1, self.options.qualified_quality_phred) lowQual2 = 0 if r2!=None: - lowQual2 = lowQualityNum(r2, self.options.qualified_quality) - if lowQual1 > self.options.max_low_quality or lowQual1 > self.options.max_low_quality: + lowQual2 = lowQualityNum(r2, self.options.qualified_quality_phred) + if lowQual1 > self.options.unqualified_base_limit or lowQual1 > self.options.unqualified_base_limit: writeReads(r1, r2, i1, i2, bad_read1_file, bad_read2_file, bad_index1_file, bad_index2_file, "BADLQC") continue #check N number - if self.options.max_n_count > 0: + if self.options.n_base_limit > 0: nNum1 = nNumber(r1) nNum2 = 0 if r2!=None: nNum2 = nNumber(r2) - if nNum1 > self.options.max_n_count or nNum2 > self.options.max_n_count: + if nNum1 > self.options.n_base_limit or nNum2 > self.options.n_base_limit: writeReads(r1, r2, i1, i2, bad_read1_file, bad_read2_file, bad_index1_file, bad_index2_file, "BADNCT") continue @@ -359,7 +349,3 @@ def run(self): good_index2_file.flush() bad_index2_file.flush() -#test -if __name__ == "__main__": - seq = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAATGAAAAAAAAAAAAAAAAAAAAA" - print(hasPolyX(seq, 40, 3)) diff --git a/trimmer.py b/trimmer.py index 
9c7790a..b1f0707 100644 --- a/trimmer.py +++ b/trimmer.py @@ -135,8 +135,4 @@ def calcTrimLength(self, filename): trimTail = min(readLen*0.05,readLen-1-trimTail) return (int(trimFront), int(trimTail)) - -#test -if __name__ == "__main__": - trimmer = Trimmer() - print(trimmer.calcTrimLength(sys.argv[1])) + diff --git a/util.py b/util.py index cb2e055..aa887a9 100644 --- a/util.py +++ b/util.py @@ -35,7 +35,3 @@ def editDistance(s1, s2): tbl[i,j] = min(tbl[i, j-1]+1, tbl[i-1, j]+1, tbl[i-1, j-1]+cost) return tbl[i,j] - -#test -if __name__ == "__main__": - print editDistance("ATCGAAGTG", "ATCGCAGTG") \ No newline at end of file