-
Notifications
You must be signed in to change notification settings - Fork 2
/
NaiveExactMatching-MatchingArtificialReads.py
47 lines (40 loc) · 1.25 KB
/
NaiveExactMatching-MatchingArtificialReads.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def readGenome(filename):
genome = ''
with open(filename, 'r') as f:
for line in f:
# ignore header line with genome information
if not line[0] == '>':
genome += line.rstrip()
return genome
genome = readGenome('phix.fa')
def naive(p, t):
occurrences = []
for i in range(len(t) - len(p) + 1):
match = True
for j in range(len(p)):
if t[i+j] != p[j]:
match = False
break
if match:
occurrences.append(i)
return occurrences
t = 'AGCTTAGATAGC'
p = 'AG'
naive(p, t)
import random
def generateReads(genome, numReads, readLen):
''' Generate reads from random positions in the given genome. '''
reads = []
for _ in range(numReads):
start = random.randint(0, len(genome)-readLen) - 1
reads.append(genome[start : start+readLen])
return reads
# Generate 100 reads of length 100
reads = generateReads(genome, 100, 100)
# Count how many reads match the genome exactly
numMatched = 0
for r in reads:
matches = naive(r, genome)
if len(matches) > 0:
numMatched += 1
print('%d / %d reads matched the genome exactly!' % (numMatched, len(reads)))