# 014. finding shared motif.py
# Remember to update the username and file name in the path to match your computer.
# This change is made where `file_path` is assigned, below the function definitions.
def read_fasta_file(file_path):
    """Read a text file and return its lines.

    Args:
        file_path: Path of the file to open in text read mode.

    Returns:
        A list with one string per line of the file, in file order.
        Line terminators are kept; stripping is left to the caller.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
    """
    with open(file_path, 'r') as file:
        return file.readlines()
def parse_fasta(data):
    """Extract the sequences from FASTA-formatted lines.

    A line starting with ">" is treated as a record header: it ends the
    sequence collected so far and is otherwise discarded. Every other line
    is stripped of surrounding whitespace and appended to the current
    sequence, so a sequence wrapped over several lines is rejoined.

    Args:
        data: Iterable of text lines (with or without trailing newlines).

    Returns:
        A list of sequence strings in the order they appear. Records with
        no sequence characters are omitted.
    """
    sequences = []
    chunks = []  # pieces of the sequence currently being assembled
    for line in data:
        line = line.strip()
        if line.startswith(">"):
            # A header closes the previous record, if it had any content.
            if chunks:
                sequences.append("".join(chunks))
                chunks = []
        elif line:
            chunks.append(line)
    # The last record has no following header to flush it.
    if chunks:
        sequences.append("".join(chunks))
    return sequences
def longest_common_substring(dna_strings):
    """Find every longest substring shared by all of the given strings.

    Args:
        dna_strings: Collection of strings to compare. It is traversed
            more than once, so it must be re-iterable (e.g. a list).

    Returns:
        A sorted list of all distinct substrings of maximal length that
        occur in every input string. The list is empty when the input is
        empty or when the strings share no characters.
    """
    if not dna_strings:
        return []

    # Any common substring must occur in the shortest string, so only its
    # substrings need to be considered as candidates.
    shortest_seq = min(dna_strings, key=len)

    def common_of_length(size):
        """Return the substrings of `size` characters present in every string."""
        candidates = {
            shortest_seq[start:start + size]
            for start in range(len(shortest_seq) - size + 1)
        }
        return {
            candidate
            for candidate in candidates
            if all(candidate in seq for seq in dna_strings)
        }

    # If a common substring of length k exists, so does one of length k - 1
    # (drop a character), which makes the longest length binary-searchable.
    longest_substrings = set()
    low, high = 1, len(shortest_seq)
    while low <= high:
        size = (low + high) // 2
        found = common_of_length(size)
        if found:
            longest_substrings = found
            low = size + 1
        else:
            high = size - 1

    # Sorted so the output does not depend on set iteration order.
    return sorted(longest_substrings)
# Run the pipeline only when executed as a script, so importing this module
# does not try to open the placeholder path below.
if __name__ == "__main__":
    # Placeholder path: replace the username and file name with your own.
    file_path = r'C:/Users/username/Desktop/rosalind_lcsmT.txt'
    data = read_fasta_file(file_path)
    dna_strings = parse_fasta(data)
    result = longest_common_substring(dna_strings)
    # Prints the value returned by longest_common_substring as-is.
    print(result)