-
Notifications
You must be signed in to change notification settings - Fork 0
/
005. Computing GC Content.py
40 lines (34 loc) · 1.66 KB
/
005. Computing GC Content.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# This was a difficult problem because I had to copy and paste each entry into the DNA_sample variable, and then I had a problem with
# the newline characters.
# Then I finally saw that they wanted the answer to the sixth significant figure.
# Sample DNA sequence, pasted in from a notepad file.
# The line breaks inside the triple-quoted string are literal newline
# characters; calculate_gc_content strips them before counting bases.
DNA_sample = """GATTGCGGCAATAGTTATGGGGGTATACTCCGTCACCACGGCGCTGTACTCCATGCGTTC
CAAACCCACCGAAGTGCGAGTTTTGGTTAGTAGGATCGTATTGCGGGCCGGTAGTATTGA
AACGAAAGTGACACCAGTCCGCAGGTCCCACCCCCACGAACAACTCCCCCCCCCAGCAAA
TGCGATGTACTACTTGAAATTAAATGGCGGTCACTTTGCCCTACAGGATGACGGGTTACA
GGGCTGATAACGGCTTAAATTCTAAGTGAGCTGGGTGGCTTATTCTCAAAACACTAAACT
TGAGTAGACTCAACACTCTGAGGGATCTCCTGCCGCACCGATTTCGAGCATGTGGGGATT
AGGAGCTTTACAGGTTCGGGCGGCTTGTACATGACCAGGAGATGCAGAGGTGACGCGAGA
GTACGCAGTCCACATGACAAGCCAATCTCGTGATGGGTTGTCTACTGGATTATGCGAGCC
TTTTCATGAATCTGGACCAGGACCAGTTAATTAGGATGTAACGTCTGGGGTTCGGTGTGG
ACGCGATGGTTCTAGGCGGGCTGGGTTGTGTAACCTGGTCAGATACTATAATCCAACTTG
CCTTCTCAAGTTGCTCTTCGTGATCACTAGTCAGTTGAGTGCGACTAATCCAACTGGTGG
CTTGGATCTGGCGGCCAACACGCATGAGGCTCAGCGACCCTCTTCCCGCCGTAGCTGACG
AAGAGCGCTGGTGGCGCAGATGTTTCGCGCCCGACTTACCCGTGTATCGCCCTAAGCATT
CGTCTCGGAAACGAGGGAGTCGAAAAATAGGTGCCCGGAAGCTTTAGCATGCAAGTGGAA
AAAAGGCTCTTGTGAAGGTACCATAGTTAGGACCATAGCGAACACTGCGATACCAACCCC
CGAGTG"""
def calculate_gc_content(DNA_sample):
    """Return the GC content of a DNA sequence as a percentage.

    The GC content is the share of bases that are guanine or cytosine.
    Matching is case-insensitive ('G', 'C', 'g' and 'c' all count).

    Args:
        DNA_sample: The sequence as a string. It may span several lines;
            all whitespace (newlines, carriage returns, spaces, tabs) is
            ignored and does not count towards the total number of bases.

    Returns:
        The GC percentage as a float in the range 0-100. An empty or
        whitespace-only sequence yields 0.0 instead of raising
        ZeroDivisionError.
    """
    # split() with no separator breaks on any run of whitespace, so this
    # also drops '\r' from Windows-style pastes, not only '\n'.
    sequence = "".join(DNA_sample.split())
    total_bases = len(sequence)
    if total_bases == 0:
        # Nothing to measure; avoid dividing by zero.
        return 0.0
    gc_count = sum(sequence.count(base) for base in "GCgc")
    return (gc_count / total_bases) * 100
# Compute the GC percentage of the sample sequence defined above.
gc_content = calculate_gc_content(DNA_sample)
# Format the output to six decimal places (the ".6f" format spec),
# followed by a literal percent sign.
print(f"{gc_content:.6f}%")