-
Notifications
You must be signed in to change notification settings - Fork 4
/
BedGene.py
executable file
·89 lines (82 loc) · 3.25 KB
/
BedGene.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function,
unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
from Interval import Interval
#=========================================================================
# Attributes:
# ID : name of gene
# chr : string (chromosome name)
# strand : string ("+" or "-", or "." if unknown)
# CDS : array of Interval
# UTR : array of Interval
# exons : array of Interval : includes both CDS and UTR
# Instance Methods:
# gene=BedGene(ID,chr,strand)
# interval=gene.getInterval()
# gene.addCDS(Interval(begin,end))
# gene.addUTR(Interval(begin,end))
# gene.addExon(Interval(begin,end))
# gene.coalesce() # combines UTR and CDS elements into exons; sorts by coord
# Class Methods:
#=========================================================================
class BedGene:
    """A gene described by lists of CDS, UTR and exon intervals.

    Attributes:
        ID: name of the gene.
        chr: chromosome name (string).
        strand: "+" or "-", or "." if unknown.
        CDS: list of interval objects added through addCDS().
        UTR: list of interval objects added through addUTR().
        exons: list of interval objects, either added through addExon()
            or rebuilt from CDS and UTR by coalesce().

    Interval objects are only required to expose ``begin`` and ``end``
    attributes and, for coalesce(), a ``clone()`` method.
    """

    def __init__(self, ID, chr, strand):
        """Create a gene with no CDS, UTR or exon elements yet."""
        self.ID = ID
        self.chr = chr
        self.strand = strand
        self.CDS = []
        self.UTR = []
        self.exons = []

    def getInterval(self):
        """Return the Interval covering all CDS, UTR and exon elements.

        The result runs from the smallest ``begin`` to the largest ``end``
        found in any of the three lists.  When the gene has no elements
        at all the result is ``Interval(None, None)``.
        """
        everything = list(self.CDS) + list(self.UTR) + list(self.exons)
        return self.getInterval_array(everything)

    def getInterval_array(self, array):
        """Return Interval(min begin, max end) over the given intervals.

        Returns ``Interval(None, None)`` when ``array`` is empty.
        """
        begin = end = None
        for interval in array:
            # Compare against None explicitly: a coordinate of 0 is a real
            # value and must not be mistaken for "nothing seen yet".
            if begin is None or interval.begin < begin:
                begin = interval.begin
            if end is None or interval.end > end:
                end = interval.end
        return Interval(begin, end)

    def addCDS(self, interval):
        """Append an interval to the CDS list."""
        self.CDS.append(interval)

    def addUTR(self, interval):
        """Append an interval to the UTR list."""
        self.UTR.append(interval)

    def addExon(self, interval):
        """Append an interval to the exon list."""
        self.exons.append(interval)

    def coalesce(self):
        """Rebuild ``exons`` from the CDS and UTR lists, sorted by begin.

        Any previous contents of ``exons`` are discarded.  Every CDS
        element starts out as its own exon.  Each UTR is then merged into
        the first exon it overhangs (touching or overlapping that exon's
        left and/or right edge); a UTR that overhangs no exon becomes an
        exon of its own.  Exons are always clones, so the objects held in
        ``CDS`` and ``UTR`` are never modified.
        """
        exons = self.exons = [cds.clone() for cds in self.CDS]
        for utr in self.UTR:
            merged = False
            for exon in exons:
                # Evaluate both edges before touching the exon so that a
                # UTR sticking out on both sides extends both of them.
                grows_left = utr.begin < exon.begin and utr.end >= exon.begin
                grows_right = utr.end > exon.end and utr.begin <= exon.end
                if grows_left:
                    exon.begin = utr.begin
                if grows_right:
                    exon.end = utr.end
                if grows_left or grows_right:
                    merged = True
                    break
            if not merged:
                # Clone, otherwise a later merge into this exon would
                # silently rewrite the caller's UTR interval.
                exons.append(utr.clone())
        exons.sort(key=lambda exon: exon.begin)