-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsequencing_graph.py
More file actions
67 lines (54 loc) · 2.07 KB
/
Copy pathsequencing_graph.py
File metadata and controls
67 lines (54 loc) · 2.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def prefix(i):
    """Return the sequence ``i`` with its last element removed.

    Works on anything that supports slicing (str, tuple, list, ...); an
    empty or single-element input yields an empty sequence of the same type.
    """
    all_but_last = i[:-1]
    return all_but_last
def suffix(i):
    """Return the sequence ``i`` with its first element removed.

    Works on anything that supports slicing (str, tuple, list, ...); an
    empty or single-element input yields an empty sequence of the same type.
    """
    all_but_first = i[1:]
    return all_but_first
class Graph:
    """Directed graph held as a node set plus an adjacency mapping.

    ``nodes`` is the set of vertices; ``edges`` maps a source node to the set
    of nodes it points to.  Sources without outgoing edges are not kept in
    ``edges``.  Because targets live in sets, parallel edges between the same
    two nodes collapse into a single edge.
    """

    def __init__(self, nodes: set, edges: dict):
        """Build a graph from ``nodes`` and a ``source -> targets`` mapping.

        Both arguments are copied, so later calls to ``add_edge`` never
        mutate the caller's containers (and vice versa).  Entries of
        ``edges`` whose target collection is empty are dropped.
        """
        self.nodes = set(nodes)
        self.edges = {src: set(dsts) for src, dsts in edges.items() if dsts}

    def __str__(self):
        """Render one ``source -> t1,t2,...`` line per source with edges."""
        lines = [
            f"{src} -> {','.join(map(str, dsts))}"
            for src, dsts in self.edges.items()
            # An adjacency set can be emptied after construction; skip those.
            if dsts
        ]
        return "\n".join(lines).strip()

    def __repr__(self):
        """Use the same adjacency-list text as ``__str__``."""
        return str(self)

    def copy(self):
        """Return an independent ``Graph`` with the same nodes and edges."""
        # __init__ copies the node set and every adjacency set, so the new
        # graph shares no mutable state with this one.
        return Graph(self.nodes, self.edges)

    def add_edge(self, a, b):
        """Add the directed edge ``a -> b``.

        Both endpoints are also recorded in ``nodes`` so the node set stays
        consistent with the adjacency mapping.
        """
        self.nodes.update((a, b))
        self.edges.setdefault(a, set()).add(b)
class Overlap_Graph(Graph):
    """Overlap graph of a collection of patterns.

    Every distinct pattern is a node, and there is an edge ``p -> q``
    whenever ``suffix(p) == prefix(q)`` (a pattern may point to itself).
    """

    def __init__(self, patterns):
        """Build the overlap graph for ``patterns`` (any iterable of hashable sequences)."""
        # Materialise once: the input is traversed several times below, and a
        # one-shot iterable (e.g. a generator) would otherwise be exhausted
        # after the first pass, leaving the graph without edges.
        patterns = list(patterns)

        # Index patterns by prefix so each pattern finds its successors with
        # one lookup instead of a scan over every other pattern.
        by_prefix = {}
        for pattern in patterns:
            by_prefix.setdefault(prefix(pattern), set()).add(pattern)

        # Copy each successor set so that patterns sharing a suffix do not
        # end up sharing one mutable set object.
        edges = {
            pattern: set(by_prefix.get(suffix(pattern), ()))
            for pattern in patterns
        }
        super().__init__(set(patterns), edges)
class DeBruijn_Graph(Graph):
    """``Graph`` subclass used for de Bruijn graphs; adds no behaviour of its own."""

    def __init__(self, nodes, edges):
        """Pass ``nodes`` and ``edges`` straight through to ``Graph.__init__``."""
        super().__init__(nodes, edges)
def text_to_debruijn(k, text):
    """Build the de Bruijn graph of the k-mers occurring in ``text``.

    Every length-``k`` window of ``text`` contributes one edge, from the
    window's first ``k - 1`` characters to its last ``k - 1`` characters.
    Both endpoints of every edge are nodes of the graph, including a final
    (k-1)-mer that has no outgoing edge.

    Args:
        k: window length; must be at least 1.
        text: the sequence to slide the window over.

    Returns:
        A ``DeBruijn_Graph``.  It is empty when ``text`` is shorter than ``k``.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        # Non-positive k makes the slices below wrap around and yield
        # meaningless edges, so reject it explicitly.
        raise ValueError("k must be a positive integer")

    raw_edges = [
        (text[start:start + k - 1], text[start + 1:start + k])
        for start in range(len(text) - k + 1)
    ]
    # Collect sources AND targets: a node that only ever appears as a target
    # (typically the last (k-1)-mer of the text) is still part of the graph.
    nodes = {endpoint for edge in raw_edges for endpoint in edge}
    edges = {node: set() for node in nodes}
    for src, dst in raw_edges:
        edges[src].add(dst)
    return DeBruijn_Graph(nodes, edges)
def kmers_to_debruijn(kmers):
    """Build a de Bruijn graph from an iterable of k-mers.

    Each k-mer contributes one edge ``prefix(kmer) -> suffix(kmer)``; every
    prefix and suffix seen becomes a node.
    """
    links = [(prefix(kmer), suffix(kmer)) for kmer in kmers]

    # Register all edge sources first, then all edge targets.
    vertices = set()
    vertices.update(head for head, _ in links)
    vertices.update(tail for _, tail in links)

    # Start every vertex with an empty successor set, then fill in the edges.
    adjacency = {vertex: set() for vertex in vertices}
    for head, tail in links:
        adjacency[head].add(tail)

    return DeBruijn_Graph(vertices, adjacency)
def paired_kmers_to_debruijn(paired_kmers):
    """Build a de Bruijn graph from paired k-mers.

    ``paired_kmers`` has the form ``[(a_1, b_1), (a_2, b_2), ...]``.  Each
    pair ``(a, b)`` contributes one edge from the node
    ``(prefix(a), prefix(b))`` to the node ``(suffix(a), suffix(b))``.
    """
    links = []
    for pair in paired_kmers:
        first, second = pair[0], pair[1]
        source = (prefix(first), prefix(second))
        target = (suffix(first), suffix(second))
        links.append((source, target))

    # Register all edge sources first, then all edge targets.
    vertices = set()
    vertices.update(source for source, _ in links)
    vertices.update(target for _, target in links)

    # Start every vertex with an empty successor set, then fill in the edges.
    adjacency = {vertex: set() for vertex in vertices}
    for source, target in links:
        adjacency[source].add(target)

    return DeBruijn_Graph(vertices, adjacency)