Skip to content

Commit 1b4b589

Browse files
updated CPM with new features: parallelisation, customisation of groups
1 parent 499ae52 commit 1b4b589

File tree

1 file changed

+125
-10
lines changed

1 file changed

+125
-10
lines changed

CliquePercolationMethod.py

Lines changed: 125 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,149 @@
1-
#!/usr/bin/env python3
2-
# -*- coding: utf-8 -*-
3-
from igraph import *
1+
from igraph import Graph, summary, plot
2+
from functools import partial
3+
from multiprocessing.pool import Pool
4+
import time
45

5-
def clique_percolation_method(graph, k = 3):
6+
7+
def clique_percolation_method(graph, k = 3, workers = 1, attribute = None, verbose = False):
    """
    Find overlapping communities with the Clique Percolation Method (CPM),
    introduced by Palla et al. (2005).

    The algorithm enumerates all cliques of size k, builds a "clique graph" in
    which two cliques are adjacent when they share exactly k-1 vertices, and
    returns one community per connected component of that clique graph (the
    union of the vertices of its cliques).

    Parameters
    ----------
    graph : igraph.Graph
        The igraph object containing the graph.
    k : int, optional
        Size of the cliques. Must be at least 2. The default is 3.
    workers : int, optional
        Number of worker processes used to build the clique graph. With the
        default of 1 the comparison runs in the calling process and no pool
        is created.
    attribute : str, optional
        Vertex attribute used to represent the members of each community
        (e.g. "name"). If None, vertex ids are returned. The default is None.
    verbose : bool, optional
        If True, print timing/status updates and the size of every community.
        The default is False.

    Raises
    ------
    TypeError
        If the types of the variables passed are incorrect.
    ValueError
        If the values of the variables passed are incorrect (k < 2,
        workers < 1, or an unknown vertex attribute).

    Returns
    -------
    list
        List of communities; each community is a list of vertex ids (sorted)
        or, when ``attribute`` is given, of the corresponding attribute values.

    """
    if not isinstance(graph, Graph):
        raise TypeError("The object graph must be an instance of the igraph class")

    if not isinstance(k, int):
        raise TypeError("Expecting size of cliques (k) to be an integer")

    # igraph interprets a non-positive clique size as "no bound", and k = 1
    # has no meaningful k-1 overlap, so reject both up front.
    if k < 2:
        raise ValueError("Expecting size of cliques (k) greater than or equal to 2")

    if not isinstance(workers, int):
        raise TypeError("Number of workers must be integer")

    if workers < 1:
        raise ValueError("Expecting number of workers greater than or equal to 1")

    if attribute is not None:
        if not isinstance(attribute, str):
            raise TypeError("Expecting attribute to be a string")

        if attribute not in graph.vs.attributes():
            raise ValueError("Attribute {} in vertices does not exist".format(attribute))

    if not isinstance(verbose, bool):
        raise TypeError("Field verbose must be set to either True or False")

    if verbose:
        start_time = time.time()

    # FINDING CLIQUES
    cliques = graph.cliques(min=k, max=k)
    num_cliques = len(cliques)

    if verbose:
        print("Finished cliques --- %s seconds ---" % (time.time() - start_time))
        print("Cliques found %s" % (num_cliques))

    set_cliques = [set(i) for i in cliques]

    # FINDING CLIQUE GRAPH
    minimum = k-1
    annotate = partial(partial_clique_graph, set_cliques=set_cliques, minimum=minimum, num_cliques=num_cliques)

    if workers == 1:
        # Avoid the cost (and platform caveats) of spawning a process pool
        # when no parallelism was requested.
        edges = [annotate(i) for i in range(num_cliques)]
    else:
        # The context manager guarantees the worker processes are released,
        # even if the comparison raises.
        with Pool(workers) as pool:
            edges = pool.map(annotate, range(num_cliques))
    edge_list = [edge for partial_edges in edges for edge in partial_edges]

    if verbose:
        print("Finished comparison cliques --- %s seconds ---" % (time.time() - start_time))

    # Pass the vertex count explicitly: building the graph from the edge list
    # alone would drop cliques that are not adjacent to any other clique,
    # and with them the communities they form on their own.
    clique_graph = Graph(n=num_cliques, edges=edge_list)
    clique_graph.vs["name"] = list(range(num_cliques))

    # FINDING CONNECTED COMPONENTS IN THE GRAPH
    components = clique_graph.decompose()

    # CREATING COMMUNITIES
    communities = list()
    for component in components:
        members = set()
        for vertex in component.vs:
            members.update(cliques[vertex["name"]])
        # Sorted so the output does not depend on set iteration order.
        communities.append(sorted(members))

    if attribute is not None:
        # Read the attribute column once instead of one lookup per member.
        values = graph.vs[attribute]
        communities = [[values[element] for element in community] for community in communities]

    if verbose:
        print("Finished all --- %s seconds ---" % (time.time() - start_time))

        for comm in communities:
            print(len(comm))

    return communities
30-
31116

32117

118+
def partial_clique_graph(i, set_cliques, minimum, num_cliques):
    """
    Compute the clique-graph edges that start at clique ``i``.

    This is the unit of work for the second stage of CPM (building the clique
    graph). It lives at module level so that it can be dispatched to the
    workers of a process pool, one call per clique index.

    Parameters
    ----------
    i : int
        Index of the clique to compare against every clique after it.
    set_cliques : list(set)
        All cliques found in the graph, each stored as a set of vertex ids.
    minimum : int
        Exact number of shared vertices that makes two cliques adjacent
        (size_of_cliques-1).
    num_cliques : int
        Number of cliques found in the graph.

    Returns
    -------
    list(tuple(int, int))
        Pairs ``(i, j)`` with ``i < j < num_cliques`` whose cliques share
        exactly ``minimum`` vertices.

    """
    anchor = set_cliques[i]
    return [
        (i, other)
        for other in range(i + 1, num_cliques)
        if len(anchor.intersection(set_cliques[other])) == minimum
    ]
33147

34148

35149
def test():
@@ -43,10 +157,11 @@ def test():
43157
for count, comm in enumerate(communities):
44158
print("{}: {}".format(count,[g.vs[i]["name"] for i in comm]))
45159

160+
46161
def test_karate():
47162
karate = Graph.Read_GraphML("karate.GraphML")
48163
summary(karate)
49-
communities = clique_percolation_method(karate,3)
164+
communities = clique_percolation_method(karate,3,attribute="name")
50165
print("Cliques:")
51166
for count, comm in enumerate(communities):
52167
print("{}: {}".format(count,comm))

0 commit comments

Comments
 (0)