-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlocal_tree.py
More file actions
135 lines (116 loc) · 4.27 KB
/
local_tree.py
File metadata and controls
135 lines (116 loc) · 4.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import numpy as np
import tree
import sklearn.neighbors as sknn
def make_tree(vecs,vals,diffusion_time,approx_cluster_size):
diff_vals = vals**diffusion_time
diff_vecs = vecs.dot(np.diag(diff_vals))
cluster_list = []
centroids = diff_vecs
iters = 0
while len(centroids) != 1 and iters < 10:
#print len(cluster_list), len(centroids)
approx_size = min(approx_cluster_size,len(centroids)/2+1)
clusters, partition = cluster(centroids,approx_size)
cluster_list.append(clusters)
diff_vecs = centroids.dot(np.diag(diff_vals))
centroids = calc_centroids(diff_vecs,clusters)
iters += 1
return cluster_list
def construct_tree(cluster_list,n):
child_level = []
parent_level = []
for i in xrange(n):
child_level.append(tree.ClusterTreeNode([i]))
for clusters in cluster_list:
for cluster in clusters:
new_node = tree.ClusterTreeNode([])
for node in cluster:
child_level[node].assign_to_parent(new_node)
parent_level.append(new_node)
child_level = list(parent_level)
parent_level = []
return new_node
def estimate_radius(data,approx_cluster_size):
kp,dists = random_cover(data,n_neighbors=approx_cluster_size*2)
return np.median(dists) + 1e-10
def cluster(data,approx_cluster_size):
est_dist = estimate_radius(data,approx_cluster_size)
#print est_dist
key_points,distances = random_cover(data,radius=est_dist)
knn = sknn.NearestNeighbors(n_neighbors=2)
knn.fit(data[key_points,:])
t_assigns = knn.kneighbors(data)[1][:,0]
assigns = [key_points[x] for x in t_assigns]
clusters = []
for idx in np.unique(assigns):
clusters.append(np.where(np.array(assigns)==idx)[0])
return clusters, assigns
def calc_centroids(data,clusters):
return np.vstack([np.mean(data[x,:],axis=0) for x in clusters])
def diffusion_random_cover(A):
"""
A should be a row-stochastic nxn matrix.
Returns a set of centers such that diffusing these centers one step will
cover the entire dataset.
"""
n_points = np.shape(A)[0]
points_list = range(n_points)
centers = []
while points_list:
r_pt = np.random.choice(points_list)
centers.append(r_pt)
onestep = np.where(A[r_pt,:] > 0.0)[0]
for pt in onestep:
if pt in points_list:
points_list.remove(pt)
return np.array(centers)
def random_cover(data,**kwargs):
n_points = np.shape(data)[0]
if "radius" in kwargs:
base_radius = kwargs["radius"]
knn = sknn.NearestNeighbors(radius=base_radius)
knn.fit(data)
elif "distances" in kwargs:
if "n_neighbors" in kwargs:
n_neighbors = kwargs["n_neighbors"]
elif "radius" in kwargs:
base_radius = kwargs["radius"]
elif "n_neighbors" in kwargs:
n_neighbors = kwargs["n_neighbors"]
knn = sknn.NearestNeighbors(n_neighbors=n_neighbors)
knn.fit(data)
else:
print "need to specify a type of random cover."
return None
points_list = range(n_points)
key_points = []
distances = []
while points_list:
r_pt = np.random.choice(points_list)
key_points.append(r_pt)
if "radius" in kwargs:
nn = knn.radius_neighbors(data[r_pt,:],radius=base_radius)
distances.append(nn[0][0][-1])
for pt in nn[1][0]:
if pt in points_list:
points_list.remove(pt)
elif "distances" in kwargs:
if "radius" in kwargs:
nn = np.where(data[r_pt,:] < base_radius)[0]
else:
nn = np.argsort(data[r_pt,:])
added = 0
for pt in nn:
if pt in points_list:
points_list.remove(pt)
added += 1
if added == n_neighbors:
break
#print len(points_list)
elif "n_neighbors" in kwargs:
nn = knn.kneighbors(data[r_pt,:],n_neighbors=n_neighbors)
distances.append(nn[0][0][-1])
for pt in nn[1][0]:
if pt in points_list:
points_list.remove(pt)
return key_points,distances