Skip to content

Commit 1d55e68

Browse files
committed
minor changes
1 parent e5aa272 commit 1d55e68

3 files changed

Lines changed: 97 additions & 107 deletions

File tree

Centering.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"""
88

99
import numpy as np
10-
from scipy.spatial.distance import cosine
10+
from hub_toolbox import Distances as htd
1111

1212
class Centering(object):
1313
"""Transform data (in vector space) by various 'centering' approaches."""
@@ -42,7 +42,6 @@ def weighted_centering(self, gamma:float):
4242
def localized_centering(self, kappa:int, gamma:float = 1):
4343
"""Perform localized centering."""
4444
# TODO CHECK CORRECTNESS!!
45-
from hub_toolbox.HubnessAnalysis import cosine_distance
4645

4746
if kappa == None:
4847
kappa = 20
@@ -53,23 +52,24 @@ def localized_centering(self, kappa:int, gamma:float = 1):
5352
v = self.vectors / np.sqrt((self.vectors ** 2).sum(-1))[..., np.newaxis]
5453

5554
# for unit vectors it holds inner() == cosine()
56-
sim = -(cosine_distance(v) - 1)
55+
sim = -(htd.cosine_distance(v) - 1)
5756
n = sim.shape[0]
5857
local_affinity = np.zeros(n)
5958
for i in range(n):
6059
x = v[i]
6160
sim_i = sim[i, :]
6261
#TODO randomization
6362
nn = np.argsort(sim_i)[::-1][1 : kappa+1]
64-
c_kappa_x = np.mean(v[nn], 0)
65-
#local_affinity[i] = np.inner(x, c_kappa_x)
66-
local_affinity[i] = cosine(x, c_kappa_x)
63+
c_kappa_x = np.mean(v[nn], 0)
64+
# c_kappa_x does not have unit length in general
65+
local_affinity[i] = np.inner(x, c_kappa_x)
66+
#local_affinity[i] = cosine(x, c_kappa_x)
6767
sim_lcent = sim - (local_affinity ** gamma)
6868
return sim_lcent
6969

7070
if __name__ == '__main__':
71-
vectors = np.arange(1200).reshape(30,40)
71+
vectors = np.arange(12).reshape(3,4)
7272
c = Centering(vectors)
73-
c.centering()
74-
c.weighted_centering(0.5)
75-
c.localized_centering(20)
73+
print("Centering: ............. {}".format(c.centering()))
74+
print("Weighted centering: .... {}".format(c.weighted_centering(0.4)))
75+
print("Localized centering: ... {}".format(c.localized_centering(2)))

HubnessAnalysis.py

Lines changed: 85 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from hub_toolbox.LocalScaling import LocalScaling
2424
from hub_toolbox.SharedNN import SharedNN
2525
from hub_toolbox.Centering import Centering
26+
from hub_toolbox import Distances as htd
2627

2728

2829
class HubnessAnalysis():
@@ -57,88 +58,92 @@ def __init__(self, D = None, classes = None, vectors = None):
5758
self.haveVectors = True
5859
self.n = len(self.D)
5960

60-
def analyse_hubness(self):
61+
def analyse_hubness(self, origData=True, mp=True, mp_gauss=False, \
62+
mp_gaussi=True, mp_gammai=True, ls=True, snn=True, \
63+
cent=True, wcent=True, wcent_g=0.4, \
64+
lcent=True, lcent_k=40, lcent_g=1.4):
6165
"""Analyse hubness in original data and rescaled distances.
6266
63-
Rescale algorithms: Mutual Proximity (empiric),
64-
Local Scaling, Shared Nearest Neighbors"""
67+
Use boolean parameters to choose which analyses to perform.
68+
Rescale algorithms: Mutual Proximity (empiric, gaussian, independent
69+
gaussian, independent gamma), Local Scaling, Shared Nearest Neighbors,
70+
Centering, Weighted Centering, Localized Centering"""
6571

6672
print()
6773
print("Hubness Analysis")
6874

69-
#"""
70-
# Hubness in original data
71-
hubness = Hubness(self.D)
72-
# Get hubness and n-occurrence (slice omits elem 1, i.e. kNN)
73-
Sn5, Nk5 = hubness.calculate_hubness()[::2]
74-
self.print_results('ORIGINAL DATA', self.D, Sn5, Nk5, True)
75-
#"""
76-
77-
# Hubness in empiric mutual proximity distance space
78-
mut_prox = MutualProximity(self.D)
79-
Dn = mut_prox.calculate_mutual_proximity(Distribution.empiric)
80-
hubness = Hubness(Dn)
81-
Sn5, Nk5 = hubness.calculate_hubness()[::2]
82-
self.print_results('MUTUAL PROXIMITY (Empiric/Slow)', Dn, Sn5, Nk5)
83-
"""
84-
# Hubness in mutual proximity distance space, Gaussian model
85-
Dn = mut_prox.calculate_mutual_proximity(Distribution.gauss)
86-
hubness = Hubness(Dn)
87-
Sn5, Nk5 = hubness.calculate_hubness()[::2]
88-
self.print_results('MUTUAL PROXIMITY (Gaussian)', Dn, Sn5, Nk5)
89-
"""
90-
# Hubness in mutual proximity distance space, independent Gaussians
91-
Dn = mut_prox.calculate_mutual_proximity(Distribution.gaussi)
92-
hubness = Hubness(Dn)
93-
Sn5, Nk5 = hubness.calculate_hubness()[::2]
94-
self.print_results('MUTUAL PROXIMITY (Independent Gaussians)', \
95-
Dn, Sn5, Nk5)
96-
97-
# Hubness in mutual proximity distance space, independent Gamma distr.
98-
Dn = mut_prox.calculate_mutual_proximity(Distribution.gammai)
99-
hubness = Hubness(Dn)
100-
Sn5, Nk5 = hubness.calculate_hubness()[::2]
101-
self.print_results('MUTUAL PROXIMITY (Independent Gamma)', Dn, Sn5, Nk5)
102-
103-
# Hubness in local scaling distance space
104-
ls = LocalScaling(self.D, 10, 'original')
105-
Dn = ls.perform_local_scaling()
106-
hubness = Hubness(Dn)
107-
Sn5, Nk5 = hubness.calculate_hubness()[::2]
108-
self.print_results('LOCAL SCALING (Original, k=10)', Dn, Sn5, Nk5)
109-
110-
# Hubness in shared nearest neighbors space
111-
snn = SharedNN(self.D, 10)
112-
Dn = snn.perform_snn()
113-
hubness = Hubness(Dn)
114-
Sn5, Nk5 = hubness.calculate_hubness()[::2]
115-
self.print_results('SHARED NEAREST NEIGHBORS (k=10)', Dn, Sn5, Nk5)
116-
117-
# Hubness after centering
118-
vectors = self.load_dexter(rawData=True)
119-
cent = Centering(vectors)
120-
D_cent = cosine_distance(cent.centering())
121-
hubness = Hubness(D_cent)
122-
Sn5, Nk5 = hubness.calculate_hubness()[::2]
123-
self.print_results('CENTERING', D_cent, Sn5, Nk5)
124-
125-
126-
# Hubness after weighted centering
127-
gamma = 0.4
128-
D_wcent = cosine_distance(cent.weighted_centering(gamma))
129-
hubness = Hubness(D_wcent)
130-
Sn5, Nk5 = hubness.calculate_hubness()[::2]
131-
self.print_results('WEIGHTED CENTERING (gamma={})'.format(gamma), \
132-
D_wcent, Sn5, Nk5)
133-
134-
# Hubness after localized centering
135-
D_lcent = 1 - cent.localized_centering(10, 1)
136-
hubness = Hubness(D_lcent)
137-
Sn5, Nk5 = hubness.calculate_hubness()[::2]
138-
self.print_results('LOCALIZED CENTERING', D_lcent, Sn5, Nk5)
139-
140-
141-
75+
if origData:
76+
# Hubness in original data
77+
hubness = Hubness(self.D)
78+
# Get hubness and n-occurrence (slice omits elem 1, i.e. kNN)
79+
Sn5, Nk5 = hubness.calculate_hubness()[::2]
80+
self.print_results('ORIGINAL DATA', self.D, Sn5, Nk5, True)
81+
if mp:
82+
# Hubness in empiric mutual proximity distance space
83+
mut_prox = MutualProximity(self.D)
84+
Dn = mut_prox.calculate_mutual_proximity(Distribution.empiric)
85+
hubness = Hubness(Dn)
86+
Sn5, Nk5 = hubness.calculate_hubness()[::2]
87+
self.print_results('MUTUAL PROXIMITY (Empiric/Slow)', Dn, Sn5, Nk5)
88+
if mp_gauss:
89+
# Hubness in mutual proximity distance space, Gaussian model
90+
Dn = mut_prox.calculate_mutual_proximity(Distribution.gauss)
91+
hubness = Hubness(Dn)
92+
Sn5, Nk5 = hubness.calculate_hubness()[::2]
93+
self.print_results('MUTUAL PROXIMITY (Gaussian)', Dn, Sn5, Nk5)
94+
if mp_gaussi:
95+
# Hubness in mutual proximity distance space, independent Gaussians
96+
Dn = mut_prox.calculate_mutual_proximity(Distribution.gaussi)
97+
hubness = Hubness(Dn)
98+
Sn5, Nk5 = hubness.calculate_hubness()[::2]
99+
self.print_results('MUTUAL PROXIMITY (Independent Gaussians)', \
100+
Dn, Sn5, Nk5)
101+
if mp_gammai:
102+
# Hubness in mutual proximity distance space, indep. Gamma distr.
103+
Dn = mut_prox.calculate_mutual_proximity(Distribution.gammai)
104+
hubness = Hubness(Dn)
105+
Sn5, Nk5 = hubness.calculate_hubness()[::2]
106+
self.print_results('MUTUAL PROXIMITY (Independent Gamma)', \
107+
Dn, Sn5, Nk5)
108+
if ls:
109+
# Hubness in local scaling distance space
110+
ls = LocalScaling(self.D, 10, 'original')
111+
Dn = ls.perform_local_scaling()
112+
hubness = Hubness(Dn)
113+
Sn5, Nk5 = hubness.calculate_hubness()[::2]
114+
self.print_results('LOCAL SCALING (Original, k=10)', Dn, Sn5, Nk5)
115+
if snn:
116+
# Hubness in shared nearest neighbors space
117+
snn = SharedNN(self.D, 10)
118+
Dn = snn.perform_snn()
119+
hubness = Hubness(Dn)
120+
Sn5, Nk5 = hubness.calculate_hubness()[::2]
121+
self.print_results('SHARED NEAREST NEIGHBORS (k=10)', Dn, Sn5, Nk5)
122+
if cent or wcent or lcent:
123+
cent = Centering(self.vectors)
124+
if cent:
125+
# Hubness after centering
126+
D_cent = htd.cosine_distance(cent.centering())
127+
hubness = Hubness(D_cent)
128+
Sn5, Nk5 = hubness.calculate_hubness()[::2]
129+
self.print_results('CENTERING', D_cent, Sn5, Nk5)
130+
if wcent:
131+
# Hubness after weighted centering
132+
D_wcent = htd.cosine_distance(cent.weighted_centering(wcent_g))
133+
hubness = Hubness(D_wcent)
134+
Sn5, Nk5 = hubness.calculate_hubness()[::2]
135+
self.print_results('WEIGHTED CENTERING (gamma={})'.format(\
136+
wcent_g), D_wcent, Sn5, Nk5)
137+
if lcent:
138+
# Hubness after localized centering
139+
D_lcent = 1 - cent.localized_centering(kappa=lcent_k, \
140+
gamma=lcent_g)
141+
hubness = Hubness(D_lcent)
142+
Sn5, Nk5 = hubness.calculate_hubness()[::2]
143+
self.print_results(\
144+
'LOCALIZED CENTERING (k={}, gamma={})'.format(\
145+
lcent_k, lcent_g), D_lcent, Sn5, Nk5)
146+
142147
def print_results(self, heading : str, distances, Sn5 : float, Nk5 : float,
143148
calc_intrinsic_dimensionality : bool = False):
144149
"""Print the results of a hubness analysis."""
@@ -175,7 +180,7 @@ def print_results(self, heading : str, distances, Sn5 : float, Nk5 : float,
175180
print('original dimensionality : No vectors given')
176181
print('intrinsic dimensionality estimate : No vectors given')
177182

178-
def load_dexter(self, rawData = False):
183+
def load_dexter(self):
179184
"""Load the example data set (dexter)."""
180185

181186
print('\nNO PARAMETERS GIVEN! Loading & evaluating DEXTER data set.\n');
@@ -210,25 +215,10 @@ def load_dexter(self, rawData = False):
210215
vectors[row][int(col)-1] = int(val)
211216
row += 1
212217

213-
if rawData:
214-
return vectors
215-
else:
216-
# Calc distance
217-
D = cosine_distance(vectors)
218-
return D, classes, vectors
218+
# Calc distance
219+
D = htd.cosine_distance(vectors)
220+
return D, classes, vectors
219221

220-
def cosine_distance(x):
221-
"""Calculate the cosine distance."""
222-
223-
xn = np.sqrt(np.sum(x**2, 1))
224-
x = x / np.tile(xn[:, np.newaxis], np.size(x, 1))
225-
D = 1 - np.dot(x, x.T )
226-
#np.clip(D, 0, np.finfo(np.float64).max, out=D) # clip max set to MaxFloat
227-
D[D<0] = 0
228-
D = np.triu(D, 0) + np.triu(D, 0).T
229-
230-
return D
231-
232222
if __name__=="__main__":
233223
hub = HubnessAnalysis()
234224
hub.analyse_hubness()

MutualProximity.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def __init__(self, D):
5353
self.D = np.copy(D)
5454

5555
def calculate_mutual_proximity(self, distrType=None):
56-
"""Applies MP on a distance matrix."""
56+
"""Apply MP on a distance matrix."""
5757

5858
if distrType is None:
5959
print("No Mutual Proximity type given. Using: Distribution.empiric")
@@ -109,7 +109,7 @@ def mp_empiric(self):
109109
return Dmp # CHECK: max matlab-numpy difference: 0.0
110110

111111
def mp_gauss(self):
112-
"""Compute Mutual Proximity distances with Gaussian model (really slow)."""
112+
"""Compute Mutual Proximity distances with Gaussian model (very slow)."""
113113

114114
np.fill_diagonal(self.D, 0)
115115
mu = np.mean(self.D, 0)

0 commit comments

Comments
 (0)