|
23 | 23 | from hub_toolbox.LocalScaling import LocalScaling |
24 | 24 | from hub_toolbox.SharedNN import SharedNN |
25 | 25 | from hub_toolbox.Centering import Centering |
| 26 | +from hub_toolbox import Distances as htd |
26 | 27 |
|
27 | 28 |
|
28 | 29 | class HubnessAnalysis(): |
@@ -57,88 +58,92 @@ def __init__(self, D = None, classes = None, vectors = None): |
57 | 58 | self.haveVectors = True |
58 | 59 | self.n = len(self.D) |
59 | 60 |
|
def analyse_hubness(self, origData=True, mp=True, mp_gauss=False,
                    mp_gaussi=True, mp_gammai=True, ls=True, snn=True,
                    cent=True, wcent=True, wcent_g=0.4,
                    lcent=True, lcent_k=40, lcent_g=1.4):
    """Analyse hubness in original data and rescaled distances.

    Use the boolean parameters to choose which analyses to perform. Every
    selected analysis computes hubness on its distance matrix and prints
    the outcome via ``print_results``; nothing is returned.

    Parameters
    ----------
    origData : bool
        Analyse the original distance matrix ``self.D``.
    mp : bool
        Mutual Proximity, empiric model.
    mp_gauss : bool
        Mutual Proximity, Gaussian model.
    mp_gaussi : bool
        Mutual Proximity, independent Gaussians.
    mp_gammai : bool
        Mutual Proximity, independent Gamma distributions.
    ls : bool
        Local Scaling ('original' variant, k=10).
    snn : bool
        Shared Nearest Neighbors (k=10).
    cent : bool
        Centering of ``self.vectors``, followed by cosine distance.
    wcent : bool
        Weighted centering of ``self.vectors``, followed by cosine distance.
    wcent_g : float
        Gamma passed to the weighted centering.
    lcent : bool
        Localized centering of ``self.vectors``.
    lcent_k : int
        Kappa passed to the localized centering.
    lcent_g : float
        Gamma passed to the localized centering.
    """

    def report(heading, distances, calc_intrinsic_dim=False):
        # Keep hubness and n-occurrence only; the [::2] slice omits
        # element 1 of the result (the kNN part).
        Sn5, Nk5 = Hubness(distances).calculate_hubness()[::2]
        self.print_results(heading, distances, Sn5, Nk5,
                           calc_intrinsic_dim)

    print()
    print("Hubness Analysis")

    if origData:
        # Only the original data also gets the dimensionality report.
        report('ORIGINAL DATA', self.D, True)

    # The MutualProximity object is shared by all four variants, so it must
    # be created whenever ANY of them is requested -- not only for the
    # empiric one (otherwise e.g. mp=False, mp_gaussi=True hits a NameError).
    if mp or mp_gauss or mp_gaussi or mp_gammai:
        mut_prox = MutualProximity(self.D)
        mp_variants = (
            (mp, Distribution.empiric,
             'MUTUAL PROXIMITY (Empiric/Slow)'),
            (mp_gauss, Distribution.gauss,
             'MUTUAL PROXIMITY (Gaussian)'),
            (mp_gaussi, Distribution.gaussi,
             'MUTUAL PROXIMITY (Independent Gaussians)'),
            (mp_gammai, Distribution.gammai,
             'MUTUAL PROXIMITY (Independent Gamma)'),
        )
        for enabled, distribution, heading in mp_variants:
            if enabled:
                report(heading,
                       mut_prox.calculate_mutual_proximity(distribution))

    if ls:
        # Use a separate name so the boolean flag `ls` is not overwritten.
        local_scaling = LocalScaling(self.D, 10, 'original')
        report('LOCAL SCALING (Original, k=10)',
               local_scaling.perform_local_scaling())

    if snn:
        # Use a separate name so the boolean flag `snn` is not overwritten.
        shared_nn = SharedNN(self.D, 10)
        report('SHARED NEAREST NEIGHBORS (k=10)', shared_nn.perform_snn())

    if cent or wcent or lcent:
        # Bind the Centering object to its own name: rebinding `cent` would
        # make the `if cent:` check below always true, running plain
        # centering even when only wcent/lcent was requested.
        centering = Centering(self.vectors)
        if cent:
            report('CENTERING', htd.cosine_distance(centering.centering()))
        if wcent:
            report('WEIGHTED CENTERING (gamma={})'.format(wcent_g),
                   htd.cosine_distance(
                       centering.weighted_centering(wcent_g)))
        if lcent:
            # The result of localized_centering is subtracted from 1 to
            # obtain the matrix that is analysed as distances.
            D_lcent = 1 - centering.localized_centering(kappa=lcent_k,
                                                        gamma=lcent_g)
            report('LOCALIZED CENTERING (k={}, gamma={})'.format(
                lcent_k, lcent_g), D_lcent)
| 146 | + |
142 | 147 | def print_results(self, heading : str, distances, Sn5 : float, Nk5 : float, |
143 | 148 | calc_intrinsic_dimensionality : bool = False): |
144 | 149 | """Print the results of a hubness analysis.""" |
@@ -175,7 +180,7 @@ def print_results(self, heading : str, distances, Sn5 : float, Nk5 : float, |
175 | 180 | print('original dimensionality : No vectors given') |
176 | 181 | print('intrinsic dimensionality estimate : No vectors given') |
177 | 182 |
|
178 | | - def load_dexter(self, rawData = False): |
| 183 | + def load_dexter(self): |
179 | 184 | """Load the example data set (dexter).""" |
180 | 185 |
|
181 | 186 | print('\nNO PARAMETERS GIVEN! Loading & evaluating DEXTER data set.\n'); |
@@ -210,25 +215,10 @@ def load_dexter(self, rawData = False): |
210 | 215 | vectors[row][int(col)-1] = int(val) |
211 | 216 | row += 1 |
212 | 217 |
|
213 | | - if rawData: |
214 | | - return vectors |
215 | | - else: |
216 | | - # Calc distance |
217 | | - D = cosine_distance(vectors) |
218 | | - return D, classes, vectors |
| 218 | + # Calc distance |
| 219 | + D = htd.cosine_distance(vectors) |
| 220 | + return D, classes, vectors |
219 | 221 |
|
def cosine_distance(x):
    """Calculate the pairwise cosine distance between the rows of `x`.

    Parameters
    ----------
    x : array-like, two-dimensional
        One vector per row.

    Returns
    -------
    D : ndarray
        Square, symmetric matrix with ``D[i, j] = 1 - cos(x[i], x[j])``,
        negative values set to 0 and a diagonal of exactly 0.
        Rows of `x` with zero norm produce NaN entries (division by zero).
    """
    x = np.asarray(x)
    # Normalise every row to unit length; broadcasting replaces np.tile.
    norms = np.sqrt(np.sum(x**2, axis=1, keepdims=True))
    unit = x / norms
    D = 1 - np.dot(unit, unit.T)
    # Rounding errors can push distances slightly below zero.
    D[D < 0] = 0
    # Mirror the STRICT upper triangle: this enforces exact symmetry and a
    # zero diagonal. Mirroring with k=0 would add the diagonal twice and
    # double any rounding residue in the self-distances.
    upper = np.triu(D, 1)
    return upper + upper.T
231 | | - |
# Run the hubness analysis with default settings when executed as a script.
if __name__ == "__main__":
    HubnessAnalysis().analyse_hubness()
|
0 commit comments