Skip to content

Commit d59a0a9

Browse files
committed
Removed neighbor.attributes, which was wasting a lot of space on large graphs
1 parent d879152 commit d59a0a9

5 files changed

Lines changed: 149 additions & 71 deletions

File tree

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,78 @@
1+
/*
2+
* The MIT License
3+
*
4+
* Copyright 2016 Thibault Debatty.
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in
14+
* all copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
* THE SOFTWARE.
23+
*/
124
package info.debatty.java.graphs;
225

326
/**
4-
* Represent a weighted edge (a link from node n1 to node n2)
27+
* Represent a weighted edge (a link from node n1 to node n2).
28+
* Internally, the edge is a source node plus a neighbor.
529
*
630
* @author Thibault Debatty
731
*/
832
public class Edge<T> {
933

1034
public T n1;
11-
public T n2;
12-
public double weight = 0;
13-
14-
public static final String SEPARATOR = ";";
35+
public Neighbor<T> neighbor;
1536

1637
public Edge() {
1738

1839
}
1940

20-
public Edge(T n1, T n2, double weight) {
41+
public Edge(T n1, Neighbor<T> neighbor) {
2142
this.n1 = n1;
22-
this.n2 = n2;
23-
this.weight = weight;
43+
this.neighbor = neighbor;
2444
}
2545

2646
@Override
2747
public String toString() {
28-
return n1.toString() + SEPARATOR + n2.toString()+ SEPARATOR + weight;
48+
return n1.toString() + " => " + neighbor.toString();
49+
}
2950

51+
@Override
52+
public int hashCode() {
53+
int hash = 3;
54+
hash = 83 * hash + (this.n1 != null ? this.n1.hashCode() : 0);
55+
hash = 83 * hash + (this.neighbor != null ? this.neighbor.hashCode() : 0);
56+
return hash;
57+
}
58+
59+
@Override
60+
public boolean equals(Object obj) {
61+
if (obj == null) {
62+
return false;
63+
}
64+
if (getClass() != obj.getClass()) {
65+
return false;
66+
}
67+
final Edge<?> other = (Edge<?>) obj;
68+
if (this.n1 != other.n1 && (this.n1 == null || !this.n1.equals(other.n1))) {
69+
return false;
70+
}
71+
if (this.neighbor != other.neighbor && (this.neighbor == null || !this.neighbor.equals(other.neighbor))) {
72+
return false;
73+
}
74+
return true;
3075
}
76+
77+
3178
}
Lines changed: 44 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,30 @@
1+
/*
2+
* The MIT License
3+
*
4+
* Copyright 2016 Thibault Debatty.
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in
14+
* all copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
* THE SOFTWARE.
23+
*/
124
package info.debatty.java.graphs;
225

326
import java.io.Serializable;
427
import java.security.InvalidParameterException;
5-
import java.util.HashMap;
628

729
/**
830
* Neighbor of an edge (stores the other node, and the similarity)
@@ -15,45 +37,27 @@ public class Neighbor<T>
1537
public T node;
1638
public double similarity;
1739

18-
protected HashMap<String, Object> attributes;
19-
20-
public Neighbor(T node, double similarity) {
21-
this.attributes = new HashMap<String, Object>();
22-
this.node = node;
23-
this.similarity = similarity;
24-
}
25-
26-
/**
27-
*
28-
* @param key
29-
* @param value
30-
*/
31-
public void setAttribute(String key, Object value) {
32-
attributes.put(key, value);
33-
}
34-
3540
/**
36-
* Returns the value of this attribute, or null if this neighbor has no such
37-
* attribute
3841
*
39-
* @param key
40-
* @return
42+
* @param node
43+
* @param similarity
4144
*/
42-
public Object getAttribute(String key) {
43-
return attributes.get(key);
45+
public Neighbor(final T node, final double similarity) {
46+
this.node = node;
47+
this.similarity = similarity;
4448
}
4549

4650
/**
4751
*
48-
* @return (node.id,node.value,similarity)
52+
* @return (node.toString(),similarity)
4953
*/
5054
@Override
51-
public String toString() {
52-
return "(" + node.toString() + "," + similarity + ")"; //node.value + "," + similarity + ")";
55+
public final String toString() {
56+
return "(" + node.toString() + "," + similarity + ")";
5357
}
5458

5559
@Override
56-
public boolean equals(Object other) {
60+
public final boolean equals(final Object other) {
5761
if (!other.getClass().getName().equals(this.getClass().getName())) {
5862
return false;
5963
}
@@ -63,14 +67,19 @@ public boolean equals(Object other) {
6367
}
6468

6569
@Override
66-
public int hashCode() {
70+
public final int hashCode() {
6771
int hash = 7;
6872
hash = 17 * hash + (this.node != null ? this.node.hashCode() : 0);
6973
return hash;
7074
}
7175

76+
/**
77+
* This > other if this.similarity > other.similarity.
78+
* @param other
79+
* @return
80+
*/
7281
@Override
73-
public int compareTo(Object other) {
82+
public final int compareTo(final Object other) {
7483
if (other == null) {
7584
return 1;
7685
}
@@ -87,6 +96,10 @@ public int compareTo(Object other) {
8796
return 0;
8897
}
8998

90-
return this.similarity > ((Neighbor) other).similarity ? 1 : -1;
99+
if (this.similarity > ((Neighbor) other).similarity) {
100+
return 1;
101+
}
102+
103+
return -1;
91104
}
92105
}

src/main/java/info/debatty/java/graphs/NeighborList.java

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22

33
import info.debatty.java.util.SynchronizedBoundedPriorityQueue;
44
import java.io.Serializable;
5-
import java.util.ArrayList;
6-
import java.util.HashMap;
7-
import java.util.Map;
85
import java.util.PriorityQueue;
96

107
/**
@@ -15,24 +12,6 @@
1512
public class NeighborList extends SynchronizedBoundedPriorityQueue<Neighbor>
1613
implements Serializable {
1714

18-
public static <T> ArrayList<Edge>
19-
Convert2Edges(HashMap<T, NeighborList> graph) {
20-
ArrayList<Edge> edges = new ArrayList<Edge>();
21-
22-
for (Map.Entry<T, NeighborList> pair : graph.entrySet()) {
23-
for (Neighbor<T> neighbor : pair.getValue()) {
24-
edges.add(
25-
new Edge(
26-
pair.getKey(),
27-
neighbor.node,
28-
neighbor.similarity));
29-
30-
}
31-
}
32-
33-
return edges;
34-
}
35-
3615
/**
3716
* Copy constructor.
3817
*

src/main/java/info/debatty/java/graphs/build/NNDescent.java

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,40 @@
1+
/*
2+
* The MIT License
3+
*
4+
* Copyright 2016 Thibault Debatty.
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in
14+
* all copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
* THE SOFTWARE.
23+
*/
124
package info.debatty.java.graphs.build;
225

26+
import info.debatty.java.graphs.Edge;
327
import info.debatty.java.graphs.Graph;
428
import info.debatty.java.graphs.Neighbor;
529
import info.debatty.java.graphs.NeighborList;
630
import java.security.InvalidParameterException;
731
import java.util.ArrayList;
832
import java.util.HashMap;
33+
import java.util.HashSet;
934
import java.util.List;
1035
import java.util.Map;
1136
import java.util.Random;
37+
import java.util.Set;
1238

1339
/**
1440
* Implementation of NN-Descent k-nn graph building algorithm. Based on the
@@ -30,7 +56,12 @@ public class NNDescent<T> extends GraphBuilder<T> {
3056
protected int iterations = 0;
3157
protected int c;
3258

33-
protected static final String IS_PROCESSED = "NNDescent_IS_PROCESSED_KEY";
59+
/**
60+
* Contains the list of neighbors that have been processed. As we use a
61+
* hashset, we have to use edges (which contain a reference to the source
62+
* node) instead of neighbors for the concrete implementation.
63+
*/
64+
protected Set<Edge> processed;
3465

3566
/**
3667
* Get the number of edges modified at the last iteration
@@ -105,6 +136,7 @@ public void setMaxIterations(int max_iterations) {
105136
protected Graph<T> _computeGraph(List<T> nodes) {
106137

107138
iterations = 0;
139+
processed = new HashSet<Edge>(nodes.size() * k);
108140

109141
if (nodes.size() <= (k + 1)) {
110142
return MakeFullyLinked(nodes);
@@ -135,8 +167,8 @@ protected Graph<T> _computeGraph(List<T> nodes) {
135167
// Mark sampled items in B[v] as false;
136168
for (int i = 0; i < nodes.size(); i++) {
137169
T v = nodes.get(i);
138-
old_lists.put(v, PickFalses(neighborlists.getNeighbors(v)));
139-
new_lists.put(v, PickTruesAndMark(neighborlists.getNeighbors(v)));
170+
old_lists.put(v, PickFalses(v, neighborlists.getNeighbors(v)));
171+
new_lists.put(v, PickTruesAndMark(v, neighborlists.getNeighbors(v)));
140172

141173
}
142174

@@ -242,10 +274,11 @@ protected NeighborList RandomNeighborList(List<T> nodes, T for_node) {
242274
return nl;
243275
}
244276

245-
protected ArrayList<T> PickFalses(NeighborList neighborList) {
277+
protected ArrayList<T> PickFalses(T node, NeighborList neighborList) {
246278
ArrayList<T> falses = new ArrayList<T>();
247279
for (Neighbor<T> n : neighborList) {
248-
if (n.getAttribute(IS_PROCESSED) != null) { // !n.is_new
280+
Edge edge = new Edge(node, n);
281+
if (processed.contains(edge)) {
249282
falses.add(n.node);
250283
}
251284
}
@@ -259,11 +292,12 @@ protected ArrayList<T> PickFalses(NeighborList neighborList) {
259292
* @param neighborList
260293
* @return
261294
*/
262-
protected ArrayList<T> PickTruesAndMark(NeighborList neighborList) {
295+
protected ArrayList<T> PickTruesAndMark(T node, NeighborList neighborList) {
263296
ArrayList<T> r = new ArrayList<T>();
264297
for (Neighbor<T> n : neighborList) {
265-
if (n.getAttribute(IS_PROCESSED) == null && Math.random() < rho) { // n.is_new
266-
n.setAttribute(IS_PROCESSED, true); // n.is_new = false;
298+
Edge<T> edge = new Edge<T>(node, n);
299+
if (!processed.contains(edge) && Math.random() < rho) {
300+
processed.add(edge);
267301
r.add(n.node);
268302
}
269303
}

src/main/java/info/debatty/java/graphs/build/ThreadedNNDescent.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
package info.debatty.java.graphs.build;
22

3+
import info.debatty.java.graphs.Edge;
34
import info.debatty.java.graphs.Graph;
45
import java.util.ArrayList;
6+
import java.util.Collections;
57
import java.util.HashMap;
8+
import java.util.HashSet;
69
import java.util.List;
710
import java.util.concurrent.Callable;
811
import java.util.concurrent.ConcurrentHashMap;
@@ -34,6 +37,8 @@ protected final Graph<T> _computeGraph(final List<T> nodes) {
3437
ExecutorService executor = Executors.newFixedThreadPool(thread_count);
3538

3639
iterations = 0;
40+
processed = Collections.synchronizedSet(
41+
new HashSet<Edge>(nodes.size() * k));
3742

3843
if (nodes.size() <= (k + 1)) {
3944
return MakeFullyLinked(nodes);
@@ -65,8 +70,8 @@ protected final Graph<T> _computeGraph(final List<T> nodes) {
6570
// Mark sampled items in B[v] as false;
6671
for (int i = 0; i < nodes.size(); i++) {
6772
T v = nodes.get(i);
68-
old_lists.put(v, PickFalses(graph.getNeighbors(v)));
69-
new_lists.put(v, PickTruesAndMark(graph.getNeighbors(v)));
73+
old_lists.put(v, PickFalses(v, graph.getNeighbors(v)));
74+
new_lists.put(v, PickTruesAndMark(v, graph.getNeighbors(v)));
7075
}
7176

7277
// old′ ←Reverse(old)

0 commit comments

Comments
 (0)