-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalysis.h
More file actions
458 lines (390 loc) · 16.2 KB
/
Copy pathanalysis.h
File metadata and controls
458 lines (390 loc) · 16.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
#pragma once
/**
* @file analysis.h
* @brief Graph analysis and validation utilities for deglib.
*
* Provides functions for:
* - Graph regularity checks (edge validity, uniqueness, ordering)
* - Edge weight validation and statistics
* - RNG (Relative Neighborhood Graph) conformance checking
* - Connectivity analysis (components, bidirectionality)
* - In-degree computation
* - Reachability testing
*/
#include <math.h>
#include <algorithm>
#include <thread>
#include "concurrent.h"
#include "graph.h"
#include "memory.h"
#include "search.h"
namespace deglib::analysis {
/**
* @brief Check if the graph structure is valid and well-formed.
*
* Validates that:
* - Vertex count matches expected
* - Edges contain unique neighbor indices in ascending order
* - No self-loops exist
* - Optionally checks for bidirectional edges
*
* @param graph The search graph to check
* @param expected_vertices Expected number of vertices
* @param check_back_link If true, verify all edges are bidirectional (expensive)
* @return true if graph passes all checks
*/
static bool check_graph_regularity(const deglib::search::SearchGraph& graph,
const uint32_t expected_vertices,
const bool check_back_link = false) {
// check vertex count
auto vertex_count = graph.size();
if (vertex_count != expected_vertices) {
std::fprintf(stderr, "the graph has an unexpected number of vertices. expected %d got %d \n", expected_vertices, vertex_count);
return false;
}
// an empty graph can not be tested
if (vertex_count == 0) return true;
// skip if the graph is too small to check
auto edges_per_vertex = graph.getEdgesPerVertex();
if (vertex_count <= edges_per_vertex) {
std::fprintf(stderr, "the graph was too small for checking validity \n");
return true;
}
// check edges
for (uint32_t v = 0; v < vertex_count; v++) {
auto neighbor_indices = graph.getNeighborIndices(v);
// check if the neighbor indices of the vertices are in ascending order and unique
int64_t last_index = -1;
for (int64_t e = 0; e < edges_per_vertex; e++) {
auto neighbor_index = neighbor_indices[e];
if (v == neighbor_index) {
std::fprintf(stderr, "vertex %u has a self-loop at position %lld \n", v, e);
return false;
}
if (last_index == neighbor_index) {
std::fprintf(
stderr, "vertex %u has a duplicate neighbor at position %lld with the neighbor index %u \n", v, e, neighbor_index);
return false;
}
if (last_index > neighbor_index) {
std::fprintf(stderr,
"the neighbor order for vertex %u is invalid: pos %lld has index %lld while pos %lld has index %u \n",
v,
e - 1,
last_index,
e,
neighbor_index);
return false;
}
if (check_back_link && graph.hasEdge(neighbor_index, v) == false) {
std::fprintf(stderr, "the neighbor %u of vertex %u does not have a back link to the vertex \n", neighbor_index, v);
return false;
}
last_index = neighbor_index;
}
}
return true;
}
/**
* @brief Calculate the average edge weight across all edges in the graph.
*
* @param graph The mutable graph to analyze
* @param scale Scaling factor for the result
* @return Average edge weight (scaled)
*/
static float calc_avg_edge_weight(const deglib::graph::MutableGraph& graph, const int scale = 1) {
double total_distance = 0;
uint64_t count = 0;
const auto edges_per_vertex = graph.getEdgesPerVertex();
const auto vertex_count = graph.size();
for (uint32_t n = 0; n < vertex_count; n++) {
const auto weights = graph.getNeighborWeights(n);
for (size_t e = 0; e < edges_per_vertex; e++) total_distance += weights[e];
count += edges_per_vertex;
}
total_distance = total_distance * scale / count;
return (float)total_distance;
}
/**
* @brief Calculate edge weight histogram (distribution of edge weights).
*
* @param graph The mutable graph to analyze
* @param sorted If true, sort weights before computing histogram
* @param scale Scaling factor for the result
* @return Vector of 10 average edge weights (one per bin)
*/
static auto calc_edge_weight_histogram(const deglib::graph::MutableGraph& graph, const bool sorted, const int scale = 1) {
const auto edges_per_vertex = graph.getEdgesPerVertex();
const auto vertex_count = graph.size();
auto all_edge_weights = std::vector<float>();
all_edge_weights.reserve(edges_per_vertex * vertex_count);
for (uint32_t n = 0; n < vertex_count; n++) {
const auto weights = graph.getNeighborWeights(n);
for (size_t e = 0; e < edges_per_vertex; e++)
if (weights[e] != 0) all_edge_weights.push_back(weights[e]);
}
if (sorted) std::sort(all_edge_weights.begin(), all_edge_weights.end());
size_t bin_count = 10;
auto bin_size = all_edge_weights.size() / bin_count;
auto avg_edge_weights = std::vector<float>(10);
for (size_t bin = 0; bin < bin_count; bin++) {
float weight_sum = 0;
for (size_t n = 0; n < bin_size; n++) weight_sum += all_edge_weights[bin_size * bin + n];
avg_edge_weights[bin] = weight_sum * scale / bin_size;
}
return avg_edge_weights;
}
/**
* @brief Verify that stored edge weights match actual distances.
*
* Iterates through all edges and compares stored weights to computed distances.
*
* @param graph The mutable graph to check
* @return true if all weights match distances
*/
static auto check_graph_weights(const deglib::graph::MutableGraph& graph) {
const auto& feature_space = graph.getFeatureSpace();
const auto dist_func = feature_space.get_dist_func();
const auto dist_func_param = feature_space.get_dist_func_param();
const auto feature_size = feature_space.get_data_size();
const auto edges_per_vertex = graph.getEdgesPerVertex();
const auto vertex_count = graph.size();
for (uint32_t n = 0; n < vertex_count; n++) {
const auto fv1 = graph.getFeatureVector(n);
const auto neighborIds = graph.getNeighborIndices(n);
const auto neighborWeights = graph.getNeighborWeights(n);
deglib::memory::prefetch(reinterpret_cast<const char*>(graph.getFeatureVector(neighborIds[0])), feature_size);
for (uint8_t e = 0; e < edges_per_vertex; e++) {
deglib::memory::prefetch(
reinterpret_cast<const char*>(graph.getFeatureVector(neighborIds[std::min(e + 1, edges_per_vertex - 1)])), feature_size);
const auto fv2 = graph.getFeatureVector(neighborIds[e]);
const auto dist = dist_func(fv1, fv2, dist_func_param);
if (neighborWeights[e] != dist) {
std::fprintf(stderr,
"Vertex %u at edge index %u has a weight of %f to vertex %u but its distance is %f \n",
n,
e,
neighborWeights[e],
neighborIds[e],
dist);
return false;
}
}
}
return true;
}
/**
* @brief Check if a vertex is RNG-conforming when connected to a target.
*
* A vertex is RNG-conforming if none of its neighbors provide a shorter
* path to the target vertex.
*
* @param graph The mutable graph
* @param edges_per_vertex Number of edges per vertex
* @param vertex_index The vertex to check
* @param target_index The target vertex
* @param vertex_target_weight The weight between vertex and target
* @return true if vertex is RNG-conforming
*/
static auto checkRNG(const deglib::graph::MutableGraph& graph,
const uint32_t edges_per_vertex,
const uint32_t vertex_index,
const uint32_t target_index,
const float vertex_target_weight) {
const auto neighbor_indices = graph.getNeighborIndices(vertex_index);
const auto neighbor_weight = graph.getNeighborWeights(vertex_index);
for (size_t edge_idx = 0; edge_idx < edges_per_vertex; edge_idx++) {
const auto neighbor_target_weight = graph.getEdgeWeight(neighbor_indices[edge_idx], target_index);
if (neighbor_target_weight != std::numeric_limits<float>::lowest() && vertex_target_weight > std::max(neighbor_weight[edge_idx], neighbor_target_weight)) {
return false;
}
}
return true;
}
/**
* @brief Count the number of non-RNG-conforming edges in the graph.
*
* Uses parallel computation to check each edge for RNG conformance.
*
* @param graph The mutable graph to analyze
* @return Number of edges that violate RNG property
*/
static uint32_t calc_non_rng_edges(const deglib::graph::MutableGraph& graph) {
const auto vertex_count = graph.size();
const auto edge_per_vertex = graph.getEdgesPerVertex();
const auto thread_count = std::thread::hardware_concurrency() / 2;
auto removed_rng_edges_per_thread = std::vector<uint32_t>(thread_count);
deglib::concurrent::parallel_for(0, vertex_count, thread_count, 1, [&](size_t vertex_index, size_t thread_id) {
uint32_t removed_rng_edges = 0;
const auto neighbor_indices = graph.getNeighborIndices((uint32_t)vertex_index);
const auto neighbor_weights = graph.getNeighborWeights((uint32_t)vertex_index);
// find all none rng conform neighbors
for (uint32_t n = 0; n < edge_per_vertex; n++) {
const auto neighbor_index = neighbor_indices[n];
const auto neighbor_weight = neighbor_weights[n];
if (checkRNG(graph, edge_per_vertex, (uint32_t)vertex_index, neighbor_index, neighbor_weight) == false) removed_rng_edges++;
}
removed_rng_edges_per_thread[thread_id] += removed_rng_edges;
});
// aggregate
uint32_t removed_rng_edges = 0;
for (uint32_t i = 0; i < thread_count; i++) removed_rng_edges += removed_rng_edges_per_thread[i];
return removed_rng_edges;
}
/**
* @brief Check if the graph is fully connected (single component).
*
* Uses flood fill from vertex 0 to check if all vertices are reachable.
*
* @param graph The search graph to check
* @return true if all vertices are connected
*/
static bool check_graph_connectivity(const deglib::search::SearchGraph& graph) {
const auto vertex_count = graph.size();
const auto edges_per_vertex = graph.getEdgesPerVertex();
// an empty graph can not be tested
if (vertex_count == 0) return true;
// already checked vertices
auto checked_ids = std::vector<bool>(vertex_count);
// vertex the check
auto check = std::vector<uint32_t>();
// start with the first vertex
checked_ids[0] = true;
check.emplace_back(0);
// repeat as long as we have vertices to check
while (check.size() > 0) {
// neighbors which will be checked next round
auto check_next = std::vector<uint32_t>();
// get the neighbors to check next
for (auto&& internal_index : check) {
auto neighbor_indizes = graph.getNeighborIndices(internal_index);
for (size_t e = 0; e < edges_per_vertex; e++) {
auto neighbor_index = neighbor_indizes[e];
if (checked_ids[neighbor_index] == false) {
checked_ids[neighbor_index] = true;
check_next.emplace_back(neighbor_index);
}
}
}
check = std::move(check_next);
}
// how many vertices have been checked
uint32_t checked_vertex_count = 0;
for (size_t i = 0; i < vertex_count; i++)
if (checked_ids[i]) checked_vertex_count++;
return checked_vertex_count == vertex_count;
}
/**
* @brief Count the number of connected components in the graph.
*
* Uses flood fill to identify separate connected subgraphs.
*
* @param graph The search graph to analyze
* @return Number of connected components
*/
static uint32_t count_graph_components(const deglib::search::SearchGraph& graph) {
const auto vertex_count = graph.size();
const auto edges_per_vertex = graph.getEdgesPerVertex();
auto visited = std::vector<bool>(vertex_count, false);
uint32_t component_count = 0;
for (uint32_t start = 0; start < vertex_count; start++) {
if (visited[start]) continue;
// Found a new component, flood fill from this vertex
component_count++;
auto check = std::vector<uint32_t>();
visited[start] = true;
check.emplace_back(start);
while (check.size() > 0) {
auto check_next = std::vector<uint32_t>();
for (auto&& internal_index : check) {
auto neighbor_indices = graph.getNeighborIndices(internal_index);
for (size_t e = 0; e < edges_per_vertex; e++) {
auto neighbor_index = neighbor_indices[e];
if (neighbor_index < vertex_count && !visited[neighbor_index]) {
visited[neighbor_index] = true;
check_next.emplace_back(neighbor_index);
}
}
}
check = std::move(check_next);
}
}
return component_count;
}
/**
* @brief Compute in-degree for each vertex in the graph.
*
* In-degree is the number of edges pointing TO a vertex.
*
* @param graph The search graph to analyze
* @return Vector of in-degrees indexed by vertex
*/
static std::vector<uint32_t> compute_in_degrees(const deglib::search::SearchGraph& graph) {
const auto vertex_count = graph.size();
const auto edges_per_vertex = graph.getEdgesPerVertex();
auto in_degrees = std::vector<uint32_t>(vertex_count, 0);
for (uint32_t n = 0; n < vertex_count; n++) {
auto neighbor_indices = graph.getNeighborIndices(n);
for (size_t e = 0; e < edges_per_vertex; e++) {
auto neighbor_index = neighbor_indices[e];
if (neighbor_index < vertex_count) {
in_degrees[neighbor_index]++;
}
}
}
return in_degrees;
}
/**
* @brief Check if all edges in the graph are bidirectional.
*
* A graph is bidirectional if for every edge (u, v), edge (v, u) also exists.
*
* @param graph The search graph to check
* @return true if all edges have a back-link
*/
static bool check_graph_bidirectional(const deglib::search::SearchGraph& graph) {
const auto vertex_count = graph.size();
const auto edges_per_vertex = graph.getEdgesPerVertex();
for (uint32_t n = 0; n < vertex_count; n++) {
auto neighbor_indices = graph.getNeighborIndices(n);
for (size_t e = 0; e < edges_per_vertex; e++) {
auto neighbor_index = neighbor_indices[e];
if (neighbor_index >= vertex_count) continue;
if (!graph.hasEdge(neighbor_index, n)) {
return false;
}
}
}
return true;
}
/**
* @brief Count vertices reachable from entry points via search.
*
* Tests search reachability by attempting to find each vertex using
* the graph's search function from entry points.
*
* @param graph The search graph to analyze
* @param eps Search epsilon parameter
* @param k Number of results to retrieve per search
* @return Number of vertices that can be found via search
*/
static uint32_t count_search_reachable(const deglib::search::SearchGraph& graph, float eps = 0.5f, uint32_t k = 100) {
const auto vertex_count = graph.size();
const auto entry_vertices = graph.getEntryVertexIndices();
uint32_t reachable_count = 0;
for (uint32_t target = 0; target < vertex_count; target++) {
auto target_feature = graph.getFeatureVector(target);
auto result_queue = graph.search(entry_vertices, target_feature, eps, k);
bool found = false;
while (!result_queue.empty()) {
if (result_queue.top().getInternalIndex() == target) {
found = true;
break;
}
result_queue.pop();
}
if (found) reachable_count++;
}
return reachable_count;
}
} // end namespace deglib::analysis