Skip to content

Commit f25e5f6

Browse files
Merge pull request #4 from SemyonSinchenko/shortests-paths
feat: add reversed option + ldbc test
2 parents 63f3da3 + b35fd88 commit f25e5f6

6 files changed

Lines changed: 146 additions & 3 deletions

File tree

NOTICE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
This project uses the LDBC datasets, which are licensed under the Apache Software License, Version 2.0.
2+
The LDBC datasets are used for testing and evaluation purposes only.
3+
Note that results obtained with these datasets should not be referred to using the words 'LDBC benchmark' or any equivalent phrase,
4+
as per the LDBC fair use policy.

src/shortest_paths.rs

Lines changed: 88 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::pregel::{PREGEL_MSG, pregel_dst};
1+
use crate::pregel::{MessageDirection, PREGEL_MSG, pregel_dst, pregel_src};
22
use crate::{GraphFrame, VERTEX_ID};
33
use arrow::compute::min;
44
use datafusion::arrow;
@@ -114,6 +114,8 @@ pub struct ShortestPathsBuilder<'a> {
114114
max_iterations: usize,
115115
/// Interval at which to checkpoint the computation state
116116
checkpoint_interval: usize,
117+
/// If true, run the algorithm in reverse, computing distances from the landmarks to each vertex
118+
reversed: bool,
117119
}
118120

119121
impl<'a> ShortestPathsBuilder<'a> {
@@ -130,9 +132,20 @@ impl<'a> ShortestPathsBuilder<'a> {
130132
landmarks: sorted_landmarks,
131133
max_iterations: i32::MAX as usize,
132134
checkpoint_interval: 1,
135+
reversed: false,
133136
}
134137
}
135138

139+
/// Sets the direction of the shortest-paths computation.
140+
///
141+
/// # Arguments
142+
/// * `reversed` - If true, computes shortest paths from landmarks to vertices.
143+
/// If false, computes the shortest paths from vertices to landmarks.
144+
pub fn reversed(mut self, reversed: bool) -> Self {
145+
self.reversed = reversed;
146+
self
147+
}
148+
136149
/// Sets the maximum number of iterations for the algorithm.
137150
///
138151
/// # Arguments
@@ -242,7 +255,11 @@ impl<'a> ShortestPathsBuilder<'a> {
242255
.iter()
243256
.flat_map(|lm| {
244257
let col_name = lm.to_string();
245-
let d_col = pregel_dst(DISTANCES).field(col_name.clone());
258+
let d_col = if self.reversed {
259+
pregel_src(DISTANCES).field(col_name.clone())
260+
} else {
261+
pregel_dst(DISTANCES).field(col_name.clone())
262+
};
246263
vec![
247264
lit(col_name),
248265
when(d_col.clone().lt(lit(i32::MAX)), d_col + lit(1i32))
@@ -266,7 +283,14 @@ impl<'a> ShortestPathsBuilder<'a> {
266283
update_participating.clone(),
267284
)
268285
// Add a message
269-
.add_message(message_expr, crate::pregel::MessageDirection::DstToSrc)
286+
.add_message(
287+
message_expr,
288+
if self.reversed {
289+
MessageDirection::SrcToDst
290+
} else {
291+
MessageDirection::DstToSrc
292+
},
293+
)
270294
// Set aggregate expression
271295
.with_aggregate_expr(aggregate_expr_udaf.call(vec![col(PREGEL_MSG)]))
272296
// Set voting condition
@@ -300,6 +324,7 @@ impl GraphFrame {
300324
#[cfg(test)]
301325
mod tests {
302326
use super::*;
327+
use crate::tests::create_ldbc_test_graph;
303328
use datafusion::arrow::array::{Int64Array, RecordBatch};
304329
use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
305330
use datafusion::prelude::SessionContext;
@@ -448,4 +473,64 @@ mod tests {
448473
);
449474
Ok(())
450475
}
476+
477+
async fn get_ldbc_bfs_results(dataset: &str) -> Result<DataFrame> {
478+
let ctx = SessionContext::new();
479+
let manifest_dir = env!("CARGO_MANIFEST_DIR");
480+
let expected_pr_schema = Schema::new(vec![
481+
Field::new("vertex_id", DataType::Int64, false),
482+
Field::new("expected_distance", DataType::Int64, false),
483+
]);
484+
let expected_sp_path = format!(
485+
"{}/testing/data/ldbc/{}/{}-BFS.csv",
486+
manifest_dir, dataset, dataset
487+
);
488+
let expected_sp = ctx
489+
.read_csv(
490+
&expected_sp_path,
491+
CsvReadOptions::new()
492+
.delimiter(b' ')
493+
.has_header(false)
494+
.schema(&expected_pr_schema),
495+
)
496+
.await?;
497+
Ok(expected_sp)
498+
}
499+
500+
#[tokio::test]
501+
async fn test_ldbc() -> Result<()> {
502+
let expected_distances = get_ldbc_bfs_results("test-bfs-directed").await?;
503+
let graph = create_ldbc_test_graph("test-bfs-directed").await?;
504+
505+
let results = graph
506+
.shortest_paths(vec![1])
507+
.reversed(true) // LDBC formulates the task as finding distances from the root vertex
508+
.checkpoint_interval(1)
509+
.run()
510+
.await?;
511+
let diff = results
512+
.join(
513+
expected_distances,
514+
JoinType::Left,
515+
&[VERTEX_ID],
516+
&["vertex_id"],
517+
None,
518+
)?
519+
.select(vec![
520+
col(VERTEX_ID),
521+
col(DISTANCES).field("1").alias("got_distance"),
522+
when(
523+
col("expected_distance").eq(lit(9223372036854775807i64)),
524+
lit(i32::MAX as i64),
525+
)
526+
.otherwise(col("expected_distance"))
527+
.unwrap()
528+
.alias("expected_distance"),
529+
])?
530+
.filter(col("got_distance").not_eq(col("expected_distance")))?;
531+
532+
assert_eq!(diff.count().await?, 0);
533+
534+
Ok(())
535+
}
451536
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
1 0
2+
2 1
3+
3 1
4+
4 2
5+
5 2
6+
6 3
7+
7 3
8+
8 3
9+
9 9223372036854775807
10+
10 9223372036854775807
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
1 2
2+
1 3
3+
2 3
4+
2 4
5+
2 5
6+
3 1
7+
4 6
8+
4 7
9+
4 8
10+
5 1
11+
5 2
12+
6 4
13+
6 8
14+
8 1
15+
8 2
16+
8 3
17+
9 10
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Filenames of graph on local filesystem
2+
graph.test-bfs-directed.vertex-file = test-bfs-directed.v
3+
graph.test-bfs-directed.edge-file = test-bfs-directed.e
4+
5+
# Graph metadata for reporting purposes
6+
graph.test-bfs-directed.meta.vertices = 10
7+
graph.test-bfs-directed.meta.edges = 17
8+
9+
# Properties describing the graph format
10+
graph.test-bfs-directed.directed = true
11+
12+
# List of supported algorithms on the graph
13+
graph.test-bfs-directed.algorithms = bfs
14+
15+
16+
# Parameters for BFS
17+
graph.test-bfs-directed.bfs.source-vertex = 1
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
1
2+
2
3+
3
4+
4
5+
5
6+
6
7+
7
8+
8
9+
9
10+
10

0 commit comments

Comments
 (0)