Skip to content

Commit 7146201

Browse files
Merge pull request #1 from CHSZLab/fix/freight-restreaming
Fix FREIGHT restreaming for bit-identical CLI results; upstream restreaming fix
2 parents 8a4e5c6 + e4ec204 commit 7146201

File tree

3 files changed

+95
-49
lines changed

3 files changed

+95
-49
lines changed

CMakeLists.txt

Lines changed: 1 addition & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -2085,32 +2085,6 @@ endif()
20852085
# =============================================================================
20862086
set(FREIGHT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external_repositories/FREIGHT/code_for_hypergraphs)
20872087

2088-
# --- Patch definitions.h: remove x86intrin.h (ARM compatibility) ---
2089-
file(READ ${FREIGHT_DIR}/lib/definitions.h _SRC)
2090-
string(REPLACE "#include <x86intrin.h>" "// #include <x86intrin.h> // patched out for portability" _SRC "${_SRC}")
2091-
file(WRITE ${FREIGHT_DIR}/lib/definitions.h "${_SRC}")
2092-
2093-
# --- Patch random_functions.h: constexpr array too large for some compilers ---
2094-
file(READ ${FREIGHT_DIR}/lib/tools/random_functions.h _SRC)
2095-
string(REPLACE "constexpr ConstRandArray rand_nums = ConstRandArray<TAM>();"
2096-
"const ConstRandArray rand_nums = ConstRandArray<TAM>();" _SRC "${_SRC}")
2097-
file(WRITE ${FREIGHT_DIR}/lib/tools/random_functions.h "${_SRC}")
2098-
2099-
# --- Patch buffered_map.h: const-correctness (build() calls non-const redo(),
2100-
# get_min() called on const refs in comparison operators) ---
2101-
file(READ ${FREIGHT_DIR}/lib/data_structure/buffered_map.h _SRC)
2102-
string(REPLACE "void build() const {" "void build() {" _SRC "${_SRC}")
2103-
string(REPLACE "U& get_min() {" "U& get_min() const {" _SRC "${_SRC}")
2104-
file(WRITE ${FREIGHT_DIR}/lib/data_structure/buffered_map.h "${_SRC}")
2105-
2106-
# --- Patch vertex_partitioning.h: const-correctness (get_block_address) ---
2107-
file(READ ${FREIGHT_DIR}/lib/partition/onepass_partitioning/vertex_partitioning.h _SRC)
2108-
string(REPLACE "floating_block* get_block_address(PartitionID block_id) const;"
2109-
"floating_block* get_block_address(PartitionID block_id);" _SRC "${_SRC}")
2110-
string(REPLACE "inline floating_block* vertex_partitioning::get_block_address(PartitionID block_id) const {"
2111-
"inline floating_block* vertex_partitioning::get_block_address(PartitionID block_id) {" _SRC "${_SRC}")
2112-
file(WRITE ${FREIGHT_DIR}/lib/partition/onepass_partitioning/vertex_partitioning.h "${_SRC}")
2113-
21142088
set(FREIGHT_SOURCES
21152089
${FREIGHT_DIR}/lib/partition/onepass_partitioning/vertex_partitioning.cpp
21162090
${FREIGHT_DIR}/lib/partition/onepass_partitioning/floating_block.cpp
@@ -2140,6 +2114,7 @@ target_compile_definitions(freight_static PUBLIC
21402114
-DNDEBUG
21412115
)
21422116
target_compile_options(freight_static PRIVATE $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-w>)
2117+
target_compile_options(freight_static PUBLIC $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-march=native>)
21432118

21442119
if(OpenMP_CXX_FOUND)
21452120
target_link_libraries(freight_static PUBLIC OpenMP::OpenMP_CXX)

bindings/freight_binding.cpp

Lines changed: 93 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,9 @@
1818
#include <cstdlib>
1919
#include <fstream>
2020
#include <iostream>
21+
#include <limits>
2122
#include <map>
23+
#include <set>
2224
#include <string>
2325
#include <vector>
2426

@@ -119,10 +121,14 @@ class FreightPartitioner {
119121
srand(seed);
120122
random_functions::setSeed(seed);
121123

122-
/* Create partitioner */
124+
/* Create partitioner (order matches freight.cpp initialize_onepass_partitioner) */
123125
partitioner_ = create_partitioner(
124126
algorithm, k, rec_bisection_base, fennel_gamma,
125127
sampling_type, n_samples);
128+
if (sampling_type == SAMPLING_INACTIVE_LINEAR_COMPLEXITY ||
129+
sampling_type == SAMPLING_NEIGHBORS_LINEAR_COMPLEXITY) {
130+
partitioner_->enable_self_sorting_array();
131+
}
126132
partitioner_->set_sampling_threashold(sampling_threshold);
127133

128134
/* Instantiate blocks (matching freight.cpp line 105) */
@@ -357,12 +363,20 @@ static py::array_t<int> freight_partition(
357363
srand(seed);
358364
random_functions::setSeed(seed);
359365

360-
/* Create partitioner */
366+
/* Create partitioner (order matches freight.cpp initialize_onepass_partitioner) */
361367
vertex_partitioning* partitioner = create_partitioner(
362368
algorithm, k, rec_bisection_base, fennel_gamma,
363369
sampling_type, n_samples);
370+
if (sampling_type == SAMPLING_INACTIVE_LINEAR_COMPLEXITY ||
371+
sampling_type == SAMPLING_NEIGHBORS_LINEAR_COMPLEXITY) {
372+
partitioner->enable_self_sorting_array();
373+
}
364374
partitioner->set_sampling_threashold(sampling_threshold);
365375

376+
bool use_self_sorting_array =
377+
(sampling_type == SAMPLING_INACTIVE_LINEAR_COMPLEXITY ||
378+
sampling_type == SAMPLING_NEIGHBORS_LINEAR_COMPLEXITY);
379+
366380
/* Allocate state */
367381
std::vector<PartitionID> stream_nodes_assign(n, INVALID_PARTITION);
368382
std::vector<PartitionID> stream_edges_assign(num_nets, INVALID_PARTITION);
@@ -374,10 +388,29 @@ static py::array_t<int> freight_partition(
374388
PartitionID next_key = 0;
375389
std::vector<int64_t> valid_neighboring_nets;
376390

391+
/* Build edge-to-vertex mapping for evaluation (only needed for multi-pass) */
392+
std::vector<std::vector<int64_t>> net_to_nodes;
393+
if (num_streams_passes > 1) {
394+
net_to_nodes.resize(num_nets);
395+
for (int64_t node = 0; node < n; node++) {
396+
for (int64_t e = vp(node); e < vp(node + 1); e++) {
397+
net_to_nodes[ve(e)].push_back(node);
398+
}
399+
}
400+
}
401+
402+
/* Best partition tracking for restreaming */
403+
std::vector<PartitionID> best_nodes_assign;
404+
std::vector<NodeWeight> best_blocks_weight;
405+
double best_objective = std::numeric_limits<double>::max();
406+
377407
/* Multi-pass streaming */
378408
for (int pass = 0; pass < num_streams_passes; pass++) {
379409
/* Instantiate blocks (only on first pass due to size > 0 guard) */
380410
partitioner->instantiate_blocks(n, num_nets, k, imbalance);
411+
if (pass > 0 && use_self_sorting_array) {
412+
partitioner->reset_sorted_blocks();
413+
}
381414

382415
/* Hierarchical mode */
383416
if (hierarchical && pass == 0) {
@@ -387,25 +420,38 @@ static py::array_t<int> freight_partition(
387420
partitioner->create_problem_tree(nn, mm, kk, false, false, kk);
388421
}
389422

390-
/* Reset state for restreaming passes */
391-
if (pass > 0) {
392-
for (auto& block : partitioner->blocks) {
393-
for (size_t t = 0; t < block.e_weight.size(); t++) {
394-
block.e_weight[t] = 0;
395-
}
396-
for (auto&& recp : block.recompute) recp = true;
423+
/* Reset thread-local state each pass (matching CLI's OpenMP init block) */
424+
std::fill(all_blocks_to_keys.begin(), all_blocks_to_keys.end(), INVALID_PARTITION);
425+
next_key = 0;
426+
427+
/* Restreaming: reset CUT_NET entries in cut-net mode so
428+
previously-cut nets can be reconsidered (matches readFirstLineStream) */
429+
if (pass > 0 && !use_connectivity) {
430+
for (auto& entry : stream_edges_assign) {
431+
if (entry == CUT_NET) entry = INVALID_PARTITION;
397432
}
398-
std::fill(stream_edges_assign.begin(), stream_edges_assign.end(), INVALID_PARTITION);
399-
/* Keep stream_nodes_assign from previous pass */
400433
}
401434

402435
/* Process all nodes */
403436
for (int64_t curr_node = 0; curr_node < n; curr_node++) {
404437
int my_thread = 0;
405438

439+
NodeWeight nw_val = has_node_weights ? static_cast<NodeWeight>(nw_ptr[curr_node]) : 1;
440+
441+
/* Restreaming: remove vertex from its old block before re-evaluating */
442+
if (pass > 0) {
443+
PartitionID old_block = stream_nodes_assign[curr_node];
444+
if (old_block != INVALID_PARTITION) {
445+
stream_blocks_weight[old_block] -= 1;
446+
partitioner->remove_nodeweight(old_block, 1);
447+
if (use_self_sorting_array) {
448+
partitioner->decrement_sorted_block(old_block);
449+
}
450+
}
451+
}
452+
406453
/* Skip I/O for hashing */
407454
if (algorithm == ONEPASS_HASHING || algorithm == ONEPASS_HASHING_CRC32) {
408-
NodeWeight nw_val = has_node_weights ? static_cast<NodeWeight>(nw_ptr[curr_node]) : 1;
409455
PartitionID block = partitioner->solve_node(curr_node, nw_val, my_thread);
410456
stream_nodes_assign[curr_node] = block;
411457
stream_blocks_weight[block] += 1;
@@ -452,18 +498,11 @@ static py::array_t<int> freight_partition(
452498
}
453499

454500
/* Solve */
455-
NodeWeight nw_val = has_node_weights ? static_cast<NodeWeight>(nw_ptr[curr_node]) : 1;
456501
PartitionID block = partitioner->solve_node(curr_node, nw_val, my_thread);
457502

458503
/* Register result */
459504
stream_nodes_assign[curr_node] = block;
460-
461-
if (pass == 0) {
462-
stream_blocks_weight[block] += 1;
463-
} else {
464-
/* Restreaming: adjust block weights */
465-
stream_blocks_weight[block] += 1;
466-
}
505+
stream_blocks_weight[block] += 1;
467506

468507
/* Update per-net tracking */
469508
for (auto& net_id : valid_neighboring_nets) {
@@ -476,6 +515,38 @@ static py::array_t<int> freight_partition(
476515
}
477516
}
478517
}
518+
519+
/* Evaluate this pass and track best partition */
520+
if (num_streams_passes > 1) {
521+
std::vector<PartitionID> saved_edges_assign = stream_edges_assign;
522+
523+
double pass_cut = 0, pass_con = 0;
524+
for (int64_t net = 0; net < num_nets; net++) {
525+
std::set<PartitionID> blocks_in_net;
526+
for (auto node : net_to_nodes[net]) {
527+
blocks_in_net.insert(stream_nodes_assign[node]);
528+
}
529+
if (blocks_in_net.size() > 1) {
530+
pass_cut += 1;
531+
pass_con += blocks_in_net.size() - 1;
532+
}
533+
}
534+
535+
double pass_objective = use_connectivity ? pass_con : pass_cut;
536+
if (pass_objective < best_objective) {
537+
best_objective = pass_objective;
538+
best_nodes_assign = stream_nodes_assign;
539+
best_blocks_weight = stream_blocks_weight;
540+
}
541+
542+
stream_edges_assign = saved_edges_assign;
543+
}
544+
}
545+
546+
/* Restore best partition if restreaming was used */
547+
if (num_streams_passes > 1 && !best_nodes_assign.empty()) {
548+
stream_nodes_assign = best_nodes_assign;
549+
stream_blocks_weight = best_blocks_weight;
479550
}
480551

481552
/* Build result */
@@ -519,7 +590,7 @@ PYBIND11_MODULE(_freight, m) {
519590
py::arg("hierarchical") = false,
520591
py::arg("rec_bisection_base") = 2,
521592
py::arg("fennel_gamma") = 1.5f,
522-
py::arg("sampling_type") = (int)SAMPLING_INACTIVE,
593+
py::arg("sampling_type") = (int)SAMPLING_INACTIVE_LINEAR_COMPLEXITY,
523594
py::arg("n_samples") = 0,
524595
py::arg("sampling_threshold") = 1.0f,
525596
py::arg("suppress_output") = true,
@@ -555,7 +626,7 @@ PYBIND11_MODULE(_freight, m) {
555626
py::arg("hierarchical") = false,
556627
py::arg("rec_bisection_base") = 2,
557628
py::arg("fennel_gamma") = 1.5f,
558-
py::arg("sampling_type") = (int)SAMPLING_INACTIVE,
629+
py::arg("sampling_type") = (int)SAMPLING_INACTIVE_LINEAR_COMPLEXITY,
559630
py::arg("n_samples") = 0,
560631
py::arg("sampling_threshold") = 1.0f)
561632
.def("assign_node", &FreightPartitioner::assign_node,

0 commit comments

Comments
 (0)