diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 5879af9db..fd7248510 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -4,6 +4,8 @@ option(CMAKE_USE_WIN32_THREADS_INIT "using WIN32 threads" ON) if(BENCHMARK) include(${CPM_DOWNLOAD_LOCATION}) + # Find OpenMP for parallelization + find_package(OpenMP REQUIRED) # Always force Google Benchmark to build in Release mode set(BENCHMARK_BUILD_TYPE Release CACHE STRING "Build type for Google Benchmark" FORCE) @@ -50,6 +52,7 @@ if(BENCHMARK) GTest::gtest benchmark::benchmark zlibstatic + OpenMP::OpenMP_CXX ${PROJECT_NAME} ) diff --git a/benchmark/results/all_benchmarks.csv b/benchmark/results/all_benchmarks.csv new file mode 100644 index 000000000..56c1d67bf --- /dev/null +++ b/benchmark/results/all_benchmarks.csv @@ -0,0 +1,306 @@ +2026-01-11T18:12:08+01:00 +Running ./build/benchmark/benchmark_exe +Run on (8 X 4400 MHz CPU s) +CPU Caches: + L1 Data 32 KiB (x4) + L1 Instruction 32 KiB (x4) + L2 Unified 256 KiB (x4) + L3 Unified 6144 KiB (x1) +Load Average: 1.02, 2.57, 2.56 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +name,iterations,real_time,cpu_time,time_unit,bytes_per_second,items_per_second,label,error_occurred,error_message +"BFS_X/1",2323278,323.851,322.821,ns,,,,, +"BFS_X/2",1523624,439.648,438.388,ns,,,,, +"BFS_X/4",998377,667.426,666.416,ns,,,,, +"BFS_X/8",527173,1281.76,1280.05,ns,,,,, +"BFS_X/16",267575,2532.13,2526.99,ns,,,,, +"BFS_X/32",128459,5113.95,5107.29,ns,,,,, +"BFS_X/64",64363,10784.9,10766.9,ns,,,,, +"BFS_X/128",27539,23883.4,23843.2,ns,,,,, +"BFS_X/256",12328,53889.6,53780.7,ns,,,,, +"BFS_X/512",5515,130892,130529,ns,,,,, +"BFS_X/1024",2467,273900,273381,ns,,,,, +"BFS_X/2048",1171,596136,594295,ns,,,,, +"BFS_X/4096",570,1.2348e+06,1.23203e+06,ns,,,,, +"BFS_X/8192",199,3.46224e+06,3.44931e+06,ns,,,,, +"BFS_X/16384",67,1.03455e+07,1.03178e+07,ns,,,,, +"BFS_X/32768",29,2.43311e+07,2.42561e+07,ns,,,,, +"BFS_X/65536",12,5.5441e+07,5.52798e+07,ns,,,,, +"BFS_X/131072",7,1.04935e+08,1.04669e+08,ns,,,,, +"BFS_X/262144",1,6.18306e+10,6.14388e+10,ns,,,,, +"BFS_X_BigO",,3.37942e-06,3.35802e-06,N^3,,,,, +"BFS_X_RMS",,5.35344e-10,5.35308e-10,,,,,, +"PSEUDO_CONCURRENCY_BFS_X/1",1043133,668.582,667.347,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/2",800264,855.237,853.974,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/4",522674,1345.9,1343.96,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/8",262447,2646.23,2642.68,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/16",129834,5214.68,5203.9,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/32",66501,10295.7,10281.7,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/64",30054,23606.5,23553.7,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/128",12790,52911.1,52824.8,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/256",6079,114106,113903,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/512",2747,254983,254523,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/1024",1246,544213,543169,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/2048",632,1.09718e+06,1.09427e+06,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/4096",274,2.33789e+06,2.33318e+06,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/8192",103,6.3719e+06,6.34919e+06,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/16384",37,1.80393e+07,1.79903e+07,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/32768",16,4.60492e+07,4.58816e+07,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/65536",7,9.89826e+07,9.87169e+07,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/131072",4,1.82346e+08,1.81882e+08,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X/262144",1,5.24244e+08,5.23e+08,ns,,,,, +"PSEUDO_CONCURRENCY_BFS_X_BigO",,105.153,104.899,NlgN,,,,, +"PSEUDO_CONCURRENCY_BFS_X_RMS",,3.01818e-10,3.02054e-10,,,,,, +"BellmanFord_X/1",3612754,194.625,194.205,ns,,,,, +"BellmanFord_X/2",2266437,303.406,303.008,ns,,,,, +"BellmanFord_X/4",1224397,562.241,561.358,ns,,,,, +"BellmanFord_X/8",550181,1242.12,1240.37,ns,,,,, +"BellmanFord_X/16",257233,2630.25,2626.81,ns,,,,, +"BellmanFord_X/32",121727,5580.38,5567.43,ns,,,,, +"BellmanFord_X/64",54912,11812.8,11791.6,ns,,,,, +"BellmanFord_X/128",27943,26143.5,26090.1,ns,,,,, +"BellmanFord_X/256",11856,57073.1,56975.1,ns,,,,, +"BellmanFord_X/512",5141,136752,136498,ns,,,,, +"BellmanFord_X/1024",2335,296392,295820,ns,,,,, +"BellmanFord_X/2048",1092,623560,622263,ns,,,,, +"BellmanFord_X/4096",431,1.33291e+06,1.32896e+06,ns,,,,, +"BellmanFord_X/8192",184,3.70193e+06,3.68898e+06,ns,,,,, +"BellmanFord_X/16384",66,1.04619e+07,1.04207e+07,ns,,,,, +"BellmanFord_X/32768",26,2.64754e+07,2.64039e+07,ns,,,,, +"BellmanFord_X/65536",13,6.10555e+07,6.0853e+07,ns,,,,, +"BellmanFord_X/131072",3,2.35897e+08,2.35208e+08,ns,,,,, +"BellmanFord_X/262144",2,3.56555e+08,3.55645e+08,ns,,,,, +"BellmanFord_X_BigO",,1412.98,1409.22,N,,,,, +"BellmanFord_X_RMS",,4.1503e-10,4.14872e-10,,,,,, +"Boruvka_X/1",2613109,266.26,265.857,ns,,,,, +"Boruvka_X/2",1690612,407.572,406.736,ns,,,,, +"Boruvka_X/4",901642,751.502,750.561,ns,,,,, +"Boruvka_X/8",424187,1625.95,1623.82,ns,,,,, +"Boruvka_X/16",209386,3318.53,3314.07,ns,,,,, +"Boruvka_X/32",88666,7111.98,7096.75,ns,,,,, +"Boruvka_X/64",45083,15047.1,15014.4,ns,,,,, +"Boruvka_X/128",19904,41288.8,41068,ns,,,,, +"Boruvka_X/256",7706,94997.4,94373.5,ns,,,,, +"Boruvka_X/512",3074,213569,212503,ns,,,,, +"Boruvka_X/1024",1460,478427,475559,ns,,,,, +"Boruvka_X/2048",679,994453,989811,ns,,,,, +"Boruvka_X/4096",351,1.98618e+06,1.92275e+06,ns,,,,, +"Boruvka_X/8192",111,6.20052e+06,6.17053e+06,ns,,,,, +"Boruvka_X/16384",41,1.69724e+07,1.69167e+07,ns,,,,, +"Boruvka_X/32768",18,3.94347e+07,3.93058e+07,ns,,,,, +"Boruvka_X/65536",9,8.9526e+07,8.91633e+07,ns,,,,, +"Boruvka_X/131072",4,1.60044e+08,1.59475e+08,ns,,,,, +"Boruvka_X/262144",3,2.69376e+08,2.68683e+08,ns,,,,, +"Boruvka_X_BigO",,1081.48,1078.36,N,,,,, +"Boruvka_X_RMS",,2.25069e-10,2.23927e-10,,,,,, +"Connectivity_X/1",1568841,450.633,450.032,ns,,,,, +"Connectivity_X/2",1091250,659.216,658.085,ns,,,,, +"Connectivity_X/4",470294,1176.25,1173.43,ns,,,,, +"Connectivity_X/8",302447,2283.08,2279.73,ns,,,,, +"Connectivity_X/16",138110,5015.82,5002.14,ns,,,,, +"Connectivity_X/32",72017,9310.15,9295.43,ns,,,,, +"Connectivity_X/64",32071,21673.6,21640.4,ns,,,,, +"Connectivity_X/128",15024,46815.6,46741.5,ns,,,,, +"Connectivity_X/256",6499,105129,104916,ns,,,,, +"Connectivity_X/512",2908,252864,252237,ns,,,,, +"Connectivity_X/1024",1267,514258,513114,ns,,,,, +"Connectivity_X/2048",643,1.05154e+06,1.04841e+06,ns,,,,, +"Connectivity_X/4096",306,2.17548e+06,2.17066e+06,ns,,,,, +"Connectivity_X/8192",100,6.74058e+06,6.71629e+06,ns,,,,, +"Connectivity_X/16384",37,1.94827e+07,1.94258e+07,ns,,,,, +"Connectivity_X/32768",15,4.45696e+07,4.44458e+07,ns,,,,, +"Connectivity_X/65536",8,9.05454e+07,9.03047e+07,ns,,,,, +"Connectivity_X/131072",4,2.06834e+08,2.05841e+08,ns,,,,, +"Connectivity_X/262144",2,2.82565e+08,2.81835e+08,ns,,,,, +"Connectivity_X_BigO",,1189.22,1185.48,N,,,,, +"Connectivity_X_RMS",,4.02527e-10,4.00233e-10,,,,,, +"StrongConnectivity_X/1",1272229,461.877,461.059,ns,,,,, +"StrongConnectivity_X/2",1093704,641.254,640.249,ns,,,,, +"StrongConnectivity_X/4",598871,1122.37,1120.75,ns,,,,, +"StrongConnectivity_X/8",303782,2309.75,2306.01,ns,,,,, +"StrongConnectivity_X/16",137924,4847.46,4837.6,ns,,,,, +"StrongConnectivity_X/32",74714,9381.6,9364.1,ns,,,,, +"StrongConnectivity_X/64",31335,21781.9,21744,ns,,,,, +"StrongConnectivity_X/128",15071,46697.1,46608.5,ns,,,,, +"StrongConnectivity_X/256",6604,105243,105011,ns,,,,, +"StrongConnectivity_X/512",2819,254001,253309,ns,,,,, +"StrongConnectivity_X/1024",1299,509300,507841,ns,,,,, +"StrongConnectivity_X/2048",647,1.04943e+06,1.04735e+06,ns,,,,, +"StrongConnectivity_X/4096",311,2.20153e+06,2.1949e+06,ns,,,,, +"StrongConnectivity_X/8192",103,6.7404e+06,6.72014e+06,ns,,,,, +"StrongConnectivity_X/16384",37,1.89342e+07,1.88708e+07,ns,,,,, +"StrongConnectivity_X/32768",15,4.38184e+07,4.36875e+07,ns,,,,, +"StrongConnectivity_X/65536",8,8.89027e+07,8.86112e+07,ns,,,,, +"StrongConnectivity_X/131072",4,1.73657e+08,1.73091e+08,ns,,,,, +"StrongConnectivity_X/262144",2,2.80985e+08,2.80203e+08,ns,,,,, +"StrongConnectivity_X_BigO",,1135.7,1132.38,N,,,,, +"StrongConnectivity_X_RMS",,2.4082e-10,2.40305e-10,,,,,, +"CycleCheckBFS_X/1",18098960,38.734,38.648,ns,,,,, +"CycleCheckBFS_X/2",11241740,58.2015,58.1193,ns,,,,, +"CycleCheckBFS_X/4",7143756,97.274,97.0317,ns,,,,, +"CycleCheckBFS_X/8",3569006,188.168,187.847,ns,,,,, +"CycleCheckBFS_X/16",1805411,382.443,381.664,ns,,,,, +"CycleCheckBFS_X/32",842004,828.005,826.9,ns,,,,, +"CycleCheckBFS_X/64",418629,1654.88,1651.54,ns,,,,, +"CycleCheckBFS_X/128",122580,5481.14,5472.04,ns,,,,, +"CycleCheckBFS_X/256",58560,12180,12155.6,ns,,,,, +"CycleCheckBFS_X/512",27309,25101.1,25053.7,ns,,,,, +"CycleCheckBFS_X/1024",12073,55242.1,55134.8,ns,,,,, +"CycleCheckBFS_X/2048",6181,111999,111701,ns,,,,, +"CycleCheckBFS_X/4096",3095,225327,224896,ns,,,,, +"CycleCheckBFS_X/8192",1506,469281,467805,ns,,,,, +"CycleCheckBFS_X/16384",446,1.34798e+06,1.34328e+06,ns,,,,, +"CycleCheckBFS_X/32768",138,4.85217e+06,4.83698e+06,ns,,,,, +"CycleCheckBFS_X/65536",55,1.28071e+07,1.27689e+07,ns,,,,, +"CycleCheckBFS_X/131072",25,2.91879e+07,2.90863e+07,ns,,,,, +"CycleCheckBFS_X/262144",12,5.9215e+07,5.90796e+07,ns,,,,, +"CycleCheckBFS_X_BigO",,12.5928,12.5609,NlgN,,,,, +"CycleCheckBFS_X_RMS",,1.03275e-10,1.02898e-10,,,,,, +"CycleCheckDFS_X/1",17475394,42.2362,42.133,ns,,,,, +"CycleCheckDFS_X/2",11646256,60.6491,60.5373,ns,,,,, +"CycleCheckDFS_X/4",7024063,96.9357,96.7809,ns,,,,, +"CycleCheckDFS_X/8",3928828,178.015,177.77,ns,,,,, +"CycleCheckDFS_X/16",1830755,379.866,379.089,ns,,,,, +"CycleCheckDFS_X/32",840624,842.019,840.714,ns,,,,, +"CycleCheckDFS_X/64",421375,1659.92,1657.51,ns,,,,, +"CycleCheckDFS_X/128",124528,5698,5686.41,ns,,,,, +"CycleCheckDFS_X/256",55463,11896.7,11877.3,ns,,,,, +"CycleCheckDFS_X/512",28476,24781.4,24716.5,ns,,,,, +"CycleCheckDFS_X/1024",12338,55181.1,55075.8,ns,,,,, +"CycleCheckDFS_X/2048",6208,112148,111935,ns,,,,, +"CycleCheckDFS_X/4096",3092,227065,226607,ns,,,,, +"CycleCheckDFS_X/8192",1488,481662,479686,ns,,,,, +"CycleCheckDFS_X/16384",496,1.33314e+06,1.32837e+06,ns,,,,, +"CycleCheckDFS_X/32768",135,4.9144e+06,4.8987e+06,ns,,,,, +"CycleCheckDFS_X/65536",53,1.27611e+07,1.27293e+07,ns,,,,, +"CycleCheckDFS_X/131072",24,2.94117e+07,2.93127e+07,ns,,,,, +"CycleCheckDFS_X/262144",12,5.92786e+07,5.908e+07,ns,,,,, +"CycleCheckDFS_X_BigO",,12.62,12.5781,NlgN,,,,, +"CycleCheckDFS_X_RMS",,1.06152e-10,1.0605e-10,,,,,, +"DFS_X/1",2425686,297.287,296.537,ns,,,,, +"DFS_X/2",1616805,418.837,418.161,ns,,,,, +"DFS_X/4",1089421,628.119,627.193,ns,,,,, +"DFS_X/8",550837,1266.37,1264.54,ns,,,,, +"DFS_X/16",267245,2583.45,2579.37,ns,,,,, +"DFS_X/32",129870,5481.41,5467.64,ns,,,,, +"DFS_X/64",62534,10871.2,10852.6,ns,,,,, +"DFS_X/128",27365,23535.1,23476.2,ns,,,,, +"DFS_X/256",12354,54023.2,53925.8,ns,,,,, +"DFS_X/512",5422,128767,128520,ns,,,,, +"DFS_X/1024",2438,279566,278977,ns,,,,, +"DFS_X/2048",1129,580411,579184,ns,,,,, +"DFS_X/4096",561,1.17924e+06,1.17589e+06,ns,,,,, +"DFS_X/8192",170,3.71521e+06,3.70327e+06,ns,,,,, +"DFS_X/16384",66,1.04421e+07,1.04053e+07,ns,,,,, +"DFS_X/32768",27,2.59698e+07,2.58923e+07,ns,,,,, +"DFS_X/65536",13,5.29676e+07,5.28341e+07,ns,,,,, +"DFS_X/131072",7,1.06072e+08,1.05767e+08,ns,,,,, +"DFS_X/262144",5,1.44485e+08,1.44131e+08,ns,,,,, +"DFS_X_BigO",,614.553,612.972,N,,,,, +"DFS_X_RMS",,4.22999e-10,4.22609e-10,,,,,, +"Dial_X/1",1704105,413.899,413.245,ns,,,,, +"Dial_X/2",947374,682.107,680.716,ns,,,,, +"Dial_X/4",557229,1227.43,1224.74,ns,,,,, +"Dial_X/8",223328,3133.09,3128.48,ns,,,,, +"Dial_X/16",102882,6612.89,6597.15,ns,,,,, +"Dial_X/32",52062,13532.7,13512.9,ns,,,,, +"Dial_X/64",23251,29954.7,29902.3,ns,,,,, +"Dial_X/128",9721,66995.4,66843,ns,,,,, +"Dial_X/256",4973,139216,138935,ns,,,,, +"Dial_X/512",2332,298471,297657,ns,,,,, +"Dial_X/1024",1058,621692,620427,ns,,,,, +"Dial_X/2048",509,1.27779e+06,1.27405e+06,ns,,,,, +"Dial_X/4096",127,6.05486e+06,6.01113e+06,ns,,,,, +"Dial_X/8192",57,1.27395e+07,1.26588e+07,ns,,,,, +"Dial_X/16384",25,2.37066e+07,2.36388e+07,ns,,,,, +"Dial_X/32768",13,5.33969e+07,5.32313e+07,ns,,,,, +"Dial_X/65536",6,1.23587e+08,1.22588e+08,ns,,,,, +"Dial_X/131072",3,2.0841e+08,2.07711e+08,ns,,,,, +"Dial_X/262144",2,3.00727e+08,2.99854e+08,ns,,,,, +"Dial_X_BigO",,1271.7,1267.41,N,,,,, +"Dial_X_RMS",,4.03518e-10,4.0133e-10,,,,,, +"Dijkstra_X/1",3968981,175.518,175.271,ns,,,,, +"Dijkstra_X/2",2233927,309.808,309.037,ns,,,,, +"Dijkstra_X/4",1197200,580.425,579.285,ns,,,,, +"Dijkstra_X/8",559420,1224.87,1223.04,ns,,,,, +"Dijkstra_X/16",266627,2617.98,2614.26,ns,,,,, +"Dijkstra_X/32",123466,5504.22,5494.23,ns,,,,, +"Dijkstra_X/64",61146,11244.2,11218.7,ns,,,,, +"Dijkstra_X/128",25518,29146.4,29093.1,ns,,,,, +"Dijkstra_X/256",11389,57897.9,57758.8,ns,,,,, +"Dijkstra_X/512",5240,134532,134241,ns,,,,, +"Dijkstra_X/1024",2283,303742,303118,ns,,,,, +"Dijkstra_X/2048",1046,630356,629047,ns,,,,, +"Dijkstra_X/4096",504,1.3809e+06,1.37746e+06,ns,,,,, +"Dijkstra_X/8192",134,3.97746e+06,3.95782e+06,ns,,,,, +"Dijkstra_X/16384",60,1.07239e+07,1.0687e+07,ns,,,,, +"Dijkstra_X/32768",18,3.82334e+07,3.81012e+07,ns,,,,, +"Dijkstra_X/65536",12,5.55879e+07,5.54164e+07,ns,,,,, +"Dijkstra_X/131072",4,1.70363e+08,1.69806e+08,ns,,,,, +"Dijkstra_X/262144",3,2.44877e+08,2.44156e+08,ns,,,,, +"Dijkstra_X_BigO",,1000.09,997.054,N,,,,, +"Dijkstra_X_RMS",,3.73205e-10,3.7287e-10,,,,,, +"BM_EdgeCreation",16081443,43.8656,43.7922,ns,,,,, +"EdgeCreationDestruction_new_delete",12455934,58.4779,58.3041,ns,,,,, +"EdgeGetId",1000000000000,2.10001e-10,2.12e-10,ns,,,,, +"NodeGetNodePair",1000000000000,2.23001e-10,2.2e-10,ns,,,,, +"EulerPath_X/1",2690709,260.177,259.722,ns,,,,, +"EulerPath_X/2",1395965,478.421,477.342,ns,,,,, +"EulerPath_X/4",709002,982.458,981.133,ns,,,,, +"EulerPath_X/8",352765,1973.31,1970.55,ns,,,,, +"EulerPath_X/16",175538,4009.74,4003.82,ns,,,,, +"EulerPath_X/32",78139,9433.14,9411.8,ns,,,,, +"EulerPath_X/64",33391,20738,20690.8,ns,,,,, +"EulerPath_X/128",13788,49004.7,48855.7,ns,,,,, +"EulerPath_X/256",7201,87061,86808.9,ns,,,,, +"EulerPath_X/512",3429,181055,180632,ns,,,,, +"EulerPath_X/1024",1791,390416,389399,ns,,,,, +"EulerPath_X/2048",826,875260,872936,ns,,,,, +"EulerPath_X/4096",374,1.79248e+06,1.7885e+06,ns,,,,, +"EulerPath_X/8192",107,5.31995e+06,5.30202e+06,ns,,,,, +"EulerPath_X/16384",44,1.54419e+07,1.5399e+07,ns,,,,, +"EulerPath_X/32768",19,3.63532e+07,3.62575e+07,ns,,,,, +"EulerPath_X/65536",9,8.24837e+07,8.21686e+07,ns,,,,, +"EulerPath_X/131072",6,1.1417e+08,1.13836e+08,ns,,,,, +"EulerPath_X/262144",6,1.08704e+08,1.08451e+08,ns,,,,, +"EulerPath_X_BigO",,549.665,548.197,N,,,,, +"EulerPath_X_RMS",,8.8865e-10,8.87757e-10,,,,,, +"BM_FWDirected",80398,8689.43,8676.95,ns,,,,, +"BM_FWNegCycle",142854,5228.98,5217.54,ns,,,,, +"BM_FWUndirectedWeighted",166537,4049.56,4040.13,ns,,,,, +"BM_FWNoWeighted",746544,931.324,929.971,ns,,,,, +"FordFulkerson_X/1",17565912,38.5059,38.4312,ns,,,,, +"FordFulkerson_X/2",11929661,57.4762,57.3903,ns,,,,, +"FordFulkerson_X/4",7247198,96.5335,96.3734,ns,,,,, +"FordFulkerson_X/8",3582699,187.754,187.415,ns,,,,, +"FordFulkerson_X/16",1767784,388.642,388.055,ns,,,,, +"FordFulkerson_X/32",807091,863.433,862.197,ns,,,,, +"FordFulkerson_X/64",410869,1688.92,1686.57,ns,,,,, +"FordFulkerson_X/128",126076,5505.62,5497.09,ns,,,,, +"FordFulkerson_X/256",57810,12223.8,12200.1,ns,,,,, +"FordFulkerson_X/512",26509,25472.9,25425.1,ns,,,,, +"FordFulkerson_X/1024",12470,55971.3,55843.5,ns,,,,, +"FordFulkerson_X/2048",5919,113993,113761,ns,,,,, +"FordFulkerson_X/4096",3097,226543,226045,ns,,,,, +"FordFulkerson_X/8192",1511,485682,483620,ns,,,,, +"FordFulkerson_X/16384",424,1.36795e+06,1.36302e+06,ns,,,,, +"FordFulkerson_X/32768",133,5.0103e+06,4.99386e+06,ns,,,,, +"FordFulkerson_X/65536",55,1.27475e+07,1.27182e+07,ns,,,,, +"FordFulkerson_X/131072",24,2.92447e+07,2.91607e+07,ns,,,,, +"FordFulkerson_X/262144",12,5.8602e+07,5.84355e+07,ns,,,,, +"FordFulkerson_X_BigO",,12.497,12.4616,NlgN,,,,, +"FordFulkerson_X_RMS",,1.04128e-10,1.04139e-10,,,,,, +"GraphSlicing_X/1",1083623,614.839,613.845,ns,,,,, +"GraphSlicing_X/2",376315,1828.45,1824.01,ns,,,,, +"GraphSlicing_X/4",103189,6060.59,6051.61,ns,,,,, +"GraphSlicing_X/8",30152,22697.3,22648.6,ns,,,,, +"GraphSlicing_X/16",7544,90225.2,90077.4,ns,,,,, +"GraphSlicing_X/32",2058,336933,336168,ns,,,,, +"GraphSlicing_X/64",481,1.45717e+06,1.4547e+06,ns,,,,, +"GraphSlicing_X/128",108,6.71734e+06,6.69804e+06,ns,,,,, +"GraphSlicing_X/256",24,2.88232e+07,2.87622e+07,ns,,,,, +"GraphSlicing_X/512",5,1.31101e+08,1.3084e+08,ns,,,,, +"GraphSlicing_X/1024",1,5.70726e+08,5.68981e+08,ns,,,,, +"GraphSlicing_X/2048",1,2.37468e+09,2.36932e+09,ns,,,,, +"GraphSlicing_X/4096",1,9.83641e+09,9.81382e+09,ns,,,,, +"GraphSlicing_X/8192",1,5.6446e+10,5.62551e+10,ns,,,,, diff --git a/benchmark/results/bellmanford.json b/benchmark/results/bellmanford.json new file mode 100644 index 000000000..25fe0ed18 --- /dev/null +++ b/benchmark/results/bellmanford.json @@ -0,0 +1,336 @@ +{ + "context": { + "date": "2026-01-11T18:11:08+01:00", + "host_name": "thew6rst", + "executable": "./build/benchmark/benchmark_exe", + "num_cpus": 8, + "mhz_per_cpu": 4400, + "cpu_scaling_enabled": true, + "aslr_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 262144, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 6291456, + "num_sharing": 8 + } + ], + "load_avg": [1.22852,2.94092,2.66943], + "library_version": "v1.9.4-93-g27fc2bf4", + "library_build_type": "release", + "json_schema_version": 1 + }, + "benchmarks": [ + { + "name": "BellmanFord_X/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BellmanFord_X/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3998965, + "real_time": 1.7997280596355037e+02, + "cpu_time": 1.7942785620779401e+02, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/2", + "family_index": 0, + "per_family_instance_index": 1, + "run_name": "BellmanFord_X/2", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2354542, + "real_time": 3.0170791092302721e+02, + "cpu_time": 3.0010166520707554e+02, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/4", + "family_index": 0, + "per_family_instance_index": 2, + "run_name": "BellmanFord_X/4", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1172387, + "real_time": 5.8097028882007544e+02, + "cpu_time": 5.7925603746885861e+02, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/8", + "family_index": 0, + "per_family_instance_index": 3, + "run_name": "BellmanFord_X/8", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 573499, + "real_time": 1.2032862202031135e+03, + "cpu_time": 1.2016910822861093e+03, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/16", + "family_index": 0, + "per_family_instance_index": 4, + "run_name": "BellmanFord_X/16", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 279701, + "real_time": 2.4953090836280448e+03, + "cpu_time": 2.4920754448500243e+03, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/32", + "family_index": 0, + "per_family_instance_index": 5, + "run_name": "BellmanFord_X/32", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 124381, + "real_time": 5.3503153777528260e+03, + "cpu_time": 5.3389931420393932e+03, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/64", + "family_index": 0, + "per_family_instance_index": 6, + "run_name": "BellmanFord_X/64", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 63221, + "real_time": 1.0792992534124298e+04, + "cpu_time": 1.0775512788472090e+04, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/128", + "family_index": 0, + "per_family_instance_index": 7, + "run_name": "BellmanFord_X/128", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 29444, + "real_time": 2.3991995686704042e+04, + "cpu_time": 2.3941064495313105e+04, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/256", + "family_index": 0, + "per_family_instance_index": 8, + "run_name": "BellmanFord_X/256", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 12379, + "real_time": 5.3637028273676486e+04, + "cpu_time": 5.3549488003877697e+04, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/512", + "family_index": 0, + "per_family_instance_index": 9, + "run_name": "BellmanFord_X/512", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5085, + "real_time": 1.3382506135690544e+05, + "cpu_time": 1.3345707295968483e+05, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/1024", + "family_index": 0, + "per_family_instance_index": 10, + "run_name": "BellmanFord_X/1024", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2120, + "real_time": 3.1290960707492387e+05, + "cpu_time": 3.1167484009433701e+05, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/2048", + "family_index": 0, + "per_family_instance_index": 11, + "run_name": "BellmanFord_X/2048", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 1005, + "real_time": 6.5638481492575712e+05, + "cpu_time": 6.5465965970149601e+05, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/4096", + "family_index": 0, + "per_family_instance_index": 12, + "run_name": "BellmanFord_X/4096", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 528, + "real_time": 1.2578083049268599e+06, + "cpu_time": 1.2542125568181812e+06, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/8192", + "family_index": 0, + "per_family_instance_index": 13, + "run_name": "BellmanFord_X/8192", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 188, + "real_time": 4.2145659202086534e+06, + "cpu_time": 4.1980522074467866e+06, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/16384", + "family_index": 0, + "per_family_instance_index": 14, + "run_name": "BellmanFord_X/16384", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 61, + "real_time": 1.2393260213103218e+07, + "cpu_time": 1.2333252295081891e+07, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/32768", + "family_index": 0, + "per_family_instance_index": 15, + "run_name": "BellmanFord_X/32768", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 9, + "real_time": 6.3810736333228618e+07, + "cpu_time": 6.3536314222222105e+07, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/65536", + "family_index": 0, + "per_family_instance_index": 16, + "run_name": "BellmanFord_X/65536", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 12, + "real_time": 6.1420926333388098e+07, + "cpu_time": 6.1219074166666351e+07, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/131072", + "family_index": 0, + "per_family_instance_index": 17, + "run_name": "BellmanFord_X/131072", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 6, + "real_time": 1.1738646816653879e+08, + "cpu_time": 1.1700121916666599e+08, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/262144", + "family_index": 0, + "per_family_instance_index": 18, + "run_name": "BellmanFord_X/262144", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.6469580049988508e+08, + "cpu_time": 1.6430876000000083e+08, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X_BigO", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BellmanFord_X", + "run_type": "aggregate", + "repetitions": 1, + "threads": 1, + "aggregate_name": "BigO", + "aggregate_unit": "time", + "cpu_coefficient": 7.0662818529808328e+02, + "real_coefficient": 7.0854155926462886e+02, + "big_o": "N", + "time_unit": "ns" + }, + { + "name": "BellmanFord_X_RMS", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BellmanFord_X", + "run_type": "aggregate", + "repetitions": 1, + "threads": 1, + "aggregate_name": "RMS", + "aggregate_unit": "percentage", + "rms": 5.5150389342804440e-01 + } + ] +} diff --git a/benchmark/results/bellmanford_imp.txt b/benchmark/results/bellmanford_imp.txt new file mode 100644 index 000000000..41f043737 --- /dev/null +++ b/benchmark/results/bellmanford_imp.txt @@ -0,0 +1,24 @@ +--------------------------------------------------------------- +Benchmark Time CPU Iterations +--------------------------------------------------------------- +BellmanFord_X/1 195 ns 194 ns 3661631 +BellmanFord_X/2 320 ns 319 ns 2074582 +BellmanFord_X/4 622 ns 621 ns 1175745 +BellmanFord_X/8 1216 ns 1213 ns 570063 +BellmanFord_X/16 2655 ns 2650 ns 265879 +BellmanFord_X/32 5623 ns 5611 ns 122899 +BellmanFord_X/64 11343 ns 11327 ns 62011 +BellmanFord_X/128 24646 ns 24592 ns 27775 +BellmanFord_X/256 56376 ns 56272 ns 12183 +BellmanFord_X/512 131170 ns 130930 ns 5170 +BellmanFord_X/1024 293640 ns 292910 ns 2400 +BellmanFord_X/2048 698531 ns 696522 ns 1117 +BellmanFord_X/4096 1467403 ns 1462826 ns 475 +BellmanFord_X/8192 3838824 ns 3824605 ns 185 +BellmanFord_X/16384 11268151 ns 11218453 ns 61 +BellmanFord_X/32768 26842603 ns 26755964 ns 26 +BellmanFord_X/65536 103831943 ns 103494733 ns 6 +BellmanFord_X/131072 113103936 ns 112689542 ns 6 +BellmanFord_X/262144 173054096 ns 172526200 ns 4 +BellmanFord_X_BigO 743.22 N 740.83 N +BellmanFord_X_RMS 62 % 62 % diff --git a/benchmark/results/bellmanford_optimized.json b/benchmark/results/bellmanford_optimized.json new file mode 100644 index 000000000..313acb7b5 --- /dev/null +++ b/benchmark/results/bellmanford_optimized.json @@ -0,0 +1,336 @@ +{ + "context": { + "date": "2026-01-11T18:48:50+01:00", + "host_name": "thew6rst", + "executable": "./build/benchmark/benchmark_exe", + "num_cpus": 8, + "mhz_per_cpu": 4400, + "cpu_scaling_enabled": true, + "aslr_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 262144, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 6291456, + "num_sharing": 8 + } + ], + "load_avg": [8.99902,6.79248,4.02246], + "library_version": "v1.9.4-93-g27fc2bf4", + "library_build_type": "release", + "json_schema_version": 1 + }, + "benchmarks": [ + { + "name": "BellmanFord_X/1", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BellmanFord_X/1", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3621506, + "real_time": 1.9697110345791512e+02, + "cpu_time": 1.9634795275777441e+02, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/2", + "family_index": 0, + "per_family_instance_index": 1, + "run_name": "BellmanFord_X/2", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2052941, + "real_time": 3.3671334246744493e+02, + "cpu_time": 3.3535022730804150e+02, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/4", + "family_index": 0, + "per_family_instance_index": 2, + "run_name": "BellmanFord_X/4", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 934457, + "real_time": 6.6771874575030324e+02, + "cpu_time": 6.6485376641193659e+02, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/8", + "family_index": 0, + "per_family_instance_index": 3, + "run_name": "BellmanFord_X/8", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 463008, + "real_time": 1.4749414610527567e+03, + "cpu_time": 1.4671396001796882e+03, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/16", + "family_index": 0, + "per_family_instance_index": 4, + "run_name": "BellmanFord_X/16", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 228269, + "real_time": 3.1311830778698527e+03, + "cpu_time": 3.1170528849734219e+03, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/32", + "family_index": 0, + "per_family_instance_index": 5, + "run_name": "BellmanFord_X/32", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 102140, + "real_time": 6.5170538476693837e+03, + "cpu_time": 6.4897389857059543e+03, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/64", + "family_index": 0, + "per_family_instance_index": 6, + "run_name": "BellmanFord_X/64", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 51380, + "real_time": 1.3739778298918096e+04, + "cpu_time": 1.3686581432463870e+04, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/128", + "family_index": 0, + "per_family_instance_index": 7, + "run_name": "BellmanFord_X/128", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 24076, + "real_time": 3.0665295688565708e+04, + "cpu_time": 3.0509460043196672e+04, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/256", + "family_index": 0, + "per_family_instance_index": 8, + "run_name": "BellmanFord_X/256", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 8944, + "real_time": 7.7086023702821272e+04, + "cpu_time": 7.6737912343470845e+04, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/512", + "family_index": 0, + "per_family_instance_index": 9, + "run_name": "BellmanFord_X/512", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4145, + "real_time": 1.6915800337744562e+05, + "cpu_time": 1.6853262774427058e+05, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/1024", + "family_index": 0, + "per_family_instance_index": 10, + "run_name": "BellmanFord_X/1024", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2116, + "real_time": 3.4784709782669070e+05, + "cpu_time": 3.4631584782608732e+05, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/2048", + "family_index": 0, + "per_family_instance_index": 11, + "run_name": "BellmanFord_X/2048", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 825, + "real_time": 7.4361268363480293e+05, + "cpu_time": 7.4091751878788066e+05, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/4096", + "family_index": 0, + "per_family_instance_index": 12, + "run_name": "BellmanFord_X/4096", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 474, + "real_time": 1.6299924514773900e+06, + "cpu_time": 1.6211866160337606e+06, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/8192", + "family_index": 0, + "per_family_instance_index": 13, + "run_name": "BellmanFord_X/8192", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 121, + "real_time": 4.4463647355160518e+06, + "cpu_time": 4.4248341900826665e+06, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/16384", + "family_index": 0, + "per_family_instance_index": 14, + "run_name": "BellmanFord_X/16384", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 55, + "real_time": 1.3263653527312933e+07, + "cpu_time": 1.3187897981818253e+07, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/32768", + "family_index": 0, + "per_family_instance_index": 15, + "run_name": "BellmanFord_X/32768", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 24, + "real_time": 3.2779613000002429e+07, + "cpu_time": 3.2614133749999810e+07, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/65536", + "family_index": 0, + "per_family_instance_index": 16, + "run_name": "BellmanFord_X/65536", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 11, + "real_time": 6.8278449545521826e+07, + "cpu_time": 6.7904681181817949e+07, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/131072", + "family_index": 0, + "per_family_instance_index": 17, + "run_name": "BellmanFord_X/131072", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3, + "real_time": 2.1522079466618988e+08, + "cpu_time": 2.1425474833333170e+08, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X/262144", + "family_index": 0, + "per_family_instance_index": 18, + "run_name": "BellmanFord_X/262144", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2, + "real_time": 2.8728195650000995e+08, + "cpu_time": 2.8649653050000268e+08, + "time_unit": "ns" + }, + { + "name": "BellmanFord_X_BigO", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BellmanFord_X", + "run_type": "aggregate", + "repetitions": 1, + "threads": 1, + "aggregate_name": "BigO", + "aggregate_unit": "time", + "cpu_coefficient": 1.1892478666704503e+03, + "real_coefficient": 1.1932193956725880e+03, + "big_o": "N", + "time_unit": "ns" + }, + { + "name": "BellmanFord_X_RMS", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BellmanFord_X", + "run_type": "aggregate", + "repetitions": 1, + "threads": 1, + "aggregate_name": "RMS", + "aggregate_unit": "percentage", + "rms": 4.5797527128197441e-01 + } + ] +} diff --git a/benchmark/results/bellmanford_orig.txt b/benchmark/results/bellmanford_orig.txt new file mode 100644 index 000000000..de4651b92 --- /dev/null +++ b/benchmark/results/bellmanford_orig.txt @@ -0,0 +1,24 @@ +--------------------------------------------------------------- +Benchmark Time CPU Iterations +--------------------------------------------------------------- +BellmanFord_X/1 232 ns 230 ns 2984464 +BellmanFord_X/2 377 ns 376 ns 1474327 +BellmanFord_X/4 708 ns 705 ns 945312 +BellmanFord_X/8 1374 ns 1371 ns 471893 +BellmanFord_X/16 3010 ns 2995 ns 232817 +BellmanFord_X/32 6419 ns 6394 ns 108164 +BellmanFord_X/64 12683 ns 12632 ns 47611 +BellmanFord_X/128 25588 ns 25488 ns 27587 +BellmanFord_X/256 65002 ns 64777 ns 11003 +BellmanFord_X/512 145509 ns 144939 ns 4450 +BellmanFord_X/1024 346280 ns 344774 ns 2045 +BellmanFord_X/2048 714626 ns 711427 ns 919 +BellmanFord_X/4096 1493948 ns 1488415 ns 384 +BellmanFord_X/8192 4854775 ns 4827804 ns 175 +BellmanFord_X/16384 12197350 ns 12151235 ns 48 +BellmanFord_X/32768 34298020 ns 34100221 ns 24 +BellmanFord_X/65536 66532697 ns 66233960 ns 9 +BellmanFord_X/131072 257710108 ns 256576456 ns 3 +BellmanFord_X/262144 358237375 ns 357357122 ns 2 +BellmanFord_X_BigO 1456.14 N 1451.70 N +BellmanFord_X_RMS 47 % 47 % diff --git a/include/CXXGraph/Graph/Algorithm/BellmanFord_impl.hpp b/include/CXXGraph/Graph/Algorithm/BellmanFord_impl.hpp index 83232531e..1ae9cfa3a 100644 --- a/include/CXXGraph/Graph/Algorithm/BellmanFord_impl.hpp +++ b/include/CXXGraph/Graph/Algorithm/BellmanFord_impl.hpp @@ -1,20 +1,22 @@ /***********************************************************/ -/*** ______ ____ ______ _ ***/ -/*** / ___\ \/ /\ \/ / ___|_ __ __ _ _ __ | |__ ***/ -/*** | | \ / \ / | _| '__/ _` | '_ \| '_ \ ***/ -/*** | |___ / \ / \ |_| | | | (_| | |_) | | | | ***/ -/*** \____/_/\_\/_/\_\____|_| \__,_| .__/|_| |_| ***/ -/*** |_| ***/ +/*** ______ ____ ______ ___ __ __ ***/ +/*** /" _ "\(" _ "\( __ "\|" \|" | |" \ ***/ +/*** (: ______)| __/\/" \ | \ | || | ***/ +/*** \/ | (\ | ) |/ \| o ) | |: | ***/ +/*** // ___)_ /\\ \ / / \. / : |\ |___ ***/ +/*** (: "( <_|: / //\\ \ //\\ \| |:::| ***/ +/*** \_______)\_______/ (__)_(_)(__\_)_(__|___| ***/ +/*** ***/ /***********************************************************/ -/*** Header-Only C++ Library for Graph ***/ -/*** Representation and Algorithms ***/ +/*** Header-Only C++ Library for Graph ***/ +/*** Representation and Algorithms ***/ /***********************************************************/ -/*** Author: ZigRazor ***/ -/*** E-Mail: zigrazor@gmail.com ***/ +/*** Author: ZigRazor ***/ +/*** E-Mail: zigrazor@gmail.com ***/ /***********************************************************/ -/*** Collaboration: ----------- ***/ +/*** Collaboration: ----------- ***/ /***********************************************************/ -/*** License: MPL v2.0 ***/ +/*** License: MPL v2.0 ***/ /***********************************************************/ #ifndef __CXXGRAPH_BELLMANFORD_IMPL_H__ @@ -24,11 +26,12 @@ #include #include - +#include #include "CXXGraph/Graph/Graph_decl.h" #include "CXXGraph/Utility/ConstString.hpp" namespace CXXGraph { + template const BellmanFordResult Graph::bellmanford(const Node &source, const Node &target) const { @@ -36,6 +39,7 @@ const BellmanFordResult Graph::bellmanford(const Node &source, result.success = false; result.errorMessage = ""; result.result = INF_DOUBLE; + auto nodeSet = Graph::getNodeSet(); auto source_node_it = std::find_if( nodeSet.begin(), nodeSet.end(), @@ -53,50 +57,81 @@ const BellmanFordResult Graph::bellmanford(const Node &source, result.errorMessage = ERR_TARGET_NODE_NOT_IN_GRAPH; return result; } - // setting all the distances initially to INF_DOUBLE - std::unordered_map>, double, nodeHash> dist, - currentDist; - // n denotes the number of vertices in graph - auto n = nodeSet.size(); - for (const auto &elem : nodeSet) { - dist[elem] = INF_DOUBLE; - currentDist[elem] = INF_DOUBLE; + + // Map nodes to integer indices for efficient vector usage + size_t n = nodeSet.size(); + std::vector>> nodes(n); + std::unordered_map>, size_t, nodeHash> node_to_idx; + size_t idx = 0; + for (const auto &node : nodeSet) { + nodes[idx] = node; + node_to_idx[node] = idx++; } - // marking the distance of source as 0 - dist[*source_node_it] = 0; - // set if node distances in two consecutive - // iterations remain the same. - auto earlyStopping = false; - // outer loop for vertex relaxation + auto source_ptr = *source_node_it; + auto target_ptr = *target_node_it; + size_t src_idx = node_to_idx[source_ptr]; + size_t tgt_idx = node_to_idx[target_ptr]; + + // Prepare edges as struct for better cache and sorting + struct MyEdge { + size_t u, v; + double w; + }; + std::vector edges; + auto edgeSet = Graph::getEdgeSet(); + for (const auto &edge : edgeSet) { + if (!edge->isWeighted().value_or(false)) { + result.errorMessage = ERR_NO_WEIGHTED_EDGE; + return result; + } + auto elem = edge->getNodePair(); + double weight = + (std::dynamic_pointer_cast(edge))->getWeight(); + edges.push_back({node_to_idx[elem.first], node_to_idx[elem.second], weight}); + } + size_t m = edges.size(); + + // OPTIMIZATION: Sort edges by source for better cache locality, but only for larger graphs + const size_t SORT_THRESHOLD = 10000; // Adjust threshold as needed + if (m > SORT_THRESHOLD) { + std::sort(edges.begin(), edges.end(), + [](const MyEdge &a, const MyEdge &b) { return a.u < b.u; }); + } + + // setting all the distances initially to INF_DOUBLE + std::vector dist(n, INF_DOUBLE); + dist[src_idx] = 0.0; + + // outer loop for vertex relaxation with early stopping + bool earlyStopping = false; for (size_t i = 0; i < n - 1; ++i) { - auto edgeSet = Graph::getEdgeSet(); - // inner loop for distance updates of - // each relaxation - for (const auto &edge : edgeSet) { - auto elem = edge->getNodePair(); - if (edge->isWeighted().value_or(false)) { - auto edge_weight = - (std::dynamic_pointer_cast(edge))->getWeight(); - if (dist[elem.first] + edge_weight < dist[elem.second]) - dist[elem.second] = dist[elem.first] + edge_weight; - } else { - // No Weighted Edge - result.errorMessage = ERR_NO_WEIGHTED_EDGE; - return result; + bool relaxed = false; + + // OPTIMIZATION: Process in blocks for better cache utilization + const int BLOCK_SIZE = 64; + for (size_t block_start = 0; block_start < m; block_start += BLOCK_SIZE) { + size_t block_end = std::min(block_start + BLOCK_SIZE, m); + + // OPTIMIZATION: Prefetch next block + if (block_start + BLOCK_SIZE < m) { + __builtin_prefetch(&edges[block_start + BLOCK_SIZE], 0, 3); } - } - auto flag = true; - for (const auto &[key, value] : dist) { - if (currentDist[key] != value) { - flag = false; - break; + + // Process current block + for (size_t j = block_start; j < block_end; ++j) { + const auto &e = edges[j]; + if (dist[e.u] < INF_DOUBLE) { + double new_dist = dist[e.u] + e.w; + if (new_dist < dist[e.v]) { + dist[e.v] = new_dist; + relaxed = true; + } + } } } - for (const auto &[key, value] : dist) { - currentDist[key] = value; // update the current distance - } - if (flag) { + + if (!relaxed) { earlyStopping = true; break; } @@ -104,30 +139,34 @@ const BellmanFordResult Graph::bellmanford(const Node &source, // check if there exists a negative cycle if (!earlyStopping) { - auto edgeSet = Graph::getEdgeSet(); - for (const auto &edge : edgeSet) { - auto elem = edge->getNodePair(); - auto edge_weight = - (std::dynamic_pointer_cast(edge))->getWeight(); - if (dist[elem.first] + edge_weight < dist[elem.second]) { - result.success = true; - result.negativeCycle = true; - result.errorMessage = ""; - return result; + bool hasNegativeCycle = false; + for (const auto &e : edges) { + if (dist[e.u] < INF_DOUBLE && dist[e.u] + e.w < dist[e.v]) { + hasNegativeCycle = true; + break; } } + if (hasNegativeCycle) { + result.success = true; + result.negativeCycle = true; + result.errorMessage = ""; + return result; + } } - if (dist[*target_node_it] != INF_DOUBLE) { + if (dist[tgt_idx] != INF_DOUBLE) { result.success = true; result.errorMessage = ""; result.negativeCycle = false; - result.result = dist[*target_node_it]; + result.result = dist[tgt_idx]; return result; } + result.errorMessage = ERR_TARGET_NODE_NOT_REACHABLE; result.result = -1; return result; } + } // namespace CXXGraph -#endif // __CXXGRAPH_BELLMANFORD_IMPL_H__ + +#endif // __CXXGRAPH_BELLMANFORD_IMPL_H__ \ No newline at end of file