Skip to content

Commit 46af2ab

Browse files
committed
improving entropy uint16_t speed for dense histograms
1 parent 1d6e1f4 commit 46af2ab

29 files changed

Lines changed: 464 additions & 215 deletions

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ foreach(FEATURE_DIR ${FEATURE_DIRS})
6060
if(IS_DIRECTORY ${FEATURE_DIR})
6161
get_filename_component(FEATURE_NAME ${FEATURE_DIR} NAME)
6262
add_subdirectory(features/${FEATURE_NAME})
63+
target_compile_options(${FEATURE_NAME}_obj PRIVATE -falign-functions=64)
6364
list(APPEND FEATURE_OBJECTS $<TARGET_OBJECTS:${FEATURE_NAME}_obj>)
6465
endif()
6566
endforeach()
Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
array_size,median_ms,mean_ms,stddev_ms,min_ms,max_ms,p99_ms,gflops,simd_level
2-
512,0.012929,0.012939,0.000031,0.012906,0.012981,0.012980,0.0396,AVX
3-
1024,0.012981,0.012991,0.000017,0.012978,0.013015,0.013015,0.0789,AVX
4-
2048,0.013263,0.013232,0.000046,0.013166,0.013266,0.013266,0.1544,AVX
5-
4096,0.013569,0.013622,0.000091,0.013547,0.013750,0.013746,0.3019,AVX
6-
8192,0.014291,0.014293,0.000009,0.014283,0.014305,0.014305,0.5732,AVX
7-
12288,0.015106,0.015168,0.000095,0.015096,0.015303,0.015299,0.8135,AVX
8-
16384,0.015805,0.015803,0.000006,0.015795,0.015810,0.015810,1.0366,AVX
9-
20480,0.016561,0.016536,0.000095,0.016409,0.016639,0.016637,1.2366,AVX
10-
24576,0.017167,0.017192,0.000043,0.017158,0.017252,0.017251,1.4316,AVX
11-
28672,0.017933,0.017938,0.000008,0.017932,0.017949,0.017949,1.5988,AVX
12-
32768,0.018672,0.018698,0.000039,0.018669,0.018753,0.018751,1.7549,AVX
13-
40960,0.020117,0.020156,0.000071,0.020095,0.020256,0.020253,2.0361,AVX
14-
49152,0.021520,0.021574,0.000084,0.021510,0.021693,0.021690,2.2840,AVX
15-
57344,0.023060,0.023077,0.000030,0.023052,0.023119,0.023117,2.4868,AVX
16-
65536,0.024439,0.024483,0.000081,0.024412,0.024597,0.024593,2.6816,AVX
2+
512,0.012194,0.012233,0.000094,0.012143,0.012363,0.012360,0.0420,AVX
3+
1024,0.012241,0.012289,0.000070,0.012238,0.012388,0.012385,0.0837,AVX
4+
2048,0.012359,0.012360,0.000014,0.012344,0.012377,0.012377,0.1657,AVX
5+
4096,0.012677,0.012718,0.000148,0.012562,0.012917,0.012912,0.3231,AVX
6+
8192,0.013230,0.013485,0.000431,0.013133,0.014092,0.014075,0.6192,AVX
7+
12288,0.013515,0.013508,0.000010,0.013493,0.013515,0.013515,0.9092,AVX
8+
16384,0.013972,0.013995,0.000046,0.013954,0.014058,0.014057,1.1726,AVX
9+
20480,0.014427,0.014436,0.000025,0.014411,0.014470,0.014469,1.4195,AVX
10+
24576,0.014897,0.014897,0.000007,0.014888,0.014905,0.014905,1.6497,AVX
11+
28672,0.015357,0.015356,0.000003,0.015352,0.015359,0.015359,1.8670,AVX
12+
32768,0.015828,0.015827,0.000001,0.015826,0.015828,0.015828,2.0703,AVX
13+
40960,0.016759,0.016750,0.000019,0.016723,0.016766,0.016766,2.4440,AVX
14+
49152,0.017679,0.017684,0.000012,0.017673,0.017701,0.017701,2.7803,AVX
15+
57344,0.018616,0.018613,0.000007,0.018603,0.018619,0.018619,3.0803,AVX
16+
65536,0.019537,0.019540,0.000005,0.019536,0.019546,0.019546,3.3545,AVX
Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
array_size,median_ms,mean_ms,stddev_ms,min_ms,max_ms,p99_ms,gflops,simd_level
2-
512,0.013406,0.013716,0.000934,0.012759,0.014982,0.014951,0.0382,AVX2
3-
1024,0.012863,0.012863,0.000004,0.012858,0.012867,0.012867,0.0796,AVX2
4-
2048,0.013064,0.013060,0.000008,0.013049,0.013068,0.013067,0.1568,AVX2
5-
4096,0.013453,0.013455,0.000003,0.013452,0.013459,0.013459,0.3045,AVX2
6-
8192,0.014105,0.014109,0.000018,0.014090,0.014133,0.014132,0.5808,AVX2
7-
12288,0.014834,0.014830,0.000007,0.014820,0.014835,0.014835,0.8284,AVX2
8-
16384,0.015556,0.015556,0.000003,0.015552,0.015561,0.015561,1.0532,AVX2
9-
20480,0.016270,0.016269,0.000003,0.016264,0.016272,0.016272,1.2588,AVX2
10-
24576,0.017106,0.017105,0.000013,0.017089,0.017120,0.017120,1.4367,AVX2
11-
28672,0.017767,0.017773,0.000028,0.017743,0.017811,0.017810,1.6138,AVX2
12-
32768,0.018573,0.018581,0.000012,0.018573,0.018598,0.018597,1.7643,AVX2
13-
40960,0.019974,0.020202,0.000335,0.019957,0.020676,0.020662,2.0507,AVX2
14-
49152,0.021968,0.022020,0.000189,0.021819,0.022272,0.022266,2.2375,AVX2
15-
57344,0.022878,0.022879,0.000008,0.022869,0.022889,0.022888,2.5065,AVX2
16-
65536,0.024295,0.024335,0.000060,0.024291,0.024420,0.024418,2.6975,AVX2
2+
512,0.012179,0.012272,0.000221,0.012061,0.012577,0.012569,0.0420,AVX2
3+
1024,0.012174,0.012219,0.000110,0.012113,0.012370,0.012367,0.0841,AVX2
4+
2048,0.012223,0.012220,0.000005,0.012214,0.012224,0.012224,0.1675,AVX2
5+
4096,0.012601,0.012587,0.000105,0.012453,0.012709,0.012707,0.3251,AVX2
6+
8192,0.012931,0.012964,0.000051,0.012926,0.013035,0.013033,0.6335,AVX2
7+
12288,0.013416,0.013410,0.000009,0.013398,0.013417,0.013417,0.9159,AVX2
8+
16384,0.013870,0.013866,0.000007,0.013856,0.013872,0.013872,1.1812,AVX2
9+
20480,0.014350,0.014417,0.000107,0.014333,0.014569,0.014565,1.4272,AVX2
10+
24576,0.014847,0.014832,0.000024,0.014799,0.014850,0.014850,1.6552,AVX2
11+
28672,0.015267,0.015399,0.000188,0.015264,0.015665,0.015657,1.8780,AVX2
12+
32768,0.015758,0.015770,0.000038,0.015731,0.015821,0.015820,2.0794,AVX2
13+
40960,0.016692,0.017018,0.000468,0.016683,0.017681,0.017661,2.4539,AVX2
14+
49152,0.017646,0.017754,0.000183,0.017603,0.018012,0.018005,2.7855,AVX2
15+
57344,0.018567,0.018638,0.000150,0.018500,0.018847,0.018841,3.0885,AVX2
16+
65536,0.019425,0.019425,0.000013,0.019409,0.019440,0.019440,3.3738,AVX2
Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
array_size,median_ms,mean_ms,stddev_ms,min_ms,max_ms,p99_ms,gflops,simd_level
2-
512,0.003617,0.003686,0.000233,0.003442,0.003999,0.003992,0.1416,AVX-512F
3-
1024,0.003399,0.003384,0.000037,0.003332,0.003420,0.003419,0.3013,AVX-512F
4-
2048,0.003609,0.003609,0.000002,0.003606,0.003611,0.003611,0.5675,AVX-512F
5-
4096,0.004279,0.004282,0.000010,0.004272,0.004295,0.004295,0.9572,AVX-512F
6-
8192,0.005024,0.005019,0.000015,0.004999,0.005034,0.005034,1.6304,AVX-512F
7-
12288,0.005462,0.005460,0.000003,0.005457,0.005462,0.005462,2.2498,AVX-512F
8-
16384,0.006499,0.006499,0.000023,0.006471,0.006527,0.006527,2.5209,AVX-512F
9-
20480,0.006942,0.006941,0.000005,0.006935,0.006948,0.006947,2.9501,AVX-512F
10-
24576,0.007666,0.007670,0.000019,0.007648,0.007694,0.007694,3.2059,AVX-512F
11-
28672,0.008387,0.008390,0.000006,0.008386,0.008398,0.008398,3.4188,AVX-512F
12-
32768,0.009123,0.009124,0.000006,0.009117,0.009131,0.009131,3.5917,AVX-512F
13-
40960,0.010824,0.010826,0.000012,0.010812,0.010842,0.010842,3.7840,AVX-512F
14-
49152,0.012411,0.012421,0.000054,0.012360,0.012491,0.012489,3.9603,AVX-512F
15-
57344,0.014081,0.014081,0.000004,0.014076,0.014085,0.014085,4.0725,AVX-512F
16-
65536,0.015109,0.015109,0.000021,0.015082,0.015135,0.015134,4.3374,AVX-512F
2+
512,0.001563,0.001530,0.000053,0.001455,0.001572,0.001572,0.3275,AVX-512F
3+
1024,0.001521,0.001524,0.000004,0.001520,0.001529,0.001529,0.6730,AVX-512F
4+
2048,0.001662,0.001682,0.000029,0.001661,0.001723,0.001722,1.2321,AVX-512F
5+
4096,0.001875,0.001878,0.000013,0.001863,0.001895,0.001895,2.1848,AVX-512F
6+
8192,0.002339,0.002352,0.000020,0.002337,0.002380,0.002379,3.5029,AVX-512F
7+
12288,0.002820,0.002818,0.000005,0.002811,0.002823,0.002822,4.3576,AVX-512F
8+
16384,0.003350,0.003337,0.000037,0.003286,0.003373,0.003373,4.8906,AVX-512F
9+
20480,0.003825,0.003826,0.000052,0.003762,0.003889,0.003888,5.3544,AVX-512F
10+
24576,0.004265,0.004275,0.000045,0.004225,0.004334,0.004333,5.7623,AVX-512F
11+
28672,0.004742,0.004735,0.000017,0.004712,0.004752,0.004752,6.0460,AVX-512F
12+
32768,0.005187,0.005194,0.000024,0.005170,0.005227,0.005226,6.3176,AVX-512F
13+
40960,0.006174,0.006193,0.000040,0.006156,0.006248,0.006246,6.6338,AVX-512F
14+
49152,0.007087,0.007090,0.000032,0.007053,0.007131,0.007130,6.9354,AVX-512F
15+
57344,0.008020,0.008026,0.000029,0.007994,0.008063,0.008062,7.1503,AVX-512F
16+
65536,0.008891,0.008893,0.000003,0.008890,0.008897,0.008897,7.3707,AVX-512F
Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
array_size,median_ms,mean_ms,stddev_ms,min_ms,max_ms,p99_ms,gflops,simd_level
2-
512,0.014747,0.014970,0.000540,0.014449,0.015715,0.015695,0.0347,Scalar
3-
1024,0.014806,0.014778,0.000153,0.014578,0.014950,0.014947,0.0692,Scalar
4-
2048,0.015387,0.015424,0.000456,0.014885,0.015999,0.015986,0.1331,Scalar
5-
4096,0.015431,0.015936,0.000782,0.015336,0.017040,0.017008,0.2654,Scalar
6-
8192,0.015801,0.015621,0.000454,0.014997,0.016064,0.016059,0.5184,Scalar
7-
12288,0.016799,0.016907,0.000261,0.016654,0.017267,0.017257,0.7315,Scalar
8-
16384,0.017730,0.017578,0.000312,0.017143,0.017861,0.017858,0.9241,Scalar
9-
20480,0.018116,0.018068,0.000184,0.017823,0.018265,0.018262,1.1305,Scalar
10-
24576,0.019457,0.019395,0.000108,0.019243,0.019484,0.019483,1.2631,Scalar
11-
28672,0.020457,0.020303,0.000516,0.019608,0.020843,0.020836,1.4016,Scalar
12-
32768,0.021200,0.021197,0.000876,0.020122,0.022268,0.022247,1.5456,Scalar
13-
40960,0.022402,0.022444,0.000187,0.022239,0.022691,0.022685,1.8284,Scalar
14-
49152,0.023397,0.023416,0.000258,0.023110,0.023741,0.023734,2.1008,Scalar
15-
57344,0.025695,0.025257,0.000641,0.024351,0.025727,0.025726,2.2318,Scalar
16-
65536,0.027188,0.027109,0.000189,0.026848,0.027290,0.027288,2.4105,Scalar
2+
512,0.012735,0.012751,0.000030,0.012723,0.012793,0.012792,0.0402,Scalar
3+
1024,0.012838,0.012839,0.000006,0.012832,0.012848,0.012847,0.0798,Scalar
4+
2048,0.013227,0.013163,0.000100,0.013022,0.013240,0.013240,0.1548,Scalar
5+
4096,0.013287,0.013328,0.000112,0.013215,0.013481,0.013477,0.3083,Scalar
6+
8192,0.013648,0.013656,0.000019,0.013638,0.013683,0.013682,0.6002,Scalar
7+
12288,0.014301,0.014302,0.000013,0.014286,0.014318,0.014317,0.8592,Scalar
8+
16384,0.014762,0.014760,0.000004,0.014755,0.014764,0.014764,1.1099,Scalar
9+
20480,0.015227,0.015229,0.000002,0.015227,0.015232,0.015232,1.3450,Scalar
10+
24576,0.015695,0.015720,0.000038,0.015690,0.015774,0.015772,1.5658,Scalar
11+
28672,0.016154,0.016157,0.000005,0.016152,0.016164,0.016164,1.7750,Scalar
12+
32768,0.016611,0.016610,0.000002,0.016608,0.016612,0.016612,1.9727,Scalar
13+
40960,0.017536,0.017535,0.000002,0.017532,0.017537,0.017537,2.3358,Scalar
14+
49152,0.018484,0.018479,0.000009,0.018466,0.018487,0.018487,2.6592,Scalar
15+
57344,0.019405,0.019410,0.000013,0.019398,0.019428,0.019427,2.9552,Scalar
16+
65536,0.020152,0.020149,0.000005,0.020142,0.020153,0.020153,3.2521,Scalar
Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
array_size,median_ms,mean_ms,stddev_ms,min_ms,max_ms,p99_ms,gflops,simd_level
2-
512,0.014133,0.014743,0.001105,0.013802,0.016295,0.016252,0.0362,SSE2
3-
1024,0.013393,0.013365,0.000086,0.013248,0.013453,0.013452,0.0765,SSE2
4-
2048,0.013500,0.013508,0.000072,0.013424,0.013601,0.013599,0.1517,SSE2
5-
4096,0.013856,0.013832,0.000043,0.013772,0.013870,0.013869,0.2956,SSE2
6-
8192,0.014507,0.014506,0.000004,0.014500,0.014510,0.014510,0.5647,SSE2
7-
12288,0.015229,0.015248,0.000036,0.015216,0.015298,0.015296,0.8069,SSE2
8-
16384,0.015953,0.015958,0.000016,0.015942,0.015979,0.015978,1.0270,SSE2
9-
20480,0.016670,0.016670,0.000008,0.016660,0.016679,0.016679,1.2286,SSE2
10-
24576,0.017473,0.017469,0.000008,0.017458,0.017476,0.017476,1.4065,SSE2
11-
28672,0.018161,0.018163,0.000020,0.018140,0.018189,0.018188,1.5788,SSE2
12-
32768,0.018932,0.018923,0.000029,0.018885,0.018953,0.018953,1.7308,SSE2
13-
40960,0.020402,0.020402,0.000015,0.020385,0.020420,0.020420,2.0076,SSE2
14-
49152,0.021830,0.021844,0.000036,0.021808,0.021893,0.021892,2.2515,SSE2
15-
57344,0.023288,0.023290,0.000024,0.023262,0.023321,0.023320,2.4624,SSE2
16-
65536,0.024767,0.024755,0.000027,0.024717,0.024780,0.024780,2.6461,SSE2
2+
512,0.012594,0.012590,0.000013,0.012573,0.012604,0.012604,0.0407,SSE2
3+
1024,0.012722,0.012724,0.000062,0.012649,0.012802,0.012800,0.0805,SSE2
4+
2048,0.012853,0.012824,0.000056,0.012745,0.012872,0.012872,0.1593,SSE2
5+
4096,0.012951,0.013048,0.000139,0.012949,0.013245,0.013239,0.3163,SSE2
6+
8192,0.013546,0.013550,0.000052,0.013488,0.013615,0.013614,0.6048,SSE2
7+
12288,0.013996,0.013991,0.000036,0.013944,0.014033,0.014032,0.8780,SSE2
8+
16384,0.014229,0.014249,0.000029,0.014229,0.014290,0.014288,1.1514,SSE2
9+
20480,0.014683,0.014681,0.000005,0.014675,0.014686,0.014686,1.3949,SSE2
10+
24576,0.015153,0.015155,0.000003,0.015152,0.015159,0.015159,1.6218,SSE2
11+
28672,0.015617,0.015617,0.000001,0.015617,0.015618,0.015618,1.8359,SSE2
12+
32768,0.016090,0.016089,0.000007,0.016080,0.016097,0.016097,2.0365,SSE2
13+
40960,0.017011,0.017014,0.000005,0.017009,0.017021,0.017020,2.4078,SSE2
14+
49152,0.017950,0.017954,0.000010,0.017944,0.017968,0.017968,2.7383,SSE2
15+
57344,0.018887,0.018883,0.000006,0.018874,0.018889,0.018888,3.0362,SSE2
16+
65536,0.019844,0.019842,0.000004,0.019837,0.019845,0.019845,3.3026,SSE2
Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
array_size,median_ms,mean_ms,stddev_ms,min_ms,max_ms,p99_ms,gflops,simd_level
2-
512,0.014300,0.014127,0.000337,0.013655,0.014425,0.014423,0.0358,SSE4.2
3-
1024,0.013395,0.013401,0.000018,0.013383,0.013426,0.013425,0.0764,SSE4.2
4-
2048,0.013563,0.013558,0.000009,0.013545,0.013566,0.013566,0.1510,SSE4.2
5-
4096,0.013928,0.013927,0.000016,0.013906,0.013946,0.013946,0.2941,SSE4.2
6-
8192,0.014557,0.014552,0.000013,0.014534,0.014563,0.014563,0.5627,SSE4.2
7-
12288,0.015588,0.015579,0.000114,0.015435,0.015714,0.015711,0.7883,SSE4.2
8-
16384,0.015938,0.015960,0.000058,0.015903,0.016039,0.016037,1.0280,SSE4.2
9-
20480,0.016671,0.016668,0.000007,0.016658,0.016675,0.016675,1.2285,SSE4.2
10-
24576,0.017400,0.017402,0.000004,0.017397,0.017407,0.017407,1.4124,SSE4.2
11-
28672,0.018133,0.018129,0.000012,0.018114,0.018142,0.018142,1.5812,SSE4.2
12-
32768,0.018860,0.018864,0.000010,0.018854,0.018878,0.018878,1.7374,SSE4.2
13-
40960,0.020327,0.020319,0.000031,0.020277,0.020353,0.020352,2.0150,SSE4.2
14-
49152,0.021866,0.021873,0.000012,0.021863,0.021889,0.021889,2.2478,SSE4.2
15-
57344,0.023231,0.023222,0.000014,0.023202,0.023234,0.023233,2.4684,SSE4.2
16-
65536,0.024691,0.024690,0.000001,0.024689,0.024692,0.024692,2.6543,SSE4.2
2+
512,0.012563,0.012656,0.000177,0.012502,0.012904,0.012897,0.0408,SSE4.2
3+
1024,0.012521,0.012516,0.000010,0.012502,0.012525,0.012525,0.0818,SSE4.2
4+
2048,0.012602,0.012604,0.000005,0.012598,0.012611,0.012611,0.1625,SSE4.2
5+
4096,0.012820,0.012819,0.000003,0.012815,0.012822,0.012822,0.3195,SSE4.2
6+
8192,0.013318,0.013311,0.000012,0.013293,0.013320,0.013320,0.6151,SSE4.2
7+
12288,0.013772,0.013770,0.000004,0.013764,0.013773,0.013773,0.8922,SSE4.2
8+
16384,0.014226,0.014225,0.000005,0.014219,0.014230,0.014230,1.1517,SSE4.2
9+
20480,0.014693,0.014692,0.000006,0.014683,0.014699,0.014698,1.3938,SSE4.2
10+
24576,0.015155,0.015156,0.000004,0.015152,0.015160,0.015160,1.6216,SSE4.2
11+
28672,0.015634,0.015637,0.000013,0.015624,0.015654,0.015654,1.8339,SSE4.2
12+
32768,0.016098,0.016106,0.000011,0.016098,0.016121,0.016121,2.0355,SSE4.2
13+
40960,0.017038,0.017047,0.000014,0.017037,0.017067,0.017066,2.4041,SSE4.2
14+
49152,0.017972,0.017987,0.000031,0.017959,0.018030,0.018029,2.7349,SSE4.2
15+
57344,0.018895,0.018895,0.000009,0.018884,0.018905,0.018905,3.0350,SSE4.2
16+
65536,0.019829,0.019835,0.000014,0.019821,0.019855,0.019854,3.3051,SSE4.2
Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
array_size,median_ms,mean_ms,stddev_ms,min_ms,max_ms,p99_ms,gflops,simd_level
2-
512,0.000851,0.000861,0.000016,0.000848,0.000883,0.000882,0.6013,AVX
3-
1024,0.001496,0.001477,0.000052,0.001405,0.001529,0.001528,0.6844,AVX
4-
2048,0.002647,0.002653,0.000010,0.002645,0.002667,0.002666,0.7737,AVX
5-
4096,0.005208,0.005217,0.000029,0.005187,0.005256,0.005255,0.7864,AVX
6-
8192,0.010002,0.010023,0.000036,0.009995,0.010074,0.010072,0.8191,AVX
7-
12288,0.016848,0.016796,0.000097,0.016661,0.016880,0.016880,0.7294,AVX
8-
16384,0.019221,0.019188,0.000075,0.019084,0.019260,0.019259,0.8524,AVX
9-
20480,0.069192,0.069077,0.000461,0.068464,0.069575,0.069567,0.2960,AVX
10-
24576,0.070742,0.070798,0.000240,0.070535,0.071116,0.071108,0.3474,AVX
11-
28672,0.072824,0.072792,0.000070,0.072694,0.072857,0.072856,0.3937,AVX
12-
32768,0.076594,0.076342,0.000473,0.075679,0.076753,0.076749,0.4278,AVX
13-
40960,0.155152,0.154964,0.000694,0.154035,0.155704,0.155693,0.2640,AVX
14-
49152,0.159117,0.158844,0.001122,0.157354,0.160062,0.160043,0.3089,AVX
15-
57344,0.165602,0.166699,0.002093,0.164866,0.169629,0.169548,0.3463,AVX
16-
65536,0.168977,0.169163,0.001400,0.167549,0.170963,0.170923,0.3878,AVX
2+
512,0.000733,0.000730,0.000010,0.000717,0.000741,0.000741,0.6990,AVX
3+
1024,0.001089,0.001090,0.000036,0.001045,0.001135,0.001134,0.9405,AVX
4+
2048,0.002033,0.002035,0.000088,0.001928,0.002143,0.002141,1.0073,AVX
5+
4096,0.003653,0.003740,0.000125,0.003650,0.003916,0.003911,1.1211,AVX
6+
8192,0.007176,0.007383,0.000306,0.007157,0.007816,0.007803,1.1416,AVX
7+
12288,0.013336,0.013179,0.000356,0.012686,0.013515,0.013511,0.9214,AVX
8+
16384,0.014006,0.013998,0.000102,0.013869,0.014120,0.014117,1.1698,AVX
9+
20480,0.068689,0.068090,0.000919,0.066792,0.068789,0.068787,0.2982,AVX
10+
24576,0.069162,0.068936,0.002168,0.066175,0.071470,0.071424,0.3553,AVX
11+
28672,0.070745,0.070774,0.000312,0.070406,0.071169,0.071161,0.4053,AVX
12+
32768,0.072792,0.073056,0.001363,0.071534,0.074842,0.074801,0.4502,AVX
13+
40960,0.155017,0.155653,0.005038,0.149825,0.162117,0.161975,0.2642,AVX
14+
49152,0.153035,0.154101,0.001614,0.152888,0.156382,0.156315,0.3212,AVX
15+
57344,0.163572,0.164298,0.001134,0.163422,0.165899,0.165853,0.3506,AVX
16+
65536,0.168260,0.168509,0.000456,0.168119,0.169148,0.169131,0.3895,AVX
Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
array_size,median_ms,mean_ms,stddev_ms,min_ms,max_ms,p99_ms,gflops,simd_level
2-
512,0.000698,0.000712,0.000045,0.000666,0.000774,0.000772,0.7338,AVX2
3-
1024,0.001346,0.001326,0.000053,0.001253,0.001378,0.001378,0.7610,AVX2
4-
2048,0.002546,0.002533,0.000041,0.002477,0.002576,0.002575,0.8043,AVX2
5-
4096,0.005289,0.005406,0.000195,0.005248,0.005680,0.005672,0.7744,AVX2
6-
8192,0.010954,0.010916,0.000553,0.010220,0.011573,0.011561,0.7478,AVX2
7-
12288,0.017217,0.017144,0.000218,0.016848,0.017366,0.017363,0.7137,AVX2
8-
16384,0.019773,0.019789,0.000332,0.019391,0.020203,0.020194,0.8286,AVX2
9-
20480,0.069519,0.069689,0.000333,0.069393,0.070154,0.070142,0.2946,AVX2
10-
24576,0.073079,0.073050,0.000280,0.072694,0.073377,0.073371,0.3363,AVX2
11-
28672,0.075487,0.075387,0.000915,0.074220,0.076455,0.076436,0.3798,AVX2
12-
32768,0.078461,0.079716,0.002907,0.076953,0.083733,0.083628,0.4176,AVX2
13-
40960,0.155162,0.155979,0.001317,0.154938,0.157838,0.157784,0.2640,AVX2
14-
49152,0.160627,0.161836,0.002025,0.160192,0.164688,0.164607,0.3060,AVX2
15-
57344,0.167037,0.167267,0.001589,0.165446,0.169319,0.169273,0.3433,AVX2
16-
65536,0.169594,0.170346,0.001134,0.169496,0.171949,0.171902,0.3864,AVX2
2+
512,0.000606,0.000618,0.000019,0.000603,0.000645,0.000644,0.8455,AVX2
3+
1024,0.000957,0.000956,0.000008,0.000945,0.000965,0.000965,1.0706,AVX2
4+
2048,0.001829,0.001834,0.000036,0.001793,0.001880,0.001879,1.1200,AVX2
5+
4096,0.003748,0.003752,0.000014,0.003736,0.003770,0.003770,1.0927,AVX2
6+
8192,0.007228,0.007208,0.000047,0.007144,0.007253,0.007253,1.1333,AVX2
7+
12288,0.012687,0.012730,0.000141,0.012582,0.012919,0.012915,0.9686,AVX2
8+
16384,0.015261,0.015398,0.000858,0.014423,0.016511,0.016486,1.0736,AVX2
9+
20480,0.065957,0.065803,0.001395,0.064022,0.067429,0.067399,0.3105,AVX2
10+
24576,0.069181,0.068317,0.001718,0.065919,0.069851,0.069838,0.3552,AVX2
11+
28672,0.073056,0.073056,0.000357,0.072619,0.073493,0.073485,0.3925,AVX2
12+
32768,0.073568,0.073737,0.000990,0.072619,0.075026,0.074997,0.4454,AVX2
13+
40960,0.153687,0.153364,0.000464,0.152707,0.153698,0.153698,0.2665,AVX2
14+
49152,0.152021,0.151726,0.000493,0.151031,0.152126,0.152124,0.3233,AVX2
15+
57344,0.156096,0.156220,0.000992,0.155072,0.157493,0.157465,0.3674,AVX2
16+
65536,0.159734,0.161154,0.002043,0.159686,0.164043,0.163957,0.4103,AVX2

0 commit comments

Comments
 (0)