22// Copyright (c) 2020 LLC «V Kontakte»
33// Distributed under the GPL v3 License, see LICENSE.notice.txt
44
5- #include " runtime/vkext_stats .h"
5+ #include " runtime-common/stdlib/vkext/vkext-stats .h"
66
7- #include < assert.h>
8- #include < limits.h>
9- #include < string.h>
7+ #include < climits>
8+ #include < cstring>
109
11- #define HLL_FIRST_RANK_CHAR 0x30
12- #define HLL_PACK_CHAR ' !'
13- #define HLL_PACK_CHAR_V2 ' $'
14- #define TO_HALF_BYTE (c ) ((int )(((c > ' 9' ) ? (c - 7 ) : c) - ' 0' ))
15- #define MAX_HLL_SIZE (1 << 14 )
16- #define HLL_BUF_SIZE (MAX_HLL_SIZE + 1000 )
10+ namespace {
1711
18- static char hll_buf[HLL_BUF_SIZE];
// Register value of an empty HLL bucket; a stored rank is this offset plus the rank itself.
constexpr int HLL_FIRST_RANK_CHAR = 0x30;
// First byte of a packed HLL in the original format (fixed size of 1 << 8 buckets).
constexpr char HLL_PACK_CHAR = '!';
// First byte of a packed HLL in the second format (followed by a size-exponent character).
constexpr char HLL_PACK_CHAR_V2 = '$';
// Upper bound on the number of buckets that hll_pack accepts.
constexpr int MAX_HLL_SIZE = 1 << 14;
// Capacity of the scratch buffers used while packing and counting.
constexpr int HLL_BUF_SIZE = MAX_HLL_SIZE + 1000;
17+
// Converts one index character of the original packed format into its numeric value:
// '0'..'9' map to 0..9 and 'A'..'F' map to 10..15.
// No validation is performed: any other character yields an out-of-range
// (possibly negative) value, so callers have to range-check the decoded index.
constexpr int to_half_byte(char c) noexcept {
  return ((c > '9') ? (c - 7) : c) - '0';
}
1921
2022// ////
2123// hll functions
2224// ////
2325
24- static bool is_hll_unpacked (const string& hll) {
26+ bool is_hll_unpacked (const string& hll) noexcept {
2527 return hll.empty () || (hll[0 ] != HLL_PACK_CHAR && hll[0 ] != HLL_PACK_CHAR_V2);
2628}
2729
28- static int get_hll_size (const string& hll) {
30+ int get_hll_size (const string& hll) noexcept {
2931 if (is_hll_unpacked (hll)) {
3032 return hll.size ();
3133 }
3234 return hll[0 ] == HLL_PACK_CHAR ? (1 << 8 ) : (1 << (hll[1 ] - ' 0' ));
3335}
3436
35- Optional<string> f$vk_stats_hll_merge(const array<mixed>& a) {
36- string result;
37- char * result_buff = nullptr ;
38- int result_len = -1 ;
39- for (array<mixed>::const_iterator it = a.begin (); it != a.end (); ++it) {
40- if (!it.get_value ().is_string ()) {
41- return false ;
42- }
43- string cur = it.get_value ().to_string ();
44- if (result_len == -1 ) {
45- result_len = get_hll_size (cur);
46- result.assign ((string::size_type)result_len, (char )HLL_FIRST_RANK_CHAR);
47- result_buff = result.buffer ();
48- }
49- if (is_hll_unpacked (cur)) {
50- if (result_len != cur.size ()) {
51- return false ;
52- }
53- int i;
54- for (i = 0 ; i < result_len; i++) {
55- if (result_buff[i] < cur[i]) {
56- result_buff[i] = cur[i];
57- }
58- }
59- } else {
60- int i = 1 + (cur[0 ] == HLL_PACK_CHAR_V2);
61- while (i + 2 < cur.size ()) {
62- int p;
63- if (cur[0 ] == HLL_PACK_CHAR) {
64- p = (TO_HALF_BYTE (cur[i]) << 4 ) + TO_HALF_BYTE (cur[i + 1 ]);
65- } else {
66- p = (((int )cur[i] - 1 ) & 0x7f ) + (((int )cur[i + 1 ] - 1 ) << 7 );
67- }
68- if (p >= result_len) {
69- return false ;
70- }
71- if (result_buff[p] < cur[i + 2 ]) {
72- result_buff[p] = cur[i + 2 ];
73- }
74- i += 3 ;
75- }
76- }
77- }
78- return result;
79- }
80-
81- static int unpack_hll (const string& hll, char * res) {
82- assert (!is_hll_unpacked (hll));
37+ int unpack_hll (const string& hll, char * res) noexcept {
38+ php_assert (!is_hll_unpacked (hll));
8339 int m = get_hll_size (hll);
8440 int pos = 1 + (hll[0 ] == HLL_PACK_CHAR_V2);
85- memset (res, HLL_FIRST_RANK_CHAR, ( size_t ) m);
41+ memset (res, HLL_FIRST_RANK_CHAR, m);
8642 while (pos + 2 < hll.size ()) {
8743 int p;
8844 if (hll[0 ] == HLL_PACK_CHAR) {
89- p = (TO_HALF_BYTE (hll[pos]) << 4 ) + TO_HALF_BYTE (hll[pos + 1 ]);
45+ p = (to_half_byte (hll[pos]) << 4 ) + to_half_byte (hll[pos + 1 ]);
9046 } else {
91- p = ((( int ) hll[pos] - 1 ) & 0x7f ) + ((( int ) hll[pos + 1 ] - 1 ) << 7 );
47+ p = ((hll[pos] - 1 ) & 0x7f ) + ((hll[pos + 1 ] - 1 ) << 7 );
9248 }
9349 if (p >= m) {
9450 return -1 ;
@@ -104,7 +60,9 @@ static int unpack_hll(const string& hll, char* res) {
10460 return m;
10561}
10662
107- static Optional<double > hll_count (const string& hll, int m) {
63+ Optional<double > hll_count (const string& hll, int m) noexcept {
64+ char hll_buf[HLL_BUF_SIZE];
65+
10866 double pow_2_32 = (1LL << 32 );
10967 double alpha_m = 0.7213 / (1.0 + 1.079 / m);
11068 char const * s;
@@ -137,7 +95,7 @@ static Optional<double> hll_count(const string& hll, int m) {
13795 e -= e * (bias / 100.0 );
13896 }
13997 } else {
140- assert (0 );
98+ php_assert (0 );
14199 }
142100 }
143101 return e;
@@ -147,17 +105,17 @@ static Optional<double> hll_count(const string& hll, int m) {
147105 * Do not change implementation of this hash function, because hashes may be saved in a permanent storage.
148106 * A full copy of the same function exists in vkext-stats.c in vkext.
149107 */
150- static long long dl_murmur64a_hash (const void * data, size_t len) {
151- assert ((len & 7 ) == 0 );
108+ long long dl_murmur64a_hash (const void * data, size_t len) noexcept {
109+ php_assert ((len & 7 ) == 0 );
152110 unsigned long long m = 0xc6a4a7935bd1e995 ;
153111 int r = 47 ;
154112 unsigned long long h = 0xcafebabeull ^ (m * len);
155113
156- const unsigned char * start = ( const unsigned char *) data;
114+ const unsigned char * start = static_cast < const unsigned char *>( data) ;
157115 const unsigned char * end = start + len;
158116
159117 while (start != end) {
160- unsigned long long k = *( unsigned long long *) start;
118+ unsigned long long k = *reinterpret_cast < const unsigned long long *>( start) ;
161119 k *= m;
162120 k ^= k >> r;
163121 k *= m;
@@ -166,23 +124,24 @@ static long long dl_murmur64a_hash(const void* data, size_t len) {
166124 start += 8 ;
167125 }
168126
169- start = ( const unsigned char *) data;
127+ start = static_cast < const unsigned char *>( data) ;
170128
129+ // NOTE: `(len & 7) == 0` is asserted at the top of this function, so none of the cases below is expected to run
171130 switch (len & 7 ) {
172131 case 7 :
173- h ^= ( unsigned long long ) start[6 ] << 48 ; /* fallthrough */
132+ h ^= static_cast < unsigned long long >( start[6 ]) << 48 ; /* fallthrough */
174133 case 6 :
175- h ^= ( unsigned long long ) start[5 ] << 40 ; /* fallthrough */
134+ h ^= static_cast < unsigned long long >( start[5 ]) << 40 ; /* fallthrough */
176135 case 5 :
177- h ^= ( unsigned long long ) start[4 ] << 32 ; /* fallthrough */
136+ h ^= static_cast < unsigned long long >( start[4 ]) << 32 ; /* fallthrough */
178137 case 4 :
179- h ^= ( unsigned long long ) start[3 ] << 24 ; /* fallthrough */
138+ h ^= static_cast < unsigned long long >( start[3 ]) << 24 ; /* fallthrough */
180139 case 3 :
181- h ^= ( unsigned long long ) start[2 ] << 16 ; /* fallthrough */
140+ h ^= static_cast < unsigned long long >( start[2 ]) << 16 ; /* fallthrough */
182141 case 2 :
183- h ^= ( unsigned long long ) start[1 ] << 8 ; /* fallthrough */
142+ h ^= static_cast < unsigned long long >( start[1 ]) << 8 ; /* fallthrough */
184143 case 1 :
185- h ^= ( unsigned long long ) start[0 ];
144+ h ^= static_cast < unsigned long long >( start[0 ]) ;
186145 h *= m;
187146 };
188147
@@ -192,17 +151,93 @@ static long long dl_murmur64a_hash(const void* data, size_t len) {
192151 return h;
193152}
194153
195- static void hll_add_shifted (unsigned char * hll, int hll_size, long long value) {
154+ void hll_add_shifted (unsigned char * hll, int hll_size, long long value) noexcept {
196155 unsigned long long hash = dl_murmur64a_hash (&(value), sizeof (long long ));
197156 unsigned int idx = hash >> (64LL - hll_size);
198- unsigned char rank = (hash == 0 ) ? 0 : ( unsigned char ) fmin (__builtin_ctzll (hash) + 1 , 64 - hll_size);
157+ unsigned char rank = (hash == 0 ) ? 0 : static_cast < unsigned char >( fmin (__builtin_ctzll (hash) + 1 , 64 - hll_size) );
199158 rank += HLL_FIRST_RANK_CHAR;
200159 if (hll[idx] < rank) {
201160 hll[idx] = rank;
202161 }
203162}
204163
205- Optional<string> f$vk_stats_hll_add(const string& hll, const array<mixed>& a) {
164+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copypaste from common/statistics.c
165+ string hll_pack (const string& s, int len) noexcept {
166+ if (len > MAX_HLL_SIZE || len == 0 || s[0 ] == HLL_PACK_CHAR || s[0 ] == HLL_PACK_CHAR_V2) {
167+ return s;
168+ }
169+ unsigned char buf[HLL_BUF_SIZE];
170+ int p = 0 ;
171+ buf[p++] = HLL_PACK_CHAR_V2;
172+ buf[p++] = ' 0' + __builtin_ctz (len);
173+ php_assert (__builtin_popcount (len) == 1 );
174+ for (int i = 0 ; i < len; i++) {
175+ if (s[i] > HLL_FIRST_RANK_CHAR) {
176+ if (p + 2 >= len) {
177+ return s;
178+ }
179+ buf[p++] = static_cast <unsigned char >((i & 0x7f ) + 1 );
180+ buf[p++] = (i >> 7 ) + 1 ;
181+ buf[p++] = s[i];
182+ }
183+ php_assert (p < HLL_BUF_SIZE);
184+ }
185+ return {reinterpret_cast <char *>(buf), static_cast <string::size_type>(p)};
186+ }
187+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
188+
189+ } // namespace
190+
191+ Optional<string> f$vk_stats_hll_merge(const array<mixed>& a) noexcept {
192+ string result;
193+ char * result_buff = nullptr ;
194+ int result_len = -1 ;
195+ for (array<mixed>::const_iterator it = a.begin (); it != a.end (); ++it) {
196+ if (!it.get_value ().is_string ()) {
197+ return false ;
198+ }
199+ string cur = it.get_value ().to_string ();
200+ if (result_len == -1 ) {
201+ result_len = get_hll_size (cur);
202+ result.assign (result_len, static_cast <char >(HLL_FIRST_RANK_CHAR));
203+ result_buff = result.buffer ();
204+ }
205+ if (is_hll_unpacked (cur)) {
206+ if (result_len != cur.size ()) {
207+ return false ;
208+ }
209+ int i;
210+ for (i = 0 ; i < result_len; i++) {
211+ if (result_buff[i] < cur[i]) {
212+ result_buff[i] = cur[i];
213+ }
214+ }
215+ } else {
216+ int i = 1 + (cur[0 ] == HLL_PACK_CHAR_V2);
217+ while (i + 2 < cur.size ()) {
218+ int p;
219+ if (cur[0 ] == HLL_PACK_CHAR) {
220+ p = (to_half_byte (cur[i]) << 4 ) + to_half_byte (cur[i + 1 ]);
221+ } else {
222+ p = ((cur[i] - 1 ) & 0x7f ) + ((cur[i + 1 ] - 1 ) << 7 );
223+ }
224+ if (p >= result_len) {
225+ return false ;
226+ }
227+ if (result_buff[p] < cur[i + 2 ]) {
228+ result_buff[p] = cur[i + 2 ];
229+ }
230+ i += 3 ;
231+ }
232+ }
233+ }
234+ return result;
235+ }
236+
237+ Optional<string> f$vk_stats_hll_add(const string& hll, const array<mixed>& a) noexcept {
238+ auto res = string (HLL_BUF_SIZE, false );
239+ auto hll_buf = res.buffer ();
240+
206241 if (!is_hll_unpacked (hll)) {
207242 return false ;
208243 }
@@ -212,19 +247,21 @@ Optional<string> f$vk_stats_hll_add(const string& hll, const array<mixed>& a) {
212247 int hll_size = __builtin_ctz (get_hll_size (hll));
213248 memcpy (hll_buf, hll.c_str (), hll.size ());
214249 for (array<mixed>::const_iterator it = a.begin (); it != a.end (); ++it) {
215- hll_add_shifted (( unsigned char *) hll_buf, hll_size, it.get_value ().to_int ());
250+ hll_add_shifted (reinterpret_cast < unsigned char *>( hll_buf) , hll_size, it.get_value ().to_int ());
216251 }
217- return string (hll_buf, hll.size ());
252+
253+ res.shrink (hll.size ());
254+ return res;
218255}
219256
220- Optional<string> f$vk_stats_hll_create(const array<mixed>& a, int64_t size) {
257+ Optional<string> f$vk_stats_hll_create(const array<mixed>& a, int64_t size) noexcept {
221258 if (size != (1 << 8 ) && size != (1 << 14 )) {
222259 return false ;
223260 }
224- return f$vk_stats_hll_add (string ((string::size_type) size, ( char ) HLL_FIRST_RANK_CHAR), a);
261+ return f$vk_stats_hll_add (string (size, static_cast < char >( HLL_FIRST_RANK_CHAR) ), a);
225262}
226263
227- Optional<double > f$vk_stats_hll_count(const string& hll) {
264+ Optional<double > f$vk_stats_hll_count(const string& hll) noexcept {
228265 int size = get_hll_size (hll);
229266 if (size == (1 << 8 ) || size == (1 << 14 )) {
230267 return hll_count (hll, size);
@@ -233,39 +270,14 @@ Optional<double> f$vk_stats_hll_count(const string& hll) {
233270 }
234271}
235272
236- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copypaste from common/statistics.c
237- string hll_pack (const string& s, int len) {
238- if (len > MAX_HLL_SIZE || len == 0 || s[0 ] == HLL_PACK_CHAR || s[0 ] == HLL_PACK_CHAR_V2) {
239- return s;
240- }
241- unsigned char buf[HLL_BUF_SIZE];
242- int p = 0 ;
243- buf[p++] = HLL_PACK_CHAR_V2;
244- buf[p++] = (unsigned char )(' 0' + (unsigned char )(__builtin_ctz (len)));
245- assert (__builtin_popcount (len) == 1 );
246- for (int i = 0 ; i < len; i++) {
247- if (s[i] > HLL_FIRST_RANK_CHAR) {
248- if (p + 2 >= len) {
249- return s;
250- }
251- buf[p++] = (unsigned char )((i & 0x7f ) + 1 );
252- buf[p++] = (unsigned char )((i >> 7 ) + 1 );
253- buf[p++] = (unsigned char )s[i];
254- }
255- assert (p < HLL_BUF_SIZE);
256- }
257- return {(char *)buf, static_cast <string::size_type>(p)};
258- }
259- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
260-
261- Optional<string> f$vk_stats_hll_pack(const string& hll) {
273+ Optional<string> f$vk_stats_hll_pack(const string& hll) noexcept {
262274 if (!is_hll_unpacked (hll)) {
263275 return false ;
264276 }
265277 return hll_pack (hll, hll.size ());
266278}
267279
268- Optional<string> f$vk_stats_hll_unpack(const string& hll) {
280+ Optional<string> f$vk_stats_hll_unpack(const string& hll) noexcept {
269281 if (is_hll_unpacked (hll)) {
270282 return false ;
271283 }
@@ -277,6 +289,6 @@ Optional<string> f$vk_stats_hll_unpack(const string& hll) {
277289 return string (res, m);
278290}
279291
280- bool f$vk_stats_hll_is_packed(const string& hll) {
292+ bool f$vk_stats_hll_is_packed(const string& hll) noexcept {
281293 return !is_hll_unpacked (hll);
282294}
0 commit comments