Skip to content

Commit eeee03e

Browse files
authored
move HyperLogLog functions to runtime-common (#1424)
* reformat vkext-functions.txt * use php_assert instead of assert * c to c++ style * anonymous namespace instead of static
1 parent eb28b1b commit eeee03e

9 files changed

Lines changed: 168 additions & 137 deletions

File tree

builtin-functions/kphp-light/stdlib/vkext-functions.txt

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,21 @@ function vk_sp_full_simplify ($str ::: string) ::: string;
2222

2323
function vk_json_encode_safe ($v ::: mixed) ::: string;
2424

25-
// ===== UNSUPPORTED =====
26-
27-
/** @kphp-extern-func-info stub generation-required */
2825
function vk_stats_hll_merge($str ::: mixed) ::: string | false;
29-
/** @kphp-extern-func-info stub generation-required */
26+
3027
function vk_stats_hll_count($hll ::: string) ::: float | false;
3128

29+
function vk_stats_hll_create($a ::: array = array(), $size ::: int = 256) ::: string | false;
30+
31+
function vk_stats_hll_add($hll ::: string, $a ::: array) ::: string | false;
32+
33+
function vk_stats_hll_pack($hll ::: string) ::: string | false;
34+
35+
function vk_stats_hll_unpack($hll ::: string) ::: string | false;
36+
37+
function vk_stats_hll_is_packed($hll ::: string) ::: bool;
38+
39+
// ===== UNSUPPORTED =====
40+
3241
/** @kphp-extern-func-info stub generation-required */
3342
function vk_flex ($name ::: string, $case_name ::: string, $sex ::: int, $type ::: string, $lang_id ::: int = 0) ::: string;

runtime-common/stdlib/stdlib.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ prepend(STDLIB_SYSTEM stdlib/system/ system-functions.cpp)
1212
prepend(STDLIB_SERVER stdlib/server/ url-functions.cpp
1313
net-functions.cpp)
1414
prepend(STDLIB_VKEXT stdlib/vkext/ string-processing.cpp
15-
vkext-functions.cpp)
15+
vkext-functions.cpp vkext-stats.cpp)
1616

1717
if(COMPILER_CLANG)
1818
set_source_files_properties(${RUNTIME_COMMON_DIR}/stdlib/vkext/string-processing.cpp PROPERTIES COMPILE_FLAGS -Wno-invalid-source-encoding)

runtime/vkext_stats.cpp renamed to runtime-common/stdlib/vkext/vkext-stats.cpp

Lines changed: 126 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -2,93 +2,49 @@
22
// Copyright (c) 2020 LLC «V Kontakte»
33
// Distributed under the GPL v3 License, see LICENSE.notice.txt
44

5-
#include "runtime/vkext_stats.h"
5+
#include "runtime-common/stdlib/vkext/vkext-stats.h"
66

7-
#include <assert.h>
8-
#include <limits.h>
9-
#include <string.h>
7+
#include <climits>
8+
#include <cstring>
109

11-
#define HLL_FIRST_RANK_CHAR 0x30
12-
#define HLL_PACK_CHAR '!'
13-
#define HLL_PACK_CHAR_V2 '$'
14-
#define TO_HALF_BYTE(c) ((int)(((c > '9') ? (c - 7) : c) - '0'))
15-
#define MAX_HLL_SIZE (1 << 14)
16-
#define HLL_BUF_SIZE (MAX_HLL_SIZE + 1000)
10+
namespace {
1711

18-
static char hll_buf[HLL_BUF_SIZE];
12+
constexpr auto HLL_FIRST_RANK_CHAR = 0x30;
13+
constexpr auto HLL_PACK_CHAR = '!';
14+
constexpr auto HLL_PACK_CHAR_V2 = '$';
15+
constexpr auto MAX_HLL_SIZE = (1 << 14);
16+
constexpr auto HLL_BUF_SIZE = (MAX_HLL_SIZE + 1000);
17+
18+
int to_half_byte(char c) {
19+
return (((c > '9') ? (c - 7) : c) - '0');
20+
}
1921

2022
//////
2123
// hll functions
2224
//////
2325

24-
static bool is_hll_unpacked(const string& hll) {
26+
bool is_hll_unpacked(const string& hll) noexcept {
2527
return hll.empty() || (hll[0] != HLL_PACK_CHAR && hll[0] != HLL_PACK_CHAR_V2);
2628
}
2729

28-
static int get_hll_size(const string& hll) {
30+
int get_hll_size(const string& hll) noexcept {
2931
if (is_hll_unpacked(hll)) {
3032
return hll.size();
3133
}
3234
return hll[0] == HLL_PACK_CHAR ? (1 << 8) : (1 << (hll[1] - '0'));
3335
}
3436

35-
Optional<string> f$vk_stats_hll_merge(const array<mixed>& a) {
36-
string result;
37-
char* result_buff = nullptr;
38-
int result_len = -1;
39-
for (array<mixed>::const_iterator it = a.begin(); it != a.end(); ++it) {
40-
if (!it.get_value().is_string()) {
41-
return false;
42-
}
43-
string cur = it.get_value().to_string();
44-
if (result_len == -1) {
45-
result_len = get_hll_size(cur);
46-
result.assign((string::size_type)result_len, (char)HLL_FIRST_RANK_CHAR);
47-
result_buff = result.buffer();
48-
}
49-
if (is_hll_unpacked(cur)) {
50-
if (result_len != cur.size()) {
51-
return false;
52-
}
53-
int i;
54-
for (i = 0; i < result_len; i++) {
55-
if (result_buff[i] < cur[i]) {
56-
result_buff[i] = cur[i];
57-
}
58-
}
59-
} else {
60-
int i = 1 + (cur[0] == HLL_PACK_CHAR_V2);
61-
while (i + 2 < cur.size()) {
62-
int p;
63-
if (cur[0] == HLL_PACK_CHAR) {
64-
p = (TO_HALF_BYTE(cur[i]) << 4) + TO_HALF_BYTE(cur[i + 1]);
65-
} else {
66-
p = (((int)cur[i] - 1) & 0x7f) + (((int)cur[i + 1] - 1) << 7);
67-
}
68-
if (p >= result_len) {
69-
return false;
70-
}
71-
if (result_buff[p] < cur[i + 2]) {
72-
result_buff[p] = cur[i + 2];
73-
}
74-
i += 3;
75-
}
76-
}
77-
}
78-
return result;
79-
}
80-
81-
static int unpack_hll(const string& hll, char* res) {
82-
assert(!is_hll_unpacked(hll));
37+
int unpack_hll(const string& hll, char* res) noexcept {
38+
php_assert(!is_hll_unpacked(hll));
8339
int m = get_hll_size(hll);
8440
int pos = 1 + (hll[0] == HLL_PACK_CHAR_V2);
85-
memset(res, HLL_FIRST_RANK_CHAR, (size_t)m);
41+
memset(res, HLL_FIRST_RANK_CHAR, m);
8642
while (pos + 2 < hll.size()) {
8743
int p;
8844
if (hll[0] == HLL_PACK_CHAR) {
89-
p = (TO_HALF_BYTE(hll[pos]) << 4) + TO_HALF_BYTE(hll[pos + 1]);
45+
p = (to_half_byte(hll[pos]) << 4) + to_half_byte(hll[pos + 1]);
9046
} else {
91-
p = (((int)hll[pos] - 1) & 0x7f) + (((int)hll[pos + 1] - 1) << 7);
47+
p = ((hll[pos] - 1) & 0x7f) + ((hll[pos + 1] - 1) << 7);
9248
}
9349
if (p >= m) {
9450
return -1;
@@ -104,7 +60,9 @@ static int unpack_hll(const string& hll, char* res) {
10460
return m;
10561
}
10662

107-
static Optional<double> hll_count(const string& hll, int m) {
63+
Optional<double> hll_count(const string& hll, int m) noexcept {
64+
char hll_buf[HLL_BUF_SIZE];
65+
10866
double pow_2_32 = (1LL << 32);
10967
double alpha_m = 0.7213 / (1.0 + 1.079 / m);
11068
char const* s;
@@ -137,7 +95,7 @@ static Optional<double> hll_count(const string& hll, int m) {
13795
e -= e * (bias / 100.0);
13896
}
13997
} else {
140-
assert(0);
98+
php_assert(0);
14199
}
142100
}
143101
return e;
@@ -147,17 +105,17 @@ static Optional<double> hll_count(const string& hll, int m) {
147105
* Do not change implementation of this hash function, because hashes may be saved in a permanent storage.
148106
* A full copy of the same function exists in vkext-stats.c in vkext.
149107
*/
150-
static long long dl_murmur64a_hash(const void* data, size_t len) {
151-
assert((len & 7) == 0);
108+
long long dl_murmur64a_hash(const void* data, size_t len) noexcept {
109+
php_assert((len & 7) == 0);
152110
unsigned long long m = 0xc6a4a7935bd1e995;
153111
int r = 47;
154112
unsigned long long h = 0xcafebabeull ^ (m * len);
155113

156-
const unsigned char* start = (const unsigned char*)data;
114+
const unsigned char* start = static_cast<const unsigned char*>(data);
157115
const unsigned char* end = start + len;
158116

159117
while (start != end) {
160-
unsigned long long k = *(unsigned long long*)start;
118+
unsigned long long k = *reinterpret_cast<const unsigned long long*>(start);
161119
k *= m;
162120
k ^= k >> r;
163121
k *= m;
@@ -166,23 +124,24 @@ static long long dl_murmur64a_hash(const void* data, size_t len) {
166124
start += 8;
167125
}
168126

169-
start = (const unsigned char*)data;
127+
start = static_cast<const unsigned char*>(data);
170128

129+
// `(len & 7) == 0` is asserted at the top of this function, so no case of the switch below is ever taken
171130
switch (len & 7) {
172131
case 7:
173-
h ^= (unsigned long long)start[6] << 48; /* fallthrough */
132+
h ^= static_cast<unsigned long long>(start[6]) << 48; /* fallthrough */
174133
case 6:
175-
h ^= (unsigned long long)start[5] << 40; /* fallthrough */
134+
h ^= static_cast<unsigned long long>(start[5]) << 40; /* fallthrough */
176135
case 5:
177-
h ^= (unsigned long long)start[4] << 32; /* fallthrough */
136+
h ^= static_cast<unsigned long long>(start[4]) << 32; /* fallthrough */
178137
case 4:
179-
h ^= (unsigned long long)start[3] << 24; /* fallthrough */
138+
h ^= static_cast<unsigned long long>(start[3]) << 24; /* fallthrough */
180139
case 3:
181-
h ^= (unsigned long long)start[2] << 16; /* fallthrough */
140+
h ^= static_cast<unsigned long long>(start[2]) << 16; /* fallthrough */
182141
case 2:
183-
h ^= (unsigned long long)start[1] << 8; /* fallthrough */
142+
h ^= static_cast<unsigned long long>(start[1]) << 8; /* fallthrough */
184143
case 1:
185-
h ^= (unsigned long long)start[0];
144+
h ^= static_cast<unsigned long long>(start[0]);
186145
h *= m;
187146
};
188147

@@ -192,17 +151,93 @@ static long long dl_murmur64a_hash(const void* data, size_t len) {
192151
return h;
193152
}
194153

195-
static void hll_add_shifted(unsigned char* hll, int hll_size, long long value) {
154+
void hll_add_shifted(unsigned char* hll, int hll_size, long long value) noexcept {
196155
unsigned long long hash = dl_murmur64a_hash(&(value), sizeof(long long));
197156
unsigned int idx = hash >> (64LL - hll_size);
198-
unsigned char rank = (hash == 0) ? 0 : (unsigned char)fmin(__builtin_ctzll(hash) + 1, 64 - hll_size);
157+
unsigned char rank = (hash == 0) ? 0 : static_cast<unsigned char>(fmin(__builtin_ctzll(hash) + 1, 64 - hll_size));
199158
rank += HLL_FIRST_RANK_CHAR;
200159
if (hll[idx] < rank) {
201160
hll[idx] = rank;
202161
}
203162
}
204163

205-
Optional<string> f$vk_stats_hll_add(const string& hll, const array<mixed>& a) {
164+
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copypaste from common/statistics.c
165+
string hll_pack(const string& s, int len) noexcept {
166+
if (len > MAX_HLL_SIZE || len == 0 || s[0] == HLL_PACK_CHAR || s[0] == HLL_PACK_CHAR_V2) {
167+
return s;
168+
}
169+
unsigned char buf[HLL_BUF_SIZE];
170+
int p = 0;
171+
buf[p++] = HLL_PACK_CHAR_V2;
172+
buf[p++] = '0' + __builtin_ctz(len);
173+
php_assert(__builtin_popcount(len) == 1);
174+
for (int i = 0; i < len; i++) {
175+
if (s[i] > HLL_FIRST_RANK_CHAR) {
176+
if (p + 2 >= len) {
177+
return s;
178+
}
179+
buf[p++] = static_cast<unsigned char>((i & 0x7f) + 1);
180+
buf[p++] = (i >> 7) + 1;
181+
buf[p++] = s[i];
182+
}
183+
php_assert(p < HLL_BUF_SIZE);
184+
}
185+
return {reinterpret_cast<char*>(buf), static_cast<string::size_type>(p)};
186+
}
187+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
188+
189+
} // namespace
190+
191+
Optional<string> f$vk_stats_hll_merge(const array<mixed>& a) noexcept {
192+
string result;
193+
char* result_buff = nullptr;
194+
int result_len = -1;
195+
for (array<mixed>::const_iterator it = a.begin(); it != a.end(); ++it) {
196+
if (!it.get_value().is_string()) {
197+
return false;
198+
}
199+
string cur = it.get_value().to_string();
200+
if (result_len == -1) {
201+
result_len = get_hll_size(cur);
202+
result.assign(result_len, static_cast<char>(HLL_FIRST_RANK_CHAR));
203+
result_buff = result.buffer();
204+
}
205+
if (is_hll_unpacked(cur)) {
206+
if (result_len != cur.size()) {
207+
return false;
208+
}
209+
int i;
210+
for (i = 0; i < result_len; i++) {
211+
if (result_buff[i] < cur[i]) {
212+
result_buff[i] = cur[i];
213+
}
214+
}
215+
} else {
216+
int i = 1 + (cur[0] == HLL_PACK_CHAR_V2);
217+
while (i + 2 < cur.size()) {
218+
int p;
219+
if (cur[0] == HLL_PACK_CHAR) {
220+
p = (to_half_byte(cur[i]) << 4) + to_half_byte(cur[i + 1]);
221+
} else {
222+
p = ((cur[i] - 1) & 0x7f) + ((cur[i + 1] - 1) << 7);
223+
}
224+
if (p >= result_len) {
225+
return false;
226+
}
227+
if (result_buff[p] < cur[i + 2]) {
228+
result_buff[p] = cur[i + 2];
229+
}
230+
i += 3;
231+
}
232+
}
233+
}
234+
return result;
235+
}
236+
237+
Optional<string> f$vk_stats_hll_add(const string& hll, const array<mixed>& a) noexcept {
238+
auto res = string(HLL_BUF_SIZE, false);
239+
auto hll_buf = res.buffer();
240+
206241
if (!is_hll_unpacked(hll)) {
207242
return false;
208243
}
@@ -212,19 +247,21 @@ Optional<string> f$vk_stats_hll_add(const string& hll, const array<mixed>& a) {
212247
int hll_size = __builtin_ctz(get_hll_size(hll));
213248
memcpy(hll_buf, hll.c_str(), hll.size());
214249
for (array<mixed>::const_iterator it = a.begin(); it != a.end(); ++it) {
215-
hll_add_shifted((unsigned char*)hll_buf, hll_size, it.get_value().to_int());
250+
hll_add_shifted(reinterpret_cast<unsigned char*>(hll_buf), hll_size, it.get_value().to_int());
216251
}
217-
return string(hll_buf, hll.size());
252+
253+
res.shrink(hll.size());
254+
return res;
218255
}
219256

220-
Optional<string> f$vk_stats_hll_create(const array<mixed>& a, int64_t size) {
257+
Optional<string> f$vk_stats_hll_create(const array<mixed>& a, int64_t size) noexcept {
221258
if (size != (1 << 8) && size != (1 << 14)) {
222259
return false;
223260
}
224-
return f$vk_stats_hll_add(string((string::size_type)size, (char)HLL_FIRST_RANK_CHAR), a);
261+
return f$vk_stats_hll_add(string(size, static_cast<char>(HLL_FIRST_RANK_CHAR)), a);
225262
}
226263

227-
Optional<double> f$vk_stats_hll_count(const string& hll) {
264+
Optional<double> f$vk_stats_hll_count(const string& hll) noexcept {
228265
int size = get_hll_size(hll);
229266
if (size == (1 << 8) || size == (1 << 14)) {
230267
return hll_count(hll, size);
@@ -233,39 +270,14 @@ Optional<double> f$vk_stats_hll_count(const string& hll) {
233270
}
234271
}
235272

236-
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copypaste from common/statistics.c
237-
string hll_pack(const string& s, int len) {
238-
if (len > MAX_HLL_SIZE || len == 0 || s[0] == HLL_PACK_CHAR || s[0] == HLL_PACK_CHAR_V2) {
239-
return s;
240-
}
241-
unsigned char buf[HLL_BUF_SIZE];
242-
int p = 0;
243-
buf[p++] = HLL_PACK_CHAR_V2;
244-
buf[p++] = (unsigned char)('0' + (unsigned char)(__builtin_ctz(len)));
245-
assert(__builtin_popcount(len) == 1);
246-
for (int i = 0; i < len; i++) {
247-
if (s[i] > HLL_FIRST_RANK_CHAR) {
248-
if (p + 2 >= len) {
249-
return s;
250-
}
251-
buf[p++] = (unsigned char)((i & 0x7f) + 1);
252-
buf[p++] = (unsigned char)((i >> 7) + 1);
253-
buf[p++] = (unsigned char)s[i];
254-
}
255-
assert(p < HLL_BUF_SIZE);
256-
}
257-
return {(char*)buf, static_cast<string::size_type>(p)};
258-
}
259-
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
260-
261-
Optional<string> f$vk_stats_hll_pack(const string& hll) {
273+
Optional<string> f$vk_stats_hll_pack(const string& hll) noexcept {
262274
if (!is_hll_unpacked(hll)) {
263275
return false;
264276
}
265277
return hll_pack(hll, hll.size());
266278
}
267279

268-
Optional<string> f$vk_stats_hll_unpack(const string& hll) {
280+
Optional<string> f$vk_stats_hll_unpack(const string& hll) noexcept {
269281
if (is_hll_unpacked(hll)) {
270282
return false;
271283
}
@@ -277,6 +289,6 @@ Optional<string> f$vk_stats_hll_unpack(const string& hll) {
277289
return string(res, m);
278290
}
279291

280-
bool f$vk_stats_hll_is_packed(const string& hll) {
292+
bool f$vk_stats_hll_is_packed(const string& hll) noexcept {
281293
return !is_hll_unpacked(hll);
282294
}

0 commit comments

Comments
 (0)