Skip to content

Commit 1d88f5b

Browse files
committed
updated external/essentials; added tests for mmap for all classes
1 parent d033722 commit 1d88f5b

11 files changed

Lines changed: 470 additions & 3 deletions

test/test_bit_vector.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,17 +214,22 @@ TEST_CASE("save_mmap") {
214214
}
215215

216216
const std::string output_filename("bv.bin");
217+
uint64_t num_saved_bytes = 0;
218+
uint64_t num_mapped_bytes = 0;
217219

218220
{
219221
bit_vector bv;
220222
bv_builder.build(bv);
221-
uint64_t num_saved_bytes = essentials::save(bv, output_filename.c_str());
223+
num_saved_bytes = essentials::save(bv, output_filename.c_str());
222224
std::cout << "num_saved_bytes = " << num_saved_bytes << std::endl;
223225
}
224226

225227
{
226228
bit_vector bv_mmapped;
227-
auto mmap_owner = essentials::mmap(bv_mmapped, output_filename.c_str());
229+
num_mapped_bytes = essentials::mmap(bv_mmapped, output_filename.c_str());
230+
std::cout << "num_mapped_bytes = " << num_mapped_bytes << std::endl;
231+
REQUIRE(num_saved_bytes == num_mapped_bytes);
232+
228233
std::cout << "checking correctness of bit_vector::iterator::prev..." << std::endl;
229234
REQUIRE(bv_mmapped.num_bits() == sequence_length * width);
230235
for (uint64_t i = 0; i != bv_mmapped.num_bits(); ++i) {

test/test_cache_line_elias_fano.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,35 @@ TEST_CASE("save_load_and_swap") {
5858
}
5959
std::cout << "EVERYTHING OK!" << std::endl;
6060
}
61+
62+
// Saves a cache_line_elias_fano to disk, maps the file back with
// essentials::mmap and checks that
//   (a) the number of mapped bytes equals the number of saved bytes, and
//   (b) access(i) returns seq[i] for every position i in [0, sequence_length).
TEST_CASE("save_mmap_cache_line_elias_fano") {
    std::vector<uint64_t> seq = test::get_sorted_sequence(sequence_length, 500);
    // File name unique to this test case, so that other test binaries running
    // in the same working directory cannot overwrite or delete the file while
    // it is mapped here.
    const std::string output_filename("cache_line_ef_mmap.bin");
    uint64_t num_saved_bytes = 0;
    uint64_t num_mapped_bytes = 0;

    {
        // The encoded sequence lives only in this scope: it is destroyed
        // before the file is mapped back.
        cache_line_elias_fano ef = encode_with_cache_line_elias_fano(seq);
        num_saved_bytes = essentials::save(ef, output_filename.c_str());
        std::cout << "num_saved_bytes = " << num_saved_bytes << std::endl;
    }

    {
        // The mapped object goes out of scope before the file is removed below.
        cache_line_elias_fano ef_mmapped;
        num_mapped_bytes = essentials::mmap(ef_mmapped, output_filename.c_str());
        std::cout << "num_mapped_bytes = " << num_mapped_bytes << std::endl;
        REQUIRE(num_saved_bytes == num_mapped_bytes);

        std::cout << "checking correctness of access..." << std::endl;
        for (uint64_t i = 0; i != sequence_length; ++i) {
            uint64_t got = ef_mmapped.access(i);
            uint64_t expected = seq[i];
            REQUIRE_MESSAGE(got == expected, "got " << got << " at position " << i << "/"
                                                    << sequence_length << " but expected "
                                                    << expected);
        }
        std::cout << "EVERYTHING OK!" << std::endl;
    }

    std::remove(output_filename.c_str());
}

test/test_compact_vector.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,35 @@ TEST_CASE("save_load_and_swap") {
151151
}
152152
std::cout << "EVERYTHING OK!" << std::endl;
153153
}
154+
155+
// Round-trip check for compact_vector: encode a random sequence, save it,
// map the file back and compare every element (read through the iterator)
// against the input. The byte counts reported by save and mmap must agree.
TEST_CASE("save_mmap_compact_vector") {
    const uint64_t max_int = test::get_random_uint();
    std::cout << "max_int = " << max_int << std::endl;

    std::vector<uint64_t> seq = test::get_sequence(sequence_length, max_int);
    const std::string output_filename("cv.bin");
    const char* const path = output_filename.c_str();

    uint64_t num_saved_bytes = 0;
    {
        // Encoder scope: the in-memory vector is gone before we map the file.
        auto cv = encode_with_compact_vector(seq);
        num_saved_bytes = essentials::save(cv, path);
        std::cout << "num_saved_bytes = " << num_saved_bytes << std::endl;
    }

    {
        // Mapping scope: closed before the backing file is deleted.
        compact_vector cv_mmapped;
        const uint64_t num_mapped_bytes = essentials::mmap(cv_mmapped, path);
        std::cout << "num_mapped_bytes = " << num_mapped_bytes << std::endl;
        REQUIRE(num_saved_bytes == num_mapped_bytes);

        std::cout << "checking correctness of iterator..." << std::endl;
        uint64_t i = 0;
        for (auto it = cv_mmapped.begin(); i != seq.size(); ++it, ++i) {
            REQUIRE_MESSAGE(*it == seq[i], "got " << *it << " at position " << i << "/"
                                                  << seq.size() << " but expected " << seq[i]);
        }
        std::cout << "EVERYTHING OK!" << std::endl;
    }

    std::remove(path);
}

test/test_darray.cpp

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,93 @@ TEST_CASE("very_dense0") { run_test<darray0, false>(8); }
7878

7979
// Super-dense cases: run_test is instantiated once for darray1 and once for
// darray0, each time with argument 2.
TEST_CASE("super_dense1") {
    run_test<darray1, true>(2);
}
TEST_CASE("super_dense0") {
    run_test<darray0, false>(2);
}
81+
82+
template <typename DArray, bool index_ones>
83+
void test_save_load_swap(const uint64_t max_int) {
84+
constexpr bool all_distinct = true;
85+
std::vector<uint64_t> seq = test::get_sorted_sequence(num_positions, max_int, all_distinct);
86+
auto bv = encode_with_bit_vector(seq, index_ones);
87+
88+
const std::string output_filename("darray_swap.bin");
89+
uint64_t num_saved_bytes = 0;
90+
91+
{
92+
DArray select_index;
93+
select_index.build(bv);
94+
REQUIRE(select_index.num_positions() == seq.size());
95+
96+
// Save to disk
97+
num_saved_bytes = essentials::save(select_index, output_filename.c_str());
98+
std::cout << "num_saved_bytes = " << num_saved_bytes << std::endl;
99+
}
100+
101+
DArray select_index_loaded;
102+
uint64_t num_loaded_bytes = essentials::load(select_index_loaded, output_filename.c_str());
103+
std::cout << "num_loaded_bytes = " << num_loaded_bytes << std::endl;
104+
REQUIRE(num_saved_bytes == num_loaded_bytes);
105+
106+
std::cout << "checking correctness of swap and select..." << std::endl;
107+
DArray other;
108+
select_index_loaded.swap(other);
109+
110+
REQUIRE(other.num_positions() == seq.size());
111+
for (uint64_t i = 0; i != seq.size(); ++i) {
112+
uint64_t pos = other.select(bv, i);
113+
REQUIRE_MESSAGE(pos == seq[i], "got " << pos << " but expected " << seq[i]);
114+
}
115+
116+
std::remove(output_filename.c_str());
117+
std::cout << "EVERYTHING OK!" << std::endl;
118+
}
119+
120+
template <typename DArray, bool index_ones>
121+
void test_save_mmap(const uint64_t max_int) {
122+
constexpr bool all_distinct = true;
123+
std::vector<uint64_t> seq = test::get_sorted_sequence(num_positions, max_int, all_distinct);
124+
125+
// The bit_vector must remain alive for the mmapped select_index to use it
126+
auto bv = encode_with_bit_vector(seq, index_ones);
127+
128+
const std::string output_filename("darray_mmap.bin");
129+
uint64_t num_saved_bytes = 0;
130+
uint64_t num_mapped_bytes = 0;
131+
132+
{
133+
DArray select_index;
134+
select_index.build(bv);
135+
REQUIRE(select_index.num_positions() == seq.size());
136+
137+
num_saved_bytes = essentials::save(select_index, output_filename.c_str());
138+
std::cout << "num_saved_bytes = " << num_saved_bytes << std::endl;
139+
}
140+
141+
{
142+
DArray select_index_mmapped;
143+
num_mapped_bytes = essentials::mmap(select_index_mmapped, output_filename.c_str());
144+
std::cout << "num_mapped_bytes = " << num_mapped_bytes << std::endl;
145+
REQUIRE(num_saved_bytes == num_mapped_bytes);
146+
147+
std::cout << "checking correctness of mmapped select..." << std::endl;
148+
REQUIRE(select_index_mmapped.num_positions() == seq.size());
149+
150+
for (uint64_t i = 0; i != seq.size(); ++i) {
151+
uint64_t pos = select_index_mmapped.select(bv, i);
152+
REQUIRE_MESSAGE(pos == seq[i], "got " << pos << " but expected " << seq[i]);
153+
}
154+
std::cout << "EVERYTHING OK!" << std::endl;
155+
}
156+
157+
std::remove(output_filename.c_str());
158+
}
159+
160+
// save/load/swap round trips; the argument is passed on as max_int.
TEST_CASE("darray_save_load_swap_super_sparse1") {
    test_save_load_swap<darray1, true>(32 * 1024);
}
TEST_CASE("darray_save_load_swap_super_sparse0") {
    test_save_load_swap<darray0, false>(32 * 1024);
}

TEST_CASE("darray_save_load_swap_dense1") {
    test_save_load_swap<darray1, true>(32);
}
TEST_CASE("darray_save_load_swap_dense0") {
    test_save_load_swap<darray0, false>(32);
}

// save/mmap round trips; the argument is passed on as max_int.
TEST_CASE("darray_save_mmap_super_sparse1") {
    test_save_mmap<darray1, true>(32 * 1024);
}
TEST_CASE("darray_save_mmap_super_sparse0") {
    test_save_mmap<darray0, false>(32 * 1024);
}

TEST_CASE("darray_save_mmap_dense1") {
    test_save_mmap<darray1, true>(32);
}
TEST_CASE("darray_save_mmap_dense0") {
    test_save_mmap<darray0, false>(32);
}

test/test_elias_fano.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,3 +639,39 @@ TEST_CASE("save_load_and_swap") {
639639
}
640640
std::cout << "EVERYTHING OK!" << std::endl;
641641
}
642+
643+
// Saves an elias_fano<index_zeros = true, encode_prefix_sum = false> to disk,
// maps the file back with essentials::mmap and checks that
//   (a) the number of mapped bytes equals the number of saved bytes, and
//   (b) iterating from begin() yields seq[i] for every i in
//       [0, sequence_length).
TEST_CASE("save_mmap_elias_fano") {
    std::vector<uint64_t> seq = test::get_sorted_sequence(sequence_length);
    constexpr bool index_zeros = true;
    constexpr bool encode_prefix_sum = false;
    using ef_type = elias_fano<index_zeros, encode_prefix_sum>;
    // File name unique to this test case, so that other test binaries running
    // in the same working directory cannot overwrite or delete the file while
    // it is mapped here.
    const std::string output_filename("ef_mmap.bin");
    uint64_t num_saved_bytes = 0;
    uint64_t num_mapped_bytes = 0;

    {
        // The encoded sequence lives only in this scope: it is destroyed
        // before the file is mapped back.
        ef_type ef = encode_with_elias_fano<index_zeros, encode_prefix_sum>(seq);
        num_saved_bytes = essentials::save(ef, output_filename.c_str());
        std::cout << "num_saved_bytes = " << num_saved_bytes << std::endl;
    }

    {
        // The mapped object goes out of scope before the file is removed below.
        ef_type ef_mmapped;
        num_mapped_bytes = essentials::mmap(ef_mmapped, output_filename.c_str());
        std::cout << "num_mapped_bytes = " << num_mapped_bytes << std::endl;
        REQUIRE(num_saved_bytes == num_mapped_bytes);

        std::cout << "checking correctness of iterator..." << std::endl;
        auto it = ef_mmapped.begin();
        for (uint64_t i = 0; i != sequence_length; ++i, it.next()) {
            uint64_t got = it.value();
            uint64_t expected = seq[i];
            REQUIRE_MESSAGE(got == expected, "got " << got << " at position " << i << "/"
                                                    << sequence_length << " but expected "
                                                    << expected);
        }
        std::cout << "EVERYTHING OK!" << std::endl;
    }

    std::remove(output_filename.c_str());
}

test/test_endpoints_sequence.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,3 +232,68 @@ TEST_CASE("locate") {
232232

233233
std::cout << "EVERYTHING OK!" << std::endl;
234234
}
235+
236+
// Round trip for endpoints_sequence<> through essentials::save / load,
// followed by a swap into a default-constructed object; access() on the
// swapped-in object is compared against the input sequence.
TEST_CASE("endpoints_sequence_save_load_swap") {
    std::vector<uint64_t> seq = get_sequence(sequence_length);
    const std::string output_filename("es_swap.bin");
    const char* const path = output_filename.c_str();

    uint64_t num_saved_bytes = 0;
    {
        // The encoded sequence is destroyed before the file is loaded back.
        auto es = encode(seq);
        num_saved_bytes = essentials::save(es, path);
        std::cout << "num_saved_bytes = " << num_saved_bytes << std::endl;
    }

    // Save and load must report the same number of bytes.
    endpoints_sequence<> es_loaded;
    const uint64_t num_loaded_bytes = essentials::load(es_loaded, path);
    std::cout << "num_loaded_bytes = " << num_loaded_bytes << std::endl;
    REQUIRE(num_saved_bytes == num_loaded_bytes);

    std::cout << "checking correctness of access after swap..." << std::endl;
    // All queries below run on the object that received the loaded contents.
    endpoints_sequence<> other;
    es_loaded.swap(other);
    REQUIRE(other.size() == seq.size());

    for (uint64_t i = 0; i != seq.size(); ++i) {
        const uint64_t got = other.access(i);
        const uint64_t expected = seq[i];
        REQUIRE_MESSAGE(got == expected, "got " << got << " at position " << i << "/" << seq.size()
                                                << " but expected " << expected);
    }

    std::remove(path);
    std::cout << "EVERYTHING OK!" << std::endl;
}
267+
268+
// Round trip for endpoints_sequence<> through essentials::save / mmap;
// size() and access() on the mapped object are compared against the input
// sequence, and the saved and mapped byte counts must match.
TEST_CASE("endpoints_sequence_save_mmap") {
    std::vector<uint64_t> seq = get_sequence(sequence_length);
    const std::string output_filename("es_mmap.bin");
    const char* const path = output_filename.c_str();

    uint64_t num_saved_bytes = 0;
    {
        // The encoded sequence is destroyed before the file is mapped back.
        auto es = encode(seq);
        num_saved_bytes = essentials::save(es, path);
        std::cout << "num_saved_bytes = " << num_saved_bytes << std::endl;
    }

    {
        // Mapping scope: closed before the backing file is deleted.
        endpoints_sequence<> es_mmapped;
        const uint64_t num_mapped_bytes = essentials::mmap(es_mmapped, path);
        std::cout << "num_mapped_bytes = " << num_mapped_bytes << std::endl;
        REQUIRE(num_saved_bytes == num_mapped_bytes);

        std::cout << "checking correctness of access after mmap..." << std::endl;
        REQUIRE(es_mmapped.size() == seq.size());

        for (uint64_t i = 0; i != seq.size(); ++i) {
            const uint64_t got = es_mmapped.access(i);
            const uint64_t expected = seq[i];
            REQUIRE_MESSAGE(got == expected, "got " << got << " at position " << i << "/"
                                                    << seq.size() << " but expected " << expected);
        }
        std::cout << "EVERYTHING OK!" << std::endl;
    }

    std::remove(path);
}

test/test_rank9.cpp

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,96 @@ TEST_CASE("sparse") { run_test(128); }
7676
// Denser configurations: run_test is called with arguments 32, 8 and 2.
TEST_CASE("dense") {
    run_test(32);
}
TEST_CASE("very_dense") {
    run_test(8);
}
TEST_CASE("super_dense") {
    run_test(2);
}
79+
80+
void test_save_load_swap(const uint64_t max_int) {
81+
constexpr bool all_distinct = true;
82+
std::vector<uint64_t> seq = test::get_sorted_sequence(num_positions, max_int, all_distinct);
83+
auto [B, rank_index] = encode_with_ranked_bit_vector(seq);
84+
85+
const std::string output_filename("r9_swap.bin");
86+
uint64_t num_saved_bytes = 0;
87+
88+
{
89+
// Assertions on original
90+
REQUIRE(rank_index.num_ones() == seq.size());
91+
REQUIRE(rank_index.rank1(B, 0) == 0);
92+
93+
num_saved_bytes = essentials::save(rank_index, output_filename.c_str());
94+
std::cout << "num_saved_bytes = " << num_saved_bytes << std::endl;
95+
}
96+
97+
rank9 rank_index_loaded;
98+
uint64_t num_loaded_bytes = essentials::load(rank_index_loaded, output_filename.c_str());
99+
std::cout << "num_loaded_bytes = " << num_loaded_bytes << std::endl;
100+
REQUIRE(num_saved_bytes == num_loaded_bytes);
101+
102+
std::cout << "checking correctness of rank1 and rank0 after swap..." << std::endl;
103+
rank9 other;
104+
rank_index_loaded.swap(other);
105+
106+
REQUIRE(other.num_ones() == seq.size());
107+
REQUIRE(other.rank1(B, 0) == 0);
108+
for (uint64_t i = 0; i != seq.size(); ++i) {
109+
uint64_t j = seq[i];
110+
uint64_t num_ones = other.rank1(B, j); // number of 1s in B[0..j)
111+
uint64_t num_zeros = other.rank0(B, j); // number of 0s in B[0..j)
112+
REQUIRE(num_ones + num_zeros == j);
113+
REQUIRE_MESSAGE(num_ones == i, "got " << num_ones << " but expected " << i);
114+
REQUIRE_MESSAGE(num_zeros == j - i, "got " << num_zeros << " but expected " << j - i);
115+
}
116+
117+
std::remove(output_filename.c_str());
118+
std::cout << "EVERYTHING OK!" << std::endl;
119+
}
120+
121+
void test_save_mmap(const uint64_t max_int) {
122+
constexpr bool all_distinct = true;
123+
std::vector<uint64_t> seq = test::get_sorted_sequence(num_positions, max_int, all_distinct);
124+
auto [B, rank_index] = encode_with_ranked_bit_vector(seq);
125+
126+
const std::string output_filename("r9_mmap.bin");
127+
uint64_t num_saved_bytes = 0;
128+
uint64_t num_mapped_bytes = 0;
129+
130+
{
131+
num_saved_bytes = essentials::save(rank_index, output_filename.c_str());
132+
std::cout << "num_saved_bytes = " << num_saved_bytes << std::endl;
133+
}
134+
135+
{
136+
rank9 rank_index_mmapped;
137+
num_mapped_bytes = essentials::mmap(rank_index_mmapped, output_filename.c_str());
138+
std::cout << "num_mapped_bytes = " << num_mapped_bytes << std::endl;
139+
REQUIRE(num_saved_bytes == num_mapped_bytes);
140+
141+
std::cout << "checking correctness of rank1 and rank0 after mmap..." << std::endl;
142+
REQUIRE(rank_index_mmapped.num_ones() == seq.size());
143+
REQUIRE(rank_index_mmapped.rank1(B, 0) == 0);
144+
145+
for (uint64_t i = 0; i != seq.size(); ++i) {
146+
uint64_t j = seq[i];
147+
uint64_t num_ones = rank_index_mmapped.rank1(B, j);
148+
uint64_t num_zeros = rank_index_mmapped.rank0(B, j);
149+
REQUIRE(num_ones + num_zeros == j);
150+
REQUIRE_MESSAGE(num_ones == i, "got " << num_ones << " but expected " << i);
151+
REQUIRE_MESSAGE(num_zeros == j - i, "got " << num_zeros << " but expected " << j - i);
152+
}
153+
std::cout << "EVERYTHING OK!" << std::endl;
154+
}
155+
156+
std::remove(output_filename.c_str());
157+
}
158+
159+
// save/load/swap round trips; the argument is passed on as max_int.
TEST_CASE("rank9_save_load_swap_super_sparse") {
    test_save_load_swap(32 * 1024);
}
TEST_CASE("rank9_save_load_swap_very_sparse") {
    test_save_load_swap(1024);
}
TEST_CASE("rank9_save_load_swap_sparse") {
    test_save_load_swap(128);
}
TEST_CASE("rank9_save_load_swap_dense") {
    test_save_load_swap(32);
}
TEST_CASE("rank9_save_load_swap_very_dense") {
    test_save_load_swap(8);
}
TEST_CASE("rank9_save_load_swap_super_dense") {
    test_save_load_swap(2);
}

// save/mmap round trips; the argument is passed on as max_int.
TEST_CASE("rank9_save_mmap_super_sparse") {
    test_save_mmap(32 * 1024);
}
TEST_CASE("rank9_save_mmap_very_sparse") {
    test_save_mmap(1024);
}
TEST_CASE("rank9_save_mmap_sparse") {
    test_save_mmap(128);
}
TEST_CASE("rank9_save_mmap_dense") {
    test_save_mmap(32);
}
TEST_CASE("rank9_save_mmap_very_dense") {
    test_save_mmap(8);
}
TEST_CASE("rank9_save_mmap_super_dense") {
    test_save_mmap(2);
}

0 commit comments

Comments
 (0)