Skip to content

Commit 1c0b546

Browse files
authored
Implement a SearchView::for_each helper (#881)
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent 14223a3 commit 1c0b546

4 files changed

Lines changed: 354 additions & 7 deletions

File tree

src/search/include/sourcemeta/one/search.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <cstddef> // std::size_t
1212
#include <cstdint> // std::uint8_t
1313
#include <filesystem> // std::filesystem::path
14+
#include <functional> // std::function
1415
#include <memory> // std::unique_ptr
1516
#include <string> // std::string
1617
#include <string_view> // std::string_view
@@ -83,6 +84,9 @@ class SOURCEMETA_ONE_SEARCH_EXPORT SearchView {
8384

8485
auto count() -> std::size_t;
8586
auto at(std::size_t index) -> SearchListEntry;
87+
// Invoke `callback` once per entry for the indexes starting at `offset` and
// ending before `offset + count`, in increasing index order. `count` is
// clamped to the number of entries left after `offset`, so passing a very
// large `count` visits everything from `offset` onwards. Nothing is visited
// when `count` is zero, when `offset` is at or past the number of entries, or
// when the payload is missing or too small to hold the index header and
// offset table. Iteration stops at the first record whose header or field
// data would extend past the end of the payload. The string views of the
// entry handed to `callback` point into the payload of this view rather than
// owning copies of the data.
auto for_each(std::size_t offset, std::size_t count,
88+
const std::function<void(const SearchListEntry &)> &callback)
89+
-> void;
8690

8791
private:
8892
std::filesystem::path path_;

src/search/search.cc

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22

33
#include <sourcemeta/one/metapack.h>
44

5-
#include <algorithm> // std::ranges::search
6-
#include <cassert> // assert
7-
#include <cctype> // std::tolower
8-
#include <cstring> // std::memcpy
9-
#include <limits> // std::numeric_limits
10-
#include <utility> // std::move
5+
#include <algorithm> // std::min, std::ranges::search
6+
#include <cassert> // assert
7+
#include <cctype> // std::tolower
8+
#include <cstring> // std::memcpy
9+
#include <functional> // std::function
10+
#include <limits> // std::numeric_limits
11+
#include <utility> // std::move
1112

1213
namespace sourcemeta::one {
1314

@@ -288,4 +289,75 @@ auto SearchView::at(const std::size_t index) -> SearchListEntry {
288289
.bytes_bundled = record_header->bytes_bundled};
289290
}
290291

292+
auto SearchView::for_each(
293+
const std::size_t offset, const std::size_t count,
294+
const std::function<void(const SearchListEntry &)> &callback) -> void {
295+
this->ensure_open();
296+
if (this->payload_ == nullptr ||
297+
this->payload_size_ < sizeof(SearchIndexHeader)) {
298+
return;
299+
}
300+
301+
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
302+
const auto *header{
303+
reinterpret_cast<const SearchIndexHeader *>(this->payload_)};
304+
const auto total{static_cast<std::size_t>(header->entry_count)};
305+
if (offset >= total || count == 0) {
306+
return;
307+
}
308+
309+
const auto offset_table_end{sizeof(SearchIndexHeader) +
310+
total * sizeof(std::uint32_t)};
311+
if (offset_table_end > this->payload_size_) {
312+
return;
313+
}
314+
315+
const auto remaining{total - offset};
316+
const auto effective_count{std::min(count, remaining)};
317+
const auto last{offset + effective_count};
318+
319+
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
320+
const auto *offset_table{reinterpret_cast<const std::uint32_t *>(
321+
this->payload_ + sizeof(SearchIndexHeader))};
322+
323+
for (std::size_t index{offset}; index < last; ++index) {
324+
const auto record_offset{offset_table[index]};
325+
if (record_offset + sizeof(SearchRecordHeader) > this->payload_size_) {
326+
break;
327+
}
328+
329+
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
330+
const auto *record_header{reinterpret_cast<const SearchRecordHeader *>(
331+
this->payload_ + record_offset)};
332+
const auto field_data_offset{record_offset + sizeof(SearchRecordHeader)};
333+
const auto total_field_length{
334+
static_cast<std::size_t>(record_header->path_length) +
335+
record_header->title_length + record_header->description_length};
336+
if (field_data_offset + total_field_length > this->payload_size_) {
337+
break;
338+
}
339+
340+
const auto *field_data{this->payload_ + field_data_offset};
341+
342+
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
343+
const std::string_view path{reinterpret_cast<const char *>(field_data),
344+
record_header->path_length};
345+
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
346+
const std::string_view title{
347+
reinterpret_cast<const char *>(field_data + record_header->path_length),
348+
record_header->title_length};
349+
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
350+
const std::string_view description{
351+
reinterpret_cast<const char *>(field_data + record_header->path_length +
352+
record_header->title_length),
353+
record_header->description_length};
354+
355+
callback({.path = path,
356+
.title = title,
357+
.description = description,
358+
.bytes_raw = record_header->bytes_raw,
359+
.bytes_bundled = record_header->bytes_bundled});
360+
}
361+
}
362+
291363
} // namespace sourcemeta::one

test/unit/search/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
sourcemeta_googletest(NAMESPACE sourcemeta PROJECT one NAME search
2-
SOURCES search_build_test.cc search_query_test.cc search_view_test.cc)
2+
SOURCES search_build_test.cc search_query_test.cc search_view_test.cc
3+
search_view_for_each_test.cc)
34

45
target_link_libraries(sourcemeta_one_search_unit
56
PRIVATE sourcemeta::one::search)
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
#include <sourcemeta/one/metapack.h>
2+
#include <sourcemeta/one/search.h>
3+
4+
#include <gtest/gtest.h>
5+
6+
#include <chrono> // std::chrono
7+
#include <cstdint> // std::uint32_t, std::uint64_t
8+
#include <cstring> // std::memcpy
9+
#include <filesystem> // std::filesystem
10+
#include <limits> // std::numeric_limits
11+
#include <string> // std::string
12+
#include <string_view> // std::string_view
13+
#include <utility> // std::move
14+
#include <vector> // std::vector
15+
16+
// Owning copy of the fields of a sourcemeta::one::SearchListEntry. The tests
// in this file build these from the entries handed to the for_each callback
// (and from SearchView::at), converting the path, title and description into
// std::string, so that whole result sets can be compared with EXPECT_EQ
// through the defaulted member-wise equality operator.
struct VisitedEntry {
17+
std::string path;
18+
std::string title;
19+
std::string description;
20+
std::uint64_t bytes_raw;
21+
std::uint64_t bytes_bundled;
22+
auto operator==(const VisitedEntry &) const -> bool = default;
23+
};
24+
25+
static auto test_path(const std::string &name) -> std::filesystem::path {
26+
return std::filesystem::path{SEARCH_TEST_DIRECTORY} / name;
27+
}
28+
29+
static auto write_search_file(const std::filesystem::path &path,
30+
std::vector<sourcemeta::one::SearchEntry> entries)
31+
-> void {
32+
const auto payload{sourcemeta::one::make_search(std::move(entries))};
33+
const std::string_view payload_view{
34+
payload.empty()
35+
? std::string_view{}
36+
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
37+
: std::string_view{reinterpret_cast<const char *>(payload.data()),
38+
payload.size()}};
39+
sourcemeta::one::metapack_write_text(
40+
path, payload_view, "application/octet-stream",
41+
sourcemeta::one::MetapackEncoding::Identity, {},
42+
std::chrono::milliseconds{0});
43+
}
44+
45+
static auto write_raw_search_file(const std::filesystem::path &path,
46+
const std::vector<std::uint8_t> &payload)
47+
-> void {
48+
const std::string_view payload_view{
49+
payload.empty()
50+
? std::string_view{}
51+
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
52+
: std::string_view{reinterpret_cast<const char *>(payload.data()),
53+
payload.size()}};
54+
sourcemeta::one::metapack_write_text(
55+
path, payload_view, "application/octet-stream",
56+
sourcemeta::one::MetapackEncoding::Identity, {},
57+
std::chrono::milliseconds{0});
58+
}
59+
60+
// Run SearchView::for_each over `view` with the given `offset` and `count`,
// and return an owning copy of every entry passed to the callback, in the
// order in which the callback was invoked.
static auto collect(sourcemeta::one::SearchView &view, std::size_t offset,
                    std::size_t count) -> std::vector<VisitedEntry> {
  std::vector<VisitedEntry> result;
  // The entry only carries string views, so copy the text into owned strings
  // before the callback returns
  const auto record{
      [&result](const sourcemeta::one::SearchListEntry &entry) -> void {
        VisitedEntry copy{.path = std::string{entry.path},
                          .title = std::string{entry.title},
                          .description = std::string{entry.description},
                          .bytes_raw = entry.bytes_raw,
                          .bytes_bundled = entry.bytes_bundled};
        result.push_back(std::move(copy));
      }};
  view.for_each(offset, count, record);
  return result;
}
74+
75+
// Asking for exactly as many entries as were written visits all of them. The
// expectation lists the entries ordered by path, not in the order in which
// they were handed to write_search_file.
TEST(Search_view_for_each, visits_full_range) {
  const auto path{test_path("for_each_full.metapack")};
  write_search_file(path,
                    {{"/zebra", "Zebra Title", "Zebra Desc", 80, 11, 22},
                     {"/apple", "Apple Title", "Apple Desc", 80, 33, 44},
                     {"/mango", "Mango Title", "Mango Desc", 80, 55, 66}});
  sourcemeta::one::SearchView view{path};

  const std::vector<VisitedEntry> expected{{.path = "/apple",
                                            .title = "Apple Title",
                                            .description = "Apple Desc",
                                            .bytes_raw = 33,
                                            .bytes_bundled = 44},
                                           {.path = "/mango",
                                            .title = "Mango Title",
                                            .description = "Mango Desc",
                                            .bytes_raw = 55,
                                            .bytes_bundled = 66},
                                           {.path = "/zebra",
                                            .title = "Zebra Title",
                                            .description = "Zebra Desc",
                                            .bytes_raw = 11,
                                            .bytes_bundled = 22}};
  EXPECT_EQ(collect(view, 0, 3), expected);
}
99+
100+
// A window of two entries starting at index 1 skips the first entry and stops
// before the last one.
TEST(Search_view_for_each, visits_subset_with_offset) {
  const auto path{test_path("for_each_offset.metapack")};
  write_search_file(path, {{"/a", "A Title", "A Desc", 80, 1, 2},
                           {"/b", "B Title", "B Desc", 80, 3, 4},
                           {"/c", "C Title", "C Desc", 80, 5, 6},
                           {"/d", "D Title", "D Desc", 80, 7, 8}});
  sourcemeta::one::SearchView view{path};

  const std::vector<VisitedEntry> expected{{.path = "/b",
                                            .title = "B Title",
                                            .description = "B Desc",
                                            .bytes_raw = 3,
                                            .bytes_bundled = 4},
                                           {.path = "/c",
                                            .title = "C Title",
                                            .description = "C Desc",
                                            .bytes_raw = 5,
                                            .bytes_bundled = 6}};
  EXPECT_EQ(collect(view, 1, 2), expected);
}
119+
120+
// A count that reaches past the last entry is clamped: only the entries that
// actually exist from the offset onwards are visited.
TEST(Search_view_for_each, clamps_count_to_total) {
  const auto path{test_path("for_each_clamp.metapack")};
  write_search_file(path, {{"/a", "A Title", "A Desc", 80, 1, 2},
                           {"/b", "B Title", "B Desc", 80, 3, 4}});
  sourcemeta::one::SearchView view{path};

  const std::vector<VisitedEntry> expected{{.path = "/b",
                                            .title = "B Title",
                                            .description = "B Desc",
                                            .bytes_raw = 3,
                                            .bytes_bundled = 4}};
  EXPECT_EQ(collect(view, 1, 100), expected);
}
132+
133+
// An offset equal to the number of entries is already out of range, so the
// callback is never invoked.
TEST(Search_view_for_each, skips_when_offset_at_end) {
  const auto path{test_path("for_each_end.metapack")};
  write_search_file(path, {{"/a", "A Title", "A Desc", 80, 1, 2},
                           {"/b", "B Title", "B Desc", 80, 3, 4}});
  sourcemeta::one::SearchView view{path};

  const std::vector<VisitedEntry> nothing{};
  EXPECT_EQ(collect(view, 2, 10), nothing);
}
140+
141+
// An offset far beyond the number of entries visits nothing.
TEST(Search_view_for_each, skips_when_offset_past_end) {
  const auto path{test_path("for_each_past_end.metapack")};
  write_search_file(path, {{"/a", "A Title", "A Desc", 80, 1, 2}});
  sourcemeta::one::SearchView view{path};

  const std::vector<VisitedEntry> nothing{};
  EXPECT_EQ(collect(view, 99, 10), nothing);
}
147+
148+
// A count of zero visits nothing, even though the offset is valid.
TEST(Search_view_for_each, skips_when_count_zero) {
  const auto path{test_path("for_each_zero.metapack")};
  write_search_file(path, {{"/a", "A Title", "A Desc", 80, 1, 2},
                           {"/b", "B Title", "B Desc", 80, 3, 4}});
  sourcemeta::one::SearchView view{path};

  const std::vector<VisitedEntry> nothing{};
  EXPECT_EQ(collect(view, 0, 0), nothing);
}
155+
156+
// Walking the whole index with for_each produces the same entries, in the
// same order, as calling at() for every index from zero up to count().
TEST(Search_view_for_each, visit_order_matches_at) {
  const auto path{test_path("for_each_matches_at.metapack")};
  write_search_file(path, {{"/zebra", "", "", 80, 11, 22},
                           {"/apple", "", "", 80, 33, 44},
                           {"/mango", "", "", 80, 55, 66}});
  sourcemeta::one::SearchView view{path};
  const auto total{view.count()};
  const auto from_for_each{collect(view, 0, total)};

  std::vector<VisitedEntry> from_at;
  for (std::size_t index{0}; index < total; ++index) {
    const auto entry{view.at(index)};
    VisitedEntry copy{.path = std::string{entry.path},
                      .title = std::string{entry.title},
                      .description = std::string{entry.description},
                      .bytes_raw = entry.bytes_raw,
                      .bytes_bundled = entry.bytes_bundled};
    from_at.push_back(std::move(copy));
  }

  EXPECT_EQ(from_for_each, from_at);
}
174+
175+
// An entry written with an empty title and description is reported with
// empty strings for both, while its path and byte counts are preserved.
TEST(Search_view_for_each, empty_strings_for_empty_metadata) {
  const auto path{test_path("for_each_empty_meta.metapack")};
  write_search_file(path, {{"/only/path", "", "", 80, 7, 8}});
  sourcemeta::one::SearchView view{path};

  const std::vector<VisitedEntry> expected{{.path = "/only/path",
                                            .title = "",
                                            .description = "",
                                            .bytes_raw = 7,
                                            .bytes_bundled = 8}};
  EXPECT_EQ(collect(view, 0, 1), expected);
}
186+
187+
// The largest representable count, starting from the first entry, must simply
// visit every entry instead of overflowing the range computation.
TEST(Search_view_for_each, count_size_max_does_not_overflow) {
  const auto path{test_path("for_each_count_max.metapack")};
  write_search_file(path, {{"/a", "A Title", "A Desc", 80, 1, 2},
                           {"/b", "B Title", "B Desc", 80, 3, 4}});
  sourcemeta::one::SearchView view{path};

  constexpr auto everything{std::numeric_limits<std::size_t>::max()};
  const std::vector<VisitedEntry> expected{{.path = "/a",
                                            .title = "A Title",
                                            .description = "A Desc",
                                            .bytes_raw = 1,
                                            .bytes_bundled = 2},
                                           {.path = "/b",
                                            .title = "B Title",
                                            .description = "B Desc",
                                            .bytes_raw = 3,
                                            .bytes_bundled = 4}};
  EXPECT_EQ(collect(view, 0, everything), expected);
}
204+
205+
// Same as the previous scenario but with a non-zero offset, where a naive
// `offset + count` would wrap around.
TEST(Search_view_for_each, count_size_max_with_offset_does_not_overflow) {
  const auto path{test_path("for_each_count_max_offset.metapack")};
  write_search_file(path, {{"/a", "A Title", "A Desc", 80, 1, 2},
                           {"/b", "B Title", "B Desc", 80, 3, 4}});
  sourcemeta::one::SearchView view{path};

  constexpr auto everything{std::numeric_limits<std::size_t>::max()};
  const std::vector<VisitedEntry> expected{{.path = "/b",
                                            .title = "B Title",
                                            .description = "B Desc",
                                            .bytes_raw = 3,
                                            .bytes_bundled = 4}};
  EXPECT_EQ(collect(view, 1, everything), expected);
}
217+
218+
// The header claims 1000 entries but the payload consists of the header
// alone, so the offset table cannot possibly fit and nothing must be visited.
TEST(Search_view_for_each, malformed_offset_table_too_large_returns_nothing) {
  constexpr auto header_size{sizeof(sourcemeta::one::SearchIndexHeader)};
  const auto path{test_path("for_each_malformed_offset_table.metapack")};

  sourcemeta::one::SearchIndexHeader header{};
  header.entry_count = 1000;
  header.records_offset =
      static_cast<std::uint32_t>(header_size + 1000 * sizeof(std::uint32_t));

  // Only the header itself makes it into the file
  std::vector<std::uint8_t> payload(header_size);
  std::memcpy(payload.data(), &header, header_size);
  write_raw_search_file(path, payload);

  sourcemeta::one::SearchView view{path};
  const std::vector<VisitedEntry> nothing{};
  EXPECT_EQ(collect(view, 0, 100), nothing);
}
233+
234+
// The single offset table slot points far beyond the end of the payload, so
// iteration must stop before the callback is ever invoked.
TEST(Search_view_for_each, malformed_record_offset_out_of_bounds_stops) {
  constexpr auto header_size{sizeof(sourcemeta::one::SearchIndexHeader)};
  constexpr auto slot_size{sizeof(std::uint32_t)};
  const auto path{test_path("for_each_malformed_record_offset.metapack")};

  sourcemeta::one::SearchIndexHeader header{};
  header.entry_count = 1;
  header.records_offset = static_cast<std::uint32_t>(header_size + slot_size);

  // Header followed by a one-slot offset table and no records at all
  std::vector<std::uint8_t> payload(header_size + slot_size);
  std::memcpy(payload.data(), &header, header_size);
  const std::uint32_t bad_record_offset{99999};
  std::memcpy(payload.data() + header_size, &bad_record_offset, slot_size);
  write_raw_search_file(path, payload);

  sourcemeta::one::SearchView view{path};
  const std::vector<VisitedEntry> nothing{};
  EXPECT_EQ(collect(view, 0, 100), nothing);
}
252+
253+
// Start from a valid single-entry index and overwrite its record header with
// field lengths that reach far past the end of the payload. The record must
// be rejected, so the callback is never invoked.
TEST(Search_view_for_each, malformed_record_field_lengths_stops) {
  constexpr auto header_size{sizeof(sourcemeta::one::SearchIndexHeader)};
  constexpr auto record_header_size{
      sizeof(sourcemeta::one::SearchRecordHeader)};
  const auto path{test_path("for_each_malformed_record_field.metapack")};

  auto payload{
      sourcemeta::one::make_search({{"/foo", "Title", "Desc", 80, 1, 2}})};

  // Read the header back to learn where the first record begins
  sourcemeta::one::SearchIndexHeader header{};
  std::memcpy(&header, payload.data(), header_size);

  sourcemeta::one::SearchRecordHeader bad_record{};
  bad_record.path_length = 60000;
  bad_record.title_length = 60000;
  bad_record.description_length = 60000;
  std::memcpy(payload.data() + header.records_offset, &bad_record,
              record_header_size);
  write_raw_search_file(path, payload);

  sourcemeta::one::SearchView view{path};
  const std::vector<VisitedEntry> nothing{};
  EXPECT_EQ(collect(view, 0, 100), nothing);
}

0 commit comments

Comments
 (0)