Skip to content

Commit e06d128

Browse files
authored
Optimise Blaze JSON output for space-efficiency (#505)
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent 83953b0 commit e06d128

4 files changed

Lines changed: 431 additions & 191 deletions

File tree

src/compiler/compile_json.cc

Lines changed: 71 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,88 @@
11
#include <sourcemeta/blaze/compiler.h>
22

3-
#include <cassert> // assert
4-
#include <variant> // std::visit
3+
#include <cassert> // assert
4+
#include <string_view> // std::string_view
5+
#include <variant> // std::visit
56

67
namespace {
7-
auto to_json(const sourcemeta::blaze::Instruction &instruction)
8+
auto to_json(const sourcemeta::blaze::Instruction &instruction,
9+
std::vector<sourcemeta::core::JSON::String> &resources)
810
-> sourcemeta::core::JSON {
9-
auto result{sourcemeta::core::JSON::make_object()};
11+
// Note that we purposely avoid objects to help consumers avoid potentially
12+
// expensive hash-map or flat-map lookups when parsing back
13+
auto result{sourcemeta::core::JSON::make_array()};
14+
1015
// We use single characters to save space, as this serialised format
1116
// is not meant to be human-readable anyway
12-
result.assign("t", sourcemeta::core::to_json(instruction.type));
13-
result.assign(
14-
"s", sourcemeta::core::to_json(instruction.relative_schema_location));
15-
result.assign(
16-
"i", sourcemeta::core::to_json(instruction.relative_instance_location));
17-
result.assign("k", sourcemeta::core::to_json(instruction.keyword_location));
18-
result.assign("r", sourcemeta::core::to_json(instruction.schema_resource));
19-
20-
auto value{sourcemeta::core::JSON::make_object()};
21-
value.assign("t", sourcemeta::core::to_json(instruction.value.index()));
22-
value.assign("v", std::visit(
23-
[](const auto &variant) {
24-
return sourcemeta::core::to_json(variant);
25-
},
26-
instruction.value));
27-
result.assign("v", std::move(value));
28-
29-
assert(result.at("v").is_object());
30-
assert(result.at("v").size() == 2);
31-
assert(result.at("v").defines("t"));
32-
assert(result.at("v").defines("v"));
33-
assert(result.at("v").at("t").is_integer());
34-
35-
auto children_json{sourcemeta::core::JSON::make_array()};
36-
result.assign("c", sourcemeta::core::to_json(instruction.children,
37-
[](const auto &subinstruction) {
38-
return to_json(subinstruction);
39-
}));
17+
result.push_back(sourcemeta::core::to_json(instruction.type));
18+
19+
result.push_back(
20+
sourcemeta::core::to_json(instruction.relative_schema_location));
21+
result.push_back(
22+
sourcemeta::core::to_json(instruction.relative_instance_location));
23+
24+
const auto match{instruction.keyword_location.find('#')};
25+
if (instruction.schema_resource > 0 && match != std::string::npos) {
26+
if (resources.size() < instruction.schema_resource) {
27+
resources.resize(instruction.schema_resource);
28+
}
29+
30+
if (resources[instruction.schema_resource - 1].empty()) {
31+
resources[instruction.schema_resource - 1] =
32+
instruction.keyword_location.substr(0, match);
33+
}
34+
35+
result.push_back(
36+
sourcemeta::core::JSON{instruction.keyword_location.substr(match)});
37+
} else {
38+
result.push_back(sourcemeta::core::to_json(instruction.keyword_location));
39+
}
40+
41+
result.push_back(sourcemeta::core::to_json(instruction.schema_resource));
42+
43+
// Note that we purposely avoid objects to help consumers avoid potentially
44+
// expensive hash-map or flat-map lookups when parsing back
45+
auto value{sourcemeta::core::JSON::make_array()};
46+
const auto value_index{instruction.value.index()};
47+
value.push_back(sourcemeta::core::to_json(value_index));
48+
// Don't encode empty values, which tend to happen a lot
49+
if (value_index != 0) {
50+
value.push_back(std::visit(
51+
[](const auto &variant) { return sourcemeta::core::to_json(variant); },
52+
instruction.value));
53+
}
54+
assert(value.is_array());
55+
assert(!value.empty());
56+
assert(value.at(0).is_integer());
57+
result.push_back(std::move(value));
58+
59+
if (!instruction.children.empty()) {
60+
auto children_json{sourcemeta::core::JSON::make_array()};
61+
result.push_back(sourcemeta::core::to_json(
62+
instruction.children, [&resources](const auto &subinstruction) {
63+
return to_json(subinstruction, resources);
64+
}));
65+
}
66+
4067
return result;
4168
}
4269
} // namespace
4370

4471
namespace sourcemeta::blaze {
4572

4673
auto to_json(const Template &schema_template) -> sourcemeta::core::JSON {
47-
auto result{sourcemeta::core::JSON::make_object()};
48-
result.assign("dynamic", sourcemeta::core::JSON{schema_template.dynamic});
49-
result.assign("track", sourcemeta::core::JSON{schema_template.track});
50-
result.assign("instructions",
51-
sourcemeta::core::to_json(schema_template.instructions,
52-
[](const auto &instruction) {
53-
return ::to_json(instruction);
54-
}));
74+
// Note that we purposely avoid objects to help consumers avoid potentially
75+
// expensive hash-map or flat-map lookups when parsing back
76+
auto result{sourcemeta::core::JSON::make_array()};
77+
result.push_back(sourcemeta::core::JSON{schema_template.dynamic});
78+
result.push_back(sourcemeta::core::JSON{schema_template.track});
79+
std::vector<sourcemeta::core::JSON::String> resources;
80+
auto instructions{sourcemeta::core::to_json(
81+
schema_template.instructions, [&resources](const auto &instruction) {
82+
return ::to_json(instruction, resources);
83+
})};
84+
result.push_back(sourcemeta::core::to_json(resources));
85+
result.push_back(std::move(instructions));
5586
return result;
5687
}
5788

src/evaluator/evaluator_json.cc

Lines changed: 82 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -5,49 +5,49 @@
55
namespace {
66
auto value_from_json(const sourcemeta::core::JSON &wrapper)
77
-> std::optional<sourcemeta::blaze::Value> {
8-
if (!wrapper.is_object()) {
8+
if (!wrapper.is_array() || wrapper.array_size() == 0 ||
9+
!wrapper.at(0).is_integer()) {
910
return std::nullopt;
11+
} else if (wrapper.array_size() == 1) {
12+
return sourcemeta::blaze::ValueNone{};
1013
}
1114

12-
const auto type{wrapper.try_at("t")};
13-
const auto value{wrapper.try_at("v")};
14-
if (!type || !value) {
15-
return std::nullopt;
16-
}
15+
const auto &value{wrapper.at(1)};
1716

1817
using namespace sourcemeta::blaze;
19-
switch (type->to_integer()) {
18+
switch (wrapper.at(0).to_integer()) {
2019
// clang-format off
21-
case 0: return sourcemeta::core::from_json<ValueNone>(*value);
22-
case 1: return sourcemeta::core::from_json<ValueJSON>(*value);
23-
case 2: return sourcemeta::core::from_json<ValueSet>(*value);
24-
case 3: return sourcemeta::core::from_json<ValueString>(*value);
25-
case 4: return sourcemeta::core::from_json<ValueProperty>(*value);
26-
case 5: return sourcemeta::core::from_json<ValueStrings>(*value);
27-
case 6: return sourcemeta::core::from_json<ValueStringSet>(*value);
28-
case 7: return sourcemeta::core::from_json<ValueTypes>(*value);
29-
case 8: return sourcemeta::core::from_json<ValueType>(*value);
30-
case 9: return sourcemeta::core::from_json<ValueRegex>(*value);
31-
case 10: return sourcemeta::core::from_json<ValueUnsignedInteger>(*value);
32-
case 11: return sourcemeta::core::from_json<ValueRange>(*value);
33-
case 12: return sourcemeta::core::from_json<ValueBoolean>(*value);
34-
case 13: return sourcemeta::core::from_json<ValueNamedIndexes>(*value);
35-
case 14: return sourcemeta::core::from_json<ValueStringType>(*value);
36-
case 15: return sourcemeta::core::from_json<ValueStringMap>(*value);
37-
case 16: return sourcemeta::core::from_json<ValuePropertyFilter>(*value);
38-
case 17: return sourcemeta::core::from_json<ValueIndexPair>(*value);
39-
case 18: return sourcemeta::core::from_json<ValuePointer>(*value);
40-
case 19: return sourcemeta::core::from_json<ValueTypedProperties>(*value);
41-
case 20: return sourcemeta::core::from_json<ValueStringHashes>(*value);
42-
case 21: return sourcemeta::core::from_json<ValueTypedHashes>(*value);
20+
case 0: return ValueNone{};
21+
case 1: return sourcemeta::core::from_json<ValueJSON>(value);
22+
case 2: return sourcemeta::core::from_json<ValueSet>(value);
23+
case 3: return sourcemeta::core::from_json<ValueString>(value);
24+
case 4: return sourcemeta::core::from_json<ValueProperty>(value);
25+
case 5: return sourcemeta::core::from_json<ValueStrings>(value);
26+
case 6: return sourcemeta::core::from_json<ValueStringSet>(value);
27+
case 7: return sourcemeta::core::from_json<ValueTypes>(value);
28+
case 8: return sourcemeta::core::from_json<ValueType>(value);
29+
case 9: return sourcemeta::core::from_json<ValueRegex>(value);
30+
case 10: return sourcemeta::core::from_json<ValueUnsignedInteger>(value);
31+
case 11: return sourcemeta::core::from_json<ValueRange>(value);
32+
case 12: return sourcemeta::core::from_json<ValueBoolean>(value);
33+
case 13: return sourcemeta::core::from_json<ValueNamedIndexes>(value);
34+
case 14: return sourcemeta::core::from_json<ValueStringType>(value);
35+
case 15: return sourcemeta::core::from_json<ValueStringMap>(value);
36+
case 16: return sourcemeta::core::from_json<ValuePropertyFilter>(value);
37+
case 17: return sourcemeta::core::from_json<ValueIndexPair>(value);
38+
case 18: return sourcemeta::core::from_json<ValuePointer>(value);
39+
case 19: return sourcemeta::core::from_json<ValueTypedProperties>(value);
40+
case 20: return sourcemeta::core::from_json<ValueStringHashes>(value);
41+
case 21: return sourcemeta::core::from_json<ValueTypedHashes>(value);
4342
// clang-format on
4443
default:
4544
assert(false);
4645
return ValueNone{};
4746
}
4847
}
4948

50-
auto instructions_from_json(const sourcemeta::core::JSON &instructions)
49+
auto instructions_from_json(const sourcemeta::core::JSON &instructions,
50+
const sourcemeta::core::JSON &resources)
5151
-> std::optional<sourcemeta::blaze::Instructions> {
5252
if (!instructions.is_array()) {
5353
return std::nullopt;
@@ -56,42 +56,36 @@ auto instructions_from_json(const sourcemeta::core::JSON &instructions)
5656
sourcemeta::blaze::Instructions result;
5757
result.reserve(instructions.size());
5858
for (const auto &instruction : instructions.as_array()) {
59-
if (!instruction.is_object()) {
59+
if (!instruction.is_array() || instruction.array_size() < 6) {
6060
return std::nullopt;
6161
}
6262

63-
const auto type{instruction.try_at("t")};
64-
const auto relative_schema_location{instruction.try_at("s")};
65-
const auto relative_instance_location{instruction.try_at("i")};
66-
const auto keyword_location{instruction.try_at("k")};
67-
const auto schema_resource{instruction.try_at("r")};
68-
const auto value{instruction.try_at("v")};
69-
const auto children{instruction.try_at("c")};
70-
71-
if (!type || !relative_schema_location || !relative_instance_location ||
72-
!keyword_location || !schema_resource || !value || !children ||
73-
!type->is_positive() || !relative_schema_location->is_string() ||
74-
!relative_instance_location->is_string() ||
75-
!keyword_location->is_string() || !schema_resource->is_positive() ||
76-
!value->is_object() || !children->is_array()) {
77-
return std::nullopt;
78-
}
63+
const auto &type{instruction.at(0)};
64+
const auto &relative_schema_location{instruction.at(1)};
65+
const auto &relative_instance_location{instruction.at(2)};
66+
const auto &keyword_location{instruction.at(3)};
67+
const auto &schema_resource{instruction.at(4)};
68+
const auto &value{instruction.at(5)};
7969

8070
auto type_result{
81-
sourcemeta::core::from_json<sourcemeta::blaze::InstructionIndex>(
82-
*type)};
71+
sourcemeta::core::from_json<sourcemeta::blaze::InstructionIndex>(type)};
8372
auto relative_schema_location_result{
8473
sourcemeta::core::from_json<sourcemeta::core::Pointer>(
85-
*relative_schema_location)};
74+
relative_schema_location)};
8675
auto relative_instance_location_result{
8776
sourcemeta::core::from_json<sourcemeta::core::Pointer>(
88-
*relative_instance_location)};
77+
relative_instance_location)};
8978
auto keyword_location_result{
90-
sourcemeta::core::from_json<std::string>(*keyword_location)};
79+
sourcemeta::core::from_json<std::string>(keyword_location)};
9180
auto schema_resource_result{
92-
sourcemeta::core::from_json<std::size_t>(*schema_resource)};
93-
auto value_result{value_from_json(*value)};
94-
auto children_result{instructions_from_json(*children)};
81+
sourcemeta::core::from_json<std::size_t>(schema_resource)};
82+
auto value_result{value_from_json(value)};
83+
84+
// Parse children if present
85+
std::optional<sourcemeta::blaze::Instructions> children_result{
86+
instruction.array_size() == 7
87+
? instructions_from_json(instruction.at(6), resources)
88+
: sourcemeta::blaze::Instructions{}};
9589

9690
if (!type_result.has_value() ||
9791
!relative_schema_location_result.has_value() ||
@@ -102,14 +96,28 @@ auto instructions_from_json(const sourcemeta::core::JSON &instructions)
10296
return std::nullopt;
10397
}
10498

105-
// TODO: Maybe we should emplace here?
106-
result.push_back({std::move(type_result).value(),
107-
std::move(relative_schema_location_result).value(),
108-
std::move(relative_instance_location_result).value(),
109-
std::move(keyword_location_result).value(),
110-
std::move(schema_resource_result).value(),
111-
std::move(value_result).value(),
112-
std::move(children_result).value()});
99+
if (schema_resource_result.value() > 0 &&
100+
resources.array_size() >= schema_resource_result.value() &&
101+
keyword_location_result.value().starts_with('#')) {
102+
// TODO: Maybe we should emplace here?
103+
result.push_back(
104+
{std::move(type_result).value(),
105+
std::move(relative_schema_location_result).value(),
106+
std::move(relative_instance_location_result).value(),
107+
resources.at(schema_resource_result.value() - 1).to_string() +
108+
std::move(keyword_location_result).value(),
109+
schema_resource_result.value(), std::move(value_result).value(),
110+
std::move(children_result).value()});
111+
} else {
112+
// TODO: Maybe we should emplace here?
113+
result.push_back({std::move(type_result).value(),
114+
std::move(relative_schema_location_result).value(),
115+
std::move(relative_instance_location_result).value(),
116+
std::move(keyword_location_result).value(),
117+
std::move(schema_resource_result).value(),
118+
std::move(value_result).value(),
119+
std::move(children_result).value()});
120+
}
113121
}
114122

115123
return result;
@@ -120,26 +128,27 @@ auto instructions_from_json(const sourcemeta::core::JSON &instructions)
120128
namespace sourcemeta::blaze {
121129

122130
auto from_json(const sourcemeta::core::JSON &json) -> std::optional<Template> {
123-
if (!json.is_object()) {
131+
if (!json.is_array() || json.array_size() != 4) {
124132
return std::nullopt;
125133
}
126134

127-
const auto instructions{json.try_at("instructions")};
128-
const auto dynamic{json.try_at("dynamic")};
129-
const auto track{json.try_at("track")};
130-
if (!instructions || !dynamic || !track) {
135+
const auto &dynamic{json.at(0)};
136+
const auto &track{json.at(1)};
137+
const auto &resources{json.at(2)};
138+
139+
if (!dynamic.is_boolean() || !track.is_boolean() || !resources.is_array()) {
131140
return std::nullopt;
132141
}
133142

134-
auto instructions_result{instructions_from_json(*instructions)};
135-
if (!instructions_result.has_value() || !dynamic->is_boolean() ||
136-
!track->is_boolean()) {
143+
const auto &instructions{json.at(3)};
144+
auto instructions_result{instructions_from_json(instructions, resources)};
145+
if (!instructions_result.has_value()) {
137146
return std::nullopt;
138147
}
139148

140149
return Template{.instructions = std::move(instructions_result).value(),
141-
.dynamic = dynamic->to_boolean(),
142-
.track = track->to_boolean()};
150+
.dynamic = dynamic.to_boolean(),
151+
.track = track.to_boolean()};
143152
}
144153

145154
} // namespace sourcemeta::blaze

0 commit comments

Comments
 (0)