Skip to content

Commit a02a187

Browse files
authored
Make various compiler optimisations configurable (#536)
Mainly for pre-compilation, but inherit some other ones from the linked branch too. See: #374 Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent b877570 commit a02a187

3 files changed

Lines changed: 108 additions & 70 deletions

File tree

src/compiler/compile.cc

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,10 @@ auto compile(const sourcemeta::core::JSON &schema,
113113
const Compiler &compiler,
114114
const sourcemeta::core::SchemaFrame &frame, const Mode mode,
115115
const std::optional<std::string> &default_dialect,
116-
const std::optional<std::string> &default_id) -> Template {
116+
const std::optional<std::string> &default_id,
117+
const std::optional<Tweaks> &tweaks) -> Template {
117118
assert(is_schema(schema));
119+
const auto effective_tweaks{tweaks.value_or(Tweaks{})};
118120

119121
///////////////////////////////////////////////////////////////////
120122
// (1) Determine the root frame entry
@@ -173,12 +175,9 @@ auto compile(const sourcemeta::core::JSON &schema,
173175
// Use string views to avoid copying the actual strings, as we know
174176
// that the frame survives the entire compilation process
175177
std::vector<std::tuple<std::string_view, std::size_t, std::size_t>>
176-
sorted_references;
178+
sorted_precompile_references;
177179

178-
constexpr auto PRECOMPILED_SCHEMAS_MAXIMUM{10};
179-
constexpr auto PRECOMPILED_SCHEMAS_MINIMUM_COUNT{10};
180-
181-
{
180+
if (effective_tweaks.precompile_static_references) {
182181
std::unordered_map<std::string_view, std::pair<std::size_t, std::size_t>>
183182
static_reference_destinations;
184183
for (const auto &reference : frame.references()) {
@@ -187,10 +186,9 @@ auto compile(const sourcemeta::core::JSON &schema,
187186
frame.locations().contains(
188187
{sourcemeta::core::SchemaReferenceType::Static,
189188
reference.second.destination})) {
190-
// TODO: Maybe try circular references or non-circular with >100 inbound
191-
// locations or something like that?
192189
std::unordered_set<std::string> visited;
193-
if (!is_circular(frame, reference.first.second, reference.second,
190+
if (!effective_tweaks.precompile_static_references_non_circular &&
191+
!is_circular(frame, reference.first.second, reference.second,
194192
visited)) {
195193
continue;
196194
}
@@ -204,36 +202,45 @@ auto compile(const sourcemeta::core::JSON &schema,
204202
}
205203
}
206204

207-
sorted_references.reserve(static_reference_destinations.size());
205+
sorted_precompile_references.reserve(static_reference_destinations.size());
208206
for (const auto &reference : static_reference_destinations) {
209-
if (reference.second.second >= PRECOMPILED_SCHEMAS_MINIMUM_COUNT) {
210-
sorted_references.emplace_back(reference.first, reference.second.first,
211-
reference.second.second);
207+
if (reference.second.second >=
208+
effective_tweaks
209+
.precompile_static_references_minimum_reference_count) {
210+
sorted_precompile_references.emplace_back(
211+
reference.first, reference.second.first, reference.second.second);
212212
}
213213
}
214-
std::ranges::sort(sorted_references,
214+
std::ranges::sort(sorted_precompile_references,
215215
[](const auto &left, const auto &right) {
216216
return std::get<2>(left) > std::get<2>(right);
217217
});
218218

219-
if (sorted_references.size() > PRECOMPILED_SCHEMAS_MAXIMUM) {
220-
sorted_references.erase(sorted_references.begin() +
221-
PRECOMPILED_SCHEMAS_MAXIMUM,
222-
sorted_references.end());
219+
if (sorted_precompile_references.size() >
220+
effective_tweaks.precompile_static_references_maximum_schemas) {
221+
sorted_precompile_references.erase(
222+
sorted_precompile_references.begin() +
223+
static_cast<std::ptrdiff_t>(
224+
effective_tweaks
225+
.precompile_static_references_maximum_schemas),
226+
sorted_precompile_references.end());
223227
}
224228

225229
// We do not apply this pre-compilation optimisation on meta-schemas
226230
if (sourcemeta::core::schema_official_resolver(base).has_value() ||
227231
(uses_dynamic_scopes && schema.is_object() &&
228232
schema.defines("$vocabulary"))) {
229-
sorted_references.clear();
233+
sorted_precompile_references.clear();
230234
}
231235
}
232236

233-
assert(sorted_references.size() <= PRECOMPILED_SCHEMAS_MAXIMUM);
237+
assert(sorted_precompile_references.size() <=
238+
effective_tweaks.precompile_static_references_maximum_schemas);
234239
std::unordered_set<std::size_t> precompiled_labels;
235-
for (const auto &reference : sorted_references) {
236-
assert(std::get<2>(reference) >= PRECOMPILED_SCHEMAS_MINIMUM_COUNT);
240+
for (const auto &reference : sorted_precompile_references) {
241+
assert(
242+
std::get<2>(reference) >=
243+
effective_tweaks.precompile_static_references_minimum_reference_count);
237244
precompiled_labels.emplace(std::get<1>(reference));
238245
}
239246

@@ -265,7 +272,8 @@ auto compile(const sourcemeta::core::JSON &schema,
265272
.mode = mode,
266273
.uses_dynamic_scopes = uses_dynamic_scopes,
267274
.unevaluated = std::move(unevaluated),
268-
.precompiled_labels = std::move(precompiled_labels)};
275+
.precompiled_labels = std::move(precompiled_labels),
276+
.tweaks = effective_tweaks};
269277

270278
///////////////////////////////////////////////////////////////////
271279
// (7) Build the initial dynamic context
@@ -304,7 +312,7 @@ auto compile(const sourcemeta::core::JSON &schema,
304312

305313
// Attempt to precompile static destinations to avoid explosive compilation
306314
Instructions static_reference_template;
307-
for (const auto &reference : sorted_references) {
315+
for (const auto &reference : sorted_precompile_references) {
308316
const auto entry{context.frame.locations().find(
309317
{sourcemeta::core::SchemaReferenceType::Static,
310318
std::string{std::get<0>(reference)}})};
@@ -377,7 +385,8 @@ auto compile(const sourcemeta::core::JSON &schema,
377385
const sourcemeta::core::SchemaResolver &resolver,
378386
const Compiler &compiler, const Mode mode,
379387
const std::optional<std::string> &default_dialect,
380-
const std::optional<std::string> &default_id) -> Template {
388+
const std::optional<std::string> &default_id,
389+
const std::optional<Tweaks> &tweaks) -> Template {
381390
assert(is_schema(schema));
382391

383392
// Make sure the input schema is bundled, otherwise we won't be able to
@@ -391,7 +400,7 @@ auto compile(const sourcemeta::core::JSON &schema,
391400
frame.analyse(result, walker, resolver, default_dialect, default_id);
392401

393402
return compile(result, walker, resolver, compiler, frame, mode,
394-
default_dialect, default_id);
403+
default_dialect, default_id, tweaks);
395404
}
396405

397406
auto compile(const Context &context, const SchemaContext &schema_context,

src/compiler/default_compiler_draft4.h

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -108,36 +108,38 @@ compile_properties(const sourcemeta::blaze::Context &context,
108108
// and some subschemas that are large. To attempt to improve performance,
109109
// we prefer to evaluate smaller subschemas first, in the hope of failing
110110
// earlier without spending a lot of time on other subschemas
111-
std::sort(properties.begin(), properties.end(),
112-
[](const auto &left, const auto &right) {
113-
const auto left_size{recursive_template_size(left.second)};
114-
const auto right_size{recursive_template_size(right.second)};
115-
if (left_size == right_size) {
116-
const auto left_direct_enumeration{
117-
defines_direct_enumeration(left.second)};
118-
const auto right_direct_enumeration{
119-
defines_direct_enumeration(right.second)};
120-
121-
// Enumerations always take precedence
122-
if (left_direct_enumeration.has_value() &&
123-
right_direct_enumeration.has_value()) {
124-
// If both options have a direct enumeration, we choose
125-
// the one with the shorter relative schema location
126-
return relative_schema_location_size(
127-
left.second.at(left_direct_enumeration.value())) <
128-
relative_schema_location_size(
129-
right.second.at(right_direct_enumeration.value()));
130-
} else if (left_direct_enumeration.has_value()) {
131-
return true;
132-
} else if (right_direct_enumeration.has_value()) {
133-
return false;
111+
if (context.tweaks.properties_reorder) {
112+
std::sort(properties.begin(), properties.end(),
113+
[](const auto &left, const auto &right) {
114+
const auto left_size{recursive_template_size(left.second)};
115+
const auto right_size{recursive_template_size(right.second)};
116+
if (left_size == right_size) {
117+
const auto left_direct_enumeration{
118+
defines_direct_enumeration(left.second)};
119+
const auto right_direct_enumeration{
120+
defines_direct_enumeration(right.second)};
121+
122+
// Enumerations always take precedence
123+
if (left_direct_enumeration.has_value() &&
124+
right_direct_enumeration.has_value()) {
125+
// If both options have a direct enumeration, we choose
126+
// the one with the shorter relative schema location
127+
return relative_schema_location_size(left.second.at(
128+
left_direct_enumeration.value())) <
129+
relative_schema_location_size(right.second.at(
130+
right_direct_enumeration.value()));
131+
} else if (left_direct_enumeration.has_value()) {
132+
return true;
133+
} else if (right_direct_enumeration.has_value()) {
134+
return false;
135+
}
136+
137+
return left.first < right.first;
138+
} else {
139+
return left_size < right_size;
134140
}
135-
136-
return left.first < right.first;
137-
} else {
138-
return left_size < right_size;
139-
}
140-
});
141+
});
142+
}
141143

142144
return properties;
143145
}
@@ -776,6 +778,10 @@ auto compiler_draft4_applicator_oneof(const Context &context,
776778
auto properties_as_loop(const Context &context,
777779
const SchemaContext &schema_context,
778780
const sourcemeta::core::JSON &properties) -> bool {
781+
if (context.tweaks.properties_always_unroll) {
782+
return false;
783+
}
784+
779785
const auto size{properties.size()};
780786
const auto imports_validation_vocabulary =
781787
schema_context.vocabularies.contains(

src/compiler/include/sourcemeta/blaze/compiler.h

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,25 @@ enum class Mode : std::uint8_t {
8181
Exhaustive
8282
};
8383

84+
/// @ingroup compiler
85+
/// Advanced knobs that you can tweak for higher control and optimisations
86+
struct Tweaks {
87+
/// Attempt to precompile static references to speed up compilation
88+
const bool precompile_static_references{true};
89+
/// Consider static references that are not circular when precompiling static
90+
/// references
91+
const bool precompile_static_references_non_circular{false};
92+
/// The maximum number of static reference destinations to precompile
93+
const std::size_t precompile_static_references_maximum_schemas{10};
94+
/// The minimum number of references to a destination before considering it
95+
/// for precompilation
96+
const std::size_t precompile_static_references_minimum_reference_count{10};
97+
/// Always unroll `properties` in a logical AND operation
98+
const bool properties_always_unroll{false};
99+
/// Attempt to re-order `properties` subschemas to evaluate cheaper ones first
100+
const bool properties_reorder{true};
101+
};
102+
84103
/// @ingroup compiler
85104
/// The static compiler context is the information you have at your
86105
/// disposal to implement a keyword that will never change throughout
@@ -106,6 +125,8 @@ struct Context {
106125
const SchemaUnevaluatedEntries unevaluated;
107126
/// The set of global labels identified during precompilation
108127
std::unordered_set<std::size_t> precompiled_labels;
128+
/// The advanced compilation tweaks in effect for this compilation
129+
const Tweaks tweaks;
109130
};
110131

111132
/// @ingroup compiler
@@ -139,13 +160,14 @@ auto SOURCEMETA_BLAZE_COMPILER_EXPORT default_schema_compiler(
139160
///
140161
/// // Evaluate or encode
141162
/// ```
142-
auto SOURCEMETA_BLAZE_COMPILER_EXPORT compile(
143-
const sourcemeta::core::JSON &schema,
144-
const sourcemeta::core::SchemaWalker &walker,
145-
const sourcemeta::core::SchemaResolver &resolver, const Compiler &compiler,
146-
const Mode mode = Mode::FastValidation,
147-
const std::optional<std::string> &default_dialect = std::nullopt,
148-
const std::optional<std::string> &default_id = std::nullopt) -> Template;
163+
auto SOURCEMETA_BLAZE_COMPILER_EXPORT
164+
compile(const sourcemeta::core::JSON &schema,
165+
const sourcemeta::core::SchemaWalker &walker,
166+
const sourcemeta::core::SchemaResolver &resolver,
167+
const Compiler &compiler, const Mode mode = Mode::FastValidation,
168+
const std::optional<std::string> &default_dialect = std::nullopt,
169+
const std::optional<std::string> &default_id = std::nullopt,
170+
const std::optional<Tweaks> &tweaks = std::nullopt) -> Template;
149171

150172
/// @ingroup compiler
151173
///
@@ -156,14 +178,15 @@ auto SOURCEMETA_BLAZE_COMPILER_EXPORT compile(
156178
/// behavior.
157179
///
158180
/// Don't use this function unless you know what you are doing.
159-
auto SOURCEMETA_BLAZE_COMPILER_EXPORT compile(
160-
const sourcemeta::core::JSON &schema,
161-
const sourcemeta::core::SchemaWalker &walker,
162-
const sourcemeta::core::SchemaResolver &resolver, const Compiler &compiler,
163-
const sourcemeta::core::SchemaFrame &frame,
164-
const Mode mode = Mode::FastValidation,
165-
const std::optional<std::string> &default_dialect = std::nullopt,
166-
const std::optional<std::string> &default_id = std::nullopt) -> Template;
181+
auto SOURCEMETA_BLAZE_COMPILER_EXPORT
182+
compile(const sourcemeta::core::JSON &schema,
183+
const sourcemeta::core::SchemaWalker &walker,
184+
const sourcemeta::core::SchemaResolver &resolver,
185+
const Compiler &compiler, const sourcemeta::core::SchemaFrame &frame,
186+
const Mode mode = Mode::FastValidation,
187+
const std::optional<std::string> &default_dialect = std::nullopt,
188+
const std::optional<std::string> &default_id = std::nullopt,
189+
const std::optional<Tweaks> &tweaks = std::nullopt) -> Template;
167190

168191
/// @ingroup compiler
169192
///

0 commit comments

Comments
 (0)