Skip to content

Commit b877570

Browse files
authored
Conditionally apply pre-compilation optimisations (#531)
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent d1d7f9d commit b877570

11 files changed

Lines changed: 330 additions & 191 deletions

File tree

benchmark/micro/draft4.cc

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,6 +1114,50 @@ static void Micro_Draft4_Ref_Single_100(benchmark::State &state) {
11141114
}
11151115
}
11161116

1117+
// Pathological Case: AdaptiveCard-like Pattern
1118+
static void Micro_Draft4_Ref_Many_Nested(benchmark::State &state) {
1119+
const sourcemeta::core::JSON schema{sourcemeta::core::parse_json(R"JSON({
1120+
"$schema": "http://json-schema.org/draft-04/schema#",
1121+
"definitions": {
1122+
"def0": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def1" }, "b": { "$ref": "#/definitions/def2" } } },
1123+
"def1": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def2" }, "b": { "$ref": "#/definitions/def3" } } },
1124+
"def2": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def3" }, "b": { "$ref": "#/definitions/def4" } } },
1125+
"def3": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def4" }, "b": { "$ref": "#/definitions/def5" } } },
1126+
"def4": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def5" }, "b": { "$ref": "#/definitions/def6" } } },
1127+
"def5": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def6" }, "b": { "$ref": "#/definitions/def7" } } },
1128+
"def6": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def7" }, "b": { "$ref": "#/definitions/def8" } } },
1129+
"def7": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def8" }, "b": { "$ref": "#/definitions/def9" } } },
1130+
"def8": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def9" }, "b": { "$ref": "#/definitions/def10" } } },
1131+
"def9": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def10" }, "b": { "$ref": "#/definitions/def11" } } },
1132+
"def10": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def11" }, "b": { "$ref": "#/definitions/def12" } } },
1133+
"def11": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def12" }, "b": { "$ref": "#/definitions/def13" } } },
1134+
"def12": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def13" }, "b": { "$ref": "#/definitions/def14" } } },
1135+
"def13": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def14" }, "b": { "$ref": "#/definitions/def0" } } },
1136+
"def14": { "type": "object", "properties": { "a": { "$ref": "#/definitions/def0" }, "b": { "$ref": "#/definitions/def1" } } }
1137+
},
1138+
"properties": {
1139+
"p0": { "$ref": "#/definitions/def0" },
1140+
"p1": { "$ref": "#/definitions/def0" },
1141+
"p2": { "$ref": "#/definitions/def0" },
1142+
"p3": { "$ref": "#/definitions/def0" },
1143+
"p4": { "$ref": "#/definitions/def0" },
1144+
"p5": { "$ref": "#/definitions/def1" },
1145+
"p6": { "$ref": "#/definitions/def1" },
1146+
"p7": { "$ref": "#/definitions/def1" },
1147+
"p8": { "$ref": "#/definitions/def1" },
1148+
"p9": { "$ref": "#/definitions/def1" }
1149+
}
1150+
})JSON")};
1151+
1152+
for (auto _ : state) {
1153+
auto result{sourcemeta::blaze::compile(
1154+
schema, sourcemeta::core::schema_official_walker,
1155+
sourcemeta::core::schema_official_resolver,
1156+
sourcemeta::blaze::default_schema_compiler)};
1157+
benchmark::DoNotOptimize(result.instructions);
1158+
}
1159+
}
1160+
11171161
BENCHMARK(Micro_Draft4_Meta_1_No_Callback);
11181162
BENCHMARK(Micro_Draft4_Required_Properties);
11191163
BENCHMARK(Micro_Draft4_Many_Optional_Properties_Minimal_Match);
@@ -1134,3 +1178,4 @@ BENCHMARK(Micro_Draft4_Long_Enum);
11341178
BENCHMARK(Micro_Draft4_Long_Enum_Short_Strings);
11351179
BENCHMARK(Micro_Draft4_Type_Object);
11361180
BENCHMARK(Micro_Draft4_Ref_Single_100);
1181+
BENCHMARK(Micro_Draft4_Ref_Many_Nested);

src/compiler/compile.cc

Lines changed: 159 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
#include <algorithm> // std::move, std::sort, std::unique
77
#include <cassert> // assert
88
#include <iterator> // std::back_inserter
9-
#include <utility> // std::move
9+
#include <tuple> // std::tuple, std::get
10+
#include <utility> // std::move, std::pair
1011

1112
#include "compile_helpers.h"
1213

@@ -48,7 +49,6 @@ auto compile_subschema(const sourcemeta::blaze::Context &context,
4849
.base = schema_context.base,
4950
// TODO: This represents a copy
5051
.labels = schema_context.labels,
51-
.references = schema_context.references,
5252
.is_property_name = schema_context.is_property_name},
5353
{.keyword = keyword,
5454
.base_schema_location = dynamic_context.base_schema_location,
@@ -91,7 +91,6 @@ auto precompile(
9191
.vocabularies = std::move(nested_vocabularies),
9292
.base = entry.second.base,
9393
.labels = {},
94-
.references = {},
9594
.is_property_name = schema_context.is_property_name};
9695

9796
return {make(sourcemeta::blaze::InstructionIndex::ControlMark, context,
@@ -117,37 +116,24 @@ auto compile(const sourcemeta::core::JSON &schema,
117116
const std::optional<std::string> &default_id) -> Template {
118117
assert(is_schema(schema));
119118

119+
///////////////////////////////////////////////////////////////////
120+
// (1) Determine the root frame entry
121+
///////////////////////////////////////////////////////////////////
122+
120123
const std::string base{sourcemeta::core::URI::canonicalize(
121124
sourcemeta::core::identify(
122125
schema, resolver,
123126
sourcemeta::core::SchemaIdentificationStrategy::Strict,
124127
default_dialect, default_id)
125128
.value_or(""))};
126-
127129
assert(frame.locations().contains(
128130
{sourcemeta::core::SchemaReferenceType::Static, base}));
129131
const auto root_frame_entry{frame.locations().at(
130132
{sourcemeta::core::SchemaReferenceType::Static, base})};
131133

132-
// Check whether dynamic referencing takes places in this schema. If not,
133-
// we can avoid the overhead of keeping track of dynamics scopes, etc
134-
bool uses_dynamic_scopes{false};
135-
for (const auto &reference : frame.references()) {
136-
if (reference.first.first ==
137-
sourcemeta::core::SchemaReferenceType::Dynamic) {
138-
uses_dynamic_scopes = true;
139-
break;
140-
}
141-
}
142-
143-
SchemaContext schema_context{
144-
.relative_pointer = sourcemeta::core::empty_pointer,
145-
.schema = schema,
146-
.vocabularies = vocabularies(schema, resolver, root_frame_entry.dialect),
147-
.base = sourcemeta::core::URI::canonicalize(root_frame_entry.base),
148-
.labels = {},
149-
.references = {},
150-
.is_property_name = false};
134+
///////////////////////////////////////////////////////////////////
135+
// (2) Determine all the schema resources in the schema
136+
///////////////////////////////////////////////////////////////////
151137

152138
std::vector<std::string> resources;
153139
for (const auto &entry : frame.locations()) {
@@ -165,50 +151,108 @@ auto compile(const sourcemeta::core::JSON &schema,
165151
assert(resources.size() ==
166152
std::set<std::string>(resources.cbegin(), resources.cend()).size());
167153

168-
// Calculate the top static reference destinations for precompilation purposes
169-
// TODO: Replace this logic with `.frame()` `destination_of` information
170-
std::set<std::string> precompiled_static_schemas;
171-
// As a workaround, we avoid pre-compiling schemas on schemas
172-
// that look like they are just wrapping other schemas
173-
if (schema.is_object() && !schema.defines("$ref")) {
174-
std::map<std::string, std::size_t> static_references_count;
154+
///////////////////////////////////////////////////////////////////
155+
// (3) Check if the schema relies on dynamic scopes
156+
///////////////////////////////////////////////////////////////////
157+
158+
bool uses_dynamic_scopes{false};
159+
for (const auto &reference : frame.references()) {
160+
// Check whether dynamic referencing takes place in this schema. If not,
161+
// we can avoid the overhead of keeping track of dynamic scopes, etc
162+
if (reference.first.first ==
163+
sourcemeta::core::SchemaReferenceType::Dynamic) {
164+
uses_dynamic_scopes = true;
165+
break;
166+
}
167+
}
168+
169+
///////////////////////////////////////////////////////////////////
170+
// (4) Plan which static references we will precompile
171+
///////////////////////////////////////////////////////////////////
172+
173+
// Use string views to avoid copying the actual strings, as we know
174+
// that the frame survives the entire compilation process
175+
std::vector<std::tuple<std::string_view, std::size_t, std::size_t>>
176+
sorted_references;
177+
178+
constexpr auto PRECOMPILED_SCHEMAS_MAXIMUM{10};
179+
constexpr auto PRECOMPILED_SCHEMAS_MINIMUM_COUNT{10};
180+
181+
{
182+
std::unordered_map<std::string_view, std::pair<std::size_t, std::size_t>>
183+
static_reference_destinations;
175184
for (const auto &reference : frame.references()) {
176-
if (reference.first.first !=
177-
sourcemeta::core::SchemaReferenceType::Static ||
178-
!frame.locations().contains(
185+
if (reference.first.first ==
186+
sourcemeta::core::SchemaReferenceType::Static &&
187+
frame.locations().contains(
179188
{sourcemeta::core::SchemaReferenceType::Static,
180189
reference.second.destination})) {
181-
continue;
190+
// TODO: Maybe try circular references or non-circular with >100 inbound
191+
// locations or something like that?
192+
std::unordered_set<std::string> visited;
193+
if (!is_circular(frame, reference.first.second, reference.second,
194+
visited)) {
195+
continue;
196+
}
197+
198+
const auto label{Evaluator{}.hash(
199+
schema_resource_id(resources, reference.second.base.value_or("")),
200+
reference.second.fragment.value_or(""))};
201+
auto [iterator, inserted] = static_reference_destinations.try_emplace(
202+
reference.second.destination, std::make_pair(label, 0));
203+
iterator->second.second++;
182204
}
205+
}
183206

184-
const auto &entry{
185-
frame.locations().at({sourcemeta::core::SchemaReferenceType::Static,
186-
reference.second.destination})};
187-
for (const auto &subreference : frame.references()) {
188-
if (subreference.first.second.starts_with(entry.pointer)) {
189-
static_references_count[reference.second.destination] += 1;
190-
}
207+
sorted_references.reserve(static_reference_destinations.size());
208+
for (const auto &reference : static_reference_destinations) {
209+
if (reference.second.second >= PRECOMPILED_SCHEMAS_MINIMUM_COUNT) {
210+
sorted_references.emplace_back(reference.first, reference.second.first,
211+
reference.second.second);
191212
}
192213
}
193-
std::vector<std::pair<std::string, std::size_t>> top_static_destinations(
194-
static_references_count.cbegin(), static_references_count.cend());
195-
std::ranges::sort(top_static_destinations,
214+
std::ranges::sort(sorted_references,
196215
[](const auto &left, const auto &right) {
197-
return left.second > right.second;
216+
return std::get<2>(left) > std::get<2>(right);
198217
});
199-
constexpr auto MAXIMUM_NUMBER_OF_SCHEMAS_TO_PRECOMPILE{5};
200-
for (auto iterator = top_static_destinations.cbegin();
201-
iterator != top_static_destinations.cend() &&
202-
iterator != top_static_destinations.cbegin() +
203-
MAXIMUM_NUMBER_OF_SCHEMAS_TO_PRECOMPILE;
204-
++iterator) {
205-
// Only consider highly referenced schemas
206-
if (iterator->second > 100) {
207-
precompiled_static_schemas.insert(iterator->first);
208-
}
218+
219+
if (sorted_references.size() > PRECOMPILED_SCHEMAS_MAXIMUM) {
220+
sorted_references.erase(sorted_references.begin() +
221+
PRECOMPILED_SCHEMAS_MAXIMUM,
222+
sorted_references.end());
209223
}
224+
225+
// We do not apply this pre-compilation optimisation on meta-schemas
226+
if (sourcemeta::core::schema_official_resolver(base).has_value() ||
227+
(uses_dynamic_scopes && schema.is_object() &&
228+
schema.defines("$vocabulary"))) {
229+
sorted_references.clear();
230+
}
231+
}
232+
233+
assert(sorted_references.size() <= PRECOMPILED_SCHEMAS_MAXIMUM);
234+
std::unordered_set<std::size_t> precompiled_labels;
235+
for (const auto &reference : sorted_references) {
236+
assert(std::get<2>(reference) >= PRECOMPILED_SCHEMAS_MINIMUM_COUNT);
237+
precompiled_labels.emplace(std::get<1>(reference));
210238
}
211239

240+
///////////////////////////////////////////////////////////////////
241+
// (5) Build the starting schema context
242+
///////////////////////////////////////////////////////////////////
243+
244+
SchemaContext schema_context{
245+
.relative_pointer = sourcemeta::core::empty_pointer,
246+
.schema = schema,
247+
.vocabularies = vocabularies(schema, resolver, root_frame_entry.dialect),
248+
.base = sourcemeta::core::URI::canonicalize(root_frame_entry.base),
249+
.labels = {},
250+
.is_property_name = false};
251+
252+
///////////////////////////////////////////////////////////////////
253+
// (6) Build the global compilation context
254+
///////////////////////////////////////////////////////////////////
255+
212256
auto unevaluated{
213257
sourcemeta::blaze::unevaluated(schema, frame, walker, resolver)};
214258

@@ -221,22 +265,19 @@ auto compile(const sourcemeta::core::JSON &schema,
221265
.mode = mode,
222266
.uses_dynamic_scopes = uses_dynamic_scopes,
223267
.unevaluated = std::move(unevaluated),
224-
.precompiled_static_schemas =
225-
std::move(precompiled_static_schemas)};
268+
.precompiled_labels = std::move(precompiled_labels)};
269+
270+
///////////////////////////////////////////////////////////////////
271+
// (7) Build the initial dynamic context
272+
///////////////////////////////////////////////////////////////////
273+
226274
const DynamicContext dynamic_context{relative_dynamic_context()};
227-
Instructions compiler_template;
228275

229-
for (const auto &destination : context.precompiled_static_schemas) {
230-
assert(context.frame.locations().contains(
231-
{sourcemeta::core::SchemaReferenceType::Static, destination}));
232-
const auto match{context.frame.locations().find(
233-
{sourcemeta::core::SchemaReferenceType::Static, destination})};
234-
for (auto &&substep :
235-
precompile(context, schema_context, dynamic_context, *match)) {
236-
compiler_template.push_back(std::move(substep));
237-
}
238-
}
276+
///////////////////////////////////////////////////////////////////
277+
// (8) Pre-compile dynamic reference locations
278+
///////////////////////////////////////////////////////////////////
239279

280+
Instructions compiler_template;
240281
if (uses_dynamic_scopes &&
241282
(schema_context.vocabularies.contains(
242283
"https://json-schema.org/draft/2019-09/vocab/core") ||
@@ -257,9 +298,59 @@ auto compile(const sourcemeta::core::JSON &schema,
257298
}
258299
}
259300

301+
///////////////////////////////////////////////////////////////////
302+
// (9) Pre-compile static reference locations
303+
///////////////////////////////////////////////////////////////////
304+
305+
// Attempt to precompile static destinations to avoid explosive compilation
306+
Instructions static_reference_template;
307+
for (const auto &reference : sorted_references) {
308+
const auto entry{context.frame.locations().find(
309+
{sourcemeta::core::SchemaReferenceType::Static,
310+
std::string{std::get<0>(reference)}})};
311+
assert(entry != context.frame.locations().cend());
312+
auto subschema{sourcemeta::core::get(context.root, entry->second.pointer)};
313+
if (!sourcemeta::core::is_schema(subschema)) {
314+
continue;
315+
}
316+
317+
auto nested_vocabularies{sourcemeta::core::vocabularies(
318+
subschema, context.resolver, entry->second.dialect)};
319+
const sourcemeta::blaze::SchemaContext nested_schema_context{
320+
.relative_pointer = entry->second.relative_pointer,
321+
.schema = std::move(subschema),
322+
.vocabularies = std::move(nested_vocabularies),
323+
// TODO: I think this is hiding a framing bug that we should later
324+
// investigate
325+
.base = entry->second.base.starts_with('#') ? "" : entry->second.base,
326+
.labels = {},
327+
.is_property_name = schema_context.is_property_name};
328+
static_reference_template.push_back(
329+
make(sourcemeta::blaze::InstructionIndex::ControlMark, context,
330+
nested_schema_context, dynamic_context,
331+
sourcemeta::blaze::ValueUnsignedInteger{std::get<1>(reference)},
332+
sourcemeta::blaze::compile(
333+
context, nested_schema_context,
334+
sourcemeta::blaze::relative_dynamic_context(),
335+
sourcemeta::core::empty_pointer,
336+
sourcemeta::core::empty_pointer, entry->first.second)));
337+
}
338+
339+
for (auto &&substep : static_reference_template) {
340+
compiler_template.push_back(std::move(substep));
341+
}
342+
343+
///////////////////////////////////////////////////////////////////
344+
// (10) Compile the actual schema
345+
///////////////////////////////////////////////////////////////////
346+
260347
auto children{compile_subschema(context, schema_context, dynamic_context,
261348
root_frame_entry.dialect)};
262349

350+
///////////////////////////////////////////////////////////////////
351+
// (11) Return final template
352+
///////////////////////////////////////////////////////////////////
353+
263354
const bool track{
264355
context.mode != Mode::FastValidation ||
265356
requires_evaluation(context, schema_context) ||
@@ -353,7 +444,6 @@ auto compile(const Context &context, const SchemaContext &schema_context,
353444
.value_or(""),
354445
// TODO: This represents a copy
355446
.labels = schema_context.labels,
356-
.references = schema_context.references,
357447
.is_property_name = schema_context.is_property_name},
358448
{.keyword = dynamic_context.keyword,
359449
.base_schema_location = destination_pointer,

0 commit comments

Comments
 (0)