66#include < algorithm> // std::move, std::sort, std::unique
77#include < cassert> // assert
88#include < iterator> // std::back_inserter
9- #include < utility> // std::move
9+ #include < tuple> // std::tuple, std::get
10+ #include < utility> // std::move, std::pair
1011
1112#include " compile_helpers.h"
1213
@@ -48,7 +49,6 @@ auto compile_subschema(const sourcemeta::blaze::Context &context,
4849 .base = schema_context.base ,
4950 // TODO: This represents a copy
5051 .labels = schema_context.labels ,
51- .references = schema_context.references ,
5252 .is_property_name = schema_context.is_property_name },
5353 {.keyword = keyword,
5454 .base_schema_location = dynamic_context.base_schema_location ,
@@ -91,7 +91,6 @@ auto precompile(
9191 .vocabularies = std::move (nested_vocabularies),
9292 .base = entry.second .base ,
9393 .labels = {},
94- .references = {},
9594 .is_property_name = schema_context.is_property_name };
9695
9796 return {make (sourcemeta::blaze::InstructionIndex::ControlMark, context,
@@ -117,37 +116,24 @@ auto compile(const sourcemeta::core::JSON &schema,
117116 const std::optional<std::string> &default_id) -> Template {
118117 assert (is_schema (schema));
119118
119+ // /////////////////////////////////////////////////////////////////
120+ // (1) Determine the root frame entry
121+ // /////////////////////////////////////////////////////////////////
122+
120123 const std::string base{sourcemeta::core::URI::canonicalize (
121124 sourcemeta::core::identify (
122125 schema, resolver,
123126 sourcemeta::core::SchemaIdentificationStrategy::Strict,
124127 default_dialect, default_id)
125128 .value_or (" " ))};
126-
127129 assert (frame.locations ().contains (
128130 {sourcemeta::core::SchemaReferenceType::Static, base}));
129131 const auto root_frame_entry{frame.locations ().at (
130132 {sourcemeta::core::SchemaReferenceType::Static, base})};
131133
132- // Check whether dynamic referencing takes places in this schema. If not,
133- // we can avoid the overhead of keeping track of dynamics scopes, etc
134- bool uses_dynamic_scopes{false };
135- for (const auto &reference : frame.references ()) {
136- if (reference.first .first ==
137- sourcemeta::core::SchemaReferenceType::Dynamic) {
138- uses_dynamic_scopes = true ;
139- break ;
140- }
141- }
142-
143- SchemaContext schema_context{
144- .relative_pointer = sourcemeta::core::empty_pointer,
145- .schema = schema,
146- .vocabularies = vocabularies (schema, resolver, root_frame_entry.dialect ),
147- .base = sourcemeta::core::URI::canonicalize (root_frame_entry.base ),
148- .labels = {},
149- .references = {},
150- .is_property_name = false };
134+ // /////////////////////////////////////////////////////////////////
135+ // (2) Determine all the schema resources in the schema
136+ // /////////////////////////////////////////////////////////////////
151137
152138 std::vector<std::string> resources;
153139 for (const auto &entry : frame.locations ()) {
@@ -165,50 +151,108 @@ auto compile(const sourcemeta::core::JSON &schema,
165151 assert (resources.size () ==
166152 std::set<std::string>(resources.cbegin (), resources.cend ()).size ());
167153
168- // Calculate the top static reference destinations for precompilation purposes
169- // TODO: Replace this logic with `.frame()` `destination_of` information
170- std::set<std::string> precompiled_static_schemas;
171- // As a workaround, we avoid pre-compiling schemas on schemas
172- // that look like they are just wrapping other schemas
173- if (schema.is_object () && !schema.defines (" $ref" )) {
174- std::map<std::string, std::size_t > static_references_count;
154+ // /////////////////////////////////////////////////////////////////
155+ // (3) Check if the schema relies on dynamic scopes
156+ // /////////////////////////////////////////////////////////////////
157+
158+ bool uses_dynamic_scopes{false };
159+ for (const auto &reference : frame.references ()) {
160+ // Check whether dynamic referencing takes place in this schema. If not,
161+ // we can avoid the overhead of keeping track of dynamic scopes, etc
162+ if (reference.first .first ==
163+ sourcemeta::core::SchemaReferenceType::Dynamic) {
164+ uses_dynamic_scopes = true ;
165+ break ;
166+ }
167+ }
168+
169+ // /////////////////////////////////////////////////////////////////
170+ // (4) Plan which static references we will precompile
171+ // /////////////////////////////////////////////////////////////////
172+
173+ // Use string views to avoid copying the actual strings, as we know
174+ // that the frame survives the entire compilation process
175+ std::vector<std::tuple<std::string_view, std::size_t , std::size_t >>
176+ sorted_references;
177+
178+ constexpr auto PRECOMPILED_SCHEMAS_MAXIMUM{10 };
179+ constexpr auto PRECOMPILED_SCHEMAS_MINIMUM_COUNT{10 };
180+
181+ {
182+ std::unordered_map<std::string_view, std::pair<std::size_t , std::size_t >>
183+ static_reference_destinations;
175184 for (const auto &reference : frame.references ()) {
176- if (reference.first .first ! =
177- sourcemeta::core::SchemaReferenceType::Static ||
178- ! frame.locations ().contains (
185+ if (reference.first .first = =
186+ sourcemeta::core::SchemaReferenceType::Static &&
187+ frame.locations ().contains (
179188 {sourcemeta::core::SchemaReferenceType::Static,
180189 reference.second .destination })) {
181- continue ;
190+ // TODO: Maybe try circular references or non-circular with >100 inbound
191+ // locations or something like that?
192+ std::unordered_set<std::string> visited;
193+ if (!is_circular (frame, reference.first .second , reference.second ,
194+ visited)) {
195+ continue ;
196+ }
197+
198+ const auto label{Evaluator{}.hash (
199+ schema_resource_id (resources, reference.second .base .value_or (" " )),
200+ reference.second .fragment .value_or (" " ))};
201+ auto [iterator, inserted] = static_reference_destinations.try_emplace (
202+ reference.second .destination , std::make_pair (label, 0 ));
203+ iterator->second .second ++;
182204 }
205+ }
183206
184- const auto &entry{
185- frame.locations ().at ({sourcemeta::core::SchemaReferenceType::Static,
186- reference.second .destination })};
187- for (const auto &subreference : frame.references ()) {
188- if (subreference.first .second .starts_with (entry.pointer )) {
189- static_references_count[reference.second .destination ] += 1 ;
190- }
207+ sorted_references.reserve (static_reference_destinations.size ());
208+ for (const auto &reference : static_reference_destinations) {
209+ if (reference.second .second >= PRECOMPILED_SCHEMAS_MINIMUM_COUNT) {
210+ sorted_references.emplace_back (reference.first , reference.second .first ,
211+ reference.second .second );
191212 }
192213 }
193- std::vector<std::pair<std::string, std::size_t >> top_static_destinations (
194- static_references_count.cbegin (), static_references_count.cend ());
195- std::ranges::sort (top_static_destinations,
214+ std::ranges::sort (sorted_references,
196215 [](const auto &left, const auto &right) {
197- return left. second > right. second ;
216+ return std::get< 2 >( left) > std::get< 2 >( right) ;
198217 });
199- constexpr auto MAXIMUM_NUMBER_OF_SCHEMAS_TO_PRECOMPILE{5 };
200- for (auto iterator = top_static_destinations.cbegin ();
201- iterator != top_static_destinations.cend () &&
202- iterator != top_static_destinations.cbegin () +
203- MAXIMUM_NUMBER_OF_SCHEMAS_TO_PRECOMPILE;
204- ++iterator) {
205- // Only consider highly referenced schemas
206- if (iterator->second > 100 ) {
207- precompiled_static_schemas.insert (iterator->first );
208- }
218+
219+ if (sorted_references.size () > PRECOMPILED_SCHEMAS_MAXIMUM) {
220+ sorted_references.erase (sorted_references.begin () +
221+ PRECOMPILED_SCHEMAS_MAXIMUM,
222+ sorted_references.end ());
209223 }
224+
225+ // We do not apply this pre-compilation optimisation on meta-schemas
226+ if (sourcemeta::core::schema_official_resolver (base).has_value () ||
227+ (uses_dynamic_scopes && schema.is_object () &&
228+ schema.defines (" $vocabulary" ))) {
229+ sorted_references.clear ();
230+ }
231+ }
232+
233+ assert (sorted_references.size () <= PRECOMPILED_SCHEMAS_MAXIMUM);
234+ std::unordered_set<std::size_t > precompiled_labels;
235+ for (const auto &reference : sorted_references) {
236+ assert (std::get<2 >(reference) >= PRECOMPILED_SCHEMAS_MINIMUM_COUNT);
237+ precompiled_labels.emplace (std::get<1 >(reference));
210238 }
211239
240+ // /////////////////////////////////////////////////////////////////
241+ // (5) Build the starting schema context
242+ // /////////////////////////////////////////////////////////////////
243+
244+ SchemaContext schema_context{
245+ .relative_pointer = sourcemeta::core::empty_pointer,
246+ .schema = schema,
247+ .vocabularies = vocabularies (schema, resolver, root_frame_entry.dialect ),
248+ .base = sourcemeta::core::URI::canonicalize (root_frame_entry.base ),
249+ .labels = {},
250+ .is_property_name = false };
251+
252+ // /////////////////////////////////////////////////////////////////
253+ // (6) Build the global compilation context
254+ // /////////////////////////////////////////////////////////////////
255+
212256 auto unevaluated{
213257 sourcemeta::blaze::unevaluated (schema, frame, walker, resolver)};
214258
@@ -221,22 +265,19 @@ auto compile(const sourcemeta::core::JSON &schema,
221265 .mode = mode,
222266 .uses_dynamic_scopes = uses_dynamic_scopes,
223267 .unevaluated = std::move (unevaluated),
224- .precompiled_static_schemas =
225- std::move (precompiled_static_schemas)};
268+ .precompiled_labels = std::move (precompiled_labels)};
269+
270+ // /////////////////////////////////////////////////////////////////
271+ // (7) Build the initial dynamic context
272+ // /////////////////////////////////////////////////////////////////
273+
226274 const DynamicContext dynamic_context{relative_dynamic_context ()};
227- Instructions compiler_template;
228275
229- for (const auto &destination : context.precompiled_static_schemas ) {
230- assert (context.frame .locations ().contains (
231- {sourcemeta::core::SchemaReferenceType::Static, destination}));
232- const auto match{context.frame .locations ().find (
233- {sourcemeta::core::SchemaReferenceType::Static, destination})};
234- for (auto &&substep :
235- precompile (context, schema_context, dynamic_context, *match)) {
236- compiler_template.push_back (std::move (substep));
237- }
238- }
276+ // /////////////////////////////////////////////////////////////////
277+ // (8) Precompile dynamic reference locations
278+ // /////////////////////////////////////////////////////////////////
239279
280+ Instructions compiler_template;
240281 if (uses_dynamic_scopes &&
241282 (schema_context.vocabularies .contains (
242283 " https://json-schema.org/draft/2019-09/vocab/core" ) ||
@@ -257,9 +298,59 @@ auto compile(const sourcemeta::core::JSON &schema,
257298 }
258299 }
259300
301+ // /////////////////////////////////////////////////////////////////
302+ // (9) Precompile static reference locations
303+ // /////////////////////////////////////////////////////////////////
304+
305+ // Attempt to precompile static destinations to avoid explosive compilation
306+ Instructions static_reference_template;
307+ for (const auto &reference : sorted_references) {
308+ const auto entry{context.frame .locations ().find (
309+ {sourcemeta::core::SchemaReferenceType::Static,
310+ std::string{std::get<0 >(reference)}})};
311+ assert (entry != context.frame .locations ().cend ());
312+ auto subschema{sourcemeta::core::get (context.root , entry->second .pointer )};
313+ if (!sourcemeta::core::is_schema (subschema)) {
314+ continue ;
315+ }
316+
317+ auto nested_vocabularies{sourcemeta::core::vocabularies (
318+ subschema, context.resolver , entry->second .dialect )};
319+ const sourcemeta::blaze::SchemaContext nested_schema_context{
320+ .relative_pointer = entry->second .relative_pointer ,
321+ .schema = std::move (subschema),
322+ .vocabularies = std::move (nested_vocabularies),
323+ // TODO: I think this is hiding a framing bug that we should later
324+ // investigate
325+ .base = entry->second .base .starts_with (' #' ) ? " " : entry->second .base ,
326+ .labels = {},
327+ .is_property_name = schema_context.is_property_name };
328+ static_reference_template.push_back (
329+ make (sourcemeta::blaze::InstructionIndex::ControlMark, context,
330+ nested_schema_context, dynamic_context,
331+ sourcemeta::blaze::ValueUnsignedInteger{std::get<1 >(reference)},
332+ sourcemeta::blaze::compile (
333+ context, nested_schema_context,
334+ sourcemeta::blaze::relative_dynamic_context (),
335+ sourcemeta::core::empty_pointer,
336+ sourcemeta::core::empty_pointer, entry->first .second )));
337+ }
338+
339+ for (auto &&substep : static_reference_template) {
340+ compiler_template.push_back (std::move (substep));
341+ }
342+
343+ // /////////////////////////////////////////////////////////////////
344+ // (10) Compile the actual schema
345+ // /////////////////////////////////////////////////////////////////
346+
260347 auto children{compile_subschema (context, schema_context, dynamic_context,
261348 root_frame_entry.dialect )};
262349
350+ // /////////////////////////////////////////////////////////////////
351+ // (11) Return final template
352+ // /////////////////////////////////////////////////////////////////
353+
263354 const bool track{
264355 context.mode != Mode::FastValidation ||
265356 requires_evaluation (context, schema_context) ||
@@ -353,7 +444,6 @@ auto compile(const Context &context, const SchemaContext &schema_context,
353444 .value_or (" " ),
354445 // TODO: This represents a copy
355446 .labels = schema_context.labels ,
356- .references = schema_context.references ,
357447 .is_property_name = schema_context.is_property_name },
358448 {.keyword = dynamic_context.keyword ,
359449 .base_schema_location = destination_pointer,
0 commit comments