Skip to content

Commit cd9f9ae

Browse files
committed
[WIP] Misc performance improvements
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent 7b214cf commit cd9f9ae

16 files changed

Lines changed: 1296 additions & 44 deletions

src/compiler/default_compiler_draft3.h

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@
88

99
#include <algorithm> // std::sort, std::ranges::any_of, std::ranges::all_of, std::find_if, std::ranges::none_of
1010
#include <cassert> // assert
11+
#include <map> // std::map
1112
#include <set> // std::set
12-
#include <utility> // std::move, std::to_underlying
13+
#include <string> // std::string
14+
#include <unordered_map> // std::unordered_map
15+
#include <utility> // std::move, std::to_underlying
1316

1417
#include "compile_helpers.h"
1518

@@ -104,10 +107,17 @@ compile_properties(const sourcemeta::blaze::Context &context,
104107
// we prefer to evaluate smaller subschemas first, in the hope of failing
105108
// earlier without spending a lot of time on other subschemas
106109
if (context.tweaks.properties_reorder) {
107-
std::ranges::sort(properties, [&context](const auto &left,
108-
const auto &right) {
109-
const auto left_size{recursive_template_size(left.second)};
110-
const auto right_size{recursive_template_size(right.second)};
110+
std::unordered_map<std::string, std::size_t> template_sizes;
111+
template_sizes.reserve(properties.size());
112+
for (const auto &property : properties) {
113+
template_sizes.emplace(property.first,
114+
recursive_template_size(property.second));
115+
}
116+
117+
std::ranges::sort(properties, [&context, &template_sizes](
118+
const auto &left, const auto &right) {
119+
const auto left_size{template_sizes.at(left.first)};
120+
const auto right_size{template_sizes.at(right.first)};
111121
if (left_size == right_size) {
112122
const auto left_direct_enumeration{
113123
defines_direct_enumeration(left.second)};
@@ -548,10 +558,11 @@ auto compiler_draft3_applicator_properties_with_options(
548558
ValueNamedIndexes indexes;
549559
Instructions children;
550560
std::size_t cursor = 0;
561+
std::map<sourcemeta::core::JSON::String, std::size_t> child_positions;
551562

552563
for (auto &&[name, substeps] : compile_properties(
553564
context, schema_context, relative_dynamic_context(), current)) {
554-
indexes.emplace(name, cursor);
565+
child_positions.emplace(name, cursor);
555566

556567
if (track_evaluation) {
557568
substeps.push_back(make(
@@ -574,6 +585,17 @@ auto compiler_draft3_applicator_properties_with_options(
574585
cursor += 1;
575586
}
576587

588+
// Lay the lookup table out in schema declaration order, as instances
589+
// commonly follow it, which makes scanning the table for each instance
590+
// property terminate sooner on average
591+
for (const auto &entry :
592+
schema_context.schema.at(dynamic_context.keyword).as_object()) {
593+
const auto match{child_positions.find(entry.first)};
594+
if (match != child_positions.cend()) {
595+
indexes.emplace(entry.first, match->second);
596+
}
597+
}
598+
577599
if (context.mode == Mode::FastValidation && !track_evaluation &&
578600
!schema_context.schema.defines("patternProperties") &&
579601
schema_context.schema.defines("additionalProperties") &&

src/compiler/default_compiler_draft4.h

Lines changed: 287 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,14 @@
55
#include <sourcemeta/blaze/evaluator.h>
66

77
#include <algorithm> // std::ranges::any_of, std::ranges::all_of, std::ranges::none_of, std::find_if
8-
#include <cassert> // assert
9-
#include <set> // std::set
10-
#include <utility> // std::move, std::to_underlying
8+
#include <cassert> // assert
9+
#include <map> // std::map
10+
#include <optional> // std::optional, std::nullopt
11+
#include <set> // std::set
12+
#include <string_view> // std::string_view
13+
#include <unordered_set> // std::unordered_set
14+
#include <utility> // std::move, std::to_underlying
15+
#include <vector> // std::vector
1116

1217
#include "compile_helpers.h"
1318
#include "default_compiler_draft3.h"
@@ -73,6 +78,264 @@ auto compiler_draft4_applicator_allof(const Context &context,
7378
}
7479
}
7580

81+
// The set of facts about a disjunct that hold no matter what, used to
82+
// statically prove that a disjunction is a discriminated union
83+
struct SwitchDisjunctFacts {
84+
bool object_only{false};
85+
bool never_object{false};
86+
std::set<sourcemeta::core::JSON::String> required;
87+
std::map<sourcemeta::core::JSON::String,
88+
std::vector<sourcemeta::core::JSON::String>>
89+
pinned;
90+
};
91+
92+
// Whether the given JSON value is a string that names one of the JSON
// Schema primitive types other than `object`. Anything that is not a
// string, or any unrecognised name, yields false
auto is_known_non_object_type_name(const sourcemeta::core::JSON &entry)
    -> bool {
  static constexpr const char *non_object_types[] = {
      "null", "boolean", "array", "string", "number", "integer"};

  if (entry.is_string()) {
    const auto &candidate{entry.to_string()};
    return std::ranges::any_of(non_object_types,
                               [&candidate](const char *const known) {
                                 return candidate == known;
                               });
  }

  return false;
}
102+
103+
// Gather facts that necessarily hold for any instance that validates
104+
// against the given subschema, following static references and `allOf`
105+
// conjunctions. Note that when a subschema declares `$ref`, we ignore its
106+
// siblings: older dialects consider them inert, and on newer dialects
107+
// doing so only loses information, never inventing facts
108+
auto collect_switch_disjunct_facts(
109+
const Context &context, const sourcemeta::core::JSON &subschema,
110+
const sourcemeta::blaze::SchemaFrame::Location &location,
111+
const std::string_view root_dialect, const Vocabularies &root_vocabularies,
112+
std::unordered_set<const sourcemeta::blaze::SchemaFrame::Location *>
113+
&visited,
114+
SwitchDisjunctFacts &facts) -> void {
115+
if (!subschema.is_object() || !visited.insert(&location).second) {
116+
return;
117+
}
118+
119+
if (subschema.defines("$ref") && subschema.at("$ref").is_string()) {
120+
static const sourcemeta::core::JSON::String ref_keyword{"$ref"};
121+
const auto reference{context.frame.reference(
122+
sourcemeta::blaze::SchemaReferenceType::Static,
123+
location.pointer.concat(make_weak_pointer(ref_keyword)))};
124+
if (reference.has_value()) {
125+
const auto destination{
126+
context.frame.traverse(reference->get().destination)};
127+
if (destination.has_value()) {
128+
collect_switch_disjunct_facts(
129+
context,
130+
sourcemeta::core::get(context.root, destination->get().pointer),
131+
destination->get(), root_dialect, root_vocabularies, visited,
132+
facts);
133+
}
134+
}
135+
136+
return;
137+
}
138+
139+
using Known = sourcemeta::blaze::Vocabularies::Known;
140+
const auto vocabularies{
141+
location.dialect == root_dialect
142+
? root_vocabularies
143+
: context.frame.vocabularies(location, context.resolver)};
144+
const auto supports_validation{
145+
vocabularies.contains(Known::JSON_Schema_Draft_4) ||
146+
vocabularies.contains(Known::JSON_Schema_Draft_6) ||
147+
vocabularies.contains(Known::JSON_Schema_Draft_7) ||
148+
vocabularies.contains(Known::JSON_Schema_2019_09_Validation) ||
149+
vocabularies.contains(Known::JSON_Schema_2020_12_Validation)};
150+
const auto supports_applicator{
151+
vocabularies.contains(Known::JSON_Schema_Draft_4) ||
152+
vocabularies.contains(Known::JSON_Schema_Draft_6) ||
153+
vocabularies.contains(Known::JSON_Schema_Draft_7) ||
154+
vocabularies.contains(Known::JSON_Schema_2019_09_Applicator) ||
155+
vocabularies.contains(Known::JSON_Schema_2020_12_Applicator)};
156+
const auto supports_const{
157+
vocabularies.contains(Known::JSON_Schema_Draft_6) ||
158+
vocabularies.contains(Known::JSON_Schema_Draft_7) ||
159+
vocabularies.contains(Known::JSON_Schema_2019_09_Validation) ||
160+
vocabularies.contains(Known::JSON_Schema_2020_12_Validation)};
161+
162+
if (supports_validation) {
163+
if (subschema.defines("type")) {
164+
const auto &type{subschema.at("type")};
165+
if ((type.is_string() && type.to_string() == "object") ||
166+
(type.is_array() && !type.empty() &&
167+
std::ranges::all_of(type.as_array(), [](const auto &entry) {
168+
return entry.is_string() && entry.to_string() == "object";
169+
}))) {
170+
facts.object_only = true;
171+
} else if (is_known_non_object_type_name(type) ||
172+
(type.is_array() && !type.empty() &&
173+
std::ranges::all_of(type.as_array(),
174+
is_known_non_object_type_name))) {
175+
facts.never_object = true;
176+
}
177+
}
178+
179+
if (subschema.defines("enum") && subschema.at("enum").is_array() &&
180+
std::ranges::none_of(
181+
subschema.at("enum").as_array(),
182+
[](const auto &option) { return option.is_object(); })) {
183+
facts.never_object = true;
184+
}
185+
186+
if (supports_const && subschema.defines("const") &&
187+
!subschema.at("const").is_object()) {
188+
facts.never_object = true;
189+
}
190+
191+
if (subschema.defines("required") && subschema.at("required").is_array()) {
192+
for (const auto &entry : subschema.at("required").as_array()) {
193+
if (entry.is_string()) {
194+
facts.required.insert(entry.to_string());
195+
}
196+
}
197+
}
198+
}
199+
200+
if (supports_applicator && subschema.defines("properties") &&
201+
subschema.at("properties").is_object()) {
202+
for (const auto &entry : subschema.at("properties").as_object()) {
203+
if (facts.pinned.contains(entry.first) || !entry.second.is_object()) {
204+
continue;
205+
}
206+
207+
std::vector<sourcemeta::core::JSON::String> values;
208+
if (supports_const && entry.second.defines("const") &&
209+
entry.second.at("const").is_string()) {
210+
values.push_back(entry.second.at("const").to_string());
211+
} else if (supports_validation && entry.second.defines("enum") &&
212+
entry.second.at("enum").is_array() &&
213+
!entry.second.at("enum").empty() &&
214+
std::ranges::all_of(
215+
entry.second.at("enum").as_array(),
216+
[](const auto &option) { return option.is_string(); })) {
217+
for (const auto &option : entry.second.at("enum").as_array()) {
218+
values.push_back(option.to_string());
219+
}
220+
} else {
221+
continue;
222+
}
223+
224+
facts.pinned.emplace(entry.first, std::move(values));
225+
}
226+
}
227+
228+
if (supports_applicator && subschema.defines("allOf") &&
229+
subschema.at("allOf").is_array()) {
230+
static const sourcemeta::core::JSON::String allof_keyword{"allOf"};
231+
for (std::size_t index = 0; index < subschema.at("allOf").size(); index++) {
232+
collect_switch_disjunct_facts(
233+
context, subschema.at("allOf").at(index),
234+
context.frame.traverse(location,
235+
make_weak_pointer(allof_keyword, index)),
236+
root_dialect, root_vocabularies, visited, facts);
237+
}
238+
}
239+
}
240+
241+
// Attempt to compile a disjunction whose disjuncts all statically pin a
242+
// shared discriminator property to disjoint constant strings (on top of
243+
// requiring it on an object instance) into a single switch instruction
244+
// that picks the only disjunct that can possibly match in constant time.
245+
// Note that the given disjuncts are left untouched unless an instruction
246+
// could indeed be derived
247+
auto compile_switch_property_string(const Context &context,
248+
const SchemaContext &schema_context,
249+
const DynamicContext &dynamic_context,
250+
Instructions &disjunctors)
251+
-> std::optional<Instruction> {
252+
const auto &disjunction{schema_context.schema.at(dynamic_context.keyword)};
253+
if (disjunction.size() < 2) {
254+
return std::nullopt;
255+
}
256+
257+
const auto &keyword_entry{static_frame_entry(context, schema_context)};
258+
std::vector<SwitchDisjunctFacts> facts_list;
259+
facts_list.reserve(disjunction.size());
260+
// The index plus one of the only disjunct that can match non-object
261+
// instances, where zero means no such disjunct exists
262+
ValueUnsignedInteger otherwise{0};
263+
std::vector<std::size_t> discriminated;
264+
discriminated.reserve(disjunction.size());
265+
for (std::size_t index = 0; index < disjunction.size(); index++) {
266+
sourcemeta::core::WeakPointer disjunct_suffix;
267+
disjunct_suffix.push_back(index);
268+
SwitchDisjunctFacts facts;
269+
std::unordered_set<const sourcemeta::blaze::SchemaFrame::Location *>
270+
visited;
271+
collect_switch_disjunct_facts(
272+
context, disjunction.at(index),
273+
context.frame.traverse(keyword_entry, disjunct_suffix),
274+
keyword_entry.dialect, schema_context.vocabularies, visited, facts);
275+
if (facts.never_object) {
276+
if (otherwise > 0) {
277+
return std::nullopt;
278+
}
279+
280+
otherwise = index + 1;
281+
} else if (facts.object_only && !facts.pinned.empty()) {
282+
discriminated.push_back(index);
283+
} else {
284+
return std::nullopt;
285+
}
286+
287+
facts_list.push_back(std::move(facts));
288+
}
289+
290+
if (discriminated.size() < 2) {
291+
return std::nullopt;
292+
}
293+
294+
for (const auto &candidate : facts_list[discriminated.front()].pinned) {
295+
const auto &name{candidate.first};
296+
ValueNamedIndexes indexes;
297+
bool valid{true};
298+
for (const auto index : discriminated) {
299+
const auto &facts{facts_list[index]};
300+
if (!facts.required.contains(name)) {
301+
valid = false;
302+
break;
303+
}
304+
305+
const auto match{facts.pinned.find(name)};
306+
if (match == facts.pinned.cend()) {
307+
valid = false;
308+
break;
309+
}
310+
311+
for (const auto &option : match->second) {
312+
const auto hash{indexes.hash(option)};
313+
if (indexes.defines(option, hash)) {
314+
valid = false;
315+
break;
316+
}
317+
318+
indexes.emplace_assume_new(option, ValueUnsignedInteger{index}, hash);
319+
}
320+
321+
if (!valid) {
322+
break;
323+
}
324+
}
325+
326+
if (valid) {
327+
return make(
328+
sourcemeta::blaze::InstructionIndex::LogicalSwitchPropertyString,
329+
context, schema_context, dynamic_context,
330+
ValuePropertySwitch{make_property(name), std::move(indexes),
331+
otherwise},
332+
std::move(disjunctors));
333+
}
334+
}
335+
336+
return std::nullopt;
337+
}
338+
76339
auto compiler_draft4_applicator_anyof(const Context &context,
77340
const SchemaContext &schema_context,
78341
const DynamicContext &dynamic_context,
@@ -143,6 +406,15 @@ auto compiler_draft4_applicator_anyof(const Context &context,
143406
const auto requires_exhaustive{context.mode == Mode::Exhaustive ||
144407
requires_evaluation(context, schema_context)};
145408

409+
if (context.mode == Mode::FastValidation && !requires_exhaustive &&
410+
!schema_context.is_property_name) {
411+
auto switched{compile_switch_property_string(context, schema_context,
412+
dynamic_context, disjunctors)};
413+
if (switched.has_value()) {
414+
return {std::move(switched).value()};
415+
}
416+
}
417+
146418
return {make(sourcemeta::blaze::InstructionIndex::LogicalOr, context,
147419
schema_context, dynamic_context,
148420
ValueBoolean{requires_exhaustive}, std::move(disjunctors))};
@@ -173,6 +445,18 @@ auto compiler_draft4_applicator_oneof(const Context &context,
173445
const auto requires_exhaustive{context.mode == Mode::Exhaustive ||
174446
requires_evaluation(context, schema_context)};
175447

448+
// As the discriminator property values across disjuncts are proven to be
449+
// disjoint, at most one disjunct can ever match, so the exclusivity that
450+
// `oneOf` demands holds statically and the switch is also valid here
451+
if (context.mode == Mode::FastValidation && !requires_exhaustive &&
452+
!schema_context.is_property_name) {
453+
auto switched{compile_switch_property_string(context, schema_context,
454+
dynamic_context, disjunctors)};
455+
if (switched.has_value()) {
456+
return {std::move(switched).value()};
457+
}
458+
}
459+
176460
return {make(sourcemeta::blaze::InstructionIndex::LogicalXor, context,
177461
schema_context, dynamic_context,
178462
ValueBoolean{requires_exhaustive}, std::move(disjunctors))};

0 commit comments

Comments
 (0)