|
5 | 5 | #include <sourcemeta/blaze/evaluator.h> |
6 | 6 |
|
7 | 7 | #include <algorithm> // std::ranges::any_of, std::ranges::all_of, std::ranges::none_of, std::find_if |
8 | | -#include <cassert> // assert |
9 | | -#include <set> // std::set |
10 | | -#include <utility> // std::move, std::to_underlying |
| 8 | +#include <cassert> // assert |
| 9 | +#include <map> // std::map |
| 10 | +#include <optional> // std::optional, std::nullopt |
| 11 | +#include <set> // std::set |
| 12 | +#include <string_view> // std::string_view |
| 13 | +#include <unordered_set> // std::unordered_set |
| 14 | +#include <utility> // std::move, std::to_underlying |
| 15 | +#include <vector> // std::vector |
11 | 16 |
|
12 | 17 | #include "compile_helpers.h" |
13 | 18 | #include "default_compiler_draft3.h" |
@@ -73,6 +78,264 @@ auto compiler_draft4_applicator_allof(const Context &context, |
73 | 78 | } |
74 | 79 | } |
75 | 80 |
|
| 81 | +// The set of facts about a disjunct that hold no matter what, used to |
| 82 | +// statically prove that a disjunction is a discriminated union |
| 83 | +struct SwitchDisjunctFacts { |
| 84 | + bool object_only{false}; |
| 85 | + bool never_object{false}; |
| 86 | + std::set<sourcemeta::core::JSON::String> required; |
| 87 | + std::map<sourcemeta::core::JSON::String, |
| 88 | + std::vector<sourcemeta::core::JSON::String>> |
| 89 | + pinned; |
| 90 | +}; |
| 91 | + |
| 92 | +auto is_known_non_object_type_name(const sourcemeta::core::JSON &entry) |
| 93 | + -> bool { |
| 94 | + if (!entry.is_string()) { |
| 95 | + return false; |
| 96 | + } |
| 97 | + |
| 98 | + const auto &name{entry.to_string()}; |
| 99 | + return name == "null" || name == "boolean" || name == "array" || |
| 100 | + name == "string" || name == "number" || name == "integer"; |
| 101 | +} |
| 102 | + |
| 103 | +// Gather facts that necessarily hold for any instance that validates |
| 104 | +// against the given subschema, following static references and `allOf` |
| 105 | +// conjunctions. Note that when a subschema declares `$ref`, we ignore its |
| 106 | +// siblings: older dialects consider them inert, and on newer dialects |
| 107 | +// doing so only loses information, never inventing facts |
| 108 | +auto collect_switch_disjunct_facts( |
| 109 | + const Context &context, const sourcemeta::core::JSON &subschema, |
| 110 | + const sourcemeta::blaze::SchemaFrame::Location &location, |
| 111 | + const std::string_view root_dialect, const Vocabularies &root_vocabularies, |
| 112 | + std::unordered_set<const sourcemeta::blaze::SchemaFrame::Location *> |
| 113 | + &visited, |
| 114 | + SwitchDisjunctFacts &facts) -> void { |
| 115 | + if (!subschema.is_object() || !visited.insert(&location).second) { |
| 116 | + return; |
| 117 | + } |
| 118 | + |
| 119 | + if (subschema.defines("$ref") && subschema.at("$ref").is_string()) { |
| 120 | + static const sourcemeta::core::JSON::String ref_keyword{"$ref"}; |
| 121 | + const auto reference{context.frame.reference( |
| 122 | + sourcemeta::blaze::SchemaReferenceType::Static, |
| 123 | + location.pointer.concat(make_weak_pointer(ref_keyword)))}; |
| 124 | + if (reference.has_value()) { |
| 125 | + const auto destination{ |
| 126 | + context.frame.traverse(reference->get().destination)}; |
| 127 | + if (destination.has_value()) { |
| 128 | + collect_switch_disjunct_facts( |
| 129 | + context, |
| 130 | + sourcemeta::core::get(context.root, destination->get().pointer), |
| 131 | + destination->get(), root_dialect, root_vocabularies, visited, |
| 132 | + facts); |
| 133 | + } |
| 134 | + } |
| 135 | + |
| 136 | + return; |
| 137 | + } |
| 138 | + |
| 139 | + using Known = sourcemeta::blaze::Vocabularies::Known; |
| 140 | + const auto vocabularies{ |
| 141 | + location.dialect == root_dialect |
| 142 | + ? root_vocabularies |
| 143 | + : context.frame.vocabularies(location, context.resolver)}; |
| 144 | + const auto supports_validation{ |
| 145 | + vocabularies.contains(Known::JSON_Schema_Draft_4) || |
| 146 | + vocabularies.contains(Known::JSON_Schema_Draft_6) || |
| 147 | + vocabularies.contains(Known::JSON_Schema_Draft_7) || |
| 148 | + vocabularies.contains(Known::JSON_Schema_2019_09_Validation) || |
| 149 | + vocabularies.contains(Known::JSON_Schema_2020_12_Validation)}; |
| 150 | + const auto supports_applicator{ |
| 151 | + vocabularies.contains(Known::JSON_Schema_Draft_4) || |
| 152 | + vocabularies.contains(Known::JSON_Schema_Draft_6) || |
| 153 | + vocabularies.contains(Known::JSON_Schema_Draft_7) || |
| 154 | + vocabularies.contains(Known::JSON_Schema_2019_09_Applicator) || |
| 155 | + vocabularies.contains(Known::JSON_Schema_2020_12_Applicator)}; |
| 156 | + const auto supports_const{ |
| 157 | + vocabularies.contains(Known::JSON_Schema_Draft_6) || |
| 158 | + vocabularies.contains(Known::JSON_Schema_Draft_7) || |
| 159 | + vocabularies.contains(Known::JSON_Schema_2019_09_Validation) || |
| 160 | + vocabularies.contains(Known::JSON_Schema_2020_12_Validation)}; |
| 161 | + |
| 162 | + if (supports_validation) { |
| 163 | + if (subschema.defines("type")) { |
| 164 | + const auto &type{subschema.at("type")}; |
| 165 | + if ((type.is_string() && type.to_string() == "object") || |
| 166 | + (type.is_array() && !type.empty() && |
| 167 | + std::ranges::all_of(type.as_array(), [](const auto &entry) { |
| 168 | + return entry.is_string() && entry.to_string() == "object"; |
| 169 | + }))) { |
| 170 | + facts.object_only = true; |
| 171 | + } else if (is_known_non_object_type_name(type) || |
| 172 | + (type.is_array() && !type.empty() && |
| 173 | + std::ranges::all_of(type.as_array(), |
| 174 | + is_known_non_object_type_name))) { |
| 175 | + facts.never_object = true; |
| 176 | + } |
| 177 | + } |
| 178 | + |
| 179 | + if (subschema.defines("enum") && subschema.at("enum").is_array() && |
| 180 | + std::ranges::none_of( |
| 181 | + subschema.at("enum").as_array(), |
| 182 | + [](const auto &option) { return option.is_object(); })) { |
| 183 | + facts.never_object = true; |
| 184 | + } |
| 185 | + |
| 186 | + if (supports_const && subschema.defines("const") && |
| 187 | + !subschema.at("const").is_object()) { |
| 188 | + facts.never_object = true; |
| 189 | + } |
| 190 | + |
| 191 | + if (subschema.defines("required") && subschema.at("required").is_array()) { |
| 192 | + for (const auto &entry : subschema.at("required").as_array()) { |
| 193 | + if (entry.is_string()) { |
| 194 | + facts.required.insert(entry.to_string()); |
| 195 | + } |
| 196 | + } |
| 197 | + } |
| 198 | + } |
| 199 | + |
| 200 | + if (supports_applicator && subschema.defines("properties") && |
| 201 | + subschema.at("properties").is_object()) { |
| 202 | + for (const auto &entry : subschema.at("properties").as_object()) { |
| 203 | + if (facts.pinned.contains(entry.first) || !entry.second.is_object()) { |
| 204 | + continue; |
| 205 | + } |
| 206 | + |
| 207 | + std::vector<sourcemeta::core::JSON::String> values; |
| 208 | + if (supports_const && entry.second.defines("const") && |
| 209 | + entry.second.at("const").is_string()) { |
| 210 | + values.push_back(entry.second.at("const").to_string()); |
| 211 | + } else if (supports_validation && entry.second.defines("enum") && |
| 212 | + entry.second.at("enum").is_array() && |
| 213 | + !entry.second.at("enum").empty() && |
| 214 | + std::ranges::all_of( |
| 215 | + entry.second.at("enum").as_array(), |
| 216 | + [](const auto &option) { return option.is_string(); })) { |
| 217 | + for (const auto &option : entry.second.at("enum").as_array()) { |
| 218 | + values.push_back(option.to_string()); |
| 219 | + } |
| 220 | + } else { |
| 221 | + continue; |
| 222 | + } |
| 223 | + |
| 224 | + facts.pinned.emplace(entry.first, std::move(values)); |
| 225 | + } |
| 226 | + } |
| 227 | + |
| 228 | + if (supports_applicator && subschema.defines("allOf") && |
| 229 | + subschema.at("allOf").is_array()) { |
| 230 | + static const sourcemeta::core::JSON::String allof_keyword{"allOf"}; |
| 231 | + for (std::size_t index = 0; index < subschema.at("allOf").size(); index++) { |
| 232 | + collect_switch_disjunct_facts( |
| 233 | + context, subschema.at("allOf").at(index), |
| 234 | + context.frame.traverse(location, |
| 235 | + make_weak_pointer(allof_keyword, index)), |
| 236 | + root_dialect, root_vocabularies, visited, facts); |
| 237 | + } |
| 238 | + } |
| 239 | +} |
| 240 | + |
| 241 | +// Attempt to compile a disjunction whose disjuncts all statically pin a |
| 242 | +// shared discriminator property to disjoint constant strings (on top of |
| 243 | +// requiring it on an object instance) into a single switch instruction |
| 244 | +// that picks the only disjunct that can possibly match in constant time. |
| 245 | +// Note that the given disjuncts are left untouched unless an instruction |
| 246 | +// could indeed be derived |
| 247 | +auto compile_switch_property_string(const Context &context, |
| 248 | + const SchemaContext &schema_context, |
| 249 | + const DynamicContext &dynamic_context, |
| 250 | + Instructions &disjunctors) |
| 251 | + -> std::optional<Instruction> { |
| 252 | + const auto &disjunction{schema_context.schema.at(dynamic_context.keyword)}; |
| 253 | + if (disjunction.size() < 2) { |
| 254 | + return std::nullopt; |
| 255 | + } |
| 256 | + |
| 257 | + const auto &keyword_entry{static_frame_entry(context, schema_context)}; |
| 258 | + std::vector<SwitchDisjunctFacts> facts_list; |
| 259 | + facts_list.reserve(disjunction.size()); |
| 260 | + // The index plus one of the only disjunct that can match non-object |
| 261 | + // instances, where zero means no such disjunct exists |
| 262 | + ValueUnsignedInteger otherwise{0}; |
| 263 | + std::vector<std::size_t> discriminated; |
| 264 | + discriminated.reserve(disjunction.size()); |
| 265 | + for (std::size_t index = 0; index < disjunction.size(); index++) { |
| 266 | + sourcemeta::core::WeakPointer disjunct_suffix; |
| 267 | + disjunct_suffix.push_back(index); |
| 268 | + SwitchDisjunctFacts facts; |
| 269 | + std::unordered_set<const sourcemeta::blaze::SchemaFrame::Location *> |
| 270 | + visited; |
| 271 | + collect_switch_disjunct_facts( |
| 272 | + context, disjunction.at(index), |
| 273 | + context.frame.traverse(keyword_entry, disjunct_suffix), |
| 274 | + keyword_entry.dialect, schema_context.vocabularies, visited, facts); |
| 275 | + if (facts.never_object) { |
| 276 | + if (otherwise > 0) { |
| 277 | + return std::nullopt; |
| 278 | + } |
| 279 | + |
| 280 | + otherwise = index + 1; |
| 281 | + } else if (facts.object_only && !facts.pinned.empty()) { |
| 282 | + discriminated.push_back(index); |
| 283 | + } else { |
| 284 | + return std::nullopt; |
| 285 | + } |
| 286 | + |
| 287 | + facts_list.push_back(std::move(facts)); |
| 288 | + } |
| 289 | + |
| 290 | + if (discriminated.size() < 2) { |
| 291 | + return std::nullopt; |
| 292 | + } |
| 293 | + |
| 294 | + for (const auto &candidate : facts_list[discriminated.front()].pinned) { |
| 295 | + const auto &name{candidate.first}; |
| 296 | + ValueNamedIndexes indexes; |
| 297 | + bool valid{true}; |
| 298 | + for (const auto index : discriminated) { |
| 299 | + const auto &facts{facts_list[index]}; |
| 300 | + if (!facts.required.contains(name)) { |
| 301 | + valid = false; |
| 302 | + break; |
| 303 | + } |
| 304 | + |
| 305 | + const auto match{facts.pinned.find(name)}; |
| 306 | + if (match == facts.pinned.cend()) { |
| 307 | + valid = false; |
| 308 | + break; |
| 309 | + } |
| 310 | + |
| 311 | + for (const auto &option : match->second) { |
| 312 | + const auto hash{indexes.hash(option)}; |
| 313 | + if (indexes.defines(option, hash)) { |
| 314 | + valid = false; |
| 315 | + break; |
| 316 | + } |
| 317 | + |
| 318 | + indexes.emplace_assume_new(option, ValueUnsignedInteger{index}, hash); |
| 319 | + } |
| 320 | + |
| 321 | + if (!valid) { |
| 322 | + break; |
| 323 | + } |
| 324 | + } |
| 325 | + |
| 326 | + if (valid) { |
| 327 | + return make( |
| 328 | + sourcemeta::blaze::InstructionIndex::LogicalSwitchPropertyString, |
| 329 | + context, schema_context, dynamic_context, |
| 330 | + ValuePropertySwitch{make_property(name), std::move(indexes), |
| 331 | + otherwise}, |
| 332 | + std::move(disjunctors)); |
| 333 | + } |
| 334 | + } |
| 335 | + |
| 336 | + return std::nullopt; |
| 337 | +} |
| 338 | + |
76 | 339 | auto compiler_draft4_applicator_anyof(const Context &context, |
77 | 340 | const SchemaContext &schema_context, |
78 | 341 | const DynamicContext &dynamic_context, |
@@ -143,6 +406,15 @@ auto compiler_draft4_applicator_anyof(const Context &context, |
143 | 406 | const auto requires_exhaustive{context.mode == Mode::Exhaustive || |
144 | 407 | requires_evaluation(context, schema_context)}; |
145 | 408 |
|
| 409 | + if (context.mode == Mode::FastValidation && !requires_exhaustive && |
| 410 | + !schema_context.is_property_name) { |
| 411 | + auto switched{compile_switch_property_string(context, schema_context, |
| 412 | + dynamic_context, disjunctors)}; |
| 413 | + if (switched.has_value()) { |
| 414 | + return {std::move(switched).value()}; |
| 415 | + } |
| 416 | + } |
| 417 | + |
146 | 418 | return {make(sourcemeta::blaze::InstructionIndex::LogicalOr, context, |
147 | 419 | schema_context, dynamic_context, |
148 | 420 | ValueBoolean{requires_exhaustive}, std::move(disjunctors))}; |
@@ -173,6 +445,18 @@ auto compiler_draft4_applicator_oneof(const Context &context, |
173 | 445 | const auto requires_exhaustive{context.mode == Mode::Exhaustive || |
174 | 446 | requires_evaluation(context, schema_context)}; |
175 | 447 |
|
| 448 | + // As the discriminator property values across disjuncts are proven to be |
| 449 | + // disjoint, at most one disjunct can ever match, so the exclusivity that |
| 450 | + // `oneOf` demands holds statically and the switch is also valid here |
| 451 | + if (context.mode == Mode::FastValidation && !requires_exhaustive && |
| 452 | + !schema_context.is_property_name) { |
| 453 | + auto switched{compile_switch_property_string(context, schema_context, |
| 454 | + dynamic_context, disjunctors)}; |
| 455 | + if (switched.has_value()) { |
| 456 | + return {std::move(switched).value()}; |
| 457 | + } |
| 458 | + } |
| 459 | + |
176 | 460 | return {make(sourcemeta::blaze::InstructionIndex::LogicalXor, context, |
177 | 461 | schema_context, dynamic_context, |
178 | 462 | ValueBoolean{requires_exhaustive}, std::move(disjunctors))}; |
|
0 commit comments