Skip to content

Commit 17342fa

Browse files
authored
Fix other allOf unnecessary type union canonical explosions (#748)
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent ef705a0 commit 17342fa

12 files changed

Lines changed: 3425 additions & 4443 deletions

src/alterschema/alterschema.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ auto WALK_UP_IN_PLACE_APPLICATORS(const JSON &root, const SchemaFrame &frame,
134134
#include "canonicalizer/if_then_else_implicit.h"
135135
#include "canonicalizer/implicit_contains_keywords.h"
136136
#include "canonicalizer/implicit_object_keywords.h"
137+
#include "canonicalizer/inline_single_use_ref.h"
137138
#include "canonicalizer/items_implicit.h"
138139
#include "canonicalizer/max_contains_covered_by_max_items.h"
139140
#include "canonicalizer/max_decimal_implicit.h"
@@ -286,6 +287,7 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void {
286287
}
287288

288289
if (mode == AlterSchemaMode::Canonicalizer) {
290+
bundle.add<InlineSingleUseRef>();
289291
bundle.add<AllOfMergeCompatibleBranches>();
290292
bundle.add<TypeInheritInPlace>();
291293
bundle.add<TypeUnionImplicit>();
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
class InlineSingleUseRef final : public SchemaTransformRule {
2+
public:
3+
using mutates = std::true_type;
4+
using reframe_after_transform = std::true_type;
5+
InlineSingleUseRef() : SchemaTransformRule{"inline_single_use_ref", ""} {};
6+
7+
[[nodiscard]] auto
8+
condition(const sourcemeta::core::JSON &schema,
9+
const sourcemeta::core::JSON &root,
10+
const sourcemeta::core::Vocabularies &vocabularies,
11+
const sourcemeta::core::SchemaFrame &frame,
12+
const sourcemeta::core::SchemaFrame::Location &location,
13+
const sourcemeta::core::SchemaWalker &,
14+
const sourcemeta::core::SchemaResolver &) const
15+
-> SchemaTransformRule::Result override {
16+
ONLY_CONTINUE_IF(schema.is_object() && schema.defines("$ref") &&
17+
schema.at("$ref").is_string() && schema.size() == 1);
18+
19+
if (!location.parent.has_value()) {
20+
return false;
21+
}
22+
{
23+
const auto &parent_pointer{location.parent.value()};
24+
const auto relative{location.pointer.resolve_from(parent_pointer)};
25+
ONLY_CONTINUE_IF(!relative.empty() && relative.at(0).is_property() &&
26+
relative.at(0).to_property() == "allOf" &&
27+
relative.size() >= 2 && relative.at(1).is_index());
28+
const auto &parent_schema{sourcemeta::core::get(root, parent_pointer)};
29+
ONLY_CONTINUE_IF(parent_schema.is_object() &&
30+
parent_schema.defines("allOf") &&
31+
parent_schema.at("allOf").is_array());
32+
const auto current_index{relative.at(1).to_index()};
33+
bool has_typed_sibling{false};
34+
for (std::size_t index = 0; index < parent_schema.at("allOf").size();
35+
++index) {
36+
if (index == current_index) {
37+
continue;
38+
}
39+
const auto &sibling{parent_schema.at("allOf").at(index)};
40+
if (sibling.is_object() &&
41+
(sibling.defines("type") || sibling.defines("enum"))) {
42+
has_typed_sibling = true;
43+
break;
44+
}
45+
}
46+
ONLY_CONTINUE_IF(has_typed_sibling);
47+
}
48+
49+
ONLY_CONTINUE_IF(vocabularies.contains_any(
50+
{Vocabularies::Known::JSON_Schema_2020_12_Core,
51+
Vocabularies::Known::JSON_Schema_2019_09_Core,
52+
Vocabularies::Known::JSON_Schema_Draft_7,
53+
Vocabularies::Known::JSON_Schema_Draft_6,
54+
Vocabularies::Known::JSON_Schema_Draft_4}));
55+
56+
const auto target{frame.traverse(schema.at("$ref").to_string())};
57+
ONLY_CONTINUE_IF(target.has_value());
58+
const auto &target_pointer{target->get().pointer};
59+
60+
if (target_pointer.size() < 2 || !target_pointer.at(0).is_property()) {
61+
return false;
62+
}
63+
const auto &container{target_pointer.at(0).to_property()};
64+
ONLY_CONTINUE_IF(container == "definitions" || container == "$defs");
65+
66+
std::size_t ref_count{0};
67+
for (const auto &reference : frame.references()) {
68+
const auto dest{frame.traverse(reference.second.destination)};
69+
if (!dest.has_value()) {
70+
continue;
71+
}
72+
if (dest->get().pointer.starts_with(target_pointer) ||
73+
target_pointer.starts_with(dest->get().pointer)) {
74+
++ref_count;
75+
}
76+
}
77+
78+
ONLY_CONTINUE_IF(ref_count == 1);
79+
80+
const auto &target_schema{sourcemeta::core::get(root, target_pointer)};
81+
ONLY_CONTINUE_IF(!target_schema.is_boolean());
82+
ONLY_CONTINUE_IF(target_schema.is_object() &&
83+
!target_schema.defines("type") &&
84+
!target_schema.defines("enum"));
85+
ONLY_CONTINUE_IF((!target_schema.defines("$id") &&
86+
!target_schema.defines("id") &&
87+
!target_schema.defines("$anchor") &&
88+
!target_schema.defines("$dynamicAnchor") &&
89+
!target_schema.defines("$recursiveAnchor")));
90+
91+
this->target_pointer_ = sourcemeta::core::to_pointer(target_pointer);
92+
this->target_copy_ = target_schema;
93+
return true;
94+
}
95+
96+
auto transform(JSON &schema, const Result &) const -> void override {
97+
schema.into(std::move(this->target_copy_));
98+
}
99+
100+
[[nodiscard]] auto rereference(const std::string_view, const Pointer &,
101+
const Pointer &target,
102+
const Pointer &current) const
103+
-> Pointer override {
104+
if (target.starts_with(this->target_pointer_)) {
105+
const auto relative{target.resolve_from(this->target_pointer_)};
106+
return current.concat(relative);
107+
}
108+
return target;
109+
}
110+
111+
private:
112+
mutable Pointer target_pointer_;
113+
mutable sourcemeta::core::JSON target_copy_{sourcemeta::core::JSON{nullptr}};
114+
};

src/alterschema/canonicalizer/type_inherit_in_place.h

Lines changed: 137 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,150 @@ class TypeInheritInPlace final : public SchemaTransformRule {
5959
return ancestor_schema.defines("type");
6060
})};
6161

62-
ONLY_CONTINUE_IF(ancestor.has_value());
63-
this->inherited_type_ = get(root, ancestor.value().get()).at("type");
64-
return true;
62+
if (ancestor.has_value()) {
63+
this->inherited_type_ = get(root, ancestor.value().get()).at("type");
64+
return true;
65+
}
66+
67+
auto walk_pointer{location.pointer};
68+
auto walk_parent{location.parent};
69+
while (walk_parent.has_value()) {
70+
const auto &wp{walk_parent.value()};
71+
const auto walk_relative{walk_pointer.resolve_from(wp)};
72+
if (walk_relative.empty() || !walk_relative.at(0).is_property()) {
73+
break;
74+
}
75+
const auto walk_entry{frame.traverse(frame.uri(wp).value().get())};
76+
if (!walk_entry.has_value()) {
77+
break;
78+
}
79+
const auto walk_vocabularies{
80+
frame.vocabularies(walk_entry.value().get(), resolver)};
81+
const auto walk_keyword_type{
82+
walker(walk_relative.at(0).to_property(), walk_vocabularies).type};
83+
84+
if (!IS_IN_PLACE_APPLICATOR(walk_keyword_type)) {
85+
break;
86+
}
87+
88+
if (walk_keyword_type == SchemaKeywordType::ApplicatorElementsInPlace &&
89+
walk_relative.size() >= 2 && walk_relative.at(1).is_index()) {
90+
const auto branch_index{walk_relative.at(1).to_index()};
91+
const auto &allof_parent{get(root, wp)};
92+
const auto &keyword_name{walk_relative.at(0).to_property()};
93+
if (allof_parent.is_object() && allof_parent.defines(keyword_name)) {
94+
const auto &branches{allof_parent.at(keyword_name)};
95+
if (branches.is_array()) {
96+
for (std::size_t index = 0; index < branches.size(); ++index) {
97+
if (index == branch_index) {
98+
continue;
99+
}
100+
const auto &sibling{branches.at(index)};
101+
if (sibling.is_object() && sibling.defines("type") &&
102+
sibling.at("type").is_string()) {
103+
this->inherited_type_ = sibling.at("type");
104+
return true;
105+
}
106+
if (sibling.is_object() && sibling.defines("enum") &&
107+
sibling.at("enum").is_array() &&
108+
!sibling.at("enum").empty()) {
109+
const auto inferred{infer_type_from_enum(sibling.at("enum"))};
110+
if (!inferred.empty()) {
111+
this->inherited_type_ = JSON{inferred};
112+
return true;
113+
}
114+
}
115+
if (sibling.is_object() && sibling.defines("$ref") &&
116+
sibling.at("$ref").is_string()) {
117+
const auto ref_target{
118+
frame.traverse(sibling.at("$ref").to_string())};
119+
if (ref_target.has_value()) {
120+
const auto &ref_schema{
121+
get(root, ref_target.value().get().pointer)};
122+
if (ref_schema.is_object() && ref_schema.defines("type") &&
123+
ref_schema.at("type").is_string()) {
124+
this->inherited_type_ = ref_schema.at("type");
125+
return true;
126+
}
127+
}
128+
}
129+
}
130+
}
131+
}
132+
}
133+
134+
walk_pointer = wp;
135+
walk_parent = walk_entry.value().get().parent;
136+
}
137+
138+
return false;
65139
}
66140

67141
// Write the type cached in `inherited_type_` into the schema's `type` keyword.
auto transform(JSON &schema, const Result &) const -> void override {
  const auto &type_to_apply{this->inherited_type_};
  schema.assign("type", type_to_apply);
}
70144

71145
private:
146+
/// Infer one JSON Schema type name shared by every value of an `enum` array.
///
/// Returns "string", "integer", "number", "object", "array", "null" or
/// "boolean" when all values fall under that single type. Integers only
/// yield "integer", while a mix of integer and real values yields "number".
/// Returns an empty string when the values are heterogeneous or when the
/// array is empty.
static auto infer_type_from_enum(const sourcemeta::core::JSON &enum_array)
    -> sourcemeta::core::JSON::String {
  using Type = sourcemeta::core::JSON::Type;

  // With no values every "all values are X" flag below would stay true, and
  // the function would wrongly report "string"
  if (enum_array.empty()) {
    return "";
  }

  bool all_null{true};
  bool all_boolean{true};
  bool all_integer{true};
  bool all_number{true};
  bool all_string{true};
  bool all_array{true};
  bool all_object{true};

  for (const auto &value : enum_array.as_array()) {
    const auto value_type{value.type()};
    all_null = all_null && value_type == Type::Null;
    all_boolean = all_boolean && value_type == Type::Boolean;
    all_integer = all_integer && value_type == Type::Integer;
    all_number = all_number &&
                 (value_type == Type::Integer || value_type == Type::Real);
    all_string = all_string && value_type == Type::String;
    all_array = all_array && value_type == Type::Array;
    all_object = all_object && value_type == Type::Object;
  }

  if (all_string) {
    return "string";
  }

  // Check "integer" before "number", as integers satisfy both flags
  if (all_integer) {
    return "integer";
  }

  if (all_number) {
    return "number";
  }

  if (all_object) {
    return "object";
  }

  if (all_array) {
    return "array";
  }

  if (all_null) {
    return "null";
  }

  if (all_boolean) {
    return "boolean";
  }

  return "";
}
205+
72206
mutable sourcemeta::core::JSON inherited_type_{
73207
sourcemeta::core::JSON{nullptr}};
74208
};

src/alterschema/canonicalizer/type_union_implicit.h

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@ class TypeUnionImplicit final : public SchemaTransformRule {
99

1010
[[nodiscard]] auto
1111
condition(const sourcemeta::core::JSON &schema,
12-
const sourcemeta::core::JSON &,
12+
const sourcemeta::core::JSON &root,
1313
const sourcemeta::core::Vocabularies &vocabularies,
14-
const sourcemeta::core::SchemaFrame &,
15-
const sourcemeta::core::SchemaFrame::Location &,
14+
const sourcemeta::core::SchemaFrame &frame,
15+
const sourcemeta::core::SchemaFrame::Location &location,
1616
const sourcemeta::core::SchemaWalker &walker,
17-
const sourcemeta::core::SchemaResolver &) const
17+
const sourcemeta::core::SchemaResolver &resolver) const
1818
-> SchemaTransformRule::Result override {
1919
using namespace sourcemeta::core;
2020
ONLY_CONTINUE_IF(schema.is_object() && !schema.empty());
@@ -54,6 +54,9 @@ class TypeUnionImplicit final : public SchemaTransformRule {
5454
!IS_IN_PLACE_APPLICATOR(keyword_type));
5555
}
5656

57+
ONLY_CONTINUE_IF(!this->allof_sibling_constrains_type(root, frame, location,
58+
walker, resolver));
59+
5760
return true;
5861
}
5962

@@ -71,4 +74,68 @@ class TypeUnionImplicit final : public SchemaTransformRule {
7174

7275
schema.assign("type", std::move(types));
7376
}
77+
78+
private:
79+
static auto allof_sibling_constrains_type(
80+
const sourcemeta::core::JSON &root,
81+
const sourcemeta::core::SchemaFrame &frame,
82+
const sourcemeta::core::SchemaFrame::Location &location,
83+
const sourcemeta::core::SchemaWalker &walker,
84+
const sourcemeta::core::SchemaResolver &resolver) -> bool {
85+
using namespace sourcemeta::core;
86+
auto walk_pointer{location.pointer};
87+
auto walk_parent{location.parent};
88+
while (walk_parent.has_value()) {
89+
const auto &wp{walk_parent.value()};
90+
const auto walk_relative{walk_pointer.resolve_from(wp)};
91+
if (walk_relative.empty() || !walk_relative.at(0).is_property()) {
92+
break;
93+
}
94+
const auto walk_entry{frame.traverse(frame.uri(wp).value().get())};
95+
if (!walk_entry.has_value()) {
96+
break;
97+
}
98+
const auto walk_vocabularies{
99+
frame.vocabularies(walk_entry.value().get(), resolver)};
100+
const auto walk_keyword_type{
101+
walker(walk_relative.at(0).to_property(), walk_vocabularies).type};
102+
103+
if (!IS_IN_PLACE_APPLICATOR(walk_keyword_type)) {
104+
break;
105+
}
106+
107+
if (walk_keyword_type == SchemaKeywordType::ApplicatorElementsInPlace &&
108+
walk_relative.size() >= 2 && walk_relative.at(1).is_index()) {
109+
const auto branch_index{walk_relative.at(1).to_index()};
110+
const auto &allof_parent{get(root, wp)};
111+
const auto &keyword_name{walk_relative.at(0).to_property()};
112+
if (allof_parent.is_object() && allof_parent.defines(keyword_name) &&
113+
allof_parent.at(keyword_name).is_array()) {
114+
const auto &branches{allof_parent.at(keyword_name)};
115+
for (std::size_t index = 0; index < branches.size(); ++index) {
116+
if (index == branch_index) {
117+
continue;
118+
}
119+
const auto &sibling{branches.at(index)};
120+
if (!sibling.is_object()) {
121+
continue;
122+
}
123+
124+
if (sibling.defines("type")) {
125+
return true;
126+
}
127+
128+
if (sibling.defines("enum") && sibling.at("enum").is_array() &&
129+
!sibling.at("enum").empty()) {
130+
return true;
131+
}
132+
}
133+
}
134+
}
135+
136+
walk_pointer = wp;
137+
walk_parent = walk_entry.value().get().parent;
138+
}
139+
return false;
140+
}
74141
};

test/alterschema/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,5 @@ target_link_libraries(sourcemeta_blaze_alterschema_unit
3838
PRIVATE sourcemeta::blaze::evaluator)
3939
target_compile_definitions(sourcemeta_blaze_alterschema_unit
4040
PRIVATE SCHEMAS_PATH="${PROJECT_SOURCE_DIR}/schemas")
41+
42+
set_tests_properties(blaze.alterschema PROPERTIES TIMEOUT 60)

0 commit comments

Comments
 (0)