Skip to content

Commit ef705a0

Browse files
authored
Try to better merge allOf during canonicalization (#745)
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent 0e61829 commit ef705a0

6 files changed

Lines changed: 751 additions & 36 deletions

src/alterschema/alterschema.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ auto WALK_UP_IN_PLACE_APPLICATORS(const JSON &root, const SchemaFrame &frame,
108108
}
109109

110110
#include "canonicalizer/additional_items_implicit.h"
111+
#include "canonicalizer/allof_merge_compatible_branches.h"
111112
#include "canonicalizer/comment_drop.h"
112113
#include "canonicalizer/const_as_enum.h"
113114
#include "canonicalizer/dependencies_to_any_of.h"
@@ -285,6 +286,7 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void {
285286
}
286287

287288
if (mode == AlterSchemaMode::Canonicalizer) {
289+
bundle.add<AllOfMergeCompatibleBranches>();
288290
bundle.add<TypeInheritInPlace>();
289291
bundle.add<TypeUnionImplicit>();
290292
bundle.add<TypeArrayToAnyOf>();
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
class AllOfMergeCompatibleBranches final : public SchemaTransformRule {
2+
public:
3+
using mutates = std::true_type;
4+
using reframe_after_transform = std::true_type;
5+
AllOfMergeCompatibleBranches()
6+
: SchemaTransformRule{"allof_merge_compatible_branches", ""} {};
7+
8+
[[nodiscard]] auto
9+
condition(const sourcemeta::core::JSON &schema,
10+
const sourcemeta::core::JSON &,
11+
const sourcemeta::core::Vocabularies &vocabularies,
12+
const sourcemeta::core::SchemaFrame &frame,
13+
const sourcemeta::core::SchemaFrame::Location &location,
14+
const sourcemeta::core::SchemaWalker &walker,
15+
const sourcemeta::core::SchemaResolver &) const
16+
-> SchemaTransformRule::Result override {
17+
static const JSON::String KEYWORD{"allOf"};
18+
ONLY_CONTINUE_IF(vocabularies.contains_any(
19+
{Vocabularies::Known::JSON_Schema_2020_12_Applicator,
20+
Vocabularies::Known::JSON_Schema_2019_09_Applicator,
21+
Vocabularies::Known::JSON_Schema_Draft_7,
22+
Vocabularies::Known::JSON_Schema_Draft_6,
23+
Vocabularies::Known::JSON_Schema_Draft_4}) &&
24+
schema.is_object() && schema.defines(KEYWORD) &&
25+
schema.at(KEYWORD).is_array() &&
26+
schema.at(KEYWORD).size() >= 2);
27+
28+
ONLY_CONTINUE_IF(!frame.has_references_through(
29+
location.pointer, WeakPointer::Token{std::cref(KEYWORD)}));
30+
31+
const auto &branches{schema.at(KEYWORD)};
32+
33+
for (std::size_t index_a = 0; index_a < branches.size(); ++index_a) {
34+
const auto &branch_a{branches.at(index_a)};
35+
if (!is_mergeable_branch(branch_a)) {
36+
continue;
37+
}
38+
39+
for (std::size_t index_b = index_a + 1; index_b < branches.size();
40+
++index_b) {
41+
const auto &branch_b{branches.at(index_b)};
42+
if (!is_mergeable_branch(branch_b)) {
43+
continue;
44+
}
45+
46+
const auto a_is_type_only{is_type_only_branch(branch_a)};
47+
const auto b_is_type_only{is_type_only_branch(branch_b)};
48+
if (!a_is_type_only && !b_is_type_only) {
49+
continue;
50+
}
51+
52+
const auto &non_type_branch{a_is_type_only ? branch_b : branch_a};
53+
if (has_in_place_applicators(non_type_branch)) {
54+
continue;
55+
}
56+
57+
if (has_overlapping_keywords(branch_a, branch_b)) {
58+
continue;
59+
}
60+
61+
if (has_cross_dependencies(branch_a, branch_b, walker, vocabularies)) {
62+
continue;
63+
}
64+
65+
this->merge_into_ = index_a;
66+
this->merge_from_ = index_b;
67+
return true;
68+
}
69+
}
70+
71+
return false;
72+
}
73+
74+
auto transform(JSON &schema, const Result &) const -> void override {
75+
static const JSON::String KEYWORD{"allOf"};
76+
auto &target{schema.at(KEYWORD).at(this->merge_into_)};
77+
const auto &source{schema.at(KEYWORD).at(this->merge_from_)};
78+
target.merge(source.as_object());
79+
schema.at(KEYWORD).erase(
80+
std::next(schema.at(KEYWORD).as_array().begin(),
81+
static_cast<std::ptrdiff_t>(this->merge_from_)));
82+
}
83+
84+
[[nodiscard]] auto rereference(const std::string_view, const Pointer &,
85+
const Pointer &target,
86+
const Pointer &current) const
87+
-> Pointer override {
88+
static const JSON::String KEYWORD{"allOf"};
89+
const auto relative{target.resolve_from(current)};
90+
if (relative.size() < 2 || !relative.at(0).is_property() ||
91+
relative.at(0).to_property() != KEYWORD || !relative.at(1).is_index()) {
92+
return target;
93+
}
94+
95+
const auto index{relative.at(1).to_index()};
96+
if (index == this->merge_from_) {
97+
const Pointer old_prefix{current.concat({KEYWORD, this->merge_from_})};
98+
const Pointer new_prefix{current.concat({KEYWORD, this->merge_into_})};
99+
return target.rebase(old_prefix, new_prefix);
100+
}
101+
102+
if (index > this->merge_from_) {
103+
const Pointer old_prefix{current.concat({KEYWORD, index})};
104+
const Pointer new_prefix{current.concat({KEYWORD, index - 1})};
105+
return target.rebase(old_prefix, new_prefix);
106+
}
107+
108+
return target;
109+
}
110+
111+
private:
112+
static auto is_type_only_branch(const JSON &branch) -> bool {
113+
return branch.size() == 1 && branch.defines("type");
114+
}
115+
116+
static auto has_in_place_applicators(const JSON &branch) -> bool {
117+
return branch.defines("anyOf") || branch.defines("oneOf") ||
118+
branch.defines("allOf") || branch.defines("not") ||
119+
branch.defines("if");
120+
}
121+
122+
static auto is_mergeable_branch(const JSON &branch) -> bool {
123+
if (!branch.is_object()) {
124+
return false;
125+
}
126+
return !branch.defines("$ref") && !branch.defines("$dynamicRef") &&
127+
!branch.defines("$recursiveRef") && !branch.defines("$id") &&
128+
!branch.defines("$schema") && !branch.defines("id") &&
129+
!branch.defines("$anchor") && !branch.defines("$dynamicAnchor") &&
130+
!branch.defines("$recursiveAnchor");
131+
}
132+
133+
static auto has_overlapping_keywords(const JSON &branch_a,
134+
const JSON &branch_b) -> bool {
135+
for (const auto &entry : branch_a.as_object()) {
136+
if (branch_b.defines(entry.first)) {
137+
return true;
138+
}
139+
}
140+
return false;
141+
}
142+
143+
static auto
144+
has_cross_dependencies(const JSON &branch_a, const JSON &branch_b,
145+
const sourcemeta::core::SchemaWalker &walker,
146+
const sourcemeta::core::Vocabularies &vocabularies)
147+
-> bool {
148+
for (const auto &entry_a : branch_a.as_object()) {
149+
const auto &metadata{walker(entry_a.first, vocabularies)};
150+
for (const auto &dependency : metadata.dependencies) {
151+
if (branch_b.defines(JSON::String{dependency})) {
152+
return true;
153+
}
154+
}
155+
}
156+
157+
for (const auto &entry_b : branch_b.as_object()) {
158+
const auto &metadata{walker(entry_b.first, vocabularies)};
159+
for (const auto &dependency : metadata.dependencies) {
160+
if (branch_a.defines(JSON::String{dependency})) {
161+
return true;
162+
}
163+
}
164+
}
165+
166+
return false;
167+
}
168+
169+
mutable std::size_t merge_into_{0};
170+
mutable std::size_t merge_from_{0};
171+
};

test/alterschema/alterschema_canonicalize_2020_12_test.cc

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3836,6 +3836,180 @@ TEST_F(Canonicalizer202012Test, safe_extract_wraps_modern_ref_into_allof) {
38363836
CANONICALIZE_AND_VALIDATE(document, expected, *compiled_meta_);
38373837
}
38383838

3839+
TEST_F(Canonicalizer202012Test, allof_merge_type_and_required) {
3840+
auto document = sourcemeta::core::parse_json(R"JSON({
3841+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3842+
"allOf": [
3843+
{ "type": "object" },
3844+
{ "required": [ "foo" ] }
3845+
]
3846+
})JSON");
3847+
3848+
const auto expected = sourcemeta::core::parse_json(R"JSON({
3849+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3850+
"type": "object",
3851+
"required": [ "foo" ],
3852+
"minProperties": 1,
3853+
"propertyNames": true,
3854+
"properties": {
3855+
"foo": true
3856+
},
3857+
"patternProperties": {}
3858+
})JSON");
3859+
3860+
CANONICALIZE_AND_VALIDATE(document, expected, *compiled_meta_);
3861+
}
3862+
3863+
TEST_F(Canonicalizer202012Test, allof_no_merge_branch_has_anchor) {
3864+
auto document = sourcemeta::core::parse_json(R"JSON({
3865+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3866+
"allOf": [
3867+
{ "type": "object" },
3868+
{ "$anchor": "test", "required": [ "x" ] }
3869+
]
3870+
})JSON");
3871+
3872+
const auto expected = sourcemeta::core::parse_json(R"JSON({
3873+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3874+
"allOf": [
3875+
{
3876+
"type": "object",
3877+
"minProperties": 0,
3878+
"propertyNames": true,
3879+
"properties": {},
3880+
"patternProperties": {}
3881+
},
3882+
{
3883+
"$anchor": "test",
3884+
"anyOf": [
3885+
{ "enum": [ null ] },
3886+
{ "enum": [ false, true ] },
3887+
{
3888+
"type": "object",
3889+
"required": [ "x" ],
3890+
"minProperties": 1,
3891+
"propertyNames": true,
3892+
"properties": { "x": true },
3893+
"patternProperties": {}
3894+
},
3895+
{
3896+
"type": "array",
3897+
"minItems": 0,
3898+
"uniqueItems": false,
3899+
"minContains": 0,
3900+
"contains": true,
3901+
"items": true
3902+
},
3903+
{ "type": "string", "minLength": 0 },
3904+
{ "type": "number" }
3905+
]
3906+
}
3907+
]
3908+
})JSON");
3909+
3910+
CANONICALIZE_AND_VALIDATE(document, expected, *compiled_meta_);
3911+
}
3912+
3913+
TEST_F(Canonicalizer202012Test, allof_no_merge_branch_has_dynamic_anchor) {
3914+
auto document = sourcemeta::core::parse_json(R"JSON({
3915+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3916+
"allOf": [
3917+
{ "type": "object" },
3918+
{ "$dynamicAnchor": "node", "required": [ "x" ] }
3919+
]
3920+
})JSON");
3921+
3922+
const auto expected = sourcemeta::core::parse_json(R"JSON({
3923+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3924+
"allOf": [
3925+
{
3926+
"type": "object",
3927+
"minProperties": 0,
3928+
"propertyNames": true,
3929+
"properties": {},
3930+
"patternProperties": {}
3931+
},
3932+
{
3933+
"$dynamicAnchor": "node",
3934+
"anyOf": [
3935+
{ "enum": [ null ] },
3936+
{ "enum": [ false, true ] },
3937+
{
3938+
"type": "object",
3939+
"required": [ "x" ],
3940+
"minProperties": 1,
3941+
"propertyNames": true,
3942+
"properties": { "x": true },
3943+
"patternProperties": {}
3944+
},
3945+
{
3946+
"type": "array",
3947+
"minItems": 0,
3948+
"uniqueItems": false,
3949+
"minContains": 0,
3950+
"contains": true,
3951+
"items": true
3952+
},
3953+
{ "type": "string", "minLength": 0 },
3954+
{ "type": "number" }
3955+
]
3956+
}
3957+
]
3958+
})JSON");
3959+
3960+
CANONICALIZE_AND_VALIDATE(document, expected, *compiled_meta_);
3961+
}
3962+
3963+
TEST_F(Canonicalizer202012Test, allof_no_merge_branch_has_id) {
3964+
auto document = sourcemeta::core::parse_json(R"JSON({
3965+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3966+
"allOf": [
3967+
{ "type": "object" },
3968+
{ "$id": "https://example.com/branch", "required": [ "x" ] }
3969+
]
3970+
})JSON");
3971+
3972+
const auto expected = sourcemeta::core::parse_json(R"JSON({
3973+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3974+
"allOf": [
3975+
{
3976+
"type": "object",
3977+
"minProperties": 0,
3978+
"propertyNames": true,
3979+
"properties": {},
3980+
"patternProperties": {}
3981+
},
3982+
{
3983+
"$id": "https://example.com/branch",
3984+
"anyOf": [
3985+
{ "enum": [ null ] },
3986+
{ "enum": [ false, true ] },
3987+
{
3988+
"type": "object",
3989+
"required": [ "x" ],
3990+
"minProperties": 1,
3991+
"propertyNames": true,
3992+
"properties": { "x": true },
3993+
"patternProperties": {}
3994+
},
3995+
{
3996+
"type": "array",
3997+
"minItems": 0,
3998+
"uniqueItems": false,
3999+
"minContains": 0,
4000+
"contains": true,
4001+
"items": true
4002+
},
4003+
{ "type": "string", "minLength": 0 },
4004+
{ "type": "number" }
4005+
]
4006+
}
4007+
]
4008+
})JSON");
4009+
4010+
CANONICALIZE_AND_VALIDATE(document, expected, *compiled_meta_);
4011+
}
4012+
38394013
TEST_F(Canonicalizer202012Test,
38404014
ref_with_applicator_sibling_preserves_cross_refs) {
38414015
auto document = sourcemeta::core::parse_json(R"JSON({

0 commit comments

Comments
 (0)