Skip to content

Commit c9e2764

Browse files
authored
Revamp AlterSchema to make use of framing (#1608)
Signed-off-by: Juan Cruz Viotti <[email protected]>
1 parent bf08681 commit c9e2764

File tree

147 files changed

+467
-387
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

147 files changed

+467
-387
lines changed

src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,22 +82,23 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRule {
8282
/// Apply the rule to a schema
8383
auto
8484
apply(JSON &schema, const Pointer &pointer, const SchemaResolver &resolver,
85+
const SchemaFrame &frame,
8586
const std::optional<std::string> &default_dialect = std::nullopt) const
8687
-> bool;
8788

8889
/// Check if the rule applies to a schema
8990
auto
9091
check(const JSON &schema, const Pointer &pointer,
91-
const SchemaResolver &resolver,
92+
const SchemaResolver &resolver, const SchemaFrame &frame,
9293
const std::optional<std::string> &default_dialect = std::nullopt) const
9394
-> bool;
9495

9596
private:
9697
/// The rule condition
9798
[[nodiscard]] virtual auto
9899
condition(const JSON &schema, const std::string &dialect,
99-
const std::set<std::string> &vocabularies,
100-
const Pointer &pointer) const -> bool = 0;
100+
const std::set<std::string> &vocabularies, const Pointer &pointer,
101+
const SchemaFrame &frame) const -> bool = 0;
101102

102103
/// The rule transformation
103104
virtual auto transform(JSON &schema) const -> void = 0;
@@ -134,8 +135,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRule {
134135
/// [[nodiscard]] auto condition(const sourcemeta::core::JSON &schema,
135136
/// const std::string &dialect,
136137
/// const std::set<std::string> &vocabularies,
137-
/// const sourcemeta::core::Pointer
138-
/// &pointer) const
138+
/// const sourcemeta::core::Pointer &pointer,
139+
/// const sourcemeta::core::SchemaFrame &) const
139140
/// -> bool override {
140141
/// return schema.defines("foo");
141142
/// }
@@ -201,12 +202,15 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformer {
201202
/// Remove a rule from the bundle
202203
auto remove(const std::string &name) -> bool;
203204

205+
// TODO: Still take a CheckCallback for rules that failed but do not
206+
// support auto-fixing
207+
204208
/// Apply the bundle of rules to a schema
205209
auto
206210
apply(JSON &schema, const SchemaWalker &walker,
207-
const SchemaResolver &resolver, const Pointer &pointer = empty_pointer,
211+
const SchemaResolver &resolver,
208212
const std::optional<std::string> &default_dialect = std::nullopt) const
209-
-> void;
213+
-> bool;
210214

211215
/// The callback that is called whenever the "check" functionality reports a
212216
/// rule whose condition holds true. The arguments are as follows:
@@ -221,7 +225,6 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformer {
221225
auto
222226
check(const JSON &schema, const SchemaWalker &walker,
223227
const SchemaResolver &resolver, const CheckCallback &callback,
224-
const Pointer &pointer = empty_pointer,
225228
const std::optional<std::string> &default_dialect = std::nullopt) const
226229
-> bool;
227230

src/core/jsonschema/transformer.cc

Lines changed: 65 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include <set> // std::set
55
#include <sstream> // std::ostringstream
66
#include <stdexcept> // std::runtime_error
7-
#include <utility> // std::move
7+
#include <utility> // std::move, std::pair
88

99
namespace {
1010

@@ -41,17 +41,19 @@ auto SchemaTransformRule::message() const -> const std::string & {
4141

4242
auto SchemaTransformRule::apply(
4343
JSON &schema, const Pointer &pointer, const SchemaResolver &resolver,
44+
const SchemaFrame &frame,
4445
const std::optional<std::string> &default_dialect) const -> bool {
4546
const std::optional<std::string> effective_dialect{
4647
dialect(schema, default_dialect)};
4748
if (!effective_dialect.has_value()) {
4849
throw SchemaError("Could not determine the schema dialect");
4950
}
5051

52+
// TODO: Stop converting to set. This hurts performance
5153
const auto current_vocabularies{
5254
vocabularies_to_set(vocabularies(schema, resolver, default_dialect))};
5355
if (!this->condition(schema, effective_dialect.value(), current_vocabularies,
54-
pointer)) {
56+
pointer, frame)) {
5557
return false;
5658
}
5759

@@ -60,7 +62,7 @@ auto SchemaTransformRule::apply(
6062
// The condition must always be false after applying the
6163
// transformation in order to avoid infinite loops
6264
if (this->condition(schema, effective_dialect.value(), current_vocabularies,
63-
pointer)) {
65+
pointer, frame)) {
6466
std::ostringstream error;
6567
error << "Rule condition holds after application: " << this->name();
6668
throw std::runtime_error(error.str());
@@ -71,6 +73,7 @@ auto SchemaTransformRule::apply(
7173

7274
auto SchemaTransformRule::check(
7375
const JSON &schema, const Pointer &pointer, const SchemaResolver &resolver,
76+
const SchemaFrame &frame,
7477
const std::optional<std::string> &default_dialect) const -> bool {
7578
const std::optional<std::string> effective_dialect{
7679
dialect(schema, default_dialect)};
@@ -81,88 +84,88 @@ auto SchemaTransformRule::check(
8184
return this->condition(
8285
schema, effective_dialect.value(),
8386
vocabularies_to_set(vocabularies(schema, resolver, default_dialect)),
84-
pointer);
87+
pointer, frame);
88+
}
89+
90+
auto SchemaTransformer::check(
91+
const JSON &schema, const SchemaWalker &walker,
92+
const SchemaResolver &resolver,
93+
const SchemaTransformer::CheckCallback &callback,
94+
const std::optional<std::string> &default_dialect) const -> bool {
95+
SchemaFrame frame{SchemaFrame::Mode::Locations};
96+
frame.analyse(schema, walker, resolver, default_dialect);
97+
98+
bool result{true};
99+
for (const auto &entry : frame.locations()) {
100+
if (entry.second.type != SchemaFrame::LocationType::Resource &&
101+
entry.second.type != SchemaFrame::LocationType::Subschema) {
102+
continue;
103+
}
104+
105+
const auto &current{get(schema, entry.second.pointer)};
106+
for (const auto &[name, rule] : this->rules) {
107+
if (rule->check(current, entry.second.pointer, resolver, frame,
108+
entry.second.dialect)) {
109+
result = false;
110+
callback(entry.second.pointer, name, rule->message());
111+
}
112+
}
113+
}
114+
115+
return result;
85116
}
86117

87118
auto SchemaTransformer::apply(
88119
JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver,
89-
const Pointer &pointer,
90-
const std::optional<std::string> &default_dialect) const -> void {
120+
const std::optional<std::string> &default_dialect) const -> bool {
91121
// There is no point in applying an empty bundle
92122
assert(!this->rules.empty());
123+
std::set<std::pair<Pointer, JSON::String>> processed_rules;
93124

94-
auto &current{get(schema, pointer)};
95-
const std::optional<std::string> root_dialect{
96-
dialect(schema, default_dialect)};
97-
const std::optional<std::string> effective_dialect{
98-
dialect(current, root_dialect)};
99-
100-
// (1) Transform the current schema object
101-
// Avoid recursion to not blow up the stack even on highly complex schemas
102-
std::set<std::string> processed_rules;
103125
while (true) {
104-
auto matches{processed_rules.size()};
105-
for (const auto &[name, rule] : this->rules) {
106-
// TODO: Process traces to fixup references
107-
const auto applied{
108-
rule->apply(current, pointer, resolver, effective_dialect)};
109-
if (applied) {
110-
if (processed_rules.contains(name)) {
126+
SchemaFrame frame{SchemaFrame::Mode::Locations};
127+
frame.analyse(schema, walker, resolver, default_dialect);
128+
129+
bool applied{false};
130+
for (const auto &entry : frame.locations()) {
131+
if (entry.second.type != SchemaFrame::LocationType::Resource &&
132+
entry.second.type != SchemaFrame::LocationType::Subschema) {
133+
continue;
134+
}
135+
136+
auto &current{get(schema, entry.second.pointer)};
137+
for (const auto &[name, rule] : this->rules) {
138+
applied = rule->apply(current, entry.second.pointer, resolver, frame,
139+
entry.second.dialect) ||
140+
applied;
141+
if (!applied) {
142+
continue;
143+
}
144+
145+
if (processed_rules.contains({entry.second.pointer, name})) {
146+
// TODO: Throw a better custom error that also highlights the schema
147+
// location
111148
std::ostringstream error;
112149
error << "Rules must only be processed once: " << name;
113150
throw std::runtime_error(error.str());
114151
}
115152

116-
processed_rules.insert(name);
153+
processed_rules.emplace(entry.second.pointer, name);
154+
goto alterschema_start_again;
117155
}
118156
}
119157

120-
if (matches < processed_rules.size()) {
121-
continue;
158+
alterschema_start_again:
159+
if (!applied) {
160+
break;
122161
}
123-
124-
break;
125162
}
126163

127-
// (2) Transform its sub-schemas
128-
for (const auto &entry :
129-
// TODO: Replace `SchemaIteratorFlat` with framing and then just get
130-
// rid of the idea of flat iterators, as we don't need it anywhere else
131-
SchemaIteratorFlat{current, walker, resolver, effective_dialect}) {
132-
apply(schema, walker, resolver, pointer.concat(entry.pointer),
133-
effective_dialect);
134-
}
164+
return true;
135165
}
136166

137167
auto SchemaTransformer::remove(const std::string &name) -> bool {
138168
return this->rules.erase(name) > 0;
139169
}
140170

141-
auto SchemaTransformer::check(
142-
const JSON &schema, const SchemaWalker &walker,
143-
const SchemaResolver &resolver,
144-
const SchemaTransformer::CheckCallback &callback, const Pointer &pointer,
145-
const std::optional<std::string> &default_dialect) const -> bool {
146-
const auto &current{get(schema, pointer)};
147-
const std::optional<std::string> root_dialect{
148-
dialect(schema, default_dialect)};
149-
const std::optional<std::string> effective_dialect{
150-
dialect(current, root_dialect)};
151-
152-
bool result{true};
153-
for (const auto &entry :
154-
SchemaIterator{current, walker, resolver, effective_dialect}) {
155-
const auto current_pointer{pointer.concat(entry.pointer)};
156-
for (const auto &[name, rule] : this->rules) {
157-
if (rule->check(get(current, entry.pointer), current_pointer, resolver,
158-
effective_dialect)) {
159-
result = false;
160-
callback(current_pointer, name, rule->message());
161-
}
162-
}
163-
}
164-
165-
return result;
166-
}
167-
168171
} // namespace sourcemeta::core

src/extension/alterschema/antipattern/const_with_type.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ class ConstWithType final : public SchemaTransformRule {
99
[[nodiscard]] auto condition(const sourcemeta::core::JSON &schema,
1010
const std::string &,
1111
const std::set<std::string> &vocabularies,
12-
const sourcemeta::core::Pointer &) const
12+
const sourcemeta::core::Pointer &,
13+
const sourcemeta::core::SchemaFrame &) const
1314
-> bool override {
1415
return contains_any(
1516
vocabularies,

src/extension/alterschema/antipattern/duplicate_enum_values.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ class DuplicateEnumValues final : public SchemaTransformRule {
88
[[nodiscard]] auto condition(const sourcemeta::core::JSON &schema,
99
const std::string &,
1010
const std::set<std::string> &vocabularies,
11-
const sourcemeta::core::Pointer &) const
11+
const sourcemeta::core::Pointer &,
12+
const sourcemeta::core::SchemaFrame &) const
1213
-> bool override {
1314
return contains_any(
1415
vocabularies,

src/extension/alterschema/antipattern/duplicate_required_values.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ class DuplicateRequiredValues final : public SchemaTransformRule {
99
[[nodiscard]] auto condition(const sourcemeta::core::JSON &schema,
1010
const std::string &,
1111
const std::set<std::string> &vocabularies,
12-
const sourcemeta::core::Pointer &) const
12+
const sourcemeta::core::Pointer &,
13+
const sourcemeta::core::SchemaFrame &) const
1314
-> bool override {
1415
return contains_any(
1516
vocabularies,

src/extension/alterschema/antipattern/enum_with_type.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ class EnumWithType final : public SchemaTransformRule {
99
[[nodiscard]] auto condition(const sourcemeta::core::JSON &schema,
1010
const std::string &,
1111
const std::set<std::string> &vocabularies,
12-
const sourcemeta::core::Pointer &) const
12+
const sourcemeta::core::Pointer &,
13+
const sourcemeta::core::SchemaFrame &) const
1314
-> bool override {
1415
return contains_any(
1516
vocabularies,

src/extension/alterschema/antipattern/exclusive_maximum_number_and_maximum.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ class ExclusiveMaximumNumberAndMaximum final : public SchemaTransformRule {
99
[[nodiscard]] auto condition(const sourcemeta::core::JSON &schema,
1010
const std::string &,
1111
const std::set<std::string> &vocabularies,
12-
const sourcemeta::core::Pointer &) const
12+
const sourcemeta::core::Pointer &,
13+
const sourcemeta::core::SchemaFrame &) const
1314
-> bool override {
1415
return contains_any(
1516
vocabularies,

src/extension/alterschema/antipattern/exclusive_minimum_number_and_minimum.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ class ExclusiveMinimumNumberAndMinimum final : public SchemaTransformRule {
99
[[nodiscard]] auto condition(const sourcemeta::core::JSON &schema,
1010
const std::string &,
1111
const std::set<std::string> &vocabularies,
12-
const sourcemeta::core::Pointer &) const
12+
const sourcemeta::core::Pointer &,
13+
const sourcemeta::core::SchemaFrame &) const
1314
-> bool override {
1415
return contains_any(
1516
vocabularies,

src/extension/alterschema/desugar/boolean_true.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,10 @@ class BooleanTrue final : public SchemaTransformRule {
66
"The boolean schema `true` is syntax sugar for the empty schema"} {
77
};
88

9-
[[nodiscard]] auto condition(const sourcemeta::core::JSON &schema,
10-
const std::string &,
11-
const std::set<std::string> &,
12-
const sourcemeta::core::Pointer &) const
13-
-> bool override {
9+
[[nodiscard]] auto
10+
condition(const sourcemeta::core::JSON &schema, const std::string &,
11+
const std::set<std::string> &, const sourcemeta::core::Pointer &,
12+
const sourcemeta::core::SchemaFrame &) const -> bool override {
1413
return schema.is_boolean() && schema.to_boolean();
1514
}
1615

src/extension/alterschema/desugar/const_as_enum.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ class ConstAsEnum final : public SchemaTransformRule {
88
[[nodiscard]] auto condition(const sourcemeta::core::JSON &schema,
99
const std::string &,
1010
const std::set<std::string> &vocabularies,
11-
const sourcemeta::core::Pointer &) const
11+
const sourcemeta::core::Pointer &,
12+
const sourcemeta::core::SchemaFrame &) const
1213
-> bool override {
1314
return contains_any(
1415
vocabularies,

src/extension/alterschema/desugar/exclusive_maximum_integer_to_maximum.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ class ExclusiveMaximumIntegerToMaximum final : public SchemaTransformRule {
99
[[nodiscard]] auto condition(const sourcemeta::core::JSON &schema,
1010
const std::string &,
1111
const std::set<std::string> &vocabularies,
12-
const sourcemeta::core::Pointer &) const
12+
const sourcemeta::core::Pointer &,
13+
const sourcemeta::core::SchemaFrame &) const
1314
-> bool override {
1415
return contains_any(
1516
vocabularies,

src/extension/alterschema/desugar/exclusive_minimum_integer_to_minimum.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ class ExclusiveMinimumIntegerToMinimum final : public SchemaTransformRule {
99
[[nodiscard]] auto condition(const sourcemeta::core::JSON &schema,
1010
const std::string &,
1111
const std::set<std::string> &vocabularies,
12-
const sourcemeta::core::Pointer &) const
12+
const sourcemeta::core::Pointer &,
13+
const sourcemeta::core::SchemaFrame &) const
1314
-> bool override {
1415
return contains_any(
1516
vocabularies,

src/extension/alterschema/desugar/type_array_to_any_of_2020_12.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,29 @@ class TypeArrayToAnyOf_2020_12 final : public SchemaTransformRule {
99
[[nodiscard]] auto condition(const sourcemeta::core::JSON &schema,
1010
const std::string &,
1111
const std::set<std::string> &vocabularies,
12-
const sourcemeta::core::Pointer &) const
12+
const sourcemeta::core::Pointer &,
13+
const sourcemeta::core::SchemaFrame &frame) const
1314
-> bool override {
15+
16+
// Note that a big limitation of this rule is that it cannot apply to
17+
// schemas that have identifiers. For example, consider a schema that has
18+
// a type union declaration alongside of an `anyOf` where one branch defines
19+
// `$id` or `$anchor`. We will end up duplicating identifiers (leading to
20+
// invalid schemas) and there is no silver bullet to avoid these cases.
21+
const auto has_identifiers{std::any_of(
22+
frame.locations().cbegin(), frame.locations().cend(),
23+
[](const auto &entry) {
24+
return entry.second.type ==
25+
sourcemeta::core::SchemaFrame::LocationType::Resource ||
26+
entry.second.type ==
27+
sourcemeta::core::SchemaFrame::LocationType::Anchor;
28+
})};
29+
1430
return contains_any(
1531
vocabularies,
1632
{"https://json-schema.org/draft/2020-12/vocab/validation",
1733
"https://json-schema.org/draft/2020-12/vocab/applicator"}) &&
18-
schema.is_object() && schema.defines("type") &&
34+
!has_identifiers && schema.is_object() && schema.defines("type") &&
1935
schema.at("type").is_array() &&
2036
// Non type-specific applicators can leads to invalid schemas
2137
!schema.defines("$defs") && !schema.defines("$ref") &&

0 commit comments

Comments
 (0)