From f5252b4b6a8a2d3e197e09b6301c2b6349db00c9 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Mon, 9 Sep 2024 14:12:00 -0400 Subject: [PATCH] De-duplicate `anyOf`/`allOf` Signed-off-by: Juan Cruz Viotti --- DEPENDENCIES | 2 +- src/linter/CMakeLists.txt | 2 ++ src/linter/linter.cc | 8 +++-- .../redundant/duplicate_allof_branches.h | 33 +++++++++++++++++ .../redundant/duplicate_anyof_branches.h | 33 +++++++++++++++++ test/linter/2019_09_test.cc | 36 +++++++++++++++++++ test/linter/2020_12_test.cc | 36 +++++++++++++++++++ test/linter/draft4_test.cc | 36 +++++++++++++++++++ test/linter/draft6_test.cc | 36 +++++++++++++++++++ test/linter/draft7_test.cc | 36 +++++++++++++++++++ .../sourcemeta/jsontoolkit/json_object.h | 20 ++++++++++- .../src/jsonschema/compile_evaluate.cc | 10 ++++++ .../sourcemeta/jsontoolkit/jsonschema_error.h | 14 ++++++++ 13 files changed, 298 insertions(+), 4 deletions(-) create mode 100644 src/linter/redundant/duplicate_allof_branches.h create mode 100644 src/linter/redundant/duplicate_anyof_branches.h diff --git a/DEPENDENCIES b/DEPENDENCIES index 3a8af51..a75e1aa 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,4 +1,4 @@ vendorpull https://github.com/sourcemeta/vendorpull dea311b5bfb53b6926a4140267959ae334d3ecf4 noa https://github.com/sourcemeta/noa 7e26abce7a4e31e86a16ef2851702a56773ca527 -jsontoolkit https://github.com/sourcemeta/jsontoolkit 6c78f131029c647be79cc5da5874fa8d50415cde +jsontoolkit https://github.com/sourcemeta/jsontoolkit a0aafd3f62c03514db9b71b1c89f2ed69669b130 googletest https://github.com/google/googletest a7f443b80b105f940225332ed3c31f2790092f47 diff --git a/src/linter/CMakeLists.txt b/src/linter/CMakeLists.txt index eac1b4d..4638dfc 100644 --- a/src/linter/CMakeLists.txt +++ b/src/linter/CMakeLists.txt @@ -20,6 +20,8 @@ noa_library(NAMESPACE sourcemeta PROJECT alterschema NAME linter redundant/content_media_type_without_encoding.h redundant/content_schema_default.h redundant/content_schema_without_media_type.h + redundant/duplicate_allof_branches.h + redundant/duplicate_anyof_branches.h redundant/else_without_if.h redundant/items_array_default.h redundant/items_schema_default.h diff --git a/src/linter/linter.cc b/src/linter/linter.cc index 03e6dc4..531c9af 100644 --- a/src/linter/linter.cc +++ b/src/linter/linter.cc @@ -3,8 +3,8 @@ #include // assert // For built-in rules -#include // std::any_of -#include // std::cbegin, std::cend +#include +#include namespace sourcemeta::alterschema { template auto contains_any(const T &container, const T &values) -> bool { @@ -29,6 +29,8 @@ auto contains_any(const T &container, const T &values) -> bool { #include "redundant/content_media_type_without_encoding.h" #include "redundant/content_schema_default.h" #include "redundant/content_schema_without_media_type.h" +#include "redundant/duplicate_allof_branches.h" +#include "redundant/duplicate_anyof_branches.h" #include "redundant/else_without_if.h" #include "redundant/items_array_default.h" #include "redundant/items_schema_default.h" @@ -62,6 +64,8 @@ auto add(Bundle &bundle, const LinterCategory category) -> void { bundle.add(); bundle.add(); bundle.add(); + bundle.add(); + bundle.add(); bundle.add(); bundle.add(); bundle.add(); diff --git a/src/linter/redundant/duplicate_allof_branches.h b/src/linter/redundant/duplicate_allof_branches.h new file mode 100644 index 0000000..66a82a8 --- /dev/null +++ b/src/linter/redundant/duplicate_allof_branches.h @@ -0,0 +1,33 @@ +class DuplicateAllOfBranches final : public Rule { +public: + DuplicateAllOfBranches() + : Rule{"duplicate_allof_branches", + "Setting duplicate subschemas in `allOf` is redundant, as it " + "produces " + "unnecessary additional validation that is guaranteed to not " + "affect the validation result"} {}; + + [[nodiscard]] auto + condition(const sourcemeta::jsontoolkit::JSON &schema, const std::string &, + const std::set &vocabularies, + const sourcemeta::jsontoolkit::Pointer &) const -> bool override { + return contains_any( + vocabularies, + {"https://json-schema.org/draft/2020-12/vocab/applicator", + "https://json-schema.org/draft/2019-09/vocab/applicator", + "http://json-schema.org/draft-07/schema#", + "http://json-schema.org/draft-06/schema#", + "http://json-schema.org/draft-04/schema#"}) && + schema.is_object() && schema.defines("allOf") && + schema.at("allOf").is_array() && !schema.at("allOf").unique(); + } + + auto transform(Transformer &transformer) const -> void override { + auto collection = transformer.schema().at("allOf"); + std::sort(collection.as_array().begin(), collection.as_array().end()); + auto last = + std::unique(collection.as_array().begin(), collection.as_array().end()); + collection.erase(last, collection.as_array().end()); + transformer.replace({"allOf"}, std::move(collection)); + } +}; diff --git a/src/linter/redundant/duplicate_anyof_branches.h b/src/linter/redundant/duplicate_anyof_branches.h new file mode 100644 index 0000000..c9b9a54 --- /dev/null +++ b/src/linter/redundant/duplicate_anyof_branches.h @@ -0,0 +1,33 @@ +class DuplicateAnyOfBranches final : public Rule { +public: + DuplicateAnyOfBranches() + : Rule{"duplicate_anyof_branches", + "Setting duplicate subschemas in `anyOf` is redundant, as it " + "produces " + "unnecessary additional validation that is guaranteed to not " + "affect the validation result"} {}; + + [[nodiscard]] auto + condition(const sourcemeta::jsontoolkit::JSON &schema, const std::string &, + const std::set &vocabularies, + const sourcemeta::jsontoolkit::Pointer &) const -> bool override { + return contains_any( + vocabularies, + {"https://json-schema.org/draft/2020-12/vocab/applicator", + "https://json-schema.org/draft/2019-09/vocab/applicator", + "http://json-schema.org/draft-07/schema#", + "http://json-schema.org/draft-06/schema#", + "http://json-schema.org/draft-04/schema#"}) && + schema.is_object() && schema.defines("anyOf") && + schema.at("anyOf").is_array() && !schema.at("anyOf").unique(); + } + + auto transform(Transformer &transformer) const -> void override { + auto collection = transformer.schema().at("anyOf"); + std::sort(collection.as_array().begin(), collection.as_array().end()); + auto last = + std::unique(collection.as_array().begin(), collection.as_array().end()); + collection.erase(last, collection.as_array().end()); + transformer.replace({"anyOf"}, std::move(collection)); + } +}; diff --git a/test/linter/2019_09_test.cc b/test/linter/2019_09_test.cc index 155de32..385c460 100644 --- a/test/linter/2019_09_test.cc +++ b/test/linter/2019_09_test.cc @@ -700,3 +700,39 @@ TEST(Lint_2019_09, exclusive_minimum_number_and_minimum_3) { EXPECT_EQ(document, expected); } + +TEST(Lint_2019_09, duplicate_allof_branches_1) { + sourcemeta::jsontoolkit::JSON document = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "allOf": [ { "type": "string" }, { "type": "integer" }, { "type": "string" } ] + })JSON"); + + LINT_AND_FIX(document); + + const sourcemeta::jsontoolkit::JSON expected = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "allOf": [ { "type": "integer" }, { "type": "string" } ] + })JSON"); + + EXPECT_EQ(document, expected); +} + +TEST(Lint_2019_09, duplicate_anyof_branches_1) { + sourcemeta::jsontoolkit::JSON document = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "anyOf": [ { "type": "string" }, { "type": "integer" }, { "type": "string" } ] + })JSON"); + + LINT_AND_FIX(document); + + const sourcemeta::jsontoolkit::JSON expected = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "anyOf": [ { "type": "integer" }, { "type": "string" } ] + })JSON"); + + EXPECT_EQ(document, expected); +} diff --git a/test/linter/2020_12_test.cc b/test/linter/2020_12_test.cc index 0776794..6d5c46c 100644 --- a/test/linter/2020_12_test.cc +++ b/test/linter/2020_12_test.cc @@ -683,3 +683,39 @@ TEST(Lint_2020_12, exclusive_minimum_number_and_minimum_3) { EXPECT_EQ(document, expected); } + +TEST(Lint_2020_12, duplicate_allof_branches_1) { + sourcemeta::jsontoolkit::JSON document = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [ { "type": "string" }, { "type": "integer" }, { "type": "string" } ] + })JSON"); + + LINT_AND_FIX(document); + + const sourcemeta::jsontoolkit::JSON expected = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [ { "type": "integer" }, { "type": "string" } ] + })JSON"); + + EXPECT_EQ(document, expected); +} + +TEST(Lint_2020_12, duplicate_anyof_branches_1) { + sourcemeta::jsontoolkit::JSON document = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "anyOf": [ { "type": "string" }, { "type": "integer" }, { "type": "string" } ] + })JSON"); + + LINT_AND_FIX(document); + + const sourcemeta::jsontoolkit::JSON expected = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "anyOf": [ { "type": "integer" }, { "type": "string" } ] + })JSON"); + + EXPECT_EQ(document, expected); +} diff --git a/test/linter/draft4_test.cc b/test/linter/draft4_test.cc index 03f6981..896ac6f 100644 --- a/test/linter/draft4_test.cc +++ b/test/linter/draft4_test.cc @@ -197,3 +197,39 @@ TEST(Lint_draft4, duplicate_required_values_1) { EXPECT_EQ(document, expected); } + +TEST(Lint_draft4, duplicate_allof_branches_1) { + sourcemeta::jsontoolkit::JSON document = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "http://json-schema.org/draft-04/schema#", + "allOf": [ { "type": "string" }, { "type": "integer" }, { "type": "string" } ] + })JSON"); + + LINT_AND_FIX(document); + + const sourcemeta::jsontoolkit::JSON expected = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "http://json-schema.org/draft-04/schema#", + "allOf": [ { "type": "integer" }, { "type": "string" } ] + })JSON"); + + EXPECT_EQ(document, expected); +} + +TEST(Lint_draft4, duplicate_anyof_branches_1) { + sourcemeta::jsontoolkit::JSON document = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "http://json-schema.org/draft-04/schema#", + "anyOf": [ { "type": "string" }, { "type": "integer" }, { "type": "string" } ] + })JSON"); + + LINT_AND_FIX(document); + + const sourcemeta::jsontoolkit::JSON expected = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "http://json-schema.org/draft-04/schema#", + "anyOf": [ { "type": "integer" }, { "type": "string" } ] + })JSON"); + + EXPECT_EQ(document, expected); +} diff --git a/test/linter/draft6_test.cc b/test/linter/draft6_test.cc index 275bde0..c6b9174 100644 --- a/test/linter/draft6_test.cc +++ b/test/linter/draft6_test.cc @@ -348,3 +348,39 @@ TEST(Lint_draft6, exclusive_minimum_number_and_minimum_3) { EXPECT_EQ(document, expected); } + +TEST(Lint_draft6, duplicate_allof_branches_1) { + sourcemeta::jsontoolkit::JSON document = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "http://json-schema.org/draft-06/schema#", + "allOf": [ { "type": "string" }, { "type": "integer" }, { "type": "string" } ] + })JSON"); + + LINT_AND_FIX(document); + + const sourcemeta::jsontoolkit::JSON expected = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "http://json-schema.org/draft-06/schema#", + "allOf": [ { "type": "integer" }, { "type": "string" } ] + })JSON"); + + EXPECT_EQ(document, expected); +} + +TEST(Lint_draft6, duplicate_anyof_branches_1) { + sourcemeta::jsontoolkit::JSON document = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "http://json-schema.org/draft-06/schema#", + "anyOf": [ { "type": "string" }, { "type": "integer" }, { "type": "string" } ] + })JSON"); + + LINT_AND_FIX(document); + + const sourcemeta::jsontoolkit::JSON expected = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "http://json-schema.org/draft-06/schema#", + "anyOf": [ { "type": "integer" }, { "type": "string" } ] + })JSON"); + + EXPECT_EQ(document, expected); +} diff --git a/test/linter/draft7_test.cc b/test/linter/draft7_test.cc index f1e57f9..72393bc 100644 --- a/test/linter/draft7_test.cc +++ b/test/linter/draft7_test.cc @@ -439,3 +439,39 @@ TEST(Lint_draft7, exclusive_minimum_number_and_minimum_3) { EXPECT_EQ(document, expected); } + +TEST(Lint_draft7, duplicate_allof_branches_1) { + sourcemeta::jsontoolkit::JSON document = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "http://json-schema.org/draft-07/schema#", + "allOf": [ { "type": "string" }, { "type": "integer" }, { "type": "string" } ] + })JSON"); + + LINT_AND_FIX(document); + + const sourcemeta::jsontoolkit::JSON expected = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "http://json-schema.org/draft-07/schema#", + "allOf": [ { "type": "integer" }, { "type": "string" } ] + })JSON"); + + EXPECT_EQ(document, expected); +} + +TEST(Lint_draft7, duplicate_anyof_branches_1) { + sourcemeta::jsontoolkit::JSON document = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "http://json-schema.org/draft-07/schema#", + "anyOf": [ { "type": "string" }, { "type": "integer" }, { "type": "string" } ] + })JSON"); + + LINT_AND_FIX(document); + + const sourcemeta::jsontoolkit::JSON expected = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "http://json-schema.org/draft-07/schema#", + "anyOf": [ { "type": "integer" }, { "type": "string" } ] + })JSON"); + + EXPECT_EQ(document, expected); +} diff --git a/vendor/jsontoolkit/src/json/include/sourcemeta/jsontoolkit/json_object.h b/vendor/jsontoolkit/src/json/include/sourcemeta/jsontoolkit/json_object.h index 1330eee..b9d2b77 100644 --- a/vendor/jsontoolkit/src/json/include/sourcemeta/jsontoolkit/json_object.h +++ b/vendor/jsontoolkit/src/json/include/sourcemeta/jsontoolkit/json_object.h @@ -35,9 +35,27 @@ template class JSONObject { // Operators // We cannot default given that this class references // a JSON "value" as an incomplete type - auto operator<(const JSONObject &) const noexcept -> bool { + + auto operator<(const JSONObject &other) const noexcept -> bool { + // The `std::unordered_map` container, by definition, does not provide + // ordering. However, we still want some level of ordering to allow + // arrays of objects to be sorted. + + // First try a size comparison + if (this->data.size() != other.data.size()) { + return this->data.size() < other.data.size(); + } + + // Otherwise do value comparison for common properties + for (const auto &[key, value] : this->data) { + if (other.data.contains(key) && value < other.data.at(key)) { + return true; + } + } + return false; } + auto operator<=(const JSONObject &other) const noexcept -> bool { return this->data <= other.data; } diff --git a/vendor/jsontoolkit/src/jsonschema/compile_evaluate.cc b/vendor/jsontoolkit/src/jsonschema/compile_evaluate.cc index 516abaf..834f81d 100644 --- a/vendor/jsontoolkit/src/jsonschema/compile_evaluate.cc +++ b/vendor/jsontoolkit/src/jsonschema/compile_evaluate.cc @@ -166,6 +166,16 @@ class EvaluationContext { } template auto push(const T &step) -> void { + // Guard against infinite recursion in a cheap manner, as + // infinite recursion will manifest itself through huge + // ever-growing evaluate paths + constexpr auto EVALUATE_PATH_LIMIT{400}; + if (this->evaluate_path_.size() > EVALUATE_PATH_LIMIT) [[unlikely]] { + throw sourcemeta::jsontoolkit::SchemaEvaluationError( + "The evaluation path depth limit was reached " + "likely due to infinite recursion"); + } + assert(step.relative_instance_location.size() <= 1); this->frame_sizes.emplace_back(step.relative_schema_location.size(), step.relative_instance_location.size()); diff --git a/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema_error.h b/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema_error.h index 76f1b10..8eef6f8 100644 --- a/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema_error.h +++ b/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema_error.h @@ -99,6 +99,20 @@ class SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_EXPORT SchemaReferenceError std::string message_; }; +/// @ingroup jsonschema +/// An error that represents a schema evaluation error event +class SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_EXPORT SchemaEvaluationError + : public std::exception { +public: + SchemaEvaluationError(std::string message) : message_{std::move(message)} {} + [[nodiscard]] auto what() const noexcept -> const char * override { + return this->message_.c_str(); + } + +private: + std::string message_; +}; + #if defined(_MSC_VER) #pragma warning(default : 4251 4275) #endif