diff --git a/development/cycle.py b/development/cycle.py index ef2031af..a5da07aa 100644 --- a/development/cycle.py +++ b/development/cycle.py @@ -628,6 +628,18 @@ def fix_signature(path, signature): if parameter[0] == "upset_roots": assert len(parameter) == 2 parameter[1] = "Iterable[Iterable[int]]" + if path == ["lincs", "classification", "AcceptedValues", "RealThresholds", "__init__"]: + if parameter[0] == "thresholds": + assert len(parameter) == 2 + parameter[1] = "Iterable[Optional[float]]" + if path == ["lincs", "classification", "AcceptedValues", "IntegerThresholds", "__init__"]: + if parameter[0] == "thresholds": + assert len(parameter) == 2 + parameter[1] = "Iterable[Optional[int]]" + if path == ["lincs", "classification", "AcceptedValues", "EnumeratedThresholds", "__init__"]: + if parameter[0] == "thresholds": + assert len(parameter) == 2 + parameter[1] = "Iterable[Optional[str]]" if path == ["lincs", "classification", "Alternative", "__init__"]: if parameter[0] == "category_index": assert parameter[2] == "None" diff --git a/doc-sources/reference/lincs.rst b/doc-sources/reference/lincs.rst index 5085a3c8..a1c4e226 100644 --- a/doc-sources/reference/lincs.rst +++ b/doc-sources/reference/lincs.rst @@ -339,12 +339,12 @@ Descriptor for thresholds for an real-valued criterion. - .. method:: __init__(thresholds: Iterable[float]) + .. method:: __init__(thresholds: Iterable[Optional[float]]) Parameters map exactly to attributes with identical names. .. property:: thresholds - :type: Iterable[float] + :type: Iterable[Optional[float]] The thresholds for this descriptor. @@ -357,12 +357,12 @@ Descriptor for thresholds for an integer-valued criterion. - .. method:: __init__(thresholds: Iterable[int]) + .. method:: __init__(thresholds: Iterable[Optional[int]]) Parameters map exactly to attributes with identical names. .. property:: thresholds - :type: Iterable[int] + :type: Iterable[Optional[int]] The thresholds for this descriptor. @@ -375,12 +375,12 @@ Descriptor for thresholds for a criterion taking enumerated values. - .. method:: __init__(thresholds: Iterable[str]) + .. method:: __init__(thresholds: Iterable[Optional[str]]) Parameters map exactly to attributes with identical names. .. property:: thresholds - :type: Iterable[str] + :type: Iterable[Optional[str]] The thresholds for this descriptor. diff --git a/doc-sources/reference/lincs.yml b/doc-sources/reference/lincs.yml index c53041cc..a82b2adc 100644 --- a/doc-sources/reference/lincs.yml +++ b/doc-sources/reference/lincs.yml @@ -216,21 +216,21 @@ children: children: - name: __init__ - name: thresholds - type: Iterable[float] + type: Iterable[Optional[float]] - name: real_thresholds type: RealThresholds - name: IntegerThresholds children: - name: __init__ - name: thresholds - type: Iterable[int] + type: Iterable[Optional[int]] - name: integer_thresholds type: IntegerThresholds - name: EnumeratedThresholds children: - name: __init__ - name: thresholds - type: Iterable[str] + type: Iterable[Optional[str]] - name: enumerated_thresholds type: EnumeratedThresholds - name: SufficientCoalitions diff --git a/lincs/liblincs/classification.cpp b/lincs/liblincs/classification.cpp index 1a394b2f..4b0d189a 100644 --- a/lincs/liblincs/classification.cpp +++ b/lincs/liblincs/classification.cpp @@ -26,31 +26,43 @@ bool better_or_equal( model.get_accepted_values()[criterion_index].get(), [&model, &performance, &criterion, boundary_index](const AcceptedValues::RealThresholds& accepted_values) { const float value = performance.get_real().get_value(); - const float threshold = accepted_values.get_thresholds()[boundary_index]; - switch (criterion.get_real_values().get_preference_direction()) { - case Criterion::PreferenceDirection::increasing: - return value >= threshold; - case Criterion::PreferenceDirection::decreasing: - return value <= threshold; + const std::optional threshold = accepted_values.get_thresholds()[boundary_index]; + if (threshold) { + switch (criterion.get_real_values().get_preference_direction()) { + case Criterion::PreferenceDirection::increasing: + return value >= *threshold; + case Criterion::PreferenceDirection::decreasing: + return value <= *threshold; + } + unreachable(); + } else { + return false; } - unreachable(); }, [&model, &performance, &criterion, boundary_index](const AcceptedValues::IntegerThresholds& accepted_values) { const int value = performance.get_integer().get_value(); - const int threshold = accepted_values.get_thresholds()[boundary_index]; - switch (criterion.get_integer_values().get_preference_direction()) { - case Criterion::PreferenceDirection::increasing: - return value >= threshold; - case Criterion::PreferenceDirection::decreasing: - return value <= threshold; + const std::optional threshold = accepted_values.get_thresholds()[boundary_index]; + if (threshold) { + switch (criterion.get_integer_values().get_preference_direction()) { + case Criterion::PreferenceDirection::increasing: + return value >= *threshold; + case Criterion::PreferenceDirection::decreasing: + return value <= *threshold; + } + unreachable(); + } else { + return false; } - unreachable(); }, [&model, &performance, &criterion, boundary_index](const AcceptedValues::EnumeratedThresholds& accepted_values) { const auto& ranks = criterion.get_enumerated_values().get_value_ranks(); const std::string& value = performance.get_enumerated().get_value(); - const std::string& threshold = accepted_values.get_thresholds()[boundary_index]; - return ranks.at(value) >= ranks.at(threshold); + const std::optional& threshold = accepted_values.get_thresholds()[boundary_index]; + if (threshold) { + return ranks.at(value) >= ranks.at(*threshold); + } else { + return false; + } } ); } @@ -270,4 +282,66 @@ TEST_CASE("Classification with decreasing criteria") { CHECK(result.changed == 8); } +TEST_CASE("Classification with unreachable thresholds") { + Problem problem{ + { + Criterion("Criterion 1", Criterion::RealValues(Criterion::PreferenceDirection::increasing, 0, 1)), + }, + {{"Cat 1"}, {"Cat 2"}, {"Cat 3"}, {"Cat 4"}}, + }; + + Alternatives alternatives{problem, { + {"A", {Performance(Performance::Real(0.24))}, std::nullopt}, + {"A", {Performance(Performance::Real(0.25))}, std::nullopt}, + {"A", {Performance(Performance::Real(0.49))}, std::nullopt}, + {"A", {Performance(Performance::Real(0.50))}, std::nullopt}, + {"A", {Performance(Performance::Real(0.74))}, std::nullopt}, + {"A", {Performance(Performance::Real(0.75))}, std::nullopt}, + }}; + + classify_alternatives( + problem, + Model{ + problem, + { + AcceptedValues(AcceptedValues::RealThresholds({0.25, 0.5, 0.75})), + }, + { + SufficientCoalitions(SufficientCoalitions::Weights({1})), + SufficientCoalitions(SufficientCoalitions::Weights({1})), + SufficientCoalitions(SufficientCoalitions::Weights({1})), + }, + }, + &alternatives); + + CHECK(*alternatives.get_alternatives()[0].get_category_index() == 0); + CHECK(*alternatives.get_alternatives()[1].get_category_index() == 1); + CHECK(*alternatives.get_alternatives()[2].get_category_index() == 1); + CHECK(*alternatives.get_alternatives()[3].get_category_index() == 2); + CHECK(*alternatives.get_alternatives()[4].get_category_index() == 2); + CHECK(*alternatives.get_alternatives()[5].get_category_index() == 3); + + classify_alternatives( + problem, + Model{ + problem, + { + AcceptedValues(AcceptedValues::RealThresholds({0.25, 0.5, std::nullopt})), + }, + { + SufficientCoalitions(SufficientCoalitions::Weights({1})), + SufficientCoalitions(SufficientCoalitions::Weights({1})), + SufficientCoalitions(SufficientCoalitions::Weights({1})), + }, + }, + &alternatives); + + CHECK(*alternatives.get_alternatives()[0].get_category_index() == 0); + CHECK(*alternatives.get_alternatives()[1].get_category_index() == 1); + CHECK(*alternatives.get_alternatives()[2].get_category_index() == 1); + CHECK(*alternatives.get_alternatives()[3].get_category_index() == 2); + CHECK(*alternatives.get_alternatives()[4].get_category_index() == 2); + CHECK(*alternatives.get_alternatives()[5].get_category_index() == 2); +} + } // namespace lincs diff --git a/lincs/liblincs/generation.cpp b/lincs/liblincs/generation.cpp index a0cc8ece..a61df5ba 100644 --- a/lincs/liblincs/generation.cpp +++ b/lincs/liblincs/generation.cpp @@ -211,7 +211,7 @@ Model generate_mrsort_classification_model(const Problem& problem, const unsigne accepted_values.push_back(dispatch( problem.get_criteria()[criterion_index].get_values(), [boundaries_count, &profiles, criterion_index](const Criterion::RealValues&) { - std::vector thresholds; + std::vector> thresholds; thresholds.reserve(boundaries_count); for (unsigned boundary_index = 0; boundary_index != boundaries_count; ++boundary_index) { thresholds.push_back(std::get(profiles[boundary_index][criterion_index])); @@ -219,7 +219,7 @@ Model generate_mrsort_classification_model(const Problem& problem, const unsigne return AcceptedValues(AcceptedValues::RealThresholds(thresholds)); }, [boundaries_count, &profiles, criterion_index](const Criterion::IntegerValues&) { - std::vector thresholds; + std::vector> thresholds; thresholds.reserve(boundaries_count); for (unsigned boundary_index = 0; boundary_index != boundaries_count; ++boundary_index) { thresholds.push_back(std::get(profiles[boundary_index][criterion_index])); @@ -227,7 +227,7 @@ Model generate_mrsort_classification_model(const Problem& problem, const unsigne return AcceptedValues(AcceptedValues::IntegerThresholds(thresholds)); }, [boundaries_count, &profiles, criterion_index](const Criterion::EnumeratedValues&) { - std::vector thresholds; + std::vector> thresholds; thresholds.reserve(boundaries_count); for (unsigned boundary_index = 0; boundary_index != boundaries_count; ++boundary_index) { thresholds.push_back(std::get(profiles[boundary_index][criterion_index])); @@ -671,12 +671,12 @@ TEST_CASE("Random min/max") { CHECK(problem.get_criteria()[0].get_real_values().get_min_value() == doctest::Approx(-25.092)); CHECK(problem.get_criteria()[0].get_real_values().get_max_value() == doctest::Approx(59.3086)); - CHECK(model.get_accepted_values()[0].get_real_thresholds().get_thresholds()[0] == doctest::Approx(6.52194)); + CHECK(*model.get_accepted_values()[0].get_real_thresholds().get_thresholds()[0] == doctest::Approx(6.52194)); CHECK(alternatives.get_alternatives()[0].get_profile()[0].get_real().get_value() == doctest::Approx(45.3692)); CHECK(problem.get_criteria()[1].get_real_values().get_min_value() == doctest::Approx(-63.313)); CHECK(problem.get_criteria()[1].get_real_values().get_max_value() == doctest::Approx(46.3988)); - CHECK(model.get_accepted_values()[1].get_real_thresholds().get_thresholds()[0] == doctest::Approx(24.0712)); + CHECK(*model.get_accepted_values()[1].get_real_thresholds().get_thresholds()[0] == doctest::Approx(24.0712)); CHECK(alternatives.get_alternatives()[0].get_profile()[1].get_real().get_value() == doctest::Approx(-15.8581)); } @@ -692,8 +692,8 @@ TEST_CASE("Decreasing criterion") { CHECK(problem.get_criteria()[0].get_real_values().get_preference_direction() == Criterion::PreferenceDirection::decreasing); // Profiles are in decreasing order - CHECK(model.get_accepted_values()[0].get_real_thresholds().get_thresholds()[0] == doctest::Approx(0.790612)); - CHECK(model.get_accepted_values()[0].get_real_thresholds().get_thresholds()[1] == doctest::Approx(0.377049)); + CHECK(*model.get_accepted_values()[0].get_real_thresholds().get_thresholds()[0] == doctest::Approx(0.790612)); + CHECK(*model.get_accepted_values()[0].get_real_thresholds().get_thresholds()[1] == doctest::Approx(0.377049)); CHECK(alternatives.get_alternatives()[0].get_profile()[0].get_real().get_value() == doctest::Approx(0.834842)); CHECK(*alternatives.get_alternatives()[0].get_category_index() == 0); diff --git a/lincs/liblincs/io/model.cpp b/lincs/liblincs/io/model.cpp index 8cd5f38e..863157c4 100644 --- a/lincs/liblincs/io/model.cpp +++ b/lincs/liblincs/io/model.cpp @@ -28,6 +28,32 @@ TEST_CASE("libyaml-cpp uses sufficient precision for floats") { CHECK(YAML::Load(ss).as() == 0x1.c78b0cp-2f); // No approximation: no loss of precision } +namespace YAML { + +template +Emitter& operator<<(Emitter& out, const std::optional& o) { + if (o) { + out << *o; + } else { + out << Null; + } + return out; +} + +template +struct convert> { + static bool decode(const Node& node, std::optional& rhs) { + if (node.IsNull()) { + rhs.reset(); + } else { + rhs = node.as(); + } + return true; + } +}; + +} + namespace lincs { const std::string Model::json_schema(R"($schema: https://json-schema.org/draft/2020-12/schema @@ -144,16 +170,27 @@ Model::Model(const Problem& problem, const std::vector& accepted validate(thresholds.get_thresholds().size() == boundaries_count, "The number of real thresholds in an accepted values descriptor must be one less than the number of categories in the problem"); const auto& criterion_values = criterion.get_real_values(); for (unsigned boundary_index = 0; boundary_index != boundaries_count; ++boundary_index) { - validate(criterion_values.is_acceptable(thresholds.get_thresholds()[boundary_index]), "Each threshold in an accepted values descriptor must be between the min and max values for the corresponding real criterion"); + const std::optional threshold = thresholds.get_thresholds()[boundary_index]; + if (threshold) { + validate(criterion_values.is_acceptable(*threshold), "Each threshold in an accepted values descriptor must be between the min and max values for the corresponding real criterion"); + } } for (unsigned boundary_index = 1; boundary_index != boundaries_count; ++boundary_index) { - switch (criterion_values.get_preference_direction()) { - case Criterion::PreferenceDirection::increasing: - validate(thresholds.get_thresholds()[boundary_index] >= thresholds.get_thresholds()[boundary_index - 1], "The real thresholds in an accepted values descriptor must be in preference order"); - break; - case Criterion::PreferenceDirection::decreasing: - validate(thresholds.get_thresholds()[boundary_index] <= thresholds.get_thresholds()[boundary_index - 1], "The real thresholds in an accepted values descriptor must be in preference order"); - break; + const std::optional previous_threshold = thresholds.get_thresholds()[boundary_index - 1]; + const std::optional threshold = thresholds.get_thresholds()[boundary_index]; + if (previous_threshold) { + if (threshold) { + switch (criterion_values.get_preference_direction()) { + case Criterion::PreferenceDirection::increasing: + validate(*threshold >= *previous_threshold, "The real thresholds in an accepted values descriptor must be in preference order"); + break; + case Criterion::PreferenceDirection::decreasing: + validate(*threshold <= *previous_threshold, "The real thresholds in an accepted values descriptor must be in preference order"); + break; + } + } + } else { + validate(!threshold, "After a null threshold, all subsequent thresholds must be null"); } } }, @@ -161,16 +198,27 @@ Model::Model(const Problem& problem, const std::vector& accepted validate(thresholds.get_thresholds().size() == boundaries_count, "The number of integer thresholds in an accepted values descriptor must be one less than the number of categories in the problem"); const auto& criterion_values = criterion.get_integer_values(); for (unsigned boundary_index = 0; boundary_index != boundaries_count; ++boundary_index) { - validate(criterion_values.is_acceptable(thresholds.get_thresholds()[boundary_index]), "Each threshold in an accepted values descriptor must be between the min and max values for the corresponding integer criterion"); + const std::optional threshold = thresholds.get_thresholds()[boundary_index]; + if (threshold) { + validate(criterion_values.is_acceptable(*threshold), "Each threshold in an accepted values descriptor must be between the min and max values for the corresponding integer criterion"); + } } for (unsigned boundary_index = 1; boundary_index != boundaries_count; ++boundary_index) { - switch (criterion_values.get_preference_direction()) { - case Criterion::PreferenceDirection::increasing: - validate(thresholds.get_thresholds()[boundary_index] >= thresholds.get_thresholds()[boundary_index - 1], "The integer thresholds in an accepted values descriptor must be in preference order"); - break; - case Criterion::PreferenceDirection::decreasing: - validate(thresholds.get_thresholds()[boundary_index] <= thresholds.get_thresholds()[boundary_index - 1], "The integer thresholds in an accepted values descriptor must be in preference order"); - break; + const std::optional previous_threshold = thresholds.get_thresholds()[boundary_index - 1]; + const std::optional threshold = thresholds.get_thresholds()[boundary_index]; + if (previous_threshold) { + if (threshold) { + switch (criterion_values.get_preference_direction()) { + case Criterion::PreferenceDirection::increasing: + validate(*threshold >= *previous_threshold, "The integer thresholds in an accepted values descriptor must be in preference order"); + break; + case Criterion::PreferenceDirection::decreasing: + validate(*threshold <= *previous_threshold, "The integer thresholds in an accepted values descriptor must be in preference order"); + break; + } + } + } else { + validate(!threshold, "After a null threshold, all subsequent thresholds must be null"); } } }, @@ -178,13 +226,24 @@ Model::Model(const Problem& problem, const std::vector& accepted validate(thresholds.get_thresholds().size() == boundaries_count, "The number of enumerated thresholds in an accepted values descriptor must be one less than the number of categories in the problem"); const auto& criterion_values = criterion.get_enumerated_values(); for (unsigned boundary_index = 0; boundary_index != boundaries_count; ++boundary_index) { - validate(criterion_values.is_acceptable(thresholds.get_thresholds()[boundary_index]), "Each threshold in an accepted values descriptor must be in the enumerated values for the corresponding criterion"); + const std::optional& threshold = thresholds.get_thresholds()[boundary_index]; + if (threshold) { + validate(criterion_values.is_acceptable(*threshold), "Each threshold in an accepted values descriptor must be in the enumerated values for the corresponding criterion"); + } } for (unsigned boundary_index = 1; boundary_index != boundaries_count; ++boundary_index) { - validate( - criterion_values.get_value_rank(thresholds.get_thresholds()[boundary_index]) >= criterion_values.get_value_rank(thresholds.get_thresholds()[boundary_index - 1]), - "The enumerated thresholds in an accepted values descriptor must be in preference order" - ); + const std::optional& previous_threshold = thresholds.get_thresholds()[boundary_index - 1]; + const std::optional& threshold = thresholds.get_thresholds()[boundary_index]; + if (previous_threshold) { + if (threshold) { + validate( + criterion_values.get_value_rank(*threshold) >= criterion_values.get_value_rank(*previous_threshold), + "The enumerated thresholds in an accepted values descriptor must be in preference order" + ); + } + } else { + validate(!threshold, "After a null threshold, all subsequent thresholds must be null"); + } } } ); @@ -236,6 +295,8 @@ void Model::dump(const Problem& problem, std::ostream& os) const { YAML::Emitter out(ss); #endif + out.SetNullFormat(YAML::EMITTER_MANIP::LowerNull); + bool use_coalitions_alias = sufficient_coalitions.size() > 1 && std::all_of(std::next(sufficient_coalitions.begin()), sufficient_coalitions.end(), [&](const SufficientCoalitions& suff_coals) { @@ -339,13 +400,13 @@ Model Model::load(const Problem& problem, std::istream& is) { accepted_values.push_back(dispatch( criterion.get_values(), [&thresholds, boundaries_count](const Criterion::RealValues&) { - return AcceptedValues(AcceptedValues::RealThresholds(thresholds.as>())); + return AcceptedValues(AcceptedValues::RealThresholds(thresholds.as>>())); }, [&thresholds, boundaries_count](const Criterion::IntegerValues&) { - return AcceptedValues(AcceptedValues::IntegerThresholds(thresholds.as>())); + return AcceptedValues(AcceptedValues::IntegerThresholds(thresholds.as>>())); }, [&thresholds, boundaries_count](const Criterion::EnumeratedValues&) { - return AcceptedValues(AcceptedValues::EnumeratedThresholds(thresholds.as>())); + return AcceptedValues(AcceptedValues::EnumeratedThresholds(thresholds.as>>())); } )); } @@ -505,6 +566,51 @@ format_version: 1 CHECK(Model::load(problem, ss) == model); } +TEST_CASE("dumping then loading model preserves data - null thresholds") { + Problem problem{ + { + Criterion("Real", Criterion::RealValues(Criterion::PreferenceDirection::increasing, -1, 1)), + Criterion("Integer", Criterion::IntegerValues(Criterion::PreferenceDirection::increasing, 0, 100)), + }, + {{"Cat 1"}, {"Cat 2"}, {"Cat 3"}, {"Cat 4"}, {"Cat 5"}}, + }; + + Model model{ + problem, + { + AcceptedValues(AcceptedValues::RealThresholds({-0.5, 0, std::nullopt, std::nullopt})), + AcceptedValues(AcceptedValues::IntegerThresholds({20, 40, 60, std::nullopt})), + }, + { + SufficientCoalitions(SufficientCoalitions::Weights({0.5, 0.5})), + SufficientCoalitions(SufficientCoalitions::Weights({0.5, 0.5})), + SufficientCoalitions(SufficientCoalitions::Weights({0.5, 0.5})), + SufficientCoalitions(SufficientCoalitions::Weights({0.5, 0.5})), + }, + }; + + std::stringstream ss; + model.dump(problem, ss); + + CHECK(ss.str() == R"(kind: ncs-classification-model +format_version: 1 +accepted_values: + - kind: thresholds + thresholds: [-0.5, 0, null, null] + - kind: thresholds + thresholds: [20, 40, 60, null] +sufficient_coalitions: + - &coalitions + kind: weights + criterion_weights: [0.5, 0.5] + - *coalitions + - *coalitions + - *coalitions +)"); + + CHECK(Model::load(problem, ss) == model); +} + TEST_CASE("dumping then loading model preserves data - enumerated criterion") { Problem problem{ {Criterion("Criterion 1", Criterion::EnumeratedValues({"F", "E", "D", "C", "B", "A"}))}, diff --git a/lincs/liblincs/io/model.hpp b/lincs/liblincs/io/model.hpp index 38653bb8..9a137245 100644 --- a/lincs/liblincs/io/model.hpp +++ b/lincs/liblincs/io/model.hpp @@ -3,6 +3,8 @@ #ifndef LINCS__IO__MODEL_HPP #define LINCS__IO__MODEL_HPP +#include + #include #include "../internal.hpp" @@ -15,7 +17,7 @@ class AcceptedValues { public: class RealThresholds { public: - RealThresholds(const std::vector& thresholds_) : thresholds(thresholds_) {} + RealThresholds(const std::vector>& thresholds_) : thresholds(thresholds_) {} public: bool operator==(const RealThresholds& other) const { @@ -23,15 +25,15 @@ class AcceptedValues { } public: - const std::vector& get_thresholds() const { return thresholds; } + const std::vector>& get_thresholds() const { return thresholds; } private: - std::vector thresholds; + std::vector> thresholds; }; class IntegerThresholds { public: - IntegerThresholds(const std::vector& thresholds_) : thresholds(thresholds_) {} + IntegerThresholds(const std::vector>& thresholds_) : thresholds(thresholds_) {} public: bool operator==(const IntegerThresholds& other) const { @@ -39,15 +41,15 @@ class AcceptedValues { } public: - const std::vector& get_thresholds() const { return thresholds; } + const std::vector>& get_thresholds() const { return thresholds; } private: - std::vector thresholds; + std::vector> thresholds; }; class EnumeratedThresholds { public: - EnumeratedThresholds(const std::vector& thresholds_) : thresholds(thresholds_) {} + EnumeratedThresholds(const std::vector>& thresholds_) : thresholds(thresholds_) {} public: bool operator==(const EnumeratedThresholds& other) const { @@ -55,10 +57,10 @@ class AcceptedValues { } public: - const std::vector& get_thresholds() const { return thresholds; } + const std::vector>& get_thresholds() const { return thresholds; } private: - std::vector thresholds; + std::vector> thresholds; }; // WARNING: keep the enum and the variant consistent with 'Criterion::ValueType' diff --git a/lincs/liblincs/learning/pre-processing.cpp b/lincs/liblincs/learning/pre-processing.cpp index ae91e50e..923cfd31 100644 --- a/lincs/liblincs/learning/pre-processing.cpp +++ b/lincs/liblincs/learning/pre-processing.cpp @@ -117,35 +117,42 @@ Model PreProcessedLearningSet::post_process(const std::vector thresholds; + std::vector> thresholds; thresholds.reserve(boundaries_count); for (const auto& boundary: boundaries) { - unsigned rank = boundary.profile_ranks[criterion_index]; - // Handle past-the-end rank - rank = std::min(rank, values_counts[criterion_index] - 1); - thresholds.push_back(real_sorted_values.at(criterion_index)[rank]); + const unsigned rank = boundary.profile_ranks[criterion_index]; + if (rank < values_counts[criterion_index]) { + thresholds.push_back(real_sorted_values.at(criterion_index)[rank]); + } else { + // Past-the-end rank => this criterion cannot help reach this category + thresholds.push_back(std::nullopt); + } } return AcceptedValues(AcceptedValues::RealThresholds(thresholds)); }, [this, &boundaries, criterion_index](const Criterion::IntegerValues&) { - std::vector thresholds; + std::vector> thresholds; thresholds.reserve(boundaries_count); for (const auto& boundary: boundaries) { - unsigned rank = boundary.profile_ranks[criterion_index]; - // Handle past-the-end rank - rank = std::min(rank, values_counts[criterion_index] - 1); - thresholds.push_back(integer_sorted_values.at(criterion_index)[rank]); + const unsigned rank = boundary.profile_ranks[criterion_index]; + if (rank < values_counts[criterion_index]) { + thresholds.push_back(integer_sorted_values.at(criterion_index)[rank]); + } else { + thresholds.push_back(std::nullopt); + } } return AcceptedValues(AcceptedValues::IntegerThresholds(thresholds)); }, [this, &boundaries, criterion_index](const Criterion::EnumeratedValues& values) { - std::vector thresholds; + std::vector> thresholds; thresholds.reserve(boundaries_count); for (const auto& boundary: boundaries) { - unsigned rank = boundary.profile_ranks[criterion_index]; - // Handle past-the-end rank - rank = std::min(rank, values_counts[criterion_index] - 1); - thresholds.push_back(values.get_ordered_values()[rank]); + const unsigned rank = boundary.profile_ranks[criterion_index]; + if (rank < values_counts[criterion_index]) { + thresholds.push_back(values.get_ordered_values()[rank]); + } else { + thresholds.push_back(std::nullopt); + } } return AcceptedValues(AcceptedValues::EnumeratedThresholds(thresholds)); })); diff --git a/lincs/liblincs/learning/ucncs-by-max-sat-by-coalitions.cpp b/lincs/liblincs/learning/ucncs-by-max-sat-by-coalitions.cpp index f3f4b024..94b565ce 100644 --- a/lincs/liblincs/learning/ucncs-by-max-sat-by-coalitions.cpp +++ b/lincs/liblincs/learning/ucncs-by-max-sat-by-coalitions.cpp @@ -245,10 +245,7 @@ Model MaxSatCoalitionsUcncsLearning::decode(const std::vector::decode(const std::vector::decode(const std::vector& so } const Model model = learning_set.post_process(boundaries); - #ifndef NDEBUG - // @todo(bug, now) Replace with a plain assert (when we don't need to catch it from the Python unit-tests anymore) - if (count_correctly_classified_alternatives(input_problem, model, input_learning_set) != learning_set.alternatives_count) { - throw LearningFailureException("The learned model failed to classify all alternatives correctly. THIS IS A BUG, please report it."); - } - #endif + assert(count_correctly_classified_alternatives(input_problem, model, input_learning_set) == learning_set.alternatives_count); return model; } diff --git a/lincs/liblincs/learning/ucncs-by-sat-by-separation.cpp b/lincs/liblincs/learning/ucncs-by-sat-by-separation.cpp index 464cb1c7..f05e60d7 100644 --- a/lincs/liblincs/learning/ucncs-by-sat-by-separation.cpp +++ b/lincs/liblincs/learning/ucncs-by-sat-by-separation.cpp @@ -248,12 +248,7 @@ Model SatSeparationUcncsLearning::decode(const std::vector& so } const Model model = learning_set.post_process(boundaries); - #ifndef NDEBUG - // @todo(bug, now) Replace with a plain assert (when we don't need to catch it from the Python unit-tests anymore) - if (count_correctly_classified_alternatives(input_problem, model, input_learning_set) != learning_set.alternatives_count) { - throw LearningFailureException("The learned model failed to classify all alternatives correctly. THIS IS A BUG, please report it."); - } - #endif + assert(count_correctly_classified_alternatives(input_problem, model, input_learning_set) == learning_set.alternatives_count); return model; } diff --git a/lincs/liblincs/liblincs-module/converters.cpp b/lincs/liblincs/liblincs-module/converters.cpp index b79dcb9e..df0e754f 100644 --- a/lincs/liblincs/liblincs-module/converters.cpp +++ b/lincs/liblincs/liblincs-module/converters.cpp @@ -98,6 +98,50 @@ struct std_vector_converter> { } }; +template +struct std_vector_converter> { + static PyObject* convert(const std::vector>& xs) { + bp::list result; + for (const std::optional& x : xs) { + if (x) { + result.append(*x); + } else { + result.append(bp::object()); + } + } + return bp::incref(result.ptr()); + } + + static void* convertible(PyObject* obj) { + if (PyObject_GetIter(obj)) { + return obj; + } else { + return nullptr; + } + } + + static void construct(PyObject* obj, bp::converter::rvalue_from_python_stage1_data* data) { + bp::handle<> handle(bp::borrowed(obj)); + + typedef bp::converter::rvalue_from_python_storage>> storage_type; + void* storage = reinterpret_cast(data)->storage.bytes; + + typedef bp::stl_input_iterator>::value_type> iterator; + + new (storage) std::vector>(iterator(bp::object(handle)), iterator()); + data->convertible = storage; + } + + static void enroll() { + bp::to_python_converter>, std_vector_converter>>(); + bp::converter::registry::push_back( + &std_vector_converter>::convertible, + &std_vector_converter>::construct, + bp::type_id>>() + ); + } +}; + template struct std_optional_converter { static PyObject* convert(const std::optional& value) { @@ -151,6 +195,12 @@ void enroll_standard_converters() { std_vector_converter::enroll(); bp::class_>("Iterable[str]").def(bp::vector_indexing_suite>()); + std_vector_converter>::enroll(); + + std_vector_converter>::enroll(); + + std_vector_converter>::enroll(); + std_vector_converter>::enroll(); std_vector_converter::enroll(); @@ -194,6 +244,8 @@ void enroll_standard_converters() { bp::class_>("Iterable[UniformRandomBitsGenerator]").def(bp::vector_indexing_suite>()); std_optional_converter::enroll(); + std_optional_converter::enroll(); + std_optional_converter::enroll(); std_optional_converter::enroll(); } diff --git a/lincs/liblincs/liblincs-module/io-classes.cpp b/lincs/liblincs/liblincs-module/io-classes.cpp index 425d1b16..dc9cb6bc 100644 --- a/lincs/liblincs/liblincs-module/io-classes.cpp +++ b/lincs/liblincs/liblincs-module/io-classes.cpp @@ -317,7 +317,7 @@ void define_model_classes() { "Descriptor for thresholds for an real-valued criterion.", bp::no_init ) - .def(bp::init&>( + .def(bp::init>&>( (bp::arg("self"), "thresholds"), "Parameters map exactly to attributes with identical names." )) @@ -336,7 +336,7 @@ void define_model_classes() { "Descriptor for thresholds for an integer-valued criterion.", bp::no_init ) - .def(bp::init&>( + .def(bp::init>&>( (bp::arg("self"), "thresholds"), "Parameters map exactly to attributes with identical names." )) @@ -355,7 +355,7 @@ void define_model_classes() { "Descriptor for thresholds for a criterion taking enumerated values.", bp::no_init ) - .def(bp::init&>( + .def(bp::init>&>( (bp::arg("self"), "thresholds"), "Parameters map exactly to attributes with identical names." )) diff --git a/lincs/liblincs_module_tests.py b/lincs/liblincs_module_tests.py index 14287bc3..d59760ed 100644 --- a/lincs/liblincs_module_tests.py +++ b/lincs/liblincs_module_tests.py @@ -1435,6 +1435,13 @@ def test_learning_failure_exception(self): learned_model = learning.perform() def test_bug_found_by_laurent_cabaret_in_real_life_data(self): + # Previously, in (max-)SAT learning methods, when the SAT solver returned a solution where no value was accepted + # for a given criterion, we used the maximum value for that criterion (as configured in the problem) as the threshold. + # This was incorrect, and would in rare occasions cause the resulting model to classify alternatives in a higher + # category that expected. To solve this issue, we had to allow "unreachable thresholds", materialized by None in + # Python, and null in YAML. That change broke the public interface of the library, so it required releasing a + # major version. + problem = Problem.load(io.StringIO(textwrap.dedent("""\ kind: classification-problem format_version: 1 @@ -1540,38 +1547,200 @@ def test_bug_found_by_laurent_cabaret_in_real_life_data(self): 50,1,0,1,1,3,15,4,1,3 """))) - def make_wpb_learning(): - learning_data = LearnMrsortByWeightsProfilesBreed.LearningData(problem, learning_set, models_count=9, random_seed=43) - profiles_initialization_strategy = InitializeProfilesForProbabilisticMaximalDiscriminationPowerPerCriterion(learning_data) - weights_optimization_strategy = OptimizeWeightsUsingGlop(learning_data) - profiles_improvement_strategy = ImproveProfilesWithAccuracyHeuristicOnCpu(learning_data) - breeding_strategy = ReinitializeLeastAccurate(learning_data, profiles_initialization_strategy=profiles_initialization_strategy, count=4) - termination_strategy = TerminateAtAccuracy(learning_data, target_accuracy=len(learning_set.alternatives)) - return LearnMrsortByWeightsProfilesBreed( - learning_data, - profiles_initialization_strategy, - weights_optimization_strategy, - profiles_improvement_strategy, - breeding_strategy, - termination_strategy, - ) - - learnings = [ - # @todo(bug, now) Investigate and fix bug: these should all be 50. - (make_wpb_learning(), 50, True), - (LearnUcncsBySatBySeparationUsingMinisat(problem, learning_set), 50, True), - (LearnUcncsBySatByCoalitionsUsingMinisat(problem, learning_set), 45, False), - (LearnUcncsByMaxSatBySeparationUsingEvalmaxsat(problem, learning_set), 46, False), - (LearnUcncsByMaxSatByCoalitionsUsingEvalmaxsat(problem, learning_set), 28, False), - ] - - for (learning, expected_accuracy, expect_success) in learnings: - if expect_success: - model = learning.perform() - - learning_set_copy = copy.deepcopy(learning_set) - classification_result = classify_alternatives(problem, model, learning_set_copy) - self.assertEqual(classification_result.unchanged, expected_accuracy) - else: - with self.assertRaises(LearningFailureException): - learning.perform() + learning_data = LearnMrsortByWeightsProfilesBreed.LearningData(problem, learning_set, models_count=9, random_seed=43) + profiles_initialization_strategy = InitializeProfilesForProbabilisticMaximalDiscriminationPowerPerCriterion(learning_data) + weights_optimization_strategy = OptimizeWeightsUsingGlop(learning_data) + profiles_improvement_strategy = ImproveProfilesWithAccuracyHeuristicOnCpu(learning_data) + breeding_strategy = ReinitializeLeastAccurate(learning_data, profiles_initialization_strategy=profiles_initialization_strategy, count=4) + termination_strategy = TerminateAtAccuracy(learning_data, target_accuracy=len(learning_set.alternatives)) + model = LearnMrsortByWeightsProfilesBreed(learning_data, profiles_initialization_strategy, weights_optimization_strategy, profiles_improvement_strategy, breeding_strategy, termination_strategy).perform() + model_dump = io.StringIO() + model.dump(problem, model_dump) + self.assertEqual(model_dump.getvalue(), textwrap.dedent("""\ + kind: ncs-classification-model + format_version: 1 + accepted_values: + - kind: thresholds + thresholds: [0, 1, 1] + - kind: thresholds + thresholds: [0, 1, 1] + - kind: thresholds + thresholds: [1, 1, 1] + - kind: thresholds + thresholds: [1, 1, 1] + - kind: thresholds + thresholds: [3, 3, 4] + - kind: thresholds + thresholds: [5, 5, 14] + - kind: thresholds + thresholds: [3, 9, 9] + - kind: thresholds + thresholds: [0, 1, 5] + sufficient_coalitions: + - &coalitions + kind: weights + criterion_weights: [0.166666836, 0.333332658, 0.166666836, 0.166666836, 0.166666836, 0.166666836, 0.166665822, 0] + - *coalitions + - *coalitions + """)) + learning_set_copy = copy.deepcopy(learning_set) + classification_result = classify_alternatives(problem, model, learning_set_copy) + self.assertEqual(classification_result.unchanged, 50) + + model = LearnUcncsBySatBySeparationUsingMinisat(problem, learning_set).perform() + model_dump = io.StringIO() + model.dump(problem, model_dump) + self.assertEqual(model_dump.getvalue(), textwrap.dedent("""\ + kind: ncs-classification-model + format_version: 1 + accepted_values: + - kind: thresholds + thresholds: [0, 0, 1] + - kind: thresholds + thresholds: [0, 1, 1] + - kind: thresholds + thresholds: [1, 1, 1] + - kind: thresholds + thresholds: [1, null, null] + - kind: thresholds + thresholds: [0, 3, 3] + - kind: thresholds + thresholds: [0, 6, 6] + - kind: thresholds + thresholds: [5, 5, 5] + - kind: thresholds + thresholds: [0, 8, 10] + sufficient_coalitions: + - &coalitions + kind: roots + upset_roots: + - [0, 1, 2, 4, 5] + - [0, 1, 2, 5, 6] + - [0, 2, 4, 5, 6, 7] + - *coalitions + - *coalitions + """)) + learning_set_copy = copy.deepcopy(learning_set) + classification_result = classify_alternatives(problem, model, learning_set_copy) + self.assertEqual(classification_result.unchanged, 50) + + model = LearnUcncsBySatByCoalitionsUsingMinisat(problem, learning_set).perform() + model_dump = io.StringIO() + model.dump(problem, model_dump) + self.assertEqual(model_dump.getvalue(), textwrap.dedent("""\ + kind: ncs-classification-model + format_version: 1 + accepted_values: + - kind: thresholds + thresholds: [1, 1, 1] + - kind: thresholds + thresholds: [0, 1, 1] + - kind: thresholds + thresholds: [1, null, null] + - kind: thresholds + thresholds: [1, 1, null] + - kind: thresholds + thresholds: [3, 3, null] + - kind: thresholds + thresholds: [6, 9, null] + - kind: thresholds + thresholds: [0, 4, 13] + - kind: thresholds + thresholds: [0, 8, null] + sufficient_coalitions: + - &coalitions + kind: roots + upset_roots: + - [0, 1] + - [1, 2, 6] + - [1, 4, 5, 6] + - [1, 2, 4, 5, 7] + - [1, 3, 4, 6, 7] + - [0, 3, 4, 5, 6, 7] + - *coalitions + - *coalitions + """)) + learning_set_copy = copy.deepcopy(learning_set) + classification_result = classify_alternatives(problem, model, learning_set_copy) + self.assertEqual(classification_result.unchanged, 50) + + model = LearnUcncsByMaxSatBySeparationUsingEvalmaxsat(problem, learning_set).perform() + model_dump = io.StringIO() + model.dump(problem, model_dump) + self.assertEqual(model_dump.getvalue(), textwrap.dedent("""\ + kind: ncs-classification-model + format_version: 1 + accepted_values: + - kind: thresholds + thresholds: [1, 1, 1] + - kind: thresholds + thresholds: [0, 1, 1] + - kind: thresholds + thresholds: [1, 1, null] + - kind: thresholds + thresholds: [1, 1, 1] + - kind: thresholds + thresholds: [0, 3, 4] + - kind: thresholds + thresholds: [0, 6, 10] + - kind: thresholds + thresholds: [0, 3, 8] + - kind: thresholds + thresholds: [0, 3, 4] + sufficient_coalitions: + - &coalitions + kind: roots + upset_roots: + - [0, 1, 3, 4, 7] + - [0, 1, 3, 5, 7] + - [0, 2, 3, 4, 5, 6, 7] + - [1, 2, 3, 4, 5, 6, 7] + - *coalitions + - *coalitions + """)) + learning_set_copy = copy.deepcopy(learning_set) + classification_result = classify_alternatives(problem, model, learning_set_copy) + self.assertEqual(classification_result.unchanged, 50) + + model = LearnUcncsByMaxSatByCoalitionsUsingEvalmaxsat(problem, learning_set).perform() + model_dump = io.StringIO() + model.dump(problem, model_dump) + self.assertEqual(model_dump.getvalue(), textwrap.dedent("""\ + kind: ncs-classification-model + format_version: 1 + accepted_values: + - kind: thresholds + thresholds: [1, 1, 1] + - kind: thresholds + thresholds: [1, 1, 1] + - kind: thresholds + thresholds: [1, null, null] + - kind: thresholds + thresholds: [null, null, null] + - kind: thresholds + thresholds: [3, null, null] + - kind: thresholds + thresholds: [11, 11, null] + - kind: thresholds + thresholds: [7, null, null] + - kind: thresholds + thresholds: [8, 8, null] + sufficient_coalitions: + - &coalitions + kind: roots + upset_roots: + - [0, 1] + - [1, 2, 4] + - [1, 3, 4] + - [1, 5] + - [2, 5] + - [3, 5] + - [4, 5] + - [6] + - [7] + - *coalitions + - *coalitions + """)) + learning_set_copy = copy.deepcopy(learning_set) + classification_result = classify_alternatives(problem, model, learning_set_copy) + self.assertEqual(classification_result.unchanged, 50)