diff --git a/site/zenodo_rdm/curation/config.py b/site/zenodo_rdm/curation/config.py
index bb846513..ac936fe9 100644
--- a/site/zenodo_rdm/curation/config.py
+++ b/site/zenodo_rdm/curation/config.py
@@ -13,8 +13,13 @@
     award_acronym_in_additional_description,
     award_acronym_in_description,
     award_acronym_in_title,
+    award_number_in_additional_description,
+    award_number_in_description,
+    community_data_award_acronym,
     contains_high_conf_keywords,
     contains_low_conf_keywords,
+    eu_community_request,
+    eu_subcommunity_declined_request,
     published_before_award_start,
     test_phrases_in_record,
     user_verified,
@@ -31,6 +36,11 @@
     "additional_desc_contains_low_conf_keywords": additional_desc_contains_low_conf_keywords,
     "additional_desc_contains_high_conf_keywords": additional_desc_contains_high_conf_keywords,
     "award_acronym_in_additional_description": award_acronym_in_additional_description,
+    "eu_community_request": eu_community_request,
+    "eu_subcommunity_declined_request": eu_subcommunity_declined_request,
+    "community_data_award_acronym": community_data_award_acronym,
+    "award_number_in_additional_description": award_number_in_additional_description,
+    "award_number_in_description": award_number_in_description,
 }
 """Rules to run for EU Curation."""
 
@@ -45,8 +55,13 @@
     "additional_desc_contains_low_conf_keywords": 0,
     "additional_desc_contains_high_conf_keywords": 0,
     "award_acronym_in_additional_description": 0,
+    "eu_community_request": False,
+    "eu_subcommunity_declined_request": False,
+    "community_data_award_acronym": 0,
+    "award_number_in_additional_description": 0,
+    "award_number_in_description": 0,
 }
-"""Rule scores for EU Curation."""
+"""Rule scores for EU Curation (bool value implies direct approval/decline)."""
 
 CURATION_THRESHOLDS = {"EU_RECORDS_CURATION": 100}
 
diff --git a/site/zenodo_rdm/curation/rules.py b/site/zenodo_rdm/curation/rules.py
index 4e503daf..fd489fdf 100644
--- a/site/zenodo_rdm/curation/rules.py
+++ b/site/zenodo_rdm/curation/rules.py
@@ -8,42 +8,69 @@
 
 import arrow
 from flask import current_app
+from invenio_access.permissions import system_identity
+from invenio_communities.proxies import current_communities
+from invenio_rdm_records.requests import CommunityInclusion, CommunitySubmission
 from invenio_records_resources.proxies import current_service_registry
+from invenio_requests.proxies import current_requests_service
+from invenio_search.engine import dsl
 
 
-def award_acronym_in_description(record):
-    """Check if EU award name in record description."""
-    award_service = current_service_registry.get("awards")
-    description = record.metadata.get("description")
-    if not description:
-        return False
+def _award_acronym_in_text(award, text):
+    """Check if award acronym occurs in text (case-insensitive)."""
+    if award.get("acronym") and (award.get("acronym").lower() in text.lower()):
+        return True
+    return False
+
+
+def _award_number_in_text(award, text):
+    """Check if award number occurs in text (substring match)."""
+    if award.get("number") and (str(award.get("number")) in text):
+        return True
+    return False
+
 
+def _get_ec_awards(record):
+    """Get all EC funded awards of record."""
+    award_service = current_service_registry.get("awards")
+    awards = []
     funding = record.metadata.get("funding", [])
     for f in funding:
         if f["funder"].get("id") == "00k4n6c32":
             if award_id := f.get("award", {}).get("id"):
                 award = award_service.record_cls.pid.resolve(award_id)
-                if award.get("acronym") and (
-                    award.get("acronym").lower() in description.lower()
-                ):
-                    return True
+                awards.append(award)
+    return awards
+
+
+def award_acronym_in_description(record):
+    """Check if EU award name in record description."""
+    if description := record.metadata.get("description"):
+        awards = _get_ec_awards(record)
+        for award in awards:
+            if _award_acronym_in_text(award, description):
+                return True
+    return False
+
+
+def award_number_in_description(record):
+    """Check if EU award number in record description."""
+    if description := record.metadata.get("description"):
+        awards = _get_ec_awards(record)
+        for award in awards:
+            if _award_number_in_text(award, description):
+                return True
     return False
 
 
 def award_acronym_in_title(record):
     """Check if EU award name in record title."""
-    award_service = current_service_registry.get("awards")
     title = record.metadata["title"]
 
-    funding = record.metadata.get("funding", [])
-    for f in funding:
-        if f["funder"].get("id") == "00k4n6c32":
-            if award_id := f.get("award", {}).get("id"):
-                award = award_service.record_cls.pid.resolve(award_id)
-                if award.get("acronym") and (
-                    award.get("acronym").lower() in title.lower()
-                ):
-                    return True
+    awards = _get_ec_awards(record)
+    for award in awards:
+        if _award_acronym_in_text(award, title):
+            return True
     return False
 
 
@@ -62,18 +89,13 @@ def test_phrases_in_record(record):
 
 def published_before_award_start(record):
     """Check if published before award start date."""
-    award_service = current_service_registry.get("awards")
-
-    funding = record.metadata.get("funding", [])
-    for f in funding:
-        if f["funder"].get("id") == "00k4n6c32":
-            if award_id := f.get("award", {}).get("id"):
-                award = award_service.record_cls.pid.resolve(award_id)
-                if award.get("start_date") and (
-                    record.created.timestamp()
-                    < arrow.get(award.get("start_date")).datetime.timestamp()
-                ):
-                    return True
+    awards = _get_ec_awards(record)
+    for award in awards:
+        if award.get("start_date") and (
+            record.created.timestamp()
+            < arrow.get(award.get("start_date")).datetime.timestamp()
+        ):
+            return True
     return False
 
 
@@ -143,17 +165,103 @@ def additional_desc_contains_low_conf_keywords(record):
 
 def award_acronym_in_additional_description(record):
     """Check if EU award name in record additional description."""
-    award_service = current_service_registry.get("awards")
     additional_descriptions = record.metadata.get("additional_descriptions", [])
     record_data = " ".join([x.get("description", "") for x in additional_descriptions])
 
-    funding = record.metadata.get("funding", [])
-    for f in funding:
-        if f["funder"].get("id") == "00k4n6c32":
-            if award_id := f.get("award", {}).get("id"):
-                award = award_service.record_cls.pid.resolve(award_id)
-                if award.get("acronym") and (
-                    award.get("acronym").lower() in record_data.lower()
-                ):
-                    return True
+    awards = _get_ec_awards(record)
+    for award in awards:
+        if _award_acronym_in_text(award, record_data):
+            return True
+    return False
+
+
+def award_number_in_additional_description(record):
+    """Check if EU award number in record additional description."""
+    additional_descriptions = record.metadata.get("additional_descriptions", [])
+    record_data = " ".join([x.get("description", "") for x in additional_descriptions])
+
+    awards = _get_ec_awards(record)
+    for award in awards:
+        if _award_number_in_text(award, record_data):
+            return True
+    return False
+
+
+def eu_community_request(record):
+    """Check if record has a declined or open request to the EU community."""
+    community_requests = dsl.Q(
+        "bool",
+        must=[
+            dsl.Q(
+                "term",
+                **{"receiver.community": current_app.config.get("EU_COMMUNITY_UUID")},
+            ),
+            dsl.Q("term", **{"topic.record": record.pid.pid_value}),
+        ],
+    )
+    request_types = dsl.Q(
+        "bool",
+        should=[
+            dsl.Q("term", **{"type": CommunityInclusion.type_id}),
+            dsl.Q("term", **{"type": CommunitySubmission.type_id}),
+        ],
+        minimum_should_match=1,
+    )
+    finalq = community_requests & request_types
+    results = current_requests_service.search(system_identity, extra_filter=finalq)
+
+    for result in results:
+        # return true if there was a declined request or an existing open request
+        # as we respond to open requests ourselves.
+        if result["is_closed"] and result["status"] == "declined":
+            return True
+        if result["is_open"] and not result["is_expired"]:
+            return True
+    return False
+
+
+def eu_subcommunity_declined_request(record):
+    """Check if record was rejected from EU sub community."""
+    record_requests = dsl.Q(
+        "bool",
+        must=[
+            dsl.Q("term", **{"topic.record": record.pid.pid_value}),
+            dsl.Q("term", **{"is_open": False}),
+        ],
+    )
+    request_types = dsl.Q(
+        "bool",
+        should=[
+            dsl.Q("term", **{"type": CommunityInclusion.type_id}),
+            dsl.Q("term", **{"type": CommunitySubmission.type_id}),
+        ],
+        minimum_should_match=1,
+    )
+    finalq = record_requests & request_types
+    results = current_requests_service.search(system_identity, extra_filter=finalq)
+
+    for result in results:
+        community = current_communities.service.record_cls.pid.resolve(
+            result["receiver"]["community"]
+        )
+        if community.parent and str(community.parent.id) == current_app.config.get(
+            "EU_COMMUNITY_UUID"
+        ):
+            if result["status"] == "declined":
+                return True
+    return False
+
+
+def community_data_award_acronym(record):
+    """Check if award acronym in community title or page text."""
+    comm_text = ""
+    for comm in record.parent.communities:
+        comm_text += " " + comm.metadata.get("title", "")
+        comm_text += " " + comm.metadata.get("page", "")
+
+    if comm_text:
+        awards = _get_ec_awards(record)
+        for award in awards:
+            if _award_acronym_in_text(award, comm_text):
+                return True
     return False