From 08c9f74f6a124bd4b35bec0c5d9c21916eb55e83 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Mon, 2 Dec 2024 09:57:56 +0100 Subject: [PATCH 01/23] deposit-ui: log errors on all deposit form actions * This can help with debugging unexpected non-network related errors that might occur in the logic before/after a REST API requests. --- .../src/deposit/state/actions/deposit.js | 6 ++++++ .../invenio_rdm_records/src/deposit/state/actions/files.js | 3 +++ 2 files changed, 9 insertions(+) diff --git a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/deposit.js b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/deposit.js index 0e83f475a..f9c01c121 100644 --- a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/deposit.js +++ b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/deposit.js @@ -72,6 +72,7 @@ async function _saveDraft( try { response = await saveDraftWithUrlUpdate(draft, draftsService, failType); } catch (error) { + console.error("Error saving draft", error, draft); dispatchFn({ type: failType, payload: { errors: error.errors }, @@ -180,6 +181,7 @@ export const publish = (draft, { removeSelectedCommunity = false }) => { const recordURL = response.data.links.self_html; window.location.replace(recordURL); } catch (error) { + console.error("Error publishing draft", error, draft); dispatch({ type: DRAFT_PUBLISH_FAILED, payload: { errors: error.errors }, @@ -215,6 +217,7 @@ export const submitReview = (draft, { reviewComment, directPublish }) => { const nextURL = reqResponse.data.links.next_html; window.location.replace(nextURL); } catch (error) { + console.error("Error submitting review", error, draft); dispatch({ type: DRAFT_SUBMIT_REVIEW_FAILED, payload: { errors: error.errors }, @@ -261,6 +264,7 @@ export const delete_ = () => { const redirectURL = config.config.dashboard_routes.uploads; 
window.location.replace(redirectURL); } catch (error) { + console.error("Error deleting draft", error); dispatch({ type: DRAFT_DELETE_FAILED, payload: { errors: error.errors }, @@ -291,6 +295,7 @@ export const reservePID = (draft, { pidType }) => { payload: { data: response.data }, }); } catch (error) { + console.error("Error reserving PID", error, draft); dispatch({ type: RESERVE_PID_FAILED, payload: { errors: error.errors }, @@ -321,6 +326,7 @@ export const discardPID = (draft, { pidType }) => { payload: { data: response.data }, }); } catch (error) { + console.error("Error discarding PID", error, draft); dispatch({ type: DISCARD_PID_FAILED, payload: { errors: error.errors }, diff --git a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/files.js b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/files.js index 982867b16..c62f00e32 100644 --- a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/files.js +++ b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/files.js @@ -33,6 +33,7 @@ export const uploadFiles = (draft, files) => { config.service.files.upload(uploadFileUrl, file); } } catch (error) { + console.error("Error uploading files", error, draft, files); dispatch({ type: FILE_UPLOAD_SAVE_DRAFT_FAILED, payload: { errors: error.errors }, @@ -64,6 +65,7 @@ export const deleteFile = (file) => { }, }); } else { + console.error("Error deleting file", error, file); dispatch({ type: FILE_DELETE_FAILED }); throw error; } @@ -86,6 +88,7 @@ export const importParentFiles = () => { payload: { files: files }, }); } catch (error) { + console.error("Error importing parent record files", error); dispatch({ type: FILE_IMPORT_FAILED }); throw error; } From deb357184feac2b5df9421d38f6f1163c9313633 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Mon, 2 Dec 2024 09:58:13 +0100 Subject: [PATCH 02/23] deposit-ui: skip 
unnecessary removal of empty values in serialization * This initial removal of empty values can be dangerous, since the `record` at this point is a UI object representation that could potentially include circular references or very deeply nested objects. Since `_removeEmptyValues` is recursive this can lead to stack overflow errors. --- .../src/deposit/api/DepositRecordSerializer.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/api/DepositRecordSerializer.js b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/api/DepositRecordSerializer.js index 0c0d59a19..cb799dbc3 100644 --- a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/api/DepositRecordSerializer.js +++ b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/api/DepositRecordSerializer.js @@ -383,7 +383,7 @@ export class RDMDepositRecordSerializer extends DepositRecordSerializer { * */ serialize(record) { - // NOTE: cloning nows allows us to manipulate the copy with impunity without + // NOTE: cloning now allows us to manipulate the copy with impunity without // affecting the original let originalRecord = _pick(_cloneDeep(record), [ "access", @@ -400,8 +400,7 @@ export class RDMDepositRecordSerializer extends DepositRecordSerializer { // Save pids so they are not removed when an empty value is passed let savedPIDsFieldValue = originalRecord.pids || {}; - let serializedRecord = this._removeEmptyValues(originalRecord); - + let serializedRecord = originalRecord; for (let key in this.depositRecordSchema) { serializedRecord = this.depositRecordSchema[key].serialize( serializedRecord, From e9c2704bafe8c5f56e2c0b23915a5aded0d09936 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Mon, 2 Dec 2024 16:23:55 +0100 Subject: [PATCH 03/23] deposit-ui: make sure we handle null/undefined for SchemaField --- 
.../invenio_rdm_records/src/deposit/serializers/SchemaField.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/serializers/SchemaField.js b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/serializers/SchemaField.js index a41bc5553..f90e311af 100644 --- a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/serializers/SchemaField.js +++ b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/serializers/SchemaField.js @@ -57,7 +57,7 @@ export class SchemaField extends Field { */ serialize(deserialized, defaultLocale) { const fieldValues = _get(deserialized, this.fieldpath, this.serializedDefault); - const serializedElements = fieldValues.map((value) => { + const serializedElements = fieldValues?.map((value) => { let serializedElement = _pick(value, this.schemaKeys); this.schemaKeys.forEach((key) => { serializedElement = this.schema[key].serialize( From a736fb5c6740117ca07780ce81ff87ba102aee39 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Mon, 2 Dec 2024 17:06:57 +0100 Subject: [PATCH 04/23] =?UTF-8?q?=F0=9F=93=A6=20release:=20v16.3.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.rst | 13 +++++++++++++ invenio_rdm_records/__init__.py | 2 +- .../src/deposit/serializers/SchemaField.js | 2 +- .../src/deposit/state/actions/deposit.js | 2 +- .../src/deposit/state/actions/files.js | 2 +- 5 files changed, 17 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index cb7a87aa5..091833bb8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,6 +12,19 @@ Changes ======= +Version v16.3.1 (released 2024-12-02) + +- deposit-ui: make sure we handle null/undefined for SchemaField +- deposit-ui: skip unecessary removal of empty values in serialization + * This initial removal of empty values can be dangerous, since the + `record` at this point is a UI 
object representation that could + potentially include circular references or very deeply nested objects. + Since `_removeEmptyValues` is recursive this can lead to stack + overflow errors. +- deposit-ui: log errors on all deposit form actions + * This can help with debugging unexpected non-network related errors + that might occur in the logic before/after a REST API requests. + Version v16.3.0 (released 2024-11-27) - github: added default license from Github API diff --git a/invenio_rdm_records/__init__.py b/invenio_rdm_records/__init__.py index 8c4659b42..9c294024b 100644 --- a/invenio_rdm_records/__init__.py +++ b/invenio_rdm_records/__init__.py @@ -12,6 +12,6 @@ from .ext import InvenioRDMRecords -__version__ = "16.3.0" +__version__ = "16.3.1" __all__ = ("__version__", "InvenioRDMRecords") diff --git a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/serializers/SchemaField.js b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/serializers/SchemaField.js index f90e311af..0e70913bf 100644 --- a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/serializers/SchemaField.js +++ b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/serializers/SchemaField.js @@ -1,5 +1,5 @@ // This file is part of Invenio-RDM-Records -// Copyright (C) 2020-2023 CERN. +// Copyright (C) 2020-2024 CERN. // Copyright (C) 2020-2022 Northwestern University. 
// // Invenio-RDM-Records is free software; you can redistribute it and/or modify it diff --git a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/deposit.js b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/deposit.js index f9c01c121..f26659f68 100644 --- a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/deposit.js +++ b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/deposit.js @@ -1,5 +1,5 @@ // This file is part of Invenio-RDM-Records -// Copyright (C) 2020-2023 CERN. +// Copyright (C) 2020-2024 CERN. // Copyright (C) 2020-2022 Northwestern University. // // Invenio-RDM-Records is free software; you can redistribute it and/or modify it diff --git a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/files.js b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/files.js index c62f00e32..20270bd92 100644 --- a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/files.js +++ b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/state/actions/files.js @@ -1,5 +1,5 @@ // This file is part of Invenio-RDM-Records -// Copyright (C) 2020-2023 CERN. +// Copyright (C) 2020-2024 CERN. // Copyright (C) 2020-2022 Northwestern University. 
// // Invenio-RDM-Records is free software; you can redistribute it and/or modify it From 808a45c255115819f4bfba6056af82800add9b92 Mon Sep 17 00:00:00 2001 From: alejandromumo Date: Wed, 4 Dec 2024 11:55:21 +0100 Subject: [PATCH 05/23] github: lower license spdx id --- invenio_rdm_records/services/github/release.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/invenio_rdm_records/services/github/release.py b/invenio_rdm_records/services/github/release.py index aa89e1380..31a0e0818 100644 --- a/invenio_rdm_records/services/github/release.py +++ b/invenio_rdm_records/services/github/release.py @@ -64,7 +64,9 @@ def metadata(self): # Add default license if not yet added if not output.get("rights"): - output.update({"rights": [{"id": metadata.repo_license or "cc-by-4.0"}]}) + output.update( + {"rights": [{"id": metadata.repo_license.lower() or "cc-by-4.0"}]} + ) return output def get_custom_fields(self): From 0f6bd91783d0d34a732c9d994135febfc6ff3d11 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Wed, 4 Dec 2024 14:09:43 +0100 Subject: [PATCH 06/23] =?UTF-8?q?=F0=9F=93=A6=20release:=20v16.3.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.rst | 4 ++++ invenio_rdm_records/__init__.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 091833bb8..596751242 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,6 +12,10 @@ Changes ======= +Version v16.3.2 (released 2024-12-04) + +- github: lower license spdx id + Version v16.3.1 (released 2024-12-02) - deposit-ui: make sure we handle null/undefined for SchemaField diff --git a/invenio_rdm_records/__init__.py b/invenio_rdm_records/__init__.py index 9c294024b..2e59f0227 100644 --- a/invenio_rdm_records/__init__.py +++ b/invenio_rdm_records/__init__.py @@ -12,6 +12,6 @@ from .ext import InvenioRDMRecords -__version__ = "16.3.1" +__version__ = "16.3.2" __all__ = ("__version__", "InvenioRDMRecords") 
From e00f1773d5b03b3e955d57005fab1f8bdf3b37d1 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Wed, 4 Dec 2024 16:37:45 +0100 Subject: [PATCH 07/23] github: handle missing repo license --- invenio_rdm_records/services/github/release.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/invenio_rdm_records/services/github/release.py b/invenio_rdm_records/services/github/release.py index 31a0e0818..83772f1bc 100644 --- a/invenio_rdm_records/services/github/release.py +++ b/invenio_rdm_records/services/github/release.py @@ -64,9 +64,10 @@ def metadata(self): # Add default license if not yet added if not output.get("rights"): - output.update( - {"rights": [{"id": metadata.repo_license.lower() or "cc-by-4.0"}]} - ) + default_license = "cc-by-4.0" + if metadata.repo_license: + default_license = metadata.repo_license.lower() + output.update({"rights": [{"id": default_license}]}) return output def get_custom_fields(self): From 731ac6801fc55e33cfedf4392f583ac7efa35b5f Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Wed, 4 Dec 2024 16:40:52 +0100 Subject: [PATCH 08/23] =?UTF-8?q?=F0=9F=93=A6=20release:=20v16.3.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.rst | 4 ++++ invenio_rdm_records/__init__.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 596751242..e0884e225 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,6 +12,10 @@ Changes ======= +Version v16.3.3 (released 2024-12-04) + +- github: handle missing repo license + Version v16.3.2 (released 2024-12-04) - github: lower license spdx id diff --git a/invenio_rdm_records/__init__.py b/invenio_rdm_records/__init__.py index 2e59f0227..81db0d883 100644 --- a/invenio_rdm_records/__init__.py +++ b/invenio_rdm_records/__init__.py @@ -12,6 +12,6 @@ from .ext import InvenioRDMRecords -__version__ = "16.3.2" +__version__ = "16.3.3" __all__ = ("__version__", "InvenioRDMRecords") From 
e175a3134882de4c9bbf05197484ee156b1092b2 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Thu, 5 Dec 2024 15:28:27 +0100 Subject: [PATCH 09/23] datacite: fix funding serialization for optional award fields * Makes sure that we handle missing values for optional award fields like "title" and "number". --- .../resources/serializers/datacite/schema.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/invenio_rdm_records/resources/serializers/datacite/schema.py b/invenio_rdm_records/resources/serializers/datacite/schema.py index 82d685e41..27b65b4e7 100644 --- a/invenio_rdm_records/resources/serializers/datacite/schema.py +++ b/invenio_rdm_records/resources/serializers/datacite/schema.py @@ -16,7 +16,6 @@ from flask_resources.serializers import BaseSerializerSchema from invenio_access.permissions import system_identity from invenio_i18n import lazy_gettext as _ -from invenio_records_resources.proxies import current_service_registry from marshmallow import Schema, ValidationError, fields, missing, post_dump, validate from marshmallow_utils.fields import SanitizedUnicode from marshmallow_utils.html import strip_html @@ -617,8 +616,12 @@ def get_funding(self, obj): # award award = funding.get("award") if award: # having an award is optional - funding_ref["awardTitle"] = award.get("title", {}).get("en", missing) - funding_ref["awardNumber"] = award["number"] + award_title = award.get("title", {}).get("en") + if award_title: + funding_ref["awardTitle"] = award_title + award_number = award.get("number") + if award_number: + funding_ref["awardNumber"] = award_number identifiers = award.get("identifiers", []) if identifiers: From 72c812bbf99988a9e70065f50e03c868e801a35d Mon Sep 17 00:00:00 2001 From: alejandromumo Date: Fri, 6 Dec 2024 10:53:40 +0100 Subject: [PATCH 10/23] github: map license NOASSERTION to other --- invenio_rdm_records/services/github/metadata.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git 
a/invenio_rdm_records/services/github/metadata.py b/invenio_rdm_records/services/github/metadata.py index 2a64ddbd9..b6e0d438a 100644 --- a/invenio_rdm_records/services/github/metadata.py +++ b/invenio_rdm_records/services/github/metadata.py @@ -80,9 +80,15 @@ def default_metadata(self): @property def repo_license(self): - """Get license from repository, if any. Falls back to default.""" + """Get license from repository, if any.""" repo_license_obj = self.rdm_release.repository_payload.get("license", {}) - return repo_license_obj.get("spdx_id") if repo_license_obj else None + if not repo_license_obj: + return None + spdx_id = repo_license_obj.get("spdx_id") + # For 'other' type of licenses, Github sets the spdx_id to NOASSERTION + if spdx_id == "NOASSERTION": + spdx_id = "other" + return spdx_id @property def contributors(self): From 843c0e4f47f9eba69d5526ad6a95038cdf2d53c8 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Fri, 6 Dec 2024 11:00:31 +0100 Subject: [PATCH 11/23] github: return None for `NOASSERTION` license --- invenio_rdm_records/services/github/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invenio_rdm_records/services/github/metadata.py b/invenio_rdm_records/services/github/metadata.py index b6e0d438a..8bba7d6d7 100644 --- a/invenio_rdm_records/services/github/metadata.py +++ b/invenio_rdm_records/services/github/metadata.py @@ -87,7 +87,7 @@ def repo_license(self): spdx_id = repo_license_obj.get("spdx_id") # For 'other' type of licenses, Github sets the spdx_id to NOASSERTION if spdx_id == "NOASSERTION": - spdx_id = "other" + return None return spdx_id @property From 35e968f458cb753d4be780440214c43de8665e9f Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Fri, 6 Dec 2024 11:01:46 +0100 Subject: [PATCH 12/23] =?UTF-8?q?=F0=9F=93=A6=20release:=20v16.3.4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.rst | 7 +++++++ invenio_rdm_records/__init__.py | 2 +- 2 
files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index e0884e225..98fa377dd 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,6 +12,13 @@ Changes ======= +Version v16.3.4 (released 2024-12-06) + +- github: return None for `NOASSERTION` license +- datacite: fix funding serialization for optional award fields + * Makes sure that we handle missing values for optional award fields + like "title" and "number". + Version v16.3.3 (released 2024-12-04) - github: handle missing repo license diff --git a/invenio_rdm_records/__init__.py b/invenio_rdm_records/__init__.py index 81db0d883..ace8f9b86 100644 --- a/invenio_rdm_records/__init__.py +++ b/invenio_rdm_records/__init__.py @@ -12,6 +12,6 @@ from .ext import InvenioRDMRecords -__version__ = "16.3.3" +__version__ = "16.3.4" __all__ = ("__version__", "InvenioRDMRecords") From f4ddb41a1110d9a56041635695c83e3c30f53fb2 Mon Sep 17 00:00:00 2001 From: roll Date: Mon, 9 Dec 2024 09:31:22 +0000 Subject: [PATCH 13/23] serializers: add datapackage serializer (#1742) --- .../resources/serializers/__init__.py | 2 + .../serializers/datapackage/__init__.py | 26 +++++ .../serializers/datapackage/schema.py | 84 ++++++++++++++ .../test_datapackage_serializer.py | 108 ++++++++++++++++++ 4 files changed, 220 insertions(+) create mode 100644 invenio_rdm_records/resources/serializers/datapackage/__init__.py create mode 100644 invenio_rdm_records/resources/serializers/datapackage/schema.py create mode 100644 tests/resources/serializers/test_datapackage_serializer.py diff --git a/invenio_rdm_records/resources/serializers/__init__.py b/invenio_rdm_records/resources/serializers/__init__.py index f150cfa14..3b67a347a 100644 --- a/invenio_rdm_records/resources/serializers/__init__.py +++ b/invenio_rdm_records/resources/serializers/__init__.py @@ -22,6 +22,7 @@ from .csl import CSLJSONSerializer, StringCitationSerializer from .csv import CSVRecordSerializer from .datacite import DataCite43JSONSerializer, 
DataCite43XMLSerializer +from .datapackage import DataPackageSerializer from .dcat import DCATSerializer from .dublincore import DublinCoreJSONSerializer, DublinCoreXMLSerializer from .geojson import GeoJSONSerializer @@ -43,6 +44,7 @@ "CSVRecordSerializer", "DataCite43JSONSerializer", "DataCite43XMLSerializer", + "DataPackageSerializer", "DublinCoreJSONSerializer", "DublinCoreXMLSerializer", "FAIRSignpostingProfileLvl2Serializer", diff --git a/invenio_rdm_records/resources/serializers/datapackage/__init__.py b/invenio_rdm_records/resources/serializers/datapackage/__init__.py new file mode 100644 index 000000000..e55d5e8d8 --- /dev/null +++ b/invenio_rdm_records/resources/serializers/datapackage/__init__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 Open Knowledge Foundation +# +# Invenio-RDM-Records is free software; you can redistribute it and/or modify +# it under the terms of the MIT License; see LICENSE file for more details. + +"""Data Package Serializers for Invenio RDM Records.""" + +from flask_resources import BaseListSchema, MarshmallowSerializer +from flask_resources.serializers import JSONSerializer + +from .schema import DataPackageSchema + + +class DataPackageSerializer(MarshmallowSerializer): + """Marshmallow based Data Package serializer for records.""" + + def __init__(self, **options): + """Constructor.""" + super().__init__( + format_serializer_cls=JSONSerializer, + object_schema_cls=DataPackageSchema, + list_schema_cls=BaseListSchema, + **options + ) diff --git a/invenio_rdm_records/resources/serializers/datapackage/schema.py b/invenio_rdm_records/resources/serializers/datapackage/schema.py new file mode 100644 index 000000000..7da1dd09a --- /dev/null +++ b/invenio_rdm_records/resources/serializers/datapackage/schema.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 Open Knowledge Foundation +# +# Invenio-RDM-Records is free software; you can redistribute it and/or modify +# it under the terms of the 
MIT License; see LICENSE file for more details. + +"""Data Package based Schema for Invenio RDM Records.""" + +from marshmallow import Schema, fields, missing + +PROFILE_URL = "https://datapackage.org/profiles/2.0/datapackage.json" + + +class DataPackageSchema(Schema): + """Schema for Data Package in JSON.""" + + profile = fields.Constant(PROFILE_URL, data_key="$schema") + + id = fields.Str(attribute="links.doi") + name = fields.Str(attribute="id") + title = fields.Str(attribute="metadata.title") + description = fields.Str(attribute="metadata.description") + version = fields.Str(attribute="metadata.version") + created = fields.Str(attribute="created") + homepage = fields.Str(attribute="links.self_html") + keywords = fields.Method("get_keywords") + resources = fields.Method("get_resources") + licenses = fields.Method("get_licenses") + contributors = fields.Method("get_contributors") + + def get_keywords(self, obj): + keywords = [] + for subject in obj.get("metadata", {}).get("subjects", []): + keyword = subject.get("subject") + if keyword: + keywords.append(keyword) + return keywords if keywords else missing + + def get_resources(self, obj): + resources = [] + basepath = obj.get("links", {}).get("self_html") + if basepath: + for file in obj.get("files", {}).get("entries", {}).values(): + resource = {} + resource["name"] = file.get("key") + resource["path"] = f'{basepath}/files/{file.get("key")}' + resource["format"] = file.get("ext") + resource["mimetype"] = file.get("mimetype") + resource["bytes"] = file.get("size") + resource["hash"] = file.get("checksum") + resource = {k: v for k, v in resource.items() if v is not None} + if resource.get("name") and resource.get("path"): + resources.append(resource) + return resources + + def get_licenses(self, obj): + licenses = [] + for item in obj.get("metadata", {}).get("rights", []): + license = {} + license["name"] = item.get("id") + license["path"] = item.get("link") or item.get("props", {}).get("url") + license["title"] = 
item.get("title", {}).get("en") + license = {k: v for k, v in license.items() if v is not None} + if license.get("name"): + licenses.append(license) + return licenses if licenses else missing + + def get_contributors(self, obj): + contributors = [] + for type in ["creator", "contributor"]: + for item in obj.get("metadata", {}).get(f"{type}s", []): + entity = item.get("person_or_org", {}) + parent = (item.get("affiliations") or [{}])[0] + contributor = {} + contributor["title"] = entity.get("name") + contributor["givenName"] = entity.get("given_name") + contributor["familyName"] = entity.get("family_name") + contributor["roles"] = [item.get("role", {}).get("id", type)] + contributor["organization"] = parent.get("name") + contributor = {k: v for k, v in contributor.items() if v is not None} + if contributor: + contributors.append(contributor) + return contributors if contributors else missing diff --git a/tests/resources/serializers/test_datapackage_serializer.py b/tests/resources/serializers/test_datapackage_serializer.py new file mode 100644 index 000000000..7eeeb66b3 --- /dev/null +++ b/tests/resources/serializers/test_datapackage_serializer.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 Open Knowledge Foundation +# +# Invenio-RDM-Records is free software; you can redistribute it and/or modify +# it under the terms of the MIT License; see LICENSE file for more details. 
+ +"""Resources serializers tests.""" + +from invenio_rdm_records.resources.serializers.datapackage import DataPackageSerializer + + +def test_data_package_serializer_empty_record(): + serializer = DataPackageSerializer() + serialized_record = serializer.dump_obj({}) + assert serialized_record == { + "$schema": "https://datapackage.org/profiles/2.0/datapackage.json", + "resources": [], + } + + +def test_data_package_serializer_minimal_record(minimal_record_to_dict): + serializer = DataPackageSerializer() + serialized_record = serializer.dump_obj(minimal_record_to_dict) + assert serialized_record == { + "$schema": "https://datapackage.org/profiles/2.0/datapackage.json", + "id": "https://handle.stage.datacite.org/10.1234/67890-fghij", + "name": "67890-fghij", + "title": "A Romans story", + "created": "2023-11-14T19:33:09.837080+00:00", + "homepage": "https://127.0.0.1:5000/records/67890-fghij", + "resources": [], + "contributors": [ + { + "familyName": "Brown", + "givenName": "Troy", + "roles": ["creator"], + }, + { + "roles": ["creator"], + "title": "Troy Inc.", + }, + ], + } + + +def test_data_package_serializer_full_record(full_record_to_dict): + serializer = DataPackageSerializer() + serialized_record = serializer.dump_obj(full_record_to_dict) + assert serialized_record == { + "$schema": "https://datapackage.org/profiles/2.0/datapackage.json", + "id": "https://handle.stage.datacite.org/10.1234/inveniordm.1234", + "name": "12345-abcde", + "title": "InvenioRDM", + "description": "

A description

with HTML tags

", + "version": "v1.0", + "created": "2023-11-14T18:30:55.738898+00:00", + "homepage": "https://127.0.0.1:5000/records/12345-abcde", + "keywords": [ + "Abdominal Injuries", + "custom", + ], + "resources": [ + { + "name": "test.txt", + "path": "https://127.0.0.1:5000/records/12345-abcde/files/test.txt", + "format": "txt", + "mimetype": "text/plain", + "bytes": 9, + "hash": "md5:e795abeef2c38de2b064be9f6364ceae", + }, + ], + "licenses": [ + { + "name": "cc-by-4.0", + "path": "https://creativecommons.org/licenses/by/4.0/legalcode", + "title": "Creative Commons Attribution 4.0 International", + }, + ], + "contributors": [ + { + "familyName": "Nielsen", + "givenName": "Lars Holm", + "organization": "CERN", + "roles": ["creator"], + "title": "Nielsen, Lars Holm", + }, + { + "familyName": "Tom", + "givenName": "Blabin", + "roles": ["creator"], + "title": "Tom, Blabin", + }, + { + "familyName": "Nielsen", + "givenName": "Lars Holm", + "organization": "CERN", + "roles": ["other"], + "title": "Nielsen, Lars Holm", + }, + { + "familyName": "Dirk", + "givenName": "Dirkin", + "roles": ["other"], + "title": "Dirk, Dirkin", + }, + ], + } From 8c0a2674c23c127f08f3949419dfe28ccf5e7f90 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Mon, 9 Dec 2024 11:17:28 +0100 Subject: [PATCH 14/23] chore: fix missing docstrings --- .../resources/serializers/datapackage/schema.py | 4 ++++ invenio_rdm_records/services/schemas/metadata.py | 1 + 2 files changed, 5 insertions(+) diff --git a/invenio_rdm_records/resources/serializers/datapackage/schema.py b/invenio_rdm_records/resources/serializers/datapackage/schema.py index 7da1dd09a..02f1d697e 100644 --- a/invenio_rdm_records/resources/serializers/datapackage/schema.py +++ b/invenio_rdm_records/resources/serializers/datapackage/schema.py @@ -30,6 +30,7 @@ class DataPackageSchema(Schema): contributors = fields.Method("get_contributors") def get_keywords(self, obj): + """Get keywords.""" keywords = [] for subject in obj.get("metadata", 
{}).get("subjects", []): keyword = subject.get("subject") @@ -38,6 +39,7 @@ def get_keywords(self, obj): return keywords if keywords else missing def get_resources(self, obj): + """Get resources.""" resources = [] basepath = obj.get("links", {}).get("self_html") if basepath: @@ -55,6 +57,7 @@ def get_resources(self, obj): return resources def get_licenses(self, obj): + """Get licenses.""" licenses = [] for item in obj.get("metadata", {}).get("rights", []): license = {} @@ -67,6 +70,7 @@ def get_licenses(self, obj): return licenses if licenses else missing def get_contributors(self, obj): + """Get contributors.""" contributors = [] for type in ["creator", "contributor"]: for item in obj.get("metadata", {}).get(f"{type}s", []): diff --git a/invenio_rdm_records/services/schemas/metadata.py b/invenio_rdm_records/services/schemas/metadata.py index 3269b003a..17c17a813 100644 --- a/invenio_rdm_records/services/schemas/metadata.py +++ b/invenio_rdm_records/services/schemas/metadata.py @@ -8,6 +8,7 @@ # it under the terms of the MIT License; see LICENSE file for more details. 
"""RDM record schemas.""" + from functools import partial from urllib import parse From 32ada14621800eecee398a7e2dff36e3059616a4 Mon Sep 17 00:00:00 2001 From: Fatimah Zulfiqar Date: Wed, 4 Dec 2024 14:05:01 +0100 Subject: [PATCH 15/23] schema: added identifiers to subjects --- invenio_rdm_records/records/api.py | 2 +- .../os-v1/rdmrecords/drafts/draft-v6.0.0.json | 10 ++++++++++ .../rdmrecords/records/record-v7.0.0.json | 10 ++++++++++ .../os-v2/rdmrecords/drafts/draft-v6.0.0.json | 10 ++++++++++ .../rdmrecords/records/record-v7.0.0.json | 19 ++++++++++++------- .../v7/rdmrecords/drafts/draft-v6.0.0.json | 10 ++++++++++ .../v7/rdmrecords/records/record-v6.0.0.json | 10 ++++++++++ 7 files changed, 63 insertions(+), 8 deletions(-) diff --git a/invenio_rdm_records/records/api.py b/invenio_rdm_records/records/api.py index 377ef6c82..83180c770 100644 --- a/invenio_rdm_records/records/api.py +++ b/invenio_rdm_records/records/api.py @@ -184,7 +184,7 @@ class CommonFieldsMixin: ), subjects=PIDListRelation( "metadata.subjects", - keys=["subject", "scheme", "props"], + keys=["subject", "scheme", "props", "identifiers"], pid_field=Subject.pid, cache_key="subjects", ), diff --git a/invenio_rdm_records/records/mappings/os-v1/rdmrecords/drafts/draft-v6.0.0.json b/invenio_rdm_records/records/mappings/os-v1/rdmrecords/drafts/draft-v6.0.0.json index 3c532097e..2e78f22ae 100644 --- a/invenio_rdm_records/records/mappings/os-v1/rdmrecords/drafts/draft-v6.0.0.json +++ b/invenio_rdm_records/records/mappings/os-v1/rdmrecords/drafts/draft-v6.0.0.json @@ -1238,6 +1238,16 @@ "props": { "type": "object", "dynamic": "true" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "keyword" + }, + "scheme": { + "type": "keyword" + } + } } } }, diff --git a/invenio_rdm_records/records/mappings/os-v1/rdmrecords/records/record-v7.0.0.json b/invenio_rdm_records/records/mappings/os-v1/rdmrecords/records/record-v7.0.0.json index 3e3cd6600..ef3b4c32a 100644 --- 
a/invenio_rdm_records/records/mappings/os-v1/rdmrecords/records/record-v7.0.0.json +++ b/invenio_rdm_records/records/mappings/os-v1/rdmrecords/records/record-v7.0.0.json @@ -1256,6 +1256,16 @@ "props": { "type": "object", "dynamic": "true" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "keyword" + }, + "scheme": { + "type": "keyword" + } + } } } }, diff --git a/invenio_rdm_records/records/mappings/os-v2/rdmrecords/drafts/draft-v6.0.0.json b/invenio_rdm_records/records/mappings/os-v2/rdmrecords/drafts/draft-v6.0.0.json index 0d93f5410..a9fedc421 100644 --- a/invenio_rdm_records/records/mappings/os-v2/rdmrecords/drafts/draft-v6.0.0.json +++ b/invenio_rdm_records/records/mappings/os-v2/rdmrecords/drafts/draft-v6.0.0.json @@ -1238,6 +1238,16 @@ "props": { "type": "object", "dynamic": "true" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "keyword" + }, + "scheme": { + "type": "keyword" + } + } } } }, diff --git a/invenio_rdm_records/records/mappings/os-v2/rdmrecords/records/record-v7.0.0.json b/invenio_rdm_records/records/mappings/os-v2/rdmrecords/records/record-v7.0.0.json index de3b0a3de..6af64c11c 100644 --- a/invenio_rdm_records/records/mappings/os-v2/rdmrecords/records/record-v7.0.0.json +++ b/invenio_rdm_records/records/mappings/os-v2/rdmrecords/records/record-v7.0.0.json @@ -45,13 +45,8 @@ "accent_analyzer": { "tokenizer": "standard", "type": "custom", - "char_filter": [ - "strip_special_chars" - ], - "filter": [ - "lowercase", - "asciifolding" - ] + "char_filter": ["strip_special_chars"], + "filter": ["lowercase", "asciifolding"] } } } @@ -1248,6 +1243,16 @@ "props": { "type": "object", "dynamic": "true" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "keyword" + }, + "scheme": { + "type": "keyword" + } + } } } }, diff --git a/invenio_rdm_records/records/mappings/v7/rdmrecords/drafts/draft-v6.0.0.json b/invenio_rdm_records/records/mappings/v7/rdmrecords/drafts/draft-v6.0.0.json index 
1a901cbfe..cb454eeeb 100644 --- a/invenio_rdm_records/records/mappings/v7/rdmrecords/drafts/draft-v6.0.0.json +++ b/invenio_rdm_records/records/mappings/v7/rdmrecords/drafts/draft-v6.0.0.json @@ -1238,6 +1238,16 @@ "props": { "type": "object", "dynamic": "true" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "keyword" + }, + "scheme": { + "type": "keyword" + } + } } } }, diff --git a/invenio_rdm_records/records/mappings/v7/rdmrecords/records/record-v6.0.0.json b/invenio_rdm_records/records/mappings/v7/rdmrecords/records/record-v6.0.0.json index 941fca4e4..9330c674c 100644 --- a/invenio_rdm_records/records/mappings/v7/rdmrecords/records/record-v6.0.0.json +++ b/invenio_rdm_records/records/mappings/v7/rdmrecords/records/record-v6.0.0.json @@ -1195,6 +1195,16 @@ "props": { "type": "object", "dynamic": "true" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "keyword" + }, + "scheme": { + "type": "keyword" + } + } } } }, From 49a9770967ace31c8299a243975ca983f5403e03 Mon Sep 17 00:00:00 2001 From: Fatimah Zulfiqar Date: Thu, 5 Dec 2024 09:42:44 +0100 Subject: [PATCH 16/23] serializer: updated subjects and affiliations in dcat --- .../resources/serializers/dcat/__init__.py | 103 ++++++++++++++++++ .../resources/serializers/dcat/schema.py | 43 +++++++- .../serializers/test_dcat_serializer.py | 2 +- 3 files changed, 146 insertions(+), 2 deletions(-) diff --git a/invenio_rdm_records/resources/serializers/dcat/__init__.py b/invenio_rdm_records/resources/serializers/dcat/__init__.py index 25cc8d02f..d56d92711 100644 --- a/invenio_rdm_records/resources/serializers/dcat/__init__.py +++ b/invenio_rdm_records/resources/serializers/dcat/__init__.py @@ -12,6 +12,7 @@ from datacite import schema43 from flask_resources import BaseListSchema, MarshmallowSerializer from flask_resources.serializers import SimpleSerializer +from idutils import detect_identifier_schemes, to_url from lxml import etree as ET from pkg_resources import resource_stream 
from werkzeug.utils import cached_property @@ -93,6 +94,100 @@ def access_url(file): if isinstance(tag_value, dict): el.attrib.update(tag_value) + def add_missing_creatibutor_links(self, rdf_tree): + """Add missing `rdf:about` attributes to <rdf:Description> within <dct:creator> and <dct:contributor> and <foaf:Organization> within <org:memberOf>.""" + namespaces = rdf_tree.nsmap + + # Helper function to add rdf:about based on identifier + def add_rdf_about(element, identifier_elem): + identifier = identifier_elem.text.strip() + schemes = detect_identifier_schemes(identifier) + rdf_about_url = next( + ( + to_url(identifier, scheme=scheme) + for scheme in schemes + if to_url(identifier, scheme) + ), + None, + ) + if rdf_about_url: + element.set( + "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about", rdf_about_url + ) + + # Process <dct:creator> and <dct:contributor> + contributors_and_creators = rdf_tree.xpath( + "//dct:creator/rdf:Description | //dct:contributor/rdf:Description", + namespaces=namespaces, + ) + + for description in contributors_and_creators: + # Add rdf:about for creator/contributor if missing + if not description.get( + "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about" + ): + identifier_elem = description.find("dct:identifier", namespaces) + if identifier_elem is not None: + add_rdf_about(description, identifier_elem) + + # Process <foaf:Organization> within <org:memberOf> at any level + organizations = rdf_tree.xpath( + "//org:memberOf//foaf:Organization[not(@rdf:about)]", + namespaces=namespaces, + ) + + for org in organizations: + org_identifier_elem = org.find("dct:identifier", namespaces) + if org_identifier_elem is not None: + add_rdf_about(org, org_identifier_elem) + + return rdf_tree + + def add_subjects_uri(self, rdf_tree, subjects): + """Add valueURI of subjects to the corresponding dct:subject elements in the RDF tree.""" + namespaces = rdf_tree.nsmap + for subject in subjects: + value_uri = subject.get("valueURI") + subject_label = subject.get("subject") + subject_scheme = subject.get("subjectScheme") + subject_props = subject.get("subjectProps", {}) + + if value_uri and
subject_label and subject_scheme: + # Find the corresponding dct:subject element by prefLabel and subjectScheme + subject_element = rdf_tree.xpath( + f""" + //dct:subject[ + skos:Concept[ + skos:prefLabel[text()='{subject_label}'] + and skos:inScheme/skos:ConceptScheme/dct:title[text()='{subject_scheme}'] + ] + ] + """, + namespaces=namespaces, + )[0] + + if subject_element: + # Add the valueURI to the dct:subject element as rdf:about + subject_element.set( + "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about", value_uri + ) + + # Check if + # subject has a definition in its props + definition = subject_props.get("definition") + if definition: + concept_elem = subject_element.find( + ".//skos:Concept", namespaces=namespaces + ) + if concept_elem is not None: + skos_definition = ET.Element( + "{http://www.w3.org/2004/02/skos/core#}definition" + ) + skos_definition.text = definition + concept_elem.append(skos_definition) + + return rdf_tree + def transform_with_xslt(self, dc_record, **kwargs): """Transform record with XSLT.""" dc_etree = schema43.dump_etree(dc_record) @@ -100,6 +195,14 @@ def transform_with_xslt(self, dc_record, **kwargs): dc_etree.tag = "{{{0}}}resource".format(dc_namespace) dcat_etree = self.xslt_transform_func(dc_etree).getroot() + # Add valueURI to subjects + subjects = dc_record.get("subjects", []) + if subjects: + dcat_etree = self.add_subjects_uri(dcat_etree, subjects) + + # Add the identifier links for creators & contributors if missing + dcat_etree = self.add_missing_creatibutor_links(dcat_etree) + # Inject files in results (since the XSLT can't do that by default) files_data = dc_record.get("_files", []) if files_data: diff --git a/invenio_rdm_records/resources/serializers/dcat/schema.py b/invenio_rdm_records/resources/serializers/dcat/schema.py index 0d5060128..1ac4d8c03 100644 --- a/invenio_rdm_records/resources/serializers/dcat/schema.py +++ b/invenio_rdm_records/resources/serializers/dcat/schema.py @@ -9,7 +9,7 @@ import idutils from 
flask import current_app -from marshmallow import fields, missing +from marshmallow import ValidationError, fields, missing, validate from marshmallow_utils.html import sanitize_unicode from invenio_rdm_records.resources.serializers.datacite import DataCite43Schema @@ -49,3 +49,44 @@ def get_files(self, obj): ) return files_list or missing + + def get_subjects(self, obj): + """Get subjects.""" + subjects = obj["metadata"].get("subjects", []) + if not subjects: + return missing + + validator = validate.URL() + serialized_subjects = [] + + for subject in subjects: + entry = {"subject": subject.get("subject")} + + id_ = subject.get("id") + if id_: + entry["subjectScheme"] = subject.get("scheme") + try: + validator(id_) + entry["valueURI"] = id_ + except ValidationError: + pass + + # Get identifiers and assign valueURI if scheme is 'url' and id_ was not a valid url + if "valueURI" not in entry: + entry["valueURI"] = next( + ( + identifier.get("identifier") + for identifier in subject.get("identifiers", []) + if identifier.get("scheme") == "url" + ), + None, + ) + + # Add props if it exists + props = subject.get("props", {}) + if props: + entry["subjectProps"] = props + + serialized_subjects.append(entry) + + return serialized_subjects if serialized_subjects else missing diff --git a/tests/resources/serializers/test_dcat_serializer.py b/tests/resources/serializers/test_dcat_serializer.py index cc6137b06..14455fadb 100644 --- a/tests/resources/serializers/test_dcat_serializer.py +++ b/tests/resources/serializers/test_dcat_serializer.py @@ -71,7 +71,7 @@ def test_dcat_serializer(running_app, full_record_to_dict): " \n" " 2018\n' - " \n" + ' \n' " \n" " Abdominal Injuries\n" " \n" From 5090c802154120265c47070ac2ae6faf8a47cf38 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Mon, 9 Dec 2024 12:20:11 +0100 Subject: [PATCH 17/23] community-records: allow scan search * Adds `scan` and `scan_params` arguments to `CommunityRecordsService.search(...)`, to allow for serving 
scan results (but only via the service). --- .../services/community_records/service.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/invenio_rdm_records/services/community_records/service.py b/invenio_rdm_records/services/community_records/service.py index 8ed6c4c85..c2c7fb8fc 100644 --- a/invenio_rdm_records/services/community_records/service.py +++ b/invenio_rdm_records/services/community_records/service.py @@ -45,6 +45,8 @@ def search( params=None, search_preference=None, extra_filter=None, + scan=False, + scan_params=None, **kwargs, ): """Search for records published in the given community.""" @@ -76,7 +78,12 @@ def search( permission_action="read", **kwargs, ) - search_result = search.execute() + + if scan: + scan_params = scan_params or {} + search_result = search.scan(**scan_params) + else: + search_result = search.execute() return self.result_list( self, From 2b28f50e784026a50c670da6f3a5711bf7123d13 Mon Sep 17 00:00:00 2001 From: alejandromumo Date: Fri, 6 Dec 2024 11:19:20 +0100 Subject: [PATCH 18/23] bibtex: add trailing comma in url field --- invenio_rdm_records/resources/serializers/bibtex/schema.py | 2 +- tests/resources/serializers/test_bibtex_serializer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/invenio_rdm_records/resources/serializers/bibtex/schema.py b/invenio_rdm_records/resources/serializers/bibtex/schema.py index da4372df1..e80f2aa28 100644 --- a/invenio_rdm_records/resources/serializers/bibtex/schema.py +++ b/invenio_rdm_records/resources/serializers/bibtex/schema.py @@ -269,7 +269,7 @@ def _format_output_row(self, field, value): elif field == "month": out = " {0:<12} = {1},\n".format(field, value) elif field == "url": - out = " {0:<12} = {{{1}}}\n".format(field, value) + out = " {0:<12} = {{{1}}},\n".format(field, value) else: if not isinstance(value, list) and value.isdigit(): out = " {0:<12} = {1},\n".format(field, value) diff --git 
a/tests/resources/serializers/test_bibtex_serializer.py b/tests/resources/serializers/test_bibtex_serializer.py index 170881fdd..5d2d54610 100644 --- a/tests/resources/serializers/test_bibtex_serializer.py +++ b/tests/resources/serializers/test_bibtex_serializer.py @@ -74,7 +74,7 @@ def test_bibtex_serializer_full_record(running_app, updated_full_record): " publisher = {InvenioRDM},\n" " version = {v1.0},\n" " doi = {10.1234/12345-abcde},\n" - " url = {https://doi.org/10.1234/12345-abcde}\n" + " url = {https://doi.org/10.1234/12345-abcde},\n" "}" ) From fa450c3142c414836e9e837c65a0e79f78177b85 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Tue, 10 Dec 2024 10:01:11 +0100 Subject: [PATCH 19/23] =?UTF-8?q?=F0=9F=93=A6=20release:=20v16.4.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.rst | 11 +++++++++++ invenio_rdm_records/__init__.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 98fa377dd..ccc161ca5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,6 +12,17 @@ Changes ======= +Version v16.4.0 (released 2024-12-10) + +- bibtex: add trailing comma in url field +- community-records: allow scan search + * Adds `scan` and `scan_params` arguments to + `CommunityRecordsService.search(...)`, to allow for serving scan + results (but only via the service). 
+- serializer: updated subjects and affiliations in dcat +- schema: added identifiers to subjects +- serializers: add datapackage serializer (#1742) + Version v16.3.4 (released 2024-12-06) - github: return None for `NOASSERTION` license diff --git a/invenio_rdm_records/__init__.py b/invenio_rdm_records/__init__.py index ace8f9b86..e39d5f043 100644 --- a/invenio_rdm_records/__init__.py +++ b/invenio_rdm_records/__init__.py @@ -12,6 +12,6 @@ from .ext import InvenioRDMRecords -__version__ = "16.3.4" +__version__ = "16.4.0" __all__ = ("__version__", "InvenioRDMRecords") From 38bfaedcee20a6df1f5b623772a145ec2aead736 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Wed, 11 Dec 2024 09:57:54 +0100 Subject: [PATCH 20/23] mappings: add missing `identifiers` to community orgs * Adds the missing `identifiers` mapping field to community organizations. --- .../os-v1/rdmrecords/drafts/draft-v6.0.0.json | 30 +++++++++++++++++++ .../rdmrecords/records/record-v7.0.0.json | 30 +++++++++++++++++++ .../os-v2/rdmrecords/drafts/draft-v6.0.0.json | 30 +++++++++++++++++++ .../rdmrecords/records/record-v7.0.0.json | 30 +++++++++++++++++++ 4 files changed, 120 insertions(+) diff --git a/invenio_rdm_records/records/mappings/os-v1/rdmrecords/drafts/draft-v6.0.0.json b/invenio_rdm_records/records/mappings/os-v1/rdmrecords/drafts/draft-v6.0.0.json index 2e78f22ae..0d5aa3fe3 100644 --- a/invenio_rdm_records/records/mappings/os-v1/rdmrecords/drafts/draft-v6.0.0.json +++ b/invenio_rdm_records/records/mappings/os-v1/rdmrecords/drafts/draft-v6.0.0.json @@ -307,6 +307,21 @@ }, "name": { "type": "text" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "scheme": { + "type": "keyword" + } + } } } }, @@ -499,6 +514,21 @@ }, "name": { "type": "text" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "scheme": { + "type": 
"keyword" + } + } } } }, diff --git a/invenio_rdm_records/records/mappings/os-v1/rdmrecords/records/record-v7.0.0.json b/invenio_rdm_records/records/mappings/os-v1/rdmrecords/records/record-v7.0.0.json index ef3b4c32a..eaa526292 100644 --- a/invenio_rdm_records/records/mappings/os-v1/rdmrecords/records/record-v7.0.0.json +++ b/invenio_rdm_records/records/mappings/os-v1/rdmrecords/records/record-v7.0.0.json @@ -362,6 +362,21 @@ }, "name": { "type": "text" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "scheme": { + "type": "keyword" + } + } } } }, @@ -554,6 +569,21 @@ }, "name": { "type": "text" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "scheme": { + "type": "keyword" + } + } } } }, diff --git a/invenio_rdm_records/records/mappings/os-v2/rdmrecords/drafts/draft-v6.0.0.json b/invenio_rdm_records/records/mappings/os-v2/rdmrecords/drafts/draft-v6.0.0.json index a9fedc421..597a31824 100644 --- a/invenio_rdm_records/records/mappings/os-v2/rdmrecords/drafts/draft-v6.0.0.json +++ b/invenio_rdm_records/records/mappings/os-v2/rdmrecords/drafts/draft-v6.0.0.json @@ -307,6 +307,21 @@ }, "name": { "type": "text" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "scheme": { + "type": "keyword" + } + } } } }, @@ -499,6 +514,21 @@ }, "name": { "type": "text" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "scheme": { + "type": "keyword" + } + } } } }, diff --git a/invenio_rdm_records/records/mappings/os-v2/rdmrecords/records/record-v7.0.0.json b/invenio_rdm_records/records/mappings/os-v2/rdmrecords/records/record-v7.0.0.json index 6af64c11c..495b2071b 100644 --- 
a/invenio_rdm_records/records/mappings/os-v2/rdmrecords/records/record-v7.0.0.json +++ b/invenio_rdm_records/records/mappings/os-v2/rdmrecords/records/record-v7.0.0.json @@ -362,6 +362,21 @@ }, "name": { "type": "text" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "scheme": { + "type": "keyword" + } + } } } }, @@ -554,6 +569,21 @@ }, "name": { "type": "text" + }, + "identifiers": { + "properties": { + "identifier": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "scheme": { + "type": "keyword" + } + } } } }, From 7e17fad16868ed40c2b65082246dcac77ad45fb1 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Wed, 11 Dec 2024 10:34:30 +0100 Subject: [PATCH 21/23] =?UTF-8?q?=F0=9F=93=A6=20release:=20v16.4.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.rst | 5 +++++ invenio_rdm_records/__init__.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index ccc161ca5..5b95b4c80 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,6 +12,11 @@ Changes ======= +Version v16.4.1 (released 2024-12-11) + +- mappings: add missing `identifiers` to community orgs + * Adds the missing `identifiers` mapping field to community organizations + Version v16.4.0 (released 2024-12-10) - bibtex: add trailing comma in url field diff --git a/invenio_rdm_records/__init__.py b/invenio_rdm_records/__init__.py index e39d5f043..034902c72 100644 --- a/invenio_rdm_records/__init__.py +++ b/invenio_rdm_records/__init__.py @@ -12,6 +12,6 @@ from .ext import InvenioRDMRecords -__version__ = "16.4.0" +__version__ = "16.4.1" __all__ = ("__version__", "InvenioRDMRecords") From 4d5c649259fcdde84a805925abcade7378afc04a Mon Sep 17 00:00:00 2001 From: Anika Churilova Date: Thu, 5 Dec 2024 11:40:12 +0100 Subject: [PATCH 22/23] doi: handle UI for optional DOI feature * closes 
https://github.com/CERNDocumentServer/cds-rdm/issues/163 Co-authored-by: Zacharias Zacharodimos --- .../src/deposit/api/DepositApiClient.js | 7 +- .../controls/PublishButton/PublishButton.js | 58 +++++++-- .../deposit/fields/Identifiers/PIDField.js | 123 ++++++++++++++---- invenio_rdm_records/config.py | 1 + invenio_rdm_records/records/api.py | 32 ++++- .../resources/serializers/datacite/schema.py | 2 +- .../services/components/pids.py | 101 +++++++++++++- .../components/test_pids_component.py | 37 +++--- tests/services/test_rdm_service.py | 114 +++++++++++++++- 9 files changed, 408 insertions(+), 67 deletions(-) diff --git a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/api/DepositApiClient.js b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/api/DepositApiClient.js index 0d35c2c8d..7a443698a 100644 --- a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/api/DepositApiClient.js +++ b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/api/DepositApiClient.js @@ -103,7 +103,12 @@ export class RDMDepositApiClient extends DepositApiClient { ); return new DepositApiClientResponse(data, errors); } catch (error) { - const errorData = error.response.data; + let errorData = error.response.data; + const errors = this.recordSerializer.deserializeErrors( + error.response.data.errors || [] + ); + // this is to serialize raised error from the backend on publish + if (errors) errorData = errors; throw new DepositApiClientResponse({}, errorData); } } diff --git a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/controls/PublishButton/PublishButton.js b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/controls/PublishButton/PublishButton.js index 71dafb704..34518b157 100644 --- a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/controls/PublishButton/PublishButton.js +++ 
b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/controls/PublishButton/PublishButton.js @@ -18,6 +18,8 @@ import { DepositFormSubmitContext, } from "../../api/DepositFormSubmitContext"; import { DRAFT_PUBLISH_STARTED } from "../../state/types"; +import { scrollTop } from "../../utils"; +import { DRAFT_PUBLISH_FAILED_WITH_VALIDATION_ERRORS } from "../../state/types"; class PublishButtonComponent extends Component { state = { isConfirmModalOpen: false }; @@ -30,14 +32,36 @@ class PublishButtonComponent extends Component { handlePublish = (event, handleSubmit, publishWithoutCommunity) => { const { setSubmitContext } = this.context; - - setSubmitContext( - publishWithoutCommunity - ? DepositFormSubmitActions.PUBLISH_WITHOUT_COMMUNITY - : DepositFormSubmitActions.PUBLISH - ); - handleSubmit(event); - this.closeConfirmModal(); + const { formik, raiseDOINeededButNotReserved, isDOIRequired } = this.props; + const noINeedOne = formik?.values?.noINeedOne; + // Check for explicit DOI reservation via the "GET DOI button" only when DOI is + // optional in the instance's settings. If it is required, backend will automatically + // mint one even if it was not explicitly reserved + const shouldCheckForExplicitDOIReservation = + isDOIRequired !== undefined && // isDOIRequired is undefined when no value was provided from Invenio-app-rdm + !isDOIRequired && + noINeedOne && + Object.keys(formik?.values?.pids).length === 0; + if (shouldCheckForExplicitDOIReservation) { + const errors = { + pids: { + doi: i18next.t("DOI is needed. Please click on the button to reserve it."), + }, + }; + formik.setErrors(errors); + raiseDOINeededButNotReserved(formik?.values, errors); + this.closeConfirmModal(); + } else { + setSubmitContext( + publishWithoutCommunity + ? 
DepositFormSubmitActions.PUBLISH_WITHOUT_COMMUNITY + : DepositFormSubmitActions.PUBLISH + ); + handleSubmit(event); + this.closeConfirmModal(); + } + // scroll top to show the global error + scrollTop(); }; isDisabled = (values, isSubmitting, filesState) => { @@ -67,6 +91,7 @@ class PublishButtonComponent extends Component { publishWithoutCommunity, formik, publishModalExtraContent, + raiseDOINeededButNotReserved, ...ui } = this.props; const { isConfirmModalOpen } = this.state; @@ -139,6 +164,8 @@ PublishButtonComponent.propTypes = { formik: PropTypes.object.isRequired, publishModalExtraContent: PropTypes.string, filesState: PropTypes.object, + raiseDOINeededButNotReserved: PropTypes.func.isRequired, + isDOIRequired: PropTypes.bool, }; PublishButtonComponent.defaultProps = { @@ -147,15 +174,22 @@ PublishButtonComponent.defaultProps = { actionState: undefined, publishModalExtraContent: undefined, filesState: undefined, + isDOIRequired: undefined, }; const mapStateToProps = (state) => ({ actionState: state.deposit.actionState, publishModalExtraContent: state.deposit.config.publish_modal_extra, filesState: state.files, + isDOIRequired: state.deposit.config.is_doi_required, }); -export const PublishButton = connect( - mapStateToProps, - null -)(connectFormik(PublishButtonComponent)); +export const PublishButton = connect(mapStateToProps, (dispatch) => { + return { + raiseDOINeededButNotReserved: (data, errors) => + dispatch({ + type: DRAFT_PUBLISH_FAILED_WITH_VALIDATION_ERRORS, + payload: { data: data, errors: errors }, + }), + }; +})(connectFormik(PublishButtonComponent)); diff --git a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/fields/Identifiers/PIDField.js b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/fields/Identifiers/PIDField.js index bd5b00cae..e49437ef1 100644 --- a/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/fields/Identifiers/PIDField.js +++ 
b/invenio_rdm_records/assets/semantic-ui/js/invenio_rdm_records/src/deposit/fields/Identifiers/PIDField.js @@ -33,7 +33,7 @@ const getFieldErrors = (form, fieldPath) => { */ class ReservePIDBtn extends Component { render() { - const { disabled, handleReservePID, label, loading } = this.props; + const { disabled, handleReservePID, label, loading, fieldError } = this.props; return ( {({ form: formik }) => ( @@ -44,6 +44,7 @@ class ReservePIDBtn extends Component { disabled={disabled || loading} onClick={(e) => handleReservePID(e, formik)} content={label} + error={fieldError} /> )} @@ -54,6 +55,7 @@ class ReservePIDBtn extends Component { ReservePIDBtn.propTypes = { disabled: PropTypes.bool, handleReservePID: PropTypes.func.isRequired, + fieldError: PropTypes.object, label: PropTypes.string.isRequired, loading: PropTypes.bool, }; @@ -61,6 +63,7 @@ ReservePIDBtn.propTypes = { ReservePIDBtn.defaultProps = { disabled: false, loading: false, + fieldError: null, }; /** @@ -110,11 +113,13 @@ class ManagedUnmanagedSwitch extends Component { handleChange = (e, { value }) => { const { onManagedUnmanagedChange } = this.props; const isManagedSelected = value === "managed"; - onManagedUnmanagedChange(isManagedSelected); + const isNoNeedSelected = value === "notneeded"; + onManagedUnmanagedChange(isManagedSelected, isNoNeedSelected); }; render() { - const { disabled, isManagedSelected, pidLabel } = this.props; + const { disabled, isManagedSelected, isNoNeedSelected, pidLabel, required } = + this.props; return ( @@ -123,28 +128,41 @@ class ManagedUnmanagedSwitch extends Component { pidLabel: pidLabel, })} - + - + + {!required && ( + + + + )} ); } @@ -153,8 +171,10 @@ class ManagedUnmanagedSwitch extends Component { ManagedUnmanagedSwitch.propTypes = { disabled: PropTypes.bool, isManagedSelected: PropTypes.bool.isRequired, + isNoNeedSelected: PropTypes.bool.isRequired, onManagedUnmanagedChange: PropTypes.func.isRequired, pidLabel: PropTypes.string, + required: 
PropTypes.bool.isRequired, }; ManagedUnmanagedSwitch.defaultProps = { @@ -198,6 +218,8 @@ class ManagedIdentifierComponent extends Component { identifier, pidPlaceholder, pidType, + form, + fieldPath, } = this.props; const hasIdentifier = identifier !== ""; @@ -209,6 +231,7 @@ class ManagedIdentifierComponent extends Component { actionState === RESERVE_PID_STARTED && actionStateExtra.pidType === pidType } handleReservePID={this.handleReservePID} + fieldError={getFieldErrors(form, fieldPath)} /> ); @@ -253,6 +276,8 @@ ManagedIdentifierComponent.propTypes = { btnLabelDiscardPID: PropTypes.string.isRequired, pidPlaceholder: PropTypes.string.isRequired, pidType: PropTypes.string.isRequired, + form: PropTypes.object.isRequired, + fieldPath: PropTypes.string.isRequired, /* from Redux */ actionState: PropTypes.string, actionStateExtra: PropTypes.object, @@ -307,7 +332,7 @@ class UnmanagedIdentifierCmp extends Component { render() { const { localIdentifier } = this.state; - const { form, fieldPath, helpText, pidPlaceholder } = this.props; + const { form, fieldPath, helpText, pidPlaceholder, disabled } = this.props; const fieldError = getFieldErrors(form, fieldPath); return ( <> @@ -318,6 +343,7 @@ class UnmanagedIdentifierCmp extends Component { placeholder={pidPlaceholder} width={16} error={fieldError} + disabled={disabled} /> {helpText && } @@ -333,10 +359,12 @@ UnmanagedIdentifierCmp.propTypes = { identifier: PropTypes.string.isRequired, onIdentifierChanged: PropTypes.func.isRequired, pidPlaceholder: PropTypes.string.isRequired, + disabled: PropTypes.bool, }; UnmanagedIdentifierCmp.defaultProps = { helpText: null, + disabled: false, }; /** @@ -349,11 +377,18 @@ class CustomPIDField extends Component { constructor(props) { super(props); - const { canBeManaged, canBeUnmanaged } = this.props; + const { canBeManaged, canBeUnmanaged, record, field } = this.props; this.canBeManagedAndUnmanaged = canBeManaged && canBeUnmanaged; + const value = field?.value; + const 
isInternalProvider = value?.provider !== PROVIDER_EXTERNAL; + const isDraft = record?.is_draft === true; + const hasIdentifier = value?.identifier; + const isManagedSelected = + isDraft && hasIdentifier && isInternalProvider ? true : undefined; this.state = { - isManagedSelected: undefined, + isManagedSelected: isManagedSelected, + isNoNeedSelected: undefined, }; } @@ -373,7 +408,7 @@ class CustomPIDField extends Component { }; render() { - const { isManagedSelected } = this.state; + const { isManagedSelected, isNoNeedSelected } = this.state; const { btnLabelDiscardPID, btnLabelGetPID, @@ -394,6 +429,8 @@ class CustomPIDField extends Component { record, } = this.props; + let { doiDefaultSelection } = this.props; + const value = field.value || {}; const currentIdentifier = value.identifier || ""; const currentProvider = value.provider || ""; @@ -407,19 +444,43 @@ class CustomPIDField extends Component { } const hasManagedIdentifier = managedIdentifier !== ""; + const hasUnmanagedIdentifier = unmanagedIdentifier !== ""; + const doi = record?.pids?.doi?.identifier || ""; + const parentDoi = record.parent?.pids?.doi?.identifier || ""; + + const hasDoi = doi !== ""; + const hasParentDoi = parentDoi !== ""; + const isDoiCreated = currentIdentifier !== ""; + const isDraft = record.is_draft; + + const _isUnmanagedSelected = + isManagedSelected === undefined + ? hasUnmanagedIdentifier || + (currentIdentifier === "" && doiDefaultSelection === "yes") + : !isManagedSelected; const _isManagedSelected = isManagedSelected === undefined - ? hasManagedIdentifier || currentProvider === "" // i.e pids: {} + ? hasManagedIdentifier || + (currentIdentifier === "" && doiDefaultSelection === "no") // i.e pids: {} : isManagedSelected; - const doi = record?.pids?.doi?.identifier || ""; - const hasDoi = doi !== ""; - const isDoiCreated = currentIdentifier !== ""; + const _isNoNeedSelected = + isNoNeedSelected === undefined + ? 
(!_isManagedSelected && !_isUnmanagedSelected) || + (isDraft !== true && + currentIdentifier === "" && + doiDefaultSelection === "not_needed") + : isNoNeedSelected; + const fieldError = getFieldErrors(form, fieldPath); + return ( <> - + @@ -427,20 +488,32 @@ class CustomPIDField extends Component { { + isNoNeedSelected={_isNoNeedSelected} + onManagedUnmanagedChange={(userSelectedManaged, userSelectedNoNeed) => { if (userSelectedManaged) { form.setFieldValue("pids", {}); + if (!required) { + // We set the + form.setFieldValue("noINeedOne", true); + } + } else if (userSelectedNoNeed) { + form.setFieldValue("pids", {}); + form.setFieldValue("noINeedOne", false); } else { this.onExternalIdentifierChanged(""); + form.setFieldValue("noINeedOne", false); } + form.setFieldError(fieldPath, false); this.setState({ isManagedSelected: userSelectedManaged, + isNoNeedSelected: userSelectedNoNeed, }); }} pidLabel={pidLabel} + required={required} /> )} @@ -450,6 +523,7 @@ class CustomPIDField extends Component { btnLabelDiscardPID={btnLabelDiscardPID} btnLabelGetPID={btnLabelGetPID} form={form} + fieldPath={fieldPath} identifier={managedIdentifier} helpText={managedHelpText} pidPlaceholder={pidPlaceholder} @@ -458,7 +532,7 @@ class CustomPIDField extends Component { /> )} - {canBeUnmanaged && !_isManagedSelected && ( + {canBeUnmanaged && (!_isManagedSelected || _isNoNeedSelected) && ( { @@ -468,6 +542,7 @@ class CustomPIDField extends Component { fieldPath={fieldPath} pidPlaceholder={pidPlaceholder} helpText={unmanagedHelpText} + disabled={_isNoNeedSelected || isEditingPublishedRecord} /> )} @@ -493,6 +568,7 @@ CustomPIDField.propTypes = { required: PropTypes.bool.isRequired, unmanagedHelpText: PropTypes.string, record: PropTypes.object.isRequired, + doiDefaultSelection: PropTypes.object.isRequired, }; CustomPIDField.defaultProps = { @@ -542,6 +618,7 @@ PIDField.propTypes = { required: PropTypes.bool, unmanagedHelpText: PropTypes.string, record: PropTypes.object.isRequired, + 
doiDefaultSelection: PropTypes.object.isRequired, }; PIDField.defaultProps = { diff --git a/invenio_rdm_records/config.py b/invenio_rdm_records/config.py index 9ea88dddd..fdb96df32 100644 --- a/invenio_rdm_records/config.py +++ b/invenio_rdm_records/config.py @@ -391,6 +391,7 @@ def always_valid(identifier): "validator": idutils.is_doi, "normalizer": idutils.normalize_doi, "is_enabled": providers.DataCitePIDProvider.is_enabled, + "ui": {"default_selected": "yes"}, # "yes", "no" or "not_needed" }, "oai": { "providers": ["oai"], diff --git a/invenio_rdm_records/records/api.py b/invenio_rdm_records/records/api.py index 83180c770..7646105d1 100644 --- a/invenio_rdm_records/records/api.py +++ b/invenio_rdm_records/records/api.py @@ -536,10 +536,40 @@ def get_latest_published_by_parent(cls, parent): published yet or all versions are deleted. """ latest_record = cls.get_latest_by_parent(parent) - if latest_record.deletion_status != RecordDeletionStatusEnum.PUBLISHED.value: + if ( + latest_record + and latest_record.deletion_status + != RecordDeletionStatusEnum.PUBLISHED.value + ): return None return latest_record + @classmethod + def get_previous_published_by_parent(cls, parent): + """Get the previous of latest published record for the specified parent record. + + It might return None if there is no latest published version i.e not + published yet or all versions are deleted or there is only one published record. + + This method is needed instead of `get_latest_published_by_parent` because during + publish the version state is updated before the record is actually published. + That means, that `get_latest_published_by_parent` returns always the record that + is about to be published and thus, we cannot use it in the `component.publish()` + method to retrieve the actual last published record. + + Check `services.components.pids.PIDsComponent.publish()` for how it is used. 
+ """ + # We need no_autoflush because accessing record.versions automatically + # triggers an autoflush + with db.session.no_autoflush: + records = cls.get_records_by_parent(parent) + for record in records: + latest_version_index = record.versions.latest_index + if latest_version_index > 1: + if record.versions.index == latest_version_index - 1: + return record + return None + RDMFileRecord.record_cls = RDMRecord diff --git a/invenio_rdm_records/resources/serializers/datacite/schema.py b/invenio_rdm_records/resources/serializers/datacite/schema.py index 27b65b4e7..ee9889d26 100644 --- a/invenio_rdm_records/resources/serializers/datacite/schema.py +++ b/invenio_rdm_records/resources/serializers/datacite/schema.py @@ -417,7 +417,7 @@ def get_related_identifiers(self, obj): params={"_source_includes": "pids.doi"}, ) for version in record_versions: - version_doi = version["pids"]["doi"] + version_doi = version.get("pids", {}).get("doi") id_scheme = get_scheme_datacite( "doi", "RDM_RECORDS_IDENTIFIERS_SCHEMES", diff --git a/invenio_rdm_records/services/components/pids.py b/invenio_rdm_records/services/components/pids.py index 8a08a67b2..20d50fed7 100644 --- a/invenio_rdm_records/services/components/pids.py +++ b/invenio_rdm_records/services/components/pids.py @@ -15,14 +15,76 @@ from flask import current_app from invenio_drafts_resources.services.records.components import ServiceComponent from invenio_drafts_resources.services.records.uow import ParentRecordCommitOp +from invenio_i18n import lazy_gettext as _ from invenio_records_resources.services.uow import TaskOp +from ..errors import ValidationErrorWithMessageAsList from ..pids.tasks import register_or_update_pid class PIDsComponent(ServiceComponent): """Service component for PIDs.""" + ALLOWED_DOI_PROVIDERS_TRANSITIONS = { + "datacite": { + "allowed_providers": ["datacite"], + "message": _( + "A previous version used a DOI registered from {sitename}. This version must also use a DOI from {sitename}."
+ ), + }, + "external": { + "allowed_providers": ["external", "not_needed"], + "message": _( + "A previous version was published with a DOI from an external provider or without one. You cannot use a DOI registered from {sitename} for this version." + ), + }, + "not_needed": { + "allowed_providers": ["external", "not_needed"], + "message": _( + "A previous version was published with a DOI from an external provider or without one. You cannot use a DOI registered from {sitename} for this version." + ), + }, + } + + def _validate_doi_transition( + self, new_provider, previous_published_provider, errors=None + ): + """If DOI is not required then we validate allowed DOI providers. + + Each new version that is published must follow the ALLOWED_DOI_PROVIDERS_TRANSITIONS. + """ + sitename = current_app.config.get("THEME_SITENAME", "this repository") + sitename = current_app.config.get("THEME_SITENAME", "this repository") + + valid_transitions = self.ALLOWED_DOI_PROVIDERS_TRANSITIONS.get( + previous_published_provider, {} + ) + if new_provider not in valid_transitions.get("allowed_providers", []): + error_message = { + "field": "pids.doi", + "messages": [ + valid_transitions.get("message").format(sitename=sitename) + ], + } + + if errors is not None: + errors.append(error_message) + else: + raise ValidationErrorWithMessageAsList(message=[error_message]) + + def _validate_optional_doi(self, record, previous_published, errors=None): + """Reusable method to validate optional DOI.""" + if previous_published: + previous_published_pids = previous_published.get("pids", {}) + doi_pid = [pid for pid in record.pids.values() if "doi" in record.pids] + previous_published_provider = previous_published_pids.get("doi", {}).get( + "provider", "not_needed" + ) + new_provider = "not_needed" if not doi_pid else doi_pid[0]["provider"] + self._validate_doi_transition( + new_provider, previous_published_provider, errors + ) + def create(self, identity, data=None, record=None, errors=None): 
"""This method is called on draft creation. @@ -41,6 +103,16 @@ def update_draft(self, identity, data=None, record=None, errors=None): if "pids" in data: # there is new input data for PIDs pids_data = data["pids"] + required_schemes = set(self.service.config.pids_required) + + # if DOI is not required in an instance check validate allowed providers + # for each record version + if "doi" not in required_schemes: + previous_published = self.service.record_cls.get_latest_published_by_parent( + record.parent + ) + self._validate_optional_doi(record, previous_published, errors) + self.service.pids.pid_manager.validate(pids_data, record, errors) record.pids = pids_data @@ -75,7 +147,19 @@ def publish(self, identity, draft=None, record=None): record_schemes = set(record_pids.keys()) required_schemes = set(self.service.config.pids_required) - # Validate the draft PIDs + # if DOI is not required in an instance check validate allowed providers + # for each record version + if "doi" not in required_schemes: + # if a doi was ever minted for the parent record then we always require one + # for any version of the record that will be published + if draft.parent.get("pids", {}).get("doi"): + required_schemes.add("doi") + + previous_published = ( + self.service.record_cls.get_previous_published_by_parent(record.parent) + ) + self._validate_optional_doi(draft, previous_published) + self.service.pids.pid_manager.validate(draft_pids, draft, raise_errors=True) # Detect which PIDs on a published record that has been changed. 
@@ -129,12 +213,7 @@ def publish(self, identity, draft=None, record=None): def new_version(self, identity, draft=None, record=None): """A new draft should not have any pids from the previous record.""" - # This makes the draft use the same identifier as the previous - # version - if record.pids.get("doi", {}).get("provider") == "external": - draft.pids = {"doi": {"provider": "external", "identifier": ""}} - else: - draft.pids = {} + draft.pids = {} def edit(self, identity, draft=None, record=None): """Add current pids from the record to the draft. @@ -172,6 +251,14 @@ def publish(self, identity, draft=None, record=None): current_schemes = set(current_pids.keys()) required_schemes = set(self.service.config.parent_pids_required) + # If a DOI was added in the draft, create a parent DOI regardless of whether + # DOI is required. + # Note: we don't have to explicitly restrict the parent DOI creation to the + # datacite provider, because the `condition_func` used below skips the + # minting if the selected pid is external. + if draft.get("pids", {}).get("doi"): + required_schemes.add("doi") + conditional_schemes = self.service.config.parent_pids_conditional for scheme in set(required_schemes): condition_func = conditional_schemes.get(scheme) diff --git a/tests/services/components/test_pids_component.py b/tests/services/components/test_pids_component.py index cbd3dc841..dc102a6f0 100644 --- a/tests/services/components/test_pids_component.py +++ b/tests/services/components/test_pids_component.py @@ -123,12 +123,11 @@ class TestServiceConfigRequiredExternalPID(RDMRecordServiceConfig): @pytest.fixture(scope="module") -def no_pids_cmp(): +def no_pids_cmp(app): + service_config = TestServiceConfigNoPIDs.build(app) service = RDMRecordService( - config=TestServiceConfigNoPIDs, - pids_service=PIDsService( - config=TestServiceConfigNoPIDs, manager_cls=PIDManager - ), + config=service_config, + pids_service=PIDsService(config=service_config, manager_cls=PIDManager), )
c = PIDsComponent(service=service) c.uow = UnitOfWork() @@ -136,12 +135,11 @@ def no_pids_cmp(): @pytest.fixture(scope="module") -def no_required_pids_service(): +def no_required_pids_service(app): + service_config = TestServiceConfigNoRequiredPIDs.build(app) return RDMRecordService( - config=TestServiceConfigNoRequiredPIDs, - pids_service=PIDsService( - config=TestServiceConfigNoRequiredPIDs, manager_cls=PIDManager - ), + config=service_config, + pids_service=PIDsService(config=service_config, manager_cls=PIDManager), ) @@ -153,12 +151,11 @@ def no_required_pids_cmp(no_required_pids_service): @pytest.fixture(scope="module") -def required_managed_pids_cmp(): +def required_managed_pids_cmp(app): + service_config = TestServiceConfigRequiredManagedPID.build(app) service = RDMRecordService( - config=TestServiceConfigRequiredManagedPID, - pids_service=PIDsService( - config=TestServiceConfigRequiredManagedPID, manager_cls=PIDManager - ), + config=service_config, + pids_service=PIDsService(config=service_config, manager_cls=PIDManager), ) c = PIDsComponent(service=service) c.uow = UnitOfWork() @@ -166,12 +163,11 @@ def required_managed_pids_cmp(): @pytest.fixture(scope="module") -def required_external_pids_cmp(): +def required_external_pids_cmp(app): + service_config = TestServiceConfigRequiredExternalPID.build(app) service = RDMRecordService( - config=TestServiceConfigRequiredExternalPID, - pids_service=PIDsService( - config=TestServiceConfigRequiredExternalPID, manager_cls=PIDManager - ), + config=service_config, + pids_service=PIDsService(config=service_config, manager_cls=PIDManager), ) c = PIDsComponent(service=service) c.uow = UnitOfWork() @@ -318,6 +314,7 @@ def test_publish_no_pids(no_pids_cmp, minimal_record, identity_simple, location) ], ) def test_publish_no_required_pids( + app, pids, no_required_pids_service, no_required_pids_cmp, diff --git a/tests/services/test_rdm_service.py b/tests/services/test_rdm_service.py index ae1158d99..daf7aedb2 100644 --- 
a/tests/services/test_rdm_service.py +++ b/tests/services/test_rdm_service.py @@ -10,10 +10,15 @@ """Service level tests for Invenio RDM Records.""" +from copy import deepcopy + import pytest from invenio_rdm_records.proxies import current_rdm_records -from invenio_rdm_records.services.errors import EmbargoNotLiftedError +from invenio_rdm_records.services.errors import ( + EmbargoNotLiftedError, + ValidationErrorWithMessageAsList, +) def test_minimal_draft_creation(running_app, search_clear, minimal_record): @@ -50,7 +55,7 @@ def test_draft_w_languages_creation(running_app, search_clear, minimal_record): def test_publish_public_record_with_default_doi( - running_app, search_clear, minimal_record + running_app, search_clear, minimal_record, uploader ): superuser_identity = running_app.superuser_identity service = current_rdm_records.records_service @@ -68,10 +73,115 @@ def test_publish_public_record_with_optional_doi( draft = service.create(superuser_identity, minimal_record) record = service.publish(id_=draft.id, identity=superuser_identity) assert "doi" not in record._record.pids + assert "doi" not in record._record.parent.pids + # Reset the running_app config for next tests + running_app.app.config["RDM_PERSISTENT_IDENTIFIERS"]["doi"]["required"] = True + + +def test_publish_public_record_versions_no_or_external_doi_managed_doi( + running_app, search_clear, minimal_record, verified_user +): + running_app.app.config["RDM_PERSISTENT_IDENTIFIERS"]["doi"]["required"] = False + verified_user_identity = verified_user.identity + service = current_rdm_records.records_service + # Publish without DOI + draft = service.create(verified_user_identity, minimal_record) + record = service.publish(id_=draft.id, identity=verified_user_identity) + assert "doi" not in record._record.pids + assert "doi" not in record._record.parent.pids + + # create a new version with an external DOI + draft = service.new_version(verified_user_identity, record.id) + draft_data = deepcopy(draft.data) 
+ draft_data["metadata"]["publication_date"] = "2023-01-01" + draft_data["pids"]["doi"] = { + "identifier": "10.4321/test.1234", + "provider": "external", + } + draft = service.update_draft(verified_user_identity, draft.id, data=draft_data) + record = service.publish(id_=draft.id, identity=verified_user_identity) + assert "doi" in record._record.pids + assert "doi" not in record._record.parent.pids + + # create a new version and try to mint a managed DOI now when you publish + draft = service.new_version(verified_user_identity, record.id) + draft = service.pids.create(verified_user_identity, draft.id, "doi") + draft_data = deepcopy(draft.data) + draft_data["metadata"]["publication_date"] = "2023-01-01" + draft = service.update_draft(verified_user_identity, draft.id, data=draft_data) + + with pytest.raises(ValidationErrorWithMessageAsList): + record = service.publish(id_=draft.id, identity=verified_user_identity) + + # Reset the running_app config for next tests + running_app.app.config["RDM_PERSISTENT_IDENTIFIERS"]["doi"]["required"] = True + + +def test_publish_public_record_versions_managed_doi_external_doi( + running_app, search_clear, minimal_record, verified_user +): + running_app.app.config["RDM_PERSISTENT_IDENTIFIERS"]["doi"]["required"] = False + verified_user_identity = verified_user.identity + service = current_rdm_records.records_service + # Publish with locally managed DOI + draft = service.create(verified_user_identity, minimal_record) + draft = service.pids.create(verified_user_identity, draft.id, "doi") + record = service.publish(id_=draft.id, identity=verified_user_identity) + assert "doi" in record._record.pids + assert "doi" in record._record.parent.pids + + # create a new version with an external DOI + draft = service.new_version(verified_user_identity, record.id) + draft_data = deepcopy(draft.data) + draft_data["metadata"]["publication_date"] = "2023-01-01" + draft_data["pids"]["doi"] = { + "identifier": "10.4321/test.1234", + "provider":
"external", + } + draft = service.update_draft(verified_user_identity, draft.id, data=draft_data) + with pytest.raises(ValidationErrorWithMessageAsList): + record = service.publish(id_=draft.id, identity=verified_user_identity) + # Reset the running_app config for next tests running_app.app.config["RDM_PERSISTENT_IDENTIFIERS"]["doi"]["required"] = True +def test_publish_public_record_versions_managed_doi_no_doi( + running_app, search_clear, minimal_record, verified_user +): + running_app.app.config["RDM_PERSISTENT_IDENTIFIERS"]["doi"]["required"] = False + verified_user_identity = verified_user.identity + service = current_rdm_records.records_service + # Publish with locally managed DOI + draft = service.create(verified_user_identity, minimal_record) + draft = service.pids.create(verified_user_identity, draft.id, "doi") + record = service.publish(id_=draft.id, identity=verified_user_identity) + assert "doi" in record._record.pids + assert "doi" in record._record.parent.pids + + # create a new version with no DOI + draft = service.new_version(verified_user_identity, record.id) + draft_data = deepcopy(draft.data) + draft_data["metadata"]["publication_date"] = "2023-01-01" + draft_data["pids"] = {} + draft = service.update_draft(verified_user_identity, draft.id, data=draft_data) + with pytest.raises(ValidationErrorWithMessageAsList): + record = service.publish(id_=draft.id, identity=verified_user_identity) + + # Reset the running_app config for next tests + running_app.app.config["RDM_PERSISTENT_IDENTIFIERS"]["doi"]["required"] = True + + +def test_publish_public_record_with_default_doi( + running_app, search_clear, minimal_record, uploader +): + superuser_identity = running_app.superuser_identity + service = current_rdm_records.records_service + draft = service.create(superuser_identity, minimal_record) + record = service.publish(id_=draft.id, identity=superuser_identity) + assert "doi" in record._record.pids + + def 
test_publish_restricted_record_without_default_doi( running_app, search_clear, minimal_restricted_record ): From 938b7cf588d986c8e15de10d5d81baae17e2fddc Mon Sep 17 00:00:00 2001 From: Zacharias Zacharodimos Date: Mon, 16 Dec 2024 09:05:22 +0100 Subject: [PATCH 23/23] release: v16.5.0 --- CHANGES.rst | 4 ++++ invenio_rdm_records/__init__.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 5b95b4c80..8355e27d6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,6 +12,10 @@ Changes ======= +Version v16.5.0 (released 2024-12-16) + +- pids: add support for optional DOI + Version v16.4.1 (released 2024-12-11) - mappings: add missing `identifiers` to community orgs diff --git a/invenio_rdm_records/__init__.py b/invenio_rdm_records/__init__.py index 034902c72..0531060a2 100644 --- a/invenio_rdm_records/__init__.py +++ b/invenio_rdm_records/__init__.py @@ -12,6 +12,6 @@ from .ext import InvenioRDMRecords -__version__ = "16.4.1" +__version__ = "16.5.0" __all__ = ("__version__", "InvenioRDMRecords")