Skip to content

Commit

Permalink
documents: export records in Dublin Core format
Browse files Browse the repository at this point in the history
This PR configures the OAI server to export document records in Dublin Core format.

* Installs the `dcxml` library to generate the output in Dublin Core XML.
* Creates and configures the serializer for `oai_dc` format.
* Dumps `mimetype` from file object to record's file metadata.
* Adds a serializer schema for Dublin Core.
* Adds `mimetype` property to all resources that use files.
* Avoids displaying issue and pages if volume is not defined in the `partOf` property.
* Adds a custom serializer for dumping DC objects.
* Closes #325.

Co-Authored-by: Sébastien Délèze <[email protected]>
  • Loading branch information
Sébastien Délèze committed Nov 12, 2020
1 parent 98c75e1 commit 1a5799e
Show file tree
Hide file tree
Showing 17 changed files with 948 additions and 40 deletions.
88 changes: 56 additions & 32 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ flask-cors = ">3.0.8"
nbconvert = {version = ">=5.6.1,<6.0.0", extras = ["execute"]}
cryptography = ">=3.2"
netaddr = "*"
dcxml = "*"

[tool.poetry.dev-dependencies]
Flask-Debugtoolbar = ">=0.10.1"
Expand Down
10 changes: 10 additions & 0 deletions sonar/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,3 +665,13 @@ def _(x):
OAISERVER_ID_PREFIX = 'oai:sonar.ch:'
#: Location of the XSL file.
OAISERVER_XSL_URL = 'static/xsl/oai2.xsl'
#: Export formats, keyed by format name. Each entry gives the XML namespace,
#: the schema location and the import path of the serializer.
OAISERVER_METADATA_FORMATS = {
    'oai_dc': {
        'namespace': 'http://www.openarchives.org/OAI/2.0/oai_dc/',
        'schema': 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
        'serializer': 'sonar.modules.documents.serializers.oaipmh_oai_dc',
    },
}
#: Number of records returned per page in OAI-PMH results.
OAISERVER_PAGE_SIZE = 100
20 changes: 19 additions & 1 deletion sonar/modules/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,32 @@
from invenio_jsonschemas import current_jsonschemas
from invenio_pidstore.errors import PIDDoesNotExistError
from invenio_pidstore.models import PersistentIdentifier, PIDStatus
from invenio_records_files.api import FilesMixin, Record
from invenio_records_files.api import FileObject as InvenioFileObjet
from invenio_records_files.api import FilesMixin as InvenioFilesMixin
from invenio_records_files.api import Record
from invenio_records_files.models import RecordsBuckets
from invenio_records_rest.utils import obj_or_import_string
from invenio_search import current_search
from invenio_search.api import RecordsSearch
from sqlalchemy.orm.exc import NoResultFound


class FileObject(InvenioFileObjet):
    """File wrapper whose metadata dump also carries the MIME type."""

    def dumps(self):
        """Dump the file metadata, including the ``mimetype`` entry.

        The parent ``dumps`` is called first and its return value is
        ignored. The ``mimetype`` of the wrapped object is then stored in
        ``self.data``.

        :returns: ``self.data`` after the update.
        """
        super().dumps()
        self.data['mimetype'] = self.obj.mimetype
        return self.data


class FilesMixin(InvenioFilesMixin):
    """Files mixin for record models, using the local file wrapper."""

    # Wrapper class for files: the ``FileObject`` defined in this module.
    file_cls = FileObject


class SonarRecord(Record, FilesMixin):
"""SONAR Record."""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@
"type": "string",
"minLength": 1
},
"mimetype": {
"title": "MIME type",
"type": "string",
"minLength": 1
},
"checksum": {
"title": "Checksum",
"description": "MD5 checksum of the file.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
"key": {
"type": "keyword"
},
"mimetype": {
"type": "keyword"
},
"checksum": {
"type": "keyword"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@
"type": "string",
"minLength": 1
},
"mimetype": {
"title": "MIME type",
"type": "string",
"minLength": 1
},
"checksum": {
"title": "Checksum",
"description": "MD5 checksum of the file.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@
"key": {
"type": "keyword"
},
"mimetype": {
"type": "keyword"
},
"checksum": {
"type": "keyword"
},
Expand Down
1 change: 1 addition & 0 deletions sonar/modules/documents/marshmallow/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class Meta:
file_id = SanitizedUnicode()
version_id = SanitizedUnicode()
key = SanitizedUnicode()
mimetype = SanitizedUnicode()
checksum = SanitizedUnicode()
size = fields.Number()
label = SanitizedUnicode()
Expand Down
9 changes: 9 additions & 0 deletions sonar/modules/documents/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
from invenio_records_rest.serializers.response import record_responsify, \
search_responsify

from sonar.modules.documents.serializers.dc import SonarDublinCoreSerializer
from sonar.modules.documents.serializers.schemas.dc import DublinCoreV1
from sonar.modules.organisations.api import OrganisationRecord
from sonar.modules.serializers import JSONSerializer as _JSONSerializer
from sonar.modules.users.api import current_user_record
Expand Down Expand Up @@ -72,6 +74,8 @@ def post_process_serialize_search(self, results, pid_fetcher):
# ===========
#: JSON serializer definition.
json_v1 = JSONSerializer(DocumentSchemaV1)
#: Dublin Core serializer
dc_v1 = SonarDublinCoreSerializer(DublinCoreV1, replace_refs=True)

# Records-REST serializers
# ========================
Expand All @@ -85,3 +89,8 @@ def post_process_serialize_search(self, results, pid_fetcher):
'json_v1_response',
'json_v1_search',
)

# OAI-PMH record serializers.
# ===========================
#: OAI-PMH OAI Dublin Core record serializer.
oaipmh_oai_dc = dc_v1.serialize_oaipmh
32 changes: 32 additions & 0 deletions sonar/modules/documents/serializers/dc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
#
# Swiss Open Access Repository
# Copyright (C) 2019 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Dublin Core serializer."""

from invenio_records_rest.serializers.dc import DublinCoreSerializer


class SonarDublinCoreSerializer(DublinCoreSerializer):
    """Dublin Core serializer for records, based on marshmallow."""

    def dump(self, obj, context=None):
        """Serialize an object with the configured schema.

        This method has to be overridden because invenio-records-rest does
        not dump objects in a way that is compatible with marshmallow 3.9.

        :param obj: Object to serialize.
        :param context: Context passed to the schema constructor.
        :returns: Result of the schema's ``dump`` call.
        """
        schema = self.schema_class(context=context)
        return schema.dump(obj)
Loading

0 comments on commit 1a5799e

Please sign in to comment.