Skip to content

Commit

Permalink
marc21: fix rule for external identifiers (035)
Browse files Browse the repository at this point in the history
* Adds new lists for external identifiers in order to determine
  which values should be allowed and which should be ignored.
  (closes CERNDocumentServer#167)

Signed-off-by: Ludmila Marian <[email protected]>
  • Loading branch information
ludmilamarian committed Oct 29, 2018
1 parent 1470947 commit d97d8b7
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 25 deletions.
21 changes: 13 additions & 8 deletions cds_dojson/marc21/fields/books/book.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,17 @@
from cds_dojson.marc21.fields.books.errors import UnexpectedValue, \
MissingRequiredField
from cds_dojson.marc21.fields.books.values_mapping import mapping, \
DOCUMENT_TYPE, AUTHOR_ROLE, COLLECTION, ACQUISITION_METHOD, MEDIUM_TYPES, \
ARXIV_CATEGORIES, MATERIALS, SUBJECT_CLASSIFICATION_EXCEPTIONS
DOCUMENT_TYPE, AUTHOR_ROLE, COLLECTION, ACQUISITION_METHOD, \
EXTERNAL_SYSTEM_IDENTIFIERS, EXTERNAL_SYSTEM_IDENTIFIERS_TO_IGNORE, \
MEDIUM_TYPES, ARXIV_CATEGORIES, MATERIALS, \
SUBJECT_CLASSIFICATION_EXCEPTIONS
from cds_dojson.marc21.fields.utils import clean_email, filter_list_values, \
out_strip, clean_val, \
ManualMigrationRequired, replace_in_result, related_url, clean_pages_range, \
clean_str

from cds_dojson.marc21.fields.utils import get_week_start
from ...models.books.book import model
from cds_dojson.marc21.models.books.book import model


@model.over('acquisition_source', '(^916__)|(^859__)|(^595__)')
Expand Down Expand Up @@ -363,15 +365,18 @@ def external_system_identifiers(self, key, value):
raise UnexpectedValue
if key == '035__':
sub_9 = clean_val('9', value, str, req=True)
if 'inspire-cnum' == sub_9.lower() or 'inspirecnum' == sub_9.lower():
# TODO check this
self['inspire_cnum'] = sub_a
if sub_9.upper() == 'INSPIRE-CNUM':
_conference_info = self.get('conference_info', [{}])
_conference_info[0].update({'inspire_cnum': sub_a})
self['conference_info'] = _conference_info
raise IgnoreKey('external_system_identifiers')
elif 'CERCER' not in sub_9:
elif sub_9.upper() in EXTERNAL_SYSTEM_IDENTIFIERS:
system_id.update({'value': sub_a,
'schema': sub_9})
else:
elif sub_9.upper() in EXTERNAL_SYSTEM_IDENTIFIERS_TO_IGNORE:
raise IgnoreKey('external_system_identifiers')
else:
raise UnexpectedValue
if key == '036__':
system_id.update({'value': sub_a,
'schema': clean_val('9', value, str, req=True),
Expand Down
28 changes: 28 additions & 0 deletions cds_dojson/marc21/fields/books/values_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,34 @@
SUBJECT_CLASSIFICATION_EXCEPTIONS = \
['PACS', 'CERN LIBRARY', 'CERN YELLOW REPORT']

# External system names accepted as the ``schema`` of an external system
# identifier. The ``035__`` rule in fields/books/book.py tests
# ``sub_9.upper()`` for membership in this list, so every entry must be
# written in upper case. A ``$9`` value found neither here nor in
# EXTERNAL_SYSTEM_IDENTIFIERS_TO_IGNORE makes that rule raise
# UnexpectedValue.
EXTERNAL_SYSTEM_IDENTIFIERS = [
'ARXIV',
'DCL',
'DESY',
'DOE',
'EBL',
'FIZ',
'HAL',
'IEECONF',
'INDICO.CERN.CH',
'INIS',
'INSPIRE',
'KEK',
'LHCLHC',
'SAFARI',
'SCEM',
'UDCCERN',
'WAI01',
]

# External system names that are recognised but deliberately dropped: when
# ``sub_9.upper()`` of a ``035__`` field is in this list, the rule in
# fields/books/book.py raises IgnoreKey('external_system_identifiers')
# instead of UnexpectedValue. Entries must be upper case because the
# comparison is done on the upper-cased subfield value.
EXTERNAL_SYSTEM_IDENTIFIERS_TO_IGNORE = [
'CERN ANNUAL REPORT',
'HTTP://INSPIREHEP.NET/OAI2D',
'SLAC',
'SLACCONF',
'SPIRES',
]


def mapping(field_map, val, raise_exception=False):
"""
Expand Down
19 changes: 19 additions & 0 deletions cds_dojson/schemas/records/books/ymls/base-v0.0.1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,25 @@ properties:
Identifies the external system, and allows to interpret
unambiguously the :ref:`value`.
:example: ``ADS``
enum:
- ARXIV
- DCL
- DESY
- DOE
- EBL
- FIZ
- HAL
- IEECONF
- INDICO.CERN.CH
- INIS
- INSPIRE
- INSPIRE-CNUM
- KEK
- LHCLHC
- SAFARI
- SCEM
- UDCCERN
- WAI01
minLength: 1
# The pattern must admit every enum value above: "INDICO.CERN.CH" contains
# dots and "INSPIRE-CNUM" a hyphen, neither of which is matched by \w alone,
# so the previous ^\w+$ made those two values impossible to validate.
pattern: '^[\w.-]+$'
type: string
Expand Down
44 changes: 27 additions & 17 deletions tests/test_books.py
Original file line number Diff line number Diff line change
Expand Up @@ -1006,34 +1006,44 @@ def test_external_system_identifiers(app):
<subfield code="a">2365039</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="9">Random</subfield>
<subfield code="9">Inspire</subfield>
<subfield code="a">2365039</subfield>
</datafield>
""", {
'inspire_cnum': '2365039',
'external_system_identifiers': [{
'schema': 'Random',
'value': '2365039',
'conference_info': [{
'inspire_cnum': '2365039',
}],
})

check_transformation(
"""
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="9">Random</subfield>
<subfield code="a">2365039</subfield>
</datafield>
""", {
'external_system_identifiers': [{
'schema': 'Random',
'schema': 'Inspire',
'value': '2365039',
}],
})

with pytest.raises(UnexpectedValue):
check_transformation(
"""
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="9">Random</subfield>
<subfield code="a">2365039</subfield>
</datafield>
""", {
})

with pytest.raises(UnexpectedValue):
check_transformation(
"""
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="9">CERCER</subfield>
<subfield code="a">2365039</subfield>
</datafield>
""", {
})

check_transformation(
"""
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="9">CERCER</subfield>
<subfield code="a">2365039</subfield>
<subfield code="9">SLAC</subfield>
<subfield code="a">5231528</subfield>
</datafield>
""", {
})
Expand Down

0 comments on commit d97d8b7

Please sign in to comment.