Skip to content

Commit

Permalink
APS: duplicated affids removal
Browse files — browse the repository at this point in the history
  • Loading branch information
ErnestaP committed Dec 7, 2023
1 parent 804b21d commit c41f697
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 16 deletions.
2 changes: 1 addition & 1 deletion hepcrawl/extractors/aps_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def _get_authors_and_collab(self, article, dois):
author_affiliations = []
if 'affiliations' in article and 'affiliationIds' in author:
affiliations = build_dict(article['affiliations'], 'id')
for aff_id in author['affiliationIds']:
for aff_id in set(author['affiliationIds']):
if aff_id in affiliations:
author_affiliations.append({'value': affiliations[aff_id]['name']})

Expand Down
1 change: 1 addition & 0 deletions tests/responses/aps/aps_single_response.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
{
"surname":"Alemi",
"affiliationIds":[
"a1",
"a1"
],
"type":"Person",
Expand Down
40 changes: 25 additions & 15 deletions tests/test_aps.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def test_abstract(results):

def test_title(results):
"""Test extracting title."""
# NOTE(review): this is a rendered diff hunk with its +/- markers stripped.
# The one-line `titles` assignment is the pre-commit form; the wrapped
# two-line assignment after it is its replacement. The string itself is
# unchanged, so the commit only re-wraps the line here.
titles = ("You can run, you can hide: The epidemiology and statistical mechanics of zombies",)
titles = (
"You can run, you can hide: The epidemiology and statistical mechanics of zombies",)
# zip() pairs each expected title with the record at the same position, so
# only as many records as there are expected titles are checked.
for title, record in zip(titles, results):
assert 'title' in record
assert record['title'] == title
Expand Down Expand Up @@ -151,20 +152,29 @@ def test_publication_info(results):

def test_authors(results):
"""Test authors."""
# NOTE(review): this is a rendered diff hunk with its +/- markers stripped.
# The tuple-of-dict expectation and the loop right after it are the removed
# pre-commit check; the list literal and the final equality assert further
# down are what the commit adds.
expected_results = (
dict(
affiliation='Laboratory of Atomic and Solid State Physics, Cornell University, Ithaca, New York 14853, USA',
author_full_names=['Alemi, Alexander A.', 'Bierbaum, Matthew', 'Myers, Christopher R.', 'Sethna, James P.']
),
)
# Pre-commit check: same number of authors, same set of full names, and only
# the FIRST affiliation of each author compared against one shared value.
for expected, record in zip(expected_results, results):
assert 'authors' in record
assert len(record['authors']) == len(expected['author_full_names'])

record_full_names = [author['full_name'] for author in record['authors']]
assert set(expected['author_full_names']) == set(record_full_names) # assert that we have the same list of authors
for author in record['authors']:
assert author['affiliations'][0]['value'] == expected['affiliation']
# Post-commit expectation: the complete author dicts of the first record.
# Alemi is expected to carry the affiliation once even though the fixture in
# this commit repeats "a1" in his affiliationIds; that is what pins the
# de-duplication.
expected_results = [{'affiliations': [{'value': u'Laboratory of Atomic and Solid State Physics, Cornell University, Ithaca, New York 14853, USA'}],
'full_name': u'Alemi, Alexander A.',
'given_names': u'Alexander A.',
'raw_name': u'Alexander A. Alemi',
'surname': u'Alemi'},
{'affiliations': [{'value': u'Laboratory of Atomic and Solid State Physics, Cornell University, Ithaca, New York 14853, USA'}],
'full_name': u'Bierbaum, Matthew',
'given_names': u'Matthew',
'raw_name': u'Matthew Bierbaum',
'surname': u'Bierbaum'},
{'affiliations': [{'value': u'Laboratory of Atomic and Solid State Physics, Cornell University, Ithaca, New York 14853, USA'},
{'value': u'Institute of Biotechnology, Cornell University, Ithaca, New York 14853, USA'}],
'full_name': u'Myers, Christopher R.',
'given_names': u'Christopher R.',
'raw_name': u'Christopher R. Myers',
'surname': u'Myers'},
{'affiliations': [{'value': u'Laboratory of Atomic and Solid State Physics, Cornell University, Ithaca, New York 14853, USA'}],
'full_name': u'Sethna, James P.',
'given_names': u'James P.',
'raw_name': u'James P. Sethna',
'surname': u'Sethna'}]

# NOTE(review): this equality compares each author's affiliation list in
# order, while the parser change in this same commit iterates
# set(author['affiliationIds']). Set iteration order is not guaranteed to
# follow input order, so the two-affiliation entry (Myers) may make this
# assert order-sensitive — TODO confirm, and consider an order-preserving
# de-duplication in the parser.
assert results[0]['authors'] == expected_results


def test_copyrights(results):
Expand Down

0 comments on commit c41f697

Please sign in to comment.