Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

APS: duplicated affids removal #116

Merged
merged 1 commit into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hepcrawl/extractors/aps_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def _get_authors_and_collab(self, article, dois):
author_affiliations = []
if 'affiliations' in article and 'affiliationIds' in author:
affiliations = build_dict(article['affiliations'], 'id')
for aff_id in author['affiliationIds']:
for aff_id in set(author['affiliationIds']):
if aff_id in affiliations:
author_affiliations.append({'value': affiliations[aff_id]['name']})

Expand Down
1 change: 1 addition & 0 deletions tests/responses/aps/aps_single_response.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
{
"surname":"Alemi",
"affiliationIds":[
"a1",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, in the end, we are not testing this anywhere, right? Then there is no need to change it.

Copy link
Contributor Author

@ErnestaP ErnestaP Dec 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are testing it: we see that the parsed author does not have duplicate affiliations.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But where do we assert it?

"a1"
],
"type":"Person",
Expand Down
40 changes: 25 additions & 15 deletions tests/test_aps.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def test_abstract(results):

def test_title(results):
"""Test extracting title."""
titles = ("You can run, you can hide: The epidemiology and statistical mechanics of zombies",)
titles = (
"You can run, you can hide: The epidemiology and statistical mechanics of zombies",)
for title, record in zip(titles, results):
assert 'title' in record
assert record['title'] == title
Expand Down Expand Up @@ -151,20 +152,29 @@ def test_publication_info(results):

def test_authors(results):
"""Test authors."""
expected_results = (
dict(
affiliation='Laboratory of Atomic and Solid State Physics, Cornell University, Ithaca, New York 14853, USA',
author_full_names=['Alemi, Alexander A.', 'Bierbaum, Matthew', 'Myers, Christopher R.', 'Sethna, James P.']
),
)
for expected, record in zip(expected_results, results):
assert 'authors' in record
assert len(record['authors']) == len(expected['author_full_names'])

record_full_names = [author['full_name'] for author in record['authors']]
assert set(expected['author_full_names']) == set(record_full_names) # assert that we have the same list of authors
for author in record['authors']:
assert author['affiliations'][0]['value'] == expected['affiliation']
expected_results = [{'affiliations': [{'value': u'Laboratory of Atomic and Solid State Physics, Cornell University, Ithaca, New York 14853, USA'}],
'full_name': u'Alemi, Alexander A.',
'given_names': u'Alexander A.',
'raw_name': u'Alexander A. Alemi',
'surname': u'Alemi'},
{'affiliations': [{'value': u'Laboratory of Atomic and Solid State Physics, Cornell University, Ithaca, New York 14853, USA'}],
'full_name': u'Bierbaum, Matthew',
'given_names': u'Matthew',
'raw_name': u'Matthew Bierbaum',
'surname': u'Bierbaum'},
{'affiliations': [{'value': u'Laboratory of Atomic and Solid State Physics, Cornell University, Ithaca, New York 14853, USA'},
{'value': u'Institute of Biotechnology, Cornell University, Ithaca, New York 14853, USA'}],
'full_name': u'Myers, Christopher R.',
'given_names': u'Christopher R.',
'raw_name': u'Christopher R. Myers',
'surname': u'Myers'},
{'affiliations': [{'value': u'Laboratory of Atomic and Solid State Physics, Cornell University, Ithaca, New York 14853, USA'}],
'full_name': u'Sethna, James P.',
'given_names': u'James P.',
'raw_name': u'James P. Sethna',
'surname': u'Sethna'}]

assert results[0]['authors'] == expected_results


def test_copyrights(results):
Expand Down
Loading