From 5fce3f6bb6e97a0b5a72e54c35832d0837f0e67d Mon Sep 17 00:00:00 2001
From: Harris Tzovanakis
Date: Fri, 30 Jun 2023 08:49:49 +0200
Subject: [PATCH] workflows: avoid populating countries with cern cooperation agreement (#440)

* ref: cern-sis/issues-scoap3#168
---
 .../modules/workflows/workflows/articles_upload.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/scoap3/modules/workflows/workflows/articles_upload.py b/scoap3/modules/workflows/workflows/articles_upload.py
index abd2339c..948aa9a3 100644
--- a/scoap3/modules/workflows/workflows/articles_upload.py
+++ b/scoap3/modules/workflows/workflows/articles_upload.py
@@ -24,6 +24,7 @@
 
 from __future__ import absolute_import, division, print_function
 
+import re
 import json
 import logging
 from StringIO import StringIO
@@ -127,11 +128,22 @@ def add_nations(obj, eng):
     if "authors" not in obj.data:
         __halt_and_notify("No authors for article.", eng)
 
+    pattern_for_cern_cooperation_agreement = re.compile(
+        r'cooperation agreement with cern', re.IGNORECASE)
+
     for author_index, author in enumerate(obj.data["authors"]):
         if "affiliations" not in author:
             __halt_and_notify("No affiliations for author: %s." % author, eng)
-
         for affiliation_index, affiliation in enumerate(author["affiliations"]):
+            # hack to avoid populating country
+            match_pattern = pattern_for_cern_cooperation_agreement.search(affiliation['value'])
+            if match_pattern:
+                logger.warning(
+                    "The affiliation contains cooperation agreement: '%s'"
+                    % affiliation['value']
+                )
+                continue
+
             obj.data["authors"][author_index]["affiliations"][affiliation_index][
                 "country"
             ] = find_country(affiliation["value"])