diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index f73dd4b..527f3fa 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -37,4 +37,12 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | - pytest + coverage run -m pytest + coverage report -m + - name: Test & publish code coverage + uses: paambaati/codeclimate-action@v5.0.0 + env: + CC_TEST_REPORTER_ID: ${{secrets.CC_TEST_REPORTER_ID}} + with: + coverageCommand: coverage xml + debug: true diff --git a/README.md b/README.md index 72e3c2a..9efe9ef 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,10 @@ # python-zbMathRest2Oai + +[![DeepSource](https://app.deepsource.com/gh/MaRDI4NFDI/python-zbMathRest2Oai.svg/?label=resolved+issues&show_trend=true&token=SovMnB53sVw8-JcWeL8YRnsG)](https://app.deepsource.com/gh/MaRDI4NFDI/python-zbMathRest2Oai/) [![SWH](https://archive.softwareheritage.org/badge/origin/https://github.com/MaRDI4NFDI/python-zbMathRest2Oai/)](https://archive.softwareheritage.org/browse/origin/?origin_url=https://github.com/MaRDI4NFDI/python-zbMathRest2Oai) [![Maintainability](https://api.codeclimate.com/v1/badges/88fa012874c78bfeb8bf/maintainability)](https://codeclimate.com/github/MaRDI4NFDI/python-zbMathRest2Oai/maintainability) [![Test Coverage](https://api.codeclimate.com/v1/badges/88fa012874c78bfeb8bf/test_coverage)](https://codeclimate.com/github/MaRDI4NFDI/python-zbMathRest2Oai/test_coverage) + Read data from the zbMATH Open API https://api.zbmath.org/docs and feed it to the OAI-PMH server https://oai.portal.mardi4nfdi.de/oai/ Hint for a proper installation: _ point to the directory -_ execute "pip install -e ." \ No newline at end of file +_ execute "pip install -e ." 
diff --git a/codemeta.json b/codemeta.json new file mode 100644 index 0000000..4c9f18f --- /dev/null +++ b/codemeta.json @@ -0,0 +1,79 @@ +{ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "@type": "SoftwareSourceCode", + "codeRepository": "https://github.com/MaRDI4NFDI/python-zbMathRest2Oai", + "contIntegration": "https://github.com/MaRDI4NFDI/python-zbMathRest2Oai/blob/main/.github/workflows/python-app.yml", + "dateCreated": "2024-02-13", + "datePublished": "2024-02-13", + "dateModified": "2024-02-13", + "downloadUrl": "https://github.com/MaRDI4NFDI/python-zbMathRest2Oai/archive/refs/heads/main.zip", + "issueTracker": "https://github.com/MaRDI4NFDI/python-zbMathRest2Oai/issues", + "name": "python-zbMathRest2Oai", + "version": "1.0.0", + "identifier": "swh:1:dir:de327a8f9f82353f91c8dbbdd3f54d1efb3ba04e", + "description": "Read data from the zbMATH Open API https://api.zbmath.org/docs and feed it to the OAI-PMH server https://oai.portal.mardi4nfdi.de/oai/", + "releaseNotes": "This is the first codemeta file we create, and should be considered as a prototype for the future beta integration in the faircore4eosc.", + "developmentStatus": "wip", + "isPartOf": "https://portal.mardi4nfdi.de/wiki/Portal", + "funder": { + "@type": "Organization", + "name": "FIZ Karlsruhe" + }, + "keywords": [ + "zbmath", + "swmath", + "EOSC", + "PIDGraph", + "RDGraph", + "RSAC" + ], + "programmingLanguage": [ + "Python3", + "XSLT", + "shell" + ], + "operatingSystem": [ + "Linux" + ], + "softwareRequirements": [ + "Python 3.9" + ], + "relatedLink": [ + "https://faircore4eosc.eu/" + ], + "author": [ + { + "@type": "Person", + "@id": "https://orcid.org/0000-0002-8710-9548", + "givenName": "Maxence", + "familyName": "Azzouz-Thuderoz", + "email": "maxence.azzouz-thudero@fiz-karlsruhe.de", + "affiliation": { + "@type": "Organization", + "name": "Department of Mathematics, FIZ Karlsruhe" + } + }, + { + "@type": "Person", + "@id": "https://orcid.org/0009-0005-1697-3282", + 
"givenName": "Shiraz", + "familyName": "Malla Mohamad", + "email": "Shiraz.Malla_Mohamad@fiz-Karlsruhe.de", + "affiliation": { + "@type": "Organization", + "name": "Department of Mathematics, FIZ Karlsruhe" + } + }, + { + "@type": "Person", + "@id": "https://orcid.org/0000-0001-7141-4997", + "givenName": "Moritz", + "familyName": "Schubotz", + "email": "Moritz.Schubotz@fiz-Karlsruhe.de", + "affiliation": { + "@type": "Organization", + "name": "Department of Mathematics, FIZ Karlsruhe" + } + } + ] +} \ No newline at end of file diff --git a/formats/createFormats.sh b/formats/createFormats.sh index ba358aa..dad4df3 100755 --- a/formats/createFormats.sh +++ b/formats/createFormats.sh @@ -8,4 +8,8 @@ curl --noproxy '*' -X POST -H 'Content-Type: application/json' -H "Authorizatio printf "\n\nCreate Format datacite\n\n" curl --noproxy '*' -X POST -H 'Content-Type: application/json' -H "Authorization: Basic $AUTH" -i 'https://oai-input.portal.mardi4nfdi.de/oai-backend/format' --data '{"metadataPrefix":"oai_zb_preview","schemaLocation":"https://zbmath.org/OAI/2.0/oai_zb_preview/","schemaNamespace":"https://zbmath.org/zbmath/elements/1.0/","identifierXpath":""}' printf "\n\n Read all formats\n\n" +curl --noproxy '*' -X POST -H 'Content-Type: application/json' -H "Authorization: Basic $AUTH" -i 'https://oai-input.portal.mardi4nfdi.de/oai-backend/format' --data '{"metadataPrefix":"zbmath_rest_api","schemaLocation":"https://github.com/MaRDI4NFDI/python-zbMathRest2Oai/blob/main/tests/data/plain.xml","schemaNamespace":"https://github.com/MaRDI4NFDI/python-zbMathRest2Oai/blob/main/tests/data/","identifierXpath":""}' +printf "\n\nCreate Format zbmath_rest_api\n\n" + + curl --noproxy '*' -X GET -H "Authorization: Basic $AUTH" 'https://oai-input.portal.mardi4nfdi.de/oai-backend/format' \ No newline at end of file diff --git a/json mapping/mapping restoai.md b/json mapping/mapping restoai.md deleted file mode 100644 index 8260fa5..0000000 --- a/json mapping/mapping restoai.md +++ 
/dev/null @@ -1,87 +0,0 @@ -Below you can find the mapping table of the xml nodes -some nodes meet more than once by our side -# - - -## Github issue : ours - - -### parent nodes -author : name -author_ids : codes -keywords : keywords -review : reviewer -serial : publisher , title -references : _text , _msc , _doucment_id , -classifications: msc -document_id : zbmath_id - -document_title : title -document_type: not found -doi : identifier -language : language -publication_year: _year -source : _source -spelling : _name -zbl_id : not found -serial : _publisher -serial : _title -rights : no rights node -pagination : _pages -publication_year : _year -links : links ( empty) url is missing -rights : missing - - -we have in some sections -Parent nodes from our code , which they are actually child nodes in the github issue . - -publisher is parent node , which it suits the child node from -the serial node -_title is also the same which it suits the title in the serial parent node - -in references and review parent nodes we have also some the same - - -Missing nodes are : links , rights - -the first node is ours and the response represents the node of the github issue. - -below you can see an instance about some complexity with the nodes -this is the review parent node with its child nodes - - - English - Jonas Šiaulys (Vilnius) // chk - The prime \(k\)-tuples and small gaps between prime numbers are considered. Using a refinement of the Goldston-Pintz-Yildirim sieve method [\textit{D. A. Goldston} et al., Ann. Math. (2) 170, No. 2, 819--862 (2009; Zbl 1207.11096)] the author proves, for instance, the following estimates - \[ - - \liminf_{n\to\infty}\,(p_{n+m}-p_n)\ll m^3\text{{e}}^{4m}, \quad \liminf_{n\to\infty}\,(p_{n+1}-p_n)\leq 600 - - \] - with an absolute constant in sign \(\ll\). Here \(m\) is a natural number, and \(p_{\,l}\) denote the \(l\)-th prime number. 
// chk - review // chk - 11807 // chk - siaulys.jonas // chk - - - - and our response was in this form as separated parent nodes with matching values - - - siaulys.jonas - 11807 - Jonas Šiaulys - Jonas Šiaulys (Vilnius) - The prime \(k\)-tuples and small gaps between prime numbers are considered. Using a refinement of the Goldston-Pintz-Yildirim sieve method [\textit{D. A. Goldston} et al., Ann. Math. (2) 170, No. 2, 819--862 (2009; Zbl 1207.11096)] the author proves, for instance, the following estimates - \[ - \liminf_{n\to\infty}\,(p_{n+m}-p_n)\ll m^3\text{{e}}^{4m}, \quad \liminf_{n\to\infty}\,(p_{n+1}-p_n)\leq 600 - \] - with an absolute constant in sign \(\ll\). Here \(m\) is a natural number, and \(p_{\,l}\) denote the \(l\)-th prime number. - review - - for further information and questions dont hesitate to contact me - - - - \ No newline at end of file diff --git a/json mapping/output mapping.json b/json mapping/output mapping.json deleted file mode 100644 index b1f1e6b..0000000 --- a/json mapping/output mapping.json +++ /dev/null @@ -1,84 +0,0 @@ -{ - "_codes": [ - "author_ids" - ], - - - "_code":[ "author_id" - ], - - - "_name": [ "author" - - ], - - "_document_type": ["document_type" - ], - - "_title": [ "document_title" - ], - - - - "_id": [ "document_id" - - ], - - "_identifier": ["doi" - ], - - "_keywords": ["keywords" - - ], - "_keyword": ["keyword" - ], - - "_languages": ["language" - ], - - "_pages": ["pagination" - ], - - "_year": ["publication_year" - ], - - "_source": ["source" - ], - - "spelling": ["_author_code" - ], - - "reviewer_id": ["reviewer_id" - ], - - "_sign": ["review_sign" - ], - - "_text": ["review_text" - ], - - "_contribution_type": ["review_type" - ], - - - "_id": ["document_id" - ], - - "_publisher": ["serial_publisher" - ], - - "_title": ["serial_title" - ], - - - "_text": ["text" - ], - "_document_id": ["ref_id" - ], - "_msc": ["ref_classification" - - ] - - - - } \ No newline at end of file diff --git a/pyproject.toml 
b/pyproject.toml index 78a7d39..3ca197a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,11 +12,11 @@ dynamic = ["version"] dependencies = [ "dict2xml", "requests", - "swagger_client @ git+https://github.com/zbMATHOpen/zbRestApiClient.git" ] [project.optional-dependencies] test = [ "black", + "coverage", "flake8", "isort", "pytest", @@ -26,7 +26,8 @@ test = [ "requests_mock", "python-dateutil", "pytest-coverage", - "xmldiff" + "xmldiff", + "lxml" ] [tool.setuptools] diff --git a/requirements.txt b/requirements.txt index 1c58750..c8c135b 100644 Binary files a/requirements.txt and b/requirements.txt differ diff --git a/src/zbmath_rest2oai/fake_dict_sw.py b/src/zbmath_rest2oai/fake_dict_sw.py new file mode 100644 index 0000000..5ea5fa1 --- /dev/null +++ b/src/zbmath_rest2oai/fake_dict_sw.py @@ -0,0 +1,6 @@ +def fake_dict_sw(): + with open('software.csv','w') as f: + return f.write("\n".join(map(str,list(range(2, 10))))) + +if __name__ == '__main__': + fake_dict_sw() \ No newline at end of file diff --git a/src/zbmath_rest2oai/getAsXml.py b/src/zbmath_rest2oai/getAsXml.py index af7c560..d241cdb 100644 --- a/src/zbmath_rest2oai/getAsXml.py +++ b/src/zbmath_rest2oai/getAsXml.py @@ -2,20 +2,21 @@ import requests -def final_xml2(de): +def final_xml2(de, api_source): headers = {'Accept': 'application/json'} - - r = requests.get('https://api.zbmath.org/v1/document/' + de, headers=headers) + r = requests.get(api_source + de, headers=headers) if r.status_code != 200: raise Exception(f"Unexpected response with status code {r.status_code}: {r.text}") json = r.json() # Bugfix as the sates are lists of lists which has no canonical XML mapping - states = {} - for lst in json['result']['states']: - [k, v] = lst - states[k] = v - json['result']['states'] = states + if type(json['result'])==dict: + + states = {} + for lst in json['result']['states']: + [k, v] = lst + states[k] = v + json['result']['states'] = states # End of fix return ( dict2xml.Converter(wrap="root") @@ -23,4 
+24,4 @@ def final_xml2(de): # Bugfix for wired XML output such as the string None or [], '', [None], None ]) - ) \ No newline at end of file + ) diff --git a/src/zbmath_rest2oai/getWithSwagger.py b/src/zbmath_rest2oai/getWithSwagger.py deleted file mode 100644 index 4d5fb9a..0000000 --- a/src/zbmath_rest2oai/getWithSwagger.py +++ /dev/null @@ -1,256 +0,0 @@ -import os.path - -import swagger_client -from zbmath_rest2oai.xml_writer import create_document - -import json - -with open(os.path.join(os.path.dirname(__file__), './output mapping - Copy.json')) as f: - d = json.load(f) - - -def append_text_child(xmld, parent, name, value): - """ - Creates new text node and appends it to parent - :param xmld: - :param parent: the node to append to - :param name: - :param value: - """ - string_name = name - if "zbmath:" not in name: - string_name = f"zbmath:{name}" - x_elem = xmld.createElement(string_name) - text = xmld.createTextNode(str(value)) - x_elem.appendChild(text) - parent.appendChild(x_elem) - return parent - - -def func_get_doc_to_xml(obj, xml, root_doc): - swagger_client_dicttype_list = [ - swagger_client.models.all_ofzbmath_api_data_models_display_documents_result_id_result.AllOfzbmathApiDataModelsDisplayDocumentsResultIDResult, - swagger_client.models.all_of_document_contributors.AllOfDocumentContributors, - swagger_client.models.zbmath_api_data_models_display_documents_submodels_author.ZbmathApiDataModelsDisplayDocumentsSubmodelsAuthor, - swagger_client.models.editorial_contribution.EditorialContribution, - swagger_client.models.all_of_editorial_contribution_reviewer.AllOfEditorialContributionReviewer, - swagger_client.models.all_of_document_language.AllOfDocumentLanguage, - swagger_client.models.link.Link, - swagger_client.models.msc.MSC, - swagger_client.models.reference.Reference, - swagger_client.models.all_of_reference_zbmath.AllOfReferenceZbmath, - swagger_client.models.all_of_document_source.AllOfDocumentSource, - swagger_client.models.series.Series, - 
swagger_client.models.all_of_document_title.AllOfDocumentTitle, - swagger_client.models.zbmath_api_data_models_display_documents_submodels_issn.ZbmathApiDataModelsDisplayDocumentsSubmodelsISSN] - - all_iter_list = [list, dict] - all_iter_list.extend(swagger_client_dicttype_list) - - nodes_names_not_to_add = ['_position', '_series_id', '_prefix', '_number', '_type', '_states', 'discriminator', - '_biographic_references', '_data_source', '_checked'] - if type(obj) in swagger_client_dicttype_list: - obj = obj.__dict__ - - if type(obj) is list: - for i in range(len(obj)): - - if xml.lastChild and xml.lastChild.nodeName in ["zbmath:author_ids", "zbmath:author_id", "zbmath:review", "zbmath:keywords", - "zbmath:keyword"]: - parent_name = xml.lastChild.nodeName - else: - parent_name = xml.nodeName - - if type(obj[i]) in [str, int]: - if parent_name in ['zbmath:ref_id', 'zbmath:_doi', 'zbmath:text']: - xml = append_text_child(root_doc, xml, parent_name, obj[i]) - - elif parent_name in ["zbmath:author_ids", "zbmath:author_id", "zbmath:keywords", "zbmath:keyword", - 'zbmath:ref_classifications']: - if parent_name.endswith('s'): - parent_name = parent_name[:-1] - - if parent_name in ["zbmath:keyword", "zbmath:author_id"]: - if xml.nodeName in ["zbmath:keywords", "zbmath:author_ids"]: - xml = append_text_child(root_doc, xml, parent_name, obj[i]) - if xml.nodeName == "oai_zb_preview:zbmath": - xml = append_text_child(root_doc, xml.getElementsByTagName(xml.lastChild.nodeName)[0], - parent_name, obj[i]) - else: - xml = append_text_child(root_doc, xml, parent_name, obj[i]) - - elif type(obj[i]) in all_iter_list: - func_get_doc_to_xml(obj[i], xml, root_doc) - - if type(obj) is dict: - new_obj = {} - for key_init in obj.keys(): - if key_init in d.keys(): - if key_init == '_code': - if xml.lastChild.nodeName == 'zbmath:ref_classifications': - new_obj[d[key_init][1]] = obj[key_init] - else: - new_obj[d[key_init][0]] = obj[key_init] - else: - new_obj[d[key_init][0]] = obj[key_init] - 
else: - new_obj[key_init] = obj[key_init] - - for key in new_obj.keys(): - if key not in nodes_names_not_to_add: - - if type(new_obj[key]) in [str, int, [], None]: - if key in ['ref_classifications', 'ref_id', '_doi', '_text', 'text', '#text']: - - if xml.nodeName == 'zbmath:references': - xml = xml.lastChild - l = [node.nodeName.replace("zbmath:", "") for node in xml.childNodes] - if '_text' in l or 'text' in l or '#text' in l: - xml = xml.parentNode - xml = append_text_child(root_doc, xml, 'reference', "") - xml = xml.lastChild - - if xml.nodeName == 'zbmath:_author_codes': - xml = xml.parentNode - l = [node.nodeName.replace("zbmath:", "") for node in xml.childNodes] - if '_text' in l or 'text' in l or '#text' in l: - xml = xml.parentNode - xml = append_text_child(root_doc, xml, 'reference', "") - xml = xml.lastChild - - xml = append_text_child(root_doc, xml, key, new_obj[key]) - - elif type(new_obj[key]) in [[], None]: - xml = append_text_child(root_doc, xml, key, 'missing') - else: - - if xml.nodeName == 'zbmath:references': - b = xml.getElementsByTagName('zbmath:reference') - b = [s for s in b if not b == ''] - xml = append_text_child(root_doc, b[-1], key, new_obj[key]) - elif xml.nodeName == 'zbmath:reference': - - xml = append_text_child(root_doc, xml, key, new_obj[key]) - else: - xml = append_text_child(root_doc, xml, key, new_obj[key]) - - elif type(new_obj[key]) in all_iter_list: - if type(new_obj[key]) not in [list, dict]: - new_obj[key] = new_obj[key].__dict__ - - if type(new_obj[key]) is dict: - - l_values = [node.nodeName for node in xml.childNodes] - l_values.sort() - - if len(list(set(l_values))) < len(l_values): - if xml.nodeName == "zbmath:reference": - xml = xml.parentNode - xml = append_text_child(root_doc, xml, 'reference', "") - elif xml.nodeName == "zbmath:references": - xml = append_text_child(root_doc, xml, 'reference', "") - else: - print(xml.nodeName) - elif xml.lastChild is not None: - if xml.lastChild.nodeName == "zbmath:review": - xml 
= append_text_child(root_doc, xml.lastChild, key, "") - - if xml.lastChild.nodeName == "zbmath:reference": - xml = append_text_child(root_doc, xml.lastChild, key, "") - # else: - # why add this - # xml = append_text_child(root_doc, xml, key, "") - - if type(new_obj[key]) is list: - if key == 'references': - xml = append_text_child(root_doc, xml, 'references', "") - xml = append_text_child(root_doc, xml.lastChild, 'reference', "") - - elif xml.nodeName == 'zbmath:reference' and key != 'ref_classifications': - a = xml.childNodes - h = [node.nodeName for node in a] - if 'zbmath:' + key in h: - xml = xml.parentNode - xml = append_text_child(root_doc, xml, 'reference', "") - xml = append_text_child(root_doc, xml.lastChild, key, "") - xml = xml.lastChild - - elif key == 'ref_classifications': - if xml.nodeName == "oai_zb_preview:zbmath": - xml = append_text_child(root_doc, xml, key, "") - - if xml.nodeName == "zbmath:_author_codes": - xml = xml.parentNode - - if xml.nodeName == "zbmath:reference": - xml = append_text_child(root_doc, xml, key, "") - xml = xml.lastChild - - elif key == "_author_codes": - - if xml.nodeName == "oai_zb_preview:zbmath": - xml = append_text_child(root_doc, xml, key, "") - if xml.nodeName == "zbmath:references": - xml = append_text_child(root_doc, xml.lastChild, key, "") - - elif not key.startswith("_"): - # this adds _elements, why do we need those - xml = append_text_child(root_doc, xml, key, "") - - func_get_doc_to_xml(new_obj[key], xml, root_doc) - return xml - - -def append_zb_rights(xmld, ron): - x_links = xmld.createElement("zbmath:rights") - text = xmld.createTextNode( - """Content generated by zbMATH Open, such as reviews, - classifications, software, or author disambiguation data, - are distributed under CC-BY-SA 4.0. This defines the license for the - whole dataset, which also contains non-copyrighted bibliographic - metadata and reference data derived from I4OC (CC0). 
Note that the API - only provides a subset of the data in the zbMATH Open Web interface. In - several cases, third-party information, such as abstracts, cannot be - made available under a suitable license through the API. In those cases, - we replaced the data with the string 'zbMATH Open Web Interface contents - unavailable due to conflicting licenses.' - """ - ) - x_links.appendChild(text) - ron.appendChild(x_links) - - -def get_final_xml(de: str): - api_instance = swagger_client.DocumentApi(swagger_client.ApiClient()) - res = api_instance.get_document_by_zbmath_id_document_id_get(id=de) - doc = res.result - root_doc = create_document(doc) - - ron = root_doc.createElement("oai_zb_preview:zbmath") - ron.setAttributeNS( - "xmls", - "xmlns:oai_zb_preview", - "https://zbmath.org/OAI/2.0/oai_zb_preview/", - ) - ron.setAttributeNS( - "xmls", - "xmlns:zbmath", - "https://zbmath.org/zbmath/elements/1.0/", - ) - ron.setAttributeNS( - "xmls", - "xmlns:xsi", - "http://www.w3.org/2001/XMLSchema-instance", - ) - xml = func_get_doc_to_xml( - res.result, - ron, - root_doc - ) - append_zb_rights(root_doc, ron) - return xml - - -final_xml = get_final_xml("6383667") - -print(final_xml.parentNode.parentNode.toprettyxml()) diff --git a/src/zbmath_rest2oai/get_all_de.py b/src/zbmath_rest2oai/get_all_de.py index 3e9c3f2..3beb80a 100644 --- a/src/zbmath_rest2oai/get_all_de.py +++ b/src/zbmath_rest2oai/get_all_de.py @@ -6,20 +6,16 @@ from zbmath_rest2oai.writeOai import write_oai # from https://stackoverflow.com/a/38677619/9215209 -CSV_URL = 'https://github.com/MaRDI4NFDI/python-zbMathRest2Oai/releases/download/test/all_de_240115.csv' - -def get_all_de(): +def get_all_de(api_source, CSV_URL): with requests.get(CSV_URL, stream=True) as r: lines = (line.decode('utf-8') for line in r.iter_lines()) for row in csv.reader(lines): de = row[0] try: - write_oai(de) + write_oai(de, api_source) except Exception as error: print(de, error, file=sys.stderr) -if __name__ == '__main__': - get_all_de() 
diff --git a/src/zbmath_rest2oai/output mapping - Copy.json b/src/zbmath_rest2oai/output mapping - Copy.json deleted file mode 100644 index 00da7e3..0000000 --- a/src/zbmath_rest2oai/output mapping - Copy.json +++ /dev/null @@ -1,88 +0,0 @@ -{ - "_codes": [ - "author_ids" - ], - - - "_code":[ "author_id", "classification" - ], - - - "_name": [ "author" - - ], - - "_document_type": ["document_type" - ], - - "_title": [ "document_title" - ], - "_author_code":["reviewer_id"], - - "_id": [ "document_id" - - ], - - "_identifier": ["doi" - ], - - "_keywords": ["keywords" - - ], - "_keyword": ["keyword" - ], - - "_languages": ["language" - ], - - "_pages": ["pagination" - ], - - "_year": ["publication_year" - ], - - "_source": ["source" - ], - - "spelling": ["_author_code" - ], - - "_reviewer_id": ["reviewer" - ], - - "_sign": ["review_sign" - ], - - "_text": ["review_text" - ], - - "_contribution_type": ["review_type" - ], - - - "_id": ["document_id" - ], - - "_publisher": ["serial_publisher" - ], - - "_title": ["serial_title" - ], - - - "_text": ["text" - ], - "_document_id": ["ref_id" - ], - "_msc": ["ref_classifications" - - ], - "_zbmath": ["reference_codes"], - - "_references" : ["references"], - - "_editorial_contributions": ["review"] - - - - } \ No newline at end of file diff --git a/src/zbmath_rest2oai/run_get_all_de_documents.py b/src/zbmath_rest2oai/run_get_all_de_documents.py new file mode 100644 index 0000000..bdba05e --- /dev/null +++ b/src/zbmath_rest2oai/run_get_all_de_documents.py @@ -0,0 +1,6 @@ +from zbmath_rest2oai.get_all_de import get_all_de + +CSV_URL = 'https://github.com/MaRDI4NFDI/python-zbMathRest2Oai/releases/download/test/all_de_240115.csv' + +if __name__ == '__main__': + get_all_de('https://api.zbmath.org/v1/document/',CSV_URL) \ No newline at end of file diff --git a/src/zbmath_rest2oai/run_get_all_de_software.py b/src/zbmath_rest2oai/run_get_all_de_software.py new file mode 100644 index 0000000..cfd1846 --- /dev/null +++ 
b/src/zbmath_rest2oai/run_get_all_de_software.py @@ -0,0 +1,5 @@ +from zbmath_rest2oai.get_all_de import get_all_de + +CSV_URL = 'https://raw.githubusercontent.com/MaRDI4NFDI/python-zbMathRest2Oai/main/src/zbmath_rest2oai/software.csv' +if __name__ == '__main__': + get_all_de('https://api.zbmath.org/v1/software/_search?search_string=si%3A',CSV_URL) \ No newline at end of file diff --git a/src/zbmath_rest2oai/software.csv b/src/zbmath_rest2oai/software.csv new file mode 100644 index 0000000..d4ad588 --- /dev/null +++ b/src/zbmath_rest2oai/software.csv @@ -0,0 +1,8 @@ +2 +3 +4 +5 +6 +7 +8 +9 \ No newline at end of file diff --git a/src/zbmath_rest2oai/writeOai.py b/src/zbmath_rest2oai/writeOai.py index 566241d..72b3fb8 100644 --- a/src/zbmath_rest2oai/writeOai.py +++ b/src/zbmath_rest2oai/writeOai.py @@ -7,10 +7,9 @@ from zbmath_rest2oai import getAsXml -def write_oai(x): - testXML = getAsXml.final_xml2(x) - url = "https://www.w3schools.com/python/demopage.php" +def write_oai(x, api_source): + test_xml = getAsXml.final_xml2(x, api_source) files = { "item": ( None, @@ -18,21 +17,22 @@ def write_oai(x): { "identifier": x, "deleteFlag": False, - "ingestFormat": "radar", + "ingestFormat": "zbmath_rest_api", } ), "application/json", ), - "content": (None, testXML), + "content": (None, test_xml), } - # x = requests.delete('http://localhost:8081/oai-backend/item/10.5072%2F38238') - #x = requests.post("http://localhost:8081/oai-backend/item", files=files) basic = HTTPBasicAuth('swmath', os.environ.get('OAI_BASIC_PASSWORD')) - x = requests.post("http://oai-input.portal.mardi4nfdi.de/oai-backend/item", files=files , auth=basic) + x = requests.post("https://oai-input.portal.mardi4nfdi.de/oai-backend/item", files=files, auth=basic) if x.status_code != 200: raise Exception(f"Unexpected response with status code {x.status_code}: {x.text}") else: return x.text -# x = requests.get('http://localhost:8081/oai-backend/item/10.5072%2F38236') +if __name__ == '__main__': + import sys + + 
write_oai(sys.argv[1], sys.argv[2]) diff --git a/support/run_write_local_test.sh b/support/run_write_local_test.sh new file mode 100644 index 0000000..1cbbea4 --- /dev/null +++ b/support/run_write_local_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python support/write_local_test.py 2 https://api.zbmath.org/v1/software/_search?search_string=si%3A /home/maxence/myvenv/python-zbMathRest2Oai/test/data/software/plain.xml diff --git a/support/write_local_test.py b/support/write_local_test.py new file mode 100644 index 0000000..f21bebb --- /dev/null +++ b/support/write_local_test.py @@ -0,0 +1,19 @@ +from zbmath_rest2oai import getAsXml +import sys +import re +xml_string = getAsXml.final_xml2(sys.argv[1], sys.argv[2]) + +xml_string = re.sub( + '0.\\d+', + '0', + xml_string) +xml_string = re.sub( + '[\\d\\-: .]+', + '0', + xml_string) + +print(xml_string) +if __name__ == '__main__': + with open(sys.argv[3], 'w') as f: + + f.write(xml_string) \ No newline at end of file diff --git a/tests/data/plain.xml b/test/data/articles/plain.xml similarity index 100% rename from tests/data/plain.xml rename to test/data/articles/plain.xml diff --git a/tests/data/reference.xml b/test/data/articles/reference.xml similarity index 100% rename from tests/data/reference.xml rename to test/data/articles/reference.xml diff --git a/test/data/software/plain.xml b/test/data/software/plain.xml new file mode 100644 index 0000000..5093612 --- /dev/null +++ b/test/data/software/plain.xml @@ -0,0 +1,129 @@ + + + 102 + Li, X.S., Demmel, J.W. + 35 + 65 + 68 + 74 + 76 + 05 + 15 + 60 + 78 + 80 + 81 + 82 + 85 + 86 + 90 + 92 + + zbMATH Open Web Interface contents unavailable due to conflicting licenses. 
+ http://crd-legacy.lbl.gov/~xiaoye/SuperLU/ + 2 + orms + + SuperLU-DIST + + 265 + + + 4013 + MUMPS + + + 4012 + PETSc + + + 930 + SuperLU + + + 4629 + SparseMatrix + + + 503 + LAPACK + + + 679 + PARDISO + + + 426 + hypre + + + 989 + UMFPACK + + + 4089 + METIS + + + 4028 + Trilinos + + + 830 + ScaLAPACK + + + 4827 + mctoolbox + + + 6749 + WSMP + + + 3216 + BLAS + + + 418 + HSL + + + 8067 + ML + + + 2780 + SPIKE + + + 6496 + MPI + + + 9043 + MFEM + + + 17483 + STRUMPACK + + + + + 2187846 + zbMATH Open Web Interface contents unavailable due to conflicting licenses. + zbMATH Open Web Interface contents unavailable due to conflicting licenses. + 2003 + + https://zbmath.org/software/2 + + + successful request + True + ok + 0 + 200 + 0 + + \ No newline at end of file diff --git a/test/data/software/reference.xml b/test/data/software/reference.xml new file mode 100644 index 0000000..4e7beb5 --- /dev/null +++ b/test/data/software/reference.xml @@ -0,0 +1,125 @@ + + + 2 + + + Li, X.S. + + + Demmel, J.W. 
+ + + + SuperLU-DIST + + swMATH + + + 35 + 65 + 68 + 74 + 76 + 05 + 15 + 60 + 78 + 80 + 81 + 82 + 85 + 86 + 90 + 92 + + + 2003 + + en + + + https://zbmath.org/software/?q=si%3A2 + + + + 4013 + MUMPS + + + 4012 + PETSc + + + 930 + SuperLU + + + 4629 + SparseMatrix + + + 503 + LAPACK + + + 679 + PARDISO + + + 426 + hypre + + + 989 + UMFPACK + + + 4089 + METIS + + + 4028 + Trilinos + + + 830 + ScaLAPACK + + + 4827 + mctoolbox + + + 6749 + WSMP + + + 3216 + BLAS + + + 418 + HSL + + + 8067 + ML + + + 2780 + SPIKE + + + 6496 + MPI + + + 9043 + MFEM + + + 17483 + STRUMPACK + + + \ No newline at end of file diff --git a/tests/oai_connect_test.py b/test/oai_connect_test.py similarity index 82% rename from tests/oai_connect_test.py rename to test/oai_connect_test.py index 6c287b1..595dd89 100644 --- a/tests/oai_connect_test.py +++ b/test/oai_connect_test.py @@ -6,7 +6,7 @@ class PlainXmlTest(unittest.TestCase): def test_similarity(self): real_string = oai_connection.get_version() - self.assertEqual("1.2.8", real_string) + self.assertEqual("1.2.11", real_string) if __name__ == '__main__': diff --git a/test/test_node_names_software_datacite.py b/test/test_node_names_software_datacite.py new file mode 100644 index 0000000..7158e97 --- /dev/null +++ b/test/test_node_names_software_datacite.py @@ -0,0 +1,7 @@ +import lxml.etree as ET + +dom = ET.parse('test/data/software/plain.xml') +xslt = ET.parse('xslt/software/xslt-software-transformation.xslt') +transform = ET.XSLT(xslt) +newdom = transform(dom) +print(ET.tostring(newdom, pretty_print=True)) \ No newline at end of file diff --git a/tests/test_plain_xml.py b/test/test_plain_xml.py similarity index 87% rename from tests/test_plain_xml.py rename to test/test_plain_xml.py index ae4c167..5c3fd81 100644 --- a/tests/test_plain_xml.py +++ b/test/test_plain_xml.py @@ -1,14 +1,16 @@ import os -import unittest import re -from xmldiff import main, formatting +import unittest + +from xmldiff import main from xmldiff.actions import 
MoveNode + from zbmath_rest2oai import getAsXml class PlainXmlTest(unittest.TestCase): def test_similarity(self): - real_string = getAsXml.final_xml2("6383667") + real_string = getAsXml.final_xml2("6383667", 'https://api.zbmath.org/v1/document/') real_string = re.sub( '0.\\d+', '0', @@ -18,7 +20,7 @@ def test_similarity(self): '0', real_string) - ref_location = os.path.join(os.path.dirname(__file__), './data/plain.xml') + ref_location = os.path.join(os.path.dirname(__file__), 'data/articles/plain.xml') with open(ref_location) as f: expected_string = f.read() diff = main.diff_texts(expected_string, real_string, { diff --git a/test/test_software_metadata.py b/test/test_software_metadata.py new file mode 100644 index 0000000..a31928f --- /dev/null +++ b/test/test_software_metadata.py @@ -0,0 +1,40 @@ +import os +import re +import unittest + +from xmldiff import main +from xmldiff.actions import MoveNode + +from zbmath_rest2oai import getAsXml + + +class PlainXmlTest(unittest.TestCase): + def test_similarity(self): + real_string = getAsXml.final_xml2("2", 'https://api.zbmath.org/v1/software/_search?search_string=si%3A') + real_string = re.sub( + '0.\\d+', + '0', + real_string) + real_string = re.sub( + '[\\d\\-: .]+', + '0', + real_string) + real_string = [line for line in real_string.splitlines() if line.strip() != ''] + real_string = '\n'.join(real_string) + + ref_location = os.path.join(os.path.dirname(__file__), 'data/software/plain.xml') + with open(ref_location) as f: + expected_string = f.read() + + + + diff = main.diff_texts(expected_string, real_string, { + 'ratio_mode': 'fast', + 'F': 1, + }) + essentials = list(filter(lambda e: not isinstance(e, MoveNode), diff)) + self.assertLessEqual(len(essentials), 0) + + +if __name__ == '__main__': + unittest.main() diff --git a/test_init.py b/test_init.py deleted file mode 100644 index 3f058f4..0000000 --- a/test_init.py +++ /dev/null @@ -1,7 +0,0 @@ -# -*- coding: utf-8 -*- -# content of test_sample.py -def 
first_func_for_test(x): - return x - -def test_the_first_func(): - assert first_func_for_test(1) == 1 diff --git a/tests/test_oai_preview.py b/tests/test_oai_preview.py deleted file mode 100644 index abbce73..0000000 --- a/tests/test_oai_preview.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -import unittest -from xml.dom.minidom import parse -from xmldiff import main, formatting -from xmldiff.actions import MoveNode -from lxml import etree -from zbmath_rest2oai import getWithSwagger - - -class MyTestCase(unittest.TestCase): - def test_something(self): - real = getWithSwagger.get_final_xml("6383667") - real_string = real.parentNode.parentNode.toprettyxml() - ref_location = os.path.join(os.path.dirname(__file__), './data/reference.xml') - with open(ref_location) as f: - dom = parse(f) - expected_string = dom.toprettyxml() - diff = main.diff_texts(expected_string, real_string, { - 'ratio_mode': 'accurate' - }) - essentials = list(filter(lambda e: not isinstance(e, MoveNode), diff)) - formatter = formatting.XMLFormatter() - # parser = etree.XMLParser(remove_blank_text=True) - # real_tree = etree.fromstring(expected_string, parser=parser) - # for item in essentials: - # result = formatter.format([item], real_tree) - # print(result) - self.assertLessEqual(len(essentials), 84) - diff_text = main.diff_texts(expected_string, real_string, { - 'ratio_mode': 'accurate' - }, formatter=formatter) - print(diff_text) - - -if __name__ == '__main__': - unittest.main() diff --git a/xslt/xslt-article-transformation.xslt b/xslt/articles/xslt-article-transformation.xslt similarity index 99% rename from xslt/xslt-article-transformation.xslt rename to xslt/articles/xslt-article-transformation.xslt index 2ac7a49..b818449 100644 --- a/xslt/xslt-article-transformation.xslt +++ b/xslt/articles/xslt-article-transformation.xslt @@ -152,4 +152,4 @@ - \ No newline at end of file + diff --git a/xslt/articles/xslt-articles-oai_zb_preview.sh b/xslt/articles/xslt-articles-oai_zb_preview.sh new file 
mode 100755 index 0000000..63e61da --- /dev/null +++ b/xslt/articles/xslt-articles-oai_zb_preview.sh @@ -0,0 +1,6 @@ +#!/bin/bash +AUTH=$(echo -n "$OAI_BASIC_USER:$OAI_BASIC_PASSWORD" | base64) +XSLT_RADAR_DATACITE='cat xslt-article-transformation.xslt | jq -Rsa . ' +VAR_XSLT_RADAR_DATACITE=$(eval "$XSLT_RADAR_DATACITE") + +curl --noproxy '*' -X POST -H 'Content-Type: application/json' -i 'https://oai-input.portal.mardi4nfdi.de/oai-backend/crosswalk' --header "Authorization: Basic $AUTH" --data '{"name":"rest2preview","formatFrom":"zbmath_rest_api","formatTo":"oai_zb_preview","xsltStylesheet":'"$VAR_XSLT_RADAR_DATACITE}"'}' diff --git a/xslt/software/xslt-software-datacite.sh b/xslt/software/xslt-software-datacite.sh new file mode 100755 index 0000000..941859a --- /dev/null +++ b/xslt/software/xslt-software-datacite.sh @@ -0,0 +1,6 @@ +#!/bin/bash +AUTH=$(echo -n "$OAI_BASIC_USER:$OAI_BASIC_PASSWORD" | base64) +XSLT_ZBMATH_REST_API_DATACITE='cat xslt-software-transformation.xslt | jq -Rsa . 
' +VAR_XSLT_ZBMATH_REST_API_DATACITE=$(eval "$XSLT_ZBMATH_REST_API_DATACITE") + +curl --noproxy '*' -X POST -H 'Content-Type: application/json' -i 'https://oai-input.portal.mardi4nfdi.de/oai-backend/crosswalk' --header "Authorization: Basic $AUTH" --data '{"name":"software_restapi_to_datacite","formatFrom":"zbmath_rest_api","formatTo":"datacite","xsltStylesheet":'"$VAR_XSLT_ZBMATH_REST_API_DATACITE}"'}' diff --git a/xslt/software/xslt-software-transformation.xslt b/xslt/software/xslt-software-transformation.xslt new file mode 100644 index 0000000..b19a73f --- /dev/null +++ b/xslt/software/xslt-software-transformation.xslt @@ -0,0 +1,73 @@ + + + + + + + + + + + + + + <xsl:value-of select="root/result/name"/> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xslt/xslt.sh b/xslt/xslt.sh deleted file mode 100755 index 888eaf3..0000000 --- a/xslt/xslt.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -AUTH=$(echo -n "$OAI_BASIC_USER:$OAI_BASIC_PASSWORD" | base64) -XSLT_RADAR_DC='https://github.com/MaRDI4NFDI/python-zbMathRest2Oai/blob/main/xslt/xslt-article-transformation.xslt' -curl --noproxy '*' -X POST -H 'Content-Type: application/json' -i 'https://oai-input.portal.mardi4nfdi.de/oai-backend/item' --header "Authorization: Basic $AUTH" --data '{"name":"Radar2OAI_DC_v09","formatFrom":"radar","formatTo":"oai_dc","xsltStylesheet":'"$XSLT_RADAR_DC}"'}'