-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
cd330e2
commit b1b1fa2
Showing
2 changed files
with
401 additions
and
401 deletions.
There are no files selected for viewing
306 changes: 153 additions & 153 deletions
306
src/zbmath_rest2oai/convertSoftware_from_json_toXml.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,153 +1,153 @@ | ||
import json | ||
import xml.etree.ElementTree as ET | ||
|
||
|
||
def json_to_xml(json_data):
    """Build an XML element tree from a decoded software JSON record.

    Args:
        json_data: Mapping with a ``'result'`` entry holding the software
            fields read below, and a top-level ``'swhdeposit:deposit'`` entry
            holding the SWHID reference and the metadata-provenance URL.

    Returns:
        The ``root`` element. Children are appended in a fixed order; the
        list-valued fields (``authors``, ``classification``, ``keywords``,
        ``related_software``, ``standard_articles``) yield one child per item.

    Raises:
        KeyError: If one of the keys read below is missing.
    """

    def as_text(value):
        # ElementTree can only serialise ``str`` text (``None`` means "no
        # text"); any other JSON scalar, e.g. an integer year, would make
        # the later write fail with TypeError, so convert it here.
        if value is None or isinstance(value, str):
            return value
        return str(value)

    def add_child(parent, tag, text=None):
        # Append ``<tag>`` to ``parent`` with the given text and return it.
        child = ET.SubElement(parent, tag)
        child.text = text
        return child

    result = json_data['result']

    # Namespace prefixes are declared by hand as plain attributes because the
    # prefixed tag names below are written literally.
    root = ET.Element('root', attrib={
        'xmlns:swhdeposit': "https://www.softwareheritage.org/schema/2018/deposit",
        'xmlns:swh': "https://www.softwareheritage.org/schema/2018/deposit",
        'xmlns:schema': "https://schema.org/",
    })

    add_child(root, 'articles_count', str(result['articles_count']))

    for author in result['authors']:
        add_child(root, 'authors', as_text(author))

    for classif in result['classification']:
        add_child(root, 'classification', as_text(classif))

    # swhdeposit:deposit section (read from the top level of json_data, not
    # from 'result').
    deposit = json_data['swhdeposit:deposit']
    swhdeposit_elem = add_child(root, 'swhdeposit:deposit')
    swhdeposit_reference = add_child(swhdeposit_elem, 'swhdeposit:reference')
    swhdeposit_object = add_child(swhdeposit_reference, 'swhdeposit:object')
    swhdeposit_object.set(
        'swhid',
        deposit['swhdeposit:reference']['swhdeposit:object']['@swhid'],
    )
    metadata_provenance = add_child(swhdeposit_elem, 'swhdeposit:metadata-provenance')
    add_child(
        metadata_provenance,
        'schema:url',
        as_text(deposit['swhdeposit:metadata-provenance']['schema:url']),
    )

    # Fields wrapped in str() keep their historical rendering: a JSON null
    # becomes the text "None" and a list becomes its Python repr.
    add_child(root, 'dependencies', str(result['dependencies']))
    add_child(root, 'description', as_text(result['description']))
    add_child(root, 'homepage', as_text(result['homepage']))
    add_child(root, 'id', str(result['id']))

    for keyword in result['keywords']:
        add_child(root, 'keywords', as_text(keyword))

    add_child(root, 'license_terms', str(result['license_terms']))
    add_child(root, 'name', as_text(result['name']))
    add_child(root, 'operating_systems', str(result['operating_systems']))
    add_child(root, 'orms_id', str(result['orms_id']))
    add_child(root, 'programming_languages', str(result['programming_languages']))

    for software in result['related_software']:
        related_software_elem = add_child(root, 'related_software')
        add_child(related_software_elem, 'id', str(software['id']))
        add_child(related_software_elem, 'name', as_text(software['name']))

    add_child(root, 'source_code', as_text(result['source_code']))

    for article in result['standard_articles']:
        article_elem = add_child(root, 'standard_articles')
        # NOTE(review): this element has always been emitted empty; confirm
        # whether the article's author list should be written into it.
        add_child(article_elem, 'authors')
        add_child(article_elem, 'id', str(article['id']))
        add_child(article_elem, 'source', as_text(article['source']))
        add_child(article_elem, 'title', as_text(article['title']))
        add_child(article_elem, 'year', as_text(article['year']))

    add_child(root, 'zbmath_url', as_text(result['zbmath_url']))

    return root
|
||
def indent_xml(elem, level=0):
    """Insert line breaks and indentation into an element tree, in place.

    Args:
        elem: Element whose ``text``/``tail`` whitespace is rewritten,
            together with that of all its descendants.
        level: Nesting depth of ``elem``; 0 for the element the caller
            starts from.

    Only ``text``/``tail`` values that are empty or whitespace-only are
    replaced, so existing character data is kept.
    """
    # NOTE(review): one space per nesting level, as in the reviewed text;
    # confirm that this is the intended indentation width.
    pad = "\n" + " " * level

    def is_blank(value):
        return not value or not value.strip()

    if not len(elem):
        # Leaf: only the trailing whitespace matters, and the starting
        # element (level 0) is left without a tail.
        if level and is_blank(elem.tail):
            elem.tail = pad
        return

    if is_blank(elem.text):
        elem.text = pad + " "
    if is_blank(elem.tail):
        elem.tail = pad

    children = list(elem)
    for child in children:
        indent_xml(child, level + 1)

    # The last child is followed by this element's closing tag, so its tail
    # goes back to this element's own indentation.
    last_child = children[-1]
    if is_blank(last_child.tail):
        last_child.tail = pad
|
||
|
||
def convert_json_to_xml(json_file_path, xml_output_path):
    """Convert a software JSON file into an indented XML file.

    Args:
        json_file_path: Path of the JSON document to read.
        xml_output_path: Path the XML document is written to.

    The JSON is converted with ``json_to_xml``, indented with ``indent_xml``
    and written as UTF-8 with an XML declaration. A confirmation line is
    printed once the file has been written. Errors from opening or parsing
    the input, or from writing the output, propagate to the caller.
    """
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's locale encoding, which differs between systems.
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        json_data = json.load(json_file)

    # Build the tree and pretty-print it in place.
    root_element = json_to_xml(json_data)
    indent_xml(root_element)

    # Write the XML to a file with declaration and UTF-8 encoding.
    tree = ET.ElementTree(root_element)
    tree.write(xml_output_path, encoding='utf-8', xml_declaration=True)

    print(f"XML data has been saved to {xml_output_path}")
|
||
|
||
# Example usage: convert the bundled sample record. Both paths are relative
# to the current working directory; update them for your own files.
# NOTE(review): this runs whenever the module is executed or imported, since
# no ``__main__`` guard is visible around it.
json_file_path, xml_output_path = (
    '../../test/data/software/software_with_swhid.json',
    '../../test/data/software/software_with_swhid.xml',
)

convert_json_to_xml(
    json_file_path=json_file_path,
    xml_output_path=xml_output_path,
)
|
||
import json | ||
import xml.etree.ElementTree as ET | ||
|
||
|
||
def json_to_xml(json_data):
    """Build an XML element tree from a decoded software JSON record.

    Args:
        json_data: Mapping with a ``'result'`` entry holding the software
            fields read below, and a top-level ``'swhdeposit:deposit'`` entry
            holding the SWHID reference and the metadata-provenance URL.

    Returns:
        The ``root`` element. Children are appended in a fixed order; the
        list-valued fields (``authors``, ``classification``, ``keywords``,
        ``related_software``, ``standard_articles``) yield one child per item.

    Raises:
        KeyError: If one of the keys read below is missing.
    """

    def as_text(value):
        # ElementTree can only serialise ``str`` text (``None`` means "no
        # text"); any other JSON scalar, e.g. an integer year, would make
        # the later write fail with TypeError, so convert it here.
        if value is None or isinstance(value, str):
            return value
        return str(value)

    def add_child(parent, tag, text=None):
        # Append ``<tag>`` to ``parent`` with the given text and return it.
        child = ET.SubElement(parent, tag)
        child.text = text
        return child

    result = json_data['result']

    # Namespace prefixes are declared by hand as plain attributes because the
    # prefixed tag names below are written literally.
    root = ET.Element('root', attrib={
        'xmlns:swhdeposit': "https://www.softwareheritage.org/schema/2018/deposit",
        'xmlns:swh': "https://www.softwareheritage.org/schema/2018/deposit",
        'xmlns:schema': "https://schema.org/",
    })

    add_child(root, 'articles_count', str(result['articles_count']))

    for author in result['authors']:
        add_child(root, 'authors', as_text(author))

    for classif in result['classification']:
        add_child(root, 'classification', as_text(classif))

    # swhdeposit:deposit section (read from the top level of json_data, not
    # from 'result').
    deposit = json_data['swhdeposit:deposit']
    swhdeposit_elem = add_child(root, 'swhdeposit:deposit')
    swhdeposit_reference = add_child(swhdeposit_elem, 'swhdeposit:reference')
    swhdeposit_object = add_child(swhdeposit_reference, 'swhdeposit:object')
    swhdeposit_object.set(
        'swhid',
        deposit['swhdeposit:reference']['swhdeposit:object']['@swhid'],
    )
    metadata_provenance = add_child(swhdeposit_elem, 'swhdeposit:metadata-provenance')
    add_child(
        metadata_provenance,
        'schema:url',
        as_text(deposit['swhdeposit:metadata-provenance']['schema:url']),
    )

    # Fields wrapped in str() keep their historical rendering: a JSON null
    # becomes the text "None" and a list becomes its Python repr.
    add_child(root, 'dependencies', str(result['dependencies']))
    add_child(root, 'description', as_text(result['description']))
    add_child(root, 'homepage', as_text(result['homepage']))
    add_child(root, 'id', str(result['id']))

    for keyword in result['keywords']:
        add_child(root, 'keywords', as_text(keyword))

    add_child(root, 'license_terms', str(result['license_terms']))
    add_child(root, 'name', as_text(result['name']))
    add_child(root, 'operating_systems', str(result['operating_systems']))
    add_child(root, 'orms_id', str(result['orms_id']))
    add_child(root, 'programming_languages', str(result['programming_languages']))

    for software in result['related_software']:
        related_software_elem = add_child(root, 'related_software')
        add_child(related_software_elem, 'id', str(software['id']))
        add_child(related_software_elem, 'name', as_text(software['name']))

    add_child(root, 'source_code', as_text(result['source_code']))

    for article in result['standard_articles']:
        article_elem = add_child(root, 'standard_articles')
        # NOTE(review): this element has always been emitted empty; confirm
        # whether the article's author list should be written into it.
        add_child(article_elem, 'authors')
        add_child(article_elem, 'id', str(article['id']))
        add_child(article_elem, 'source', as_text(article['source']))
        add_child(article_elem, 'title', as_text(article['title']))
        add_child(article_elem, 'year', as_text(article['year']))

    add_child(root, 'zbmath_url', as_text(result['zbmath_url']))

    return root
|
||
def indent_xml(elem, level=0):
    """Insert line breaks and indentation into an element tree, in place.

    Args:
        elem: Element whose ``text``/``tail`` whitespace is rewritten,
            together with that of all its descendants.
        level: Nesting depth of ``elem``; 0 for the element the caller
            starts from.

    Only ``text``/``tail`` values that are empty or whitespace-only are
    replaced, so existing character data is kept.
    """
    # NOTE(review): one space per nesting level, as in the reviewed text;
    # confirm that this is the intended indentation width.
    pad = "\n" + " " * level

    def is_blank(value):
        return not value or not value.strip()

    if not len(elem):
        # Leaf: only the trailing whitespace matters, and the starting
        # element (level 0) is left without a tail.
        if level and is_blank(elem.tail):
            elem.tail = pad
        return

    if is_blank(elem.text):
        elem.text = pad + " "
    if is_blank(elem.tail):
        elem.tail = pad

    children = list(elem)
    for child in children:
        indent_xml(child, level + 1)

    # The last child is followed by this element's closing tag, so its tail
    # goes back to this element's own indentation.
    last_child = children[-1]
    if is_blank(last_child.tail):
        last_child.tail = pad
|
||
|
||
def convert_json_to_xml(json_file_path, xml_output_path):
    """Convert a software JSON file into an indented XML file.

    Args:
        json_file_path: Path of the JSON document to read.
        xml_output_path: Path the XML document is written to.

    The JSON is converted with ``json_to_xml``, indented with ``indent_xml``
    and written as UTF-8 with an XML declaration. A confirmation line is
    printed once the file has been written. Errors from opening or parsing
    the input, or from writing the output, propagate to the caller.
    """
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's locale encoding, which differs between systems.
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        json_data = json.load(json_file)

    # Build the tree and pretty-print it in place.
    root_element = json_to_xml(json_data)
    indent_xml(root_element)

    # Write the XML to a file with declaration and UTF-8 encoding.
    tree = ET.ElementTree(root_element)
    tree.write(xml_output_path, encoding='utf-8', xml_declaration=True)

    print(f"XML data has been saved to {xml_output_path}")
|
||
|
||
# Example usage: convert the bundled sample record. Both paths are relative
# to the current working directory; update them for your own files.
# NOTE(review): this runs whenever the module is executed or imported, since
# no ``__main__`` guard is visible around it.
json_file_path, xml_output_path = (
    '../../test/data/software/software_with_swhid.json',
    '../../test/data/software/software_with_swhid.xml',
)

convert_json_to_xml(
    json_file_path=json_file_path,
    xml_output_path=xml_output_path,
)
|
||
|
Oops, something went wrong.