Skip to content

Commit

Permalink
Adjust the parser to use defusedxml for safe XML parsing and lxml for the XSLT transformation
Browse files: browse the repository at this point in the history
  • Loading branch information
Shirazos7 committed Dec 12, 2024
1 parent 9b452a1 commit 5f487dd
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions src/swmath2swh/staging_deposit_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,18 @@
r = requests.get("https://oai.staging.mardi4nfdi.org/oai/OAIHandler?verb=GetRecord&metadataPrefix=codemeta&identifier=oai:swmath.org:4532")
xml_str = r.content

# Parse the XML safely using defusedxml
dom_safe = DET.fromstring(xml_str)
dom = DET.fromstring(xml_str)

# Convert the defusedxml tree to a string so lxml can parse it
dom_str = DET.tostring(dom_safe)
dom = ET.fromstring(dom_str) # Convert to lxml's Element for XSLT processing
# Convert the defusedxml-parsed XML to a string for lxml processing
dom_str = DET.tostring(dom, encoding='unicode')

# Load and apply the XSLT transformation
# Use lxml to parse the XML string for XSLT transformation
lxml_dom = ET.fromstring(dom_str)

# Perform XSLT transformation using lxml
xslt = ET.parse(xsl_filename)
transform = ET.XSLT(xslt)
newdom = transform(dom)
newdom = transform(lxml_dom)
formatted_newdom = ET.tostring(newdom, pretty_print=True, encoding='unicode')
formatted_newdom = re.sub(r'xmlns:ns\d+="[^"]+"', '', formatted_newdom)
formatted_newdom = re.sub(r'ns\d+:', 'codemeta:', formatted_newdom)
Expand Down

0 comments on commit 5f487dd

Please sign in to comment.