From 63caa5c9ae853fa3d80656baa473ffe5554fa353 Mon Sep 17 00:00:00 2001 From: rubenada Date: Sat, 3 Sep 2022 19:25:25 +0100 Subject: [PATCH] [CALCITE-5263] Improve XmlFunctions by using an XML DocumentBuilder Co-authored-by: David Handermann (cherry picked from commit ba80b9156afc0db26b194d97a031fcc0dc7f4c03) --- .../apache/calcite/runtime/XmlFunctions.java | 67 ++++++++++++++++--- .../calcite/test/SqlXmlFunctionsTest.java | 48 +++++++++++++ gradle.properties | 2 +- 3 files changed, 107 insertions(+), 10 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java index 8c816475252..03134321a4d 100644 --- a/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java +++ b/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java @@ -24,7 +24,9 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; import java.util.ArrayList; @@ -33,6 +35,10 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.ErrorListener; import javax.xml.transform.OutputKeys; import javax.xml.transform.Source; @@ -48,6 +54,7 @@ import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; +import javax.xml.xpath.XPathFactoryConfigurationException; import static org.apache.calcite.linq4j.Nullness.castNonNull; import static org.apache.calcite.util.Static.RESOURCE; @@ -60,13 +67,41 @@ public class XmlFunctions { private static final ThreadLocal<@Nullable XPathFactory> XPATH_FACTORY = - ThreadLocal.withInitial(XPathFactory::newInstance); + ThreadLocal.withInitial(() -> { + final XPathFactory xPathFactory = XPathFactory.newInstance(); + try { + xPathFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + } catch (XPathFactoryConfigurationException e) { + throw new IllegalStateException("XPath Factory configuration failed", e); + } + return xPathFactory; + }); private static final ThreadLocal<@Nullable TransformerFactory> TRANSFORMER_FACTORY = ThreadLocal.withInitial(() -> { - TransformerFactory transformerFactory = TransformerFactory.newInstance(); + final TransformerFactory transformerFactory = TransformerFactory.newInstance(); transformerFactory.setErrorListener(new InternalErrorListener()); + try { + transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + } catch (TransformerConfigurationException e) { + throw new IllegalStateException("Transformer Factory configuration failed", e); + } return transformerFactory; }); + private static final ThreadLocal<@Nullable DocumentBuilderFactory> DOCUMENT_BUILDER_FACTORY = + ThreadLocal.withInitial(() -> { + final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); + documentBuilderFactory.setXIncludeAware(false); + documentBuilderFactory.setExpandEntityReferences(false); + documentBuilderFactory.setNamespaceAware(true); + try { + documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + documentBuilderFactory + .setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + } catch (final ParserConfigurationException e) { + throw new IllegalStateException("Document Builder configuration failed", e); + } + return documentBuilderFactory; + }); private static final Pattern VALID_NAMESPACE_PATTERN = Pattern .compile("^(([0-9a-zA-Z:_-]+=\"[^\"]*\")( [0-9a-zA-Z:_-]+=\"[^\"]*\")*)$"); @@ -81,10 +116,11 @@ private XmlFunctions() { return null; } try { + final Node documentNode = getDocumentNode(input); XPathExpression xpathExpression = castNonNull(XPATH_FACTORY.get()).newXPath().compile(xpath); try { NodeList nodes = (NodeList) xpathExpression - .evaluate(new InputSource(new StringReader(input)), XPathConstants.NODESET); + .evaluate(documentNode, XPathConstants.NODESET); List<@Nullable String> result = new ArrayList<>(); for (int i = 0; i < nodes.getLength(); i++) { Node item = castNonNull(nodes.item(i)); @@ -94,9 +130,9 @@ private XmlFunctions() { } return StringUtils.join(result, " "); } catch (XPathExpressionException e) { - return xpathExpression.evaluate(new InputSource(new StringReader(input))); + return xpathExpression.evaluate(documentNode); } - } catch (XPathExpressionException ex) { + } catch (IllegalArgumentException | XPathExpressionException ex) { throw RESOURCE.invalidInputForExtractValue(input, xpath).ex(); } } @@ -140,17 +176,18 @@ private XmlFunctions() { XPathExpression xpathExpression = xPath.compile(xpath); + final Node documentNode = getDocumentNode(xml); try { List result = new ArrayList<>(); NodeList nodes = (NodeList) xpathExpression - .evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODESET); + .evaluate(documentNode, XPathConstants.NODESET); for (int i = 0; i < nodes.getLength(); i++) { result.add(convertNodeToString(castNonNull(nodes.item(i)))); } return StringUtils.join(result, ""); } catch (XPathExpressionException e) { Node node = (Node) xpathExpression - .evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODE); + .evaluate(documentNode, XPathConstants.NODE); return convertNodeToString(node); } } catch (IllegalArgumentException | XPathExpressionException | TransformerException ex) { @@ -174,16 +211,17 @@ private XmlFunctions() { } XPathExpression xpathExpression = xPath.compile(xpath); + final Node documentNode = getDocumentNode(xml); try { NodeList nodes = (NodeList) xpathExpression - .evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODESET); + .evaluate(documentNode, XPathConstants.NODESET); if (nodes != null && nodes.getLength() > 0) { return 1; } return 0; } catch (XPathExpressionException e) { Node node = (Node) xpathExpression - .evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODE); + .evaluate(documentNode, XPathConstants.NODE); if (node != null) { return 1; } @@ -215,6 +253,17 @@ private static String convertNodeToString(Node node) throws TransformerException return writer.toString(); } + private static Node getDocumentNode(final String xml) { + try { + final DocumentBuilder documentBuilder = + castNonNull(DOCUMENT_BUILDER_FACTORY.get()).newDocumentBuilder(); + final InputSource inputSource = new InputSource(new StringReader(xml)); + return documentBuilder.parse(inputSource); + } catch (final ParserConfigurationException | SAXException | IOException e) { + throw new IllegalArgumentException("XML parsing failed", e); + } + } + /** The internal default ErrorListener for Transformer. Just rethrows errors to * discontinue the XML transformation. */ private static class InternalErrorListener implements ErrorListener { diff --git a/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java b/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java index 1457ef69084..046d9357059 100644 --- a/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java +++ b/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java @@ -21,9 +21,13 @@ import org.apache.calcite.runtime.XmlFunctions; import org.apache.calcite.util.BuiltInMethod; +import org.checkerframework.checker.nullness.qual.Nullable; import org.hamcrest.Matcher; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.function.Supplier; import static org.hamcrest.CoreMatchers.is; @@ -36,6 +40,23 @@ */ class SqlXmlFunctionsTest { + private static final String XML = "string"; + private static final String XSLT = + ""; + private static final String DOCUMENT_PATH = "/document"; + private static @Nullable String xmlExternalEntity = null; + private static @Nullable String xsltExternalEntity = null; + + @BeforeAll public static void setup() throws Exception { + final Path testFile = Files.createTempFile("foo", "temp"); + testFile.toFile().deleteOnExit(); + final String filePath = "file:///" + testFile.toAbsolutePath(); + xmlExternalEntity = " ]>&entity;"; + xsltExternalEntity = " ]>&entity;"; + } + @Test void testExtractValue() { assertExtractValue("cccddd", "/a", is("ccc")); @@ -45,6 +66,33 @@ class SqlXmlFunctionsTest { assertExtractValueFailed(input, "#", Matchers.expectThrowable(expected)); } + @Test void testExtractValueExternalEntity() { + String message = "Invalid input for EXTRACTVALUE: xml: '" + + xmlExternalEntity + "', xpath expression: '" + DOCUMENT_PATH + "'"; + CalciteException expected = new CalciteException(message, null); + assertExtractValueFailed(xmlExternalEntity, DOCUMENT_PATH, + Matchers.expectThrowable(expected)); + } + + @Test void testExistsNodeExternalEntity() { + String message = "Invalid input for EXISTSNODE xpath: '" + + DOCUMENT_PATH + "', namespace: '" + null + "'"; + CalciteException expected = new CalciteException(message, null); + assertExistsNodeFailed(xmlExternalEntity, DOCUMENT_PATH, null, + Matchers.expectThrowable(expected)); + } + + @Test void testXmlTransformExternalEntity() { + String message = "Invalid input for XMLTRANSFORM xml: '" + xmlExternalEntity + "'"; + CalciteException expected = new CalciteException(message, null); + assertXmlTransformFailed(xmlExternalEntity, XSLT, Matchers.expectThrowable(expected)); + } + + @Test void testXmlTransformExternalEntityXslt() { + String message = "Illegal xslt specified : '" + xsltExternalEntity + "'"; + CalciteException expected = new CalciteException(message, null); + assertXmlTransformFailed(XML, xsltExternalEntity, Matchers.expectThrowable(expected)); + } @Test void testXmlTransform() { assertXmlTransform(null, "", nullValue()); diff --git a/gradle.properties b/gradle.properties index 3001a30c5f1..1a96abe55ff 100644 --- a/gradle.properties +++ b/gradle.properties @@ -27,7 +27,7 @@ systemProp.org.gradle.internal.publish.checksums.insecure=true # This is version for Calcite itself # Note: it should not include "-SNAPSHOT" as it is automatically added by build.gradle.kts # Release version can be generated by using -Prelease or -Prc= arguments -calcite.version=1.30.0-kylin-4.x-r01 +calcite.version=1.30.0-kylin-4.x-r02 # This is a version to be used from Maven repository. It can be overridden by localAvatica below calcite.avatica.version=1.20.0