From 0737220959fe52ee22535e7db55b015a46a6294e Mon Sep 17 00:00:00 2001 From: Dawid Wysakowicz Date: Thu, 23 May 2024 16:12:14 +0200 Subject: [PATCH] [FLINK-35216] Support for RETURNING clause of JSON_QUERY --- docs/data/sql_functions.yml | 8 +- .../pyflink/table/tests/test_expression.py | 2 +- flink-table/flink-sql-parser/pom.xml | 3 +- .../src/main/codegen/templates/Parser.jj | 8457 +++++++++++++++++ .../calcite/sql/fun/SqlJsonQueryFunction.java | 192 + .../table/api/internal/BaseExpressions.java | 72 + .../ExpressionSerializationTest.java | 6 +- .../functions/BuiltInFunctionDefinitions.java | 9 +- .../functions/JsonFunctionsCallSyntax.java | 9 +- ...QueryOnErrorEmptyArgumentTypeStrategy.java | 77 + .../SpecificInputTypeStrategies.java | 4 + .../sql2rel/StandardConvertletTable.java | 1865 ++++ .../converters/JsonQueryConverter.java | 20 +- .../sql/SqlJsonQueryFunctionWrapper.java | 101 +- .../table/planner/codegen/CodeGenUtils.scala | 2 + .../planner/codegen/ExprCodeGenerator.scala | 2 + .../codegen/calls/BuiltInMethods.scala | 3 + .../codegen/calls/JsonQueryCallGen.scala | 90 + .../functions/JsonFunctionsITCase.java | 100 + .../table/runtime/functions/SqlJsonUtils.java | 141 +- 20 files changed, 11089 insertions(+), 74 deletions(-) create mode 100644 flink-table/flink-sql-parser/src/main/codegen/templates/Parser.jj create mode 100644 flink-table/flink-sql-parser/src/main/java/org/apache/calcite/sql/fun/SqlJsonQueryFunction.java create mode 100644 flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/JsonQueryOnErrorEmptyArgumentTypeStrategy.java create mode 100644 flink-table/flink-table-planner/src/main/java/org/apache/calcite/sql2rel/StandardConvertletTable.java create mode 100644 flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/codegen/calls/JsonQueryCallGen.scala diff --git a/docs/data/sql_functions.yml b/docs/data/sql_functions.yml index e7a0455159a91..17f03773786bd 100644 --- 
a/docs/data/sql_functions.yml +++ b/docs/data/sql_functions.yml @@ -820,12 +820,12 @@ json: -- "right" JSON_VALUE('{"contains blank": "right"}', 'strict $.[''contains blank'']' NULL ON EMPTY DEFAULT 'wrong' ON ERROR) ``` - - sql: JSON_QUERY(jsonValue, path [ { WITHOUT | WITH CONDITIONAL | WITH UNCONDITIONAL } [ ARRAY ] WRAPPER ] [ { NULL | EMPTY ARRAY | EMPTY OBJECT | ERROR } ON EMPTY ] [ { NULL | EMPTY ARRAY | EMPTY OBJECT | ERROR } ON ERROR ]) - table: STRING.jsonQuery(path [, JsonQueryWrapper [, JsonQueryOnEmptyOrError, JsonQueryOnEmptyOrError ] ]) + - sql: JSON_QUERY(jsonValue, path [RETURNING <data_type>] [ { WITHOUT | WITH CONDITIONAL | WITH UNCONDITIONAL } [ ARRAY ] WRAPPER ] [ { NULL | EMPTY ARRAY | EMPTY OBJECT | ERROR } ON EMPTY ] [ { NULL | EMPTY ARRAY | EMPTY OBJECT | ERROR } ON ERROR ]) + table: STRING.jsonQuery(path [, returnType [, JsonQueryWrapper [, JsonQueryOnEmptyOrError, JsonQueryOnEmptyOrError ] ] ]) description: | Extracts JSON values from a JSON string. - The result is always returned as a `STRING`. The `RETURNING` clause is currently not supported. + The result is returned as a `STRING` or `ARRAY<STRING>`. This can be controlled with the `RETURNING` clause. The `wrappingBehavior` determines whether the extracted value should be wrapped into an array, and whether to do so unconditionally or only if the value itself isn't an array already. 
@@ -844,6 +844,8 @@ json: -- '["c1","c2"]' JSON_QUERY('{"a":[{"c":"c1"},{"c":"c2"}]}', 'lax $.a[*].c') + -- ['c1','c2'] + JSON_QUERY('{"a":[{"c":"c1"},{"c":"c2"}]}', 'lax $.a[*].c' RETURNING ARRAY<STRING>) -- Wrap result into an array -- '[{}]' diff --git a/flink-python/pyflink/table/tests/test_expression.py b/flink-python/pyflink/table/tests/test_expression.py index d187ef8347ee3..e957283d9c389 100644 --- a/flink-python/pyflink/table/tests/test_expression.py +++ b/flink-python/pyflink/table/tests/test_expression.py @@ -231,7 +231,7 @@ def test_expression(self): JsonValueOnEmptyOrError.DEFAULT, 42, JsonValueOnEmptyOrError.ERROR, None))) - self.assertEqual("JSON_QUERY('{}', '$.x', WITHOUT_ARRAY, NULL, EMPTY_ARRAY)", + self.assertEqual("JSON_QUERY('{}', '$.x', STRING, WITHOUT_ARRAY, NULL, EMPTY_ARRAY)", str(lit('{}').json_query('$.x', JsonQueryWrapper.WITHOUT_ARRAY, JsonQueryOnEmptyOrError.NULL, JsonQueryOnEmptyOrError.EMPTY_ARRAY))) diff --git a/flink-table/flink-sql-parser/pom.xml b/flink-table/flink-sql-parser/pom.xml index 6deceabd2081c..02ea0f6360a55 100644 --- a/flink-table/flink-sql-parser/pom.xml +++ b/flink-table/flink-sql-parser/pom.xml @@ -265,7 +265,8 @@ under the License. org.apache.calcite calcite-core jar - true + + false ${project.build.directory}/ **/Parser.jj diff --git a/flink-table/flink-sql-parser/src/main/codegen/templates/Parser.jj b/flink-table/flink-sql-parser/src/main/codegen/templates/Parser.jj new file mode 100644 index 0000000000000..cc9345acc84ff --- /dev/null +++ b/flink-table/flink-sql-parser/src/main/codegen/templates/Parser.jj @@ -0,0 +1,8457 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +<@pp.dropOutputFile /> + +<@pp.changeOutputFile name="javacc/Parser.jj" /> + +options { + STATIC = false; + IGNORE_CASE = true; + UNICODE_INPUT = true; +} + + +PARSER_BEGIN(${parser.class}) + +package ${parser.package}; + +<#list (parser.imports!default.parser.imports) as importStr> +import ${importStr}; + + +import org.apache.calcite.avatica.util.Casing; +import org.apache.calcite.avatica.util.TimeUnit; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.runtime.CalciteContextException; +import org.apache.calcite.sql.JoinConditionType; +import org.apache.calcite.sql.JoinType; +import org.apache.calcite.sql.SqlAlter; +import org.apache.calcite.sql.SqlBasicTypeNameSpec; +import org.apache.calcite.sql.SqlBinaryOperator; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlCharStringLiteral; +import org.apache.calcite.sql.SqlCollation; +import org.apache.calcite.sql.SqlCollectionTypeNameSpec; +import org.apache.calcite.sql.SqlDataTypeSpec; +import org.apache.calcite.sql.SqlDelete; +import org.apache.calcite.sql.SqlDescribeSchema; +import org.apache.calcite.sql.SqlDescribeTable; +import org.apache.calcite.sql.SqlDynamicParam; +import org.apache.calcite.sql.SqlExplain; +import org.apache.calcite.sql.SqlExplainFormat; +import org.apache.calcite.sql.SqlExplainLevel; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlHint; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlInsert; +import org.apache.calcite.sql.SqlInsertKeyword; +import 
org.apache.calcite.sql.SqlIntervalQualifier; +import org.apache.calcite.sql.SqlJdbcDataTypeName; +import org.apache.calcite.sql.SqlJdbcFunctionCall; +import org.apache.calcite.sql.SqlJoin; +import org.apache.calcite.sql.SqlJsonConstructorNullClause; +import org.apache.calcite.sql.SqlJsonEncoding; +import org.apache.calcite.sql.SqlJsonExistsErrorBehavior; +import org.apache.calcite.sql.SqlJsonEmptyOrError; +import org.apache.calcite.sql.SqlJsonQueryEmptyOrErrorBehavior; +import org.apache.calcite.sql.SqlJsonQueryWrapperBehavior; +import org.apache.calcite.sql.SqlJsonValueEmptyOrErrorBehavior; +import org.apache.calcite.sql.SqlJsonValueReturning; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlMatchRecognize; +import org.apache.calcite.sql.SqlMerge; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlNumericLiteral; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.SqlOrderBy; +import org.apache.calcite.sql.SqlPivot; +import org.apache.calcite.sql.SqlPostfixOperator; +import org.apache.calcite.sql.SqlPrefixOperator; +import org.apache.calcite.sql.SqlRowTypeNameSpec; +import org.apache.calcite.sql.SqlSampleSpec; +import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.sql.SqlSelectKeyword; +import org.apache.calcite.sql.SqlSetOption; +import org.apache.calcite.sql.SqlSnapshot; +import org.apache.calcite.sql.SqlTableRef; +import org.apache.calcite.sql.SqlTypeNameSpec; +import org.apache.calcite.sql.SqlUnnestOperator; +import org.apache.calcite.sql.SqlUnpivot; +import org.apache.calcite.sql.SqlUpdate; +import org.apache.calcite.sql.SqlUserDefinedTypeNameSpec; +import org.apache.calcite.sql.SqlUtil; +import org.apache.calcite.sql.SqlWindow; +import org.apache.calcite.sql.SqlWith; +import org.apache.calcite.sql.SqlWithItem; +import org.apache.calcite.sql.fun.SqlCase; +import 
org.apache.calcite.sql.fun.SqlInternalOperators; +import org.apache.calcite.sql.fun.SqlLibraryOperators; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.fun.SqlTrimFunction; +import org.apache.calcite.sql.parser.Span; +import org.apache.calcite.sql.parser.SqlAbstractParserImpl; +import org.apache.calcite.sql.parser.SqlParseException; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.parser.SqlParserImplFactory; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.parser.SqlParserUtil; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.validate.SqlConformance; +import org.apache.calcite.util.Glossary; +import org.apache.calcite.util.Pair; +import org.apache.calcite.util.SourceStringReader; +import org.apache.calcite.util.Util; +import org.apache.calcite.util.trace.CalciteTrace; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.slf4j.Logger; + +import java.io.Reader; +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import static org.apache.calcite.util.Static.RESOURCE; + +/** + * SQL parser, generated from Parser.jj by JavaCC. + * + *

The public wrapper for this parser is {@link SqlParser}. + */ +public class ${parser.class} extends SqlAbstractParserImpl +{ + private static final Logger LOGGER = CalciteTrace.getParserTracer(); + + // Can't use quoted literal because of a bug in how JavaCC translates + // backslash-backslash. + private static final char BACKSLASH = 0x5c; + private static final char DOUBLE_QUOTE = 0x22; + private static final String DQ = DOUBLE_QUOTE + ""; + private static final String DQDQ = DQ + DQ; + private static final SqlLiteral LITERAL_ZERO = + SqlLiteral.createExactNumeric("0", SqlParserPos.ZERO); + private static final SqlLiteral LITERAL_ONE = + SqlLiteral.createExactNumeric("1", SqlParserPos.ZERO); + private static final SqlLiteral LITERAL_MINUS_ONE = + SqlLiteral.createExactNumeric("-1", SqlParserPos.ZERO); + + private static Metadata metadata; + + private Casing unquotedCasing; + private Casing quotedCasing; + private int identifierMaxLength; + private ImmutableMap timeUnitCodes; + private SqlConformance conformance; + + /** + * {@link SqlParserImplFactory} implementation for creating parser. 
+ */ + public static final SqlParserImplFactory FACTORY = new SqlParserImplFactory() { + public SqlAbstractParserImpl getParser(Reader reader) { + final ${parser.class} parser = new ${parser.class}(reader); + if (reader instanceof SourceStringReader) { + final String sql = + ((SourceStringReader) reader).getSourceString(); + parser.setOriginalSql(sql); + } + return parser; + } + }; + + public SqlParseException normalizeException(Throwable ex) { + try { + if (ex instanceof ParseException) { + ex = cleanupParseException((ParseException) ex); + } + return convertException(ex); + } catch (ParseException e) { + throw new AssertionError(e); + } + } + + public Metadata getMetadata() { + synchronized (${parser.class}.class) { + if (metadata == null) { + metadata = new MetadataImpl( + new ${parser.class}(new java.io.StringReader(""))); + } + return metadata; + } + } + + public void setTabSize(int tabSize) { + jj_input_stream.setTabSize(tabSize); + } + + public void switchTo(SqlAbstractParserImpl.LexicalState state) { + final int stateOrdinal = + Arrays.asList(${parser.class}TokenManager.lexStateNames) + .indexOf(state.name()); + token_source.SwitchTo(stateOrdinal); + } + + public void setQuotedCasing(Casing quotedCasing) { + this.quotedCasing = quotedCasing; + } + + public void setUnquotedCasing(Casing unquotedCasing) { + this.unquotedCasing = unquotedCasing; + } + + public void setIdentifierMaxLength(int identifierMaxLength) { + this.identifierMaxLength = identifierMaxLength; + } + + public void setTimeUnitCodes(Map timeUnitCodes) { + this.timeUnitCodes = ImmutableMap.copyOf(timeUnitCodes); + } + + public void setConformance(SqlConformance conformance) { + this.conformance = conformance; + } + + public SqlNode parseSqlExpressionEof() throws Exception { + return SqlExpressionEof(); + } + + public SqlNode parseSqlStmtEof() throws Exception { + return SqlStmtEof(); + } + + public SqlNodeList parseSqlStmtList() throws Exception { + return SqlStmtList(); + } + + private SqlNode 
extend(SqlNode table, SqlNodeList extendList) { + return SqlStdOperatorTable.EXTEND.createCall( + Span.of(table, extendList).pos(), table, extendList); + } + + /** Adds a warning that a token such as "HOURS" was used, + * whereas the SQL standard only allows "HOUR". + * + *

Currently, we silently add an exception to a list of warnings. In + * future, we may have better compliance checking, for example a strict + * compliance mode that throws if any non-standard features are used. */ + private TimeUnit warn(TimeUnit timeUnit) throws ParseException { + final String token = getToken(0).image.toUpperCase(Locale.ROOT); + warnings.add( + SqlUtil.newContextException(getPos(), + RESOURCE.nonStandardFeatureUsed(token))); + return timeUnit; + } +} + +PARSER_END(${parser.class}) + + +/*************************************** + * Utility Codes for Semantic Analysis * + ***************************************/ + +/* For Debug */ +JAVACODE +void debug_message1() { + LOGGER.info("{} , {}", getToken(0).image, getToken(1).image); +} + +JAVACODE String unquotedIdentifier() { + return SqlParserUtil.toCase(getToken(0).image, unquotedCasing); +} + +/** + * Allows parser to be extended with new types of table references. The + * default implementation of this production is empty. + */ +SqlNode ExtendedTableRef() : +{ +} +{ + UnusedExtension() + { + return null; + } +} + +/** + * Allows an OVER clause following a table expression as an extension to + * standard SQL syntax. The default implementation of this production is empty. + */ +SqlNode TableOverOpt() : +{ +} +{ + { + return null; + } +} + +/* + * Parses dialect-specific keywords immediately following the SELECT keyword. + */ +void SqlSelectKeywords(List keywords) : +{} +{ + E() +} + +/* + * Parses dialect-specific keywords immediately following the INSERT keyword. + */ +void SqlInsertKeywords(List keywords) : +{} +{ + E() +} + +/* +* Parse Floor/Ceil function parameters +*/ +SqlNode FloorCeilOptions(Span s, boolean floorFlag) : +{ + SqlNode node; +} +{ + node = StandardFloorCeilOptions(s, floorFlag) { + return node; + } +} + +/* +// This file contains the heart of a parser for SQL SELECT statements. 
+// code can be shared between various parsers (for example, a DDL parser and a +// DML parser) but is not a standalone JavaCC file. You need to prepend a +// parser declaration (such as that in Parser.jj). +*/ + +/* Epsilon */ +JAVACODE +void E() {} + +/** @Deprecated */ +JAVACODE List startList(Object o) +{ + List list = new ArrayList(); + list.add(o); + return list; +} + +/* + * NOTE jvs 6-Feb-2004: The straightforward way to implement the SQL grammar is + * to keep query expressions (SELECT, UNION, etc) separate from row expressions + * (+, LIKE, etc). However, this is not possible with an LL(k) parser, because + * both kinds of expressions allow parenthesization, so no fixed amount of left + * context is ever good enough. A sub-query can be a leaf in a row expression, + * and can include operators like UNION, so it's not even possible to use a + * syntactic lookahead rule like "look past an indefinite number of parentheses + * until you see SELECT, VALUES, or TABLE" (since at that point we still + * don't know whether we're parsing a sub-query like ((select ...) + x) + * vs. (select ... union select ...). + * + * The somewhat messy solution is to unify the two kinds of expression, + * and to enforce syntax rules using parameterized context. This + * is the purpose of the ExprContext parameter. It is passed to + * most expression productions, which check the expressions encountered + * against the context for correctness. When a query + * element like SELECT is encountered, the production calls + * checkQueryExpression, which will throw an exception if + * a row expression was expected instead. When a row expression like + * IN is encountered, the production calls checkNonQueryExpression + * instead. It is very important to understand how this works + * when modifying the grammar. + * + * The commingling of expressions results in some bogus ambiguities which are + * resolved with LOOKAHEAD hints. The worst example is comma. 
SQL allows both + * (WHERE x IN (1,2)) and (WHERE x IN (select ...)). This means when we parse + * the right-hand-side of an IN, we have to allow any kind of expression inside + * the parentheses. Now consider the expression "WHERE x IN(SELECT a FROM b + * GROUP BY c,d)". When the parser gets to "c,d" it doesn't know whether the + * comma indicates the end of the GROUP BY or the end of one item in an IN + * list. Luckily, we know that select and comma-list are mutually exclusive + * within IN, so we use maximal munch for the GROUP BY comma. However, this + * usage of hints could easily mask unintended ambiguities resulting from + * future changes to the grammar, making it very brittle. + */ + +JAVACODE protected SqlParserPos getPos() +{ + return new SqlParserPos( + token.beginLine, + token.beginColumn, + token.endLine, + token.endColumn); +} + +/** Starts a span at the current position. */ +JAVACODE Span span() +{ + return Span.of(getPos()); +} + +JAVACODE void checkQueryExpression(ExprContext exprContext) +{ + switch (exprContext) { + case ACCEPT_NON_QUERY: + case ACCEPT_SUB_QUERY: + case ACCEPT_CURSOR: + throw SqlUtil.newContextException(getPos(), + RESOURCE.illegalQueryExpression()); + } +} + +JAVACODE void checkNonQueryExpression(ExprContext exprContext) +{ + switch (exprContext) { + case ACCEPT_QUERY: + throw SqlUtil.newContextException(getPos(), + RESOURCE.illegalNonQueryExpression()); + } +} + +JAVACODE SqlNode checkNotJoin(SqlNode e) +{ + if (e instanceof SqlJoin) { + throw SqlUtil.newContextException(e.getParserPosition(), + RESOURCE.illegalJoinExpression()); + } + return e; +} + +/** + * Converts a ParseException (local to this particular instantiation + * of the parser) into a SqlParseException (common to all parsers). 
+ */ +JAVACODE SqlParseException convertException(Throwable ex) +{ + if (ex instanceof SqlParseException) { + return (SqlParseException) ex; + } + SqlParserPos pos = null; + int[][] expectedTokenSequences = null; + String[] tokenImage = null; + if (ex instanceof ParseException) { + ParseException pex = (ParseException) ex; + expectedTokenSequences = pex.expectedTokenSequences; + tokenImage = pex.tokenImage; + if (pex.currentToken != null) { + final Token token = pex.currentToken.next; + // Checks token.image.equals("1") to avoid recursive call. + // The SqlAbstractParserImpl#MetadataImpl constructor uses constant "1" to + // throw intentionally to collect the expected tokens. + if (!token.image.equals("1") + && getMetadata().isKeyword(token.image) + && SqlParserUtil.allowsIdentifier(tokenImage, expectedTokenSequences)) { + // If the next token is a keyword, reformat the error message as: + + // Incorrect syntax near the keyword '{keyword}' at line {line_number}, + // column {column_number}. + final String expecting = ex.getMessage() + .substring(ex.getMessage().indexOf("Was expecting")); + final String errorMsg = String.format("Incorrect syntax near the keyword '%s' " + + "at line %d, column %d.\n%s", + token.image, + token.beginLine, + token.beginColumn, + expecting); + // Replace the ParseException with explicit error message. + ex = new ParseException(errorMsg); + } + pos = new SqlParserPos( + token.beginLine, + token.beginColumn, + token.endLine, + token.endColumn); + } + } else if (ex instanceof TokenMgrError) { + expectedTokenSequences = null; + tokenImage = null; + // Example: + // Lexical error at line 3, column 24. Encountered "#" after "a". 
+ final java.util.regex.Pattern pattern = java.util.regex.Pattern.compile( + "(?s)Lexical error at line ([0-9]+), column ([0-9]+).*"); + java.util.regex.Matcher matcher = pattern.matcher(ex.getMessage()); + if (matcher.matches()) { + int line = Integer.parseInt(matcher.group(1)); + int column = Integer.parseInt(matcher.group(2)); + pos = new SqlParserPos(line, column, line, column); + } + } else if (ex instanceof CalciteContextException) { + // CalciteContextException is the standard wrapper for exceptions + // produced by the validator, but in the parser, the standard is + // SqlParseException; so, strip it away. In case you were wondering, + // the CalciteContextException appears because the parser + // occasionally calls into validator-style code such as + // SqlSpecialOperator.reduceExpr. + CalciteContextException ece = + (CalciteContextException) ex; + pos = new SqlParserPos( + ece.getPosLine(), + ece.getPosColumn(), + ece.getEndPosLine(), + ece.getEndPosColumn()); + ex = ece.getCause(); + } + + return new SqlParseException( + ex.getMessage(), pos, expectedTokenSequences, tokenImage, ex); +} + +/** + * Removes or transforms misleading information from a parse exception. + * + * @param ex dirty excn + * + * @return clean excn + */ +JAVACODE ParseException cleanupParseException(ParseException ex) +{ + if (ex.expectedTokenSequences == null) { + return ex; + } + int iIdentifier = Arrays.asList(ex.tokenImage).indexOf(""); + + // Find all sequences in the error which contain identifier. 
For + // example, + // {} + // {A} + // {B, C} + // {D, } + // {D, A} + // {D, B} + // + // would yield + // {} + // {D} + final List prefixList = new ArrayList(); + for (int i = 0; i < ex.expectedTokenSequences.length; ++i) { + int[] seq = ex.expectedTokenSequences[i]; + int j = seq.length - 1; + int i1 = seq[j]; + if (i1 == iIdentifier) { + int[] prefix = new int[j]; + System.arraycopy(seq, 0, prefix, 0, j); + prefixList.add(prefix); + } + } + + if (prefixList.isEmpty()) { + return ex; + } + + int[][] prefixes = (int[][]) + prefixList.toArray(new int[prefixList.size()][]); + + // Since was one of the possible productions, + // we know that the parser will also have included all + // of the non-reserved keywords (which are treated as + // identifiers in non-keyword contexts). So, now we need + // to clean those out, since they're totally irrelevant. + + final List list = new ArrayList(); + Metadata metadata = getMetadata(); + for (int i = 0; i < ex.expectedTokenSequences.length; ++i) { + int [] seq = ex.expectedTokenSequences[i]; + String tokenImage = ex.tokenImage[seq[seq.length - 1]]; + String token = SqlParserUtil.getTokenVal(tokenImage); + if (token == null || !metadata.isNonReservedKeyword(token)) { + list.add(seq); + continue; + } + boolean match = matchesPrefix(seq, prefixes); + if (!match) { + list.add(seq); + } + } + + ex.expectedTokenSequences = + (int [][]) list.toArray(new int [list.size()][]); + return ex; +} + +JAVACODE boolean matchesPrefix(int[] seq, int[][] prefixes) +{ + nextPrefix: + for (int[] prefix : prefixes) { + if (seq.length == prefix.length + 1) { + for (int k = 0; k < prefix.length; k++) { + if (prefix[k] != seq[k]) { + continue nextPrefix; + } + } + return true; + } + } + return false; +} + +/***************************************** + * Syntactical Descriptions * + *****************************************/ + +SqlNode ExprOrJoinOrOrderedQuery(ExprContext exprContext) : +{ + SqlNode e; + final List list = new ArrayList(); +} +{ + // 
Lookahead to distinguish between "TABLE emp" (which will be + // matched by ExplicitTable() via Query()) + // and "TABLE fun(args)" (which will be matched by TableRef()) + ( + LOOKAHEAD(2) + e = Query(exprContext) + e = OrderByLimitOpt(e) + { return e; } + | + e = TableRef1(ExprContext.ACCEPT_QUERY_OR_JOIN) + ( e = JoinTable(e) )* + { list.add(e); } + ( AddSetOpQuery(list, exprContext) )* + { return SqlParserUtil.toTree(list); } + ) +} + +/** + * Parses either a row expression or a query expression with an optional + * ORDER BY. + * + *

Postgres syntax for limit: + * + *

+ *    [ LIMIT { count | ALL } ]
+ *    [ OFFSET start ]
+ *
+ * + *

Trino syntax for limit: + * + *

+ *    [ OFFSET start ]
+ *    [ LIMIT { count | ALL } ]
+ *
+ * + *

MySQL syntax for limit: + * + *

+ *    [ LIMIT { count | start, count } ]
+ *
+ * + *

SQL:2008 syntax for limit: + * + *

+ *    [ OFFSET start { ROW | ROWS } ]
+ *    [ FETCH { FIRST | NEXT } [ count ] { ROW | ROWS } ONLY ]
+ *
+ */ +SqlNode OrderedQueryOrExpr(ExprContext exprContext) : +{ + SqlNode e; +} +{ + e = QueryOrExpr(exprContext) + e = OrderByLimitOpt(e) + { return e; } +} + +/** Reads optional "ORDER BY", "LIMIT", "OFFSET", "FETCH" following a query, + * {@code e}. If any of them are present, adds them to the query; + * otherwise returns the query unchanged. + * Throws if they are present and {@code e} is not a query. */ +SqlNode OrderByLimitOpt(SqlNode e) : +{ + final SqlNodeList orderBy; + final Span s = Span.of(); + SqlNode[] offsetFetch = {null, null}; +} +{ + ( + // use the syntactic type of the expression we just parsed + // to decide whether ORDER BY makes sense + orderBy = OrderBy(e.isA(SqlKind.QUERY)) + | { orderBy = null; } + ) + [ + LimitClause(s, offsetFetch) + [ OffsetClause(s, offsetFetch) ] + | + OffsetClause(s, offsetFetch) + [ + LimitClause(s, offsetFetch) { + if (!this.conformance.isOffsetLimitAllowed()) { + throw SqlUtil.newContextException(s.end(this), + RESOURCE.offsetLimitNotAllowed()); + } + } + | + FetchClause(offsetFetch) + ] + | + FetchClause(offsetFetch) + ] + { + if (orderBy != null || offsetFetch[0] != null || offsetFetch[1] != null) { + return new SqlOrderBy(getPos(), e, + Util.first(orderBy, SqlNodeList.EMPTY), + offsetFetch[0], offsetFetch[1]); + } + return e; + } +} + +/** + * Parses an OFFSET clause in an ORDER BY expression. + */ +void OffsetClause(Span s, SqlNode[] offsetFetch) : +{ +} +{ + // ROW or ROWS is required in SQL:2008 but we make it optional + // because it is not present in Postgres-style syntax. + { s.add(this); } + offsetFetch[0] = UnsignedNumericLiteralOrParam() + [ | ] +} + +/** + * Parses a FETCH clause in an ORDER BY expression. + */ +void FetchClause(SqlNode[] offsetFetch) : +{ +} +{ + // SQL:2008-style syntax. "OFFSET ... FETCH ...". + // If you specify both LIMIT and FETCH, FETCH wins. + ( | ) offsetFetch[1] = UnsignedNumericLiteralOrParam() + ( | ) +} + +/** + * Parses a LIMIT clause in an ORDER BY expression. 
+ */ +void LimitClause(Span s, SqlNode[] offsetFetch) : +{ +} +{ + // Postgres-style syntax. "LIMIT ... OFFSET ..." + { s.add(this); } + ( + // MySQL-style syntax. "LIMIT start, count" + LOOKAHEAD(2) + offsetFetch[0] = UnsignedNumericLiteralOrParam() + offsetFetch[1] = UnsignedNumericLiteralOrParam() { + if (!this.conformance.isLimitStartCountAllowed()) { + throw SqlUtil.newContextException(s.end(this), + RESOURCE.limitStartCountNotAllowed()); + } + } + | + offsetFetch[1] = UnsignedNumericLiteralOrParam() + | + + ) +} + +/** + * Parses a leaf in a query expression (SELECT, VALUES or TABLE). + */ +SqlNode LeafQuery(ExprContext exprContext) : +{ + SqlNode e; +} +{ + { + // ensure a query is legal in this context + checkQueryExpression(exprContext); + } + e = SqlSelect() { return e; } +| + e = TableConstructor() { return e; } +| + e = ExplicitTable(getPos()) { return e; } +} + +/** + * Parses a parenthesized query or single row expression. + * Depending on {@code exprContext}, may also accept a join. + */ +SqlNode ParenthesizedExpression(ExprContext exprContext) : +{ + SqlNode e; +} +{ + + { + // we've now seen left paren, so queries inside should + // be allowed as sub-queries + switch (exprContext) { + case ACCEPT_SUB_QUERY: + exprContext = ExprContext.ACCEPT_NONCURSOR; + break; + case ACCEPT_CURSOR: + exprContext = ExprContext.ACCEPT_ALL; + break; + } + } + e = ExprOrJoinOrOrderedQuery(exprContext) + + { + exprContext.throwIfNotCompatible(e); + return e; + } +} + +/** + * Parses a parenthesized query or comma-list of row expressions. + * + *

REVIEW jvs 8-Feb-2004: There's a small hole in this production. It can be + * used to construct something like + * + *

+ * WHERE x IN (select count(*) from t where c=d,5)
+ *
+ * + *

which should be illegal. The above is interpreted as equivalent to + * + *

+ * WHERE x IN ((select count(*) from t where c=d),5)
+ *
+ * + *

which is a legal use of a sub-query. The only way to fix the hole is to + * be able to remember whether a subexpression was parenthesized or not, which + * means preserving parentheses in the SqlNode tree. This is probably + * desirable anyway for use in purely syntactic parsing applications (e.g. SQL + * pretty-printer). However, if this is done, it's important to also make + * isA() on the paren node call down to its operand so that we can + * always correctly discriminate a query from a row expression. + */ +SqlNodeList ParenthesizedQueryOrCommaList( + ExprContext exprContext) : +{ + SqlNode e; + final List list = new ArrayList(); + ExprContext firstExprContext = exprContext; + final Span s; +} +{ + + { + // we've now seen left paren, so a query by itself should + // be interpreted as a sub-query + s = span(); + switch (exprContext) { + case ACCEPT_SUB_QUERY: + firstExprContext = ExprContext.ACCEPT_NONCURSOR; + break; + case ACCEPT_CURSOR: + firstExprContext = ExprContext.ACCEPT_ALL; + break; + } + } + e = OrderedQueryOrExpr(firstExprContext) { list.add(e); } + ( + + { + // a comma-list can't appear where only a query is expected + checkNonQueryExpression(exprContext); + } + AddExpression(list, exprContext) + )* + + { + return new SqlNodeList(list, s.end(this)); + } +} + +/** As ParenthesizedQueryOrCommaList, but allows DEFAULT + * in place of any of the expressions. For example, + * {@code (x, DEFAULT, null, DEFAULT)}. 
*/ +SqlNodeList ParenthesizedQueryOrCommaListWithDefault( + ExprContext exprContext) : +{ + SqlNode e; + final List list = new ArrayList(); + ExprContext firstExprContext = exprContext; + final Span s; +} +{ + + { + // we've now seen left paren, so a query by itself should + // be interpreted as a sub-query + s = span(); + switch (exprContext) { + case ACCEPT_SUB_QUERY: + firstExprContext = ExprContext.ACCEPT_NONCURSOR; + break; + case ACCEPT_CURSOR: + firstExprContext = ExprContext.ACCEPT_ALL; + break; + } + } + ( + e = OrderedQueryOrExpr(firstExprContext) { list.add(e); } + | + e = Default() { list.add(e); } + ) + ( + + { + // a comma-list can't appear where only a query is expected + checkNonQueryExpression(exprContext); + } + ( + e = Expression(exprContext) { list.add(e); } + | + e = Default() { list.add(e); } + ) + )* + + { + return new SqlNodeList(list, s.end(this)); + } +} + +/** + * Parses function parameter lists. + * If the list starts with DISTINCT or ALL, it is discarded. + */ +List UnquantifiedFunctionParameterList(ExprContext exprContext) : +{ + final List args; +} +{ + args = FunctionParameterList(exprContext) { + args.remove(0); // remove DISTINCT or ALL, if present + return args; + } +} + +/** + * Parses function parameter lists including DISTINCT keyword recognition, + * DEFAULT, and named argument assignment. 
+ */ +List FunctionParameterList(ExprContext exprContext) : +{ + final SqlLiteral qualifier; + final List list = new ArrayList(); +} +{ + + ( + qualifier = AllOrDistinct() { list.add(qualifier); } + | + /* no quantifier: still occupy index 0 so the arguments always start at index 1 */ { list.add(null); } + ) + AddArg0(list, exprContext) + ( + { + // a comma-list can't appear where only a query is expected + checkNonQueryExpression(exprContext); + } + AddArg(list, exprContext) + )* + + { + return list; + } +} + +/** Returns the SqlSelectKeyword.DISTINCT or SqlSelectKeyword.ALL symbol, positioned at getPos(), depending on which alternative matched. */ SqlLiteral AllOrDistinct() : +{ +} +{ + { return SqlSelectKeyword.DISTINCT.symbol(getPos()); } +| + { return SqlSelectKeyword.ALL.symbol(getPos()); } +} + +/** Parses the first argument of a function parameter list and appends it to list. The value is a Default(), a TableParam(), or a PartitionedQueryOrQueryOrExpr parsed in a context relaxed from exprContext (ACCEPT_SUB_QUERY becomes ACCEPT_NONCURSOR, ACCEPT_CURSOR becomes ACCEPT_ALL, anything else is kept). When the optional leading simple identifier is present, the value is wrapped in an ARGUMENT_ASSIGNMENT call whose operands are (value, name). */ void AddArg0(List list, ExprContext exprContext) : +{ + final SqlIdentifier name; + SqlNode e; + final ExprContext firstExprContext; + { + // we've now seen left paren, so queries inside should + // be allowed as sub-queries + switch (exprContext) { + case ACCEPT_SUB_QUERY: + firstExprContext = ExprContext.ACCEPT_NONCURSOR; + break; + case ACCEPT_CURSOR: + firstExprContext = ExprContext.ACCEPT_ALL; + break; + default: + firstExprContext = exprContext; + break; + } + } +} +{ + ( + LOOKAHEAD(2) name = SimpleIdentifier() + | { name = null; } + ) + ( + e = Default() + | + LOOKAHEAD(3) + e = TableParam() + | + e = PartitionedQueryOrQueryOrExpr(firstExprContext) + ) + { + if (name != null) { + e = SqlStdOperatorTable.ARGUMENT_ASSIGNMENT.createCall( + Span.of(name, e).pos(), e, name); + } + list.add(e); + } +} + +/** Parses an argument after the first one and appends it to list. The value is a Default(), an Expression(exprContext) or a TableParam(); unlike AddArg0 the context is passed through unchanged. When the optional leading simple identifier is present, the value is wrapped in an ARGUMENT_ASSIGNMENT call whose operands are (value, name). */ void AddArg(List list, ExprContext exprContext) : +{ + final SqlIdentifier name; + SqlNode e; +} +{ + ( + LOOKAHEAD(2) name = SimpleIdentifier() + | { name = null; } + ) + ( + e = Default() + | + e = Expression(exprContext) + | + e = TableParam() + ) + { + if (name != null) { + e = SqlStdOperatorTable.ARGUMENT_ASSIGNMENT.createCall( + Span.of(name, e).pos(), e, name); + } + list.add(e); + } +} + +/** Returns a SqlStdOperatorTable.DEFAULT call positioned at getPos(); used wherever DEFAULT may stand in for an expression. */ SqlNode Default() : {} +{ + { + return SqlStdOperatorTable.DEFAULT.createCall(getPos()); + } +} + +/** + * Parses a query (SELECT, UNION, INTERSECT, EXCEPT, VALUES, TABLE) followed by + * the
end-of-file symbol. + */ +SqlNode SqlQueryEof() : +{ + SqlNode query; +} +{ + query = OrderedQueryOrExpr(ExprContext.ACCEPT_QUERY) + + { return query; } +} + +/** + * Parses a list of SQL statements separated by semicolon. + * The semicolon is required between statements, but is + * optional at the end. At least one statement is required; the + * statements are returned in the order they were parsed. + */ +SqlNodeList SqlStmtList() : +{ + final List stmtList = new ArrayList(); + SqlNode stmt; +} +{ + stmt = SqlStmt() { + stmtList.add(stmt); + } + ( + + [ + stmt = SqlStmt() { + stmtList.add(stmt); + } + ] + )* + + { + return new SqlNodeList(stmtList, Span.of(stmtList).pos()); + } +} + +/** + * Parses an SQL statement. + * + * The alternatives are listed in this order: each statement parser method + * configured for the template (guarded by LOOKAHEAD(2)), SqlSetOption, + * SqlAlter, SqlCreate and SqlDrop (the latter two are only emitted when the + * corresponding create/drop method lists are non-empty), an ordered query + * or expression in ACCEPT_QUERY context, then SqlExplain, SqlDescribe, + * SqlInsert, SqlDelete, SqlUpdate, SqlMerge and SqlProcedureCall. + */ +SqlNode SqlStmt() : +{ + SqlNode stmt; +} +{ + ( +<#-- Add methods to parse additional statements here --> +<#list (parser.statementParserMethods!default.parser.statementParserMethods) as method> + LOOKAHEAD(2) stmt = ${method} + | + + stmt = SqlSetOption(Span.of(), null) + | + stmt = SqlAlter() + | +<#if (parser.createStatementParserMethods!default.parser.createStatementParserMethods)?size != 0> + stmt = SqlCreate() + | + +<#if (parser.dropStatementParserMethods!default.parser.dropStatementParserMethods)?size != 0> + stmt = SqlDrop() + | + + stmt = OrderedQueryOrExpr(ExprContext.ACCEPT_QUERY) + | + stmt = SqlExplain() + | + stmt = SqlDescribe() + | + stmt = SqlInsert() + | + stmt = SqlDelete() + | + stmt = SqlUpdate() + | + stmt = SqlMerge() + | + stmt = SqlProcedureCall() + ) + { + return stmt; + } +} + +/** + * Parses an SQL statement followed by the end-of-file symbol.
+ */ +SqlNode SqlStmtEof() : +{ + SqlNode stmt; +} +{ + stmt = SqlStmt() + { + return stmt; + } +} + +<#-- Add implementations of additional parser statement calls here --> +<#list (parser.implementationFiles!default.parser.implementationFiles) as file> + <#include "/@includes/"+file /> + + +/** Parses a list of one or more key=value options. The returned SqlNodeList is flat: every option contributes its key immediately followed by its value (see AddKeyValueOption), so the list always has an even number of elements. */ SqlNodeList ParenthesizedKeyValueOptionCommaList() : +{ + final Span s; + final List list = new ArrayList(); +} +{ + { s = span(); } + + AddKeyValueOption(list) + ( + + AddKeyValueOption(list) + )* + { + return new SqlNodeList(list, s.end(this)); + } +} + +/** +* Parses an option with format key=val whose key is a simple identifier or string literal +* and value is a string literal. The key and then the value are appended to the list +* as two consecutive elements. +*/ +void AddKeyValueOption(List list) : +{ + final SqlNode key; + final SqlNode value; +} +{ + ( + key = SimpleIdentifier() + | + key = StringLiteral() + ) + + value = StringLiteral() { + list.add(key); + list.add(value); + } +} + +/** Parses an option value (either a string or a numeric) and adds to a list. */ +void AddOptionValue(List list) : +{ + final SqlNode value; +} +{ + ( + value = NumericLiteral() { list.add(value); } + | + value = StringLiteral() { list.add(value); } + ) +} + +/** + * Parses a literal list separated by comma. The literal is either a string or a numeric.
+ */ +SqlNodeList ParenthesizedLiteralOptionCommaList() : +{ + final Span s; + final List list = new ArrayList(); +} +{ + { s = span(); } + + AddOptionValue(list) ( AddOptionValue(list) )* + { + return new SqlNodeList(list, s.end(this)); + } +} + +/** Parses one hint, a simple-identifier name followed by its options, and appends a SqlHint to hints. The option format stored on the hint records which alternative matched: KV_LIST for a key/value list, ID_LIST for a simple identifier list, LITERAL_LIST for a literal list, and EMPTY (with SqlNodeList.EMPTY as the options) when none of those lists is present. */ void AddHint(List hints) : +{ + final SqlIdentifier hintName; + final SqlNodeList hintOptions; + final SqlHint.HintOptionFormat optionFormat; +} +{ + hintName = SimpleIdentifier() + ( + LOOKAHEAD(5) + hintOptions = ParenthesizedKeyValueOptionCommaList() { + optionFormat = SqlHint.HintOptionFormat.KV_LIST; + } + | + LOOKAHEAD(3) + hintOptions = ParenthesizedSimpleIdentifierList() { + optionFormat = SqlHint.HintOptionFormat.ID_LIST; + } + | + LOOKAHEAD(3) + hintOptions = ParenthesizedLiteralOptionCommaList() { + optionFormat = SqlHint.HintOptionFormat.LITERAL_LIST; + } + | + LOOKAHEAD(2) + [ ] + { + hintOptions = SqlNodeList.EMPTY; + optionFormat = SqlHint.HintOptionFormat.EMPTY; + } + ) + { + hints.add( + new SqlHint(Span.of(hintOptions).end(this), hintName, hintOptions, + optionFormat)); + } +} + +/** Parses one or more hints following a table reference, + * and returns a SqlTableRef that wraps the table name and the hint list. */ +SqlNode TableHints(SqlIdentifier tableName) : +{ + final List hints = new ArrayList(); +} +{ + AddHint(hints) ( AddHint(hints) )* { + final SqlParserPos pos = Span.of(tableName).addAll(hints).end(this); + final SqlNodeList hintList = new SqlNodeList(hints, pos); + return new SqlTableRef(pos, tableName, hintList); + } +} + +/** + * Parses a leaf SELECT expression without ORDER BY. + */ +SqlSelect SqlSelect() : +{ + final List keywords = new ArrayList(); + final SqlLiteral keyword; + final SqlNodeList keywordList; + final List selectList = new ArrayList(); + final SqlNode fromClause; + final SqlNode where; + final SqlNodeList groupBy; + final SqlNode having; + final SqlNodeList windowDecls; + final List hints = new ArrayList(); + final Span s; +} +{ +