diff --git a/src/search/parser/abstract_syntax_tree.cc b/src/search/parser/abstract_syntax_tree.cc
index 280174b645..49c1635a93 100644
--- a/src/search/parser/abstract_syntax_tree.cc
+++ b/src/search/parser/abstract_syntax_tree.cc
@@ -420,6 +420,8 @@ DecoratedASTNodePtr LiteralNode::decorate(DecorateContext &context) const {
     switch (value.type) {
     case TokenType::BOOLEAN:
         return utils::make_unique_ptr<BoolLiteralNode>(value.content);
+    case TokenType::STRING:
+        return utils::make_unique_ptr<StringLiteralNode>(value.content);
     case TokenType::INTEGER:
         return utils::make_unique_ptr<IntLiteralNode>(value.content);
     case TokenType::FLOAT:
@@ -441,6 +443,8 @@ const plugins::Type &LiteralNode::get_type(DecorateContext &context) const {
     switch (value.type) {
     case TokenType::BOOLEAN:
         return plugins::TypeRegistry::instance()->get_type<bool>();
+    case TokenType::STRING:
+        return plugins::TypeRegistry::instance()->get_type<string>();
     case TokenType::INTEGER:
         return plugins::TypeRegistry::instance()->get_type<int>();
     case TokenType::FLOAT:
diff --git a/src/search/parser/decorated_abstract_syntax_tree.cc b/src/search/parser/decorated_abstract_syntax_tree.cc
index 3a401d9e7f..c1f6294383 100644
--- a/src/search/parser/decorated_abstract_syntax_tree.cc
+++ b/src/search/parser/decorated_abstract_syntax_tree.cc
@@ -218,6 +218,46 @@ void BoolLiteralNode::dump(string indent) const {
     cout << indent << "BOOL: " << value << endl;
 }
 
+StringLiteralNode::StringLiteralNode(const string &value)
+    : value(value) {
+}
+
+plugins::Any StringLiteralNode::construct(ConstructContext &context) const {
+    utils::TraceBlock block(context, "Constructing string value from '" + value + "'");
+    if (!(value.starts_with('"') && value.ends_with('"'))) {
+        ABORT("String literal value is not enclosed in quotation marks"
+              " (this should have been caught before constructing this node).");
+    }
+    /*
+      We are not doing any further syntax checking. Escaped symbols other than
+      \n will just ignore the escaping \ (e.g., \t is treated as t, not as a
+      tab). Strings ending in \ will not produce an error but should be excluded
+      by the previous steps.
+    */
+    string result;
+    result.reserve(value.length() - 2);
+    bool escaped = false;
+    for (char c : value.substr(1, value.size() - 2)) {
+        if (escaped) {
+            escaped = false;
+            if (c == 'n') {
+                result += '\n';
+            } else {
+                result += c;
+            }
+        } else if (c == '\\') {
+            escaped = true;
+        } else {
+            result += c;
+        }
+    }
+    return result;
+}
+
+void StringLiteralNode::dump(string indent) const {
+    cout << indent << "STRING: " << value << endl;
+}
+
 IntLiteralNode::IntLiteralNode(const string &value)
     : value(value) {
 }
@@ -473,6 +513,18 @@ shared_ptr<DecoratedASTNode> BoolLiteralNode::clone_shared() const {
     return make_shared<BoolLiteralNode>(*this);
 }
 
+StringLiteralNode::StringLiteralNode(const StringLiteralNode &other)
+    : value(other.value) {
+}
+
+unique_ptr<DecoratedASTNode> StringLiteralNode::clone() const {
+    return utils::make_unique_ptr<StringLiteralNode>(*this);
+}
+
+shared_ptr<DecoratedASTNode> StringLiteralNode::clone_shared() const {
+    return make_shared<StringLiteralNode>(*this);
+}
+
 IntLiteralNode::IntLiteralNode(const IntLiteralNode &other)
     : value(other.value) {
 }
diff --git a/src/search/parser/decorated_abstract_syntax_tree.h b/src/search/parser/decorated_abstract_syntax_tree.h
index 0094f8874b..105f77bf13 100644
--- a/src/search/parser/decorated_abstract_syntax_tree.h
+++ b/src/search/parser/decorated_abstract_syntax_tree.h
@@ -157,6 +157,20 @@ class BoolLiteralNode : public DecoratedASTNode {
     BoolLiteralNode(const BoolLiteralNode &other);
 };
 
+class StringLiteralNode : public DecoratedASTNode {
+    std::string value;
+public:
+    StringLiteralNode(const std::string &value);
+
+    plugins::Any construct(ConstructContext &context) const override;
+    void dump(std::string indent) const override;
+
+    // TODO: once we get rid of lazy construction, this should no longer be necessary.
+    virtual std::unique_ptr<DecoratedASTNode> clone() const override;
+    virtual std::shared_ptr<DecoratedASTNode> clone_shared() const override;
+    StringLiteralNode(const StringLiteralNode &other);
+};
+
 class IntLiteralNode : public DecoratedASTNode {
     std::string value;
 public:
diff --git a/src/search/parser/lexical_analyzer.cc b/src/search/parser/lexical_analyzer.cc
index 9a37360809..9812fd3e77 100644
--- a/src/search/parser/lexical_analyzer.cc
+++ b/src/search/parser/lexical_analyzer.cc
@@ -24,12 +24,22 @@ static vector<pair<TokenType, regex>> construct_token_type_expressions() {
         {TokenType::CLOSING_BRACKET, R"(\])"},
         {TokenType::COMMA, R"(,)"},
         {TokenType::EQUALS, R"(=)"},
+        {TokenType::LET, R"(let)"},
+        {TokenType::BOOLEAN, R"(true|false)"},
+        {TokenType::STRING, R"("(\\\\|\\"|\\n|[^"\\])*")"},
+        /*
+          Floats have to be parsed before integers, so tokens like '1.2' are
+          parsed as one float token rather than an integer token '1' followed
+          by a float token '.2'.
+        */
        {TokenType::FLOAT,
         R"([+-]?(((\d*\.\d+|\d+\.)(e[+-]?\d+|[kmg]\b)?)|\d+e[+-]?\d+))"},
-        {TokenType::INTEGER,
-         R"([+-]?(infinity|\d+([kmg]\b)?))"},
-        {TokenType::BOOLEAN, R"(true|false)"},
-        {TokenType::LET, R"(let)"},
+        {TokenType::INTEGER, R"([+-]?(infinity|\d+([kmg]\b)?))"},
+        /*
+          Identifiers have to be parsed last to prevent reserved words
+          ('infinity', 'true', 'false', and 'let') from being recognized as
+          identifiers.
+        */
         {TokenType::IDENTIFIER, R"([a-zA-Z_]\w*)"}
     };
     vector<pair<TokenType, regex>> token_type_expression;
@@ -42,6 +52,24 @@
 static const vector<pair<TokenType, regex>> token_type_expressions =
     construct_token_type_expressions();
 
+static string highlight_position(const string &text, string::const_iterator pos) {
+    ostringstream message_stream;
+    int distance_to_highlight = pos - text.begin();
+    for (const string &line : utils::split(text, "\n")) {
+        int line_length = line.size();
+        bool highlight_in_line =
+            distance_to_highlight < line_length && distance_to_highlight >= 0;
+        message_stream << (highlight_in_line ? "> " : "  ") << line << endl;
+        if (highlight_in_line) {
+            message_stream << string(distance_to_highlight + 2, ' ') << "^"
+                           << endl;
+        }
+        distance_to_highlight -= line.size() + 1;
+    }
+    string message = message_stream.str();
+    utils::rstrip(message);
+    return message;
+}
+
 TokenStream split_tokens(const string &text) {
     utils::Context context;
@@ -59,29 +87,15 @@
             TokenType token_type = type_and_expression.first;
             const regex &expression = type_and_expression.second;
             if (regex_search(start, end, match, expression, regex_constants::match_continuous)) {
-                tokens.push_back({utils::tolower(match[1]), token_type});
+                tokens.push_back({match[1], token_type});
                 start += match[0].length();
                 has_match = true;
                 break;
             }
         }
         if (!has_match) {
-            ostringstream error;
-            error << "Unable to recognize next token:" << endl;
-            int distance_to_error = start - text.begin();
-            for (const string &line : utils::split(text, "\n")) {
-                int line_length = line.size();
-                bool error_in_line =
-                    distance_to_error < line_length && distance_to_error >= 0;
-                error << (error_in_line ? "> " : "  ") << line << endl;
-                if (error_in_line)
-                    error << string(distance_to_error + 2, ' ') << "^" << endl;
-
-                distance_to_error -= line.size() + 1;
-            }
-            string message = error.str();
-            utils::rstrip(message);
-            context.error(message);
+            context.error("Unable to recognize next token:\n" +
+                          highlight_position(text, start));
         }
     }
     return TokenStream(move(tokens));
diff --git a/src/search/parser/syntax_analyzer.cc b/src/search/parser/syntax_analyzer.cc
index ffcafbfa4e..789dbab4c7 100644
--- a/src/search/parser/syntax_analyzer.cc
+++ b/src/search/parser/syntax_analyzer.cc
@@ -159,9 +159,10 @@ static ASTNodePtr parse_function(TokenStream &tokens,
 }
 
 static unordered_set<TokenType> literal_tokens {
-    TokenType::FLOAT,
-    TokenType::INTEGER,
     TokenType::BOOLEAN,
+    TokenType::STRING,
+    TokenType::INTEGER,
+    TokenType::FLOAT,
     TokenType::IDENTIFIER
 };
 
@@ -191,27 +192,35 @@ static ASTNodePtr parse_list(TokenStream &tokens, SyntaxAnalyzerContext &context
     return utils::make_unique_ptr<ListNode>(move(elements));
 }
 
-static vector<TokenType> PARSE_NODE_TOKEN_TYPES = {
-    TokenType::LET, TokenType::IDENTIFIER, TokenType::BOOLEAN,
-    TokenType::INTEGER, TokenType::FLOAT, TokenType::OPENING_BRACKET};
+static vector<TokenType> parse_node_token_types = {
+    TokenType::OPENING_BRACKET, TokenType::LET, TokenType::BOOLEAN,
+    TokenType::STRING, TokenType::INTEGER, TokenType::FLOAT,
+    TokenType::IDENTIFIER};
 
 static ASTNodePtr parse_node(TokenStream &tokens,
                              SyntaxAnalyzerContext &context) {
     utils::TraceBlock block(context, "Identify node type");
     Token token = tokens.peek(context);
-    if (find(PARSE_NODE_TOKEN_TYPES.begin(),
-             PARSE_NODE_TOKEN_TYPES.end(),
-             token.type) == PARSE_NODE_TOKEN_TYPES.end()) {
+    if (find(parse_node_token_types.begin(),
+             parse_node_token_types.end(),
+             token.type) == parse_node_token_types.end()) {
         ostringstream message;
         message << "Unexpected token '" << token
                 << "'. Expected any of the following token types: "
-                << utils::join(PARSE_NODE_TOKEN_TYPES, ", ");
+                << utils::join(parse_node_token_types, ", ");
         context.error(message.str());
     }
     switch (token.type) {
+    case TokenType::OPENING_BRACKET:
+        return parse_list(tokens, context);
     case TokenType::LET:
         return parse_let(tokens, context);
+    case TokenType::BOOLEAN:
+    case TokenType::STRING:
+    case TokenType::INTEGER:
+    case TokenType::FLOAT:
+        return parse_literal(tokens, context);
     case TokenType::IDENTIFIER:
         if (tokens.has_tokens(2) &&
             tokens.peek(context, 1).type == TokenType::OPENING_PARENTHESIS) {
@@ -219,12 +228,6 @@ static ASTNodePtr parse_node(TokenStream &tokens,
         } else {
             return parse_literal(tokens, context);
         }
-    case TokenType::BOOLEAN:
-    case TokenType::INTEGER:
-    case TokenType::FLOAT:
-        return parse_literal(tokens, context);
-    case TokenType::OPENING_BRACKET:
-        return parse_list(tokens, context);
     default:
         ABORT("Unknown token type '" + token_type_name(token.type) + "'.");
     }
diff --git a/src/search/parser/token_stream.cc b/src/search/parser/token_stream.cc
index c6d689cf1b..b2147f5a9b 100644
--- a/src/search/parser/token_stream.cc
+++ b/src/search/parser/token_stream.cc
@@ -11,8 +11,18 @@
 using namespace std;
 
 namespace parser {
+static string case_insensitive_to_lower(const string &content, TokenType type) {
+    if (type == TokenType::BOOLEAN ||
+        type == TokenType::INTEGER ||
+        type == TokenType::FLOAT ||
+        type == TokenType::IDENTIFIER) {
+        return utils::tolower(content);
+    } else {
+        return content;
+    }
+}
 Token::Token(const string &content, TokenType type)
-    : content(content), type(type) {
+    : content(case_insensitive_to_lower(content, type)), type(type) {
 }
 
 TokenStream::TokenStream(vector<Token> &&tokens)
@@ -90,16 +100,18 @@ string token_type_name(TokenType token_type) {
         return ",";
     case TokenType::EQUALS:
         return "=";
+    case TokenType::LET:
+        return "Let";
+    case TokenType::BOOLEAN:
+        return "Boolean";
+    case TokenType::STRING:
+        return "String";
     case TokenType::INTEGER:
         return "Integer";
     case TokenType::FLOAT:
         return "Float";
-    case TokenType::BOOLEAN:
-        return "Boolean";
     case TokenType::IDENTIFIER:
         return "Identifier";
-    case TokenType::LET:
-        return "Let";
     default:
         ABORT("Unknown token type.");
     }
diff --git a/src/search/parser/token_stream.h b/src/search/parser/token_stream.h
index 832cdafc75..71dd831f5d 100644
--- a/src/search/parser/token_stream.h
+++ b/src/search/parser/token_stream.h
@@ -16,11 +16,12 @@ enum class TokenType {
     CLOSING_BRACKET,
     COMMA,
     EQUALS,
+    LET,
+    BOOLEAN,
+    STRING,
     INTEGER,
     FLOAT,
-    BOOLEAN,
-    IDENTIFIER,
-    LET
+    IDENTIFIER
 };
 
 struct Token {
diff --git a/src/search/plugins/types.cc b/src/search/plugins/types.cc
index 117c139b53..c1ca2c372b 100644
--- a/src/search/plugins/types.cc
+++ b/src/search/plugins/types.cc
@@ -292,6 +292,7 @@ BasicType TypeRegistry::NO_TYPE = BasicType(typeid(void), "");
 
 TypeRegistry::TypeRegistry() {
     insert_basic_type<bool>();
+    insert_basic_type<string>();
     insert_basic_type<int>();
     insert_basic_type<double>();
 }