aibasel · FlorianPommerening · Nov 21, 2023 · Aug 18, 2023 · Nov 16, 2023 · Nov 20, 2023
diff --git a/src/search/parser/abstract_syntax_tree.cc b/src/search/parser/abstract_syntax_tree.cc
@@ -420,6 +420,8 @@ DecoratedASTNodePtr LiteralNode::decorate(DecorateContext &context) const {
     switch (value.type) {
     case TokenType::BOOLEAN:
         return utils::make_unique_ptr<BoolLiteralNode>(value.content);
+    case TokenType::STRING:
+        return utils::make_unique_ptr<StringLiteralNode>(value.content);
     case TokenType::INTEGER:
         return utils::make_unique_ptr<IntLiteralNode>(value.content);
     case TokenType::FLOAT:
@@ -441,6 +443,8 @@ const plugins::Type &LiteralNode::get_type(DecorateContext &context) const {
     switch (value.type) {
     case TokenType::BOOLEAN:
         return plugins::TypeRegistry::instance()->get_type<bool>();
+    case TokenType::STRING:
+        return plugins::TypeRegistry::instance()->get_type<string>();
     case TokenType::INTEGER:
         return plugins::TypeRegistry::instance()->get_type<int>();
     case TokenType::FLOAT:

diff --git a/src/search/parser/decorated_abstract_syntax_tree.cc b/src/search/parser/decorated_abstract_syntax_tree.cc
@@ -218,6 +218,46 @@ void BoolLiteralNode::dump(string indent) const {
     cout << indent << "BOOL: " << value << endl;
 }
 
+StringLiteralNode::StringLiteralNode(const string &value)
+    : value(value) { // stored as is; quotes and escapes are resolved in construct()
+}
+
+plugins::Any StringLiteralNode::construct(ConstructContext &context) const {
+    utils::TraceBlock block(context, "Constructing string value from '" + value + "'");
+    // A lone '"' both starts and ends with a quotation mark, so the length is
+    // checked as well; otherwise the size arithmetic below would underflow.
+    if (value.size() < 2 || !(value.starts_with('"') && value.ends_with('"'))) {
+        ABORT("String literal value is not enclosed in quotation marks"
+              " (this should have been caught before constructing this node).");
+    }
+    /*
+      Returns the literal's content without the enclosing quotation marks.
+      \n becomes a newline; for any other escaped symbol the escaping \ is just
+      dropped (e.g., \t is treated as t, not as a tab). No further syntax checks
+      are done: a trailing \ is silently ignored but should be excluded earlier.
+    */
+    const size_t content_length = value.size() - 2;
+    string result;
+    result.reserve(content_length);
+    bool escaped = false;
+    // Walk over the characters between the two quotation marks.
+    for (char c : value.substr(1, content_length)) {
+        if (escaped) {
+            escaped = false;
+            result += (c == 'n') ? '\n' : c;
+        } else if (c == '\\') {
+            escaped = true;
+        } else {
+            result += c;
+        }
+    }
+    return result;
+}
+
+void StringLiteralNode::dump(string indent) const {
+    cout << indent << "STRING: " << value << endl; // stored text, escapes not resolved
+}
+
 IntLiteralNode::IntLiteralNode(const string &value)
     : value(value) {
 }
@@ -473,6 +513,18 @@ shared_ptr<DecoratedASTNode> BoolLiteralNode::clone_shared() const {
     return make_shared<BoolLiteralNode>(*this);
 }
 
+StringLiteralNode::StringLiteralNode(const StringLiteralNode &other)
+    : value(other.value) { // copies the stored literal text
+}
+
+unique_ptr<DecoratedASTNode> StringLiteralNode::clone() const {
+    return utils::make_unique_ptr<StringLiteralNode>(*this); // new node built from *this
+}
+
+shared_ptr<DecoratedASTNode> StringLiteralNode::clone_shared() const {
+    return make_shared<StringLiteralNode>(*this); // copy of this node, shared ownership
+}
+
 IntLiteralNode::IntLiteralNode(const IntLiteralNode &other)
     : value(other.value) {
 }

diff --git a/src/search/parser/decorated_abstract_syntax_tree.h b/src/search/parser/decorated_abstract_syntax_tree.h
@@ -157,6 +157,20 @@ class BoolLiteralNode : public DecoratedASTNode {
     BoolLiteralNode(const BoolLiteralNode &other);
 };
 
+class StringLiteralNode : public DecoratedASTNode {
+    std::string value; // literal text as given to the constructor
+public:
+    StringLiteralNode(const std::string &value);
+
+    plugins::Any construct(ConstructContext &context) const override;
+    void dump(std::string indent) const override;
+
+    // TODO: once we get rid of lazy construction, the cloning support below should no longer be necessary.
+    virtual std::unique_ptr<DecoratedASTNode> clone() const override;
+    virtual std::shared_ptr<DecoratedASTNode> clone_shared() const override;
+    StringLiteralNode(const StringLiteralNode &other);
+};
+
 class IntLiteralNode : public DecoratedASTNode {
     std::string value;
 public:

diff --git a/src/search/parser/lexical_analyzer.cc b/src/search/parser/lexical_analyzer.cc
@@ -29,6 +29,7 @@ static vector<pair<TokenType, regex>> construct_token_type_expressions() {
         {TokenType::INTEGER,
          R"([+-]?(infinity|\d+([kmg]\b)?))"},
         {TokenType::BOOLEAN, R"(true|false)"},
+        {TokenType::STRING, R"("(\\\\|\\"|\\n|[^"\\])*")"},
         {TokenType::LET, R"(let)"},
         {TokenType::IDENTIFIER, R"([a-zA-Z_]\w*)"}
     };
@@ -42,6 +43,23 @@ static vector<pair<TokenType, regex>> construct_token_type_expressions() {
 static const vector<pair<TokenType, regex>> token_type_expressions =
     construct_token_type_expressions();
 
+// Formats text line by line, prefixing the line that contains pos with "> "
+// and placing a "^" below the character at pos.
+static string highlight_position(const string &text, string::const_iterator pos) {
+    ostringstream out;
+    int offset = pos - text.begin();
+    for (const string &line : utils::split(text, "\n")) {
+        bool hit = offset >= 0 && offset < static_cast<int>(line.size());
+        out << (hit ? "> " : "  ") << line << endl;
+        if (hit)
+            out << string(offset + 2, ' ') << "^" << endl;
+        // Make the offset relative to the start of the next line.
+        offset -= line.size() + 1;
+    }
+    string marked = out.str();
+    utils::rstrip(marked);
+    return marked;
+}
 
 TokenStream split_tokens(const string &text) {
     utils::Context context;
@@ -59,29 +77,15 @@ TokenStream split_tokens(const string &text) {
             TokenType token_type = type_and_expression.first;
             const regex &expression = type_and_expression.second;
             if (regex_search(start, end, match, expression, regex_constants::match_continuous)) {
-                tokens.push_back({utils::tolower(match[1]), token_type});
+                tokens.push_back({match[1], token_type});
                 start += match[0].length();
                 has_match = true;
                 break;
             }
         }
         if (!has_match) {
-            ostringstream error;
-            error << "Unable to recognize next token:" << endl;
-            int distance_to_error = start - text.begin();
-            for (const string &line : utils::split(text, "\n")) {
-                int line_length = line.size();
-                bool error_in_line =
-                    distance_to_error < line_length && distance_to_error >= 0;
-                error << (error_in_line ? "> " : "  ") << line << endl;
-                if (error_in_line)
-                    error << string(distance_to_error + 2, ' ') << "^" << endl;
-
-                distance_to_error -= line.size() + 1;
-            }
-            string message = error.str();
-            utils::rstrip(message);
-            context.error(message);
+            context.error("Unable to recognize next token:\n" +
+                          highlight_position(text, start));
         }
     }
     return TokenStream(move(tokens));

diff --git a/src/search/parser/syntax_analyzer.cc b/src/search/parser/syntax_analyzer.cc
@@ -162,6 +162,7 @@ static unordered_set<TokenType> literal_tokens {
     TokenType::FLOAT,
     TokenType::INTEGER,
     TokenType::BOOLEAN,
+    TokenType::STRING,
     TokenType::IDENTIFIER
 };
 
@@ -193,7 +194,8 @@ static ASTNodePtr parse_list(TokenStream &tokens, SyntaxAnalyzerContext &context
 
 static vector<TokenType> PARSE_NODE_TOKEN_TYPES = {
     TokenType::LET, TokenType::IDENTIFIER, TokenType::BOOLEAN,
-    TokenType::INTEGER, TokenType::FLOAT, TokenType::OPENING_BRACKET};
+    TokenType::STRING, TokenType::INTEGER, TokenType::FLOAT,
+    TokenType::OPENING_BRACKET};
 
 static ASTNodePtr parse_node(TokenStream &tokens,
                              SyntaxAnalyzerContext &context) {
@@ -220,6 +222,7 @@ static ASTNodePtr parse_node(TokenStream &tokens,
             return parse_literal(tokens, context);
         }
     case TokenType::BOOLEAN:
+    case TokenType::STRING:
     case TokenType::INTEGER:
     case TokenType::FLOAT:
         return parse_literal(tokens, context);

diff --git a/src/search/parser/token_stream.cc b/src/search/parser/token_stream.cc
@@ -11,8 +11,18 @@
 using namespace std;
 
 namespace parser {
+// Passes content through utils::tolower for the token types listed below.
+// The content of all other token types is returned unchanged.
+static string case_insensitive_to_lower(const string &content, TokenType type) {
+    const bool case_insensitive =
+        type == TokenType::BOOLEAN || type == TokenType::FLOAT ||
+        type == TokenType::IDENTIFIER || type == TokenType::INTEGER;
+    if (case_insensitive)
+        return utils::tolower(content);
+    return content;
+}
 Token::Token(const string &content, TokenType type)
-    : content(content), type(type) {
+    : content(case_insensitive_to_lower(content, type)), type(type) {
 }
 
 TokenStream::TokenStream(vector<Token> &&tokens)
@@ -96,6 +106,8 @@ string token_type_name(TokenType token_type) {
         return "Float";
     case TokenType::BOOLEAN:
         return "Boolean";
+    case TokenType::STRING:
+        return "String";
     case TokenType::IDENTIFIER:
         return "Identifier";
     case TokenType::LET:

diff --git a/src/search/parser/token_stream.h b/src/search/parser/token_stream.h
@@ -19,6 +19,7 @@ enum class TokenType {
     INTEGER,
     FLOAT,
     BOOLEAN,
+    STRING,
     IDENTIFIER,
     LET
 };

diff --git a/src/search/plugins/types.cc b/src/search/plugins/types.cc
@@ -292,6 +292,7 @@ BasicType TypeRegistry::NO_TYPE = BasicType(typeid(void), "<no type>");
 
 TypeRegistry::TypeRegistry() {
     insert_basic_type<bool>();
+    insert_basic_type<string>();
     insert_basic_type<int>();
     insert_basic_type<double>();
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -19,6 +19,7 @@ enum class TokenType { @@
         INTEGER,
         FLOAT,
         BOOLEAN,
+        STRING,
         IDENTIFIER,
         LET
     };
@@ Expand Down @@