Skip to content

Commit

Permalink
parse string later
Browse files Browse the repository at this point in the history
  • Loading branch information
FlorianPommerening committed Nov 20, 2023
1 parent d751d5e commit c9cfb16
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 83 deletions.
18 changes: 5 additions & 13 deletions src/search/parser/abstract_syntax_tree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -408,21 +408,13 @@ LiteralNode::LiteralNode(Token value)
}

DecoratedASTNodePtr LiteralNode::decorate(DecorateContext &context) const {
utils::TraceBlock block(context, "Checking Literal: " + value.repr());
utils::TraceBlock block(context, "Checking Literal: " + value.content);
if (context.has_variable(value.content)) {
if (value.type == TokenType::IDENTIFIER) {
string variable_name = value.content;
return utils::make_unique_ptr<VariableNode>(variable_name);
} else if (value.type != TokenType::STRING) {
/*
Variable names may be identical to a string literal but not
identical to any other token, e.g., a boolean:
"let(true, blind(), astar(true))"
This kind of mistake is handled earlier, so ending up here is a
programming mistake, not an input error.
*/
if (value.type != TokenType::IDENTIFIER) {
ABORT("A non-identifier token was defined as variable.");
}
string variable_name = value.content;
return utils::make_unique_ptr<VariableNode>(variable_name);
}

switch (value.type) {
Expand All @@ -444,7 +436,7 @@ DecoratedASTNodePtr LiteralNode::decorate(DecorateContext &context) const {

void LiteralNode::dump(string indent) const {
cout << indent << token_type_name(value.type) << ": "
<< value.repr() << endl;
<< value.content << endl;
}

const plugins::Type &LiteralNode::get_type(DecorateContext &context) const {
Expand Down
31 changes: 29 additions & 2 deletions src/search/parser/decorated_abstract_syntax_tree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,38 @@ StringLiteralNode::StringLiteralNode(const string &value)

plugins::Any StringLiteralNode::construct(ConstructContext &context) const {
    utils::TraceBlock block(context, "Constructing string value from '" + value + "'");
    /*
      The value is expected to still carry its enclosing quotation marks.
      A lone '"' both starts and ends with a quotation mark, so the length
      has to be checked as well: otherwise value.size() - 2 below would wrap
      around and the reserve() call would fail instead of reporting the
      actual problem.
    */
    if (value.size() < 2 || value.front() != '"' || value.back() != '"') {
        ABORT("String literal value is not enclosed in quotation marks"
              " (this should have been caught before constructing this node).");
    }
    /*
      We are not doing any further syntax checking. Escaped symbols other than
      \n will just ignore the escaping \ (e.g., \t is treated as t, not as a
      tab). Strings ending in \ will not produce an error but should be excluded
      by the previous steps.
    */
    string result;
    result.reserve(value.size() - 2);
    bool escaped = false;
    // Visit only the characters strictly between the two quotation marks.
    for (size_t i = 1; i + 1 < value.size(); ++i) {
        char c = value[i];
        if (escaped) {
            escaped = false;
            if (c == 'n') {
                result += '\n';
            } else {
                result += c;
            }
        } else if (c == '\\') {
            escaped = true;
        } else {
            result += c;
        }
    }
    return result;
}

void StringLiteralNode::dump(string indent) const {
cout << indent << "STRING: \"" << utils::escape(value) << "\"" << endl;
cout << indent << "STRING: " << value << endl;
}

IntLiteralNode::IntLiteralNode(const string &value)
Expand Down
10 changes: 2 additions & 8 deletions src/search/parser/lexical_analyzer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ static vector<pair<TokenType, regex>> construct_token_type_expressions() {
{TokenType::INTEGER,
R"([+-]?(infinity|\d+([kmg]\b)?))"},
{TokenType::BOOLEAN, R"(true|false)"},
{TokenType::STRING, R"(\"((\\\\|\\"|\\n|[^"\\])*)\")"},
{TokenType::STRING, R"("(\\\\|\\"|\\n|[^"\\])*")"},
{TokenType::LET, R"(let)"},
{TokenType::IDENTIFIER, R"([a-zA-Z_]\w*)"}
};
Expand Down Expand Up @@ -77,13 +77,7 @@ TokenStream split_tokens(const string &text) {
TokenType token_type = type_and_expression.first;
const regex &expression = type_and_expression.second;
if (regex_search(start, end, match, expression, regex_constants::match_continuous)) {
string value;
if (token_type == TokenType::STRING) {
value = utils::unescape(match[2]);
} else {
value = utils::tolower(match[1]);
}
tokens.push_back({value, token_type});
tokens.push_back({match[1], token_type});
start += match[0].length();
has_match = true;
break;
Expand Down
20 changes: 11 additions & 9 deletions src/search/parser/token_stream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,19 @@
using namespace std;

namespace parser {
Token::Token(const string &content, TokenType type)
: content(content), type(type) {
}

string Token::repr() const {
if (type == TokenType::STRING) {
return "\"" + utils::escape(content) + "\"";
static string case_insensitive_to_lower(const string &content, TokenType type) {
    /*
      Boolean, float, identifier and integer tokens are passed through
      utils::tolower; the content of every other token type is returned
      exactly as given.
    */
    switch (type) {
    case TokenType::BOOLEAN:
    case TokenType::FLOAT:
    case TokenType::IDENTIFIER:
    case TokenType::INTEGER:
        return utils::tolower(content);
    default:
        return content;
    }
}
Token::Token(const string &content, TokenType type)
: content(case_insensitive_to_lower(content, type)), type(type) {
}

TokenStream::TokenStream(vector<Token> &&tokens)
: tokens(move(tokens)), pos(0) {
Expand Down Expand Up @@ -78,7 +80,7 @@ string TokenStream::str(int from, int to) const {
int max_position = min(static_cast<int>(tokens.size()), to);
ostringstream message;
while (curr_position < max_position) {
message << tokens[curr_position].repr();
message << tokens[curr_position].content;
curr_position++;
}
return message.str();
Expand Down Expand Up @@ -121,7 +123,7 @@ ostream &operator<<(ostream &out, TokenType token_type) {
}

ostream &operator<<(ostream &out, const Token &token) {
out << "<Type: '" << token.type << "', Value: '" << token.repr() << "'>";
out << "<Type: '" << token.type << "', Value: '" << token.content << "'>";
return out;
}
}
1 change: 0 additions & 1 deletion src/search/parser/token_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ struct Token {
TokenType type;

Token(const std::string &content, TokenType type);
std::string repr() const;
};

class TokenStream {
Expand Down
48 changes: 0 additions & 48 deletions src/search/utils/strings.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,54 +8,6 @@
using namespace std;

namespace utils {
string escape(const string &s) {
    /*
      Return a copy of s in which every backslash is written as \\, every
      double quote as \" and every newline character as the two characters
      \n. All other characters are copied unchanged.
    */
    string escaped_text;
    escaped_text.reserve(s.length());
    for (const char symbol : s) {
        switch (symbol) {
        case '\\':
            escaped_text.append("\\\\");
            break;
        case '"':
            escaped_text.append("\\\"");
            break;
        case '\n':
            escaped_text.append("\\n");
            break;
        default:
            escaped_text.push_back(symbol);
            break;
        }
    }
    return escaped_text;
}

string unescape(const string &s) {
    /*
      Inverse of escape() for sequences produced by it. No syntax checking
      is performed: a backslash followed by 'n' yields a newline, a
      backslash followed by any other character yields just that character
      (so \t becomes t, not a tab), and a backslash at the very end of the
      input is dropped without an error.
    */
    string plain_text;
    plain_text.reserve(s.length());
    auto cursor = s.begin();
    const auto stop = s.end();
    while (cursor != stop) {
        char symbol = *cursor;
        ++cursor;
        if (symbol == '\\') {
            if (cursor == stop) {
                // Trailing backslash: nothing left to escape, ignore it.
                break;
            }
            symbol = (*cursor == 'n') ? '\n' : *cursor;
            ++cursor;
        }
        plain_text += symbol;
    }
    return plain_text;
}

void lstrip(string &s) {
s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) {
return !isspace(ch);
Expand Down
2 changes: 0 additions & 2 deletions src/search/utils/strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
#include <vector>

namespace utils {
extern std::string escape(const std::string &s);
extern std::string unescape(const std::string &s);
extern void lstrip(std::string &s);
extern void rstrip(std::string &s);
extern void strip(std::string &s);
Expand Down

0 comments on commit c9cfb16

Please sign in to comment.