-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathProgramLanguageLexer.cpp
108 lines (87 loc) · 3.21 KB
/
ProgramLanguageLexer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#include "ProgramLanguageLexer.hpp"
#include "ProgramLanguage.hpp"
#include "SyntaxError.hpp"
#include <cctype>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <new>
ProgramLanguageLexer::ProgramLanguageLexer()
: textBufferReader(nullptr), lexemList(nullptr)
{
}
/**
 * Splits a NUL-terminated text into lexemes and pushes them onto a stack.
 *
 * Whitespace between lexemes is skipped. At every position the lexer tries,
 * in this order: keyword, single-character symbol, number, identifier.
 *
 * @param textBuffer    NUL-terminated text to tokenize.
 * @param newLexemList  Stack that receives the lexemes, pushed in the order
 *                      they appear in the text.
 * @throws SyntaxError  if the current character cannot start any lexeme.
 */
void ProgramLanguageLexer::tokenizeText(const char *textBuffer, Stack<ProgramLanguage::Lexem> *newLexemList)
{
    textBufferReader = textBuffer;
    lexemList = newLexemList;

    skipWhitespaces();
    while (*textBufferReader != '\0') {
        // The <cctype> classifiers have undefined behaviour for negative
        // arguments, so bytes >= 0x80 must be seen as unsigned char. Such
        // bytes then fall through to the SyntaxError below instead of UB.
        const auto current = static_cast<unsigned char>(*textBufferReader);

        // On success the helper has already pushed the lexeme and moved the
        // cursor; on failure the cursor is untouched and `current` is still valid.
        const bool consumed = lookForKeyword() || lookForSymbol();
        if (!consumed) {
            if (std::isdigit(current)) {
                addNumberLexem();
            } else if (std::isalpha(current) || current == '_') {
                addIdentifierLexem();
            } else {
                throw SyntaxError(__FILE__, __LINE__, __func__, "illegal symbol found");
            }
        }

        skipWhitespaces();
    }
}
/**
 * Advances the read cursor past a run of whitespace characters.
 *
 * Stops at the first character std::isspace rejects, which includes the
 * terminating NUL, so the cursor never moves past the end of the text.
 */
void ProgramLanguageLexer::skipWhitespaces()
{
    // Convert to unsigned char first: passing a negative char value to
    // std::isspace is undefined behaviour.
    while (std::isspace(static_cast<unsigned char>(*textBufferReader))) {
        ++textBufferReader;
    }
}
bool ProgramLanguageLexer::lookForKeyword()
{
bool foundKeyword = false;
for (size_t i = 0; i < sizeof(ProgramLanguage::Keywords) / sizeof(ProgramLanguage::Keywords[0]); ++i) {
if (std::strstr(textBufferReader, ProgramLanguage::Keywords[i]) == textBufferReader) {
foundKeyword = true;
addKeywordLexem((ProgramLanguage::Keyword) i, std::strlen(ProgramLanguage::Keywords[i]));
continue;
}
}
return foundKeyword;
}
/**
 * Tries to match the character at the cursor against ProgramLanguage::Symbols.
 *
 * On a match the symbol lexeme is pushed and the cursor is moved one
 * character forward (both done by addSymbolLexem).
 *
 * @return true if a symbol was consumed, false if the cursor is unchanged.
 */
bool ProgramLanguageLexer::lookForSymbol()
{
    for (const auto &symbol : ProgramLanguage::Symbols) {
        if (*textBufferReader == symbol) {
            addSymbolLexem(symbol);
            // Return at once: the cursor has moved, and the next character
            // must go through the full keyword/symbol/number/identifier
            // dispatch rather than be compared with the remaining symbols.
            return true;
        }
    }
    return false;
}
/**
 * Pushes a keyword lexeme and moves the read cursor past the keyword text.
 *
 * @param keyword        Keyword value stored in the lexeme.
 * @param keywordLength  Number of characters the cursor is advanced by.
 */
void ProgramLanguageLexer::addKeywordLexem(ProgramLanguage::Keyword keyword, size_t keywordLength)
{
    ProgramLanguage::Lexem keywordLexem{
        .type = ProgramLanguage::Lexem::Type::Keyword,
        .data = {.keyword = keyword},
    };
    lexemList->push(keywordLexem);

    textBufferReader = textBufferReader + keywordLength;
}
/**
 * Reads an identifier (letters, digits and '_') starting at the cursor,
 * pushes it as an Identifier lexeme and moves the cursor past it.
 *
 * The lexeme carries a NUL-terminated copy of the identifier allocated with
 * std::calloc. Releasing that copy is not done in this function; presumably
 * the consumer of the lexeme list frees it with std::free -- confirm.
 *
 * @throws std::bad_alloc if the copy cannot be allocated.
 */
void ProgramLanguageLexer::addIdentifierLexem()
{
    // Classify through unsigned char: negative char values are undefined
    // behaviour for the <cctype> functions.
    const auto isIdentifierChar = [](char c) {
        return std::isalnum(static_cast<unsigned char>(c)) || c == '_';
    };

    const char *identifierEnd = textBufferReader;
    while (isIdentifierChar(*identifierEnd)) {
        ++identifierEnd;
    }
    const auto identifierLength = static_cast<size_t>(identifierEnd - textBufferReader);

    char *const identifier = static_cast<char *>(std::calloc(identifierLength + 1, sizeof(char)));
    if (identifier == nullptr) {
        // Fail loudly instead of writing through a null pointer.
        throw std::bad_alloc();
    }
    // The scanned range contains no NUL, so a plain byte copy is exact.
    std::memcpy(identifier, textBufferReader, identifierLength);
    identifier[identifierLength] = '\0';

    ProgramLanguage::Lexem lexem{.type = ProgramLanguage::Lexem::Type::Identifier,
                                 .data{.identifier = identifier}};
    lexemList->push(lexem);
    textBufferReader = identifierEnd;
}
/**
 * Reads a run of decimal digits at the cursor, pushes its value as a Number
 * lexeme and leaves the cursor on the first non-digit character.
 *
 * Only unsigned integer literals are recognised here; the value is
 * accumulated in a double.
 */
void ProgramLanguageLexer::addNumberLexem()
{
    double number = 0;
    // Cast before classifying: std::isdigit is undefined for negative char
    // values, which can follow the digits in non-ASCII input.
    while (std::isdigit(static_cast<unsigned char>(*textBufferReader))) {
        number = number * 10 + (*textBufferReader - '0');
        ++textBufferReader;
    }

    ProgramLanguage::Lexem lexem{.type = ProgramLanguage::Lexem::Type::Number, .data{.number = number}};
    lexemList->push(lexem);
}
void ProgramLanguageLexer::addSymbolLexem(char symbol)
{
ProgramLanguage::Lexem lexem{.type = ProgramLanguage::Lexem::Type::Symbol, .data{.symbol = symbol}};
lexemList->push(lexem);
++textBufferReader;
}