-
Notifications
You must be signed in to change notification settings - Fork 0
/
LEX.cpp
182 lines (172 loc) · 5.73 KB
/
LEX.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
//
// Created by WANG on 2022/4/23.
//
#include "LEX.h"
/**
 * Builds a lexer from three symbol tables and prepares its transition table.
 *
 * @param kWord    copied into key_word; lexical_analysis() looks identifiers up
 *                 in it to tell keywords from plain identifiers.
 * @param OSymbol  copied into operator_symbol; InitJumpMap() reads the first
 *                 character of each entry.
 * @param SSymbol  copied into separator_symbol; InitJumpMap() reads the first
 *                 character of each entry.
 */
LEX::LEX(const set<string> &kWord, const set<string> &OSymbol, const set<string> &SSymbol) {
    // The tables must be in place before InitJumpMap() runs, because it
    // iterates over the separator and operator sets.
    this->separator_symbol = SSymbol;
    this->operator_symbol = OSymbol;
    this->key_word = kWord;

    this->InitJumpMap();
}
void LEX::InitJumpMap() {
//状态10为结束,回归开始状态0,状态9为错误状态
//状态0
for(char i = 32; i <= 126; i++) JumpState[0][i] = 9;
for(auto & i : separator_symbol) JumpState[0][i[0]] = 1;
for(auto & i : operator_symbol) JumpState[0][i[0]] = 2;
//后有可能跟 =
JumpState[0]['<'] = 3;
JumpState[0]['>'] = 3;
JumpState[0][':'] = 4;
for(char i = 'A'; i <= 'Z'; i++) JumpState[0][i] = 5;
for(char i = '0'; i <= '9'; i++) JumpState[0][i] = 6;
//状态1任意符号跳转到10
for(char i = 32; i <= 126; i++) JumpState[1][i] = 10; // 可显示的字符
//状态2任意符号跳转到10
for(char i = 32; i <= 126; i++) JumpState[2][i] = 10; // 可显示的字符
//状态3
for(char i = 32; i <= 126; i++) JumpState[3][i] = 10;
JumpState[3]['='] = 7;
//状态4
for(char i = 32; i <= 126; i++) JumpState[4][i] = 9;
JumpState[4]['='] = 8;
//状态5
for(char i = 32; i <= 126; i++) JumpState[5][i] = 10;
for(char i = 'A'; i <= 'Z'; i++) JumpState[5][i] = 5; //覆盖
for(char i = '0'; i <= '9'; i++) JumpState[5][i] = 5;
//状态6
for(char i = 32; i <= 126; i++) JumpState[6][i] = 10;
for(char i = '0'; i <= '9'; i++) JumpState[6][i] = 6;
for(char i = 'A'; i <= 'Z'; i++) JumpState[6][i] = 9; //数字开头不可字母
//状态7
for(char i = 32; i <= 126; i++) JumpState[7][i] = 10;
//状态8
for(char i = 32; i <= 126; i++) JumpState[8][i] = 10;
}
int LEX::getNextState(int nowState, char ch, const string & word) {
int nextState = JumpState[nowState][ch];
if(nextState != 10) return nextState;
else{
//长度过长
if(word.size() > 10) return 9;
return 10;
}
}
bool LEX::lexical_analysis(const string &fileName, vector<string> &sym, vector<string> &num) {
//文件读取
stringstream oss;
if(!fileName.empty()) {
ifstream in(fileName);
assert(("文件打开失败", in.is_open()));
oss << in.rdbuf();
string file_content = oss.str();
transform(file_content.begin(), file_content.end(),
file_content.begin(), ::toupper); //改为大写
oss.str("");
oss<<file_content;
}
else{
string a;
while(getline(cin,a)) {
oss << a;
oss <<" ";
}
string file_content = oss.str();
transform(file_content.begin(), file_content.end(),
file_content.begin(), ::toupper); //改为大写
oss.str("");
oss<<file_content;
}
string tmp;
int nowState = 0;
int nextState;
string word;
while (oss >> tmp) {
for(int i = 0; i < tmp.size(); i++) {
nextState = getNextState(nowState,tmp[i],word);
if(nextState == 9){
cout<<"Lexical Error\n";
return false;
}
if(nextState == 10){
i--;nextState = 0;
if(nowState == 6){
//数字
string outStr = "NUMBER " + to_string(stoi(word));
ans.push_back(outStr);
param.emplace_back(to_string(stoi(word)),Number);
}
else if(nowState == 5){
//标识符
string outStr;
if(key_word.find(word) == key_word.end()) {
outStr = "IDENTIFIER " + word;
param.emplace_back(word,Identifier);
}
else {
outStr = word;
param.emplace_back(word,KeyWord);
}
ans.push_back(outStr);
}
else {
ans.push_back(word);
param.emplace_back(word,Other);
}
word.clear();
nowState = nextState;
continue;
}
word.push_back(tmp[i]);
nowState = nextState;
}
if(!word.empty()){
nextState = getNextState(nowState,' ',word);
if(nextState == 9){
cout<<"Lexical Error\n";
return false;
}
if(nextState == 10){
if(nowState == 6){
//数字
string outStr = "NUMBER " + to_string(stoi(word));
ans.push_back(outStr);
param.emplace_back(to_string(stoi(word)),Number);
}
else if(nowState == 5){
//标识符
string outStr;
if(key_word.find(word) == key_word.end()) {
outStr = "IDENTIFIER " + word;
param.emplace_back(word,Identifier);
}
else {
outStr = word;
param.emplace_back(word,KeyWord);
}
ans.push_back(outStr);
}
else {
ans.push_back(word);
param.emplace_back(word,Other);
}
nowState = 0;
word.clear();
}
}
}
return true;
}
/**
 * Writes every entry of `param` to the file "lex.out", one per line, as
 * "<name> <type>".
 *
 * NOTE(review): printAns() opens the same path through the same `out` member,
 * so whichever is called last presumably replaces the other's output --
 * confirm against the declaration of `out`.
 */
void LEX::printParam() {
    out.open("lex.out");
    for(const auto &entry : param) {
        // '\n' instead of endl: no flush per line; close() flushes once below.
        out << entry.name << " " << entry.type << '\n';
    }
    out.close();
}
/**
 * Writes every string of `ans` to the file "lex.out", one per line.
 *
 * NOTE(review): printParam() opens the same path through the same `out`
 * member, so whichever is called last presumably replaces the other's output
 * -- confirm against the declaration of `out`.
 */
void LEX::printAns() {
    out.open("lex.out");
    for(const auto &line : ans) {
        // '\n' instead of endl: no flush per line; close() flushes once below.
        out << line << '\n';
    }
    out.close();
}