-
Notifications
You must be signed in to change notification settings - Fork 9
/
Parser.pas
239 lines (226 loc) · 6.7 KB
/
Parser.pas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
unit Parser;
{
Author: Wanderlan Santos dos Anjos, [email protected]
Date: jan-2010
License: <extlink http://www.opensource.org/licenses/bsd-license.php>BSD</extlink>
}
interface
uses
Scanner;
type
// One parse-stack entry: a short string of at most 15 characters.
TSymbol = string[15];
// Fixed-capacity parse stack: 100 entries, indexed from 1.
TStack = array[1..100] of TSymbol;
// Parser layered on TScanner. It keeps a stack of grammar symbols and is
// driven by the Productions table: terminals are matched against tokens and
// production symbols are expanded onto the stack.
// NOTE(review): the stack index Top is not declared in this class; presumably
// it is inherited from TScanner - confirm.
TParser = class(TScanner)
private
// Symbol currently being processed; Compile dispatches on its first character.
Symbol : TSymbol;
// The parse stack. Compile seeds slot 1 with Start.
Symbols : TStack;
// Builds a readable description of a production string (used for error
// messages and for the debug stack dump in Error).
function GetProductionName(const P : AnsiString) : AnsiString;
// Replaces the production symbol in Symbol by the symbols of the alternative
// selected by the current token.
procedure ExpandProduction; inline;
// Pops the stack into Symbol and processes Mark/Require/Pop/Skip entries.
procedure PopSymbol; //inline;
protected
// Invokes Operations[Ord(Op)] as a parameterless method of this instance.
procedure Call(Operations : array of pointer; Op : char);
// Resynchronises the stack with the token stream after a syntax error.
procedure RecoverFromError(const Expected, Found : AnsiString); override;
// Semantic-analysis hook for descendants. Its call site in Compile is
// currently commented out.
procedure Analyse(Symbol : char); virtual; abstract;
// procedure Generate(Symbol : char); virtual; abstract;
public
// Parses the source identified by Source.
procedure Compile(const Source : AnsiString);
// Reports an error through the inherited handler; a stack dump for debugging
// is present in the body but disabled.
procedure Error(const Msg : AnsiString); override;
end;
implementation
uses
SysUtils, Math, Grammar, StrUtils, Token {$IFDEF UNICODE}, AnsiStrings{$ENDIF};
type
// Parameterless method pointer; TParser.Call casts a TMethod record to this
// type in order to invoke it.
TParserMethod = procedure of object;
// Pops the parse stack and loads the new top entry into Symbol, then handles
// the stack-control entries that may appear there:
//   Mark, Require - skipped by popping again;
//   Pop           - unwinds the stack down to the nearest Mark;
//   Skip          - warns that the construct is ignored and discards tokens
//                   until the symbol stored beneath the Skip entry is found.
// When the stack becomes empty (Top < 1) Symbol is left unchanged.
procedure TParser.PopSymbol;
begin
  dec(Top);
  if Top < 1 then exit;
  Symbol := Symbols[Top];
  case Symbol[1] of
    Mark, Require :
      PopSymbol;
    Pop :
      // Drop entries until a Mark is on top (the scan never goes below slot 2),
      // continue with the entry beneath it, and repeat while that entry is
      // itself a Pop.
      repeat
        while (Symbols[Top] <> Mark) and (Top > 2) do dec(Top);
        dec(Top);
        Symbol := Symbols[Top];
      until Symbol[1] <> Pop;
    Skip : begin
      // Symbols[Top + 1] is the entry that was popped just before this one.
      ShowMessage('Warning', Symbols[Top + 1] + ' construct is ignored');
      dec(Top);
      Symbol := Symbols[Top];
      // Discard tokens up to the synchronising symbol. The EndSource test
      // keeps this loop from spinning forever when the symbol never shows up
      // before the input is exhausted.
      while (UpperCase(Token.Lexeme) <> Symbol) and not EndSource do
        NextToken(true);
    end;
  end;
end;
// Calls the routine stored at index Ord(Op) of Operations as a parameterless
// method of this parser instance.
//   Operations : table of routine addresses, indexed by character code
//   Op         : character whose ordinal value selects the table entry
procedure TParser.Call(Operations : array of pointer; Op : char);
var
  Handler : TMethod;
begin
  // Assemble a method pointer by hand: Data is the instance, Code the routine.
  Handler.Data := Self;
  Handler.Code := Operations[Ord(Op)];
  TParserMethod(Handler);
end;
// Error-recovery hook. After the inherited handler has run, tries to bring the
// parse stack back in step with the token stream.
//   Expected, Found : forwarded to the inherited handler; not used otherwise.
procedure TParser.RecoverFromError(const Expected, Found : AnsiString); begin
inherited;
// Only slot 1 is left on the stack: give up and flag the end of the source.
if Top = 1 then
FEndSource := true
else begin
repeat
// Restart the search from the entry just above LastGoodTop.
// NOTE(review): LastGoodTop is maintained outside this unit; presumably it is
// the stack height at the last successful match - confirm.
Top := LastGoodTop + 1;
Symbol := Symbols[Top];
// Upper-case the lexeme in place so it can be compared with stack symbols.
Token.Lexeme := UpperCase(Token.Lexeme);
// Pop entries until the stack symbol equals the lexeme, or is a production
// symbol whose production text contains '{lexeme}' as one of its alternatives.
while (Symbol <> Token.Lexeme) and (Top > 1) do
if (Symbol[1] = Syntatic) and (pos('{' + Token.Lexeme + '}', Productions[Symbol[2]]) <> 0) then
break
else
PopSymbol;
// Nothing on the stack accepts this token: drop it and retry with the next one.
if (Top = 1) and not EndSource then NextToken;
until (Top <> 1) or EndSource;
// NOTE(review): presumably offsets the dec(Top) done by the PopSymbol call that
// follows in the parse loop, so the matching entry is processed next - confirm.
inc(Top);
end;
end;
// Parses the source identified by Source.
// The stack is seeded with Start; each iteration handles the current Symbol
// according to its first character and then pops the next one, until the
// scanner reports the end of the source or the stack is empty.
// EAbort is re-raised untouched; any other exception is reported through
// Error together with the ordinals of the first two characters of Symbol.
procedure TParser.Compile(const Source : AnsiString);

  // Handles the InsertSemi symbol: fetches a token if one is pending, moves
  // First back by the length of the current lexeme and turns the current
  // token into a ';' special symbol.
  procedure InjectSemicolon;
  begin
    if DoNextToken then NextToken;
    dec(First, length(Token.Lexeme));
    Token.Lexeme := ';';
    Token.Kind := tkSpecialSymbol;
  end;

begin
  try
    SourceName := Source;
    Symbol := Start;
    Symbols[1] := Symbol;
    Top := 1;
    repeat
      case Symbol[1] of
        #0..#127 :   // terminal spelled out literally
          MatchToken(Symbol);
        Syntatic :   // production to expand
          ExpandProduction;
        Semantic :   // semantic action, currently disabled
          ; //Analyse(Symbol[2]);
        Generator :  // code-generation action, currently disabled
          ; //Generate(Symbol[2]);
        InsertSemi :
          InjectSemicolon;
      else
        // any other symbol stands for a token kind
        MatchTerminal(CharToTokenKind(Symbol[1]));
      end;
      PopSymbol;
    until EndSource or (Top < 1);
  except
    on E : EAbort do
      raise;
    on E : Exception do
      Error(E.Message + Format(':%d.%d', [Ord(Symbol[1]), Ord(Symbol[2])]));
  end;
end;
// Reports Msg through the inherited error handler.
// The code after the exit statement is a debugging aid that prints the parse
// stack from a few slots above Top down to slot 2; it is disabled by default.
procedure TParser.Error(const Msg : AnsiString);
var
  Slot  : integer;
  Entry : TSymbol;
begin
  inherited;
  exit; // Comment this line to debug the compiler
  for Slot := min(Top + 5, high(Symbols)) downto 2 do begin
    Entry := Symbols[Slot];
    case Entry[1] of
      #0..#127 : // Terminal
        writeln(Slot, ': ', Entry);
      Syntatic : // Production
        writeln(Slot, ': #', Ord(Entry[2]), ', ', GetProductionName(Productions[Entry[2]]));
      Skip :
        writeln(Slot, ': Skip');
      Require :
        writeln(Slot, ': Require');
      Mark :
        writeln(Slot, ': Mark');
      Pop :
        writeln(Slot, ': Pop');
    else
      writeln(Slot, ': ', Entry, ': TRASH');
    end;
  end;
end;
// Expands the production symbol held in Symbol.
// Symbol[2] selects the production text in Productions. Within that text each
// alternative starts with a selector written as '{...}'. The alternative whose
// selector matches the current token is split into symbols, which replace the
// production symbol on the stack. If no selector matches, an error is raised
// only when the stack is at slot 1 or the entry above is Require.
procedure TParser.ExpandProduction;
var
Production : AnsiString;
P, TopAux, LenToken : integer;
Aux : TStack;
begin
if DoNextToken then NextToken;
if EndSource then exit;
// Remember which production is being expanded.
ErrorCode := Symbol[2];
Production := Productions[Symbol[2]];
// LenToken is the length of the selector text; 1 covers one-character selectors.
LenToken := 1;
case Token.Kind of
tkIdentifier : begin
// Prefer an alternative whose selector starts with the Ident marker ...
P := pos('{' + Ident, Production);
if P = 0 then begin
// ... otherwise look for one selected by the identifier's own spelling.
P := pos('{' + UpperCase(Token.Lexeme) + '}', Production); // find FIRST terminal
LenToken := length(Token.Lexeme);
end
end;
tkReservedWord, tkSpecialSymbol : begin
P := pos('{' + UpperCase(Token.Lexeme) + '}', Production); // find FIRST terminal
LenToken := length(Token.Lexeme);
end;
else // tkStringConstant..tkRealConstant
P := pos('{' + TokenKindToChar(Token.Kind) + '}', Production);
end;
if P <> 0 then begin
// Drop the production symbol; its expansion is pushed in its place below.
dec(Top);
TopAux := 1;
// The selector itself is the first symbol of the alternative.
Aux[1] := copy(Production, P + 1, LenToken);
// Step over '{', the selector and '}'.
inc(P, LenToken + 2);
// Split the rest of the alternative into symbols, collected in Aux.
while P <= length(Production) do begin
case Production[P] of
Syntatic..Generator : begin // Two-character symbol: marker plus the character after it
inc(TopAux);
Aux[TopAux] := Production[P] + Production[P+1];
inc(P);
end;
Ident..Pop : begin // One-character symbol
inc(TopAux);
Aux[TopAux] := Production[P];
end;
'{' : break; // Next alternative starts here: end of this one
else
// Plain character: part of a terminal's spelling.
if (Aux[TopAux] <> '') and (Aux[TopAux][1] >= Syntatic) then begin // previous entry is not a terminal: begin a new terminal
inc(TopAux);
Aux[TopAux] := Production[P]
end
else begin // Terminal
// A character directly after the selector's '}' opens a new terminal
// instead of being appended to the selector.
if Production[P-1] = '}' then begin
inc(TopAux);
Aux[TopAux] := '';
end;
Aux[TopAux] := Aux[TopAux] + Production[P]
end;
end;
inc(P)
end;
for TopAux := TopAux downto 1 do begin // push in reverse order, so the first symbol ends on top
inc(Top);
Symbols[Top] := Aux[TopAux];
end;
// NOTE(review): presumably offsets the dec(Top) of the PopSymbol call that
// follows in Compile, so the first pushed symbol is processed next - confirm.
inc(Top);
end
else
// No alternative matches the token. This is reported only at the bottom of the
// stack or when the entry above is Require; otherwise nothing is pushed.
if (Top = 1) or (Symbols[Top+1] = Require) then
RecoverFromError(GetProductionName(Production), Token.Lexeme);
end;
// Builds a human-readable description of the production string P.
//   - If P does not start with '{', the text before the first '{' is returned.
//   - Otherwise the contents of every '{...}' group are listed: a group whose
//     first character is greater than Start is shown through
//     GetNonTerminalName, any other group is shown in double quotes. Items are
//     separated by ', ' and the last separator is replaced by ' or '.
// Returns '' for an empty P. Empty groups are skipped and an unterminated
// group ends the scan, so malformed input cannot crash or loop forever.
function TParser.GetProductionName(const P : AnsiString) : AnsiString;
var
  OpenPos, ClosePos, LastSep : integer;
  Item : AnsiString;
begin
  Result := '';
  if P = '' then exit;
  if P[1] <> '{' then begin
    Result := copy(P, 1, pos('{', P) - 1);
    exit;
  end;
  LastSep := 0; // position in Result of the most recently added ', '
  OpenPos := 1;
  repeat
    ClosePos := posex('}', P, OpenPos + 1);
    if ClosePos = 0 then break; // unterminated group
    Item := copy(P, OpenPos + 1, ClosePos - OpenPos - 1);
    if Item <> '' then begin
      if Item[1] > Start then
        Item := GetNonTerminalName(Item[1])
      else
        Item := '"' + Item + '"';
      if Result <> '' then begin
        // Remember where the separator goes. Searching for the last ','
        // afterwards would pick the comma inside a quoted "," terminal.
        LastSep := length(Result) + 1;
        Result := Result + ', ';
      end;
      Result := Result + Item;
    end;
    OpenPos := posex('{', P, ClosePos + 1);
  until OpenPos = 0;
  if LastSep <> 0 then begin
    delete(Result, LastSep, 2);
    insert(' or ', Result, LastSep);
  end;
end;
end.