-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathGrammarParser.cs
174 lines (146 loc) · 6.27 KB
/
GrammarParser.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
using System;
using System.Collections.Generic;
using System.Linq;
using JetBrains.Annotations;
using Lokad.Parsing.Error;
using Lokad.Parsing.Lexer;
namespace Lokad.Parsing.Parser
{
/// <summary>
/// Parses a stream of <see cref="TTok"/> according to the grammar
/// in the child class, resulting in an output of type
/// <see cref="TResult"/>.
/// </summary>
public abstract class
GrammarParser<TSelf, TTok, TResult> where TTok : struct
{
/// <summary> For error messages. </summary>
private readonly ITokenNamer<TTok> _naming;
/// <summary> The start location of the currently evaluated rule. </summary>
/// <remarks> Computed from <see cref="StartToken"/> </remarks>
private SourceLocation _startLocation;
/// <summary> Backing for <see cref="StartToken"/>. </summary>
/// <remarks>
/// Using the default value of 0 prevent correct update of position information
/// for the first matched rule, yielding a (0:0) SourceLocation instead of the
/// real position of the first token.
/// </remarks>
private int _startToken = -1;
/// <summary> The position of the first token of the currently reduced rule, in the stream. </summary>
/// <remarks> Set from <see cref="StreamParser"/> before each reduction. </remarks>
[UsedImplicitly]
public int StartToken
{
get => _startToken;
set
{
if (_startToken == value) return;
_startToken = value;
var position = Tokens.Tokens[_startToken].Start;
Tokens.LineOfPosition(position, out var line, out var col);
_startLocation = new SourceLocation(position, line, col);
}
}
/// <summary> The position of the last (inclusive) token in the currently reduced rule. </summary>
/// <remarks> Set from <see cref="StreamParser"/> before each reduction. </remarks>
[UsedImplicitly]
public int EndToken { get; set; }
/// <summary> The location of the currently reduced rule. </summary>
public SourceSpan Location
{
get
{
var end = Tokens.Tokens[EndToken];
var start = EndToken == StartToken ? end : Tokens.Tokens[StartToken];
var length = end.Length + end.Start - start.Start;
return new SourceSpan(_startLocation, length);
}
}
/// <summary> The available tokens. </summary>
public LexerResult<TTok> Tokens { get; protected set; }
/// <summary>
/// The token reader used to parse strings into streams of
/// <see cref="TTok"/>, also contains reflexive information about
/// the token type itself.
/// </summary>
public static TokenReader<TTok> MakeTokenReader() =>
new ReflectionTokenReader<TTok>();
/// <summary> Used to compile <see cref="StreamParser"/> </summary>
public static readonly ParserCompiler<TSelf, TTok, TResult> ParserCompiler =
new ParserCompiler<TSelf, TTok, TResult>(MakeTokenReader());
/// <summary> The parser used to reduce rules. </summary>
public static readonly Func<TSelf, LexerResult<TTok>, TResult> StreamParser =
ParserCompiler.Compile();
protected GrammarParser(ITokenNamer<TTok> naming)
{
_naming = naming;
}
/// <summary> Signal that an error has occurred during parsing. </summary>
/// <remarks> Called from <see cref="StreamParser"/>. </remarks>
public void OnErrorRaw(
short[] actions,
int step,
short state,
Stack<short> stateStack)
{
var tokens = typeof (TTok).GetEnumNames().Length;
var seenStates = new HashSet<short>();
var token = Tokens.GetString(EndToken);
if (token.Length > 0)
{
token = "'" + token + "'";
}
else
{
token = _naming.TokenName(Tokens.Tokens[EndToken].Token, new TTok[0]);
}
var found = new HashSet<TTok>(AcceptableValues(actions, tokens, step, state, stateStack, seenStates).Select(i => (TTok)(object)i));
throw new ParseException(
token,
found.Select(t => _naming.TokenName(t, found)).Where(n => n != null).Distinct().ToArray(),
Location);
}
/// <summary> Retrieve all token values that can shift from the current position. </summary>
private static IEnumerable<int> AcceptableValues(
short[] actions,
int tokens,
int step,
short state,
Stack<short> stateStack,
HashSet<short> seenStates)
{
if (seenStates.Contains(state)) yield break;
seenStates.Add(state);
var a = (state-1)*step;
for (var i = 0; i < tokens; ++i)
{
var r = actions[a + i];
if (r == 0) continue;
if (r > 0)
{
// This is a shift, so the token is accepted.
yield return i;
continue;
}
var rule = -r;
var pop = ParserCompiler.Rules.GetRule(rule).Steps.Count;
var popped = new Stack<short>();
if (pop <= stateStack.Count) continue;
// Simulate reducing the rule...
stateStack.Push(state);
while (pop-- > 0) popped.Push(stateStack.Pop());
state = stateStack.Pop();
var a2 = (state-1)*step;
var s2 = actions[a2 + rule];
// ...list post-reduction shifts available...
if (s2 > 0)
foreach (var t in AcceptableValues(actions, tokens, step, s2, stateStack, seenStates))
yield return t;
// ...then undo the rule reduction
stateStack.Push(state);
while (popped.Count > 0) stateStack.Push(popped.Pop());
state = stateStack.Pop();
}
}
}
}