diff --git a/sly/lexer/GenericLexer.cs b/sly/lexer/GenericLexer.cs index cb866797..82e604c7 100644 --- a/sly/lexer/GenericLexer.cs +++ b/sly/lexer/GenericLexer.cs @@ -55,7 +55,6 @@ public Config() IgnoreEOL = true; IgnoreWS = true; WhiteSpace = new[] { ' ', '\t' }; - } public IdentifierType IdType { get; set; } @@ -65,25 +64,27 @@ public Config() public bool IgnoreWS { get; set; } public char[] WhiteSpace { get; set; } - + public bool KeyWordIgnoreCase { get; set; } - + public bool IndentationAware { get; set; } - + public string Indentation { get; set; } - + public IEnumerable IdentifierStartPattern { get; set; } - + public IEnumerable IdentifierRestPattern { get; set; } - public BuildExtension ExtensionBuilder { get; set; } + public Action> ExtensionBuilder { get; set; } + + public IEqualityComparer KeyWordComparer => + KeyWordIgnoreCase ? StringComparer.OrdinalIgnoreCase : null; - public IEqualityComparer KeyWordComparer => KeyWordIgnoreCase ? StringComparer.OrdinalIgnoreCase : null; public IList Modes { get; set; } } - + public LexerPostProcess LexerPostProcess { get; set; } - + public string I18n { get; set; } public const string in_string = "in_string"; @@ -104,25 +105,27 @@ public Config() public const string escape_string = "escape_string"; public const string escape_char = "escape_char"; public const string in_up_to = "in_upto"; - + public const string single_line_comment_start = "single_line_comment_start"; public const string multi_line_comment_start = "multi_line_comment_start"; - protected readonly Dictionary> derivedTokens; + protected readonly + Dictionary> + derivedTokens; + protected IN doubleDerivedToken; protected char EscapeStringDelimiterChar; - protected readonly BuildExtension ExtensionBuilder; - public FSMLexerBuilder FSMBuilder; + protected readonly Action> ExtensionBuilder; + public FSMLexerBuilder FSMBuilder { get; private set; } protected IN identifierDerivedToken; protected IN intDerivedToken; - protected FSMLexer TempLexerFsm; - + internal IDictionary> SubLexersFsm { get; set; } protected int StringCounter; @@ -130,21 +133,24 @@ public Config() protected int upToCounter; - protected Dictionary, Token>> CallBacks = new Dictionary, Token>>(); + protected Dictionary, Token>> CallBacks = + new Dictionary, Token>>(); protected char StringDelimiterChar; private readonly IEqualityComparer KeyWordComparer; public GenericLexer(IdentifierType idType = IdentifierType.Alpha, - BuildExtension extensionBuilder = null, - params GenericToken[] staticTokens) + Action> extensionBuilder = null, + params GenericToken[] staticTokens) : this(new Config { IdType = idType, ExtensionBuilder = extensionBuilder }, staticTokens) - { } + { + } public GenericLexer(Config config, GenericToken[] staticTokens) { - derivedTokens = new Dictionary>(); + derivedTokens = + new Dictionary>(); ExtensionBuilder = config.ExtensionBuilder; KeyWordComparer = config.KeyWordComparer; SubLexersFsm = new Dictionary>(); @@ -162,7 +168,9 @@ public void AddCallBack(IN token, Func, Token> callback) CallBacks[token] = callback; } - public void AddDefinition(TokenDefinition tokenDefinition) { } + public void AddDefinition(TokenDefinition tokenDefinition) + { + } public LexerResult Tokenize(string source) @@ -170,7 +178,7 @@ public LexerResult Tokenize(string source) var memorySource = new ReadOnlyMemory(source.ToCharArray()); return Tokenize(memorySource); } - + public LexerResult Tokenize(ReadOnlyMemory memorySource) { Stack> lexersStack = new Stack>(); @@ -178,10 +186,10 @@ public LexerResult Tokenize(ReadOnlyMemory memorySource) FSMLexer LexerFsm = SubLexersFsm[ModeAttribute.DefaultLexerMode]; lexersStack.Push(LexerFsm); LexerPosition position = new LexerPosition(); - + var tokens = new List>(); string src = memorySource.ToString(); - + var r = LexerFsm.Run(memorySource, new LexerPosition()); LexerFsm = SetLexerMode(r, lexersStack); @@ -189,15 +197,15 @@ public LexerResult Tokenize(ReadOnlyMemory memorySource) new Token(default(IN), x.SpanValue, x.Position, x.IsComment, x.CommentType, x.Channel)).ToList(); tokens.AddRange(ignored); - - + + switch (r.IsSuccess) { case false when !r.IsEOS: { var result = r.Result; var error = new LexicalError(result.Position.Line, result.Position.Column, result.CharValue, I18n); - return new LexerResult(error,tokens); + return new LexerResult(error, tokens); } case true when r.Result.IsComment: position = r.NewPosition; @@ -216,7 +224,7 @@ public LexerResult Tokenize(ReadOnlyMemory memorySource) { transcoded = callback(transcoded); } - + if (transcoded.IsLineEnding) { ComputePositionWhenIgnoringEOL(r, tokens, LexerFsm); @@ -227,17 +235,18 @@ public LexerResult Tokenize(ReadOnlyMemory memorySource) for (int i = 1; i < r.UnIndentCount; i++) { tokens.Add(transcoded); - } + } } + tokens.Add(transcoded); - r = LexerFsm.Run(memorySource,position); + r = LexerFsm.Run(memorySource, position); LexerFsm = SetLexerMode(r, lexersStack); - - ignored = r.IgnoredTokens.Select(x => - new Token(default(IN), x.SpanValue, x.Position, x.IsComment, - x.CommentType, x.Channel, x.IsWhiteSpace, x.DecimalSeparator)).ToList(); - tokens.AddRange(ignored); - + + ignored = r.IgnoredTokens.Select(x => + new Token(default(IN), x.SpanValue, x.Position, x.IsComment, + x.CommentType, x.Channel, x.IsWhiteSpace, x.DecimalSeparator)).ToList(); + tokens.AddRange(ignored); + switch (r.IsSuccess) { case false when !r.IsEOS: @@ -245,14 +254,15 @@ public LexerResult Tokenize(ReadOnlyMemory memorySource) if (r.IsIndentationError) { var result = r.Result; - var error = new IndentationError(result.Position.Line, result.Position.Column,I18n); - return new LexerResult(error,tokens); + var error = new IndentationError(result.Position.Line, result.Position.Column, I18n); + return new LexerResult(error, tokens); } else { var result = r.Result; - var error = new LexicalError(result.Position.Line, result.Position.Column, result.CharValue,I18n); - return new LexerResult(error,tokens); + var error = new LexicalError(result.Position.Line, result.Position.Column, result.CharValue, + I18n); + return new LexerResult(error, tokens); } } case true when r.Result.IsComment: @@ -276,6 +286,7 @@ public LexerResult Tokenize(ReadOnlyMemory memorySource) eos.Position = new LexerPosition(prev.Position.Index + 1, prev.Position.Line, prev.Position.Column + prev.Value.Length); } + tokens.Add(eos); return new LexerResult(tokens); } @@ -283,7 +294,7 @@ public LexerResult Tokenize(ReadOnlyMemory memorySource) private FSMLexer SetLexerMode(FSMMatch r, Stack> lexersStack) { FSMLexer LexerFsm = lexersStack.Peek(); - + if (!r.IsEOS) { if (r.IsPop) @@ -308,9 +319,10 @@ private FSMLexer SetLexerMode(FSMMatch r, Stack r, List> tokens,FSMLexer LexerFsm) + private void ComputePositionWhenIgnoringEOL(FSMMatch r, List> tokens, + FSMLexer LexerFsm) { - if (!LexerFsm.IgnoreEOL) + if (!LexerFsm.IgnoreEOL) { var newPosition = r.Result.Position.Clone(); @@ -321,10 +333,10 @@ private void ComputePositionWhenIgnoringEOL(FSMMatch r, List(GenericToken.Identifier) || staticTokens.Contains(GenericToken.KeyWord)) + if (staticTokens.Contains(GenericToken.Identifier) || + staticTokens.Contains(GenericToken.KeyWord)) { InitializeIdentifier(config); } // numeric - if (staticTokens.Contains(GenericToken.Int) || staticTokens.Contains(GenericToken.Double)) + if (staticTokens.Contains(GenericToken.Int) || + staticTokens.Contains(GenericToken.Double)) { FSMBuilder = FSMBuilder.GoTo(start) .RangeTransition('0', '9') @@ -393,7 +406,8 @@ public void InitializeIdentifier(Config config) } else { - FSMBuilder.RangeTransition(pattern[0], pattern[1]).Mark(in_identifier).End(GenericToken.Identifier); + FSMBuilder.RangeTransition(pattern[0], pattern[1]).Mark(in_identifier) + .End(GenericToken.Identifier); marked = true; } } @@ -429,7 +443,7 @@ public void InitializeIdentifier(Config config) .GoTo(in_identifier) .RangeTransitionTo('0', '9', in_identifier); } - + if (config.IdType == IdentifierType.AlphaNumericDash) { FSMBuilder @@ -448,37 +462,37 @@ public void AddLexeme(GenericToken generic, IN token) switch (match.Result.TokenID) { case GenericToken.Identifier: + { + if (derivedTokens.ContainsKey(GenericToken.Identifier)) { - if (derivedTokens.ContainsKey(GenericToken.Identifier)) - { - var possibleTokens = derivedTokens[GenericToken.Identifier]; - if (possibleTokens.ContainsKey(match.Result.Value)) - match.Properties[DerivedToken] = possibleTokens[match.Result.Value].tokenId; - else - match.Properties[DerivedToken] = identifierDerivedToken; - } + var possibleTokens = derivedTokens[GenericToken.Identifier]; + if (possibleTokens.ContainsKey(match.Result.Value)) + match.Properties[DerivedToken] = possibleTokens[match.Result.Value].tokenId; else - { match.Properties[DerivedToken] = identifierDerivedToken; - } - - break; } - case GenericToken.Int: + else { - match.Properties[DerivedToken] = intDerivedToken; - break; + match.Properties[DerivedToken] = identifierDerivedToken; } + + break; + } + case GenericToken.Int: + { + match.Properties[DerivedToken] = intDerivedToken; + break; + } case GenericToken.Double: - { - match.Properties[DerivedToken] = doubleDerivedToken; - break; - } + { + match.Properties[DerivedToken] = doubleDerivedToken; + break; + } default: - { - match.Properties[DerivedToken] = token; - break; - } + { + match.Properties[DerivedToken] = token; + break; + } } return match; @@ -487,23 +501,24 @@ public void AddLexeme(GenericToken generic, IN token) switch (generic) { case GenericToken.Int: - { - intDerivedToken = token; - FSMBuilder.GoTo(in_int); - FSMBuilder.CallBack(callback); - break; - } + { + intDerivedToken = token; + FSMBuilder.GoTo(in_int); + FSMBuilder.CallBack(callback); + break; + } case GenericToken.Identifier: - { - identifierDerivedToken = token; - FSMBuilder.GoTo(in_identifier); - FSMBuilder.CallBack(callback); - break; - } + { + identifierDerivedToken = token; + FSMBuilder.GoTo(in_identifier); + FSMBuilder.CallBack(callback); + break; + } } } - public void AddLexeme(GenericToken genericToken,BuildResult> result, IN token, bool isPop, bool isPush, string mode, string specialValue) + public void AddLexeme(GenericToken genericToken, BuildResult> result, IN token, bool isPop, + bool isPush, string mode, string specialValue) { if (genericToken == GenericToken.SugarToken) { @@ -514,7 +529,8 @@ public void AddLexeme(GenericToken genericToken,BuildResult> result, { if (genericToken == GenericToken.Identifier) { - tokensForGeneric = new Dictionary(KeyWordComparer); + tokensForGeneric = + new Dictionary(KeyWordComparer); } else { @@ -524,7 +540,7 @@ public void AddLexeme(GenericToken genericToken,BuildResult> result, derivedTokens[genericToken] = tokensForGeneric; } - tokensForGeneric[specialValue] = (token,isPop,isPush,mode); + tokensForGeneric[specialValue] = (token, isPop, isPush, mode); } public void AddDouble(IN token, string separator, BuildResult> result) @@ -532,7 +548,7 @@ public void AddDouble(IN token, string separator, BuildResult> result NodeCallback callback = match => { IN derivedToken = token; - + match.Properties[DerivedToken] = derivedToken; @@ -550,19 +566,20 @@ public void AddDouble(IN token, string separator, BuildResult> result .RangeTransitionTo('0', '9', in_double) .End(GenericToken.Double) .CallBack(callback); - + FSMBuilder.Fsm.DecimalSeparator = separatorChar; - } - public void AddKeyWord(IN token, string keyword, bool isPop, bool isPush, string mode, BuildResult> result ) + public void AddKeyWord(IN token, string keyword, bool isPop, bool isPush, string mode, + BuildResult> result) { NodeCallback callback = match => { IN derivedToken = default; if (derivedTokens.TryGetValue(GenericToken.Identifier, out var derived)) { - if (derived.TryGetValue(match.Result.Value, out (IN tokenId, bool isPop, bool isPush, string mode) derived2)) + if (derived.TryGetValue(match.Result.Value, + out (IN tokenId, bool isPop, bool isPush, string mode) derived2)) { derivedToken = derived2.tokenId; match.IsPush = derived2.isPush; @@ -584,7 +601,7 @@ public void AddKeyWord(IN token, string keyword, bool isPop, bool isPush, string return match; }; - AddLexeme(GenericToken.Identifier, result, token,isPop,isPush,mode, keyword); + AddLexeme(GenericToken.Identifier, result, token, isPop, isPush, mode, keyword); var node = FSMBuilder.GetNode(in_identifier); if (!FSMBuilder.Fsm.HasCallback(node.Id)) { @@ -593,7 +610,8 @@ public void AddKeyWord(IN token, string keyword, bool isPop, bool isPush, string } - public ReadOnlyMemory diffCharEscaper(char escapeStringDelimiterChar, char stringDelimiterChar, ReadOnlyMemory stringValue) + public ReadOnlyMemory diffCharEscaper(char escapeStringDelimiterChar, char stringDelimiterChar, + ReadOnlyMemory stringValue) { var value = stringValue; var i = 1; @@ -620,15 +638,19 @@ public ReadOnlyMemory diffCharEscaper(char escapeStringDelimiterChar, char { r += escapeStringDelimiterChar; } + escaping = false; } + if (substitutionHappened) { r += current; } } + i++; } + if (substitutionHappened) { r += value.At(value.Length - 1); @@ -638,7 +660,8 @@ public ReadOnlyMemory diffCharEscaper(char escapeStringDelimiterChar, char return value; } - public ReadOnlyMemory sameCharEscaper(char escapeStringDelimiterChar, char stringDelimiterChar, ReadOnlyMemory stringValue) + public ReadOnlyMemory sameCharEscaper(char escapeStringDelimiterChar, char stringDelimiterChar, + ReadOnlyMemory stringValue) { var value = stringValue; int i = 1; @@ -669,8 +692,10 @@ public ReadOnlyMemory sameCharEscaper(char escapeStringDelimiterChar, char r += current; } } + i++; } + if (substitutionHappened) { r += value.At(value.Length - 1); @@ -680,25 +705,29 @@ public ReadOnlyMemory sameCharEscaper(char escapeStringDelimiterChar, char return value; } - public void AddStringLexem(IN token, BuildResult> result , string stringDelimiter, + public void AddStringLexem(IN token, BuildResult> result, string stringDelimiter, string escapeDelimiterChar = "\\") { if (string.IsNullOrEmpty(stringDelimiter) || stringDelimiter.Length > 1) result.AddError(new LexerInitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n,I18NMessage.StringDelimiterMustBe1Char,stringDelimiter,token.ToString()), + I18N.Instance.GetText(I18n, I18NMessage.StringDelimiterMustBe1Char, stringDelimiter, + token.ToString()), ErrorCodes.LEXER_STRING_DELIMITER_MUST_BE_1_CHAR)); if (stringDelimiter.Length == 1 && char.IsLetterOrDigit(stringDelimiter[0])) result.AddError(new InitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n,I18NMessage.StringDelimiterCannotBeLetterOrDigit,stringDelimiter,token.ToString()), + I18N.Instance.GetText(I18n, I18NMessage.StringDelimiterCannotBeLetterOrDigit, stringDelimiter, + token.ToString()), ErrorCodes.LEXER_STRING_DELIMITER_CANNOT_BE_LETTER_OR_DIGIT)); if (string.IsNullOrEmpty(escapeDelimiterChar) || escapeDelimiterChar.Length > 1) result.AddError(new InitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n,I18NMessage.StringEscapeCharMustBe1Char,escapeDelimiterChar,token.ToString()), + I18N.Instance.GetText(I18n, I18NMessage.StringEscapeCharMustBe1Char, escapeDelimiterChar, + token.ToString()), ErrorCodes.LEXER_STRING_ESCAPE_CHAR_MUST_BE_1_CHAR)); if (escapeDelimiterChar.Length == 1 && char.IsLetterOrDigit(escapeDelimiterChar[0])) result.AddError(new InitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n,I18NMessage.StringEscapeCharCannotBeLetterOrDigit,escapeDelimiterChar,token.ToString()), + I18N.Instance.GetText(I18n, I18NMessage.StringEscapeCharCannotBeLetterOrDigit, escapeDelimiterChar, + token.ToString()), ErrorCodes.LEXER_STRING_ESCAPE_CHAR_CANNOT_BE_LETTER_OR_DIGIT)); StringDelimiterChar = (char)0; @@ -706,7 +735,7 @@ public void AddStringLexem(IN token, BuildResult> result , string st EscapeStringDelimiterChar = (char)0; var escapeStringDelimiterChar = (char)0; - + if (stringDelimiter.Length == 1) { StringCounter++; @@ -730,11 +759,13 @@ public void AddStringLexem(IN token, BuildResult> result , string st match.IsString = true; if (stringDelimiterChar != escapeStringDelimiterChar) { - match.Result.SpanValue = diffCharEscaper(escapeStringDelimiterChar,stringDelimiterChar, match.Result.SpanValue); + match.Result.SpanValue = diffCharEscaper(escapeStringDelimiterChar, stringDelimiterChar, + match.Result.SpanValue); } else - { - match.Result.SpanValue = sameCharEscaper(escapeStringDelimiterChar,stringDelimiterChar, match.Result.SpanValue); + { + match.Result.SpanValue = sameCharEscaper(escapeStringDelimiterChar, stringDelimiterChar, + match.Result.SpanValue); } return match; @@ -742,7 +773,6 @@ public void AddStringLexem(IN token, BuildResult> result , string st if (stringDelimiterChar != escapeStringDelimiterChar) { - FSMBuilder.GoTo(start); FSMBuilder.Transition(stringDelimiterChar) .Mark(in_string + StringCounter) @@ -765,39 +795,40 @@ public void AddStringLexem(IN token, BuildResult> result , string st var escaped = "escaped_same"; FSMBuilder.GoTo(start) - .Transition(stringDelimiterChar) .Mark(in_string + StringCounter) - .ExceptTransitionTo(exceptDelimiter, in_string + StringCounter) - .Transition(stringDelimiterChar) .Mark(escaped + StringCounter) .End(GenericToken.String) .CallBack(callback) - .TransitionTo(stringDelimiterChar,in_string + StringCounter) + .TransitionTo(stringDelimiterChar, in_string + StringCounter) .Transition(stringDelimiterChar); } } - - public void AddCharLexem(IN token, BuildResult> result ,string charDelimiter, string escapeDelimiterChar = "\\") + + public void AddCharLexem(IN token, BuildResult> result, string charDelimiter, + string escapeDelimiterChar = "\\") { if (string.IsNullOrEmpty(charDelimiter) || charDelimiter.Length > 1) - result.AddError(new InitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n,I18NMessage.CharDelimiterMustBe1Char,charDelimiter,token.ToString()), + result.AddError(new InitializationError(ErrorLevel.FATAL, + I18N.Instance.GetText(I18n, I18NMessage.CharDelimiterMustBe1Char, charDelimiter, token.ToString()), ErrorCodes.LEXER_CHAR_DELIMITER_MUST_BE_1_CHAR)); if (charDelimiter.Length == 1 && char.IsLetterOrDigit(charDelimiter[0])) result.AddError(new InitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n, I18NMessage.CharDelimiterCannotBeLetter,charDelimiter,token.ToString()), + I18N.Instance.GetText(I18n, I18NMessage.CharDelimiterCannotBeLetter, charDelimiter, + token.ToString()), ErrorCodes.LEXER_CHAR_DELIMITER_CANNOT_BE_LETTER)); if (string.IsNullOrEmpty(escapeDelimiterChar) || escapeDelimiterChar.Length > 1) result.AddError(new InitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n,I18NMessage.CharEscapeCharMustBe1Char,escapeDelimiterChar,token.ToString()), + I18N.Instance.GetText(I18n, I18NMessage.CharEscapeCharMustBe1Char, escapeDelimiterChar, + token.ToString()), ErrorCodes.LEXER_CHAR_ESCAPE_CHAR_MUST_BE_1_CHAR)); if (escapeDelimiterChar.Length == 1 && char.IsLetterOrDigit(escapeDelimiterChar[0])) result.AddError(new InitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n,I18NMessage.CharEscapeCharCannotBeLetterOrDigit,escapeDelimiterChar,token.ToString()), + I18N.Instance.GetText(I18n, I18NMessage.CharEscapeCharCannotBeLetterOrDigit, escapeDelimiterChar, + token.ToString()), ErrorCodes.LEXER_CHAR_ESCAPE_CHAR_CANNOT_BE_LETTER_OR_DIGIT)); CharCounter++; @@ -818,38 +849,39 @@ public void AddCharLexem(IN token, BuildResult> result ,string charDe FSMBuilder.GoTo(start); FSMBuilder.Transition(charDelimiterChar) - .Mark(start_char+"_"+CharCounter) + .Mark(start_char + "_" + CharCounter) .ExceptTransition(new[] { charDelimiterChar, escapeChar }) - .Mark(in_char+"_"+CharCounter) + .Mark(in_char + "_" + CharCounter) .Transition(charDelimiterChar) - .Mark(end_char+"_"+CharCounter) + .Mark(end_char + "_" + CharCounter) .End(GenericToken.Char) .CallBack(callback) - .GoTo(start_char+"_"+CharCounter) + .GoTo(start_char + "_" + CharCounter) .Transition(escapeChar) - .Mark(escapeChar_char+"_"+CharCounter) + .Mark(escapeChar_char + "_" + CharCounter) .ExceptTransitionTo(new[] { 'u' }, in_char + "_" + CharCounter) .CallBack(callback); FSMBuilder.Fsm.StringDelimiter = charDelimiterChar; - + // unicode transitions ? FSMBuilder = FSMBuilder.GoTo(escapeChar_char + "_" + CharCounter) - .Transition('u') - .Mark(unicode_char+"_"+CharCounter) - .RepetitionTransitionTo(in_char + "_" + CharCounter,4,"[0-9,a-z,A-Z]"); - + .Transition('u') + .Mark(unicode_char + "_" + CharCounter) + .RepetitionTransitionTo(in_char + "_" + CharCounter, 4, "[0-9,a-z,A-Z]"); } - public void AddSugarLexem(IN token, BuildResult> buildResult, string specialValue, bool isLineEnding = false, int? channel = null) + public void AddSugarLexem(IN token, BuildResult> buildResult, string specialValue, + bool isLineEnding = false, int? channel = null) { if (char.IsLetter(specialValue[0])) { buildResult.AddError(new InitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n,I18NMessage.SugarTokenCannotStartWithLetter,specialValue,token.ToString()), + I18N.Instance.GetText(I18n, I18NMessage.SugarTokenCannotStartWithLetter, specialValue, + token.ToString()), ErrorCodes.LEXER_SUGAR_TOKEN_CANNOT_START_WITH_LETTER)); return; } - + NodeCallback callback = match => { match.Properties[DerivedToken] = token; @@ -859,22 +891,18 @@ public void AddSugarLexem(IN token, BuildResult> buildResult, string Func precond = (int i) => { - return (ReadOnlyMemory value) => - { - return value.Length == i+1; - }; + return (ReadOnlyMemory value) => { return value.Length == i + 1; }; }; - + FSMBuilder.GoTo(start); - for (var i = 0; i < specialValue.Length; i++) FSMBuilder.SafeTransition(specialValue[i],precond(i)); + for (var i = 0; i < specialValue.Length; i++) FSMBuilder.SafeTransition(specialValue[i], precond(i)); FSMBuilder.End(GenericToken.SugarToken, isLineEnding) .CallBack(callback); } - - public void AddUpTo(IN token, BuildResult> buildResult, string[] exceptions, bool isLineEnding = false) + + public void AddUpTo(IN token, BuildResult> buildResult, string[] exceptions, + bool isLineEnding = false) { - - NodeCallback callback = match => { match.Properties[DerivedToken] = token; @@ -894,16 +922,16 @@ public void AddUpTo(IN token, BuildResult> buildResult, string[] exce return $"{in_up_to}_{exception}_{exceptionIndex}_{upToCounter}"; }; - + FSMBuilder.ExceptTransition(upToChars0) - .Mark(GetEndLabel(-1,-1)) + .Mark(GetEndLabel(-1, -1)) .End(GenericToken.UpTo) .CallBack(callback); - FSMBuilder.ExceptTransitionTo(upToChars0,GetEndLabel(-1,-1)); + FSMBuilder.ExceptTransitionTo(upToChars0, GetEndLabel(-1, -1)); for (int i = 0; i < exceptions.Length; i++) { string exception = exceptions[i]; - for (int j = 0; j < exception.Length-1; j ++) + for (int j = 0; j < exception.Length - 1; j++) { char exceptionChar = exception[j]; var end = GetEndLabel(i, j); @@ -915,11 +943,11 @@ public void AddUpTo(IN token, BuildResult> buildResult, string[] exce FSMBuilder.Mark(end); // FSMBuilder.End(GenericToken.AllExcept) // .CallBack(callback); - + startNode = start; } - + FSMBuilder.GoTo(startNode); if (j < exception.Length - 1) @@ -932,7 +960,7 @@ public void AddUpTo(IN token, BuildResult> buildResult, string[] exce { if (transition.Check is TransitionAnyExcept except) { - except.AddException(exception[j+1]); + except.AddException(exception[j + 1]); } } else @@ -942,13 +970,14 @@ public void AddUpTo(IN token, BuildResult> buildResult, string[] exce } } } - + upToCounter++; } - public LexerPosition ConsumeComment(Token comment, ReadOnlyMemory source, LexerPosition lexerPosition) + public LexerPosition ConsumeComment(Token comment, ReadOnlyMemory source, + LexerPosition lexerPosition) { - ReadOnlyMemory commentValue; + ReadOnlyMemory commentValue; if (comment.IsSingleLineComment) { @@ -998,7 +1027,7 @@ public Token Transcode(FSMMatch match) tok.Position = inTok.Position; tok.Discarded = inTok.Discarded; tok.StringDelimiter = match.StringDelimiterChar; - tok.TokenID = match.Properties.ContainsKey(DerivedToken) ? (IN) match.Properties[DerivedToken] : default(IN); + tok.TokenID = match.Properties.ContainsKey(DerivedToken) ? (IN)match.Properties[DerivedToken] : default(IN); tok.IsLineEnding = match.IsLineEnding; tok.IsEOS = match.IsEOS; tok.IsIndent = match.IsIndent; @@ -1015,11 +1044,10 @@ public override string ToString() { return TempLexerFsm.ToString(); } - + public string ToGraphViz() { return TempLexerFsm.ToGraphViz(); } - } } \ No newline at end of file diff --git a/sly/lexer/LexerBuilder.cs b/sly/lexer/LexerBuilder.cs index 47176dfd..c279e872 100644 --- a/sly/lexer/LexerBuilder.cs +++ b/sly/lexer/LexerBuilder.cs @@ -8,22 +8,21 @@ namespace sly.lexer { - public static class DicExt { - public static void AddToKey(this IDictionary> dic, K key, K2 k2, V value) + public static void AddToKey(this IDictionary> dic, K key, K2 k2, V value) { - IDictionary values ; + IDictionary values; if (!dic.TryGetValue(key, out values)) { values = new Dictionary(); } + values[k2] = value; dic[key] = values; } - } - + public static class EnumHelper { /// @@ -37,14 +36,15 @@ public static T[] GetAttributesOfType(this Enum enumVal) where T : Attribute { var type = enumVal.GetType(); var memInfo = type.GetMember(enumVal.ToString()); - var attributes = (T[]) memInfo[0].GetCustomAttributes(typeof(T), false); + var attributes = (T[])memInfo[0].GetCustomAttributes(typeof(T), false); return attributes; } } public static class LexerBuilder { - public static Dictionary> GetLexemes(BuildResult> result, string lang) where IN: struct + public static Dictionary> GetLexemes(BuildResult> result, string lang) + where IN : struct { var attributes = new Dictionary>(); @@ -52,27 +52,24 @@ public static Dictionary> GetLexemes(BuildResult().GroupBy(x => x).ToList>(); foreach (var group in grouped) { - var v = group.Key; if (group.Count() > 1) { - Enum enumValue = Enum.Parse(typeof(IN), v.ToString()) as Enum; int intValue = Convert.ToInt32(enumValue); // x is the integer value of enum - + result.AddError(new LexerInitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(lang,I18NMessage.SameValueUsedManyTime,intValue.ToString(),group.Count().ToString(),typeof(IN).FullName), + I18N.Instance.GetText(lang, I18NMessage.SameValueUsedManyTime, intValue.ToString(), + group.Count().ToString(), typeof(IN).FullName), ErrorCodes.LEXER_SAME_VALUE_USED_MANY_TIME)); - } } if (!result.IsError) { - foreach (Enum value in values) { - var tokenID = (IN) (object) value; + var tokenID = (IN)(object)value; var enumAttributes = value.GetAttributesOfType(); var singleCommentAttributes = value.GetAttributesOfType(); var multiCommentAttributes = value.GetAttributesOfType(); @@ -81,7 +78,8 @@ public static Dictionary> GetLexemes(BuildResult> GetLexemes(BuildResult> BuildLexer(BuildExtension extensionBuilder = null, LexerPostProcess lexerPostProcess = null) where IN : struct + public static BuildResult> BuildLexer( + Action> extensionBuilder = null, + LexerPostProcess lexerPostProcess = null) where IN : struct { - return BuildLexer(new BuildResult < ILexer < IN >>() , extensionBuilder, lexerPostProcess:lexerPostProcess); + return BuildLexer(new BuildResult>(), extensionBuilder, lexerPostProcess: lexerPostProcess); } public static BuildResult> BuildLexer(BuildResult> result, - BuildExtension extensionBuilder = null, - string lang = null, LexerPostProcess lexerPostProcess = null, IList explicitTokens = null) where IN : struct + Action> extensionBuilder = null, + string lang = null, LexerPostProcess lexerPostProcess = null, IList explicitTokens = null) + where IN : struct { - var attributes = GetLexemes(result,lang); + var attributes = GetLexemes(result, lang); if (!result.IsError) { - result = Build(attributes, result, extensionBuilder,lang, explicitTokens); + result = Build(attributes, result, extensionBuilder, lang, explicitTokens); if (!result.IsError) { result.Result.LexerPostProcess = lexerPostProcess; } } - + return result; } private static BuildResult> Build(Dictionary> attributes, - BuildResult> result, BuildExtension extensionBuilder = null, string lang = null, + BuildResult> result, Action> extensionBuilder = null, + string lang = null, IList explicitTokens = null) where IN : struct { var hasRegexLexemes = IsRegexLexer(attributes); @@ -126,7 +128,7 @@ private static BuildResult> Build(Dictionary> Build(Dictionary> BuildRegexLexer(Dictionary(tokenID, lexeme.Pattern, channel,lexeme.IsSkippable, + lexer.AddDefinition(new TokenDefinition(tokenID, lexeme.Pattern, channel, + lexeme.IsSkippable, lexeme.IsLineEnding)); } } @@ -196,7 +199,8 @@ private static BuildResult> BuildRegexLexer(Dictionary> BuildRegexLexer(Dictionary>> GetSubLexers( IDictionary> attributes) where IN : struct { - Dictionary>> subLexers = new Dictionary>>(); + Dictionary>> subLexers = + new Dictionary>>(); foreach (var attribute in attributes) { if (attribute.Key is Enum enumValue) @@ -222,16 +227,16 @@ private static Dictionary>> GetSub { foreach (var mode in modes) { - subLexers.AddToKey(mode,attribute.Key,attribute.Value); + subLexers.AddToKey(mode, attribute.Key, attribute.Value); } } } } else { - subLexers.AddToKey(ModeAttribute.DefaultLexerMode,attribute.Key,attribute.Value); + subLexers.AddToKey(ModeAttribute.DefaultLexerMode, attribute.Key, attribute.Value); } - + var push = enumValue.GetAttributesOfType(); if (push != null && push.Length >= 1) { @@ -239,15 +244,13 @@ private static Dictionary>> GetSub { x.IsPush = true; x.Pushtarget = push.First().TargetMode; - }); + }); } + var pop = enumValue.GetAttributesOfType(); if (pop != null && pop.Length >= 1) { - attribute.Value.ForEach(x => - { - x.IsPop = true; - }); + attribute.Value.ForEach(x => { x.IsPop = true; }); } } } @@ -261,12 +264,10 @@ private static Dictionary>> GetSub return subLexers; } - private static (GenericLexer.Config, GenericToken[]) GetConfigAndGenericTokens(IDictionary> attributes) + private static (GenericLexer.Config, GenericToken[]) GetConfigAndGenericTokens( + IDictionary> attributes) where IN : struct { - - - var config = new GenericLexer.Config(); var lexerAttribute = typeof(IN).GetCustomAttribute(); if (lexerAttribute != null) @@ -278,6 +279,7 @@ private static (GenericLexer.Config, GenericToken[]) GetConfigAndGenericToke config.Indentation = lexerAttribute.Indentation; config.IndentationAware = lexerAttribute.IndentationAWare; } + var modesAttribute = typeof(IN).GetCustomAttribute(); if (modesAttribute != null) { @@ -298,10 +300,10 @@ private static (GenericLexer.Config, GenericToken[]) GetConfigAndGenericToke } } } - + return (config, statics.Distinct().ToArray()); } - + private static IEnumerable ParseIdentifierPattern(string pattern) { var index = 0; @@ -317,6 +319,7 @@ private static IEnumerable ParseIdentifierPattern(string pattern) { yield return new[] { pattern[index + 2], pattern[index] }; } + index += 3; } else @@ -326,7 +329,8 @@ private static IEnumerable ParseIdentifierPattern(string pattern) } } - private static NodeCallback GetCallbackSingle(IN token, bool doNotIgnore, int channel) where IN : struct + private static NodeCallback GetCallbackSingle(IN token, bool doNotIgnore, int channel) + where IN : struct { NodeCallback callback = match => { @@ -340,7 +344,8 @@ private static NodeCallback GetCallbackSingle(IN token, bool d return callback; } - private static NodeCallback GetCallbackMulti(IN token, bool doNotIgnore, int channel) where IN : struct + private static NodeCallback GetCallbackMulti(IN token, bool doNotIgnore, int channel) + where IN : struct { NodeCallback callbackMulti = match => { @@ -353,11 +358,11 @@ private static NodeCallback GetCallbackMulti(IN token, bool do }; return callbackMulti; } - - - - private static BuildResult> BuildGenericSubLexers(Dictionary> attributes, - BuildExtension extensionBuilder, BuildResult> result, string lang, + + + private static BuildResult> BuildGenericSubLexers( + Dictionary> attributes, + Action> extensionBuilder, BuildResult> result, string lang, IList explicitTokens = null) where IN : struct { GenericLexer genLexer = null; @@ -372,27 +377,24 @@ private static BuildResult> BuildGenericSubLexers(Dictionary> BuildGenericLexer(IDictionary> attributes, - BuildExtension extensionBuilder, BuildResult> result, string lang, + Action> extensionBuilder, BuildResult> result, string lang, IList explicitTokens = null) where IN : struct { - result = CheckStringAndCharTokens(attributes, result, lang); var (config, tokens) = GetConfigAndGenericTokens(attributes); - - + config.ExtensionBuilder = extensionBuilder; var lexer = new GenericLexer(config, tokens); var Extensions = new Dictionary(); @@ -406,7 +408,7 @@ private static BuildResult> BuildGenericLexer(IDictionary x.GenericToken != GenericToken.Identifier)) { try @@ -423,7 +425,8 @@ private static BuildResult> BuildGenericLexer(IDictionary> BuildGenericLexer(IDictionary> BuildGenericLexer(IDictionary> BuildGenericLexer(IDictionary(Extensions, extensionBuilder, lexer); - - var allComments = GetCommentsAttribute(result,lang); - var CommentsForSubLexer = allComments.Where(x => attributes.Keys.ToList().Contains(x.Key)).ToDictionary(x => x.Key, x => x.Value); + + var allComments = GetCommentsAttribute(result, lang); + var CommentsForSubLexer = allComments.Where(x => attributes.Keys.ToList().Contains(x.Key)) + .ToDictionary(x => x.Key, x => x.Value); if (!result.IsError) { foreach (var comment in CommentsForSubLexer) { - foreach (var commentAttr in comment.Value) { var fsmBuilder = lexer.FSMBuilder; @@ -507,7 +511,8 @@ private static BuildResult> BuildGenericLexer(IDictionary.single_line_comment_start); fsmBuilder.End(GenericToken.Comment); - fsmBuilder.CallBack(GetCallbackSingle(comment.Key,commentAttr.DoNotIgnore, commentAttr.Channel)); + fsmBuilder.CallBack(GetCallbackSingle(comment.Key, commentAttr.DoNotIgnore, + commentAttr.Channel)); } var hasMultiLine = !string.IsNullOrWhiteSpace(commentAttr.MultiLineCommentStart); @@ -515,12 +520,13 @@ private static BuildResult> BuildGenericLexer(IDictionary.start); fsmBuilder.ConstantTransition(commentAttr.MultiLineCommentStart); fsmBuilder.Mark(GenericLexer.multi_line_comment_start); fsmBuilder.End(GenericToken.Comment); - fsmBuilder.CallBack(GetCallbackMulti(comment.Key,commentAttr.DoNotIgnore, commentAttr.Channel)); + fsmBuilder.CallBack(GetCallbackMulti(comment.Key, commentAttr.DoNotIgnore, + commentAttr.Channel)); } } } @@ -534,22 +540,21 @@ private static BuildResult> BuildGenericLexer(IDictionary mark.Key == GenericLexer.in_identifier)) { // no identifier pattern has been defined. Creating a default one to allow explicit keyword tokens - (lexer as GenericLexer).InitializeIdentifier(new GenericLexer.Config() { IdType = IdentifierType.Alpha}); + (lexer as GenericLexer).InitializeIdentifier(new GenericLexer.Config() + { IdType = IdentifierType.Alpha }); } - + var x = fsmBuilder.Fsm.Run(explicitToken, new LexerPosition()); if (x.IsSuccess) { - - - var t = fsmBuilder.Marks; var y = fsmBuilder.Marks.FirstOrDefault(k => k.Value == x.NodeId); if (y.Key == GenericLexer.in_identifier) // explicit keyword { var resultx = new BuildResult>(); result.Errors.AddRange(resultx.Errors); - lexer.AddKeyWord(default(IN), explicitToken,false,false,ModeAttribute.DefaultLexerMode, resultx); + lexer.AddKeyWord(default(IN), explicitToken, false, false, + ModeAttribute.DefaultLexerMode, resultx); ; } else @@ -567,16 +572,15 @@ private static BuildResult> BuildGenericLexer(IDictionary> CheckStringAndCharTokens( - IDictionary> attributes, BuildResult> result, string lang) where IN : struct + IDictionary> attributes, BuildResult> result, string lang) + where IN : struct { var allLexemes = attributes.Values.SelectMany, LexemeAttribute>(a => a); var allDelimiters = allLexemes - .Where(a => a.IsString || a.IsChar) - .Where(a => a.HasGenericTokenParameters) - .Select(a => a.GenericTokenParameters[0]); + .Where(a => a.IsString || a.IsChar) + .Where(a => a.HasGenericTokenParameters) + .Select(a => a.GenericTokenParameters[0]); var duplicates = allDelimiters.GroupBy(x => x) - .Where>(g => g.Count() > 1) - .Select(y => new { Element = y.Key, Counter = y.Count() }); + .Where>(g => g.Count() > 1) + .Select(y => new { Element = y.Key, Counter = y.Count() }); foreach (var duplicate in duplicates) { result.AddError(new LexerInitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(lang,I18NMessage.DuplicateStringCharDelimiters,duplicate.Element,duplicate.Counter.ToString()), + I18N.Instance.GetText(lang, I18NMessage.DuplicateStringCharDelimiters, duplicate.Element, + duplicate.Counter.ToString()), ErrorCodes.LEXER_DUPLICATE_STRING_CHAR_DELIMITERS)); } @@ -615,46 +621,52 @@ private static BuildResult> CheckStringAndCharTokens( } - private static Dictionary> GetCommentsAttribute(BuildResult> result, string lang) where IN : struct + private static Dictionary> GetCommentsAttribute(BuildResult> result, + string lang) where IN : struct { var attributes = new Dictionary>(); var values = Enum.GetValues(typeof(IN)); foreach (Enum value in values) { - var tokenID = (IN) (object) value; + var tokenID = (IN)(object)value; var enumAttributes = value.GetAttributesOfType(); - if (enumAttributes != null && enumAttributes.Any()) attributes[tokenID] = enumAttributes.ToList(); + if (enumAttributes != null && enumAttributes.Any()) + attributes[tokenID] = enumAttributes.ToList(); } - var commentCount = attributes.Values.Select, int>(l => l?.Count(attr => attr.GetType() == typeof(CommentAttribute)) ?? 0).Sum(); - var multiLineCommentCount = attributes.Values.Select, int>(l => l?.Count(attr => attr.GetType() == typeof(MultiLineCommentAttribute)) ?? 0).Sum(); - var singleLineCommentCount = attributes.Values.Select, int>(l => l?.Count(attr => attr.GetType() == typeof(SingleLineCommentAttribute)) ?? 0).Sum(); + var commentCount = attributes.Values.Select, int>(l => + l?.Count(attr => attr.GetType() == typeof(CommentAttribute)) ?? 0).Sum(); + var multiLineCommentCount = attributes.Values.Select, int>(l => + l?.Count(attr => attr.GetType() == typeof(MultiLineCommentAttribute)) ?? 0).Sum(); + var singleLineCommentCount = attributes.Values.Select, int>(l => + l?.Count(attr => attr.GetType() == typeof(SingleLineCommentAttribute)) ?? 0).Sum(); if (commentCount > 1) { result.AddError(new LexerInitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(lang,I18NMessage.TooManyComment), + I18N.Instance.GetText(lang, I18NMessage.TooManyComment), ErrorCodes.LEXER_TOO_MANY_COMMNENT)); } if (multiLineCommentCount > 1) { result.AddError(new LexerInitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(lang,I18NMessage.TooManyMultilineComment), + I18N.Instance.GetText(lang, I18NMessage.TooManyMultilineComment), ErrorCodes.LEXER_TOO_MANY_MULTILINE_COMMNENT)); } if (singleLineCommentCount > 1) { - result.AddError(new LexerInitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(lang,I18NMessage.TooManySingleLineComment),ErrorCodes.LEXER_TOO_MANY_SINGLELINE_COMMNENT)); + result.AddError(new LexerInitializationError(ErrorLevel.FATAL, + I18N.Instance.GetText(lang, I18NMessage.TooManySingleLineComment), + ErrorCodes.LEXER_TOO_MANY_SINGLELINE_COMMNENT)); } if (commentCount > 0 && (multiLineCommentCount > 0 || singleLineCommentCount > 0)) { - result.AddError(new LexerInitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(lang,I18NMessage.CannotMixCommentAndSingleOrMulti), + result.AddError(new LexerInitializationError(ErrorLevel.FATAL, + I18N.Instance.GetText(lang, I18NMessage.CannotMixCommentAndSingleOrMulti), ErrorCodes.LEXER_CANNOT_MIX_COMMENT_AND_SINGLE_OR_MULTI)); } @@ -662,7 +674,7 @@ private static Dictionary> GetCommentsAttribute(B } private static void AddExtensions(Dictionary extensions, - BuildExtension extensionBuilder, GenericLexer lexer) where IN : struct + Action> extensionBuilder, GenericLexer lexer) where IN : struct { if (extensionBuilder != null) { diff --git a/sly/lexer/fsm/FSMLexer.cs b/sly/lexer/fsm/FSMLexer.cs index 29120c0c..5216ff8d 100644 --- a/sly/lexer/fsm/FSMLexer.cs +++ b/sly/lexer/fsm/FSMLexer.cs @@ -12,22 +12,19 @@ public static T At(this ReadOnlyMemory memory, int index) { return memory.Span[index]; } - + public static T At(this ReadOnlyMemory memory, LexerPosition position) { return memory.Span[position.Index]; } } - public delegate void BuildExtension(IN token, LexemeAttribute lexem, GenericLexer lexer) where IN : struct; - public delegate List> LexerPostProcess(List> tokens) where IN : struct; - public class FSMLexer where N : struct + public class FSMLexer where N : struct { - public string Mode { get; set; } - + private readonly Dictionary> Nodes; public char StringDelimiter = '"'; @@ -52,9 +49,9 @@ public FSMLexer() public bool IgnoreEOL { get; set; } public bool AggregateEOL { get; set; } - + public bool IndentationAware { get; set; } - + public string Indentation { get; set; } @@ -71,10 +68,9 @@ public string ToGraphViz() dump.Append(fsmNode.Value.Id); var shape = fsmNode.Value.IsEnd ? "doublecircle" : "circle"; dump.AppendLine($@"[shape={shape} label=""{fsmNode.Value.GraphVizNodeLabel()}""] "); - } - - foreach (var transition in Transitions.Values.SelectMany, FSMTransition>(x => x) ) + + foreach (var transition in Transitions.Values.SelectMany, FSMTransition>(x => x)) dump.AppendLine(transition.ToGraphViz(Nodes)); dump.AppendLine("}"); @@ -154,7 +150,6 @@ public FSMNode AddNode() #region run - public FSMMatch Run(string source, LexerPosition position) { return Run(new ReadOnlyMemory(source.ToCharArray()), position); @@ -168,6 +163,7 @@ public int ComputeIndentationSize(ReadOnlyMemory source, int index) { return 0; } + string id = source.Slice(index, Indentation.Length).ToString(); while (id == Indentation) { @@ -175,6 +171,7 @@ public int ComputeIndentationSize(ReadOnlyMemory source, int index) index += Indentation.Length; id = source.Slice(index, Indentation.Length).ToString(); } + return count; } @@ -182,44 +179,43 @@ public List GetIndentations(ReadOnlyMemory source, int index) { List indentations = new List(); int i = 0; - if (index >= source.Length) + if (index >= source.Length) { return new List(); ; } - char current = source.At(index+i); - while (i < source.Length && (current == ' ' || current == '\t' )) + + char current = source.At(index + i); + while (i < source.Length && (current == ' ' || current == '\t')) { indentations.Add(current); i++; - current = source.At(index+i); + current = source.At(index + i); } return indentations; } - - - + + public FSMMatch Run(ReadOnlyMemory source, LexerPosition lexerPosition) { - if (IndentationAware) { - var ind = ConsumeIndents(source, lexerPosition); + var ind = ConsumeIndents(source, lexerPosition); if (ind != null) { return ind; } } - + // if line start : - + // consume tabs and count them // if count = previousCount +1 => add an indent token // if count = previousCount -1 => add an unindent token // else .... - - var ignoredTokens = ConsumeIgnored(source,lexerPosition); + + var ignoredTokens = ConsumeIgnored(source, lexerPosition); if (IndentationAware) // could start of line { @@ -256,7 +252,9 @@ public FSMMatch Run(ReadOnlyMemory source, LexerPosition lexerPosition) { // Remember the possible match lexerPosition.Mode = this.Mode; - result = new FSMMatch(true, currentNode.Value, currentValue, position, currentNode.Id,lexerPosition, currentNode.IsLineEnding, currentNode.IsPopModeNode, currentNode.IsPushModeNode, currentNode.PushToMode,DecimalSeparator); + result = new FSMMatch(true, currentNode.Value, currentValue, position, currentNode.Id, + lexerPosition, currentNode.IsLineEnding, currentNode.IsPopModeNode, currentNode.IsPushModeNode, + currentNode.PushToMode, DecimalSeparator); } lexerPosition.Index++; @@ -287,21 +285,20 @@ public FSMMatch Run(ReadOnlyMemory source, LexerPosition lexerPosition) } var errorChar = source.Slice(lexerPosition.Index, 1); - var ko = new FSMMatch(false, default(N), errorChar, lexerPosition, -1,lexerPosition, false); + var ko = new FSMMatch(false, default(N), errorChar, lexerPosition, -1, lexerPosition, false); return ko; } private FSMMatch ConsumeIndents(ReadOnlyMemory source, LexerPosition lexerPosition) { - if (lexerPosition.IsStartOfLine) { var indents = GetIndentations(source, lexerPosition.Index); - + var currentIndentations = lexerPosition.Indentations.ToList(); int uIndentCount = 0; - + int indentPosition = 0; if (currentIndentations.Any()) { @@ -309,7 +306,7 @@ private FSMMatch ConsumeIndents(ReadOnlyMemory source, LexerPosition le int indentCharCount = 0; while (i < currentIndentations.Count && indentPosition < indents.Count) { - var current = currentIndentations[currentIndentations.Count-i-1]; + var current = currentIndentations[currentIndentations.Count - i - 1]; int j = 0; if (indentPosition + current.Length > indents.Count) { @@ -328,7 +325,8 @@ private FSMMatch ConsumeIndents(ReadOnlyMemory source, LexerPosition le var actual = indents[j + indentPosition]; if (actual != reference) { - var ko = new FSMMatch(false, default(N), " ", lexerPosition, -1, lexerPosition, false) + var ko = new FSMMatch(false, default(N), " ", lexerPosition, -1, lexerPosition, + false) { IsIndentationError = true }; @@ -345,7 +343,7 @@ private FSMMatch ConsumeIndents(ReadOnlyMemory source, LexerPosition le { var t = indents.Skip(indentCharCount).ToArray(); var newTab = new string(t); - var indent = FSMMatch.Indent(lexerPosition.Indentations.Count()+1); + var indent = FSMMatch.Indent(lexerPosition.Indentations.Count() + 1); indent.Result = new Token { IsIndent = true, @@ -356,8 +354,8 @@ private FSMMatch ConsumeIndents(ReadOnlyMemory source, LexerPosition le indent.NewPosition.Column += indents.Count; indent.NewPosition.Indentations = indent.NewPosition.Indentations.Push(newTab); return indent; - } + indentPosition += current.Length; i++; } @@ -368,8 +366,8 @@ private FSMMatch ConsumeIndents(ReadOnlyMemory source, LexerPosition le currentIndentations.Reverse(); var unindented = currentIndentations.Take(i).ToList(); var spaces = unindented.Select(x => x.Length).Sum(); - - var uIndent = FSMMatch.UIndent(uIndentCount,uIndentCount); + + var uIndent = FSMMatch.UIndent(uIndentCount, uIndentCount); uIndent.Result = new Token { IsIndent = true, @@ -383,6 +381,7 @@ private FSMMatch ConsumeIndents(ReadOnlyMemory source, LexerPosition le { uIndent.NewPosition.Indentations = uIndent.NewPosition.Indentations.Pop(); } + return uIndent; } else @@ -408,9 +407,11 @@ private FSMMatch ConsumeIndents(ReadOnlyMemory source, LexerPosition le indent.NewPosition = lexerPosition.Clone(); indent.NewPosition.Index += indents.Count; indent.NewPosition.Column += indents.Count; - indent.NewPosition.Indentations = indent.NewPosition.Indentations.Push(new string(indents.ToArray())); + indent.NewPosition.Indentations = + indent.NewPosition.Indentations.Push(new string(indents.ToArray())); return indent; } + ; return null; } @@ -439,13 +440,12 @@ private FSMNode Move(FSMNode from, char token, ReadOnlyMemory value) public FSMNode GetNext(int from, char token) { var node = Nodes[from]; - + return Move(node, token, "".AsMemory()); } private List> ConsumeIgnored(ReadOnlyMemory source, LexerPosition position) { - bool eolReached = false; List> ignoredTokens = new List>(); while (position.Index < source.Length && !(eolReached && IndentationAware)) @@ -455,8 +455,9 @@ private List> ConsumeIgnored(ReadOnlyMemory source, LexerPosition var currentCharacter = source.At(position.Index); if (WhiteSpaces.Contains(currentCharacter)) { - var whiteToken = new Token(default(N),source.Slice(position.Index, 1), position, false, CommentType.No, - Channels.WhiteSpaces,isWhiteSpace:true, decimalSeparator:DecimalSeparator); + var whiteToken = new Token(default(N), source.Slice(position.Index, 1), position, false, + CommentType.No, + Channels.WhiteSpaces, isWhiteSpace: true, decimalSeparator: DecimalSeparator); ignoredTokens.Add(whiteToken); whiteToken.IsWhiteSpace = true; position.Index++; @@ -484,10 +485,10 @@ private List> ConsumeIgnored(ReadOnlyMemory source, LexerPosition break; } + return ignoredTokens; } #endregion - } } \ No newline at end of file diff --git a/sly/parser/generator/EBNFParserBuilder.cs b/sly/parser/generator/EBNFParserBuilder.cs index a6e62f71..62c7f453 100644 --- a/sly/parser/generator/EBNFParserBuilder.cs +++ b/sly/parser/generator/EBNFParserBuilder.cs @@ -4,6 +4,7 @@ using System.Reflection; using sly.buildresult; using sly.i18n; +using sly.lexer; using sly.lexer.fsm; using sly.parser.generator.visitor; using sly.parser.llparser; @@ -16,23 +17,23 @@ namespace sly.parser.generator /// internal class EBNFParserBuilder : ParserBuilder where IN : struct { - public EBNFParserBuilder(string i18n = null) : base(i18n) { } - + public override BuildResult> BuildParser(object parserInstance, ParserType parserType, - string rootRule, BuildExtension extensionBuilder = null, LexerPostProcess lexerPostProcess = null) + string rootRule, Action> extensionBuilder = null, + LexerPostProcess lexerPostProcess = null) { if (string.IsNullOrEmpty(rootRule)) { var rootAttribute = parserInstance.GetType().GetCustomAttribute(); - if (rootAttribute != null) + if (rootAttribute != null) { rootRule = rootAttribute.RootRule; } } - + var ruleparser = new RuleParser(); var builder = new ParserBuilder>(I18n); @@ -46,24 +47,25 @@ public override BuildResult> BuildParser(object parserInstance, try { configuration = ExtractEbnfParserConfiguration(parserInstance.GetType(), grammarParser); - LeftRecursionChecker recursionChecker = new LeftRecursionChecker(); - + LeftRecursionChecker recursionChecker = new LeftRecursionChecker(); + // check left recursion. - var (foundRecursion, recursions) = LeftRecursionChecker.CheckLeftRecursion(configuration); + var (foundRecursion, recursions) = LeftRecursionChecker.CheckLeftRecursion(configuration); if (foundRecursion) { - var recs = string.Join("\n", recursions.Select, string>(x => string.Join(" > ",x))); + var recs = string.Join("\n", recursions.Select, string>(x => string.Join(" > ", x))); result.AddError(new ParserInitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n,I18NMessage.LeftRecursion,recs), + I18N.Instance.GetText(I18n, I18NMessage.LeftRecursion, recs), ErrorCodes.PARSER_LEFT_RECURSIVE)); return result; } - + configuration.StartingRule = rootRule; } catch (Exception e) { - result.AddError(new ParserInitializationError(ErrorLevel.ERROR, e.Message,ErrorCodes.PARSER_UNKNOWN_ERROR)); + result.AddError(new ParserInitializationError(ErrorLevel.ERROR, e.Message, + ErrorCodes.PARSER_UNKNOWN_ERROR)); return result; } @@ -71,15 +73,17 @@ public override BuildResult> BuildParser(object parserInstance, SyntaxTreeVisitor visitor = null; visitor = new EBNFSyntaxTreeVisitor(configuration, parserInstance); - var parser = new Parser(I18n,syntaxParser, visitor); + var parser = new Parser(I18n, syntaxParser, visitor); parser.Configuration = configuration; - var lexerResult = BuildLexer(extensionBuilder,lexerPostProcess, configuration.GetAllExplicitTokenClauses().Select(x => x.ExplicitToken).Distinct().ToList()); + var lexerResult = BuildLexer(extensionBuilder, lexerPostProcess, + configuration.GetAllExplicitTokenClauses().Select(x => x.ExplicitToken).Distinct().ToList()); if (lexerResult.IsError) { foreach (var lexerResultError in lexerResult.Errors) { result.AddError(lexerResultError); } + return result; } else @@ -101,12 +105,12 @@ protected override ISyntaxParser BuildSyntaxParser(ParserConfiguration< { case ParserType.LL_RECURSIVE_DESCENT: { - parser = new RecursiveDescentSyntaxParser(conf, rootRule,I18n); + parser = new RecursiveDescentSyntaxParser(conf, rootRule, I18n); break; } case ParserType.EBNF_LL_RECURSIVE_DESCENT: { - parser = new EBNFRecursiveDescentSyntaxParser(conf, rootRule,I18n); + parser = new EBNFRecursiveDescentSyntaxParser(conf, rootRule, I18n); break; } default: @@ -138,7 +142,7 @@ protected virtual ParserConfiguration ExtractEbnfParserConfiguration(Ty methods.ForEach(m => { var attributes = - (ProductionAttribute[]) m.GetCustomAttributes(typeof(ProductionAttribute), true); + (ProductionAttribute[])m.GetCustomAttributes(typeof(ProductionAttribute), true); foreach (var attr in attributes) { @@ -146,7 +150,7 @@ protected virtual ParserConfiguration ExtractEbnfParserConfiguration(Ty var parseResult = grammarParser.Parse(ruleString); if (!parseResult.IsError) { - var rule = (Rule) parseResult.Result; + var rule = (Rule)parseResult.Result; rule.RuleString = ruleString; rule.SetVisitor(m); NonTerminal nonT = null; diff --git a/sly/parser/generator/ParserBuilder.cs b/sly/parser/generator/ParserBuilder.cs index 982c4eb7..cbce76a5 100644 --- a/sly/parser/generator/ParserBuilder.cs +++ b/sly/parser/generator/ParserBuilder.cs @@ -23,7 +23,7 @@ public delegate BuildResult> ParserChecker(BuildResult< public class ParserBuilder where IN : struct { #region API - + public string I18n { get; set; } public ParserBuilder(string i18n) @@ -32,9 +32,10 @@ public ParserBuilder(string i18n) { i18n = CultureInfo.CurrentCulture.TwoLetterISOLanguageName; } + I18n = i18n; } - + public ParserBuilder() : this(null) { } @@ -53,7 +54,8 @@ public ParserBuilder() : this(null) /// the name of the root non terminal of the grammar /// public virtual BuildResult> BuildParser(object parserInstance, ParserType parserType, - string rootRule = null, BuildExtension extensionBuilder = null, LexerPostProcess lexerPostProcess = null) + string rootRule = null, Action> extensionBuilder = null, + LexerPostProcess lexerPostProcess = null) { Parser parser = null; var result = new BuildResult>(); @@ -62,21 +64,22 @@ public virtual BuildResult> BuildParser(object parserInstance, P case ParserType.LL_RECURSIVE_DESCENT: { var configuration = ExtractParserConfiguration(parserInstance.GetType()); - var (foundRecursion, recursions) = LeftRecursionChecker.CheckLeftRecursion(configuration); + var (foundRecursion, recursions) = LeftRecursionChecker.CheckLeftRecursion(configuration); if (foundRecursion) { - var recs = string.Join("\n", recursions.Select, string>(x => string.Join(" > ",x))); + var recs = string.Join("\n", + recursions.Select, string>(x => string.Join(" > ", x))); result.AddError(new ParserInitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n,I18NMessage.LeftRecursion, recs), + I18N.Instance.GetText(I18n, I18NMessage.LeftRecursion, recs), ErrorCodes.PARSER_LEFT_RECURSIVE)); return result; - } + configuration.StartingRule = rootRule; var syntaxParser = BuildSyntaxParser(configuration, parserType, rootRule); var visitor = new SyntaxTreeVisitor(configuration, parserInstance); - parser = new Parser(I18n,syntaxParser, visitor); - + parser = new Parser(I18n, syntaxParser, visitor); + parser.Instance = parserInstance; parser.Configuration = configuration; result.Result = parser; @@ -86,7 +89,7 @@ public virtual BuildResult> BuildParser(object parserInstance, P { var builder = new EBNFParserBuilder(I18n); result = builder.BuildParser(parserInstance, ParserType.EBNF_LL_RECURSIVE_DESCENT, rootRule, - extensionBuilder,lexerPostProcess); + extensionBuilder, lexerPostProcess); break; } } @@ -96,12 +99,13 @@ public virtual BuildResult> BuildParser(object parserInstance, P { var expressionResult = parser.BuildExpressionParser(result, rootRule); if (expressionResult.IsError) result.AddErrors(expressionResult.Errors); - - - + + result.Result.Configuration = expressionResult.Result; - - var lexerResult = BuildLexer(extensionBuilder,lexerPostProcess, result.Result.Configuration.GetAllExplicitTokenClauses().Select(x => x.ExplicitToken).Distinct().ToList()); + + var lexerResult = BuildLexer(extensionBuilder, lexerPostProcess, + result.Result.Configuration.GetAllExplicitTokenClauses().Select(x => x.ExplicitToken).Distinct() + .ToList()); if (lexerResult.IsError) { foreach (var lexerResultError in lexerResult.Errors) @@ -114,14 +118,14 @@ public virtual BuildResult> BuildParser(object parserInstance, P parser.Lexer = lexerResult.Result; parser.Instance = parserInstance; result.Result = parser; - } - + result = CheckParser(result); if (result.IsError) { result.Result = null; } + return result; } else @@ -141,7 +145,7 @@ protected virtual ISyntaxParser BuildSyntaxParser(ParserConfiguration(conf, rootRule,I18n); + parser = new RecursiveDescentSyntaxParser(conf, rootRule, I18n); break; } default: @@ -156,7 +160,6 @@ protected virtual ISyntaxParser BuildSyntaxParser(ParserConfiguration ExtractNTAndRule(string ruleString) } - protected virtual BuildResult> BuildLexer(BuildExtension extensionBuilder = null, + protected virtual BuildResult> BuildLexer( + Action> extensionBuilder = null, LexerPostProcess lexerPostProcess = null, IList explicitTokens = null) { - - - var lexer = LexerBuilder.BuildLexer(new BuildResult>(), extensionBuilder, I18n, lexerPostProcess, explicitTokens); + var lexer = LexerBuilder.BuildLexer(new BuildResult>(), extensionBuilder, I18n, + lexerPostProcess, explicitTokens); return lexer; } @@ -207,7 +210,7 @@ protected virtual ParserConfiguration ExtractParserConfiguration(Type p parserClass.GetMethods(); methods.ForEach(m => { - var attributes = (ProductionAttribute[]) m.GetCustomAttributes(typeof(ProductionAttribute), true); + var attributes = (ProductionAttribute[])m.GetCustomAttributes(typeof(ProductionAttribute), true); foreach (var attr in attributes) { @@ -240,7 +243,7 @@ private Rule BuildNonTerminal(Tuple ntAndRule) rule.RuleString = $"{ntAndRule.Item1} : {ntAndRule.Item2}"; var clauses = new List>(); var ruleString = ntAndRule.Item2; - var clausesString = ruleString.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries); + var clausesString = ruleString.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries); foreach (var item in clausesString) { IClause clause = null; @@ -269,7 +272,8 @@ private Rule BuildNonTerminal(Tuple ntAndRule) } else if (item == "[d]") { - if (clauses.Last>() is TerminalClause discardedTerminal) discardedTerminal.Discarded = true; + if (clauses.Last>() is TerminalClause discardedTerminal) + discardedTerminal.Discarded = true; } else { @@ -321,7 +325,7 @@ private BuildResult> CheckUnreachable(BuildResult nonTerminal, string re found = innerNonTerminal.NonTerminalName == referenceName; break; case ChoiceClause innerChoice when innerChoice.IsNonTerminalChoice: - found = innerChoice.Choices.Where>(c => (c as NonTerminalClause).NonTerminalName == referenceName).Any>(); + found = innerChoice.Choices.Where>(c => + (c as NonTerminalClause).NonTerminalName == referenceName) + .Any>(); break; } @@ -380,6 +386,7 @@ private static bool NonTerminalReferences(NonTerminal nonTerminal, string re { found = nonTerm.NonTerminalName == referenceName; } + i++; } @@ -389,6 +396,7 @@ private static bool NonTerminalReferences(NonTerminal nonTerminal, string re iClause++; } + iRule++; } @@ -405,11 +413,12 @@ private BuildResult> CheckNotFound(BuildResult> if (clause is NonTerminalClause ntClause) if (!conf.NonTerminals.ContainsKey(ntClause.NonTerminalName)) result.AddError(new ParserInitializationError(ErrorLevel.ERROR, - I18N.Instance.GetText(I18n,I18NMessage.ReferenceNotFound,ntClause.NonTerminalName,rule.RuleString), + I18N.Instance.GetText(I18n, I18NMessage.ReferenceNotFound, ntClause.NonTerminalName, + rule.RuleString), ErrorCodes.PARSER_REFERENCE_NOT_FOUND)); return result; } - + private BuildResult> CheckAlternates(BuildResult> result, NonTerminal nonTerminal) { @@ -424,13 +433,15 @@ private BuildResult> CheckAlternates(BuildResult if (!choice.IsTerminalChoice && !choice.IsNonTerminalChoice) { result.AddError(new ParserInitializationError(ErrorLevel.ERROR, - I18N.Instance.GetText(I18n,I18NMessage.MixedChoices,rule.RuleString,choice.ToString()), + I18N.Instance.GetText(I18n, I18NMessage.MixedChoices, rule.RuleString, + choice.ToString()), ErrorCodes.PARSER_MIXED_CHOICES)); } else if (choice.IsDiscarded && choice.IsNonTerminalChoice) { result.AddError(new ParserInitializationError(ErrorLevel.ERROR, - I18N.Instance.GetText(I18n,I18NMessage.NonTerminalChoiceCannotBeDiscarded,rule.RuleString,choice.ToString()), + I18N.Instance.GetText(I18n, I18NMessage.NonTerminalChoiceCannotBeDiscarded, + rule.RuleString, choice.ToString()), ErrorCodes.PARSER_NON_TERMINAL_CHOICE_CANNOT_BE_DISCARDED)); } } @@ -440,7 +451,7 @@ private BuildResult> CheckAlternates(BuildResult return result; } - private BuildResult> CheckVisitorsSignature(BuildResult> result, + private BuildResult> CheckVisitorsSignature(BuildResult> result, NonTerminal nonTerminal) { foreach (var rule in nonTerminal.Rules) @@ -450,13 +461,13 @@ private BuildResult> CheckVisitorsSignature(BuildResult> CheckVisitorSignature(BuildResult> result, - Rule rule) + Rule rule) { if (!rule.IsExpressionRule) { @@ -465,27 +476,35 @@ private BuildResult> CheckVisitorSignature(BuildResult>(x => !(x is TerminalClause || x is ChoiceClause) || (x is TerminalClause t && !t.Discarded) || (x is ChoiceClause c && !c.IsDiscarded) ).ToList>(); + var realClauses = rule.Clauses.Where>(x => + !(x is TerminalClause || x is ChoiceClause) || + (x is TerminalClause t && !t.Discarded) || (x is ChoiceClause c && !c.IsDiscarded)) + .ToList>(); - if (visitor.GetParameters().Length != realClauses.Count && visitor.GetParameters().Length != realClauses.Count +1) + if (visitor.GetParameters().Length != realClauses.Count && + visitor.GetParameters().Length != realClauses.Count + 1) { result.AddError(new InitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n,I18NMessage.IncorrectVisitorParameterNumber,visitor.Name,rule.RuleString,realClauses.Count.ToString(),(realClauses.Count+1).ToString(),visitor.GetParameters().Length.ToString()), + I18N.Instance.GetText(I18n, I18NMessage.IncorrectVisitorParameterNumber, visitor.Name, + rule.RuleString, realClauses.Count.ToString(), (realClauses.Count + 1).ToString(), + visitor.GetParameters().Length.ToString()), ErrorCodes.PARSER_INCORRECT_VISITOR_PARAMETER_NUMBER)); // do not go further : it will cause an out of bound error. return result; } - + int i = 0; foreach (var clause in realClauses) { @@ -506,19 +525,20 @@ private BuildResult> CheckVisitorSignature(BuildResult); + expected = typeof(Group); } else { expected = typeof(OUT); } + CheckArgType(result, rule, expected, visitor, arg); break; } case ManyClause many: { Type expected = null; - Type found = arg.ParameterType; + Type found = arg.ParameterType; var innerClause = many.Clause; switch (innerClause) { @@ -555,23 +575,25 @@ private BuildResult> CheckVisitorSignature(BuildResult); } + break; } } + result = CheckArgType(result, rule, expected, visitor, arg); break; } case GroupClause group: { - Type expected = typeof(Group); - Type found = arg.ParameterType; + Type expected = typeof(Group); + Type found = arg.ParameterType; result = CheckArgType(result, rule, expected, visitor, arg); break; } case OptionClause option: { Type expected = null; - Type found = arg.ParameterType; + Type found = arg.ParameterType; var innerClause = option.Clause; switch (innerClause) { @@ -608,9 +630,11 @@ private BuildResult> CheckVisitorSignature(BuildResult); } + break; } } + result = CheckArgType(result, rule, expected, visitor, arg); break; } @@ -631,10 +655,11 @@ private BuildResult> CheckVisitorSignature(BuildResult> CheckVisitorSignature(BuildResult> CheckVisitorSignature(BuildResult), visitor, op); + result = CheckArgType(result, rule, typeof(Token), visitor, op); var right = parameters[2]; - result = CheckArgType(result, rule, typeof(OUT), visitor, right); - + result = CheckArgType(result, rule, typeof(OUT), visitor, right); } } } @@ -685,21 +712,21 @@ private BuildResult> CheckVisitorSignature(BuildResult> CheckArgType(BuildResult> result, Rule rule, Type expected, MethodInfo visitor, - ParameterInfo arg) + private BuildResult> CheckArgType(BuildResult> result, Rule rule, + Type expected, MethodInfo visitor, + ParameterInfo arg) { if (!expected.IsAssignableFrom(arg.ParameterType) && arg.ParameterType != expected) { result.AddError(new InitializationError(ErrorLevel.FATAL, - I18N.Instance.GetText(I18n,I18NMessage.IncorrectVisitorParameterType,visitor.Name,rule.RuleString,arg.Name,expected.FullName,arg.ParameterType.FullName), + I18N.Instance.GetText(I18n, I18NMessage.IncorrectVisitorParameterType, visitor.Name, + rule.RuleString, arg.Name, expected.FullName, arg.ParameterType.FullName), ErrorCodes.PARSER_INCORRECT_VISITOR_PARAMETER_TYPE)); } return result; } - - #endregion } } \ No newline at end of file diff --git a/sly/sly.csproj b/sly/sly.csproj index 716e791e..4efc6ecf 100644 --- a/sly/sly.csproj +++ b/sly/sly.csproj @@ -6,11 +6,11 @@ $(AllowedOutputExtensionsInPackageBuildOutputFolder);.pdb #LY is a parser generator halfway between parser combinators and parser generator like ANTLR b3b00 - 2.9.4.0 + 2.9.4.1 https://github.com/b3b00/sly https://github.com/b3b00/sly https://github.com/b3b00/sly/blob/master/LICENSE - 2.9.4.0 + 2.9.4.1 Library