diff --git a/ParserTests/CommentsTests.cs b/ParserTests/CommentsTests.cs index f41507cf..a6c49aac 100644 --- a/ParserTests/CommentsTests.cs +++ b/ParserTests/CommentsTests.cs @@ -96,9 +96,6 @@ public void TestGenericMultiLineComment() var tokens = lexer.Tokenize(code).ToList(); - - - Assert.Equal(4, tokens.Count); var token1 = tokens[0]; @@ -127,6 +124,98 @@ public void TestGenericMultiLineComment() } + [Fact] + public void TestInnerMultiComment() { + var lexerRes = LexerBuilder.BuildLexer(new BuildResult>()); + Assert.False(lexerRes.IsError); + var lexer = lexerRes.Result as GenericLexer; + + + string dump = lexer.ToString(); + + string code = @"1 +2 /* inner */ 3 +4 + "; + + var tokens = lexer.Tokenize(code).ToList(); + + Assert.Equal(5, tokens.Count); + + var token1 = tokens[0]; + var token2 = tokens[1]; + var token3 = tokens[2]; + var token4 = tokens[3]; + var token5 = tokens[4]; + + + Assert.Equal(CommentsToken.INT,token1.TokenID); + Assert.Equal("1",token1.Value); + Assert.Equal(0,token1.Position.Line); + Assert.Equal(0,token1.Position.Column); + + Assert.Equal(CommentsToken.INT,token2.TokenID); + Assert.Equal("2",token2.Value); + Assert.Equal(1,token2.Position.Line); + Assert.Equal(0,token2.Position.Column); + + Assert.Equal(CommentsToken.COMMENT,token3.TokenID); + Assert.Equal(@" inner ",token3.Value); + Assert.Equal(1,token3.Position.Line); + Assert.Equal(2,token3.Position.Column); + + Assert.Equal(CommentsToken.INT,token4.TokenID); + Assert.Equal("3",token4.Value); + Assert.Equal(1,token4.Position.Line); + Assert.Equal(14,token4.Position.Column); + + Assert.Equal(CommentsToken.INT,token5.TokenID); + Assert.Equal("4",token5.Value); + Assert.Equal(2,token5.Position.Line); + Assert.Equal(0,token5.Position.Column); + } + + [Fact] + public void NotEndingMultiComment() { + var lexerRes = LexerBuilder.BuildLexer(new BuildResult>()); + Assert.False(lexerRes.IsError); + var lexer = lexerRes.Result as GenericLexer; + + + string dump = lexer.ToString(); + + string 
code = @"1 +2 /* not ending +comment"; + + var tokens = lexer.Tokenize(code).ToList(); + + Assert.Equal(3, tokens.Count); + + var token1 = tokens[0]; + var token2 = tokens[1]; + var token3 = tokens[2]; + + + Assert.Equal(CommentsToken.INT,token1.TokenID); + Assert.Equal("1",token1.Value); + Assert.Equal(0,token1.Position.Line); + Assert.Equal(0,token1.Position.Column); + + Assert.Equal(CommentsToken.INT,token2.TokenID); + Assert.Equal("2",token2.Value); + Assert.Equal(1,token2.Position.Line); + Assert.Equal(0,token2.Position.Column); + + Assert.Equal(CommentsToken.COMMENT,token3.TokenID); + Assert.Equal(@" not ending +comment",token3.Value); + Assert.Equal(1,token3.Position.Line); + Assert.Equal(2,token3.Position.Column); + + + } + } diff --git a/ParserTests/GenericLexerTests.cs b/ParserTests/GenericLexerTests.cs index ab6a3df1..57f29b8c 100644 --- a/ParserTests/GenericLexerTests.cs +++ b/ParserTests/GenericLexerTests.cs @@ -7,6 +7,7 @@ using System.Linq; using System.Collections.Generic; using System.Text; +using System; using Xunit; using sly.buildresult; using sly.lexer.fsm; @@ -14,51 +15,58 @@ namespace ParserTests { - public enum Extensions { - [Lexeme(GenericToken.Extension)] - DATE, + public enum Extensions + { + [Lexeme(GenericToken.Extension)] + DATE, - [Lexeme(GenericToken.Double)] - DOUBLE, + [Lexeme(GenericToken.Double)] + DOUBLE, } - public static class ExtendedGenericLexer + + + public static class ExtendedGenericLexer { - - public static bool CheckDate(string value) { + + public static bool CheckDate(string value) + { bool ok = false; - if (value.Length==5) { + if (value.Length == 5) + { ok = char.IsDigit(value[0]); ok = ok && char.IsDigit(value[1]); ok = ok && value[2] == '.'; ok = ok && char.IsDigit(value[3]); ok = ok && char.IsDigit(value[4]); - } + } return ok; } - public static void AddExtension(Extensions token, LexemeAttribute lexem, GenericLexer lexer) { - if (token == Extensions.DATE) { + public static void AddExtension(Extensions token, 
LexemeAttribute lexem, GenericLexer lexer) + { + if (token == Extensions.DATE) + { + - - NodeCallback callback = (FSMMatch match) => + NodeCallback callback = (FSMMatch match) => { - match.Properties[GenericLexer.DerivedToken] = Extensions.DATE; - return match; + match.Properties[GenericLexer.DerivedToken] = Extensions.DATE; + return match; }; var fsmBuilder = lexer.FSMBuilder; // TODO fsmBuilder.GoTo(GenericLexer.in_double) - .Transition('.',CheckDate) + .Transition('.', CheckDate) .Mark("start_date") - .RepetitionTransition(4,"[0-9]") + .RepetitionTransition(4, "[0-9]") // .RangeTransition('0','9') // .Mark("y1") // .RangeTransition('0','9') @@ -74,33 +82,48 @@ public static void AddExtension(Extensions token, LexemeAttribute lexem, Generic } - public enum DoubleQuotedString { + public enum BadLetterStringDelimiter + { + [Lexeme(GenericToken.String, "a")] + Letter + } - [Lexeme(GenericToken.String,"\"")] + public enum BadEmptyStringDelimiter + { + [Lexeme(GenericToken.String, "")] + Empty + } + + public enum DoubleQuotedString + { + + [Lexeme(GenericToken.String, "\"")] DoubleString } - public enum SingleQuotedString { + public enum SingleQuotedString + { - [Lexeme(GenericToken.String,"'")] + [Lexeme(GenericToken.String, "'")] SingleString } - public enum DefaultQuotedString { + public enum DefaultQuotedString + { - [Lexeme(GenericToken.String)] + [Lexeme(GenericToken.String)] DefaultString } public enum AlphaId { - [Lexeme(GenericToken.Identifier,IdentifierType.Alpha)] + [Lexeme(GenericToken.Identifier, IdentifierType.Alpha)] ID } public enum AlphaNumId { - [Lexeme(GenericToken.Identifier,IdentifierType.AlphaNumeric)] + [Lexeme(GenericToken.Identifier, IdentifierType.AlphaNumeric)] ID } @@ -115,19 +138,19 @@ public class GenericLexerTests [Fact] - public void TestExtensions() + public void TestExtensions() { var lexerRes = LexerBuilder.BuildLexer(new BuildResult>(), ExtendedGenericLexer.AddExtension); Assert.False(lexerRes.IsError); var lexer = lexerRes.Result 
as GenericLexer; Assert.NotNull(lexer); - + List> tokens = lexer.Tokenize("20.02.2018 3.14").ToList(); - Assert.Equal(2,tokens.Count); - Assert.Equal(Extensions.DATE,tokens[0].TokenID); - Assert.Equal("20.02.2018",tokens[0].Value); - Assert.Equal(Extensions.DOUBLE,tokens[1].TokenID); - Assert.Equal("3.14",tokens[1].Value); + Assert.Equal(2, tokens.Count); + Assert.Equal(Extensions.DATE, tokens[0].TokenID); + Assert.Equal("20.02.2018", tokens[0].Value); + Assert.Equal(Extensions.DOUBLE, tokens[1].TokenID); + Assert.Equal("3.14", tokens[1].Value); } [Fact] @@ -189,7 +212,8 @@ public void TestAlphaNumDashIdStartsWithUnderscore() } [Fact] - public void TestDoubleQuotedString() { + public void TestDoubleQuotedString() + { var lexerRes = LexerBuilder.BuildLexer(new BuildResult>()); Assert.False(lexerRes.IsError); var lexer = lexerRes.Result; @@ -201,8 +225,9 @@ public void TestDoubleQuotedString() { Assert.Equal(source, tok.StringWithoutQuotes); } - [Fact] - public void TestSingleQuotedString() { + [Fact] + public void TestSingleQuotedString() + { var lexerRes = LexerBuilder.BuildLexer(new BuildResult>()); Assert.False(lexerRes.IsError); var lexer = lexerRes.Result; @@ -215,7 +240,8 @@ public void TestSingleQuotedString() { } [Fact] - public void TestDefaultQuotedString() { + public void TestDefaultQuotedString() + { var lexerRes = LexerBuilder.BuildLexer(new BuildResult>()); Assert.False(lexerRes.IsError); var lexer = lexerRes.Result; @@ -225,6 +251,46 @@ public void TestDefaultQuotedString() { Token tok = r[0]; Assert.Equal(DefaultQuotedString.DefaultString, tok.TokenID); Assert.Equal(source, tok.StringWithoutQuotes); + } + + [Fact] + public void TestBadLetterStringDelimiter() + { + var lexerRes = LexerBuilder.BuildLexer(new BuildResult>()); + Assert.True(lexerRes.IsError); + Assert.Equal(1, lexerRes.Errors.Count); + var error = lexerRes.Errors[0]; + Assert.Equal(ErrorLevel.FATAL, error.Level); + Assert.True(error.Message.Contains("can not start with a letter")); + } + 
+ [Fact] + public void TestBadEmptyStringDelimiter() + { + var lexerRes = LexerBuilder.BuildLexer(new BuildResult>()); + Assert.True(lexerRes.IsError); + Assert.Equal(1, lexerRes.Errors.Count); + var error = lexerRes.Errors[0]; + Assert.Equal(ErrorLevel.FATAL, error.Level); + Assert.True(error.Message.Contains("must be 1 character length")); + } + + [Fact] + public void TestLexerError() + { + var lexerRes = LexerBuilder.BuildLexer(new BuildResult>()); + Assert.False(lexerRes.IsError); + var lexer = lexerRes.Result; + string source = "hello world 2 + 2 "; + var errException = Assert.Throws>(() => lexer.Tokenize(source).ToList()); + var error = errException.Error; + Assert.Equal(0, error.Line); + Assert.Equal(13, error.Column); + Assert.Equal('2', error.UnexpectedChar); + + + + } } } diff --git a/README.md b/README.md index 30729af7..1f98429f 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,14 @@ # C# Lex Yacc # [![Build status](https://ci.appveyor.com/api/projects/status/n9uffgkqn2qet7k9?svg=true)](https://ci.appveyor.com/project/OlivierDuhart/sly) -[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/b3b00/sly/blob/dev/LICENSE) -[![NuGet](https://img.shields.io/nuget/v/sly.svg)](https://www.nuget.org/packages/sly/) +![AppVeyor tests](https://img.shields.io/appveyor/tests/OlivierDuhart/sly.svg) [![codecov](https://codecov.io/gh/b3b00/csly/branch/dev/graph/badge.svg)](https://codecov.io/gh/b3b00/csly) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/b3b00/sly/blob/dev/LICENSE) +![NuGet](https://img.shields.io/nuget/v/sly.svg) + + :warning: This readme is a bit out of date. Go to the [wiki](https://github.com/b3b00/csly/wiki) for more up-to-date documentation.
#LY is a parser generator halfway between parser combinators and parser generator like diff --git a/sly/lexer/GenericLexer.cs b/sly/lexer/GenericLexer.cs index fb08180d..b0f706b2 100644 --- a/sly/lexer/GenericLexer.cs +++ b/sly/lexer/GenericLexer.cs @@ -8,33 +8,7 @@ namespace sly.lexer { - public static class StringExtension - { - - public static List SplitString(this string value, string delimiter) - { - List elements = new List(); - int index = value.IndexOf(delimiter); - int lastPosition = 0; - if (index > 0) - { - while (index > 0) - { - string element = value.Substring(lastPosition, index - lastPosition); - lastPosition = index + delimiter.Length; - elements.Add(element); - index = value.IndexOf(delimiter, lastPosition); - } - elements.Add(value.Substring(lastPosition, value.Length - lastPosition)); - } - else - { - elements.Add(value); - } - return elements; - } - } public enum GenericToken { @@ -62,7 +36,7 @@ public enum EOLType { Windows, Nix, - + Mac, Environment, @@ -108,7 +82,7 @@ public GenericLexer(IdentifierType idType = IdentifierType.Alpha, BuildExtension { InitializeStaticLexer(idType, staticTokens); derivedTokens = new Dictionary>(); - this.ExtensionBuilder = extensionBuilder; + ExtensionBuilder = extensionBuilder; } @@ -122,7 +96,7 @@ private void InitializeStaticLexer(IdentifierType idType = IdentifierType.Alpha, .WhiteSpace(' ') .WhiteSpace('\t') .IgnoreEOL(); - + // start machine definition FSMBuilder.Mark(start); @@ -317,11 +291,11 @@ public void AddStringLexem(IN token, string stringDelimiter) { if (string.IsNullOrEmpty(stringDelimiter) || stringDelimiter.Length > 1) { - throw new ArgumentException($"bad lexem {stringDelimiter} : StringToken lexeme <{token.ToString()}> must be 1 character length."); + throw new InvalidLexerException($"bad lexem {stringDelimiter} : StringToken lexeme <{token.ToString()}> must be 1 character length."); } if (char.IsLetterOrDigit(stringDelimiter[0])) { - throw new ArgumentException($"bad lexem {stringDelimiter} 
: SugarToken lexeme <{token.ToString()}> can not start with a letter."); + throw new InvalidLexerException($"bad lexem {stringDelimiter} : SugarToken lexeme <{token.ToString()}> can not start with a letter."); } StringDelimiter = stringDelimiter[0]; @@ -353,7 +327,7 @@ public void AddSugarLexem(IN token, string specialValue) { if (char.IsLetter(specialValue[0])) { - throw new ArgumentException($"bad lexem {specialValue} : SugarToken lexeme <{token.ToString()}> can not start with a letter."); + throw new InvalidLexerException($"bad lexem {specialValue} : SugarToken lexeme <{token.ToString()}> can not start with a letter."); } NodeCallback callback = (FSMMatch match) => { @@ -395,7 +369,7 @@ public IEnumerable> Tokenize(string source) } - + public void ConsumeComment(Token comment, string source) { @@ -404,34 +378,42 @@ public void ConsumeComment(Token comment, string source) if (comment.IsSingleLineComment) { - int position = this.LexerFsm.CurrentPosition; - commentValue = EOLManager.GetToEndOfLine(source,position); - position = position + commentValue.Length; - comment.Value = commentValue.Replace("\n","").Replace("\r",""); + int position = LexerFsm.CurrentPosition; + commentValue = EOLManager.GetToEndOfLine(source, position); + position = position + commentValue.Length; + comment.Value = commentValue.Replace("\n", "").Replace("\r", ""); LexerFsm.Move(position, LexerFsm.CurrentLine + 1, 0); } else if (comment.IsMultiLineComment) { - int position = this.LexerFsm.CurrentPosition; - int end = source.IndexOf(this.MultiLineCommentEnd, position); + int position = LexerFsm.CurrentPosition; + int end = source.IndexOf(MultiLineCommentEnd, position); if (end < 0) { - position = source.Length+this.MultiLineCommentEnd.Length; + position = source.Length; } else { position = end; } - commentValue = source.Substring(this.LexerFsm.CurrentPosition, position - this.LexerFsm.CurrentPosition); + commentValue = source.Substring(LexerFsm.CurrentPosition, position - 
LexerFsm.CurrentPosition); comment.Value = commentValue; // TODO : compute new line and column - int newPosition = LexerFsm.CurrentPosition + commentValue.Length + this.MultiLineCommentEnd.Length; - var remaining = source.Substring(newPosition); - + int newPosition = LexerFsm.CurrentPosition + commentValue.Length + MultiLineCommentEnd.Length; + var lines = EOLManager.GetLines(commentValue); int newLine = LexerFsm.CurrentLine + lines.Count - 1; - int newColumn = lines[lines.Count - 1].Length+this.MultiLineCommentEnd.Length; + int newColumn = 0; + if (lines.Count > 1) + { + newColumn = lines[lines.Count - 1].Length + MultiLineCommentEnd.Length; + } + else + { + newColumn = LexerFsm.CurrentColumn+lines[0].Length+MultiLineCommentEnd.Length; + } + LexerFsm.Move(newPosition, newLine, newColumn); } diff --git a/sly/lexer/InvalidLexerException.cs b/sly/lexer/InvalidLexerException.cs new file mode 100644 index 00000000..8d177a28 --- /dev/null +++ b/sly/lexer/InvalidLexerException.cs @@ -0,0 +1,10 @@ +using System; + +namespace sly.lexer { + public class InvalidLexerException : Exception + { + public InvalidLexerException(string message) : base(message) + { + } + } +} \ No newline at end of file diff --git a/sly/lexer/LexerBuilder.cs b/sly/lexer/LexerBuilder.cs index 27cfeef0..f9e62b63 100644 --- a/sly/lexer/LexerBuilder.cs +++ b/sly/lexer/LexerBuilder.cs @@ -264,7 +264,13 @@ private static BuildResult> BuildGenericLexer(Dictionary 0) { + try { lexer.AddStringLexem(tokenID, lexem.GenericTokenParameters[0]); + } + catch (Exception e) { + result.IsError = true; + result.AddError(new InitializationError(ErrorLevel.FATAL,e.Message)); + } } else { diff --git a/sly/lexer/fsm/FSMLexer.cs b/sly/lexer/fsm/FSMLexer.cs index c01ba2ea..0d0b434f 100644 --- a/sly/lexer/fsm/FSMLexer.cs +++ b/sly/lexer/fsm/FSMLexer.cs @@ -187,7 +187,7 @@ public FSMMatch Run(string source, int start) bool consumeSkipped = true; - while (consumeSkipped && !tokenStarted) + while (consumeSkipped && 
!tokenStarted && CurrentPosition < source.Length) { currentToken = source[CurrentPosition]; if (IgnoreWhiteSpace && WhiteSpaces.Contains(currentToken)) @@ -247,7 +247,14 @@ public FSMMatch Run(string source, int start) successes.Push(resultInter); } CurrentPosition++; - CurrentColumn += value.Length; + CurrentColumn++ ; + } + else { + + if (lastNode == 0 && !tokenStarted && !successes.Any() && CurrentPosition < source.Length) { + throw new LexerException(new LexicalError(CurrentLine,CurrentColumn, source[CurrentPosition])); + } + ; } } diff --git a/sly/sly.csproj b/sly/sly.csproj index 7c8c3897..a183354d 100644 --- a/sly/sly.csproj +++ b/sly/sly.csproj @@ -3,7 +3,7 @@ netcoreapp2.0;net45 #LY is a parser generator halfway between parser combinators and parser generator like ANTLR b3b00 - 2.0.6 + 2.0.6.2 https://github.com/b3b00/sly https://github.com/b3b00/sly https://github.com/b3b00/sly/blob/master/LICENSE