Skip to content

Commit

Permalink
Merge pull request #107 from b3b00/tech/zeroAllocGenericLexer
Browse files Browse the repository at this point in the history
Tech/zero alloc generic lexer
  • Loading branch information
b3b00 authored Mar 22, 2019
2 parents 5f413fc + 5def64a commit 11c7e8f
Show file tree
Hide file tree
Showing 17 changed files with 159 additions and 72 deletions.
46 changes: 23 additions & 23 deletions ParserTests/CommentsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -79,29 +79,29 @@ public void TestGenericMultiLineComment()

Assert.Equal(5, tokens.Count);

var token1 = tokens[0];
var token2 = tokens[1];
var token3 = tokens[2];
var token4 = tokens[3];

Assert.Equal(CommentsToken.INT, token1.TokenID);
Assert.Equal("1", token1.Value);
Assert.Equal(0, token1.Position.Line);
Assert.Equal(0, token1.Position.Column);

Assert.Equal(CommentsToken.INT, token2.TokenID);
Assert.Equal("2", token2.Value);
Assert.Equal(1, token2.Position.Line);
Assert.Equal(0, token2.Position.Column);
Assert.Equal(CommentsToken.COMMENT, token3.TokenID);
var intToken1 = tokens[0];
var intToken2 = tokens[1];
var multiLineCommentToken = tokens[2];
var doubleToken = tokens[3];

Assert.Equal(CommentsToken.INT, intToken1.TokenID);
Assert.Equal("1", intToken1.Value);
Assert.Equal(0, intToken1.Position.Line);
Assert.Equal(0, intToken1.Position.Column);

Assert.Equal(CommentsToken.INT, intToken2.TokenID);
Assert.Equal("2", intToken2.Value);
Assert.Equal(1, intToken2.Position.Line);
Assert.Equal(0, intToken2.Position.Column);
Assert.Equal(CommentsToken.COMMENT, multiLineCommentToken.TokenID);
Assert.Equal(@" multi line
comment on 2 lines ", token3.Value);
Assert.Equal(1, token3.Position.Line);
Assert.Equal(2, token3.Position.Column);
Assert.Equal(CommentsToken.DOUBLE, token4.TokenID);
Assert.Equal("3.0", token4.Value);
Assert.Equal(2, token4.Position.Line);
Assert.Equal(22, token4.Position.Column);
comment on 2 lines ", multiLineCommentToken.Value);
Assert.Equal(1, multiLineCommentToken.Position.Line);
Assert.Equal(2, multiLineCommentToken.Position.Column);
Assert.Equal(CommentsToken.DOUBLE, doubleToken.TokenID);
Assert.Equal("3.0", doubleToken.Value);
Assert.Equal(2, doubleToken.Position.Line);
Assert.Equal(22, doubleToken.Position.Column);
}

[Fact]
Expand Down Expand Up @@ -135,7 +135,7 @@ public void TestGenericSingleLineComment()
Assert.Equal(1, token2.Position.Line);
Assert.Equal(0, token2.Position.Column);
Assert.Equal(CommentsToken.COMMENT, token3.TokenID);
Assert.Equal(" single line comment", token3.Value);
Assert.Equal(" single line comment", token3.Value.Replace("\r","").Replace("\n",""));
Assert.Equal(1, token3.Position.Line);
Assert.Equal(2, token3.Position.Column);
Assert.Equal(CommentsToken.DOUBLE, token4.TokenID);
Expand Down
2 changes: 1 addition & 1 deletion ParserTests/EBNFTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public enum GroupTestToken
public class OptionTestParser
{
[Production("root2 : a B? c ")]
public string root2(Token<OptionTestToken> a, ValueOption<string> b, Token<OptionTestToken> c)
public string Root2(Token<OptionTestToken> a, ValueOption<string> b, Token<OptionTestToken> c)
{
var r = new StringBuilder();
r.Append("R(");
Expand Down
3 changes: 3 additions & 0 deletions ParserTests/ParserTests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,7 @@
<ItemGroup>
<Service Include="{82a7f48d-3b50-4b1e-b82e-3ada8210c358}" />
</ItemGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<NoWarn>1701;1702;1705;1591</NoWarn>
</PropertyGroup>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,8 @@
<ItemGroup>
<ProjectReference Include="..\..\sly\sly.csproj" />
</ItemGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<NoWarn>1701;1702;1705;1591</NoWarn>
</PropertyGroup>

</Project>
5 changes: 3 additions & 2 deletions samples/GenericLexerWithCallbacks/TestCallbacks.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using sly.lexer;
using System;
using sly.lexer;

namespace GenericLexerWithCallbacks
{
Expand All @@ -12,7 +13,7 @@ public static Token<CallbackTokens> TranslateIdentifier(Token<CallbackTokens> to
{
token.TokenID = CallbackTokens.SKIP;
}
token.Value = token.Value.ToUpper();
token.SpanValue = new ReadOnlyMemory<char>(token.Value.ToUpper().ToCharArray());

return token;
}
Expand Down
4 changes: 4 additions & 0 deletions samples/ParserExample/ParserExample.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,9 @@
<ProjectReference Include="..\SimpleExpressionParser\SimpleExpressionParser.csproj" />
<ProjectReference Include="..\while\while.csproj" />
</ItemGroup>

<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<NoWarn>1701;1702;1705;1591</NoWarn>
</PropertyGroup>

</Project>
3 changes: 2 additions & 1 deletion samples/ParserExample/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ private static void testLexerBuilder()
.Mark("string_end")
.CallBack(match =>
{
match.Result.Value = match.Result.Value.ToUpper();
string upperVAlue = match.Result.Value.ToString().ToUpper();
match.Result.SpanValue = new ReadOnlyMemory<char>(upperVAlue.ToCharArray());
return match;
});

Expand Down
3 changes: 3 additions & 0 deletions samples/SimpleExpressionParser/SimpleExpressionParser.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,7 @@
<ProjectReference Include="..\..\sly\sly.csproj" />
<ProjectReference Include="..\expressionParser\expressionParser.csproj" />
</ItemGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<NoWarn>1701;1702;1705;1591</NoWarn>
</PropertyGroup>
</Project>
3 changes: 3 additions & 0 deletions samples/expressionParser/expressionParser.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@
<ItemGroup>
<ProjectReference Include="..\..\sly\sly.csproj" />
</ItemGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<NoWarn>1701;1702;1705;1591</NoWarn>
</PropertyGroup>
</Project>
14 changes: 12 additions & 2 deletions samples/jsonparser/JSONLexer.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using sly.lexer;
using sly.lexer.fsm;

namespace jsonparser
{
Expand All @@ -11,6 +12,11 @@ public void AddDefinition(TokenDefinition<JsonToken> tokenDefinition)
}

/// <summary>
/// Tokenizes <paramref name="source"/> by delegating to the
/// <see cref="ReadOnlyMemory{T}"/> overload of this method.
/// </summary>
/// <param name="source">Text to tokenize.</param>
/// <returns>The tokens produced by the memory-based overload.</returns>
public IEnumerable<Token<JsonToken>> Tokenize(string source)
{
    // AsMemory() wraps the string in place; ToCharArray() would first copy
    // the whole input into a new char[], defeating the zero-allocation goal.
    // Note: AsMemory() maps a null string to an empty memory instead of throwing.
    return Tokenize(source.AsMemory());
}

public IEnumerable<Token<JsonToken>> Tokenize(ReadOnlyMemory<char> source)
{
var tokens = new List<Token<JsonToken>>();
var position = 0;
Expand All @@ -30,11 +36,15 @@ public IEnumerable<Token<JsonToken>> Tokenize(string source)
var InFalse = false;
var NumIsDouble = false;

int tokenStartIndex = 0;
int tokenLength = 0;

Func<JsonToken, Token<JsonToken>> NewToken = tok =>
{
var token = new Token<JsonToken>();
token.Position = new TokenPosition(currentTokenPosition, currentTokenLine, currentTokenColumn);
token.Value = currentValue;
token.SpanValue = source.Slice(tokenStartIndex,tokenLength);
tokenStartIndex = tokenStartIndex + tokenLength;
token.TokenID = tok;
tokens.Add(token);
currentValue = "";
Expand All @@ -44,7 +54,7 @@ public IEnumerable<Token<JsonToken>> Tokenize(string source)

while (position < length)
{
var current = source[position];
var current = source.At(position);
if (InString)
{
currentValue += current;
Expand Down
3 changes: 3 additions & 0 deletions samples/jsonparser/jsonparser.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@
<ItemGroup>
<ProjectReference Include="..\..\sly\sly.csproj" />
</ItemGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<NoWarn>1701;1702;1705;1591</NoWarn>
</PropertyGroup>
</Project>
3 changes: 3 additions & 0 deletions samples/while/while.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,7 @@
<ItemGroup>
<PackageReference Include="Sigil" Version="4.7.0" />
</ItemGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<NoWarn>1701;1702;1705;1591</NoWarn>
</PropertyGroup>
</Project>
38 changes: 22 additions & 16 deletions sly/lexer/GenericLexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public IEnumerable<Token<IN>> Tokenize(string source)
tokens.Add(transcoded);
r = LexerFsm.Run(source);

if (r.Result.IsComment) ConsumeComment(r.Result, source);
if (r.IsSuccess && r.Result.IsComment) ConsumeComment(r.Result, source);
}

var eos = new Token<IN>();
Expand Down Expand Up @@ -332,9 +332,9 @@ public void AddStringLexem(IN token, string stringDelimiter, string escapeDelimi
NodeCallback<GenericToken> callback = match =>
{
match.Properties[DerivedToken] = token;
var value = match.Result.Value;
var value = match.Result.SpanValue;

match.Result.Value = value;
match.Result.SpanValue = value;
return match;
};

Expand Down Expand Up @@ -364,7 +364,7 @@ public void AddStringLexem(IN token, string stringDelimiter, string escapeDelimi
};

var exceptDelimiter = new[] {StringDelimiterChar};
var in_string = "in_string_same";
in_string = "in_string_same";
var escaped = "escaped_same";
var delim = "delim_same";

Expand Down Expand Up @@ -407,36 +407,41 @@ public void AddSugarLexem(IN token, string specialValue)

public void ConsumeComment(Token<GenericToken> comment, string source)
{
var commentValue = "";
ConsumeComment(comment, source.AsMemory());
}
public void ConsumeComment(Token<GenericToken> comment, ReadOnlyMemory<char> source)
{

ReadOnlyMemory<char> commentValue;

if (comment.IsSingleLineComment)
{
var position = LexerFsm.CurrentPosition;
commentValue = EOLManager.GetToEndOfLine(source, position);
position = position + commentValue.Length;
comment.Value = commentValue.Replace("\n", "").Replace("\r", "");
comment.SpanValue = commentValue;
LexerFsm.Move(position, LexerFsm.CurrentLine + 1, 0);
}
else if (comment.IsMultiLineComment)
{
var position = LexerFsm.CurrentPosition;
var end = source.IndexOf(MultiLineCommentEnd, position);

var end = source.Span.Slice(position).IndexOf(MultiLineCommentEnd.AsSpan());
if (end < 0)
position = source.Length;
else
position = end;
commentValue = source.Substring(LexerFsm.CurrentPosition, position - LexerFsm.CurrentPosition);
comment.Value = commentValue;
position = end+position;
commentValue = source.Slice(LexerFsm.CurrentPosition, position - LexerFsm.CurrentPosition);
comment.SpanValue = commentValue;

var newPosition = LexerFsm.CurrentPosition + commentValue.Length + MultiLineCommentEnd.Length;

var lines = EOLManager.GetLines(commentValue);
var lines = EOLManager.GetLinesLength(commentValue);
var newLine = LexerFsm.CurrentLine + lines.Count - 1;
var newColumn = 0;
int newColumn;
if (lines.Count > 1)
newColumn = lines[lines.Count - 1].Length + MultiLineCommentEnd.Length;
newColumn = lines.Last() + MultiLineCommentEnd.Length;
else
newColumn = LexerFsm.CurrentColumn + lines[0].Length + MultiLineCommentEnd.Length;
newColumn = LexerFsm.CurrentColumn + lines[0] + MultiLineCommentEnd.Length;


LexerFsm.Move(newPosition, newLine, newColumn);
Expand All @@ -449,7 +454,8 @@ public Token<IN> Transcode(FSMMatch<GenericToken> match)
var inTok = match.Result;
tok.IsComment = inTok.IsComment;
tok.IsEmpty = inTok.IsEmpty;
tok.Value = inTok.Value;
// tok.Value = inTok.Value;
tok.SpanValue = inTok.SpanValue;
tok.CommentType = inTok.CommentType;
tok.Position = inTok.Position;
tok.Discarded = inTok.Discarded;
Expand Down
15 changes: 12 additions & 3 deletions sly/lexer/Token.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Diagnostics.CodeAnalysis;
using System;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
Expand All @@ -18,10 +19,16 @@ public class Token<T>


/// <summary>
/// Creates a token from a string value by forwarding every argument to the
/// <see cref="ReadOnlyMemory{T}"/> constructor.
/// </summary>
/// <param name="token">Identifier forwarded to the memory-based constructor.</param>
/// <param name="value">Token text; exposed to the other constructor as a memory view.</param>
/// <param name="position">Position forwarded to the memory-based constructor.</param>
/// <param name="isCommentStart">Forwarded unchanged.</param>
/// <param name="commentType">Forwarded unchanged.</param>
public Token(T token, string value, TokenPosition position, bool isCommentStart = false,
    CommentType commentType = CommentType.Single)
    // AsMemory() views the string in place instead of copying it into a char[] first.
    // Note: AsMemory() maps a null string to an empty memory instead of throwing.
    : this(token, value.AsMemory(), position, isCommentStart, commentType)
{
}

public Token(T token, ReadOnlyMemory<char> value, TokenPosition position, bool isCommentStart = false,
CommentType commentType = CommentType.Single)
{
TokenID = token;
Value = value;
SpanValue = value;
Position = position;
CommentType = commentType;
}
Expand All @@ -36,6 +43,8 @@ public Token()
}


public ReadOnlyMemory<char> SpanValue { get; set; }

public TokenPosition Position { get; set; }

public int PositionInTokenFlow { get; set; }
Expand All @@ -54,7 +63,7 @@ public Token()

public bool IsSingleLineComment => CommentType == CommentType.Single;

public string Value { get; set; }
public string Value => SpanValue.ToString();

public static T DefaultToken
{
Expand Down
Loading

0 comments on commit 11c7e8f

Please sign in to comment.