From a9f51dec2e1028bbfcf74fd5a827ae4b3567f7ab Mon Sep 17 00:00:00 2001 From: Asrom11 Date: Sun, 24 Nov 2024 16:23:21 +0500 Subject: [PATCH 1/4] initial Markdown --- cs/MarkDownTest/HeaderConverterTests.cs | 45 +++++++ cs/MarkDownTest/ItalicConverterTests.cs | 45 +++++++ cs/MarkDownTest/MarkDownTest.csproj | 25 ++++ cs/MarkDownTest/MarkdownConverterTests.cs | 44 +++++++ cs/MarkDownTest/MarkdownParserTests.cs | 111 ++++++++++++++++++ cs/MarkDownTest/StrongConverterTests.cs | 45 +++++++ .../HtmlConverter/HeaderTagConverter.cs | 9 ++ .../Interface/IHtmlTagConverter.cs | 6 + .../HtmlConverter/ItalicTagConverter.cs | 9 ++ .../HtmlConverter/StrongTagConverter.cs | 9 ++ cs/Markdown/MarkDown.cs | 17 +++ .../MarkDownConverter/IMarkdownConverter.cs | 6 + .../MarkDownConverter/MarkdownConverter.cs | 37 ++++++ cs/Markdown/Markdown.csproj | 11 ++ .../Parser/Interface/IMarkdownParser.cs | 6 + cs/Markdown/Parser/MarkdownParser.cs | 11 ++ cs/Markdown/Program.cs | 1 + cs/Markdown/Token/Token.cs | 8 ++ cs/Markdown/Token/TokenType.cs | 8 ++ cs/clean-code.sln | 12 ++ 20 files changed, 465 insertions(+) create mode 100644 cs/MarkDownTest/HeaderConverterTests.cs create mode 100644 cs/MarkDownTest/ItalicConverterTests.cs create mode 100644 cs/MarkDownTest/MarkDownTest.csproj create mode 100644 cs/MarkDownTest/MarkdownConverterTests.cs create mode 100644 cs/MarkDownTest/MarkdownParserTests.cs create mode 100644 cs/MarkDownTest/StrongConverterTests.cs create mode 100644 cs/Markdown/HtmlConverter/HeaderTagConverter.cs create mode 100644 cs/Markdown/HtmlConverter/Interface/IHtmlTagConverter.cs create mode 100644 cs/Markdown/HtmlConverter/ItalicTagConverter.cs create mode 100644 cs/Markdown/HtmlConverter/StrongTagConverter.cs create mode 100644 cs/Markdown/MarkDown.cs create mode 100644 cs/Markdown/MarkDownConverter/IMarkdownConverter.cs create mode 100644 cs/Markdown/MarkDownConverter/MarkdownConverter.cs create mode 100644 cs/Markdown/Markdown.csproj create mode 100644 cs/Markdown/Parser/Interface/IMarkdownParser.cs create mode 100644 cs/Markdown/Parser/MarkdownParser.cs create mode 100644 cs/Markdown/Program.cs create mode 100644 cs/Markdown/Token/Token.cs create mode 100644 cs/Markdown/Token/TokenType.cs diff --git a/cs/MarkDownTest/HeaderConverterTests.cs b/cs/MarkDownTest/HeaderConverterTests.cs new file mode 100644 index 000000000..81274ad59 --- /dev/null +++ b/cs/MarkDownTest/HeaderConverterTests.cs @@ -0,0 +1,45 @@ +using FluentAssertions; +using Markdown; +using Markdown.interfaces; +using NUnit.Framework; + +namespace MarkDownTest; + +public class HeaderConverterTests +{ + private HeaderTagConverter _converter; + + [SetUp] + public void Setup() + { + _converter = new HeaderTagConverter(); + } + + [Test] + public void ConvertToHtml_SimpleHeader_ReturnsHeaderWithH1Tags() + { + var tokens = new List + { + new() { Text = "# Header", Type = TokenType.Header } + }; + + var result = _converter.ConvertToHtml(tokens.ToList()); + + result.Single().Text.Should().Be("

Header

", + "Header should be wrapped in h1 tags"); + } + + [Test] + public void ConvertToHtml_NoHeader_ReturnsUnmodifiedTokens() + { + var tokens = new List + { + new() { Text = "Regular text", Type = TokenType.Italic } + }; + + var result = _converter.ConvertToHtml(tokens.ToList()); + + result.Should().BeEquivalentTo(tokens, + "Non-header tokens should remain unchanged"); + } +} \ No newline at end of file diff --git a/cs/MarkDownTest/ItalicConverterTests.cs b/cs/MarkDownTest/ItalicConverterTests.cs new file mode 100644 index 000000000..c4d302fa4 --- /dev/null +++ b/cs/MarkDownTest/ItalicConverterTests.cs @@ -0,0 +1,45 @@ +using FluentAssertions; +using Markdown; +using Markdown.interfaces; +using NUnit.Framework; + +namespace MarkDownTest; + +public class ItalicConverterTests +{ + private ItalicTagConverter _converter; + + [SetUp] + public void Setup() + { + _converter = new ItalicTagConverter(); + } + + [Test] + public void ConvertToHtml_SimpleItalic_ReturnsTextWithEmTags() + { + var tokens = new List + { + new Token { Text = "_italic_", Type = TokenType.Italic } + }; + + var result = _converter.ConvertToHtml(tokens.ToList()); + + result.Single().Text.Should().Be("italic", + "Italic text should be wrapped in em tags"); + } + + [Test] + public void ConvertToHtml_NoItalic_ReturnsUnmodifiedTokens() + { + var tokens = new List + { + new Token { Text = "Regular text", Type = TokenType.Strong } + }; + + var result = _converter.ConvertToHtml(tokens.ToList()); + + result.Should().BeEquivalentTo(tokens, + "Non-italic tokens should remain unchanged"); + } +} \ No newline at end of file diff --git a/cs/MarkDownTest/MarkDownTest.csproj b/cs/MarkDownTest/MarkDownTest.csproj new file mode 100644 index 000000000..f389e53f0 --- /dev/null +++ b/cs/MarkDownTest/MarkDownTest.csproj @@ -0,0 +1,25 @@ + + + + net8.0 + enable + enable + + false + true + + + + + + + + + + + + + + + + diff --git a/cs/MarkDownTest/MarkdownConverterTests.cs b/cs/MarkDownTest/MarkdownConverterTests.cs new file mode 100644 index 000000000..c0be4fe16 --- /dev/null +++ b/cs/MarkDownTest/MarkdownConverterTests.cs @@ -0,0 +1,44 @@ +using FluentAssertions; +using Markdown; +using NUnit.Framework; + +namespace MarkDownTest; + +public class MarkdownConverterTests +{ + private MarkdownConverter _converter; + + [SetUp] + public void Setup() + { + _converter = new MarkdownConverter(); + } + + [Test] + public void Convert_ComplexMarkdown_ReturnsCorrectlyFormattedHtml() + { + var tokens = new List + { + new() { Text = "# ", Type = TokenType.Header }, + new() { Text = "Header with ", Type = TokenType.Header }, + new() { Text = "_italic_", Type = TokenType.Italic }, + new() { Text = " and ", Type = TokenType.Header }, + new() { Text = "__bold__", Type = TokenType.Strong } + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("

Header with italic and bold

", + "Complex markdown should be converted with proper nesting"); + } + + [Test] + public void Convert_EmptyTokenList_ReturnsEmptyString() + { + var tokens = new List(); + + var result = _converter.Convert(tokens); + + result.Should().BeEmpty("Empty token list should result in empty string"); + } +} \ No newline at end of file diff --git a/cs/MarkDownTest/MarkdownParserTests.cs b/cs/MarkDownTest/MarkdownParserTests.cs new file mode 100644 index 000000000..31e4a0498 --- /dev/null +++ b/cs/MarkDownTest/MarkdownParserTests.cs @@ -0,0 +1,111 @@ +using FluentAssertions; +using Markdown; +using Markdown.Parser; +using NUnit.Framework; + +namespace MarkDownTest; + +public class MarkdownParserTests +{ + private MarkdownParser _parser; + + [SetUp] + public void Setup() + { + _parser = new MarkdownParser(); + } + + [Test] + public void Parse_EmptyString_ReturnsEmptyTokenList() + { + const string input = ""; + + var tokens = _parser.Parse(input).ToList(); + + tokens.Should().BeEmpty("Empty input should result in empty token list"); + } + + [TestCase("# Header", TokenType.Header, "Header")] + [TestCase("_italic_", TokenType.Italic, "italic")] + [TestCase("__strong__", TokenType.Strong, "strong")] + public void Parse_SingleElement_ReturnsCorrectToken(string input, TokenType expectedType, string expectedText) + { + var tokens = _parser.Parse(input).ToList(); + + tokens.Should().ContainSingle() + .Which.Should().Match(t => + t.Type == expectedType && + t.Text == expectedText); + } + + [Test] + public void Parse_ComplexMarkdown_ReturnsCorrectTokenSequence() + { + const string input = "# Header with __strong__ and _italic_"; + var expectedTokens = new[] + { + new Token { Type = TokenType.Header, Text = "Header with " }, + new Token { Type = TokenType.Strong, Text = "strong" }, + new Token { Type = TokenType.Header, Text = " and " }, + new Token { Type = TokenType.Italic, Text = "italic" } + }; + + var tokens = _parser.Parse(input).ToList(); + + tokens.Should().BeEquivalentTo(expectedTokens, + options => options.WithStrictOrdering()); + } + + [Test] + public void Parse_UnderscoresInMiddleOfWord_NotConsideredAsMarkup() + { + const string input = "some_word_with_underscores"; + + var tokens = _parser.Parse(input).ToList(); + + tokens.Should().ContainSingle() + .Which.Text.Should().Be("some_word_with_underscores", + "Underscores in middle of word should not be treated as markup"); + } + + [Test] + public void Parse_EscapedUnderscores_NotConsideredAsMarkup() + { + const string input = "Text with \\_escaped\\_ underscores"; + + var tokens = _parser.Parse(input).ToList(); + + tokens.Should().ContainSingle() + .Which.Text.Should().Be("Text with _escaped_ underscores", + "Escaped underscores should not be treated as markup"); + } + + [Test] + public void Parse_HeaderWithoutSpace_NotConsideredAsHeader() + { + const string input = "#Not a header"; + + var tokens = _parser.Parse(input).ToList(); + + tokens.Should().ContainSingle() + .Which.Text.Should().Be("#Not a header", + "# without space should not be treated as header"); + } + + [Test] + public void Parse_MultipleLines_HandledCorrectly() + { + const string input = "# Header\n_italic_\n__strong__"; + var expectedTokens = new Token[] + { + new() { Type = TokenType.Header, Text = "Header" }, + new() { Type = TokenType.Italic, Text = "italic" }, + new() { Type = TokenType.Strong, Text = "strong" } + }; + + var tokens = _parser.Parse(input).ToList(); + + tokens.Should().BeEquivalentTo(expectedTokens, + options => options.WithStrictOrdering()); + } +} \ No newline at end of file diff --git a/cs/MarkDownTest/StrongConverterTests.cs b/cs/MarkDownTest/StrongConverterTests.cs new file mode 100644 index 000000000..e188fefd5 --- /dev/null +++ b/cs/MarkDownTest/StrongConverterTests.cs @@ -0,0 +1,45 @@ +using FluentAssertions; +using Markdown; +using Markdown.interfaces; +using NUnit.Framework; + +namespace MarkDownTest; + +public class StrongConverterTests +{ + private StrongTagConverter _converter; + + [SetUp] + public void Setup() + { + _converter = new StrongTagConverter(); + } + + [Test] + public void ConvertToHtml_SimpleStrong_ReturnsTextWithStrongTags() + { + var tokens = new List + { + new Token { Text = "__strong__", Type = TokenType.Strong } + }; + + var result = _converter.ConvertToHtml(tokens.ToList()); + + result.Single().Text.Should().Be("strong", + "Strong text should be wrapped in strong tags"); + } + + [Test] + public void ConvertToHtml_NoStrong_ReturnsUnmodifiedTokens() + { + var tokens = new List + { + new Token { Text = "Regular text", Type = TokenType.Header } + }; + + var result = _converter.ConvertToHtml(tokens.ToList()); + + result.Should().BeEquivalentTo(tokens, + "Non-strong tokens should remain unchanged"); + } +} \ No newline at end of file diff --git a/cs/Markdown/HtmlConverter/HeaderTagConverter.cs b/cs/Markdown/HtmlConverter/HeaderTagConverter.cs new file mode 100644 index 000000000..c8c31713e --- /dev/null +++ b/cs/Markdown/HtmlConverter/HeaderTagConverter.cs @@ -0,0 +1,9 @@ +namespace Markdown.interfaces; + +public class HeaderTagConverter : IHtmlTagConverter +{ + public IList ConvertToHtml(IList tokens) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/HtmlConverter/Interface/IHtmlTagConverter.cs b/cs/Markdown/HtmlConverter/Interface/IHtmlTagConverter.cs new file mode 100644 index 000000000..c7afdc28e --- /dev/null +++ b/cs/Markdown/HtmlConverter/Interface/IHtmlTagConverter.cs @@ -0,0 +1,6 @@ +namespace Markdown.interfaces; + +public interface IHtmlTagConverter +{ + IList ConvertToHtml(IList tokens); +} \ No newline at end of file diff --git a/cs/Markdown/HtmlConverter/ItalicTagConverter.cs b/cs/Markdown/HtmlConverter/ItalicTagConverter.cs new file mode 100644 index 000000000..338efe233 --- /dev/null +++ b/cs/Markdown/HtmlConverter/ItalicTagConverter.cs @@ -0,0 +1,9 @@ +namespace Markdown.interfaces; + +public class ItalicTagConverter : IHtmlTagConverter +{ + public IList ConvertToHtml(IList tokens) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/HtmlConverter/StrongTagConverter.cs b/cs/Markdown/HtmlConverter/StrongTagConverter.cs new file mode 100644 index 000000000..2999aeb5c --- /dev/null +++ b/cs/Markdown/HtmlConverter/StrongTagConverter.cs @@ -0,0 +1,9 @@ +namespace Markdown.interfaces; + +public class StrongTagConverter : IHtmlTagConverter +{ + public IList ConvertToHtml(IList tokens) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkDown.cs b/cs/Markdown/MarkDown.cs new file mode 100644 index 000000000..0dc5a9f69 --- /dev/null +++ b/cs/Markdown/MarkDown.cs @@ -0,0 +1,17 @@ +using Markdown.interfaces; +using Markdown.Parser; +using Markdown.Parser.Interface; + +namespace Markdown; + +public class MarkDown +{ + private readonly IMarkdownConverter _converter = new MarkdownConverter(); + private readonly IMarkdownParser _parser = new MarkdownParser(); + + public string Render(string markdown) + { + var tokens = _parser.Parse(markdown); + return _converter.Convert(tokens); + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/IMarkdownConverter.cs b/cs/Markdown/MarkDownConverter/IMarkdownConverter.cs new file mode 100644 index 000000000..2da855708 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/IMarkdownConverter.cs @@ -0,0 +1,6 @@ +namespace Markdown.interfaces; + +public interface IMarkdownConverter +{ + string Convert(IEnumerable tokens); +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/MarkdownConverter.cs b/cs/Markdown/MarkDownConverter/MarkdownConverter.cs new file mode 100644 index 000000000..c8ba278a2 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/MarkdownConverter.cs @@ -0,0 +1,37 @@ +using System.Text; +using Markdown.interfaces; + +namespace Markdown; + +public class MarkdownConverter : IMarkdownConverter +{ + private readonly List tagConverters = CreateTagConverters(); + + private static List CreateTagConverters() + { + return + [ + new HeaderTagConverter(), + new ItalicTagConverter(), + new StrongTagConverter() + ]; + } + + public string Convert(IEnumerable tokens) + { + IList convertedTokens = tokens.ToArray(); + var result = new StringBuilder(); + + foreach (var tagConverter in tagConverters) + { + convertedTokens = tagConverter.ConvertToHtml(convertedTokens); + } + + foreach (var text in convertedTokens.Select(token => token.Text)) + { + result.Append(text); + } + + return result.ToString(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..e407cdc01 --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,11 @@ + + + + Exe + net8.0 + enable + enable + Linux + + + diff --git a/cs/Markdown/Parser/Interface/IMarkdownParser.cs b/cs/Markdown/Parser/Interface/IMarkdownParser.cs new file mode 100644 index 000000000..c3fe4d1fc --- /dev/null +++ b/cs/Markdown/Parser/Interface/IMarkdownParser.cs @@ -0,0 +1,6 @@ +namespace Markdown.Parser.Interface; + +public interface IMarkdownParser +{ + IEnumerable Parse(string markdownText); +} diff --git a/cs/Markdown/Parser/MarkdownParser.cs b/cs/Markdown/Parser/MarkdownParser.cs new file mode 100644 index 000000000..c66f016a3 --- /dev/null +++ b/cs/Markdown/Parser/MarkdownParser.cs @@ -0,0 +1,11 @@ +using Markdown.Parser.Interface; + +namespace Markdown.Parser; + +public class MarkdownParser : IMarkdownParser +{ + public IEnumerable Parse(string markdownText) + { + throw new NotImplementedException(); + } +} diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs new file mode 100644 index 000000000..5f282702b --- /dev/null +++ b/cs/Markdown/Program.cs @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/cs/Markdown/Token/Token.cs b/cs/Markdown/Token/Token.cs new file mode 100644 index 000000000..c033962d5 --- /dev/null +++ b/cs/Markdown/Token/Token.cs @@ -0,0 +1,8 @@ +namespace Markdown; + +public class Token +{ + public string Text { get; set; } + public TokenType Type { get; set; } + public int Position { get; set; } +} diff --git a/cs/Markdown/Token/TokenType.cs b/cs/Markdown/Token/TokenType.cs new file mode 100644 index 000000000..d9a5eb277 --- /dev/null +++ b/cs/Markdown/Token/TokenType.cs @@ -0,0 +1,8 @@ +namespace Markdown; + +public enum TokenType +{ + Header, + Italic, + Strong, +} \ No newline at end of file diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 2206d54db..769a08bc8 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -9,6 +9,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{964D393D-8B85-4F4F-A4A6-B96C876BF12C}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MarkDownTest", "MarkDownTest\MarkDownTest.csproj", "{0028163D-10A8-4BDE-BFB3-FF0EFC7C275A}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +31,13 @@ Global {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {964D393D-8B85-4F4F-A4A6-B96C876BF12C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {964D393D-8B85-4F4F-A4A6-B96C876BF12C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {964D393D-8B85-4F4F-A4A6-B96C876BF12C}.Release|Any CPU.ActiveCfg = Release|Any CPU + {964D393D-8B85-4F4F-A4A6-B96C876BF12C}.Release|Any CPU.Build.0 = Release|Any CPU + {0028163D-10A8-4BDE-BFB3-FF0EFC7C275A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0028163D-10A8-4BDE-BFB3-FF0EFC7C275A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0028163D-10A8-4BDE-BFB3-FF0EFC7C275A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0028163D-10A8-4BDE-BFB3-FF0EFC7C275A}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal From 3dd1c8f06432ce6f5e3b6e68c2b7b6d3788abe42 Mon Sep 17 00:00:00 2001 From: Asrom11 Date: Mon, 2 Dec 2024 18:13:02 +0500 Subject: [PATCH 2/4] complete md task --- MarkdownSpec.md | 12 +- cs/.dockerignore | 25 ++ cs/MarkDownTest/HeaderConverterTests.cs | 45 --- cs/MarkDownTest/ItalicConverterTests.cs | 45 --- cs/MarkDownTest/MarkDownConverterTest.cs | 191 ++++++++++++ cs/MarkDownTest/MarkdownConverterTests.cs | 44 --- cs/MarkDownTest/MarkdownParserTests.cs | 276 ++++++++++++++---- cs/MarkDownTest/StrongConverterTests.cs | 45 --- .../HtmlConverter/HeaderTagConverter.cs | 9 - .../Interface/IHtmlTagConverter.cs | 6 - .../HtmlConverter/ItalicTagConverter.cs | 9 - .../HtmlConverter/StrongTagConverter.cs | 9 - .../MarkDownConverter/MarkdownConverter.cs | 116 ++++++-- cs/Markdown/Parser/Interface/ITokenHandler.cs | 6 + cs/Markdown/Parser/MarkdownParser.cs | 62 +++- .../TokenHandler/Handlers/BaseHandler.cs | 21 ++ .../Parser/TokenHandler/Handlers/Delimiter.cs | 15 + .../TokenHandler/Handlers/HeaderHandler.cs | 41 +++ .../TokenHandler/Handlers/LinkHandler.cs | 35 +++ .../TokenHandler/Handlers/PairedTagHandler.cs | 53 ++++ .../Handlers/TokenHandlerFactory.cs | 24 ++ cs/Markdown/Program.cs | 6 +- cs/Markdown/Token/ParsingContext.cs | 17 ++ cs/Markdown/Token/Token.cs | 39 ++- cs/Markdown/Token/TokenType.cs | 12 +- cs/TestMaKR/DefaultTagSets.cs | 26 ++ cs/TestMaKR/Dockerfile | 21 ++ cs/TestMaKR/Md.cs | 31 ++ cs/TestMaKR/Parser/IMdParser.cs | 14 + cs/TestMaKR/Parser/MdParser.cs | 118 ++++++++ cs/TestMaKR/Parser/ParserContext.cs | 19 ++ cs/TestMaKR/Program.cs | 5 + cs/TestMaKR/Renderer/HtmlRenderer.cs | 72 +++++ cs/TestMaKR/Renderer/HtmlTag.cs | 16 + cs/TestMaKR/Renderer/IRenderer.cs | 10 + cs/TestMaKR/Renderer/TagInsertion.cs | 14 + cs/TestMaKR/TestMaKR.csproj | 17 ++ cs/TestMaKR/Tokens/BoldToken.cs | 43 +++ cs/TestMaKR/Tokens/HeaderToken.cs | 38 +++ cs/TestMaKR/Tokens/ImageToken.cs | 80 +++++ cs/TestMaKR/Tokens/ItalicToken.cs | 37 +++ cs/TestMaKR/Tokens/ScreeningToken.cs | 28 ++ cs/TestMaKR/Tokens/Token.cs | 75 +++++ cs/TestMaKR/Tokens/TokenExtensions.cs | 63 ++++ 44 files changed, 1583 insertions(+), 307 deletions(-) create mode 100644 cs/.dockerignore delete mode 100644 cs/MarkDownTest/HeaderConverterTests.cs delete mode 100644 cs/MarkDownTest/ItalicConverterTests.cs create mode 100644 cs/MarkDownTest/MarkDownConverterTest.cs delete mode 100644 cs/MarkDownTest/MarkdownConverterTests.cs delete mode 100644 cs/MarkDownTest/StrongConverterTests.cs delete mode 100644 cs/Markdown/HtmlConverter/HeaderTagConverter.cs delete mode 100644 cs/Markdown/HtmlConverter/Interface/IHtmlTagConverter.cs delete mode 100644 cs/Markdown/HtmlConverter/ItalicTagConverter.cs delete mode 100644 cs/Markdown/HtmlConverter/StrongTagConverter.cs create mode 100644 cs/Markdown/Parser/Interface/ITokenHandler.cs create mode 100644 cs/Markdown/Parser/TokenHandler/Handlers/BaseHandler.cs create mode 100644 cs/Markdown/Parser/TokenHandler/Handlers/Delimiter.cs create mode 100644 cs/Markdown/Parser/TokenHandler/Handlers/HeaderHandler.cs create mode 100644 cs/Markdown/Parser/TokenHandler/Handlers/LinkHandler.cs create mode 100644 cs/Markdown/Parser/TokenHandler/Handlers/PairedTagHandler.cs create mode 100644 cs/Markdown/Parser/TokenHandler/Handlers/TokenHandlerFactory.cs create mode 100644 cs/Markdown/Token/ParsingContext.cs create mode 100644 cs/TestMaKR/DefaultTagSets.cs create mode 100644 cs/TestMaKR/Dockerfile create mode 100644 cs/TestMaKR/Md.cs create mode 100644 cs/TestMaKR/Parser/IMdParser.cs create mode 100644 cs/TestMaKR/Parser/MdParser.cs create mode 100644 cs/TestMaKR/Parser/ParserContext.cs create mode 100644 cs/TestMaKR/Program.cs create mode 100644 cs/TestMaKR/Renderer/HtmlRenderer.cs create mode 100644 cs/TestMaKR/Renderer/HtmlTag.cs create mode 100644 cs/TestMaKR/Renderer/IRenderer.cs create mode 100644 cs/TestMaKR/Renderer/TagInsertion.cs create mode 100644 cs/TestMaKR/TestMaKR.csproj create mode 100644 cs/TestMaKR/Tokens/BoldToken.cs create mode 100644 cs/TestMaKR/Tokens/HeaderToken.cs create mode 100644 cs/TestMaKR/Tokens/ImageToken.cs create mode 100644 cs/TestMaKR/Tokens/ItalicToken.cs create mode 100644 cs/TestMaKR/Tokens/ScreeningToken.cs create mode 100644 cs/TestMaKR/Tokens/Token.cs create mode 100644 cs/TestMaKR/Tokens/TokenExtensions.cs diff --git a/MarkdownSpec.md b/MarkdownSpec.md index 886e99c95..adeb07b65 100644 --- a/MarkdownSpec.md +++ b/MarkdownSpec.md @@ -70,4 +70,14 @@ __Непарные_ символы в рамках одного абзаца н превратится в: -\

Заголовок \с \разными\ символами\\

\ No newline at end of file +\

Заголовок \с \разными\ символами\\

+ + + +# Ссылка + +Текст, оформленный в виде [текст ссылки](URL), должен превращаться в HTML-тег вот так: +[пример ссылки](https://example.com) + +превратится в: +пример ссылки \ No newline at end of file diff --git a/cs/.dockerignore b/cs/.dockerignore new file mode 100644 index 000000000..cd967fc3a --- /dev/null +++ b/cs/.dockerignore @@ -0,0 +1,25 @@ +**/.dockerignore +**/.env +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/.idea +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/azds.yaml +**/bin +**/charts +**/docker-compose* +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/obj +**/secrets.dev.yaml +**/values.dev.yaml +LICENSE +README.md \ No newline at end of file diff --git a/cs/MarkDownTest/HeaderConverterTests.cs b/cs/MarkDownTest/HeaderConverterTests.cs deleted file mode 100644 index 81274ad59..000000000 --- a/cs/MarkDownTest/HeaderConverterTests.cs +++ /dev/null @@ -1,45 +0,0 @@ -using FluentAssertions; -using Markdown; -using Markdown.interfaces; -using NUnit.Framework; - -namespace MarkDownTest; - -public class HeaderConverterTests -{ - private HeaderTagConverter _converter; - - [SetUp] - public void Setup() - { - _converter = new HeaderTagConverter(); - } - - [Test] - public void ConvertToHtml_SimpleHeader_ReturnsHeaderWithH1Tags() - { - var tokens = new List - { - new() { Text = "# Header", Type = TokenType.Header } - }; - - var result = _converter.ConvertToHtml(tokens.ToList()); - - result.Single().Text.Should().Be("

Header

", - "Header should be wrapped in h1 tags"); - } - - [Test] - public void ConvertToHtml_NoHeader_ReturnsUnmodifiedTokens() - { - var tokens = new List - { - new() { Text = "Regular text", Type = TokenType.Italic } - }; - - var result = _converter.ConvertToHtml(tokens.ToList()); - - result.Should().BeEquivalentTo(tokens, - "Non-header tokens should remain unchanged"); - } -} \ No newline at end of file diff --git a/cs/MarkDownTest/ItalicConverterTests.cs b/cs/MarkDownTest/ItalicConverterTests.cs deleted file mode 100644 index c4d302fa4..000000000 --- a/cs/MarkDownTest/ItalicConverterTests.cs +++ /dev/null @@ -1,45 +0,0 @@ -using FluentAssertions; -using Markdown; -using Markdown.interfaces; -using NUnit.Framework; - -namespace MarkDownTest; - -public class ItalicConverterTests -{ - private ItalicTagConverter _converter; - - [SetUp] - public void Setup() - { - _converter = new ItalicTagConverter(); - } - - [Test] - public void ConvertToHtml_SimpleItalic_ReturnsTextWithEmTags() - { - var tokens = new List - { - new Token { Text = "_italic_", Type = TokenType.Italic } - }; - - var result = _converter.ConvertToHtml(tokens.ToList()); - - result.Single().Text.Should().Be("italic", - "Italic text should be wrapped in em tags"); - } - - [Test] - public void ConvertToHtml_NoItalic_ReturnsUnmodifiedTokens() - { - var tokens = new List - { - new Token { Text = "Regular text", Type = TokenType.Strong } - }; - - var result = _converter.ConvertToHtml(tokens.ToList()); - - result.Should().BeEquivalentTo(tokens, - "Non-italic tokens should remain unchanged"); - } -} \ No newline at end of file diff --git a/cs/MarkDownTest/MarkDownConverterTest.cs b/cs/MarkDownTest/MarkDownConverterTest.cs new file mode 100644 index 000000000..c486b996e --- /dev/null +++ b/cs/MarkDownTest/MarkDownConverterTest.cs @@ -0,0 +1,191 @@ +using FluentAssertions; +using Markdown; +using Markdown.interfaces; +using NUnit.Framework; + +namespace MarkDownTest; + +public class MarkdownConverterTests +{ + private IMarkdownConverter _converter; + + [SetUp] + public void Setup() + { + _converter = new MarkdownConverter(); + } + + [Test] + public void Convert_TextOnly_ShouldReturnPlainText() + { + var tokens = new List + { + Token.CreateText("Hello, World!", 0) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("Hello, World!"); + } + + [Test] + public void Convert_StrongText_ShouldReturnStrongHtml() + { + var tokens = new List + { + Token.CreateStrong(true, 0), + Token.CreateText("bold", 2), + Token.CreateStrong(false, 6) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("bold"); + } + + [Test] + public void Convert_ItalicText_ShouldReturnItalicHtml() + { + var tokens = new List + { + Token.CreateItalic(true, 0), + Token.CreateText("italic", 1), + Token.CreateItalic(false, 7) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("italic"); + } + + [Test] + public void Convert_StrongAndItalicNested_ShouldReturnNestedHtml() + { + var tokens = new List + { + Token.CreateStrong(true, 0), + Token.CreateText("bold ", 2), + Token.CreateItalic(true, 7), + Token.CreateText("italic", 8), + Token.CreateItalic(false, 14), + Token.CreateText(" bold", 15), + Token.CreateStrong(false, 20) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("bold italic bold"); + } + + [Test] + public void Convert_Header_ShouldReturnHeaderHtml() + { + var tokens = new List + { + Token.CreateHeader(2, 0), + Token.CreateText("Header Text", 2), + new Token("", TokenType.Header, TagState.Close, 13, 2) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("

Header Text

"); + } + + [Test] + public void Convert_HeaderWithFormatting_ShouldReturnFormattedHeaderHtml() + { + var tokens = new List + { + Token.CreateHeader(1, 0), + Token.CreateText("Welcome to ", 1), + Token.CreateItalic(true, 12), + Token.CreateText("Markdown", 13), + Token.CreateItalic(false, 21), + new Token("", TokenType.Header, TagState.Close, 22, 1) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("

Welcome to Markdown

"); + } + + [Test] + public void Convert_IncorrectClosingTag_ShouldAddAsText() + { + var tokens = new List + { + Token.CreateStrong(true, 0), + Token.CreateText("bold ", 2), + Token.CreateItalic(true, 7), + Token.CreateText("italic", 8), + Token.CreateStrong(false, 14), + Token.CreateText(" text_", 16) + }; + + var result = _converter.Convert(tokens); + + + result.Should().Be("bold italic__ text_"); + } + + [Test] + public void Convert_UnmatchedClosingTag_ShouldAddAsText() + { + var tokens = new List + { + Token.CreateItalic(false, 0), + Token.CreateText("text_", 1) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("_text_"); + } + + [Test] + public void Convert_UnclosedTags_ShouldCloseRemainingTags() + { + var tokens = new List + { + Token.CreateStrong(true, 0), + Token.CreateText("bold", 2) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("bold"); + } + + [Test] + public void Convert_Link_ShouldReturnLinkHtml() + { + var tokens = new List + { + Token.CreateLink("пример ссылки", "https://example.com", 0) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("пример ссылки"); + } + + + [Test] + public void Convert_StrongTextWithLink_ShouldReturnCorrectHtml() + { + var tokens = new List + { + Token.CreateStrong(true, 0), + Token.CreateText("Посетите ", 2), + Token.CreateLink("сайт", "https://example.com", 11), + Token.CreateStrong(false, 31) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("Посетите сайт"); + } + + +} \ No newline at end of file diff --git a/cs/MarkDownTest/MarkdownConverterTests.cs b/cs/MarkDownTest/MarkdownConverterTests.cs deleted file mode 100644 index c0be4fe16..000000000 --- a/cs/MarkDownTest/MarkdownConverterTests.cs +++ /dev/null @@ -1,44 +0,0 @@ -using FluentAssertions; -using Markdown; -using NUnit.Framework; - -namespace MarkDownTest; - -public class MarkdownConverterTests -{ - private MarkdownConverter _converter; - - [SetUp] - public void Setup() - { - _converter = new MarkdownConverter(); - } - - [Test] - public void Convert_ComplexMarkdown_ReturnsCorrectlyFormattedHtml() - { - var tokens = new List - { - new() { Text = "# ", Type = TokenType.Header }, - new() { Text = "Header with ", Type = TokenType.Header }, - new() { Text = "_italic_", Type = TokenType.Italic }, - new() { Text = " and ", Type = TokenType.Header }, - new() { Text = "__bold__", Type = TokenType.Strong } - }; - - var result = _converter.Convert(tokens); - - result.Should().Be("

Header with italic and bold

", - "Complex markdown should be converted with proper nesting"); - } - - [Test] - public void Convert_EmptyTokenList_ReturnsEmptyString() - { - var tokens = new List(); - - var result = _converter.Convert(tokens); - - result.Should().BeEmpty("Empty token list should result in empty string"); - } -} \ No newline at end of file diff --git a/cs/MarkDownTest/MarkdownParserTests.cs b/cs/MarkDownTest/MarkdownParserTests.cs index 31e4a0498..9499d4350 100644 --- a/cs/MarkDownTest/MarkdownParserTests.cs +++ b/cs/MarkDownTest/MarkdownParserTests.cs @@ -1,13 +1,14 @@ using FluentAssertions; using Markdown; using Markdown.Parser; +using Markdown.Parser.Interface; using NUnit.Framework; namespace MarkDownTest; -public class MarkdownParserTests +public class MarkdownParserTests { - private MarkdownParser _parser; + private IMarkdownParser _parser; [SetUp] public void Setup() @@ -16,96 +17,251 @@ public void Setup() } [Test] - public void Parse_EmptyString_ReturnsEmptyTokenList() + public void Parse_PlainText_ReturnsTextToken() { - const string input = ""; + var input = "Simple text"; + var expectedTokens = new[] + { + Token.CreateText("Simple text", 0) + }; + + var tokens = _parser.Parse(input); + + tokens.Should().BeEquivalentTo(expectedTokens, + options => options.WithStrictOrdering()); + } - var tokens = _parser.Parse(input).ToList(); + [Test] + public void Parse_TextWithSymbols_ReturnsTextToken() + { + var input = "Text with symbols !@#$%^&*()"; + var expectedTokens = new[] + { + Token.CreateText("Text with symbols !@#$%^&*()", 0) + }; - tokens.Should().BeEmpty("Empty input should result in empty token list"); + var tokens = _parser.Parse(input); + + tokens.Should().BeEquivalentTo(expectedTokens, + options => options.WithStrictOrdering()); } - [TestCase("# Header", TokenType.Header, "Header")] - [TestCase("_italic_", TokenType.Italic, "italic")] - [TestCase("__strong__", TokenType.Strong, "strong")] - public void Parse_SingleElement_ReturnsCorrectToken(string input, TokenType expectedType, string expectedText) + [TestCase("# Header", 1)] + [TestCase("## Header", 2)] + [TestCase("###### Header", 6)] + public void Parse_Header_ReturnsHeaderAndTextTokens(string input, int expectedLevel) { - var tokens = _parser.Parse(input).ToList(); - - tokens.Should().ContainSingle() - .Which.Should().Match(t => - t.Type == expectedType && - t.Text == expectedText); + var expectedTokens = new[] + { + Token.CreateHeader(expectedLevel, 0), + Token.CreateText("Header", expectedLevel + 1) + }; + + var tokens = _parser.Parse(input); + + tokens.Should().BeEquivalentTo(expectedTokens, + options => options.WithStrictOrdering()); } - [Test] - public void Parse_ComplexMarkdown_ReturnsCorrectTokenSequence() + [TestCase("#Header")] + [TestCase("####### Header")] + [TestCase("Text# Header")] + public void Parse_InvalidHeader_ReturnsSingleTextToken(string input) { - const string input = "# Header with __strong__ and _italic_"; var expectedTokens = new[] - { - new Token { Type = TokenType.Header, Text = "Header with " }, - new Token { Type = TokenType.Strong, Text = "strong" }, - new Token { Type = TokenType.Header, Text = " and " }, - new Token { Type = TokenType.Italic, Text = "italic" } + { + Token.CreateText(input, 0) }; - - var tokens = _parser.Parse(input).ToList(); - + + var tokens = _parser.Parse(input); + tokens.Should().BeEquivalentTo(expectedTokens, options => options.WithStrictOrdering()); } [Test] - public void Parse_UnderscoresInMiddleOfWord_NotConsideredAsMarkup() + public void Parse_StrongEmphasis_ReturnsCorrectTokens() { - const string input = "some_word_with_underscores"; + var input = "__bold__"; + var expectedTokens = new[] + { + Token.CreateStrong(true, 0), + Token.CreateText("bold", 2), + Token.CreateStrong(false, 6) + }; + + var tokens = _parser.Parse(input); + + tokens.Should().BeEquivalentTo(expectedTokens, + options => options.WithStrictOrdering()); + } - var tokens = _parser.Parse(input).ToList(); - - tokens.Should().ContainSingle() - .Which.Text.Should().Be("some_word_with_underscores", - "Underscores in middle of word should not be treated as markup"); + [Test] + public void Parse_ItalicEmphasis_ReturnsCorrectTokens() + { + var input = "_italic_"; + var expectedTokens = new[] + { + Token.CreateItalic(true, 0), + Token.CreateText("italic", 1), + Token.CreateItalic(false, 7) + }; + + var tokens = _parser.Parse(input); + + tokens.Should().BeEquivalentTo(expectedTokens, + options => options.WithStrictOrdering()); } [Test] - public void Parse_EscapedUnderscores_NotConsideredAsMarkup() + public void Parse_NestedEmphasis_ReturnsCorrectTokens() { - const string input = "Text with \\_escaped\\_ underscores"; - - var tokens = _parser.Parse(input).ToList(); - - tokens.Should().ContainSingle() - .Which.Text.Should().Be("Text with _escaped_ underscores", - "Escaped underscores should not be treated as markup"); + var input = "__bold _italic_ text__"; + var expectedTokens = new[] + { + Token.CreateStrong(true, 0), + Token.CreateText("bold ", 2), + Token.CreateItalic(true, 7), + Token.CreateText("italic", 8), + Token.CreateItalic(false, 14), + Token.CreateText(" text", 15), + Token.CreateStrong(false, 20) + }; + + var tokens = _parser.Parse(input); + + tokens.Should().BeEquivalentTo(expectedTokens, + options => options.WithStrictOrdering()); } [Test] - public void Parse_HeaderWithoutSpace_NotConsideredAsHeader() + public void Parse_NestedTags_ClosesInCorrectOrder() { - const string input = "#Not a header"; - - var tokens = _parser.Parse(input).ToList(); - - tokens.Should().ContainSingle() - .Which.Text.Should().Be("#Not a header", - "# without space should not be treated as header"); + var input = "__bold _italic__ text_"; + var expectedTokens = new[] + { + Token.CreateStrong(true, 0), + Token.CreateText("bold ", 2), + Token.CreateItalic(true, 7), + Token.CreateText("italic", 8), + Token.CreateStrong(false, 14), + Token.CreateText(" text", 16), + Token.CreateItalic(false, 21) + }; + + var tokens = _parser.Parse(input); + + tokens.Should().BeEquivalentTo(expectedTokens, + options => options.WithStrictOrdering()); + } + + [Test] + public void Parse_ComplexMarkdown_ReturnsCorrectTokenSequence() + { + var input = "# Header\n__bold _italic_ text__"; + var expectedTokens = new[] + { + Token.CreateHeader(1, 0), + Token.CreateText("Header\n", 2), + Token.CreateStrong(true, 9), + Token.CreateText("bold ", 11), + Token.CreateItalic(true, 16), + Token.CreateText("italic", 17), + Token.CreateItalic(false, 23), + Token.CreateText(" text", 24), + Token.CreateStrong(false, 29) + }; + + var tokens = _parser.Parse(input); + + tokens.Should().BeEquivalentTo(expectedTokens, + options => options.WithStrictOrdering()); + } + + [Test] + public void Parse_NestedTags_HandlesNestedStrongAndItalic() + { + var input = "__bold _italic_ bold__"; + var expectedTokens = new[] + { + Token.CreateStrong(true, 0), + Token.CreateText("bold ", 2), + Token.CreateItalic(true, 7), + Token.CreateText("italic", 8), + Token.CreateItalic(false, 14), + Token.CreateText(" bold", 15), + Token.CreateStrong(false, 20), + }; + + var tokens = _parser.Parse(input); + + tokens.Should().BeEquivalentTo(expectedTokens, + options => options.WithStrictOrdering()); + } + + [TestCase("")] + [TestCase(" ")] + [TestCase("\n")] + public void Parse_MinimalInput_ReturnsTextToken(string input) + { + var expectedTokens = input.Length == 0 + ? Array.Empty() + : new[] { Token.CreateText(input, 0) }; + + var tokens = _parser.Parse(input); + + tokens.Should().BeEquivalentTo(expectedTokens, + options => options.WithStrictOrdering()); } [Test] - public void Parse_MultipleLines_HandledCorrectly() + public void Parse_MixedContent_PreservesWhitespaceInTextTokens() { - const string input = "# Header\n_italic_\n__strong__"; - var expectedTokens = new Token[] - { - new() { Type = TokenType.Header, Text = "Header" }, - new() { Type = TokenType.Italic, Text = "italic" }, - new() { Type = TokenType.Strong, Text = "strong" } + var input = "Text __with spaces__ here"; + var expectedTokens = new[] + { + Token.CreateText("Text ", 0), + Token.CreateStrong(true, 6), + Token.CreateText("with spaces", 8), + Token.CreateStrong(false, 20), + Token.CreateText(" here", 22) }; - - var tokens = _parser.Parse(input).ToList(); - + + var tokens = _parser.Parse(input); + tokens.Should().BeEquivalentTo(expectedTokens, options => options.WithStrictOrdering()); } + + [Test] + public void Parse_Link_ReturnsLinkToken() + { + var input = "[пример ссылки](https://example.com)"; + var expectedTokens = new[] + { + Token.CreateLink("пример ссылки", "https://example.com", 0) + }; + + var tokens = _parser.Parse(input); + + tokens.Should().BeEquivalentTo(expectedTokens, options => options.WithStrictOrdering()); + } + + + [Test] + public void Parse_StrongTextWithLink_ReturnsCorrectTokens() + { + var input = "__Посетите [сайт](https://example.com)__"; + var expectedTokens = new[] + { + Token.CreateStrong(true, 0), + Token.CreateText("Посетите ", 2), + Token.CreateLink("сайт", "https://example.com", 11), + Token.CreateStrong(false, 38) + }; + + var tokens = _parser.Parse(input); + + tokens.Should().BeEquivalentTo(expectedTokens, options => options.WithStrictOrdering()); + } } \ No newline at end of file diff --git a/cs/MarkDownTest/StrongConverterTests.cs b/cs/MarkDownTest/StrongConverterTests.cs deleted file mode 100644 index e188fefd5..000000000 --- a/cs/MarkDownTest/StrongConverterTests.cs +++ /dev/null @@ -1,45 +0,0 @@ -using FluentAssertions; -using Markdown; -using Markdown.interfaces; -using NUnit.Framework; - -namespace MarkDownTest; - -public class StrongConverterTests -{ - private StrongTagConverter _converter; - - [SetUp] - public void Setup() - { - _converter = new StrongTagConverter(); - } - - [Test] - public void ConvertToHtml_SimpleStrong_ReturnsTextWithStrongTags() - { - var tokens = new List - { - new Token { Text = "__strong__", Type = TokenType.Strong } - }; - - var result = _converter.ConvertToHtml(tokens.ToList()); - - result.Single().Text.Should().Be("strong", - "Strong text should be wrapped in strong tags"); - } - - [Test] - public void ConvertToHtml_NoStrong_ReturnsUnmodifiedTokens() - { - var tokens = new List - { - new Token { Text = "Regular text", Type = TokenType.Header } - }; - - var result = _converter.ConvertToHtml(tokens.ToList()); - - result.Should().BeEquivalentTo(tokens, - "Non-strong tokens should remain unchanged"); - } -} \ No newline at end of file diff --git a/cs/Markdown/HtmlConverter/HeaderTagConverter.cs b/cs/Markdown/HtmlConverter/HeaderTagConverter.cs deleted file mode 100644 index c8c31713e..000000000 --- a/cs/Markdown/HtmlConverter/HeaderTagConverter.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Markdown.interfaces; - -public class HeaderTagConverter : IHtmlTagConverter -{ - public IList ConvertToHtml(IList tokens) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/HtmlConverter/Interface/IHtmlTagConverter.cs b/cs/Markdown/HtmlConverter/Interface/IHtmlTagConverter.cs deleted file mode 100644 index c7afdc28e..000000000 --- a/cs/Markdown/HtmlConverter/Interface/IHtmlTagConverter.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Markdown.interfaces; - -public interface IHtmlTagConverter -{ - IList ConvertToHtml(IList tokens); -} \ No newline at end of file diff --git a/cs/Markdown/HtmlConverter/ItalicTagConverter.cs b/cs/Markdown/HtmlConverter/ItalicTagConverter.cs deleted file mode 100644 index 338efe233..000000000 --- a/cs/Markdown/HtmlConverter/ItalicTagConverter.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Markdown.interfaces; - -public class ItalicTagConverter : IHtmlTagConverter -{ - public IList ConvertToHtml(IList tokens) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/HtmlConverter/StrongTagConverter.cs b/cs/Markdown/HtmlConverter/StrongTagConverter.cs deleted file mode 100644 index 2999aeb5c..000000000 --- a/cs/Markdown/HtmlConverter/StrongTagConverter.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Markdown.interfaces; - -public class StrongTagConverter : IHtmlTagConverter -{ - public IList ConvertToHtml(IList tokens) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/MarkdownConverter.cs b/cs/Markdown/MarkDownConverter/MarkdownConverter.cs index c8ba278a2..ead6f550c 100644 --- a/cs/Markdown/MarkDownConverter/MarkdownConverter.cs +++ b/cs/Markdown/MarkDownConverter/MarkdownConverter.cs @@ -1,37 +1,109 @@ using System.Text; using Markdown.interfaces; -namespace Markdown; - -public class MarkdownConverter : IMarkdownConverter +namespace Markdown { - private readonly List tagConverters = CreateTagConverters(); - - private static List CreateTagConverters() + public class MarkdownConverter : IMarkdownConverter { - return - [ - new HeaderTagConverter(), - new ItalicTagConverter(), - new StrongTagConverter() - ]; - } + public string Convert(IEnumerable tokens) + { + var result = new StringBuilder(); + var tagStack = new Stack(); - public string Convert(IEnumerable tokens) - { - IList convertedTokens = tokens.ToArray(); - var result = new StringBuilder(); + foreach (var token in tokens) + { + switch (token.Type) + { + case TokenType.Text: + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + break; - foreach (var tagConverter in tagConverters) + case TokenType.Strong: + case TokenType.Italic: + HandleFormattingTag(token, tagStack, result); + break; + + case TokenType.Header: + HandleHeaderTag(token, tagStack, result); + break; + case TokenType.Link: + HandleLinkToken(token, result); + break; + } + } + + while (tagStack.Count > 0) + { + var openTag = tagStack.Pop(); + result.Append(GetClosingTag(openTag)); + } + + return result.ToString(); + } + + private void HandleLinkToken(Token token, StringBuilder result) { - convertedTokens = tagConverter.ConvertToHtml(convertedTokens); + result.Append($"{token.Text}"); } + + private void HandleFormattingTag(Token token, Stack tagStack, StringBuilder result) + { + if (token.State == TagState.Open) + { + result.Append(GetOpeningTag(token.Type)); + tagStack.Push(token.Type); + } + else + { + if (tagStack.Count > 0 && tagStack.Peek() == token.Type) + { + result.Append(GetClosingTag(token.Type)); + tagStack.Pop(); + } + else + { + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + } + } + } + + private void HandleHeaderTag(Token token, Stack tagStack, StringBuilder result) + { + if (token.State == TagState.Open) + { + result.Append($""); + tagStack.Push(TokenType.Header); + return; + } + + if (tagStack.Count > 0 && tagStack.Peek() == TokenType.Header) + { + result.Append($""); + tagStack.Pop(); + return; + } - foreach (var text in convertedTokens.Select(token => token.Text)) + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + } + + private string GetOpeningTag(TokenType type) { - result.Append(text); + return type switch + { + TokenType.Strong => "", + TokenType.Italic => "", + _ => string.Empty + }; } - return result.ToString(); + private string GetClosingTag(TokenType type) + { + return type switch + { + TokenType.Strong => "", + TokenType.Italic => "", + _ => string.Empty + }; + } } } \ No newline at end of file diff --git a/cs/Markdown/Parser/Interface/ITokenHandler.cs b/cs/Markdown/Parser/Interface/ITokenHandler.cs new file mode 100644 index 000000000..7db772ba0 --- /dev/null +++ b/cs/Markdown/Parser/Interface/ITokenHandler.cs @@ -0,0 +1,6 @@ +namespace Markdown.Parser.TokenHandler; + +public interface ITokenHandler +{ + bool TryHandle(ParsingContext context, out Token token, out int skip); +} \ No newline at end of file diff --git a/cs/Markdown/Parser/MarkdownParser.cs b/cs/Markdown/Parser/MarkdownParser.cs index c66f016a3..ba33df335 100644 --- a/cs/Markdown/Parser/MarkdownParser.cs +++ b/cs/Markdown/Parser/MarkdownParser.cs @@ -1,11 +1,65 @@ -using Markdown.Parser.Interface; +using System.Text; +using Markdown.Parser.Interface; +using Markdown.Parser.TokenHandler; +using Markdown.Parser.TokenHandler.Handlers; namespace Markdown.Parser; public class MarkdownParser : IMarkdownParser { - public IEnumerable Parse(string markdownText) + private readonly IList handlers; + + public MarkdownParser() { - throw new NotImplementedException(); + handlers = TokenHandlerFactory.CreateHandlers(); + } + + public IEnumerable Parse(string text) + { + var tokens = new List(); + var openTags = new Stack(); + var textBuffer = new StringBuilder(); + var textStart = 0; + var position = 0; + + while (position < text.Length) + { + var context = new ParsingContext(text, position, openTags); + var handled = false; + + foreach (var handler in handlers) + { + if (!handler.TryHandle(context, out var token, out var skip)) + { + continue; + } + + if (textBuffer.Length > 0) + { + tokens.Add(Token.CreateText(textBuffer.ToString(), textStart)); + textBuffer.Clear(); + } + + tokens.Add(token); + position += skip; + handled = true; + break; + } + + if (handled) + { + continue; + } + + if (textBuffer.Length == 0) + textStart = position; + textBuffer.Append(text[position]); + position++; + } + + if (textBuffer.Length > 0) + tokens.Add(Token.CreateText(textBuffer.ToString(), textStart)); + + return tokens; } -} +} \ No newline at end of file diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/BaseHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/BaseHandler.cs new file mode 100644 index 000000000..a5f114ad5 --- /dev/null +++ b/cs/Markdown/Parser/TokenHandler/Handlers/BaseHandler.cs @@ -0,0 +1,21 @@ +namespace Markdown.Parser.TokenHandler; + +public abstract class BaseTokenHandler : ITokenHandler +{ + protected readonly Delimiter Delimiter; + + public BaseTokenHandler(Delimiter delimiter) + { + Delimiter = delimiter; + } + + public abstract bool TryHandle(ParsingContext context, out Token token, out int skip); + + protected bool IsMatch(string text, int position, string pattern) + { + if (position + pattern.Length > text.Length) + return false; + + return text.Substring(position, pattern.Length) == pattern; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/Delimiter.cs b/cs/Markdown/Parser/TokenHandler/Handlers/Delimiter.cs new file mode 100644 index 000000000..580dd48d9 --- /dev/null +++ b/cs/Markdown/Parser/TokenHandler/Handlers/Delimiter.cs @@ -0,0 +1,15 @@ +namespace Markdown.Parser.TokenHandler; + +public class Delimiter +{ + public string Opening { get; } + public string Closing { get; } + public TokenType Type { get; } + + public Delimiter(string opening, string closing, TokenType type) + { + Opening = opening; + Closing = closing; + Type = type; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/HeaderHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/HeaderHandler.cs new file mode 100644 index 000000000..5d13d0a05 --- /dev/null +++ b/cs/Markdown/Parser/TokenHandler/Handlers/HeaderHandler.cs @@ -0,0 +1,41 @@ +namespace Markdown.Parser.TokenHandler.Handlers; + +public class HeaderHandler : BaseTokenHandler +{ + public HeaderHandler() : base(new Delimiter("#", "", TokenType.Header)) + { + } + + public override bool TryHandle(ParsingContext context, out Token token, out int skip) + { + token = null; + skip = 0; + + if (!context.IsStartOfLine || !IsMatch(context.Text, context.Position, Delimiter.Opening)) + return false; + + var level = 1; + var pos = context.Position + 1; + + while (pos < context.Text.Length && context.Text[pos] == '#' && level < 6) + { + level++; + pos++; + } + + if (pos >= context.Text.Length || context.Text[pos] != ' ') + return false; + + context.OpenTags.Push(Delimiter.Type); + + token = new Token( + new string('#', level), + TokenType.Header, + TagState.Open, + context.Position, + level); + + skip = level + 1; + return true; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/LinkHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/LinkHandler.cs new file mode 100644 index 000000000..a9799467e --- /dev/null +++ b/cs/Markdown/Parser/TokenHandler/Handlers/LinkHandler.cs @@ -0,0 +1,35 @@ +namespace Markdown.Parser.TokenHandler.Handlers; + +public class LinkHandler : BaseTokenHandler +{ + public LinkHandler() : base(new Delimiter("[", "]", TokenType.Link)) + { + } + + public override bool TryHandle(ParsingContext context, out Token token, out int skip) + { + token = null; + skip = 0; + var position = context.Position; + var text = context.Text; + + if (text[position] != '[') + return false; + + var closingBracketIndex = text.IndexOf(']', position); + if (closingBracketIndex == -1 || closingBracketIndex + 1 >= text.Length || text[closingBracketIndex + 1] != '(') + return false; + + var closingParenIndex = text.IndexOf(')', closingBracketIndex + 1); + if (closingParenIndex == -1) + return false; + + var linkText = text.Substring(position + 1, closingBracketIndex - position - 1); + var url = text.Substring(closingBracketIndex + 2, closingParenIndex - closingBracketIndex - 2); + + token = new Token(linkText, TokenType.Link, TagState.Open, position, url: url); + skip = closingParenIndex - position + 1; + + return true; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/PairedTagHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/PairedTagHandler.cs new file mode 100644 index 000000000..87746cc69 --- /dev/null +++ b/cs/Markdown/Parser/TokenHandler/Handlers/PairedTagHandler.cs @@ -0,0 +1,53 @@ +namespace Markdown.Parser.TokenHandler.Handlers; + +public class PairedTagHandler : BaseTokenHandler +{ + public PairedTagHandler(Delimiter delimiter) : base(delimiter) + { + } + + public override bool TryHandle(ParsingContext context, out Token token, out int skip) + { + token = null; + skip = 0; + + if (!IsMatch(context.Text, context.Position, Delimiter.Opening)) + return false; + + var isClosing = false; + var tempStack = new Stack(); + + while (context.OpenTags.Count > 0) + { + var openTag = context.OpenTags.Pop(); + tempStack.Push(openTag); + + if (openTag != Delimiter.Type) + { + continue; + } + + isClosing = true; + tempStack.Pop(); + break; + } + + while (tempStack.Count > 0) + { + context.OpenTags.Push(tempStack.Pop()); + } + + if (!isClosing) + { + context.OpenTags.Push(Delimiter.Type); + } + + token = new Token( + Delimiter.Opening, + Delimiter.Type, + isClosing ? TagState.Close : TagState.Open, + context.Position); + skip = Delimiter.Opening.Length; + return true; + } +} diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/TokenHandlerFactory.cs b/cs/Markdown/Parser/TokenHandler/Handlers/TokenHandlerFactory.cs new file mode 100644 index 000000000..f54282bdc --- /dev/null +++ b/cs/Markdown/Parser/TokenHandler/Handlers/TokenHandlerFactory.cs @@ -0,0 +1,24 @@ +using Markdown.Parser.TokenHandler.Handlers; +using static Markdown.Parser.TokenHandler.Delimiter; + +namespace Markdown.Parser.TokenHandler; + +public static class TokenHandlerFactory +{ + private static readonly Dictionary Delimiters = new() + { + { TokenType.Strong, new Delimiter("__", "__", TokenType.Strong) }, + { TokenType.Italic, new Delimiter("_", "_", TokenType.Italic) } + }; + + public static IList CreateHandlers() + { + return new List + { + new LinkHandler(), + new PairedTagHandler(Delimiters[TokenType.Strong]), + new PairedTagHandler(Delimiters[TokenType.Italic]), + new HeaderHandler() + }; + } +} \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs index 5f282702b..388fa99ec 100644 --- a/cs/Markdown/Program.cs +++ b/cs/Markdown/Program.cs @@ -1 +1,5 @@ - \ No newline at end of file +using Markdown; + +var md = new MarkDown(); +var result = md.Render("__bold _italic_ text__"); +Console.WriteLine(result); \ No newline at end of file diff --git a/cs/Markdown/Token/ParsingContext.cs b/cs/Markdown/Token/ParsingContext.cs new file mode 100644 index 000000000..f59c0b1b8 --- /dev/null +++ b/cs/Markdown/Token/ParsingContext.cs @@ -0,0 +1,17 @@ +namespace Markdown.Parser.TokenHandler; + +public class ParsingContext +{ + public string Text { get; } + public int Position { get; } + public Stack OpenTags { get; } + + public ParsingContext(string text, int position, Stack? openTags) + { + Text = text; + Position = position; + OpenTags = openTags ?? new Stack(); + } + + public bool IsStartOfLine => Position == 0 || (Position > 0 && Text[Position - 1] == '\n'); +} \ No newline at end of file diff --git a/cs/Markdown/Token/Token.cs b/cs/Markdown/Token/Token.cs index c033962d5..b82d67d0a 100644 --- a/cs/Markdown/Token/Token.cs +++ b/cs/Markdown/Token/Token.cs @@ -1,8 +1,37 @@ -namespace Markdown; +using Markdown; public class Token { - public string Text { get; set; } - public TokenType Type { get; set; } - public int Position { get; set; } -} + public string Text { get; } + public TokenType Type { get; } + public TagState State { get; } + public int Position { get; } + public int Level { get; } + public string Url { get; } + + public Token(string text, TokenType type, TagState state, int position, int level = 0, string url = null) + { + Text = text; + Type = type; + State = state; + Position = position; + Level = level; + Url = url; + } + + public static Token CreateText(string text, int position) + => new(text, TokenType.Text, TagState.Open, position); + + public static Token CreateStrong(bool isOpening, int position) + => new("__", TokenType.Strong, isOpening ? TagState.Open : TagState.Close, position); + + public static Token CreateItalic(bool isOpening, int position) + => new("_", TokenType.Italic, isOpening ? TagState.Open : TagState.Close, position); + + public static Token CreateHeader(int level, int position) + => new(new string('#', level), TokenType.Header, TagState.Open, position, level); + + public static Token CreateLink(string text, string url, int position) + => new(text, TokenType.Link, TagState.Open, position, url: url); + +} \ No newline at end of file diff --git a/cs/Markdown/Token/TokenType.cs b/cs/Markdown/Token/TokenType.cs index d9a5eb277..2d49b5009 100644 --- a/cs/Markdown/Token/TokenType.cs +++ b/cs/Markdown/Token/TokenType.cs @@ -2,7 +2,15 @@ public enum TokenType { - Header, - Italic, + Text, Strong, + Italic, + Header, + Link +} + +public enum TagState +{ + Open, + Close } \ No newline at end of file diff --git a/cs/TestMaKR/DefaultTagSets.cs b/cs/TestMaKR/DefaultTagSets.cs new file mode 100644 index 000000000..f7d03358d --- /dev/null +++ b/cs/TestMaKR/DefaultTagSets.cs @@ -0,0 +1,26 @@ +using Markdown.Renderer; +using Markdown.Tokens; + +namespace Markdown +{ + public static class DefaultTagSets + { + public static readonly IReadOnlyDictionary HtmlTagsBySeparator = new Dictionary + { + { BoldToken.Separator, new HtmlTag("", "", true) }, + { ItalicToken.Separator, new HtmlTag("", "", true) }, + { HeaderToken.Separator, new HtmlTag("

", "

", true) }, + { ScreeningToken.Separator, new HtmlTag(string.Empty, string.Empty, false) }, + { ImageToken.Separator, new HtmlTag("", string.Empty, false) } + }; + + public static readonly IReadOnlyDictionary> TokensBySeparator = new Dictionary> + { + { ItalicToken.Separator, index => new ItalicToken(index) }, + { BoldToken.Separator, index => new BoldToken(index) }, + { HeaderToken.Separator, index => new HeaderToken(index) }, + { ScreeningToken.Separator, index => new ScreeningToken(index) }, + { ImageToken.Separator, index => new ImageToken(index) } + }; + } +} diff --git a/cs/TestMaKR/Dockerfile b/cs/TestMaKR/Dockerfile new file mode 100644 index 000000000..16448f820 --- /dev/null +++ b/cs/TestMaKR/Dockerfile @@ -0,0 +1,21 @@ +FROM mcr.microsoft.com/dotnet/runtime:8.0 AS base +USER $APP_UID +WORKDIR /app + +FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build +ARG BUILD_CONFIGURATION=Release +WORKDIR /src +COPY ["TestMaKR/TestMaKR.csproj", "TestMaKR/"] +RUN dotnet restore "TestMaKR/TestMaKR.csproj" +COPY . . +WORKDIR "/src/TestMaKR" +RUN dotnet build "TestMaKR.csproj" -c $BUILD_CONFIGURATION -o /app/build + +FROM build AS publish +ARG BUILD_CONFIGURATION=Release +RUN dotnet publish "TestMaKR.csproj" -c $BUILD_CONFIGURATION -o /app/publish /p:UseAppHost=false + +FROM base AS final +WORKDIR /app +COPY --from=publish /app/publish . +ENTRYPOINT ["dotnet", "TestMaKR.dll"] diff --git a/cs/TestMaKR/Md.cs b/cs/TestMaKR/Md.cs new file mode 100644 index 000000000..01b089d97 --- /dev/null +++ b/cs/TestMaKR/Md.cs @@ -0,0 +1,31 @@ +using Markdown.Parser; +using Markdown.Renderer; + +namespace Markdown +{ + public class Md + { + public IMdParser Parser { get; } + public IRenderer Renderer { get; } + + public Md() + { + Renderer = new HtmlRenderer(DefaultTagSets.HtmlTagsBySeparator); + Parser = new MdParser(DefaultTagSets.TokensBySeparator); + } + + public Md(IRenderer renderer, IMdParser parser) + { + Renderer = renderer; + Parser = parser; + } + + public string Render(string text) + { + var tokens = Parser.ParseTokens(text); + var result = Renderer.Render(tokens, text); + + return result; + } + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Parser/IMdParser.cs b/cs/TestMaKR/Parser/IMdParser.cs new file mode 100644 index 000000000..ce7868c7f --- /dev/null +++ b/cs/TestMaKR/Parser/IMdParser.cs @@ -0,0 +1,14 @@ +using System.Collections.Generic; +using Markdown.Tokens; + +namespace Markdown.Parser +{ + public interface IMdParser + { + public IReadOnlyDictionary Tokens { get; } + public IReadOnlyList Result { get; } + public string TextToParse { get; } + public IEnumerable ParseTokens(string textToParse); + public void AddScreening(ScreeningToken token); + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Parser/MdParser.cs b/cs/TestMaKR/Parser/MdParser.cs new file mode 100644 index 000000000..eda3d6973 --- /dev/null +++ b/cs/TestMaKR/Parser/MdParser.cs @@ -0,0 +1,118 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Markdown.Tokens; + +namespace Markdown.Parser +{ + public class MdParser : IMdParser + { + private ParserContext ParserContext { get; set; } + + public readonly IReadOnlyDictionary> TokensBySeparator; + public string TextToParse => ParserContext.TextToParse; + public IReadOnlyDictionary Tokens => ParserContext.Tokens; + public IReadOnlyList Result => ParserContext.Result; + + public MdParser(IReadOnlyDictionary> tokensBySeparator) + { + TokensBySeparator = tokensBySeparator; + } + + public IEnumerable ParseTokens(string textToParse) + { + ParserContext = new ParserContext(textToParse); + var possibleTag = new StringBuilder(); + + for (var i = 0; i < TextToParse.Length; i++) + { + var symbol = TextToParse[i]; + + if (symbol == '\n') + { + ParserContext.Tokens.Clear(); + possibleTag.Clear(); + continue; + } + + if (TokensBySeparator.Keys.Any(x => x.StartsWith($"{possibleTag}{symbol}"))) + { + possibleTag.Append(symbol); + continue; + } + + var tag = possibleTag.ToString(); + + if (TokensBySeparator.ContainsKey(tag)) + ProcessToken(tag, i - tag.Length); + + possibleTag.Clear(); + + if (TokensBySeparator.Keys.Any(x => x.StartsWith(symbol))) + possibleTag.Append(symbol); + } + + if (possibleTag.Length > 0) + ProcessToken(possibleTag.ToString(), TextToParse.Length - possibleTag.Length); + + return ParserContext.Result; + } + + public void AddScreening(ScreeningToken token) + { + ParserContext.Tokens.Add(token.GetSeparator(), token); + } + + private void ProcessToken(string separator, int index) + { + if (ExecuteScreening(index)) + { + separator = separator[1..]; + index++; + + if (!(separator.Length > 0 && TokensBySeparator.ContainsKey(separator))) + return; + } + + if (ParserContext.Tokens.Remove(separator, out var token) && Token.IsCorrectTokenCloseIndex(index, TextToParse)) + { + token.Close(index); + + if (token.Validate(this)) + ParserContext.Result.Add(token); + + return; + } + + token = TokensBySeparator[separator].Invoke(index); + + if (token.IsNonPaired && token.Validate(this)) + ParserContext.Result.Add(token); + else if (Token.IsCorrectTokenOpenIndex(index, TextToParse, separator.Length)) + ParserContext.Tokens[separator] = token; + } + + private bool ExecuteScreening(int index) + { + if (!ParserContext.Tokens.TryGetValue(ScreeningToken.Separator, out var token)) + return false; + + if (token.CloseIndex > index - ScreeningToken.Separator.Length) + return true; + + if (token.CloseIndex != index - ScreeningToken.Separator.Length) + { + if (token.CloseIndex <= index) + ParserContext.Tokens.Remove(ScreeningToken.Separator); + + return false; + } + + if (token.OpenIndex == token.CloseIndex) + ParserContext.Result.Add(token); + + return true; + } + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Parser/ParserContext.cs b/cs/TestMaKR/Parser/ParserContext.cs new file mode 100644 index 000000000..097b81e29 --- /dev/null +++ b/cs/TestMaKR/Parser/ParserContext.cs @@ -0,0 +1,19 @@ +using System.Collections.Generic; +using Markdown.Tokens; + +namespace Markdown.Parser +{ + public class ParserContext + { + public readonly List Result; + public readonly string TextToParse; + public readonly Dictionary Tokens; + + public ParserContext(string textToParse) + { + Result = new List(); + TextToParse = textToParse; + Tokens = new Dictionary(); + } + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Program.cs b/cs/TestMaKR/Program.cs new file mode 100644 index 000000000..2b64fa1fe --- /dev/null +++ b/cs/TestMaKR/Program.cs @@ -0,0 +1,5 @@ +using Markdown; + +var md = new Md(); +var result = md.Render("__bold _italic__ text__"); +Console.WriteLine(result); \ No newline at end of file diff --git a/cs/TestMaKR/Renderer/HtmlRenderer.cs b/cs/TestMaKR/Renderer/HtmlRenderer.cs new file mode 100644 index 000000000..612b4fe53 --- /dev/null +++ b/cs/TestMaKR/Renderer/HtmlRenderer.cs @@ -0,0 +1,72 @@ +using System.Collections.Generic; +using System.Text; +using Markdown.Tokens; + +namespace Markdown.Renderer +{ + public class HtmlRenderer : IRenderer + { + public readonly IReadOnlyDictionary HtmlTagsBySeparator; + + public HtmlRenderer(IReadOnlyDictionary htmlTagsBySeparators) + { + HtmlTagsBySeparator = htmlTagsBySeparators; + } + + public string Render(IEnumerable tokens, string text) + { + var tagInsertions = GetTagInsertions(tokens); + var result = new StringBuilder(); + var index = 0; + + while (index < text.Length) + { + if (tagInsertions.TryGetValue(index, out var replacement)) + { + result.Append(replacement.Tag); + index += replacement.Shift; + + if (replacement.Shift > 0) + continue; + } + + result.Append(text[index]); + index++; + } + + if (tagInsertions.TryGetValue(text.Length, out var endTag)) + result.Append(endTag.Tag); + + return result.ToString(); + } + + private Dictionary GetTagInsertions(IEnumerable tokens) + { + var result = new Dictionary(); + + foreach (var token in tokens) + { + var htmlTag = HtmlTagsBySeparator[token.GetSeparator()]; + + if (token.IsContented) + result[token.OpenIndex] = GetContentedTokenInsertion(token, htmlTag); + else + result[token.OpenIndex] = new TagInsertion(htmlTag.OpenTag, token.GetSeparator().Length); + + if (htmlTag.IsPaired) + result[token.CloseIndex] = new TagInsertion(htmlTag.CloseTag, token.IsNonPaired ? 0 : token.GetSeparator().Length); + } + + return result; + } + + private static TagInsertion GetContentedTokenInsertion(Token token, HtmlTag htmlTag) + { + var altText = token.AltText.Length > 0 ? $" alt=\"{token.AltText}\"" : string.Empty; + var source = $"src=\"{token.Source}\""; + var insertion = htmlTag.OpenTag.Insert(htmlTag.OpenTag.Length - 1, $"{source}{altText}"); + var shift = token.CloseIndex - token.OpenIndex + 1; + return new TagInsertion(insertion, shift); + } + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Renderer/HtmlTag.cs b/cs/TestMaKR/Renderer/HtmlTag.cs new file mode 100644 index 000000000..86022b5f7 --- /dev/null +++ b/cs/TestMaKR/Renderer/HtmlTag.cs @@ -0,0 +1,16 @@ +namespace Markdown.Renderer +{ + public class HtmlTag + { + public readonly string CloseTag; + public readonly bool IsPaired; + public readonly string OpenTag; + + public HtmlTag(string openTag, string closeTag, bool isPaired) + { + OpenTag = openTag; + CloseTag = closeTag; + IsPaired = isPaired; + } + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Renderer/IRenderer.cs b/cs/TestMaKR/Renderer/IRenderer.cs new file mode 100644 index 000000000..7c7e1beee --- /dev/null +++ b/cs/TestMaKR/Renderer/IRenderer.cs @@ -0,0 +1,10 @@ +using System.Collections.Generic; +using Markdown.Tokens; + +namespace Markdown.Renderer +{ + public interface IRenderer + { + public string Render(IEnumerable tokens, string text); + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Renderer/TagInsertion.cs b/cs/TestMaKR/Renderer/TagInsertion.cs new file mode 100644 index 000000000..8a05bac5e --- /dev/null +++ b/cs/TestMaKR/Renderer/TagInsertion.cs @@ -0,0 +1,14 @@ +namespace Markdown.Renderer +{ + internal class TagInsertion + { + public readonly int Shift; + public readonly string Tag; + + public TagInsertion(string tag, int shift) + { + Tag = tag; + Shift = shift; + } + } +} \ No newline at end of file diff --git a/cs/TestMaKR/TestMaKR.csproj b/cs/TestMaKR/TestMaKR.csproj new file mode 100644 index 000000000..4fa85771c --- /dev/null +++ b/cs/TestMaKR/TestMaKR.csproj @@ -0,0 +1,17 @@ + + + + Exe + net8.0 + enable + enable + Linux + + + + + .dockerignore + + + + diff --git a/cs/TestMaKR/Tokens/BoldToken.cs b/cs/TestMaKR/Tokens/BoldToken.cs new file mode 100644 index 000000000..37bd5dffd --- /dev/null +++ b/cs/TestMaKR/Tokens/BoldToken.cs @@ -0,0 +1,43 @@ +using System.Collections.Generic; +using Markdown.Parser; + +namespace Markdown.Tokens +{ + public class BoldToken : Token + { + public const string Separator = "__"; + + public override bool IsNonPaired => false; + public override bool IsContented => false; + public BoldToken(int openIndex) : base(openIndex) { } + internal BoldToken(int openIndex, int closeIndex) : base(openIndex, closeIndex) { } + + public override string GetSeparator() + { + return Separator; + } + + internal override bool Validate(IMdParser parser) + { + this.ValidatePlacedCorrectly(parser.TextToParse); + ValidateInteractions(parser.Tokens); + + return IsCorrect; + } + + private void ValidateInteractions(IReadOnlyDictionary tokens) + { + if (!IsCorrect || !tokens.TryGetValue(ItalicToken.Separator, out var italicToken)) + return; + + if (this.IsIntersectWith(italicToken)) + { + italicToken.IsCorrect = false; + IsCorrect = false; + } + + if (italicToken.OpenIndex < OpenIndex && italicToken.IsOpened) + IsCorrect = false; + } + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Tokens/HeaderToken.cs b/cs/TestMaKR/Tokens/HeaderToken.cs new file mode 100644 index 000000000..fe2f35fba --- /dev/null +++ b/cs/TestMaKR/Tokens/HeaderToken.cs @@ -0,0 +1,38 @@ +using Markdown.Parser; + +namespace Markdown.Tokens +{ + public class HeaderToken : Token + { + public const string Separator = "# "; + + public override bool IsNonPaired => true; + public override bool IsContented => false; + public HeaderToken(int openIndex) : base(openIndex) { } + internal HeaderToken(int openIndex, int closeIndex) : base(openIndex, closeIndex) { } + + public override string GetSeparator() + { + return Separator; + } + + internal override bool Validate(IMdParser parser) + { + if (OpenIndex != 0 && parser.TextToParse[OpenIndex - 1] != '\n' && parser.TextToParse[OpenIndex - 1] != '\r') + return false; + + var closeIndexLf = parser.TextToParse.IndexOf('\n', OpenIndex); + + var closeIndex = closeIndexLf > 0 && parser.TextToParse[closeIndexLf - 1] == '\r' + ? closeIndexLf - 1 + : closeIndexLf; + + if (closeIndex == -1) + closeIndex = parser.TextToParse.Length; + + Close(closeIndex); + + return true; + } + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Tokens/ImageToken.cs b/cs/TestMaKR/Tokens/ImageToken.cs new file mode 100644 index 000000000..b832642da --- /dev/null +++ b/cs/TestMaKR/Tokens/ImageToken.cs @@ -0,0 +1,80 @@ +using System; +using Markdown.Parser; + +namespace Markdown.Tokens +{ + public class ImageToken : Token + { + public const string Separator = "!["; + + private string altText; + private string source; + public override bool IsNonPaired => true; + public override bool IsContented => true; + public ImageToken(int openIndex) : base(openIndex) { } + internal ImageToken(int openIndex, int closeIndex) : base(openIndex, closeIndex) { } + internal ImageToken(int openIndex, int closeIndex, string source, string altText) : base(openIndex, closeIndex) + { + this.source = source; + this.altText = altText; + } + + public override string Source + { + get => source ?? string.Empty; + set + { + if (source == null) + source = value; + else + throw new InvalidOperationException("There is already a source"); + } + } + + public override string AltText + { + get => altText ?? string.Empty; + set + { + if (altText == null) + altText = value; + else + throw new InvalidOperationException("There is already an alt text"); + } + } + + public override string GetSeparator() + { + return Separator; + } + + internal override bool Validate(IMdParser parser) + { + var text = parser.TextToParse; + var endOfAltText = text.IndexOf(']', OpenIndex); + var startOfSource = text.IndexOf('(', OpenIndex); + var endOfSource = text.IndexOf(')', OpenIndex); + var endOfParagraph = text.IndexOf('\n', OpenIndex); + if (endOfParagraph < 0) + endOfParagraph = text.Length - 1; + + if (endOfAltText == -1 || startOfSource == -1 || endOfSource == -1) + return false; + + if (!(endOfAltText < startOfSource && startOfSource < endOfSource)) + return false; + + if (startOfSource != endOfAltText + 1 || endOfSource > endOfParagraph) + return false; + + var altText = text.Substring(OpenIndex + GetSeparator().Length, endOfAltText - OpenIndex - GetSeparator().Length); + AltText = altText; + var source = text.Substring(startOfSource + 1, endOfSource - startOfSource - 1); + Source = source; + parser.AddScreening(new ScreeningToken(OpenIndex, endOfSource)); + + Close(endOfSource); + return true; + } + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Tokens/ItalicToken.cs b/cs/TestMaKR/Tokens/ItalicToken.cs new file mode 100644 index 000000000..f96cb6ca3 --- /dev/null +++ b/cs/TestMaKR/Tokens/ItalicToken.cs @@ -0,0 +1,37 @@ +using System.Collections.Generic; +using Markdown.Parser; + +namespace Markdown.Tokens +{ + public class ItalicToken : Token + { + public const string Separator = "_"; + + public override bool IsNonPaired => false; + public override bool IsContented => false; + public ItalicToken(int openIndex) : base(openIndex) { } + internal ItalicToken(int openIndex, int closeIndex) : base(openIndex, closeIndex) { } + + public override string GetSeparator() + { + return Separator; + } + + internal override bool Validate(IMdParser parser) + { + this.ValidatePlacedCorrectly(parser.TextToParse); + ValidateInteractions(parser.Tokens); + + return IsCorrect; + } + + private void ValidateInteractions(IReadOnlyDictionary tokens) + { + if (!tokens.TryGetValue(BoldToken.Separator, out var boldToken)) return; + if (!this.IsIntersectWith(boldToken)) return; + + boldToken.IsCorrect = false; + IsCorrect = false; + } + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Tokens/ScreeningToken.cs b/cs/TestMaKR/Tokens/ScreeningToken.cs new file mode 100644 index 000000000..7ea0bde19 --- /dev/null +++ b/cs/TestMaKR/Tokens/ScreeningToken.cs @@ -0,0 +1,28 @@ +using Markdown.Parser; + +namespace Markdown.Tokens +{ + public class ScreeningToken : Token + { + public const string Separator = "\\"; + + public override bool IsNonPaired => true; + public override bool IsContented => false; + + public ScreeningToken(int openIndex) : base(openIndex) { } + internal ScreeningToken(int openIndex, int closeIndex) : base(openIndex, closeIndex) { } + + public override string GetSeparator() + { + return Separator; + } + + internal override bool Validate(IMdParser parser) + { + Close(OpenIndex); + parser.AddScreening(this); + + return false; + } + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Tokens/Token.cs b/cs/TestMaKR/Tokens/Token.cs new file mode 100644 index 000000000..715ff48ec --- /dev/null +++ b/cs/TestMaKR/Tokens/Token.cs @@ -0,0 +1,75 @@ +using System; +using Markdown.Parser; + +namespace Markdown.Tokens +{ + public abstract class Token + + { + private bool isCorrect = true; + public int OpenIndex { get; } + public int CloseIndex { get; private set; } + public bool IsOpened => CloseIndex == 0; + public abstract bool IsNonPaired { get; } + public abstract bool IsContented { get; } + public virtual string AltText { get; set; } + public virtual string Source { get; set; } + + public virtual bool IsCorrect + { + get => isCorrect; + set + { + if (!isCorrect) + return; + + isCorrect = value; + } + } + + protected Token(int openIndex) + { + if (openIndex < 0) + throw new ArgumentException("The open index must be greater than zero"); + + OpenIndex = openIndex; + } + + protected Token(int openIndex, int closeIndex) + { + if (openIndex < 0) + throw new ArgumentException("The open index must be greater than zero"); + + OpenIndex = openIndex; + Close(closeIndex); + } + + public void Close(int index) + { + if (!IsOpened) + throw new InvalidOperationException("Token already closed"); + + if (index < OpenIndex) + throw new InvalidOperationException("The close index must be no larger than the open index"); + + CloseIndex = index; + } + + public abstract string GetSeparator(); + + internal abstract bool Validate(IMdParser parser); + + public static bool IsCorrectTokenOpenIndex(int openIndex, string text, int length) + { + var indexNextToSeparator = openIndex + length; + + return openIndex != text.Length - 1 && indexNextToSeparator < text.Length && + text[indexNextToSeparator] != ' '; + } + + public static bool IsCorrectTokenCloseIndex(int closeIndex, string text) + { + return closeIndex != 0 && text[closeIndex - 1] != ' '; + } + } +} \ No newline at end of file diff --git a/cs/TestMaKR/Tokens/TokenExtensions.cs b/cs/TestMaKR/Tokens/TokenExtensions.cs new file mode 100644 index 000000000..1fa2d51d4 --- /dev/null +++ b/cs/TestMaKR/Tokens/TokenExtensions.cs @@ -0,0 +1,63 @@ +using System.Linq; + +namespace Markdown.Tokens +{ + public static class TokenExtensions + { + public static void ValidatePlacedCorrectly(this Token token, string text) + { + token.IsCorrect = !(token.IsInsideTextWithDigits(text) || token.IsInsideDifferentWords(text) || token.IsTokenEmpty(text)); + } + + public static bool IsIntersectWith(this Token thisToken, Token otherToken) + { + return thisToken.OpenIndex < otherToken.OpenIndex && otherToken.OpenIndex < thisToken.CloseIndex; + } + + public static bool IsTokenEmpty(this Token token, string text) + { + return token.GetTokenContent(text).Length == 0; + } + + public static bool IsInsideDifferentWords(this Token token, string text) + { + var openInsideWord = IsSeparatorInsideWord(token.OpenIndex, token.GetSeparator().Length, text); + var closeInsideWord = IsSeparatorInsideWord(token.CloseIndex, token.GetSeparator().Length, text); + var tokenContent = token.GetTokenContent(text); + + return (openInsideWord || closeInsideWord) && tokenContent.Any(x => x == ' '); + } + + public static bool IsInsideTextWithDigits(this Token token, string text) + { + var openInsideWord = IsSeparatorInsideTextWithDigits(token.OpenIndex, token.GetSeparator().Length, text); + var closeInsideWord = IsSeparatorInsideTextWithDigits(token.CloseIndex, token.GetSeparator().Length, text); + + return openInsideWord || closeInsideWord; + } + + public static bool IsSeparatorInsideWord(int index, int separatorLength, string text) + { + var isLeftLetter = index > 0 && char.IsLetter(text[index - 1]); + var isRightLetter = index + separatorLength < text.Length - 1 && char.IsLetter(text[index + separatorLength]); + + return isLeftLetter && isRightLetter; + } + + public static bool IsSeparatorInsideTextWithDigits(int index, int separatorLength, string text) + { + var isLeftLetter = index > 0 && char.IsDigit(text[index - 1]); + var isRightLetter = index + separatorLength < text.Length - 1 && char.IsDigit(text[index + separatorLength]); + + return isLeftLetter && isRightLetter; + } + + public static string GetTokenContent(this Token token, string text) + { + var contentStartIndex = token.OpenIndex + token.GetSeparator().Length; + var contentLength = token.CloseIndex - contentStartIndex; + + return text.Substring(contentStartIndex, contentLength); + } + } +} \ No newline at end of file From b893d3c35d7bc33b96dc8efe0547092b9376ce77 Mon Sep 17 00:00:00 2001 From: Asrom11 Date: Mon, 2 Dec 2024 18:14:43 +0500 Subject: [PATCH 3/4] refactoring --- cs/TestMaKR/DefaultTagSets.cs | 26 ------ cs/TestMaKR/Dockerfile | 21 ----- cs/TestMaKR/Md.cs | 31 ------- cs/TestMaKR/Parser/IMdParser.cs | 14 --- cs/TestMaKR/Parser/MdParser.cs | 118 -------------------------- cs/TestMaKR/Parser/ParserContext.cs | 19 ----- cs/TestMaKR/Program.cs | 5 -- cs/TestMaKR/Renderer/HtmlRenderer.cs | 72 ---------------- cs/TestMaKR/Renderer/HtmlTag.cs | 16 ---- cs/TestMaKR/Renderer/IRenderer.cs | 10 --- cs/TestMaKR/Renderer/TagInsertion.cs | 14 --- cs/TestMaKR/TestMaKR.csproj | 17 ---- cs/TestMaKR/Tokens/BoldToken.cs | 43 ---------- cs/TestMaKR/Tokens/HeaderToken.cs | 38 --------- cs/TestMaKR/Tokens/ImageToken.cs | 80 ----------------- cs/TestMaKR/Tokens/ItalicToken.cs | 37 -------- cs/TestMaKR/Tokens/ScreeningToken.cs | 28 ------ cs/TestMaKR/Tokens/Token.cs | 75 ---------------- cs/TestMaKR/Tokens/TokenExtensions.cs | 63 -------------- 19 files changed, 727 deletions(-) delete mode 100644 cs/TestMaKR/DefaultTagSets.cs delete mode 100644 cs/TestMaKR/Dockerfile delete mode 100644 cs/TestMaKR/Md.cs delete mode 100644 cs/TestMaKR/Parser/IMdParser.cs delete mode 100644 cs/TestMaKR/Parser/MdParser.cs delete mode 100644 cs/TestMaKR/Parser/ParserContext.cs delete mode 100644 cs/TestMaKR/Program.cs delete mode 100644 cs/TestMaKR/Renderer/HtmlRenderer.cs delete mode 100644 cs/TestMaKR/Renderer/HtmlTag.cs delete mode 100644 cs/TestMaKR/Renderer/IRenderer.cs delete mode 100644 cs/TestMaKR/Renderer/TagInsertion.cs delete mode 100644 cs/TestMaKR/TestMaKR.csproj delete mode 100644 cs/TestMaKR/Tokens/BoldToken.cs delete mode 100644 cs/TestMaKR/Tokens/HeaderToken.cs delete mode 100644 cs/TestMaKR/Tokens/ImageToken.cs delete mode 100644 cs/TestMaKR/Tokens/ItalicToken.cs delete mode 100644 cs/TestMaKR/Tokens/ScreeningToken.cs delete mode 100644 cs/TestMaKR/Tokens/Token.cs delete mode 100644 cs/TestMaKR/Tokens/TokenExtensions.cs diff --git a/cs/TestMaKR/DefaultTagSets.cs b/cs/TestMaKR/DefaultTagSets.cs deleted file mode 100644 index f7d03358d..000000000 --- a/cs/TestMaKR/DefaultTagSets.cs +++ /dev/null @@ -1,26 +0,0 @@ -using Markdown.Renderer; -using Markdown.Tokens; - -namespace Markdown -{ - public static class DefaultTagSets - { - public static readonly IReadOnlyDictionary HtmlTagsBySeparator = new Dictionary - { - { BoldToken.Separator, new HtmlTag("", "", true) }, - { ItalicToken.Separator, new HtmlTag("", "", true) }, - { HeaderToken.Separator, new HtmlTag("

", "

", true) }, - { ScreeningToken.Separator, new HtmlTag(string.Empty, string.Empty, false) }, - { ImageToken.Separator, new HtmlTag("", string.Empty, false) } - }; - - public static readonly IReadOnlyDictionary> TokensBySeparator = new Dictionary> - { - { ItalicToken.Separator, index => new ItalicToken(index) }, - { BoldToken.Separator, index => new BoldToken(index) }, - { HeaderToken.Separator, index => new HeaderToken(index) }, - { ScreeningToken.Separator, index => new ScreeningToken(index) }, - { ImageToken.Separator, index => new ImageToken(index) } - }; - } -} diff --git a/cs/TestMaKR/Dockerfile b/cs/TestMaKR/Dockerfile deleted file mode 100644 index 16448f820..000000000 --- a/cs/TestMaKR/Dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -FROM mcr.microsoft.com/dotnet/runtime:8.0 AS base -USER $APP_UID -WORKDIR /app - -FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build -ARG BUILD_CONFIGURATION=Release -WORKDIR /src -COPY ["TestMaKR/TestMaKR.csproj", "TestMaKR/"] -RUN dotnet restore "TestMaKR/TestMaKR.csproj" -COPY . . -WORKDIR "/src/TestMaKR" -RUN dotnet build "TestMaKR.csproj" -c $BUILD_CONFIGURATION -o /app/build - -FROM build AS publish -ARG BUILD_CONFIGURATION=Release -RUN dotnet publish "TestMaKR.csproj" -c $BUILD_CONFIGURATION -o /app/publish /p:UseAppHost=false - -FROM base AS final -WORKDIR /app -COPY --from=publish /app/publish . -ENTRYPOINT ["dotnet", "TestMaKR.dll"] diff --git a/cs/TestMaKR/Md.cs b/cs/TestMaKR/Md.cs deleted file mode 100644 index 01b089d97..000000000 --- a/cs/TestMaKR/Md.cs +++ /dev/null @@ -1,31 +0,0 @@ -using Markdown.Parser; -using Markdown.Renderer; - -namespace Markdown -{ - public class Md - { - public IMdParser Parser { get; } - public IRenderer Renderer { get; } - - public Md() - { - Renderer = new HtmlRenderer(DefaultTagSets.HtmlTagsBySeparator); - Parser = new MdParser(DefaultTagSets.TokensBySeparator); - } - - public Md(IRenderer renderer, IMdParser parser) - { - Renderer = renderer; - Parser = parser; - } - - public string Render(string text) - { - var tokens = Parser.ParseTokens(text); - var result = Renderer.Render(tokens, text); - - return result; - } - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Parser/IMdParser.cs b/cs/TestMaKR/Parser/IMdParser.cs deleted file mode 100644 index ce7868c7f..000000000 --- a/cs/TestMaKR/Parser/IMdParser.cs +++ /dev/null @@ -1,14 +0,0 @@ -using System.Collections.Generic; -using Markdown.Tokens; - -namespace Markdown.Parser -{ - public interface IMdParser - { - public IReadOnlyDictionary Tokens { get; } - public IReadOnlyList Result { get; } - public string TextToParse { get; } - public IEnumerable ParseTokens(string textToParse); - public void AddScreening(ScreeningToken token); - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Parser/MdParser.cs b/cs/TestMaKR/Parser/MdParser.cs deleted file mode 100644 index eda3d6973..000000000 --- a/cs/TestMaKR/Parser/MdParser.cs +++ /dev/null @@ -1,118 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using Markdown.Tokens; - -namespace Markdown.Parser -{ - public class MdParser : IMdParser - { - private ParserContext ParserContext { get; set; } - - public readonly IReadOnlyDictionary> TokensBySeparator; - public string TextToParse => ParserContext.TextToParse; - public IReadOnlyDictionary Tokens => ParserContext.Tokens; - public IReadOnlyList Result => ParserContext.Result; - - public MdParser(IReadOnlyDictionary> tokensBySeparator) - { - TokensBySeparator = tokensBySeparator; - } - - public IEnumerable ParseTokens(string textToParse) - { - ParserContext = new ParserContext(textToParse); - var possibleTag = new StringBuilder(); - - for (var i = 0; i < TextToParse.Length; i++) - { - var symbol = TextToParse[i]; - - if (symbol == '\n') - { - ParserContext.Tokens.Clear(); - possibleTag.Clear(); - continue; - } - - if (TokensBySeparator.Keys.Any(x => x.StartsWith($"{possibleTag}{symbol}"))) - { - possibleTag.Append(symbol); - continue; - } - - var tag = possibleTag.ToString(); - - if (TokensBySeparator.ContainsKey(tag)) - ProcessToken(tag, i - tag.Length); - - possibleTag.Clear(); - - if (TokensBySeparator.Keys.Any(x => x.StartsWith(symbol))) - possibleTag.Append(symbol); - } - - if (possibleTag.Length > 0) - ProcessToken(possibleTag.ToString(), TextToParse.Length - possibleTag.Length); - - return ParserContext.Result; - } - - public void AddScreening(ScreeningToken token) - { - ParserContext.Tokens.Add(token.GetSeparator(), token); - } - - private void ProcessToken(string separator, int index) - { - if (ExecuteScreening(index)) - { - separator = separator[1..]; - index++; - - if (!(separator.Length > 0 && TokensBySeparator.ContainsKey(separator))) - return; - } - - if (ParserContext.Tokens.Remove(separator, out var token) && Token.IsCorrectTokenCloseIndex(index, TextToParse)) - { - token.Close(index); - - if (token.Validate(this)) - ParserContext.Result.Add(token); - - return; - } - - token = TokensBySeparator[separator].Invoke(index); - - if (token.IsNonPaired && token.Validate(this)) - ParserContext.Result.Add(token); - else if (Token.IsCorrectTokenOpenIndex(index, TextToParse, separator.Length)) - ParserContext.Tokens[separator] = token; - } - - private bool ExecuteScreening(int index) - { - if (!ParserContext.Tokens.TryGetValue(ScreeningToken.Separator, out var token)) - return false; - - if (token.CloseIndex > index - ScreeningToken.Separator.Length) - return true; - - if (token.CloseIndex != index - ScreeningToken.Separator.Length) - { - if (token.CloseIndex <= index) - ParserContext.Tokens.Remove(ScreeningToken.Separator); - - return false; - } - - if (token.OpenIndex == token.CloseIndex) - ParserContext.Result.Add(token); - - return true; - } - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Parser/ParserContext.cs b/cs/TestMaKR/Parser/ParserContext.cs deleted file mode 100644 index 097b81e29..000000000 --- a/cs/TestMaKR/Parser/ParserContext.cs +++ /dev/null @@ -1,19 +0,0 @@ -using System.Collections.Generic; -using Markdown.Tokens; - -namespace Markdown.Parser -{ - public class ParserContext - { - public readonly List Result; - public readonly string TextToParse; - public readonly Dictionary Tokens; - - public ParserContext(string textToParse) - { - Result = new List(); - TextToParse = textToParse; - Tokens = new Dictionary(); - } - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Program.cs b/cs/TestMaKR/Program.cs deleted file mode 100644 index 2b64fa1fe..000000000 --- a/cs/TestMaKR/Program.cs +++ /dev/null @@ -1,5 +0,0 @@ -using Markdown; - -var md = new Md(); -var result = md.Render("__bold _italic__ text__"); -Console.WriteLine(result); \ No newline at end of file diff --git a/cs/TestMaKR/Renderer/HtmlRenderer.cs b/cs/TestMaKR/Renderer/HtmlRenderer.cs deleted file mode 100644 index 612b4fe53..000000000 --- a/cs/TestMaKR/Renderer/HtmlRenderer.cs +++ /dev/null @@ -1,72 +0,0 @@ -using System.Collections.Generic; -using System.Text; -using Markdown.Tokens; - -namespace Markdown.Renderer -{ - public class HtmlRenderer : IRenderer - { - public readonly IReadOnlyDictionary HtmlTagsBySeparator; - - public HtmlRenderer(IReadOnlyDictionary htmlTagsBySeparators) - { - HtmlTagsBySeparator = htmlTagsBySeparators; - } - - public string Render(IEnumerable tokens, string text) - { - var tagInsertions = GetTagInsertions(tokens); - var result = new StringBuilder(); - var index = 0; - - while (index < text.Length) - { - if (tagInsertions.TryGetValue(index, out var replacement)) - { - result.Append(replacement.Tag); - index += replacement.Shift; - - if (replacement.Shift > 0) - continue; - } - - result.Append(text[index]); - index++; - } - - if (tagInsertions.TryGetValue(text.Length, out var endTag)) - result.Append(endTag.Tag); - - return result.ToString(); - } - - private Dictionary GetTagInsertions(IEnumerable tokens) - { - var result = new Dictionary(); - - foreach (var token in tokens) - { - var htmlTag = HtmlTagsBySeparator[token.GetSeparator()]; - - if (token.IsContented) - result[token.OpenIndex] = GetContentedTokenInsertion(token, htmlTag); - else - result[token.OpenIndex] = new TagInsertion(htmlTag.OpenTag, token.GetSeparator().Length); - - if (htmlTag.IsPaired) - result[token.CloseIndex] = new TagInsertion(htmlTag.CloseTag, token.IsNonPaired ? 0 : token.GetSeparator().Length); - } - - return result; - } - - private static TagInsertion GetContentedTokenInsertion(Token token, HtmlTag htmlTag) - { - var altText = token.AltText.Length > 0 ? $" alt=\"{token.AltText}\"" : string.Empty; - var source = $"src=\"{token.Source}\""; - var insertion = htmlTag.OpenTag.Insert(htmlTag.OpenTag.Length - 1, $"{source}{altText}"); - var shift = token.CloseIndex - token.OpenIndex + 1; - return new TagInsertion(insertion, shift); - } - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Renderer/HtmlTag.cs b/cs/TestMaKR/Renderer/HtmlTag.cs deleted file mode 100644 index 86022b5f7..000000000 --- a/cs/TestMaKR/Renderer/HtmlTag.cs +++ /dev/null @@ -1,16 +0,0 @@ -namespace Markdown.Renderer -{ - public class HtmlTag - { - public readonly string CloseTag; - public readonly bool IsPaired; - public readonly string OpenTag; - - public HtmlTag(string openTag, string closeTag, bool isPaired) - { - OpenTag = openTag; - CloseTag = closeTag; - IsPaired = isPaired; - } - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Renderer/IRenderer.cs b/cs/TestMaKR/Renderer/IRenderer.cs deleted file mode 100644 index 7c7e1beee..000000000 --- a/cs/TestMaKR/Renderer/IRenderer.cs +++ /dev/null @@ -1,10 +0,0 @@ -using System.Collections.Generic; -using Markdown.Tokens; - -namespace Markdown.Renderer -{ - public interface IRenderer - { - public string Render(IEnumerable tokens, string text); - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Renderer/TagInsertion.cs b/cs/TestMaKR/Renderer/TagInsertion.cs deleted file mode 100644 index 8a05bac5e..000000000 --- a/cs/TestMaKR/Renderer/TagInsertion.cs +++ /dev/null @@ -1,14 +0,0 @@ -namespace Markdown.Renderer -{ - internal class TagInsertion - { - public readonly int Shift; - public readonly string Tag; - - public TagInsertion(string tag, int shift) - { - Tag = tag; - Shift = shift; - } - } -} \ No newline at end of file diff --git a/cs/TestMaKR/TestMaKR.csproj b/cs/TestMaKR/TestMaKR.csproj deleted file mode 100644 index 4fa85771c..000000000 --- a/cs/TestMaKR/TestMaKR.csproj +++ /dev/null @@ -1,17 +0,0 @@ - - - - Exe - net8.0 - enable - enable - Linux - - - - - .dockerignore - - - - diff --git a/cs/TestMaKR/Tokens/BoldToken.cs b/cs/TestMaKR/Tokens/BoldToken.cs deleted file mode 100644 index 37bd5dffd..000000000 --- a/cs/TestMaKR/Tokens/BoldToken.cs +++ /dev/null @@ -1,43 +0,0 @@ -using System.Collections.Generic; -using Markdown.Parser; - -namespace Markdown.Tokens -{ - public class BoldToken : Token - { - public const string Separator = "__"; - - public override bool IsNonPaired => false; - public override bool IsContented => false; - public BoldToken(int openIndex) : base(openIndex) { } - internal BoldToken(int openIndex, int closeIndex) : base(openIndex, closeIndex) { } - - public override string GetSeparator() - { - return Separator; - } - - internal override bool Validate(IMdParser parser) - { - this.ValidatePlacedCorrectly(parser.TextToParse); - ValidateInteractions(parser.Tokens); - - return IsCorrect; - } - - private void ValidateInteractions(IReadOnlyDictionary tokens) - { - if (!IsCorrect || !tokens.TryGetValue(ItalicToken.Separator, out var italicToken)) - return; - - if (this.IsIntersectWith(italicToken)) - { - italicToken.IsCorrect = false; - IsCorrect = false; - } - - if (italicToken.OpenIndex < OpenIndex && italicToken.IsOpened) - IsCorrect = false; - } - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Tokens/HeaderToken.cs b/cs/TestMaKR/Tokens/HeaderToken.cs deleted file mode 100644 index fe2f35fba..000000000 --- a/cs/TestMaKR/Tokens/HeaderToken.cs +++ /dev/null @@ -1,38 +0,0 @@ -using Markdown.Parser; - -namespace Markdown.Tokens -{ - public class HeaderToken : Token - { - public const string Separator = "# "; - - public override bool IsNonPaired => true; - public override bool IsContented => false; - public HeaderToken(int openIndex) : base(openIndex) { } - internal HeaderToken(int openIndex, int closeIndex) : base(openIndex, closeIndex) { } - - public override string GetSeparator() - { - return Separator; - } - - internal override bool Validate(IMdParser parser) - { - if (OpenIndex != 0 && parser.TextToParse[OpenIndex - 1] != '\n' && parser.TextToParse[OpenIndex - 1] != '\r') - return false; - - var closeIndexLf = parser.TextToParse.IndexOf('\n', OpenIndex); - - var closeIndex = closeIndexLf > 0 && parser.TextToParse[closeIndexLf - 1] == '\r' - ? closeIndexLf - 1 - : closeIndexLf; - - if (closeIndex == -1) - closeIndex = parser.TextToParse.Length; - - Close(closeIndex); - - return true; - } - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Tokens/ImageToken.cs b/cs/TestMaKR/Tokens/ImageToken.cs deleted file mode 100644 index b832642da..000000000 --- a/cs/TestMaKR/Tokens/ImageToken.cs +++ /dev/null @@ -1,80 +0,0 @@ -using System; -using Markdown.Parser; - -namespace Markdown.Tokens -{ - public class ImageToken : Token - { - public const string Separator = "!["; - - private string altText; - private string source; - public override bool IsNonPaired => true; - public override bool IsContented => true; - public ImageToken(int openIndex) : base(openIndex) { } - internal ImageToken(int openIndex, int closeIndex) : base(openIndex, closeIndex) { } - internal ImageToken(int openIndex, int closeIndex, string source, string altText) : base(openIndex, closeIndex) - { - this.source = source; - this.altText = altText; - } - - public override string Source - { - get => source ?? string.Empty; - set - { - if (source == null) - source = value; - else - throw new InvalidOperationException("There is already a source"); - } - } - - public override string AltText - { - get => altText ?? string.Empty; - set - { - if (altText == null) - altText = value; - else - throw new InvalidOperationException("There is already an alt text"); - } - } - - public override string GetSeparator() - { - return Separator; - } - - internal override bool Validate(IMdParser parser) - { - var text = parser.TextToParse; - var endOfAltText = text.IndexOf(']', OpenIndex); - var startOfSource = text.IndexOf('(', OpenIndex); - var endOfSource = text.IndexOf(')', OpenIndex); - var endOfParagraph = text.IndexOf('\n', OpenIndex); - if (endOfParagraph < 0) - endOfParagraph = text.Length - 1; - - if (endOfAltText == -1 || startOfSource == -1 || endOfSource == -1) - return false; - - if (!(endOfAltText < startOfSource && startOfSource < endOfSource)) - return false; - - if (startOfSource != endOfAltText + 1 || endOfSource > endOfParagraph) - return false; - - var altText = text.Substring(OpenIndex + GetSeparator().Length, endOfAltText - OpenIndex - GetSeparator().Length); - AltText = altText; - var source = text.Substring(startOfSource + 1, endOfSource - startOfSource - 1); - Source = source; - parser.AddScreening(new ScreeningToken(OpenIndex, endOfSource)); - - Close(endOfSource); - return true; - } - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Tokens/ItalicToken.cs b/cs/TestMaKR/Tokens/ItalicToken.cs deleted file mode 100644 index f96cb6ca3..000000000 --- a/cs/TestMaKR/Tokens/ItalicToken.cs +++ /dev/null @@ -1,37 +0,0 @@ -using System.Collections.Generic; -using Markdown.Parser; - -namespace Markdown.Tokens -{ - public class ItalicToken : Token - { - public const string Separator = "_"; - - public override bool IsNonPaired => false; - public override bool IsContented => false; - public ItalicToken(int openIndex) : base(openIndex) { } - internal ItalicToken(int openIndex, int closeIndex) : base(openIndex, closeIndex) { } - - public override string GetSeparator() - { - return Separator; - } - - internal override bool Validate(IMdParser parser) - { - this.ValidatePlacedCorrectly(parser.TextToParse); - ValidateInteractions(parser.Tokens); - - return IsCorrect; - } - - private void ValidateInteractions(IReadOnlyDictionary tokens) - { - if (!tokens.TryGetValue(BoldToken.Separator, out var boldToken)) return; - if (!this.IsIntersectWith(boldToken)) return; - - boldToken.IsCorrect = false; - IsCorrect = false; - } - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Tokens/ScreeningToken.cs b/cs/TestMaKR/Tokens/ScreeningToken.cs deleted file mode 100644 index 7ea0bde19..000000000 --- a/cs/TestMaKR/Tokens/ScreeningToken.cs +++ /dev/null @@ -1,28 +0,0 @@ -using Markdown.Parser; - -namespace Markdown.Tokens -{ - public class ScreeningToken : Token - { - public const string Separator = "\\"; - - public override bool IsNonPaired => true; - public override bool IsContented => false; - - public ScreeningToken(int openIndex) : base(openIndex) { } - internal ScreeningToken(int openIndex, int closeIndex) : base(openIndex, closeIndex) { } - - public override string GetSeparator() - { - return Separator; - } - - internal override bool Validate(IMdParser parser) - { - Close(OpenIndex); - parser.AddScreening(this); - - return false; - } - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Tokens/Token.cs b/cs/TestMaKR/Tokens/Token.cs deleted file mode 100644 index 715ff48ec..000000000 --- a/cs/TestMaKR/Tokens/Token.cs +++ /dev/null @@ -1,75 +0,0 @@ -using System; -using Markdown.Parser; - -namespace Markdown.Tokens -{ - public abstract class Token - - { - private bool isCorrect = true; - public int OpenIndex { get; } - public int CloseIndex { get; private set; } - public bool IsOpened => CloseIndex == 0; - public abstract bool IsNonPaired { get; } - public abstract bool IsContented { get; } - public virtual string AltText { get; set; } - public virtual string Source { get; set; } - - public virtual bool IsCorrect - { - get => isCorrect; - set - { - if (!isCorrect) - return; - - isCorrect = value; - } - } - - protected Token(int openIndex) - { - if (openIndex < 0) - throw new ArgumentException("The open index must be greater than zero"); - - OpenIndex = openIndex; - } - - protected Token(int openIndex, int closeIndex) - { - if (openIndex < 0) - throw new ArgumentException("The open index must be greater than zero"); - - OpenIndex = openIndex; - Close(closeIndex); - } - - public void Close(int index) - { - if (!IsOpened) - throw new InvalidOperationException("Token already closed"); - - if (index < OpenIndex) - throw new InvalidOperationException("The close index must be no larger than the open index"); - - CloseIndex = index; - } - - public abstract string GetSeparator(); - - internal abstract bool Validate(IMdParser parser); - - public static bool IsCorrectTokenOpenIndex(int openIndex, string text, int length) - { - var indexNextToSeparator = openIndex + length; - - return openIndex != text.Length - 1 && indexNextToSeparator < text.Length && - text[indexNextToSeparator] != ' '; - } - - public static bool IsCorrectTokenCloseIndex(int closeIndex, string text) - { - return closeIndex != 0 && text[closeIndex - 1] != ' '; - } - } -} \ No newline at end of file diff --git a/cs/TestMaKR/Tokens/TokenExtensions.cs b/cs/TestMaKR/Tokens/TokenExtensions.cs deleted file mode 100644 index 1fa2d51d4..000000000 --- a/cs/TestMaKR/Tokens/TokenExtensions.cs +++ /dev/null @@ -1,63 +0,0 @@ -using System.Linq; - -namespace Markdown.Tokens -{ - public static class TokenExtensions - { - public static void ValidatePlacedCorrectly(this Token token, string text) - { - token.IsCorrect = !(token.IsInsideTextWithDigits(text) || token.IsInsideDifferentWords(text) || token.IsTokenEmpty(text)); - } - - public static bool IsIntersectWith(this Token thisToken, Token otherToken) - { - return thisToken.OpenIndex < otherToken.OpenIndex && otherToken.OpenIndex < thisToken.CloseIndex; - } - - public static bool IsTokenEmpty(this Token token, string text) - { - return token.GetTokenContent(text).Length == 0; - } - - public static bool IsInsideDifferentWords(this Token token, string text) - { - var openInsideWord = IsSeparatorInsideWord(token.OpenIndex, token.GetSeparator().Length, text); - var closeInsideWord = IsSeparatorInsideWord(token.CloseIndex, token.GetSeparator().Length, text); - var tokenContent = token.GetTokenContent(text); - - return (openInsideWord || closeInsideWord) && tokenContent.Any(x => x == ' '); - } - - public static bool IsInsideTextWithDigits(this Token token, string text) - { - var openInsideWord = IsSeparatorInsideTextWithDigits(token.OpenIndex, token.GetSeparator().Length, text); - var closeInsideWord = IsSeparatorInsideTextWithDigits(token.CloseIndex, token.GetSeparator().Length, text); - - return openInsideWord || closeInsideWord; - } - - public static bool IsSeparatorInsideWord(int index, int separatorLength, string text) - { - var isLeftLetter = index > 0 && char.IsLetter(text[index - 1]); - var isRightLetter = index + separatorLength < text.Length - 1 && char.IsLetter(text[index + separatorLength]); - - return isLeftLetter && isRightLetter; - } - - public static bool IsSeparatorInsideTextWithDigits(int index, int separatorLength, string text) - { - var isLeftLetter = index > 0 && char.IsDigit(text[index - 1]); - var isRightLetter = index + separatorLength < text.Length - 1 && char.IsDigit(text[index + separatorLength]); - - return isLeftLetter && isRightLetter; - } - - public static string GetTokenContent(this Token token, string text) - { - var contentStartIndex = token.OpenIndex + token.GetSeparator().Length; - var contentLength = token.CloseIndex - contentStartIndex; - - return text.Substring(contentStartIndex, contentLength); - } - } -} \ No newline at end of file From cd44825e3a26e7ccfa1bd6b3fde8dd1e4d41d92d Mon Sep 17 00:00:00 2001 From: Asrom11 Date: Sun, 8 Dec 2024 20:45:49 +0500 Subject: [PATCH 4/4] Refactoring and add new parser tests --- cs/MarkDownTest/Extension/TokenExtension.cs | 14 ++ cs/MarkDownTest/MarkDownConverterTest.cs | 2 + cs/MarkDownTest/MarkdownParserTests.cs | 211 ++++++++++++++---- cs/Markdown/MarkDown.cs | 1 + .../{ => Interface}/IMarkdownConverter.cs | 2 +- .../MarkDownConverter/MarkdownConverter.cs | 126 ++++------- .../TagConverters/HeaderConverter.cs | 30 +++ .../TagConverters/ITagConverter.cs | 10 + .../TagConverters/ItalicConverter.cs | 30 +++ .../TagConverters/LinkCoverter.cs | 14 ++ .../TagConverters/StrongConverter.cs | 30 +++ .../TagConverters/TextConverter.cs | 14 ++ .../Parser/Interface/IMarkdownParser.cs | 2 +- cs/Markdown/Parser/Interface/ITokenHandler.cs | 6 +- cs/Markdown/Parser/MarkdownParser.cs | 19 +- .../TokenHandler/Handlers/BaseHandler.cs | 7 +- .../Parser/TokenHandler/Handlers/Delimiter.cs | 4 +- .../Handlers/EscapedCharacterHandler.cs | 33 +++ .../TokenHandler/Handlers/HeaderHandler.cs | 16 +- .../TokenHandler/Handlers/LinkHandler.cs | 16 +- .../TokenHandler/Handlers/PairedTagHandler.cs | 157 ++++++++++--- .../Handlers/TokenHandlerFactory.cs | 9 +- cs/Markdown/Token/ParsingContext.cs | 4 +- cs/Markdown/Token/TagState.cs | 7 + cs/Markdown/Token/Token.cs | 4 +- cs/Markdown/Token/TokenType.cs | 11 +- 26 files changed, 570 insertions(+), 209 deletions(-) create mode 100644 cs/MarkDownTest/Extension/TokenExtension.cs rename cs/Markdown/MarkDownConverter/{ => Interface}/IMarkdownConverter.cs (58%) create mode 100644 cs/Markdown/MarkDownConverter/TagConverters/HeaderConverter.cs create mode 100644 cs/Markdown/MarkDownConverter/TagConverters/ITagConverter.cs create mode 100644 cs/Markdown/MarkDownConverter/TagConverters/ItalicConverter.cs create mode 100644 cs/Markdown/MarkDownConverter/TagConverters/LinkCoverter.cs create mode 100644 cs/Markdown/MarkDownConverter/TagConverters/StrongConverter.cs create mode 100644 cs/Markdown/MarkDownConverter/TagConverters/TextConverter.cs create mode 100644 cs/Markdown/Parser/TokenHandler/Handlers/EscapedCharacterHandler.cs create mode 100644 cs/Markdown/Token/TagState.cs diff --git a/cs/MarkDownTest/Extension/TokenExtension.cs b/cs/MarkDownTest/Extension/TokenExtension.cs new file mode 100644 index 000000000..e1db39e1a --- /dev/null +++ b/cs/MarkDownTest/Extension/TokenExtension.cs @@ -0,0 +1,14 @@ +using FluentAssertions; +using FluentAssertions.Collections; +using Markdown.Token; + +namespace MarkDownTest.Extension; + +public static class TokenExtension +{ + public static void AssertTokensEqual(this IEnumerable actual, IEnumerable expected) + { + actual.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } +} \ No newline at end of file diff --git a/cs/MarkDownTest/MarkDownConverterTest.cs b/cs/MarkDownTest/MarkDownConverterTest.cs index c486b996e..2839af090 100644 --- a/cs/MarkDownTest/MarkDownConverterTest.cs +++ b/cs/MarkDownTest/MarkDownConverterTest.cs @@ -1,6 +1,8 @@ using FluentAssertions; using Markdown; using Markdown.interfaces; +using Markdown.MarkDownConverter; +using Markdown.Token; using NUnit.Framework; namespace MarkDownTest; diff --git a/cs/MarkDownTest/MarkdownParserTests.cs b/cs/MarkDownTest/MarkdownParserTests.cs index 9499d4350..ea1330979 100644 --- a/cs/MarkDownTest/MarkdownParserTests.cs +++ b/cs/MarkDownTest/MarkdownParserTests.cs @@ -2,6 +2,8 @@ using Markdown; using Markdown.Parser; using Markdown.Parser.Interface; +using Markdown.Token; +using MarkDownTest.Extension; using NUnit.Framework; namespace MarkDownTest; @@ -26,9 +28,8 @@ public void Parse_PlainText_ReturnsTextToken() }; var tokens = _parser.Parse(input); - - tokens.Should().BeEquivalentTo(expectedTokens, - options => options.WithStrictOrdering()); + + tokens.AssertTokensEqual(expectedTokens); } [Test] @@ -41,9 +42,8 @@ public void Parse_TextWithSymbols_ReturnsTextToken() }; var tokens = _parser.Parse(input); - - tokens.Should().BeEquivalentTo(expectedTokens, - options => options.WithStrictOrdering()); + + tokens.AssertTokensEqual(expectedTokens); } [TestCase("# Header", 1)] @@ -59,8 +59,7 @@ public void Parse_Header_ReturnsHeaderAndTextTokens(string input, int expectedLe var tokens = _parser.Parse(input); - tokens.Should().BeEquivalentTo(expectedTokens, - options => options.WithStrictOrdering()); + tokens.AssertTokensEqual(expectedTokens); } [TestCase("#Header")] @@ -74,9 +73,8 @@ public void Parse_InvalidHeader_ReturnsSingleTextToken(string input) }; var tokens = _parser.Parse(input); - - tokens.Should().BeEquivalentTo(expectedTokens, - options => options.WithStrictOrdering()); + + tokens.AssertTokensEqual(expectedTokens); } [Test] @@ -91,9 +89,8 @@ public void Parse_StrongEmphasis_ReturnsCorrectTokens() }; var tokens = _parser.Parse(input); - - tokens.Should().BeEquivalentTo(expectedTokens, - options => options.WithStrictOrdering()); + + tokens.AssertTokensEqual(expectedTokens); } [Test] @@ -108,9 +105,8 @@ public void Parse_ItalicEmphasis_ReturnsCorrectTokens() }; var tokens = _parser.Parse(input); - - tokens.Should().BeEquivalentTo(expectedTokens, - options => options.WithStrictOrdering()); + + tokens.AssertTokensEqual(expectedTokens); } [Test] @@ -130,29 +126,24 @@ public void Parse_NestedEmphasis_ReturnsCorrectTokens() var tokens = _parser.Parse(input); - tokens.Should().BeEquivalentTo(expectedTokens, - options => options.WithStrictOrdering()); + tokens.AssertTokensEqual(expectedTokens); } [Test] public void Parse_NestedTags_ClosesInCorrectOrder() { - var input = "__bold _italic__ text_"; + var input = "__bold italic__ text_"; var expectedTokens = new[] { Token.CreateStrong(true, 0), - Token.CreateText("bold ", 2), - Token.CreateItalic(true, 7), - Token.CreateText("italic", 8), - Token.CreateStrong(false, 14), - Token.CreateText(" text", 16), - Token.CreateItalic(false, 21) + Token.CreateText("bold italic", 2), + Token.CreateStrong(false, 13), + Token.CreateText(" text_", 15), }; var tokens = _parser.Parse(input); - - tokens.Should().BeEquivalentTo(expectedTokens, - options => options.WithStrictOrdering()); + + tokens.AssertTokensEqual(expectedTokens); } [Test] @@ -174,8 +165,7 @@ public void Parse_ComplexMarkdown_ReturnsCorrectTokenSequence() var tokens = _parser.Parse(input); - tokens.Should().BeEquivalentTo(expectedTokens, - options => options.WithStrictOrdering()); + tokens.AssertTokensEqual(expectedTokens); } [Test] @@ -195,23 +185,24 @@ public void Parse_NestedTags_HandlesNestedStrongAndItalic() var tokens = _parser.Parse(input); - tokens.Should().BeEquivalentTo(expectedTokens, - options => options.WithStrictOrdering()); + tokens.AssertTokensEqual(expectedTokens); } [TestCase("")] [TestCase(" ")] [TestCase("\n")] + [TestCase(null)] public void Parse_MinimalInput_ReturnsTextToken(string input) { - var expectedTokens = input.Length == 0 - ? Array.Empty() - : new[] { Token.CreateText(input, 0) }; + var expectedTokens = new[] { Token.CreateText(input, 0) }; + if (input is null || input.Length == 0) + { + expectedTokens = Array.Empty(); + } var tokens = _parser.Parse(input); - tokens.Should().BeEquivalentTo(expectedTokens, - options => options.WithStrictOrdering()); + tokens.AssertTokensEqual(expectedTokens); } [Test] @@ -229,8 +220,7 @@ public void Parse_MixedContent_PreservesWhitespaceInTextTokens() var tokens = _parser.Parse(input); - tokens.Should().BeEquivalentTo(expectedTokens, - options => options.WithStrictOrdering()); + tokens.AssertTokensEqual(expectedTokens); } [Test] @@ -243,25 +233,150 @@ public void Parse_Link_ReturnsLinkToken() }; var tokens = _parser.Parse(input); - - tokens.Should().BeEquivalentTo(expectedTokens, options => options.WithStrictOrdering()); + + tokens.AssertTokensEqual(expectedTokens); } [Test] public void Parse_StrongTextWithLink_ReturnsCorrectTokens() { - var input = "__Посетите [сайт](https://example.com)__"; + var input = "_Посетите [сайт](https://example.com)_"; var expectedTokens = new[] { - Token.CreateStrong(true, 0), - Token.CreateText("Посетите ", 2), - Token.CreateLink("сайт", "https://example.com", 11), - Token.CreateStrong(false, 38) + Token.CreateItalic(true, 0), + Token.CreateText("Посетите ", 1), + Token.CreateLink("сайт", "https://example.com", 10), + Token.CreateItalic(false, 37) }; var tokens = _parser.Parse(input); - tokens.Should().BeEquivalentTo(expectedTokens, options => options.WithStrictOrdering()); + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithIncorrectItalicSpacing_ReturnsTextAsIs() + { + var input = "_подчерки _не считаются_"; + var expectedTokens = new[] + { + Token.CreateText("_подчерки ", 0), + Token.CreateItalic(true, 10), + Token.CreateText("не считаются", 11), + Token.CreateItalic(false, 23) + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithUnderscoresWithinWords_NoFormattingApplied() + { + var input = "ра_зных сл_овах"; + var expectedTokens = new[] + { + Token.CreateText("ра_зных сл_овах", 0) + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithTrailingUnderscoresAfterWords_NoFormattingApplied() + { + var input = "эти_ подчерки_ не должны работать"; + var expectedTokens = new[] + { + Token.CreateText("эти_ подчерки_ не должны работать", 0) + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithUnpairedUnderscore_ReturnsTextAsIs() + { + var input = "Непарные_ символы"; + var expectedTokens = new[] + { + Token.CreateText("Непарные_ символы", 0) + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithUnderscoresAroundNumbers_NoFormattingApplied() + { + var input = "цифрами_12_3"; + var expectedTokens = new[] + { + Token.CreateText("цифрами_12_3", 0) + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithMultipleConsecutiveUnderscores_ReturnsTextAsIs() + { + var input = "____"; + var expectedTokens = new[] + { + Token.CreateText("____", 0) + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } + + + [Test] + public void Parse_WithEscapedCharacters_ReturnsTextWithEscapedSymbols() + { + var input = @"\_не_подчеркивается\_"; + var expectedTokens = new[] + { + Token.CreateText("_", 0), + Token.CreateText("не_подчеркивается", 2), + Token.CreateText("_", 19), + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithLineBreaksBreakingControlCharacters_ReturnsTextAsIs() + { + var input = "Это пример с разрывом _подчеркивания\nна новой строке_"; + var expectedTokens = new[] + { + Token.CreateText("Это пример с разрывом _подчеркивания\nна новой строке_", 0) + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); } } \ No newline at end of file diff --git a/cs/Markdown/MarkDown.cs b/cs/Markdown/MarkDown.cs index 0dc5a9f69..2cd44ff9f 100644 --- a/cs/Markdown/MarkDown.cs +++ b/cs/Markdown/MarkDown.cs @@ -1,4 +1,5 @@ using Markdown.interfaces; +using Markdown.MarkDownConverter; using Markdown.Parser; using Markdown.Parser.Interface; diff --git a/cs/Markdown/MarkDownConverter/IMarkdownConverter.cs b/cs/Markdown/MarkDownConverter/Interface/IMarkdownConverter.cs similarity index 58% rename from cs/Markdown/MarkDownConverter/IMarkdownConverter.cs rename to cs/Markdown/MarkDownConverter/Interface/IMarkdownConverter.cs index 2da855708..78132ab9b 100644 --- a/cs/Markdown/MarkDownConverter/IMarkdownConverter.cs +++ b/cs/Markdown/MarkDownConverter/Interface/IMarkdownConverter.cs @@ -2,5 +2,5 @@ public interface IMarkdownConverter { - string Convert(IEnumerable tokens); + string Convert(IEnumerable tokens); } \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/MarkdownConverter.cs b/cs/Markdown/MarkDownConverter/MarkdownConverter.cs index ead6f550c..6d5b9df42 100644 --- a/cs/Markdown/MarkDownConverter/MarkdownConverter.cs +++ b/cs/Markdown/MarkDownConverter/MarkdownConverter.cs @@ -1,109 +1,61 @@ using System.Text; using Markdown.interfaces; +using Markdown.MarkDownConverter.TagConverters; +using Markdown.Token; -namespace Markdown +namespace Markdown.MarkDownConverter; + +public class MarkdownConverter : IMarkdownConverter { - public class MarkdownConverter : IMarkdownConverter + private readonly IList tagConverters; + + public MarkdownConverter() { - public string Convert(IEnumerable tokens) + tagConverters = new List { - var result = new StringBuilder(); - var tagStack = new Stack(); - - foreach (var token in tokens) - { - switch (token.Type) - { - case TokenType.Text: - result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); - break; - - case TokenType.Strong: - case TokenType.Italic: - HandleFormattingTag(token, tagStack, result); - break; + new TextConverter(), + new StrongConverter(), + new ItalicConverter(), + new HeaderConverter(), + new LinkConverter() + }; + } - case TokenType.Header: - HandleHeaderTag(token, tagStack, result); - break; - case TokenType.Link: - HandleLinkToken(token, result); - break; - } - } - - while (tagStack.Count > 0) - { - var openTag = tagStack.Pop(); - result.Append(GetClosingTag(openTag)); - } + public string Convert(IEnumerable tokens) + { + var result = new StringBuilder(); + var tagStack = new Stack(); - return result.ToString(); - } - - private void HandleLinkToken(Token token, StringBuilder result) - { - result.Append($"{token.Text}"); - } - - private void HandleFormattingTag(Token token, Stack tagStack, StringBuilder result) + foreach (var token in tokens) { - if (token.State == TagState.Open) + var converter = tagConverters.FirstOrDefault(c => c.CanHandle(token.Type)); + if (converter != null) { - result.Append(GetOpeningTag(token.Type)); - tagStack.Push(token.Type); + converter.Handle(token, tagStack, result); } - else + else { - if (tagStack.Count > 0 && tagStack.Peek() == token.Type) - { - result.Append(GetClosingTag(token.Type)); - tagStack.Pop(); - } - else - { - result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); - } + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); } } - private void HandleHeaderTag(Token token, Stack tagStack, StringBuilder result) + while (tagStack.Count > 0) { - if (token.State == TagState.Open) - { - result.Append($""); - tagStack.Push(TokenType.Header); - return; - } - - if (tagStack.Count > 0 && tagStack.Peek() == TokenType.Header) - { - result.Append($""); - tagStack.Pop(); - return; - } - - result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + var openTag = tagStack.Pop(); + result.Append(GetClosingTag(openTag)); } - private string GetOpeningTag(TokenType type) - { - return type switch - { - TokenType.Strong => "", - TokenType.Italic => "", - _ => string.Empty - }; - } + return result.ToString(); + } - private string GetClosingTag(TokenType type) + private string GetClosingTag(TokenType type) + { + return type switch { - return type switch - { - TokenType.Strong => "", - TokenType.Italic => "", - _ => string.Empty - }; - } + TokenType.Strong => "", + TokenType.Italic => "", + TokenType.Header => "", + _ => string.Empty + }; } } \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/TagConverters/HeaderConverter.cs b/cs/Markdown/MarkDownConverter/TagConverters/HeaderConverter.cs new file mode 100644 index 000000000..bf554fd59 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/TagConverters/HeaderConverter.cs @@ -0,0 +1,30 @@ +using System.Text; +using Markdown.Token; + +namespace Markdown.MarkDownConverter.TagConverters; + +public class HeaderConverter : ITagConverter +{ + public bool CanHandle(TokenType type) => type == TokenType.Header; + + public void Handle(Token.Token token, Stack tagStack, StringBuilder result) + { + if (token.State == TagState.Open) + { + result.Append($""); + tagStack.Push(token.Type); + } + else + { + if (tagStack.Count > 0 && tagStack.Peek() == token.Type) + { + result.Append($""); + tagStack.Pop(); + } + else + { + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + } + } + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/TagConverters/ITagConverter.cs b/cs/Markdown/MarkDownConverter/TagConverters/ITagConverter.cs new file mode 100644 index 000000000..ef429bce0 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/TagConverters/ITagConverter.cs @@ -0,0 +1,10 @@ +using System.Text; +using Markdown.Token; + +namespace Markdown.MarkDownConverter.TagConverters; + +public interface ITagConverter +{ + bool CanHandle(TokenType type); + void Handle(Token.Token token, Stack tagStack, StringBuilder result); +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/TagConverters/ItalicConverter.cs b/cs/Markdown/MarkDownConverter/TagConverters/ItalicConverter.cs new file mode 100644 index 000000000..ac42979ad --- /dev/null +++ b/cs/Markdown/MarkDownConverter/TagConverters/ItalicConverter.cs @@ -0,0 +1,30 @@ +using System.Text; +using Markdown.Token; + +namespace Markdown.MarkDownConverter.TagConverters; + +public class ItalicConverter : ITagConverter +{ + public bool CanHandle(TokenType type) => type == TokenType.Italic; + + public void Handle(Token.Token token, Stack tagStack, StringBuilder result) + { + if (token.State == TagState.Open) + { + result.Append(""); + tagStack.Push(token.Type); + } + else + { + if (tagStack.Count > 0 && tagStack.Peek() == token.Type) + { + result.Append(""); + tagStack.Pop(); + } + else + { + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + } + } + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/TagConverters/LinkCoverter.cs b/cs/Markdown/MarkDownConverter/TagConverters/LinkCoverter.cs new file mode 100644 index 000000000..25cbf6fb5 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/TagConverters/LinkCoverter.cs @@ -0,0 +1,14 @@ +using System.Text; +using Markdown.Token; + +namespace Markdown.MarkDownConverter.TagConverters; + +public class LinkConverter : ITagConverter +{ + public bool CanHandle(TokenType type) => type == TokenType.Link; + + public void Handle(Token.Token token, Stack tagStack, StringBuilder result) + { + result.Append($"{System.Net.WebUtility.HtmlEncode(token.Text)}"); + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/TagConverters/StrongConverter.cs b/cs/Markdown/MarkDownConverter/TagConverters/StrongConverter.cs new file mode 100644 index 000000000..d1f842f79 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/TagConverters/StrongConverter.cs @@ -0,0 +1,30 @@ +using System.Text; +using Markdown.Token; + +namespace Markdown.MarkDownConverter.TagConverters; + +public class StrongConverter : ITagConverter +{ + public bool CanHandle(TokenType type) => type == TokenType.Strong; + + public void Handle(Token.Token token, Stack tagStack, StringBuilder result) + { + if (token.State == TagState.Open) + { + result.Append(""); + tagStack.Push(token.Type); + } + else + { + if (tagStack.Count > 0 && tagStack.Peek() == token.Type) + { + result.Append(""); + tagStack.Pop(); + } + else + { + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + } + } + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/TagConverters/TextConverter.cs b/cs/Markdown/MarkDownConverter/TagConverters/TextConverter.cs new file mode 100644 index 000000000..7f807a0b3 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/TagConverters/TextConverter.cs @@ -0,0 +1,14 @@ +using System.Text; +using Markdown.Token; + +namespace Markdown.MarkDownConverter.TagConverters; + +public class TextConverter : ITagConverter +{ + public bool CanHandle(TokenType type) => type == TokenType.Text; + + public void Handle(Token.Token token, Stack tagStack, StringBuilder result) + { + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/Interface/IMarkdownParser.cs b/cs/Markdown/Parser/Interface/IMarkdownParser.cs index c3fe4d1fc..4d082200c 100644 --- a/cs/Markdown/Parser/Interface/IMarkdownParser.cs +++ b/cs/Markdown/Parser/Interface/IMarkdownParser.cs @@ -2,5 +2,5 @@ public interface IMarkdownParser { - IEnumerable Parse(string markdownText); + IEnumerable Parse(string markdownText); } diff --git a/cs/Markdown/Parser/Interface/ITokenHandler.cs b/cs/Markdown/Parser/Interface/ITokenHandler.cs index 7db772ba0..ee8196a52 100644 --- a/cs/Markdown/Parser/Interface/ITokenHandler.cs +++ b/cs/Markdown/Parser/Interface/ITokenHandler.cs @@ -1,6 +1,8 @@ -namespace Markdown.Parser.TokenHandler; +using Markdown.Token; + +namespace Markdown.Parser.Interface; public interface ITokenHandler { - bool TryHandle(ParsingContext context, out Token token, out int skip); + bool TryHandle(ParsingContext context, out Token.Token token, out int skip); } \ No newline at end of file diff --git a/cs/Markdown/Parser/MarkdownParser.cs b/cs/Markdown/Parser/MarkdownParser.cs index ba33df335..63c62f6b1 100644 --- a/cs/Markdown/Parser/MarkdownParser.cs +++ b/cs/Markdown/Parser/MarkdownParser.cs @@ -1,7 +1,9 @@ -using System.Text; + + +using System.Text; using Markdown.Parser.Interface; using Markdown.Parser.TokenHandler; -using Markdown.Parser.TokenHandler.Handlers; +using Markdown.Token; namespace Markdown.Parser; @@ -14,14 +16,19 @@ public MarkdownParser() handlers = TokenHandlerFactory.CreateHandlers(); } - public IEnumerable Parse(string text) + public IEnumerable Parse(string text) { - var tokens = new List(); + var tokens = new List(); var openTags = new Stack(); var textBuffer = new StringBuilder(); var textStart = 0; var position = 0; + if (text is null) + { + return tokens; + } + while (position < text.Length) { var context = new ParsingContext(text, position, openTags); @@ -36,7 +43,7 @@ public IEnumerable Parse(string text) if (textBuffer.Length > 0) { - tokens.Add(Token.CreateText(textBuffer.ToString(), textStart)); + tokens.Add(Token.Token.CreateText(textBuffer.ToString(), textStart)); textBuffer.Clear(); } @@ -58,7 +65,7 @@ public IEnumerable Parse(string text) } if (textBuffer.Length > 0) - tokens.Add(Token.CreateText(textBuffer.ToString(), textStart)); + tokens.Add(Token.Token.CreateText(textBuffer.ToString(), textStart)); return tokens; } diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/BaseHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/BaseHandler.cs index a5f114ad5..613a3f9c7 100644 --- a/cs/Markdown/Parser/TokenHandler/Handlers/BaseHandler.cs +++ b/cs/Markdown/Parser/TokenHandler/Handlers/BaseHandler.cs @@ -1,4 +1,7 @@ -namespace Markdown.Parser.TokenHandler; +using Markdown.Parser.Interface; +using Markdown.Token; + +namespace Markdown.Parser.TokenHandler.Handlers; public abstract class BaseTokenHandler : ITokenHandler { @@ -9,7 +12,7 @@ public BaseTokenHandler(Delimiter delimiter) Delimiter = delimiter; } - public abstract bool TryHandle(ParsingContext context, out Token token, out int skip); + public abstract bool TryHandle(ParsingContext context, out Token.Token token, out int skip); protected bool IsMatch(string text, int position, string pattern) { diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/Delimiter.cs b/cs/Markdown/Parser/TokenHandler/Handlers/Delimiter.cs index 580dd48d9..f4e61c3f9 100644 --- a/cs/Markdown/Parser/TokenHandler/Handlers/Delimiter.cs +++ b/cs/Markdown/Parser/TokenHandler/Handlers/Delimiter.cs @@ -1,4 +1,6 @@ -namespace Markdown.Parser.TokenHandler; +using Markdown.Token; + +namespace Markdown.Parser.TokenHandler; public class Delimiter { diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/EscapedCharacterHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/EscapedCharacterHandler.cs new file mode 100644 index 000000000..94175ccdc --- /dev/null +++ b/cs/Markdown/Parser/TokenHandler/Handlers/EscapedCharacterHandler.cs @@ -0,0 +1,33 @@ +using Markdown.Token; + +namespace Markdown.Parser.TokenHandler.Handlers +{ + public class EscapedCharacterHandler : BaseTokenHandler + { + public EscapedCharacterHandler() : base(new Delimiter(@"\", @"\", TokenType.Escaped)) + { + } + + public override bool TryHandle(ParsingContext context, out Token.Token token, out int skip) + { + token = null; + skip = 0; + + var text = context.Text; + var position = context.Position; + + if (!IsMatch(text, position, Delimiter.Opening)) + return false; + + if (position + 1 >= text.Length) + return false; + + var escapedChar = text[position + 1]; + token = Token.Token.CreateText(escapedChar.ToString(), position); + skip = 2; + + return true; + } + + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/HeaderHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/HeaderHandler.cs index 5d13d0a05..850f9f6ae 100644 --- a/cs/Markdown/Parser/TokenHandler/Handlers/HeaderHandler.cs +++ b/cs/Markdown/Parser/TokenHandler/Handlers/HeaderHandler.cs @@ -1,4 +1,6 @@ -namespace Markdown.Parser.TokenHandler.Handlers; +using Markdown.Token; + +namespace Markdown.Parser.TokenHandler.Handlers; public class HeaderHandler : BaseTokenHandler { @@ -6,7 +8,7 @@ public HeaderHandler() : base(new Delimiter("#", "", TokenType.Header)) { } - public override bool TryHandle(ParsingContext context, out Token token, out int skip) + public override bool TryHandle(ParsingContext context, out Token.Token token, out int skip) { token = null; skip = 0; @@ -15,20 +17,20 @@ public override bool TryHandle(ParsingContext context, out Token token, out int return false; var level = 1; - var pos = context.Position + 1; + var position = context.Position + 1; - while (pos < context.Text.Length && context.Text[pos] == '#' && level < 6) + while (position < context.Text.Length && context.Text[position] == '#' && level < 6) { level++; - pos++; + position++; } - if (pos >= context.Text.Length || context.Text[pos] != ' ') + if (position >= context.Text.Length || context.Text[position] != ' ') return false; context.OpenTags.Push(Delimiter.Type); - token = new Token( + token = new Token.Token( new string('#', level), TokenType.Header, TagState.Open, diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/LinkHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/LinkHandler.cs index a9799467e..ae3cb4ecf 100644 --- a/cs/Markdown/Parser/TokenHandler/Handlers/LinkHandler.cs +++ b/cs/Markdown/Parser/TokenHandler/Handlers/LinkHandler.cs @@ -1,4 +1,6 @@ -namespace Markdown.Parser.TokenHandler.Handlers; +using Markdown.Token; + +namespace Markdown.Parser.TokenHandler.Handlers; public class LinkHandler : BaseTokenHandler { @@ -6,7 +8,7 @@ public LinkHandler() : base(new Delimiter("[", "]", TokenType.Link)) { } - public override bool TryHandle(ParsingContext context, out Token token, out int skip) + public override bool TryHandle(ParsingContext context, out Token.Token token, out int skip) { token = null; skip = 0; @@ -20,15 +22,15 @@ public override bool TryHandle(ParsingContext context, out Token token, out int if (closingBracketIndex == -1 || closingBracketIndex + 1 >= text.Length || text[closingBracketIndex + 1] != '(') return false; - var closingParenIndex = text.IndexOf(')', closingBracketIndex + 1); - if (closingParenIndex == -1) + var closingParentIndex = text.IndexOf(')', closingBracketIndex + 1); + if (closingParentIndex == -1) return false; var linkText = text.Substring(position + 1, closingBracketIndex - position - 1); - var url = text.Substring(closingBracketIndex + 2, closingParenIndex - closingBracketIndex - 2); + var url = text.Substring(closingBracketIndex + 2, closingParentIndex - closingBracketIndex - 2); - token = new Token(linkText, TokenType.Link, TagState.Open, position, url: url); - skip = closingParenIndex - position + 1; + token = new Token.Token(linkText, TokenType.Link, TagState.Open, position, url: url); + skip = closingParentIndex - position + 1; return true; } diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/PairedTagHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/PairedTagHandler.cs index 87746cc69..01ed79020 100644 --- a/cs/Markdown/Parser/TokenHandler/Handlers/PairedTagHandler.cs +++ b/cs/Markdown/Parser/TokenHandler/Handlers/PairedTagHandler.cs @@ -1,53 +1,142 @@ -namespace Markdown.Parser.TokenHandler.Handlers; +using Markdown.Token; -public class PairedTagHandler : BaseTokenHandler +namespace Markdown.Parser.TokenHandler.Handlers { - public PairedTagHandler(Delimiter delimiter) : base(delimiter) + public class PairedTagHandler : BaseTokenHandler { - } + public PairedTagHandler(Delimiter delimiter) : base(delimiter) + { + } - public override bool TryHandle(ParsingContext context, out Token token, out int skip) - { - token = null; - skip = 0; + public override bool TryHandle(ParsingContext context, out Token.Token token, out int skip) + { + token = null; + skip = 0; + + var text = context.Text; + var position = context.Position; + + if (!IsOpeningDelimiter(text, position)) + return false; - if (!IsMatch(context.Text, context.Position, Delimiter.Opening)) - return false; + if (HasExcessiveDelimiters(text, position)) + return false; - var isClosing = false; - var tempStack = new Stack(); - - while (context.OpenTags.Count > 0) + if (IsClosingDelimiter(context, text, position, out token, out skip)) + return true; + + return IsOpeningPossible(context, text, position, out token, out skip); + } + + private bool IsOpeningDelimiter(string text, int position) { - var openTag = context.OpenTags.Pop(); - tempStack.Push(openTag); + return IsMatch(text, position, Delimiter.Opening); + } - if (openTag != Delimiter.Type) + private bool HasExcessiveDelimiters(string text, int position) + { + var delimiterCount = 0; + while (position + delimiterCount < text.Length && + IsMatch(text, position + delimiterCount, Delimiter.Opening)) { - continue; + delimiterCount++; } - - isClosing = true; - tempStack.Pop(); - break; + + return delimiterCount > Delimiter.Opening.Length; } - - while (tempStack.Count > 0) + + private bool IsClosingDelimiter(ParsingContext context, string text, int position, out Token.Token token, out int skip) { - context.OpenTags.Push(tempStack.Pop()); + token = null; + skip = 0; + + var isClosingPossible = context.OpenTags.Count > 0 && context.OpenTags.Peek() == Delimiter.Type; + if (!isClosingPossible) + { + return false; + } + + if (HasLetterOrDigitAfterDelimiter(text, position)) + return false; + + token = CreateClosingToken(position); + context.OpenTags.Pop(); + skip = Delimiter.Opening.Length; + return true; } - if (!isClosing) + private bool IsOpeningPossible(ParsingContext context, string text, int position, out Token.Token token, out int skip) { + token = null; + skip = 0; + + if (!DelimiterOpeningValid(context, text, position)) + return false; + + var closingPos = FindClosingDelimiter(text, position + Delimiter.Opening.Length); + if (closingPos == -1 || HasWhitespaceBeforeClosing(text, closingPos)) + return false; + + var innerText = text.Substring(position + Delimiter.Opening.Length, closingPos - (position + Delimiter.Opening.Length)); + if (innerText.Contains("\n") || innerText.Contains("\r")) + return false; + + token = CreateOpeningToken(position); context.OpenTags.Push(Delimiter.Type); + skip = Delimiter.Opening.Length; + return true; + } + + private bool DelimiterOpeningValid(ParsingContext context, string text, int position) + { + if (Delimiter.Opening.Length != 1) + { + return true; + } + + return !HasLetterBefore(text, position) && !HasWhitespaceAfter(text, position); + } + + private bool HasLetterBefore(string text, int position) + { + return position > 0 && char.IsLetterOrDigit(text[position - 1]); + } + + private bool HasWhitespaceAfter(string text, int position) + { + return position + Delimiter.Opening.Length >= text.Length || + char.IsWhiteSpace(text[position + Delimiter.Opening.Length]); + } + + private bool HasLetterOrDigitAfterDelimiter(string text, int position) + { + return position + Delimiter.Opening.Length < text.Length && + char.IsLetterOrDigit(text[position + Delimiter.Opening.Length]); } - token = new Token( - Delimiter.Opening, - Delimiter.Type, - isClosing ? TagState.Close : TagState.Open, - context.Position); - skip = Delimiter.Opening.Length; - return true; + private bool HasWhitespaceBeforeClosing(string text, int closingPos) + { + return closingPos > 0 && char.IsWhiteSpace(text[closingPos - 1]); + } + + private Token.Token CreateClosingToken(int position) + { + return new Token.Token(Delimiter.Closing, Delimiter.Type, TagState.Close, position); + } + + private Token.Token CreateOpeningToken(int position) + { + return new Token.Token(Delimiter.Opening, Delimiter.Type, TagState.Open, position); + } + + private int FindClosingDelimiter(string text, int startPos) + { + for (var i = startPos; i <= text.Length - Delimiter.Closing.Length; i++) + { + if (IsMatch(text, i, Delimiter.Closing)) + return i; + } + return -1; + } } -} +} \ No newline at end of file diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/TokenHandlerFactory.cs b/cs/Markdown/Parser/TokenHandler/Handlers/TokenHandlerFactory.cs index f54282bdc..702b864c5 100644 --- a/cs/Markdown/Parser/TokenHandler/Handlers/TokenHandlerFactory.cs +++ b/cs/Markdown/Parser/TokenHandler/Handlers/TokenHandlerFactory.cs @@ -1,5 +1,6 @@ -using Markdown.Parser.TokenHandler.Handlers; -using static Markdown.Parser.TokenHandler.Delimiter; +using Markdown.Parser.Interface; +using Markdown.Parser.TokenHandler.Handlers; +using Markdown.Token; namespace Markdown.Parser.TokenHandler; @@ -10,11 +11,13 @@ public static class TokenHandlerFactory { TokenType.Strong, new Delimiter("__", "__", TokenType.Strong) }, { TokenType.Italic, new Delimiter("_", "_", TokenType.Italic) } }; - + + public static IList CreateHandlers() { return new List { + new EscapedCharacterHandler(), new LinkHandler(), new PairedTagHandler(Delimiters[TokenType.Strong]), new PairedTagHandler(Delimiters[TokenType.Italic]), diff --git a/cs/Markdown/Token/ParsingContext.cs b/cs/Markdown/Token/ParsingContext.cs index f59c0b1b8..d0f5f8d83 100644 --- a/cs/Markdown/Token/ParsingContext.cs +++ b/cs/Markdown/Token/ParsingContext.cs @@ -1,4 +1,6 @@ -namespace Markdown.Parser.TokenHandler; +using Markdown.Parser.TokenHandler; + +namespace Markdown.Token; public class ParsingContext { diff --git a/cs/Markdown/Token/TagState.cs b/cs/Markdown/Token/TagState.cs new file mode 100644 index 000000000..0b1c26073 --- /dev/null +++ b/cs/Markdown/Token/TagState.cs @@ -0,0 +1,7 @@ +namespace Markdown.Token; + +public enum TagState +{ + Open, + Close +} \ No newline at end of file diff --git a/cs/Markdown/Token/Token.cs b/cs/Markdown/Token/Token.cs index b82d67d0a..a5982c5c2 100644 --- a/cs/Markdown/Token/Token.cs +++ b/cs/Markdown/Token/Token.cs @@ -1,4 +1,6 @@ -using Markdown; +using Markdown.Parser.TokenHandler; + +namespace Markdown.Token; public class Token { diff --git a/cs/Markdown/Token/TokenType.cs b/cs/Markdown/Token/TokenType.cs index 2d49b5009..955cbbe47 100644 --- a/cs/Markdown/Token/TokenType.cs +++ b/cs/Markdown/Token/TokenType.cs @@ -1,4 +1,4 @@ -namespace Markdown; +namespace Markdown.Token; public enum TokenType { @@ -6,11 +6,6 @@ public enum TokenType Strong, Italic, Header, - Link -} - -public enum TagState -{ - Open, - Close + Link, + Escaped } \ No newline at end of file