From 2720994886635db186083b87d84f2002524f43ac Mon Sep 17 00:00:00 2001 From: qreaqtor Date: Tue, 3 Dec 2024 00:44:29 +0500 Subject: [PATCH 1/2] markdown render with tests --- cs/Markdown.Tests/Markdown.Tests.csproj | 18 +++ cs/Markdown.Tests/Markdown.cs | 16 +++ cs/Markdown.Tests/Program.cs | 2 + cs/Markdown/Builder/HtmlBuilder.cs | 80 +++++++++++ cs/Markdown/Checker/TagChecker.cs | 85 +++++++++++ cs/Markdown/Config/MarkdownConfig.cs | 30 ++++ cs/Markdown/Domain/IHtmlBuilder.cs | 13 ++ cs/Markdown/Domain/IMarkdownParser.cs | 13 ++ cs/Markdown/Domain/ITagChecker.cs | 14 ++ cs/Markdown/Domain/Tags/HtmlTag.cs | 24 ++++ cs/Markdown/Domain/Tags/MdTag.cs | 20 +++ cs/Markdown/Domain/Tags/Tag.cs | 17 +++ cs/Markdown/Domain/Token.cs | 25 ++++ cs/Markdown/Extensions/HtmlTagExtensions.cs | 23 +++ cs/Markdown/Markdown.csproj | 17 +++ cs/Markdown/Md.cs | 33 +++++ cs/Markdown/Md_Test_Cases.cs | 150 ++++++++++++++++++++ cs/Markdown/Md_Tests.cs | 53 +++++++ cs/Markdown/Parser/MarkdownParser.cs | 146 +++++++++++++++++++ cs/Markdown/Program.cs | 2 + cs/clean-code.sln | 13 +- 21 files changed, 792 insertions(+), 2 deletions(-) create mode 100644 cs/Markdown.Tests/Markdown.Tests.csproj create mode 100644 cs/Markdown.Tests/Markdown.cs create mode 100644 cs/Markdown.Tests/Program.cs create mode 100644 cs/Markdown/Builder/HtmlBuilder.cs create mode 100644 cs/Markdown/Checker/TagChecker.cs create mode 100644 cs/Markdown/Config/MarkdownConfig.cs create mode 100644 cs/Markdown/Domain/IHtmlBuilder.cs create mode 100644 cs/Markdown/Domain/IMarkdownParser.cs create mode 100644 cs/Markdown/Domain/ITagChecker.cs create mode 100644 cs/Markdown/Domain/Tags/HtmlTag.cs create mode 100644 cs/Markdown/Domain/Tags/MdTag.cs create mode 100644 cs/Markdown/Domain/Tags/Tag.cs create mode 100644 cs/Markdown/Domain/Token.cs create mode 100644 cs/Markdown/Extensions/HtmlTagExtensions.cs create mode 100644 cs/Markdown/Markdown.csproj create mode 100644 cs/Markdown/Md.cs create mode 100644 cs/Markdown/Md_Test_Cases.cs create mode 100644 cs/Markdown/Md_Tests.cs create mode 100644 cs/Markdown/Parser/MarkdownParser.cs create mode 100644 cs/Markdown/Program.cs diff --git a/cs/Markdown.Tests/Markdown.Tests.csproj b/cs/Markdown.Tests/Markdown.Tests.csproj new file mode 100644 index 000000000..a2cb5bd2e --- /dev/null +++ b/cs/Markdown.Tests/Markdown.Tests.csproj @@ -0,0 +1,18 @@ + + + + Exe + net8.0 + enable + enable + + + + + + + + + + + diff --git a/cs/Markdown.Tests/Markdown.cs b/cs/Markdown.Tests/Markdown.cs new file mode 100644 index 000000000..7361dd02b --- /dev/null +++ b/cs/Markdown.Tests/Markdown.cs @@ -0,0 +1,16 @@ +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Markdown.Builder; +using Markdown.Checker; +using Markdown.Config; +using Markdown.Parser; +using FluentAssertions; + +namespace Markdown.Tests +{ + +} diff --git a/cs/Markdown.Tests/Program.cs b/cs/Markdown.Tests/Program.cs new file mode 100644 index 000000000..3751555cb --- /dev/null +++ b/cs/Markdown.Tests/Program.cs @@ -0,0 +1,2 @@ +// See https://aka.ms/new-console-template for more information +Console.WriteLine("Hello, World!"); diff --git a/cs/Markdown/Builder/HtmlBuilder.cs b/cs/Markdown/Builder/HtmlBuilder.cs new file mode 100644 index 000000000..900042ac0 --- /dev/null +++ b/cs/Markdown/Builder/HtmlBuilder.cs @@ -0,0 +1,80 @@ +using Markdown.Domain.Tags; +using Markdown.Domain; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Markdown.Extensions; + +namespace Markdown.Builder +{ + public class HtmlBuilder : IHtmlBuilder + { + private readonly Dictionary _htmlTagsMarkupDict; + + private int shift; + + public HtmlBuilder(Dictionary htmlTagsMarkupDict) + { + _htmlTagsMarkupDict = htmlTagsMarkupDict; + } + + public string BuildHtmlFromMarkdown(string markdownText, List tokens) + { + var htmlResultText = new StringBuilder(markdownText); + var htmlTags = ConvertToHtmlTags(tokens); + shift = 0; + + foreach (var tag in htmlTags) + { + ReplaceMarkdownWithHtml(htmlResultText, tag); + shift = htmlResultText.Length - markdownText.Length; + } + + return htmlResultText.ToString(); + } + + private void ReplaceMarkdownWithHtml(StringBuilder htmlResultText, HtmlTag tag) + { + var mdTagLength = GetMdTagLength(tag); + + htmlResultText.Remove(tag.Index + shift, mdTagLength); + + htmlResultText.Insert(tag.Index + shift, tag.GetMarkup()); + } + + private int GetMdTagLength(HtmlTag tag) + { + if (tag.Tag == Tag.Bold) + { + shift--; + return 2; + } + + if (tag.IsClosing && (tag.Tag == Tag.Header || tag.Tag == Tag.EscapedSymbol)) + { + shift++; + return 0; + } + + return 1; + } + + private List ConvertToHtmlTags(List tokens) + { + var htmlTags = new List(); + + foreach (var token in tokens) + { + var htmlMarkup = _htmlTagsMarkupDict[token.TagType]; + var tag = token.TagType; + + htmlTags.Add(new HtmlTag(tag, token.StartIndex, false, htmlMarkup)); + htmlTags.Add(new HtmlTag(tag, token.EndIndex, true, htmlMarkup)); + } + + return htmlTags.OrderBy(tag => tag.Index).ToList(); + } + } +} diff --git a/cs/Markdown/Checker/TagChecker.cs b/cs/Markdown/Checker/TagChecker.cs new file mode 100644 index 000000000..f78f16638 --- /dev/null +++ b/cs/Markdown/Checker/TagChecker.cs @@ -0,0 +1,85 @@ +using Markdown.Domain.Tags; +using Markdown.Domain; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Checker +{ + public class TagChecker : ITagChecker + { + private int currentIndex; + + public const char HASH_SYMBOL = '#'; + public const char UNDERSCORE_SYMBOL = '_'; + public const char SLASH_SYMBOL = '\\'; + + public Tuple GetTagType(string line, int index) + { + currentIndex = index; + var tag = Tag.None; + + switch (line[index]) + { + case UNDERSCORE_SYMBOL: + tag = GetTagForUnderscore(line); + break; + case SLASH_SYMBOL: + tag = GetTagForSlash(line); + break; + case HASH_SYMBOL: + if (index == 0) + tag = Tag.Header; + break; + } + + return Tuple.Create(tag, currentIndex); + } + + private Tag GetTagForSlash(string line) + { + if (currentIndex < line.Length - 1 + && (line[currentIndex + 1] == SLASH_SYMBOL + || line[currentIndex + 1] == UNDERSCORE_SYMBOL + || line[currentIndex + 1] == HASH_SYMBOL)) + { + return Tag.EscapedSymbol; + } + + return Tag.None; + } + + private Tag GetTagForUnderscore(string line) + { + if (currentIndex < line.Length - 1 && line[currentIndex + 1] == UNDERSCORE_SYMBOL) + return GetTagWithMultipleUnderscores(line); + + return Tag.Italic; + } + + private Tag GetTagWithMultipleUnderscores(string line) + { + if (currentIndex < line.Length - 2 && line[currentIndex + 2] == UNDERSCORE_SYMBOL) + { + currentIndex = FindEndOfInvalidTag(line); + return Tag.None; + } + + currentIndex++; + + return Tag.Bold; + } + + private int FindEndOfInvalidTag(string line) + { + var endIndex = currentIndex; + + while (endIndex < line.Length && line[endIndex] == UNDERSCORE_SYMBOL) + endIndex++; + + return endIndex; + } + } +} diff --git a/cs/Markdown/Config/MarkdownConfig.cs b/cs/Markdown/Config/MarkdownConfig.cs new file mode 100644 index 000000000..418f11d2d --- /dev/null +++ b/cs/Markdown/Config/MarkdownConfig.cs @@ -0,0 +1,30 @@ +using Markdown.Domain.Tags; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Config +{ + public class MarkdownConfig + { + public static Dictionary HtmlTags => new Dictionary() + { + { Tag.Bold, "strong" }, { Tag.Italic, "em" }, + { Tag.Header, "h1" }, { Tag.EscapedSymbol, "" } + }; + + public static Dictionary MdTags => new Dictionary() + { + { Tag.Bold, "__" }, { Tag.Italic, "_" }, + { Tag.Header, "# " }, { Tag.EscapedSymbol, ""} + }; + + public static Dictionary DifferentTags => new Dictionary() + { + {Tag.Bold, Tag.Italic}, + {Tag.Italic, Tag.Bold} + }; + } +} diff --git a/cs/Markdown/Domain/IHtmlBuilder.cs b/cs/Markdown/Domain/IHtmlBuilder.cs new file mode 100644 index 000000000..5b4d6b72c --- /dev/null +++ b/cs/Markdown/Domain/IHtmlBuilder.cs @@ -0,0 +1,13 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Domain +{ + public interface IHtmlBuilder + { + string BuildHtmlFromMarkdown(string markdownText, List tokens); + } +} diff --git a/cs/Markdown/Domain/IMarkdownParser.cs b/cs/Markdown/Domain/IMarkdownParser.cs new file mode 100644 index 000000000..46a5375d4 --- /dev/null +++ b/cs/Markdown/Domain/IMarkdownParser.cs @@ -0,0 +1,13 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Domain +{ + public interface IMarkdownParser + { + List ParseMarkdown(string line); + } +} diff --git a/cs/Markdown/Domain/ITagChecker.cs b/cs/Markdown/Domain/ITagChecker.cs new file mode 100644 index 000000000..835e393b9 --- /dev/null +++ b/cs/Markdown/Domain/ITagChecker.cs @@ -0,0 +1,14 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Markdown.Domain.Tags; + +namespace Markdown.Domain +{ + public interface ITagChecker + { + Tuple GetTagType(string line, int index); + } +} diff --git a/cs/Markdown/Domain/Tags/HtmlTag.cs b/cs/Markdown/Domain/Tags/HtmlTag.cs new file mode 100644 index 000000000..8c7dffbb4 --- /dev/null +++ b/cs/Markdown/Domain/Tags/HtmlTag.cs @@ -0,0 +1,24 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Domain.Tags +{ + public class HtmlTag + { + public Tag Tag { get; } + public int Index { get; } + public bool IsClosing { get; } + public string Markup { get; } + + public HtmlTag(Tag tag, int index, bool isClosing, string htmlMarkup) + { + Tag = tag; + Index = index; + IsClosing = isClosing; + Markup = htmlMarkup; + } + } +} diff --git a/cs/Markdown/Domain/Tags/MdTag.cs b/cs/Markdown/Domain/Tags/MdTag.cs new file mode 100644 index 000000000..1003614fb --- /dev/null +++ b/cs/Markdown/Domain/Tags/MdTag.cs @@ -0,0 +1,20 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Domain.Tags +{ + public class MdTag + { + public Tag Tag { get; } + public int Index { get; } + + public MdTag(Tag tag, int index) + { + Tag = tag; + Index = index; + } + } +} diff --git a/cs/Markdown/Domain/Tags/Tag.cs b/cs/Markdown/Domain/Tags/Tag.cs new file mode 100644 index 000000000..9ab8f0101 --- /dev/null +++ b/cs/Markdown/Domain/Tags/Tag.cs @@ -0,0 +1,17 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Domain.Tags +{ + public enum Tag + { + None, + Bold, + Italic, + Header, + EscapedSymbol, + } +} diff --git a/cs/Markdown/Domain/Token.cs b/cs/Markdown/Domain/Token.cs new file mode 100644 index 000000000..88412b087 --- /dev/null +++ b/cs/Markdown/Domain/Token.cs @@ -0,0 +1,25 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Markdown.Domain.Tags; + +namespace Markdown.Domain +{ + public class Token + { + public Tag TagType { get; } + public int StartIndex { get; } + public int EndIndex { get; } + public bool IsSingleTag { get; } + + public Token(Tag type, int startIndex, int endIndex, bool isSingleTag = false) + { + TagType = type; + IsSingleTag = isSingleTag; + StartIndex = startIndex; + EndIndex = endIndex; + } + } +} diff --git a/cs/Markdown/Extensions/HtmlTagExtensions.cs b/cs/Markdown/Extensions/HtmlTagExtensions.cs new file mode 100644 index 000000000..853d8ed5d --- /dev/null +++ b/cs/Markdown/Extensions/HtmlTagExtensions.cs @@ -0,0 +1,23 @@ +using Markdown.Domain.Tags; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Extensions +{ + public static class HtmlTagExtensions + { + public static string GetMarkup(this HtmlTag htmlTag) + { + if (htmlTag.Tag == Tag.EscapedSymbol) + return ""; + + var format = htmlTag.IsClosing ? "" : "<{0}>"; + + return string.Format(format, htmlTag.Markup); + } + } +} diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..aea3b1122 --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,17 @@ + + + + Exe + net8.0 + enable + enable + + + + + + + + + + diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs new file mode 100644 index 000000000..9ff3168b2 --- /dev/null +++ b/cs/Markdown/Md.cs @@ -0,0 +1,33 @@ +using Markdown.Domain; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown +{ + public class Md + { + private readonly IMarkdownParser _parser; + private readonly IHtmlBuilder _converter; + public Md(IMarkdownParser parser, IHtmlBuilder htmlBuilder) + { + _parser = parser; + _converter = htmlBuilder; + } + + public string Render(string markdownText) + { + if (string.IsNullOrEmpty(markdownText)) + { + throw new ArgumentNullException("Provided string was empty"); + } + + var tokens = _parser.ParseMarkdown(markdownText); + var htmlText = _converter.BuildHtmlFromMarkdown(markdownText, tokens); + + return htmlText; + } + } +} diff --git a/cs/Markdown/Md_Test_Cases.cs b/cs/Markdown/Md_Test_Cases.cs new file mode 100644 index 000000000..597904a23 --- /dev/null +++ b/cs/Markdown/Md_Test_Cases.cs @@ -0,0 +1,150 @@ +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown +{ + public static class Md_Test_Cases + { + public static IEnumerable SimpleTests + { + get + { + yield return new TestCaseData("_Hello_", "Hello"). + SetName("Correct conversion of a italic tags"); + + yield return new TestCaseData("#Hello", "

Hello

"). + SetName("Correct conversion of a header tags"); + + yield return new TestCaseData("__Hello__", "Hello"). + SetName("Correct conversion of bold tags"); + } + } + + public static IEnumerable TestsWhenSpacesBetweenTagAndWord + { + get + { + yield return new TestCaseData("_ Hello_", "_ Hello_"). + SetName("Correct conversion when there is a space after the opening italic tag"); + + yield return new TestCaseData("_Hello _", "_Hello _"). + SetName("Correct conversion if there is a space before the closing italic tag"); + + yield return new TestCaseData("__Hello __", "__Hello __"). + SetName("Correct conversion if there is a space before the closing bold tag"); + + yield return new TestCaseData("__ Hello__", "__ Hello__"). + SetName("Correct conversion when there is a space after the opening bold tag"); + } + } + + public static IEnumerable TestsWhenPairTagNotExists + { + get + { + yield return new TestCaseData("_Hello", "_Hello"). + SetName("Correct conversion if there is no closing italic tag"); + + yield return new TestCaseData("Hello_", "Hello_"). + SetName("Correct conversion if there is no opening italic tag"); + + yield return new TestCaseData("__Hello", "__Hello"). + SetName("Correct conversion if there is no closing bold tag"); + + yield return new TestCaseData("Hello__", "Hello__"). + SetName("Correct conversion if there is no opening bold tag"); + + yield return new TestCaseData("_Hello_world_", "Helloworld_"). + SetName("Correct conversion if there are several italic tags and there are extra ones"); + + yield return new TestCaseData("_Hello_wor_ld_", "Helloworld"). + SetName("Correct conversion if there are several tags"); + + yield return new TestCaseData("Hel_lo w_orld", "Hel_lo w_orld"). + SetName("No highlighting if the italic tags are in different words"); + + yield return new TestCaseData("Hel__lo w__orld", "Hel__lo w__orld"). + SetName("No highlighting if the bold tags are in different words"); + + yield return new TestCaseData("__Hello_ world", "__Hello_ world"). + SetName("Correct conversion of unpaired tags"); + } + } + + public static IEnumerable TestsWhenPairTagsInDifferentWords + { + get + { + yield return new TestCaseData("Hel_lo w_orld", "Hel_lo w_orld"). + SetName("No highlighting if the italic tags are in different words"); + + yield return new TestCaseData("Hel__lo w__orld", "Hel__lo w__orld"). + SetName("No highlighting if the bold tags are in different words"); + } + } + + public static IEnumerable TestsWhenTagsHighlightPartOfWord + { + get + { + yield return new TestCaseData("_Hell_o _w_orld", "Hello world"). + SetName("Correct highlighting if italic tags highlight part of a word"); + + yield return new TestCaseData("__Hell__o __w__orld", "Hello world"). + SetName("Correct highlighting if bold tags highlight part of a word"); + } + } + + public static IEnumerable TestsWithoutWords + { + get + { + yield return new TestCaseData("__", "__"). + SetName("Don't convert tags if there is nothing inside them"); + + yield return new TestCaseData("____", "____"). + SetName("Don't convert tags if there is nothing inside them"); + + yield return new TestCaseData("__ __", "__ __"). + SetName("Don't convert tags if there is only a space inside them"); + } + } + + public static IEnumerable TestsWithIntersectionTags + { + get + { + yield return new TestCaseData("H __e _l_ l__ o", "H e l l o"). + SetName("Correct converting if there are italic tags inside bold tags"); + + yield return new TestCaseData("H _e __l__ l_ o", "H e __l__ l o"). + SetName("Correct converting if there are bold tags inside italic tags"); + } + } + + public static IEnumerable TestsWithEscape + { + get + { + yield return new TestCaseData(@"\Hello\", @"\Hello\"). + SetName("Correct conversion if the escape symbol does not escape anything"); + + yield return new TestCaseData(@"\_Hello_", @"_Hello_"). + SetName("Correct conversion if the escaping character escapes the italic tag"); + + yield return new TestCaseData(@"\__Hello__", @"__Hello__"). + SetName("Correct conversion if the escaping character escapes the bold tag"); + + yield return new TestCaseData(@"\\_Hello_", @"\Hello"). + SetName("Correct conversion if the escaping character escapes another escaping character"); + + yield return new TestCaseData(@"\# Hello", @"# Hello"). + SetName("Correct conversion if the escaping character escapes the header tag"); + } + } + } +} diff --git a/cs/Markdown/Md_Tests.cs b/cs/Markdown/Md_Tests.cs new file mode 100644 index 000000000..bac786d0e --- /dev/null +++ b/cs/Markdown/Md_Tests.cs @@ -0,0 +1,53 @@ +using FluentAssertions; +using Markdown.Builder; +using Markdown.Checker; +using Markdown.Config; +using Markdown.Parser; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown +{ + [TestFixture] + public class Markdown_Tests + { + private Md md; + + [SetUp] + public void Setup() + { + var builder = new HtmlBuilder(MarkdownConfig.HtmlTags); + var parser = new MarkdownParser(new TagChecker(), MarkdownConfig.DifferentTags, MarkdownConfig.MdTags); + md = new Md(parser, builder); + } + + [TestCaseSource(typeof(Md_Test_Cases), nameof(Md_Test_Cases.SimpleTests))] + [TestCaseSource(typeof(Md_Test_Cases), nameof(Md_Test_Cases.TestsWhenPairTagNotExists))] + [TestCaseSource(typeof(Md_Test_Cases), nameof(Md_Test_Cases.TestsWhenPairTagsInDifferentWords))] + [TestCaseSource(typeof(Md_Test_Cases), nameof(Md_Test_Cases.TestsWhenSpacesBetweenTagAndWord))] + [TestCaseSource(typeof(Md_Test_Cases), nameof(Md_Test_Cases.TestsWhenTagsHighlightPartOfWord))] + [TestCaseSource(typeof(Md_Test_Cases), nameof(Md_Test_Cases.TestsWithEscape))] + [TestCaseSource(typeof(Md_Test_Cases), nameof(Md_Test_Cases.TestsWithIntersectionTags))] + [TestCaseSource(typeof(Md_Test_Cases), nameof(Md_Test_Cases.TestsWithoutWords))] + public void Correct_Render_WhenInputValidString(string markdownText, string htmlText) + { + md.Render(markdownText).Should().Be(htmlText); + } + + [TestCase("", TestName = "Return empty string if string empty")] + [TestCase(null, TestName = "Return empty string if string null")] + public void ThrowArgumentException_When_InvalidInputString(string markdownText) + { + Action action = () => + { + var htmlText = md.Render(markdownText); + }; + + action.Should().Throw(); + } + } +} diff --git a/cs/Markdown/Parser/MarkdownParser.cs b/cs/Markdown/Parser/MarkdownParser.cs new file mode 100644 index 000000000..9aec24f7a --- /dev/null +++ b/cs/Markdown/Parser/MarkdownParser.cs @@ -0,0 +1,146 @@ +using Markdown.Domain.Tags; +using Markdown.Domain; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Parser +{ + public class MarkdownParser : IMarkdownParser + { + private readonly ITagChecker _tagChecker; + private readonly Stack needClosingTags; + private readonly Queue offsetTags; + + private readonly Dictionary _differentTagTypes; + private readonly Dictionary _mdTags; + + private int currentIndex; + + public MarkdownParser(ITagChecker tagChecker, Dictionary differentTagTypes, Dictionary mdTags) + { + _tagChecker = tagChecker; + + _differentTagTypes = differentTagTypes; + _mdTags = mdTags; + + needClosingTags = new Stack(); + offsetTags = new Queue(); + } + + public List ParseMarkdown(string markdownText) + { + var lines = markdownText.Split('\n'); + var fountedTokens = new List(); + + foreach (var line in lines) + { + needClosingTags.Clear(); + offsetTags.Clear(); + + currentIndex = 0; + + SearchTokensInLine(line, fountedTokens); + } + + return fountedTokens; + } + + private void SearchTokensInLine(string line, List fountedTokens) + { + while (currentIndex < line.Length) + { + var tagTypeIndex = _tagChecker.GetTagType(line, currentIndex); + currentIndex = tagTypeIndex.Item2; + AnalyzeTag(line, tagTypeIndex.Item1, fountedTokens); + + currentIndex++; + } + } + + private void AnalyzeTag(string line, Tag tagType, List fountedTokens) + { + switch (tagType) + { + case Tag.Header: + fountedTokens.Add(new Token(Tag.Header, 0, line.Length - 1)); + break; + case Tag.EscapedSymbol: + fountedTokens.Add(new Token(Tag.EscapedSymbol, currentIndex, currentIndex)); + currentIndex += 1; + break; + case Tag.None: + break; + default: + TryAddToken(tagType, line, fountedTokens); + break; + } + } + + private void TryAddToken(Tag tagType, string line, List fountedTokens) + { + var openingTag = FindOpeningTag(tagType, currentIndex); + + if (openingTag.Tag == Tag.None) + { + HandleNotATag(tagType, line); + return; + } + + HandleExistingTag(tagType, line, fountedTokens, openingTag); + } + + private void HandleExistingTag(Tag tagType, string line, List fountedTokens, MdTag openingTag) + { + var token = new Token(tagType, openingTag.Index, currentIndex); + + if (IsPossibleToAdd(token, line)) + { + fountedTokens.Add(token); + return; + } + + if (offsetTags.Count > 0 && offsetTags.Peek() == tagType) + { + needClosingTags.Push(new MdTag(tagType, currentIndex)); + offsetTags.Dequeue(); + } + } + + private void HandleNotATag(Tag tagType, string line) + { + if (currentIndex < line.Length - 1 && !char.IsWhiteSpace(line[currentIndex + 1])) + needClosingTags.Push(new MdTag(tagType, currentIndex)); + } + + private bool IsPossibleToAdd(Token token, string line) + { + var shift = _mdTags[token.TagType].Length; + var diffTagType = _differentTagTypes[token.TagType]; + var anyWhiteSpace = line.Substring(token.StartIndex + 1, token.EndIndex - token.StartIndex - 1).Any(char.IsWhiteSpace); + + return !(char.IsWhiteSpace(line[token.EndIndex - shift]) + || offsetTags.Dequeue() == diffTagType + || token.EndIndex < line.Length - 1 && !char.IsWhiteSpace(line[token.EndIndex + 1]) && anyWhiteSpace + || token.TagType == Tag.Bold && needClosingTags.Any(tag => tag.Tag == diffTagType) + || token.StartIndex - 1 > 0 && !char.IsWhiteSpace(line[token.StartIndex - shift]) && anyWhiteSpace + ); + } + + private MdTag FindOpeningTag(Tag tagType, int index) + { + var openingTag = new MdTag(Tag.None, index); + + while (needClosingTags.Any(tag => tag.Tag == tagType)) + { + var removeClosingTag = needClosingTags.Pop(); + openingTag = new MdTag(removeClosingTag.Tag, removeClosingTag.Index); + offsetTags.Enqueue(removeClosingTag.Tag); + } + + return openingTag; + } + } +} diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs new file mode 100644 index 000000000..3751555cb --- /dev/null +++ b/cs/Markdown/Program.cs @@ -0,0 +1,2 @@ +// See https://aka.ms/new-console-template for more information +Console.WriteLine("Hello, World!"); diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 2206d54db..c2a57c810 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 14 -VisualStudioVersion = 14.0.25420.1 +# Visual Studio Version 17 +VisualStudioVersion = 17.12.35521.163 d17.12 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Chess", "Chess\Chess.csproj", "{DBFBE40E-EE0C-48F4-8763-EBD11C960081}" EndProject @@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{0AC75194-2382-496F-B7F9-F07EB1620A8A}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +29,12 @@ Global {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {0AC75194-2382-496F-B7F9-F07EB1620A8A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0AC75194-2382-496F-B7F9-F07EB1620A8A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0AC75194-2382-496F-B7F9-F07EB1620A8A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0AC75194-2382-496F-B7F9-F07EB1620A8A}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE EndGlobalSection EndGlobal From 778bdda446debe719a16f4caef4d58f8d329abb2 Mon Sep 17 00:00:00 2001 From: qreaqtor Date: Sun, 8 Dec 2024 04:15:02 +0500 Subject: [PATCH 2/2] refactoring and update tests --- cs/Markdown/Extensions/StringExtensions.cs | 21 ++++++++++++ cs/Markdown/Md_Test_Cases.cs | 37 ++++++++++++++++++++++ cs/Markdown/Md_Tests.cs | 31 ++++++++++++++++-- cs/Markdown/Parser/MarkdownParser.cs | 31 ++++++++++++------ 4 files changed, 108 insertions(+), 12 deletions(-) create mode 100644 cs/Markdown/Extensions/StringExtensions.cs diff --git a/cs/Markdown/Extensions/StringExtensions.cs b/cs/Markdown/Extensions/StringExtensions.cs new file mode 100644 index 000000000..37381588f --- /dev/null +++ b/cs/Markdown/Extensions/StringExtensions.cs @@ -0,0 +1,21 @@ +using Markdown.Domain.Tags; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Extensions +{ + public static class StringExtensions + { + public static bool SubstringContainsAny(this string s, int start, int length, Func check) + { + for (var i = 0; i < length; i++) + if (check(s[start+i])) + return true; + + return false; + } + } +} diff --git a/cs/Markdown/Md_Test_Cases.cs b/cs/Markdown/Md_Test_Cases.cs index 597904a23..f43b41c24 100644 --- a/cs/Markdown/Md_Test_Cases.cs +++ b/cs/Markdown/Md_Test_Cases.cs @@ -123,6 +123,9 @@ public static IEnumerable TestsWithIntersectionTags yield return new TestCaseData("H _e __l__ l_ o", "H e __l__ l o"). SetName("Correct converting if there are bold tags inside italic tags"); + + yield return new TestCaseData("H _e __l_ l__ o", "H _e __l_ l__ o"). + SetName("Correct converting if there are intersection of tags"); } } @@ -133,6 +136,9 @@ public static IEnumerable TestsWithEscape yield return new TestCaseData(@"\Hello\", @"\Hello\"). SetName("Correct conversion if the escape symbol does not escape anything"); + yield return new TestCaseData(@"\\\_Hello_", @"\_Hello_"). + SetName("Correct conversion with multiply escaping"); + yield return new TestCaseData(@"\_Hello_", @"_Hello_"). SetName("Correct conversion if the escaping character escapes the italic tag"); @@ -146,5 +152,36 @@ public static IEnumerable TestsWithEscape SetName("Correct conversion if the escaping character escapes the header tag"); } } + + public static IEnumerable BigTests + { + get + { + yield return new TestCaseData( + "#заголовок __с жирным текстом__ \n" + + "Просто текст, в котором _курсивные_ выделения\n" + + "__Есть жирный текст__\n" + + "__А вот жирный текст _с курсивом_ внутри _и ещё курсив_ в жирном__\n" + + "_Вот __это_ не__ сработает\n" + + "_И вот так __тоже__ нет_\n" + + "Это - _ - просто подчёркивание\n" + + "Так_ не работает_\n" + + "И _ вот так _ тоже\n" + + "В с_лов_е можно выделять, а в цифрах 1_23_ нет\n", + + "

заголовок с жирным текстом

\n" + + "Просто текст, в котором курсивные выделения\n" + + "Есть жирный текст\n" + + "А вот жирный текст с курсивом внутри и ещё курсив в жирном\n" + + "_Вот __это_ не__ сработает\n" + + "И вот так __тоже__ нет\n" + + "Это - _ - просто подчёркивание\n" + + "Так_ не работает_\n" + + "И _ вот так _ тоже\n" + + "В слове можно выделять, а в цифрах 1_23_ нет\n" + ). + SetName("Correct conversion with multiply lines"); + } + } } } diff --git a/cs/Markdown/Md_Tests.cs b/cs/Markdown/Md_Tests.cs index bac786d0e..5588af6d0 100644 --- a/cs/Markdown/Md_Tests.cs +++ b/cs/Markdown/Md_Tests.cs @@ -6,6 +6,7 @@ using NUnit.Framework; using System; using System.Collections.Generic; +using System.Diagnostics; using System.Linq; using System.Text; using System.Threading.Tasks; @@ -33,13 +34,14 @@ public void Setup() [TestCaseSource(typeof(Md_Test_Cases), nameof(Md_Test_Cases.TestsWithEscape))] [TestCaseSource(typeof(Md_Test_Cases), nameof(Md_Test_Cases.TestsWithIntersectionTags))] [TestCaseSource(typeof(Md_Test_Cases), nameof(Md_Test_Cases.TestsWithoutWords))] + [TestCaseSource(typeof(Md_Test_Cases), nameof(Md_Test_Cases.BigTests))] public void Correct_Render_WhenInputValidString(string markdownText, string htmlText) { md.Render(markdownText).Should().Be(htmlText); } - [TestCase("", TestName = "Return empty string if string empty")] - [TestCase(null, TestName = "Return empty string if string null")] + [TestCase("", TestName = "Throw exception if string empty")] + [TestCase(null, TestName = "Throw exception if string null")] public void ThrowArgumentException_When_InvalidInputString(string markdownText) { Action action = () => @@ -49,5 +51,30 @@ public void ThrowArgumentException_When_InvalidInputString(string markdownText) action.Should().Throw(); } + + [TestCase(@"#заголовок с __жирным__ и _курсивным_ текстом, а еще \\\_экранированием\\\n", 5)] + public void CheckRenderHaveLinearTimeComplexity(string text, int count) + { + var linearCoefficient = 2; + var timer = new Stopwatch(); + + timer.Start(); + md.Render(text); + timer.Stop(); + + var previous = timer.ElapsedTicks; + + for (var i = 0; i < count; i++) + { + text += text; + + timer.Restart(); + md.Render(text); + timer.Stop(); + + Assert.That(timer.ElapsedTicks / previous, Is.LessThanOrEqualTo(linearCoefficient)); + previous = timer.ElapsedTicks; + } + } } } diff --git a/cs/Markdown/Parser/MarkdownParser.cs b/cs/Markdown/Parser/MarkdownParser.cs index 9aec24f7a..f6c6d83eb 100644 --- a/cs/Markdown/Parser/MarkdownParser.cs +++ b/cs/Markdown/Parser/MarkdownParser.cs @@ -5,12 +5,14 @@ using System.Linq; using System.Text; using System.Threading.Tasks; +using Markdown.Extensions; namespace Markdown.Parser { public class MarkdownParser : IMarkdownParser { private readonly ITagChecker _tagChecker; + private readonly Stack needClosingTags; private readonly Queue offsetTags; @@ -18,6 +20,7 @@ public class MarkdownParser : IMarkdownParser private readonly Dictionary _mdTags; private int currentIndex; + private int offset; public MarkdownParser(ITagChecker tagChecker, Dictionary differentTagTypes, Dictionary mdTags) { @@ -35,6 +38,8 @@ public List ParseMarkdown(string markdownText) var lines = markdownText.Split('\n'); var fountedTokens = new List(); + offset = 0; + foreach (var line in lines) { needClosingTags.Clear(); @@ -43,6 +48,8 @@ public List ParseMarkdown(string markdownText) currentIndex = 0; SearchTokensInLine(line, fountedTokens); + + offset += line.Length + 1; } return fountedTokens; @@ -65,10 +72,10 @@ private void AnalyzeTag(string line, Tag tagType, List fountedTokens) switch (tagType) { case Tag.Header: - fountedTokens.Add(new Token(Tag.Header, 0, line.Length - 1)); + fountedTokens.Add(new Token(Tag.Header, offset, offset+line.Length - 1)); break; case Tag.EscapedSymbol: - fountedTokens.Add(new Token(Tag.EscapedSymbol, currentIndex, currentIndex)); + fountedTokens.Add(new Token(Tag.EscapedSymbol, offset+currentIndex, offset + currentIndex)); currentIndex += 1; break; case Tag.None: @@ -81,7 +88,7 @@ private void AnalyzeTag(string line, Tag tagType, List fountedTokens) private void TryAddToken(Tag tagType, string line, List fountedTokens) { - var openingTag = FindOpeningTag(tagType, currentIndex); + var openingTag = FindOpeningTag(tagType, offset+currentIndex); if (openingTag.Tag == Tag.None) { @@ -94,7 +101,7 @@ private void TryAddToken(Tag tagType, string line, List fountedTokens) private void HandleExistingTag(Tag tagType, string line, List fountedTokens, MdTag openingTag) { - var token = new Token(tagType, openingTag.Index, currentIndex); + var token = new Token(tagType, openingTag.Index, offset + currentIndex); if (IsPossibleToAdd(token, line)) { @@ -104,7 +111,7 @@ private void HandleExistingTag(Tag tagType, string line, List fountedToke if (offsetTags.Count > 0 && offsetTags.Peek() == tagType) { - needClosingTags.Push(new MdTag(tagType, currentIndex)); + needClosingTags.Push(new MdTag(tagType, offset+currentIndex)); offsetTags.Dequeue(); } } @@ -112,20 +119,24 @@ private void HandleExistingTag(Tag tagType, string line, List fountedToke private void HandleNotATag(Tag tagType, string line) { if (currentIndex < line.Length - 1 && !char.IsWhiteSpace(line[currentIndex + 1])) - needClosingTags.Push(new MdTag(tagType, currentIndex)); + needClosingTags.Push(new MdTag(tagType, offset + currentIndex)); } private bool IsPossibleToAdd(Token token, string line) { var shift = _mdTags[token.TagType].Length; var diffTagType = _differentTagTypes[token.TagType]; - var anyWhiteSpace = line.Substring(token.StartIndex + 1, token.EndIndex - token.StartIndex - 1).Any(char.IsWhiteSpace); - return !(char.IsWhiteSpace(line[token.EndIndex - shift]) + var anyWhiteSpace = line.SubstringContainsAny(token.StartIndex + 1 - offset, token.EndIndex - token.StartIndex - 1, char.IsWhiteSpace); + + var anyLetter = line.SubstringContainsAny(token.StartIndex + 1 - offset, token.EndIndex - token.StartIndex - 1, char.IsLetter); + + return anyLetter && !( + char.IsWhiteSpace(line[token.EndIndex - shift - offset]) || offsetTags.Dequeue() == diffTagType - || token.EndIndex < line.Length - 1 && !char.IsWhiteSpace(line[token.EndIndex + 1]) && anyWhiteSpace + || token.EndIndex - offset < line.Length - 1 && !char.IsWhiteSpace(line[token.EndIndex + 1 - offset]) && anyWhiteSpace || token.TagType == Tag.Bold && needClosingTags.Any(tag => tag.Tag == diffTagType) - || token.StartIndex - 1 > 0 && !char.IsWhiteSpace(line[token.StartIndex - shift]) && anyWhiteSpace + || token.StartIndex - 1 - offset> 0 && !char.IsWhiteSpace(line[token.StartIndex - shift - offset]) && anyWhiteSpace ); }