Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Мажирин Александр #237

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions cs/Markdown/Extensions/StringExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
namespace Markdown.Extensions;

public static class StringExtensions
{
    /// <summary>
    /// Checks, without copying, whether <paramref name="str"/> occurs in
    /// <paramref name="original"/> starting exactly at position <paramref name="i"/>.
    /// </summary>
    /// <param name="original">The string to look in.</param>
    /// <param name="str">The string expected at position <paramref name="i"/>.</param>
    /// <param name="i">Zero-based position in <paramref name="original"/>.</param>
    /// <returns>
    /// <c>true</c> when every character of <paramref name="str"/> matches the characters of
    /// <paramref name="original"/> beginning at <paramref name="i"/> (an empty
    /// <paramref name="str"/> always matches); <c>false</c> when the characters differ, when
    /// <paramref name="str"/> does not fit into the remainder of <paramref name="original"/>,
    /// or when <paramref name="i"/> is negative.
    /// </returns>
    /// <remarks>
    /// NOTE(review): the reviewer finds the argument names hard to understand and suggests
    /// origin, substring, position (the method name itself was judged fine). The names are
    /// kept here so that callers using named arguments keep compiling.
    /// </remarks>
    public static bool ContainsSubstringOnIndex(this string original, string str, int i)
    {
        // Nothing to compare: an empty string matches at any position.
        if (str.Length == 0)
        {
            return true;
        }

        // Reject positions before the start, and positions where str would run past the end.
        // The subtraction form avoids overflowing i + str.Length for very large i.
        if (i < 0 || i > original.Length - str.Length)
        {
            return false;
        }

        // Ordinal (char-by-char) comparison of the two ranges, no substring allocation.
        return string.CompareOrdinal(original, i, str, 0, str.Length) == 0;
    }
}
10 changes: 10 additions & 0 deletions cs/Markdown/Markdown.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

</Project>
9 changes: 9 additions & 0 deletions cs/Markdown/Markdown/IMd.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
namespace Markdown.Markdown;

/// <summary>
/// Contract for a converter from Markdown to HTML.
/// </summary>
public interface IMd
{
    /// <summary>
    /// Converts the given Markdown text.
    /// </summary>
    /// <param name="md">Markdown source text.</param>
    /// <returns>The converted text.</returns>
    string Render(string md);
}
18 changes: 18 additions & 0 deletions cs/Markdown/Markdown/Md.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
using Markdown.Renderer;
using Markdown.Token;

namespace Markdown.Markdown;

/// <summary>
/// Markdown-to-HTML converter: tokenizes the input and passes the tokens to an HTML renderer.
/// </summary>
public class Md : IMd
{
    private readonly IRenderer renderer = new HtmlRenderer();

    /// <summary>
    /// Converts Markdown text to HTML.
    /// </summary>
    /// <param name="md">Markdown source text.</param>
    /// <returns>The text produced by the renderer for the tokenized input.</returns>
    public string Render(string md)
    {
        // NOTE(review): the tokenizer stores state (its token list is an instance field),
        // so a new instance is needed for every render; a shared one would carry tokens
        // from earlier inputs into later results.
        ITokenizer tokenizer = new MarkdownTokenizer();
        var tokens = tokenizer.Tokenize(md);
        return renderer.Render(tokens);
    }
}
13 changes: 13 additions & 0 deletions cs/Markdown/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using Markdown.Markdown;

namespace Markdown;

/// <summary>
/// Console entry point: renders the file "Markdown.md" and prints the result.
/// </summary>
internal class Program
{
    /// <summary>
    /// Reads "Markdown.md" (relative to the working directory), converts it and writes the
    /// output to the console.
    /// </summary>
    public static void Main()
    {
        var markdownSource = File.ReadAllText("Markdown.md");
        var converter = new Md();
        var html = converter.Render(markdownSource);

        // NOTE(review): console output is perfectly fine for testing; for a real CLI it
        // would be better to save an .html file next to the input file.
        Console.WriteLine(html);
    }
}
57 changes: 57 additions & 0 deletions cs/Markdown/Renderer/HtmlRenderer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
using System.Text;
using System.Web;
using Markdown.Token;

namespace Markdown.Renderer;

/// <summary>
/// HTML renderer. Converts tokens into HTML text, escaping special characters both in
/// text content and in attribute values.
/// </summary>
public class HtmlRenderer : IRenderer
{
    /// <summary>
    /// Renders the tokens in order and concatenates the results.
    /// </summary>
    /// <param name="tokens">Top-level tokens to render.</param>
    /// <returns>The concatenated HTML text.</returns>
    public string Render(IEnumerable<IToken> tokens)
    {
        // NOTE(review): a strictly built tree would hang off a single root token; a flat
        // top-level sequence was accepted by the reviewer as well.
        var sb = new StringBuilder();
        foreach (var token in tokens)
        {
            sb.Append(RenderToken(token));
        }

        return sb.ToString();
    }

    /// <summary>
    /// Renders one token: text tokens are HTML-encoded, tag tokens are rendered as elements.
    /// </summary>
    /// <returns>The HTML for the token, or <c>null</c> for an unrecognized token type.</returns>
    private string? RenderToken(IToken token)
    {
        return token switch
        {
            TextToken textToken => HttpUtility.HtmlEncode(textToken.TextContent),
            TagToken tagToken => RenderTagToken(tagToken),
            // Unknown token kinds produce nothing; appending null to a StringBuilder is a no-op.
            _ => null
        };
    }

    /// <summary>
    /// Renders a tag token as an HTML element with its attributes and, unless the tag is
    /// self-closing, its rendered children and a closing tag.
    /// </summary>
    private string RenderTagToken(TagToken tagToken)
    {
        var sb = new StringBuilder();
        sb.Append($"<{tagToken.Tag.HtmlTag}");
        foreach (var (key, value) in tagToken.Attributes)
        {
            // Attribute values end up inside double quotes, so quotes, ampersands and angle
            // brackets have to be escaped or they would break out of the attribute.
            // The interpolation turns the value into text first; its declared type lives
            // outside this class.
            sb.Append($" {key}=\"{HttpUtility.HtmlAttributeEncode($"{value}")}\"");
        }

        if (tagToken.Tag.SelfClosing)
        {
            sb.Append(" />");
            return sb.ToString();
        }

        sb.Append('>');
        foreach (var child in tagToken.Children)
        {
            sb.Append(RenderToken(child));
        }

        sb.Append($"</{tagToken.Tag.HtmlTag}>");
        return sb.ToString();
    }
}
16 changes: 16 additions & 0 deletions cs/Markdown/Renderer/IRenderer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using Markdown.Token;

namespace Markdown.Renderer;

/// <summary>
/// General-purpose renderer contract.
/// </summary>
public interface IRenderer
{
    /// <summary>
    /// Converts a set of tokens into text of a markup language.
    /// </summary>
    /// <param name="tokens">The tokens to render.</param>
    /// <returns>The generated text.</returns>
    string Render(IEnumerable<IToken> tokens);
}
12 changes: 12 additions & 0 deletions cs/Markdown/Tags/CursiveTag.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace Markdown.Tags;

/// <summary>
/// Italic tag: Markdown delimiter "_" on both sides, HTML tag "em".
/// </summary>
public class CursiveTag : ITag
{
    /// <summary>Opening delimiter in Markdown.</summary>
    public string MdTag => "_";

    /// <summary>Closing delimiter in Markdown; the same as the opening one.</summary>
    public string MdClosingTag => MdTag;

    /// <summary>Corresponding HTML tag name.</summary>
    public string HtmlTag => "em";

    /// <summary>Tags that may not be nested inside this one: only the strong tag.</summary>
    public IReadOnlyCollection<ITag> DisallowedChildren { get; } = new List<ITag> { new StrongTag() };
}
8 changes: 8 additions & 0 deletions cs/Markdown/Tags/HeaderTag.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace Markdown.Tags;

/// <summary>
/// Header tag: Markdown marker "#", HTML tag "h1".
/// </summary>
public class HeaderTag : ITag
{
    /// <summary>Marker in Markdown.</summary>
    public string MdTag => "#";

    /// <summary>Corresponding HTML tag name.</summary>
    public string HtmlTag => "h1";

    /// <summary>Tags that may not be nested inside this one; the list is empty.</summary>
    public IReadOnlyCollection<ITag> DisallowedChildren { get; } = new List<ITag>();
}
42 changes: 42 additions & 0 deletions cs/Markdown/Tags/ITag.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
namespace Markdown.Tags;

/// <summary>
/// Tag contract: the Markdown form of a tag and the HTML tag it corresponds to.
/// </summary>
/// <remarks>
/// NOTE(review): in the reviewer's opinion a tag is, conceptually, closer to an abstract
/// class than to an interface.
/// </remarks>
public interface ITag
{
    /// <summary>
    /// The tag as used in Markdown.
    /// </summary>
    string MdTag { get; }

    /// <summary>
    /// The closing tag in Markdown; <c>null</c> unless an implementation supplies its own.
    /// </summary>
    /// <remarks>
    /// NOTE(review): reviewer's reminder for later - check how these closing tags end up
    /// being processed, so that responsibility for concrete details does not land on the
    /// abstraction.
    /// </remarks>
    string? MdClosingTag { get => null; }

    /// <summary>
    /// The HTML tag corresponding to the Markdown tag (see <see cref="MdTag"/>).
    /// </summary>
    string HtmlTag { get; }

    /// <summary>
    /// Whether the tag is self-closing in HTML; <c>false</c> unless an implementation
    /// supplies its own value.
    /// </summary>
    bool SelfClosing { get => false; }

    /// <summary>
    /// Child tags of these kinds are not allowed to be nested inside this tag.
    /// </summary>
    IReadOnlyCollection<ITag> DisallowedChildren { get; }

    /// <summary>
    /// Gets the attributes for rendering in HTML. This interface-level version always
    /// returns <c>null</c>.
    /// </summary>
    /// <param name="content">The content of the tag.</param>
    /// <returns>A string with attributes to insert into the tag.</returns>
    /// <remarks>
    /// NOTE(review): the reviewer thinks the name reads more like GetHtmlTagAttributes.
    /// </remarks>
    static string? GetHtmlRenderAttributes(string content)
    {
        return null;
    }

    // TODO: add some kind of validation method?
}
19 changes: 19 additions & 0 deletions cs/Markdown/Tags/ImageTag.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
namespace Markdown.Tags;

/// <summary>
/// Image tag: opens with "![" and closes with ")" in Markdown, HTML tag "img".
/// </summary>
public class ImageTag : ITag
{
    /// <summary>Opening delimiter in Markdown.</summary>
    public string MdTag => "![";

    /// <summary>Closing delimiter in Markdown.</summary>
    public string MdClosingTag => ")";

    /// <summary>Corresponding HTML tag name.</summary>
    public string HtmlTag => "img";

    /// <summary>
    /// Tags that may not be nested inside an image. A new list with new tag instances is
    /// built on every access.
    /// </summary>
    /// <remarks>
    /// NOTE(review): whenever a new tag is introduced it has to be added here too, which
    /// is easy to forget.
    /// </remarks>
    public IReadOnlyCollection<ITag> DisallowedChildren
    {
        get
        {
            return new List<ITag>() { new CursiveTag(), new HeaderTag(), new ImageTag(), new StrongTag() };
        }
    }

    /// <summary>The image tag is self-closing in HTML.</summary>
    public bool SelfClosing => true;

    /// <summary>
    /// Not implemented yet.
    /// </summary>
    /// <param name="content">The content of the tag.</param>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public static string GetHtmlRenderAttributes(string content)
    {
        throw new NotImplementedException();
    }
}
15 changes: 15 additions & 0 deletions cs/Markdown/Tags/StrongTag.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
namespace Markdown.Tags;

/// <summary>
/// Tag for bold text: Markdown delimiter "__" on both sides, HTML tag "strong".
/// </summary>
public class StrongTag : ITag
{
    /// <summary>Opening delimiter in Markdown.</summary>
    public string MdTag => "__";

    /// <summary>Closing delimiter in Markdown; the same as the opening one.</summary>
    public string MdClosingTag => MdTag;

    /// <summary>Corresponding HTML tag name.</summary>
    public string HtmlTag => "strong";

    /// <summary>Tags that may not be nested inside this one; the list is empty.</summary>
    public IReadOnlyCollection<ITag> DisallowedChildren { get; } = new List<ITag>();
}
10 changes: 10 additions & 0 deletions cs/Markdown/Token/IToken.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
namespace Markdown.Token;

/// <summary>
/// Token contract.
/// </summary>
/// <remarks>
/// NOTE(review): the reviewer sees a token as more of a data class than something with
/// several implementations, and asks what is meant to go into it and how it is going to
/// be used during parsing.
/// </remarks>
public interface IToken
{
    /// <summary>Text carried by the token, if any.</summary>
    string? TextContent { get; }

    /// <summary>Child tokens, if any.</summary>
    List<IToken>? Children { get; }
}
11 changes: 11 additions & 0 deletions cs/Markdown/Token/ITokenizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
namespace Markdown.Token;

/// <summary>
/// Tokenizer contract: translates a string into tokens.
/// </summary>
public interface ITokenizer
{
    /// <summary>
    /// Splits the given text into tokens.
    /// </summary>
    /// <param name="content">Text to tokenize.</param>
    /// <returns>The resulting tokens.</returns>
    List<IToken> Tokenize(string content);

    /// <summary>
    /// Returns the tokens held by the tokenizer.
    /// </summary>
    List<IToken> GetTokens();
}
91 changes: 91 additions & 0 deletions cs/Markdown/Token/MarkdownTokenizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
using Markdown.Tags;
using System.Text;
using Markdown.Extensions;

namespace Markdown.Token;

/// <summary>
/// Tokenizer that translates a Markdown string into tokens.
/// </summary>
public class MarkdownTokenizer : ITokenizer
{
    // Result of the most recent Tokenize call. Replaced (not cleared) on every call so that
    // a list handed out earlier is never modified afterwards.
    private List<IToken> tokens = new List<IToken>();

    // Order matters: the first tag whose MdTag matches at the current position wins, so
    // "__" (StrongTag) has to come before "_" (CursiveTag).
    private readonly List<ITag> tags = new List<ITag>()
    {
        new StrongTag(),
        new CursiveTag(),
        new HeaderTag(),
        new ImageTag()
    };

    /// <summary>
    /// Splits Markdown text into text tokens and (possibly nested) tag tokens.
    /// </summary>
    /// <param name="content">Markdown text to tokenize.</param>
    /// <returns>
    /// The top-level tokens of <paramref name="content"/>. Every call starts from an empty
    /// result, so tokens from previous calls are not included.
    /// </returns>
    public List<IToken> Tokenize(string content)
    {
        // Previously the field was never reset, so a second call returned the tokens of the
        // first input followed by those of the second.
        tokens = new List<IToken>();

        var tokenStack = new Stack<TagToken>();
        var sb = new StringBuilder();
        var i = 0;
        while (i < content.Length)
        {
            var tag = FindTagAt(content, i);
            if (tag is null)
            {
                // Plain character: collect it until the next tag boundary.
                sb.Append(content[i]);
                i++;
                continue;
            }

            // Advance past the tag's Markdown marker.
            i += tag.MdTag.Length;

            //TODO: Check for closing tag
            // For now a tag counts as closing when the same tag object is on top of the stack.
            if (tokenStack.Count > 0 && tokenStack.Peek().Tag == tag)
            {
                var token = tokenStack.Pop();
                token.Children.Add(new TextToken(sb.ToString()));
                sb.Clear();
                if (tokenStack.Count == 0)
                {
                    tokens.Add(token);
                }
                else
                {
                    tokenStack.Peek().Children.Add(token);
                }

                continue;
            }

            // Opening tag: text collected so far belongs to the enclosing tag, or to the
            // root level when no tag is open.
            if (sb.Length > 0)
            {
                if (tokenStack.Count > 0)
                {
                    tokenStack.Peek().Children.Add(new TextToken(sb.ToString()));
                }
                else
                {
                    tokens.Add(new TextToken(sb.ToString()));
                }

                sb.Clear();
            }

            tokenStack.Push(new TagToken(tag));
        }

        // Only the trailing text is flushed here; tags still on the stack (opened but never
        // closed) are not added to the result.
        if (sb.Length > 0)
        {
            tokens.Add(new TextToken(sb.ToString()));
        }

        return tokens;
    }

    /// <summary>
    /// Returns a copy of the tokens produced by the most recent <see cref="Tokenize"/> call.
    /// </summary>
    public List<IToken> GetTokens()
    {
        return new List<IToken>(tokens);
    }

    // Returns the first known tag whose Markdown marker starts at the given position, or null.
    private ITag? FindTagAt(string content, int position)
    {
        foreach (var tag in tags)
        {
            if (content.ContainsSubstringOnIndex(tag.MdTag, position))
            {
                return tag;
            }
        }

        return null;
    }
}
Loading