diff --git a/src/markdown.gleam b/src/markdown.gleam deleted file mode 100644 index 29dc6de..0000000 --- a/src/markdown.gleam +++ /dev/null @@ -1,97 +0,0 @@ -import gleam/list -import gleam/string -import gleam/regex -import gleam/option.{type Option, None, Some} - -pub fn reddit_to_telegram(reddit_markdown: String) -> String { - reddit_markdown - |> format_code_blocks - |> string.split("\n") - |> list.map(convert_line) - |> string.join("\n") -} - -fn format_code_blocks(text: String) -> String { - replace_regex(text, using: "```(.*)```", by: fn(content) { - case content { - [Some(content)] -> Some("
" <> content <> "
") - _ -> None - } - }) -} - -fn convert_line(line: String) -> String { - line - // Links - |> replace_regex(using: "\\[(.*?)\\]\\((.*?)\\)", by: fn(content) { - case content { - [Some(text), Some(link)] -> - Some(" link <> "\">" <> text <> "") - _ -> None - } - }) - // Bold-italic - |> replace_regex( - using: "(?:\\*{3}(.*)\\*{3})|(?:_{3}(.*)_{3})", - by: fn(content) { - case content { - [Some(content)] -> Some("" <> content <> "") - [_, Some(content)] -> Some("" <> content <> "") - _ -> None - } - }, - ) - // Bold - |> replace_regex( - using: "(?:\\*{2}(.*)\\*{2})|(?:_{2}(.*)_{2})", - by: fn(content) { - case content { - [Some(content)] -> Some("" <> content <> "") - [_, Some(content)] -> Some("" <> content <> "") - _ -> None - } - }, - ) - // Italic - |> replace_regex(using: "[_*](.*)[_*]", by: fn(content) { - case content { - [Some(content)] -> Some("" <> content <> "") - _ -> None - } - }) - // Strikethrough - |> replace_regex(using: "~{2}(.*)~{2}", by: fn(content) { - case content { - [Some(content)] -> Some("" <> content <> "") - _ -> None - } - }) - // Spoiler - |> replace_regex(using: ">!(.*)!<", by: fn(content) { - case content { - [Some(content)] -> Some("" <> content <> "") - _ -> None - } - }) -} - -fn replace_regex( - text text: String, - using pattern: String, - by replacement: fn(List(Option(String))) -> Option(String), -) -> String { - let assert Ok(regex) = regex.from_string(pattern) - - let matches = regex.scan(with: regex, content: text) - - use new_text, match <- list.fold(over: matches, from: text) - - let new_chunk = replacement(match.submatches) - - case new_chunk { - Some(chunk) -> string.replace(new_text, match.content, chunk) - None -> new_text - } -} -// **Douglas** => Douglas -// /\*\*(.*)\*\*/ => $1 diff --git a/src/reddit/markdown.gleam b/src/reddit/markdown.gleam new file mode 100644 index 0000000..ba739f5 --- /dev/null +++ b/src/reddit/markdown.gleam @@ -0,0 +1,324 @@ +import gleam/io +import gleam/string +import gleam/regex.{type Regex, Match} +import gleam/result +import gleam/option.{type Option, None, Some} + +pub fn reddit_to_telegram(markdown: String) -> String { + markdown + |> parse_helper("") +} + +fn text_style_regex(delimiter: Delimiter) -> Regex { + let #(start, end) = case delimiter { + Same(d) -> #(d, d) + Different(d1, d2) -> #(d1, d2) + } + + let assert Ok(regex) = regex.from_string("^" <> start <> "(.*?)" <> end <> "") + + regex +} + +fn link_regex() -> Regex { + let assert Ok(regex) = regex.from_string("^\\[(.*?)\\]\\((.*?)\\)") + + regex +} + +fn inline_code_regex() -> Regex { + let assert Ok(regex) = regex.from_string("^`(.*?)`") + + regex +} + +fn quote_regex() -> Regex { + let assert Ok(regex) = regex.from_string("^(\n> [\\w\\W]*?)\n[^(> )]") + + regex +} + +type CodeBlockDelimiter { + Backticks + Spaces +} + +fn backtick_code_block_regex() -> Regex { + let assert Ok(regex) = regex.from_string("^\n```([\\w\\W]*?)\n```") + + regex +} + +fn spaces_code_block_regex() -> Regex { + let assert Ok(regex) = regex.from_string("^(\n [\\w\\W]*?)\n[^( )]") + + regex +} + +fn parse_helper(markdown: String, parsed: String) -> String { + case markdown { + "" -> parsed + "**" <> _rest -> + apply_style( + delimiter: Same("**"), + replace_tags: #("", ""), + on: markdown, + resulting: parsed, + ) + "__" <> _rest -> + apply_style( + delimiter: Same("__"), + replace_tags: #("", ""), + on: markdown, + resulting: parsed, + ) + "*" <> _rest -> + apply_style( + delimiter: Same("*"), + replace_tags: #("", ""), + on: markdown, + resulting: parsed, + ) + "_" <> _rest -> + apply_style( + delimiter: Same("_"), + replace_tags: #("", ""), + on: markdown, + resulting: parsed, + ) + "[" <> _rest -> apply_link(markdown, parsed) + "~~" <> _rest -> + apply_style( + delimiter: Same("~~"), + replace_tags: #("", ""), + on: markdown, + resulting: parsed, + ) + ">!" <> _rest -> + apply_style( + delimiter: Different(">!", "!<"), + replace_tags: #("", ""), + on: markdown, + resulting: parsed, + ) + "`" <> _rest -> apply_inline_code(markdown, parsed) + "\n> " <> _rest -> apply_quote(markdown, parsed) + "\n```" <> _rest -> apply_code_block(Backticks, markdown, parsed) + "\n " <> _rest -> apply_code_block(Spaces, markdown, parsed) + _ -> { + let first_char = + markdown + |> string.first + |> result.unwrap("") + + parse_helper(string.drop_left(markdown, 1), parsed <> first_char) + } + } +} + +type Delimiter { + Same(String) + Different(String, String) +} + +fn apply_style( + delimiter delimiter: Delimiter, + replace_tags style: #(String, String), + on markdown: String, + resulting parsed: String, +) -> String { + let escape = string.replace(_, each: "*", with: "\\*") + let delimiter_escaped = case delimiter { + Same(d) -> Same(escape(d)) + Different(d1, d2) -> Different(escape(d1), escape(d2)) + } + + apply_formatting( + text_style_regex(delimiter_escaped), + fn(content) { + case content { + [Some(styled_content)] -> Some(style.0 <> styled_content <> style.1) + _ -> None + } + }, + markdown, + parsed, + ) +} + +fn apply_link(markdown: String, parsed: String) -> String { + let style_applied = + replace(markdown, using: link_regex(), by: fn(content) { + case content { + [Some(text), Some(link)] -> + Some(" link <> "\">" <> text <> "") + _ -> None + } + }) + + case style_applied { + Some(new_markdown) -> { + case string.split_once(new_markdown, ">") { + Ok(#(first, rest)) -> parse_helper(rest, parsed <> first <> ">") + Error(Nil) -> { + io.debug("No closing '>' found after applying link style.") + + parse_helper(new_markdown, parsed) + } + } + } + None -> parse_helper(string.drop_left(markdown, 1), parsed <> "[") + } +} + +fn apply_inline_code(markdown: String, parsed: String) -> String { + let style_applied = + replace(markdown, using: inline_code_regex(), by: fn(content) { + case content { + [Some(code)] -> Some("" <> code <> "") + _ -> None + } + }) + + case style_applied { + Some(new_markdown) -> { + case string.split_once(new_markdown, "") { + Ok(#(first, rest)) -> parse_helper(rest, parsed <> first <> "") + Error(Nil) -> { + io.debug("No closing '' found after applying inline code.") + + parse_helper(new_markdown, parsed) + } + } + } + None -> parse_helper(string.drop_left(markdown, 1), parsed <> "`") + } +} + +fn apply_quote(markdown: String, parsed: String) -> String { + let style_applied = case regex.scan(with: quote_regex(), content: markdown) { + [Match(content: _, submatches: [Some(quote)])] -> { + let parsed_quote = + quote + // Get rid of the leading "\n> " + |> string.drop_left(3) + |> string.replace("\n> ", "\n") + + let replaced = + string.replace( + markdown, + quote, + "\n
" <> parsed_quote <> "
", + ) + + Some(replaced) + } + _ -> None + } + + case style_applied { + Some(new_markdown) -> { + case string.split_once(new_markdown, "") { + Ok(#(first, rest)) -> + parse_helper(rest, parsed <> first <> "") + Error(Nil) -> { + io.debug( + "No closing '' found after applying quote style.", + ) + + parse_helper(new_markdown, parsed) + } + } + } + None -> parse_helper(string.drop_left(markdown, 1), parsed) + } +} + +fn apply_code_block( + delimiter: CodeBlockDelimiter, + markdown: String, + parsed: String, +) -> String { + let #(regex, block_formatter) = case delimiter { + Backticks -> #(backtick_code_block_regex(), fn(quote) { + string.drop_left(quote, 1) + }) + Spaces -> #(spaces_code_block_regex(), fn(quote) { + quote + |> string.drop_left(5) + |> string.replace("\n ", "\n") + }) + } + + let style_applied = case regex.scan(with: regex, content: markdown) { + [Match(content: content, submatches: [Some(code_block)])] -> { + let parsed_quote = block_formatter(code_block) + let replace_target = case delimiter { + // Only the content between the backticks + Backticks -> content + // Include the leading 4 spaces in the replace target + Spaces -> code_block + } + + let replaced = + string.replace( + markdown, + replace_target, + "
" <> parsed_quote <> "
", + ) + + Some(replaced) + } + _ -> None + } + + case style_applied { + Some(new_markdown) -> { + case string.split_once(new_markdown, "") { + Ok(#(first, rest)) -> parse_helper(rest, parsed <> first <> "") + Error(Nil) -> { + io.debug("No closing '' found after applying code block style.") + + parse_helper(new_markdown, parsed) + } + } + } + None -> parse_helper(string.drop_left(markdown, 1), parsed) + } +} + +fn apply_formatting( + regex: Regex, + style: fn(List(Option(String))) -> Option(String), + markdown: String, + parsed: String, +) -> String { + let style_applied = replace(markdown, using: regex, by: style) + + case style_applied { + Some(new_markdown) -> parse_helper(new_markdown, parsed) + None -> { + let first_char = + markdown + |> string.first + |> result.unwrap("") + + parse_helper(string.drop_left(markdown, 1), parsed <> first_char) + } + } +} + +fn replace( + text text: String, + using regex: Regex, + by replacement: fn(List(Option(String))) -> Option(String), +) -> Option(String) { + case regex.scan(with: regex, content: text) { + [Match(content: matched, submatches: submatches)] -> { + submatches + |> replacement + |> option.map(string.replace(text, matched, _)) + } + _ -> None + } +} diff --git a/src/reddit_to_telegram.gleam b/src/reddit_to_telegram.gleam index 763759c..adb661c 100644 --- a/src/reddit_to_telegram.gleam +++ b/src/reddit_to_telegram.gleam @@ -100,7 +100,7 @@ fn start( <> " messages sent", ) - let _ = database.add_messages(database, inserted, bridge.telegram_channel) + // let _ = database.add_messages(database, inserted, bridge.telegram_channel) let error_string = string.join(errors, "\n") diff --git a/src/telegram.gleam b/src/telegram.gleam index 2aa8b93..04372ac 100644 --- a/src/telegram.gleam +++ b/src/telegram.gleam @@ -12,7 +12,7 @@ import gleam/list import gleam/option.{type Option, None, Some} import gleam/result import gleam/string -import markdown +import reddit/markdown import reddit type InputMedia { diff --git a/test/markdown_test.gleam b/test/markdown_test.gleam new file mode 100644 index 0000000..c39bc04 --- /dev/null +++ b/test/markdown_test.gleam @@ -0,0 +1,102 @@ +import gleeunit/should +import reddit/markdown + +pub fn parse_empty_test() { + "" + |> markdown.reddit_to_telegram + |> should.equal("") +} + +pub fn parse_simple_text_test() { + "Hello, World!" + |> markdown.reddit_to_telegram + |> should.equal("Hello, World!") +} + +pub fn parse_bold_text_test() { + "Hello, my **friend**! +My __good__ friend!" + |> markdown.reddit_to_telegram + |> should.equal("Hello, my friend!\nMy good friend!") +} + +pub fn parse_italic_text_test() { + "Hello, my *friend*! +My _good_ friend!" + |> markdown.reddit_to_telegram + |> should.equal("Hello, my friend!\nMy good friend!") +} + +pub fn parse_bold_italic_nested_text_test() { + "Hello, **my *friend***! +My __good _friend___!" + |> markdown.reddit_to_telegram + |> should.equal( + "Hello, my friend!\nMy good friend!", + ) +} + +pub fn parse_bold_italic_text_test() { + "Hello, ***my friend***! +My ___good friend___!" + |> markdown.reddit_to_telegram + |> should.equal( + "Hello, my friend!\nMy good friend!", + ) +} + +pub fn parse_simple_link_test() { + "Check out [this link](https://example.com)!" + |> markdown.reddit_to_telegram + |> should.equal("Check out this link!") +} + +pub fn parse_bold_italic_link_test() { + "Check out [**this** _link_](https://example.com/some_cool_path)! It's **amazing**!" + |> markdown.reddit_to_telegram + |> should.equal( + "Check out this link! It's amazing!", + ) +} + +pub fn parse_strikethrough_test() { + "Hello, my ~~friend~~!" + |> markdown.reddit_to_telegram + |> should.equal("Hello, my friend!") +} + +pub fn parse_spoiler_test() { + "Hello, my >!friend! markdown.reddit_to_telegram + |> should.equal("Hello, my friend!") +} + +pub fn parse_inline_code_test() { + "Check this: `print(\"*Hello*, _World_!\")`" + |> markdown.reddit_to_telegram + |> should.equal("Check this: print(\"*Hello*, _World_!\")") +} + +pub fn parse_quote_test() { + "A citation\n> Be brave!\n> No matter what\nMotivating!" + |> markdown.reddit_to_telegram + |> should.equal( + "A citation\n
Be brave!\nNo matter what
\nMotivating!", + ) +} + +pub fn parse_backtick_code_block_test() { + "Some code:\n```\nconst value_to_print = \"World\"\nprint(`Hello, ${value_to_print}!`)\n```Cool" + |> markdown.reddit_to_telegram + |> should.equal( + "Some code:
const value_to_print = \"World\"\nprint(`Hello, ${value_to_print}!`)
Cool", + ) +} + +pub fn parse_spaces_code_block_test() { + "Some code:\n const value_to_print = \"World\"\n print(`Hello, ${value_to_print}!`)\nAwesome!" + |> markdown.reddit_to_telegram + |> should.equal( + "Some code:
const value_to_print = \"World\"\nprint(`Hello, ${value_to_print}!`)
\nAwesome!", + ) +}