From e1a2129c5f43694ea43b7d4aa80b152066923667 Mon Sep 17 00:00:00 2001 From: Astrian Zheng Date: Sat, 7 Sep 2024 10:39:18 +1000 Subject: [PATCH 1/3] Add image support --- Sources/SwiftHTMLtoMarkdown/BasicHTML.swift | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Sources/SwiftHTMLtoMarkdown/BasicHTML.swift b/Sources/SwiftHTMLtoMarkdown/BasicHTML.swift index fa97bd7..16b23d1 100644 --- a/Sources/SwiftHTMLtoMarkdown/BasicHTML.swift +++ b/Sources/SwiftHTMLtoMarkdown/BasicHTML.swift @@ -113,6 +113,16 @@ public class BasicHTML: HTML { if node.nodeName() == "#text" && node.description != " " { markdown += node.description } + + if node.nodeName() == "img" { + markdown += "![" + let alt = try node.attr("alt") + markdown += alt + markdown += "](" + let src = try node.attr("src") + markdown += src + markdown += ")" + } for node in node.getChildNodes() { try convertNode(node) From 8f97dbbba23dc45a69218fc068d0455a0cba46dc Mon Sep 17 00:00:00 2001 From: Astrian Zheng Date: Sat, 7 Sep 2024 10:41:58 +1000 Subject: [PATCH 2/3] Fix a problem that the headline rendering error The headline may render to the same line to the last paragraph --- Sources/SwiftHTMLtoMarkdown/BasicHTML.swift | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Sources/SwiftHTMLtoMarkdown/BasicHTML.swift b/Sources/SwiftHTMLtoMarkdown/BasicHTML.swift index 16b23d1..bc1842b 100644 --- a/Sources/SwiftHTMLtoMarkdown/BasicHTML.swift +++ b/Sources/SwiftHTMLtoMarkdown/BasicHTML.swift @@ -22,6 +22,8 @@ public class BasicHTML: HTML { return } + markdown += "\n\n" + for _ in 0.. Date: Sat, 7 Sep 2024 11:47:41 +1000 Subject: [PATCH 3/3] Parse DIV, also process image correctly --- Sources/SwiftHTMLtoMarkdown/BasicHTML.swift | 25 +++++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/Sources/SwiftHTMLtoMarkdown/BasicHTML.swift b/Sources/SwiftHTMLtoMarkdown/BasicHTML.swift index bc1842b..bb75eba 100644 --- a/Sources/SwiftHTMLtoMarkdown/BasicHTML.swift +++ b/Sources/SwiftHTMLtoMarkdown/BasicHTML.swift @@ -110,13 +110,13 @@ public class BasicHTML: HTML { markdown += "\n```" return } - } - - if node.nodeName() == "#text" && node.description != " " { - markdown += node.description - } - - if node.nodeName() == "img" { + } else if node.nodeName() == "figcaption" { + markdown += "\n\n" + for child in node.getChildNodes() { + try convertNode(child) + } + markdown += "\n\n" + } else if node.nodeName() == "img" { markdown += "![" let alt = try node.attr("alt") markdown += alt @@ -124,8 +124,19 @@ public class BasicHTML: HTML { let src = try node.attr("src") markdown += src markdown += ")" + } else if node.nodeName() == "div" { + if hasSpacedParagraph { + markdown += "\n\n" + } else { + hasSpacedParagraph = true + } } + if node.nodeName() == "#text" && node.description != " " { + markdown += node.description + } + + for node in node.getChildNodes() { try convertNode(node) }