diff --git a/spec-results.json b/spec-results.json index 9e11837..3e20c8e 100644 --- a/spec-results.json +++ b/spec-results.json @@ -31,11 +31,8 @@ 601, 602, 603, - 604, 605, - 606, - 607, - 608 + 606 ], "Backslash escapes": [ 298, @@ -573,11 +570,8 @@ 601, 602, 603, - 604, 605, - 606, - 607, - 608 + 606 ], "Backslash escapes": [ 298, @@ -1082,6 +1076,18 @@ 28, 29 ], + "[extension] Autolinks": [ + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 629, + 630, + 631 + ], "[extension] Strikethrough": [ 491, 492 diff --git a/src/Markdown/Inline.elm b/src/Markdown/Inline.elm index f21b8fb..5fd40b0 100644 --- a/src/Markdown/Inline.elm +++ b/src/Markdown/Inline.elm @@ -1,6 +1,7 @@ module Markdown.Inline exposing ( Inline(..) , extractText + , AllowedInlines(..) ) {-| Inline rendering and helpers. @@ -44,6 +45,11 @@ type Inline | Strikethrough (List Inline) +type AllowedInlines + = AllowAll + | SkipAutolinks + + type alias Attribute = { name : String, value : String } diff --git a/src/Markdown/InlineParser.elm b/src/Markdown/InlineParser.elm index 780df9f..e374a5b 100644 --- a/src/Markdown/InlineParser.elm +++ b/src/Markdown/InlineParser.elm @@ -3,7 +3,7 @@ module Markdown.InlineParser exposing (parse, query, tokenize, walk) import Dict import HtmlParser import Markdown.Helpers exposing (References, cleanWhitespaces, formatStr, ifError, insideSquareBracketRegex, isEven, lineEndChars, prepareRefLabel, returnFirstJust, titleRegex, whiteSpaceChars) -import Markdown.Inline exposing (Inline(..)) +import Markdown.Inline exposing (AllowedInlines(..), Inline(..)) import Parser.Advanced as Advanced exposing ((|=)) import Regex exposing (Regex) import Url @@ -13,8 +13,8 @@ import Url -- Parser -parse : References -> String -> List Inline -parse refs rawText_ = +parse : AllowedInlines -> References -> String -> List Inline +parse allowedInlines refs rawText_ = let rawText = String.trim rawText_ @@ -25,7 +25,7 @@ parse refs rawText_ = tokensToMatches tokens [] refs rawText |> organizeMatches |> parseTextMatches rawText [] - |> matchesToInlines + |> matchesToInlines allowedInlines parseTextMatches : String -> List Match -> List Match -> List Match @@ -145,6 +145,8 @@ type Meaning | EmphasisToken Char { leftFringeRank : Int, rightFringeRank : Int } | SoftLineBreakToken | HardLineBreakToken + | ExtendedAutolink + | EmailAutolink | StrikethroughToken Escaped @@ -252,6 +254,8 @@ tokenize rawText = |> mergeByIndex (findHardBreakTokens rawText) |> mergeByIndex (findAngleBracketLTokens rawText) |> mergeByIndex (findAngleBracketRTokens rawText) + |> mergeByIndex (findExtendedAutolinkTokens rawText) + |> mergeByIndex (findEmailAutolinkTokens rawText) {-| Merges two sorted sequences into a sorted sequence @@ -434,6 +438,8 @@ regMatchToEmphasisToken char rawText regMatch = _ -> Nothing + + -- Strikethrough Tokens @@ -458,12 +464,12 @@ regMatchToStrikethroughToken regMatch = Maybe.map String.length maybeBackslashes |> Maybe.withDefault 0 - (length, meaning) = - if isEven backslashesLength then - (String.length tilde, StrikethroughToken NotEscaped) + ( length, meaning ) = + if isEven backslashesLength then + ( String.length tilde, StrikethroughToken NotEscaped ) - else - (String.length tilde, StrikethroughToken Escaped) + else + ( String.length tilde, StrikethroughToken Escaped ) in Just { index = regMatch.index + backslashesLength @@ -475,9 +481,6 @@ regMatchToStrikethroughToken regMatch = Nothing - - - {-| Whitespace characters matched by the `\\s` regex -} isWhitespace : Char -> Bool @@ -492,7 +495,7 @@ isWhitespace c = '\n' -> True - '\r' -> + '\u{000D}' -> True '\t' -> @@ -734,6 +737,106 @@ regMatchToLinkImageCloseToken regMatch = +-- GFM Auto Link Tokens + + +findExtendedAutolinkTokens : String -> List Token +findExtendedAutolinkTokens str = + Regex.find extendedAutoLinkRegex str + |> List.filterMap regMatchToExtendedAutolinkToken + + +extendedAutoLinkRegex : Regex +extendedAutoLinkRegex = + -- what if we do this without the negative lookbehind and just make it a sub match? + Regex.fromString "(?<=^|\\s|\\*|_|~|\\()(?:(?:https?://)|(?:www\\.))[a-z0-9A-Z_-]+(?:\\.[a-z0-9A-Z_-]+)*(?:/([^\\s<]*))?" + |> Maybe.withDefault Regex.never + + +extendedAutoLinkTrailingPunctuationRegex : Regex +extendedAutoLinkTrailingPunctuationRegex = + --should this use the isPunctuation helper? + Regex.fromString "[?!\\.,:*_~]+$" + |> Maybe.withDefault Regex.never + + +extendedAutoLinkTrailingEntityReferenceRegex : Regex +extendedAutoLinkTrailingEntityReferenceRegex = + Regex.fromString "(&[a-zA-Z0-9]+;)+$" + |> Maybe.withDefault Regex.never + + +regMatchToExtendedAutolinkToken : Regex.Match -> Maybe Token +regMatchToExtendedAutolinkToken regMatch = + let + lengthOfUnmatchedParenthesis = + if String.endsWith ")" regMatch.match then + Basics.max 0 (List.length (String.indexes ")" regMatch.match) - List.length (String.indexes "(" regMatch.match)) + + else + 0 + + -- Ensue we trim trailing punctuation even if there are unmatched parenthesis after + lengthOfTrailingPunctuation = + regMatch.match + |> String.dropRight lengthOfUnmatchedParenthesis + |> Regex.find extendedAutoLinkTrailingPunctuationRegex + |> List.head + |> Maybe.map (.match >> String.length) + |> Maybe.withDefault 0 + + lengthOfTrailingEntityReferences = + regMatch.match + |> Regex.find extendedAutoLinkTrailingEntityReferenceRegex + |> List.head + |> Maybe.map (.match >> String.length) + |> Maybe.withDefault 0 + in + Just + { index = regMatch.index + , length = String.length regMatch.match - lengthOfTrailingPunctuation - lengthOfUnmatchedParenthesis - lengthOfTrailingEntityReferences + , meaning = ExtendedAutolink + } + + + +-- GFM Auto Link Tokens + + +findEmailAutolinkTokens : String -> List Token +findEmailAutolinkTokens str = + Regex.find emailAutoLinkRegex str + |> List.filterMap regMatchToEmailAutolinkToken + + +emailAutoLinkRegex : Regex +emailAutoLinkRegex = + Regex.fromString "(?<=^|\\s|\\*|_|~|\\()[a-zA-Z0-9\\._+-]+@[a-zA-Z0-9_-]+((\\.[a-zA-Z0-9_-]+)+)" + |> Maybe.withDefault Regex.never + + +regMatchToEmailAutolinkToken : Regex.Match -> Maybe Token +regMatchToEmailAutolinkToken regMatch = + let + lastCharacter = + regMatch.match + |> String.right 1 + |> String.uncons + |> Maybe.map Tuple.first + in + case Maybe.map Char.isAlphaNum lastCharacter of + Just True -> + Just + { index = regMatch.index + , length = String.length regMatch.match + , meaning = EmailAutolink + } + + _ -> + Nothing + + + -- Angle Brackets Tokens @@ -1192,6 +1295,68 @@ autolinkToMatch (Match match) = Result.Err (Match match) +extendedAutolinkToMatch : String -> Token -> Maybe Match +extendedAutolinkToMatch rawText token = + let + start = + token.index + + end = + token.index + token.length + + text = + String.slice token.index end rawText + + url = + withProtocol text + in + if Regex.contains urlRegex url then + { type_ = AutolinkType ( text, encodeUrl url ) + , start = start + , end = end + , textStart = 0 + , textEnd = 0 + , text = "" + , matches = [] + } + |> Match + |> Just + + else + Nothing + + +emailAutolinkToMatch : String -> Token -> Maybe Match +emailAutolinkToMatch rawText token = + let + start = + token.index + + end = + token.index + token.length + + text = + String.slice token.index end rawText + + url = + "mailto:" ++ text + in + if Regex.contains urlRegex url then + { type_ = AutolinkType ( text, encodeUrl url ) + , start = start + , end = end + , textStart = 0 + , textEnd = 0 + , text = "" + , matches = [] + } + |> Match + |> Just + + else + Nothing + + -- From http://spec.commonmark.org/dingus/commonmark.js @@ -1349,12 +1514,17 @@ voidHtmlTags = isCloseToken : HtmlModel -> Token -> Bool isCloseToken htmlModel token = - --case token.meaning of - -- HtmlToken False htmlModel_ -> - -- htmlModel.tag == htmlModel_.tag - -- - -- _ -> - False + case token.meaning of + HtmlToken NotOpening htmlModel_ -> + case ( htmlModel, htmlModel_ ) of + ( HtmlParser.Element firstTag _ _, HtmlParser.Element secondTag _ _ ) -> + firstTag == secondTag + + _ -> + False + + _ -> + False @@ -1366,7 +1536,7 @@ linkImageTypeTTM : List Token -> List Token -> List Match -> References -> Strin linkImageTypeTTM remaining tokens matches references rawText = case remaining of [] -> - emphasisTTM (List.reverse tokens) [] matches references rawText + extendedAutolinkTTM (List.reverse tokens) [] matches references rawText token :: tokensTail -> case token.meaning of @@ -1657,13 +1827,22 @@ refRegexToMatch matchModel references maybeRegexMatch = _ -> LinkType (prepareUrlAndTitle rawUrl maybeTitle) in - Just ( - Match + Just + (Match { matchModel | type_ = type_ , end = matchModel.end + regexMatchLength } - ) + ) + + +withProtocol : String -> String +withProtocol url = + if String.startsWith "http" url then + url + + else + "http://" ++ url encodeUrl : String -> String @@ -1688,6 +1867,67 @@ decodeUrlRegex = +-- ExtendedAutolink Tokens To Matches + + +isExtendedAutoLink : Token -> Bool +isExtendedAutoLink token = + case token.meaning of + ExtendedAutolink -> + True + + EmailAutolink -> + True + + _ -> + False + + +extendedAutolinkTTM : List Token -> List Token -> List Match -> References -> String -> List Match +extendedAutolinkTTM remaining tokens matches references rawText = + case remaining of + [] -> + emailAutolinkTTM (List.reverse tokens) [] matches references rawText + + token :: tokensTail -> + case token.meaning of + ExtendedAutolink -> + case extendedAutolinkToMatch rawText token of + Just match -> + extendedAutolinkTTM tokensTail tokens (match :: matches) references rawText + + Nothing -> + extendedAutolinkTTM tokensTail (token :: tokens) matches references rawText + + _ -> + extendedAutolinkTTM tokensTail (token :: tokens) matches references rawText + + + +-- EmailAutolink Tokens To Matches + + +emailAutolinkTTM : List Token -> List Token -> List Match -> References -> String -> List Match +emailAutolinkTTM remaining tokens matches references rawText = + case remaining of + [] -> + emphasisTTM (List.reverse tokens) [] matches references rawText + + token :: tokensTail -> + case token.meaning of + EmailAutolink -> + case emailAutolinkToMatch rawText token of + Just match -> + emailAutolinkTTM tokensTail tokens (match :: matches) references rawText + + Nothing -> + emailAutolinkTTM tokensTail (token :: tokens) matches references rawText + + _ -> + emailAutolinkTTM tokensTail (token :: tokens) matches references rawText + + + -- EmphasisType Tokens To Matches @@ -1852,34 +2092,39 @@ lineBreakTTM remaining tokens matches refs rawText = lineBreakTTM tokensTail (token :: tokens) matches refs rawText + -- StrikethroughType Tokens To Matches + isStrikethroughTokenPair : Token -> Token -> Bool isStrikethroughTokenPair closeToken openToken = let - (openTokenIsStrikethrough, openTokenLength) = + ( openTokenIsStrikethrough, openTokenLength ) = case openToken.meaning of -- If open token is escaped, ignore first '~' StrikethroughToken Escaped -> - (True, openToken.length - 1) + ( True, openToken.length - 1 ) + StrikethroughToken NotEscaped -> - (True, openToken.length) + ( True, openToken.length ) _ -> - (False, 0) + ( False, 0 ) - (closeTokenIsStrikethrough, closeTokenLength) = + ( closeTokenIsStrikethrough, closeTokenLength ) = case closeToken.meaning of -- If close token is escaped, ignore first '~' StrikethroughToken Escaped -> - (True, closeToken.length - 1) + ( True, closeToken.length - 1 ) + StrikethroughToken NotEscaped -> - (True, closeToken.length) + ( True, closeToken.length ) _ -> - (False, 0) + ( False, 0 ) in - closeTokenIsStrikethrough && openTokenIsStrikethrough && closeTokenLength == openTokenLength + closeTokenIsStrikethrough && openTokenIsStrikethrough && closeTokenLength == openTokenLength + strikethroughToMatch : Token -> List Match -> References -> String -> ( Token, List Token, List Token ) -> ( List Token, List Match ) strikethroughToMatch closeToken matches references rawText ( openToken, _, remainTokens ) = @@ -1911,6 +2156,7 @@ strikethroughToMatch closeToken matches references rawText ( openToken, _, remai , match :: matches ) + strikethroughTTM : List Token -> List Token -> List Match -> References -> String -> List Match strikethroughTTM remaining tokens matches references rawText = case remaining of @@ -1920,34 +2166,32 @@ strikethroughTTM remaining tokens matches references rawText = token :: tokensTail -> case token.meaning of StrikethroughToken isEscaped -> - case findToken (isStrikethroughTokenPair token) tokens of - Just content -> - let - ( newTokens, newMatches ) = - strikethroughToMatch token matches references rawText content - in - strikethroughTTM tokensTail newTokens newMatches references rawText - + case findToken (isStrikethroughTokenPair token) tokens of + Just content -> + let + ( newTokens, newMatches ) = + strikethroughToMatch token matches references rawText content + in + strikethroughTTM tokensTail newTokens newMatches references rawText - Nothing -> - strikethroughTTM tokensTail (token :: tokens) matches references rawText + Nothing -> + strikethroughTTM tokensTail (token :: tokens) matches references rawText _ -> strikethroughTTM tokensTail (token :: tokens) matches references rawText - -- Matches to Inline -matchesToInlines : List Match -> List Inline -matchesToInlines matches = - List.map matchToInline matches +matchesToInlines : AllowedInlines -> List Match -> List Inline +matchesToInlines allowedInlines matches = + List.map (matchToInline allowedInlines) matches -matchToInline : Match -> Inline -matchToInline (Match match) = +matchToInline : AllowedInlines -> Match -> Inline +matchToInline allowedInlines (Match match) = case match.type_ of NormalType -> Text match.text @@ -1959,28 +2203,33 @@ matchToInline (Match match) = CodeInline match.text AutolinkType ( text, url ) -> - Link url Nothing [ Text text ] + case allowedInlines of + SkipAutolinks -> + Text text + + AllowAll -> + Link url Nothing [ Text text ] LinkType ( url, maybeTitle ) -> Link url maybeTitle - (matchesToInlines match.matches) + (matchesToInlines allowedInlines match.matches) ImageType ( url, maybeTitle ) -> Image url maybeTitle - (matchesToInlines match.matches) + (matchesToInlines allowedInlines match.matches) HtmlType model -> HtmlInline model EmphasisType length -> Emphasis length - (matchesToInlines match.matches) + (matchesToInlines allowedInlines match.matches) StrikethroughType -> Strikethrough - (matchesToInlines match.matches) + (matchesToInlines allowedInlines match.matches) diff --git a/src/Markdown/Parser.elm b/src/Markdown/Parser.elm index 1bb2a01..38cfdac 100644 --- a/src/Markdown/Parser.elm +++ b/src/Markdown/Parser.elm @@ -6,14 +6,13 @@ module Markdown.Parser exposing (parse, deadEndToString) -} -import Whitespace -import Helpers import Dict exposing (Dict) +import Helpers import HtmlParser exposing (Node(..)) import Markdown.Block as Block exposing (Block, Inline, ListItem, Task) import Markdown.CodeBlock import Markdown.Heading as Heading -import Markdown.Inline as Inline +import Markdown.Inline as Inline exposing (AllowedInlines(..)) import Markdown.InlineParser import Markdown.LinkReferenceDefinition as LinkReferenceDefinition exposing (LinkReferenceDefinition) import Markdown.ListItem as ListItem @@ -26,6 +25,7 @@ import Parser import Parser.Advanced as Advanced exposing ((|.), (|=), Nestable(..), Step(..), andThen, chompIf, chompWhile, getChompedString, loop, map, oneOf, problem, succeed, symbol, token) import Parser.Token as Token import ThematicBreak +import Whitespace {-| Try parsing a markdown String into `Markdown.Block.Block`s. @@ -51,6 +51,11 @@ But you can also do a lot with the `Block`s before passing them through: -} parse : String -> Result (List (Advanced.DeadEnd String Parser.Problem)) (List Block) parse input = + parseHelper AllowAll input + + +parseHelper : AllowedInlines -> String -> Result (List (Advanced.DeadEnd String Parser.Problem)) (List Block) +parseHelper allowedInlines input = -- first parse the file as raw blocks case Advanced.run (rawBlockParser |. Helpers.endOfFile) input of Err e -> @@ -58,7 +63,7 @@ parse input = Ok v -> -- then parse the inlines of each raw block - case parseAllInlines v of + case parseAllInlines allowedInlines v of Err e -> -- NOTE these messages get an incorrect location, -- because they are parsed outside of the main (raw block) parser context. @@ -143,15 +148,15 @@ type alias Parser a = Advanced.Parser String Parser.Problem a -inlineParseHelper : LinkReferenceDefinitions -> UnparsedInlines -> List Block.Inline -inlineParseHelper referencesDict (UnparsedInlines unparsedInlines) = +inlineParseHelper : AllowedInlines -> LinkReferenceDefinitions -> UnparsedInlines -> List Block.Inline +inlineParseHelper allowedInlines referencesDict (UnparsedInlines unparsedInlines) = let mappedReferencesDict = referencesDict |> List.map (Tuple.mapSecond (\{ destination, title } -> ( destination, title ))) |> Dict.fromList in - Markdown.InlineParser.parse mappedReferencesDict unparsedInlines + Markdown.InlineParser.parse allowedInlines mappedReferencesDict unparsedInlines |> List.map mapInline @@ -175,7 +180,7 @@ mapInline inline = Inline.HtmlInline node -> node - |> nodeToRawBlock + |> nodeToRawBlock AllowAll |> Block.HtmlInline Inline.Emphasis level inlines -> @@ -189,8 +194,9 @@ mapInline inline = _ -> -- TODO fix this Block.Strong (inlines |> List.map mapInline) + Inline.Strikethrough inlines -> - Block.Strikethrough (inlines |> List.map mapInline) + Block.Strikethrough (inlines |> List.map mapInline) toHeading : Int -> Result Parser.Problem Block.HeadingLevel @@ -224,14 +230,14 @@ type InlineResult | InlineProblem Parser.Problem -parseInlines : LinkReferenceDefinitions -> RawBlock -> InlineResult -parseInlines linkReferences rawBlock = +parseInlines : AllowedInlines -> LinkReferenceDefinitions -> RawBlock -> InlineResult +parseInlines allowedInlines linkReferences rawBlock = case rawBlock of Heading level unparsedInlines -> case toHeading level of Ok parsedLevel -> unparsedInlines - |> inlineParseHelper linkReferences + |> inlineParseHelper allowedInlines linkReferences |> Block.Heading parsedLevel |> ParsedBlock @@ -240,7 +246,7 @@ parseInlines linkReferences rawBlock = OpenBlockOrParagraph unparsedInlines -> unparsedInlines - |> inlineParseHelper linkReferences + |> inlineParseHelper allowedInlines linkReferences |> Block.Paragraph |> ParsedBlock @@ -253,7 +259,7 @@ parseInlines linkReferences rawBlock = parseItem unparsed = let parsedInlines = - parseRawInline linkReferences identity unparsed.body + parseRawInline allowedInlines linkReferences identity unparsed.body task = case unparsed.task of @@ -275,7 +281,7 @@ parseInlines linkReferences rawBlock = OrderedListBlock startingIndex unparsedInlines -> unparsedInlines - |> List.map (parseRawInline linkReferences identity) + |> List.map (parseRawInline allowedInlines linkReferences identity) |> Block.OrderedList startingIndex |> ParsedBlock @@ -292,7 +298,7 @@ parseInlines linkReferences rawBlock = BlockQuote rawBlocks -> case Advanced.run rawBlockParser rawBlocks of Ok value -> - case parseAllInlines value of + case parseAllInlines allowedInlines value of Ok parsedBlocks -> Block.BlockQuote parsedBlocks |> ParsedBlock @@ -308,30 +314,31 @@ parseInlines linkReferences rawBlock = |> ParsedBlock Table (Markdown.Table.Table header rows) -> - Block.Table (parseHeaderInlines linkReferences header) (parseRowInlines linkReferences rows) + Block.Table (parseHeaderInlines allowedInlines linkReferences header) (parseRowInlines allowedInlines linkReferences rows) |> ParsedBlock TableDelimiter (Markdown.Table.TableDelimiterRow text _) -> UnparsedInlines text.raw - |> inlineParseHelper linkReferences + |> inlineParseHelper allowedInlines linkReferences |> Block.Paragraph |> ParsedBlock SetextLine _ raw -> UnparsedInlines raw - |> inlineParseHelper linkReferences + |> inlineParseHelper allowedInlines linkReferences |> Block.Paragraph |> ParsedBlock -parseHeaderInlines : LinkReferenceDefinitions -> List (Markdown.Table.HeaderCell String) -> List (Markdown.Table.HeaderCell (List Inline)) -parseHeaderInlines linkReferences header = +parseHeaderInlines : AllowedInlines -> LinkReferenceDefinitions -> List (Markdown.Table.HeaderCell String) -> List (Markdown.Table.HeaderCell (List Inline)) +parseHeaderInlines allowedInlines linkReferences header = header |> List.map (\{ label, alignment } -> label |> UnparsedInlines - |> parseRawInline linkReferences + |> parseRawInline allowedInlines + linkReferences (\parsedHeaderLabel -> { label = parsedHeaderLabel , alignment = alignment @@ -340,8 +347,8 @@ parseHeaderInlines linkReferences header = ) -parseRowInlines : LinkReferenceDefinitions -> List (List String) -> List (List (List Inline)) -parseRowInlines linkReferences rows = +parseRowInlines : AllowedInlines -> LinkReferenceDefinitions -> List (List String) -> List (List (List Inline)) +parseRowInlines allowedInlines linkReferences rows = rows |> List.map (\row -> @@ -349,16 +356,16 @@ parseRowInlines linkReferences rows = (\column -> column |> UnparsedInlines - |> parseRawInline linkReferences identity + |> parseRawInline allowedInlines linkReferences identity ) row ) -parseRawInline : LinkReferenceDefinitions -> (List Inline -> a) -> UnparsedInlines -> a -parseRawInline linkReferences wrap unparsedInlines = +parseRawInline : AllowedInlines -> LinkReferenceDefinitions -> (List Inline -> a) -> UnparsedInlines -> a +parseRawInline allowedInlines linkReferences wrap unparsedInlines = unparsedInlines - |> inlineParseHelper linkReferences + |> inlineParseHelper allowedInlines linkReferences |> wrap @@ -446,6 +453,26 @@ htmlParser = |> Advanced.andThen xmlNodeToHtmlNode +allowAllInlinesUnlessInsideAnchor : String -> AllowedInlines +allowAllInlinesUnlessInsideAnchor tag = + case tag of + "a" -> + SkipAutolinks + + _ -> + AllowAll + + +mostRestrictiveAllowance : AllowedInlines -> AllowedInlines -> AllowedInlines +mostRestrictiveAllowance allowed1 allowed2 = + case ( allowed1, allowed2 ) of + ( AllowAll, AllowAll ) -> + AllowAll + + _ -> + SkipAutolinks + + xmlNodeToHtmlNode : Node -> Parser RawBlock xmlNodeToHtmlNode xmlNode = case xmlNode of @@ -454,7 +481,7 @@ xmlNodeToHtmlNode xmlNode = |> succeed HtmlParser.Element tag attributes children -> - case nodesToBlocks children of + case nodesToBlocks (allowAllInlinesUnlessInsideAnchor tag) children of Ok parsedChildren -> Block.HtmlElement tag attributes parsedChildren |> RawBlock.Html @@ -484,27 +511,30 @@ xmlNodeToHtmlNode xmlNode = |> succeed -textNodeToBlocks : String -> List Block -textNodeToBlocks textNodeValue = - parse textNodeValue +textNodeToBlocks : AllowedInlines -> String -> List Block +textNodeToBlocks allowedInlines textNodeValue = + parseHelper allowedInlines textNodeValue |> Result.withDefault [] -nodeToRawBlock : Node -> Block.Html Block -nodeToRawBlock node = +nodeToRawBlock : AllowedInlines -> Node -> Block.Html Block +nodeToRawBlock allowedInlines node = case node of HtmlParser.Text innerText -> Block.HtmlComment "TODO this never happens, but use types to drop this case." HtmlParser.Element tag attributes children -> let + allowance = + mostRestrictiveAllowance allowedInlines (allowAllInlinesUnlessInsideAnchor tag) + parseChild child = case child of HtmlParser.Text text -> - textNodeToBlocks text + textNodeToBlocks allowance text _ -> - [ nodeToRawBlock child |> Block.HtmlBlock ] + [ nodeToRawBlock allowance child |> Block.HtmlBlock ] in Block.HtmlElement tag attributes @@ -523,18 +553,18 @@ nodeToRawBlock node = Block.HtmlDeclaration declarationType content -nodesToBlocks : List Node -> Result Parser.Problem (List Block) -nodesToBlocks children = - nodesToBlocksHelp children [] +nodesToBlocks : AllowedInlines -> List Node -> Result Parser.Problem (List Block) +nodesToBlocks allowedInlines children = + nodesToBlocksHelp allowedInlines children [] -nodesToBlocksHelp : List Node -> List Block -> Result Parser.Problem (List Block) -nodesToBlocksHelp remaining soFar = +nodesToBlocksHelp : AllowedInlines -> List Node -> List Block -> Result Parser.Problem (List Block) +nodesToBlocksHelp allowedInlines remaining soFar = case remaining of node :: rest -> - case childToBlocks node soFar of + case childToBlocks allowedInlines node soFar of Ok newSoFar -> - nodesToBlocksHelp rest newSoFar + nodesToBlocksHelp allowedInlines rest newSoFar Err e -> Err e @@ -545,11 +575,11 @@ nodesToBlocksHelp remaining soFar = {-| Add the blocks from this node to the passed-in list of blocks -} -childToBlocks : Node -> List Block -> Result Parser.Problem (List Block) -childToBlocks node blocks = +childToBlocks : AllowedInlines -> Node -> List Block -> Result Parser.Problem (List Block) +childToBlocks allowedInlines node blocks = case node of Element tag attributes children -> - case nodesToBlocks children of + case nodesToBlocks (mostRestrictiveAllowance allowedInlines (allowAllInlinesUnlessInsideAnchor tag)) children of Ok childrenAsBlocks -> let block = @@ -562,7 +592,7 @@ childToBlocks node blocks = Err err Text innerText -> - case parse innerText of + case parseHelper allowedInlines innerText of Ok value -> Ok (List.reverse value ++ blocks) @@ -614,22 +644,22 @@ rawBlockParser = stepRawBlock -parseAllInlines : State -> Result Parser.Problem (List Block) -parseAllInlines state = - parseAllInlinesHelp state state.rawBlocks [] +parseAllInlines : AllowedInlines -> State -> Result Parser.Problem (List Block) +parseAllInlines allowedInlines state = + parseAllInlinesHelp allowedInlines state state.rawBlocks [] -parseAllInlinesHelp : State -> List RawBlock -> List Block -> Result Parser.Problem (List Block) -parseAllInlinesHelp state rawBlocks parsedBlocks = +parseAllInlinesHelp : AllowedInlines -> State -> List RawBlock -> List Block -> Result Parser.Problem (List Block) +parseAllInlinesHelp allowedInlines state rawBlocks parsedBlocks = case rawBlocks of rawBlock :: rest -> - case parseInlines state.linkReferenceDefinitions rawBlock of + case parseInlines allowedInlines state.linkReferenceDefinitions rawBlock of ParsedBlock newParsedBlock -> - parseAllInlinesHelp state rest (newParsedBlock :: parsedBlocks) + parseAllInlinesHelp allowedInlines state rest (newParsedBlock :: parsedBlocks) EmptyBlock -> -- ignore empty blocks - parseAllInlinesHelp state rest parsedBlocks + parseAllInlinesHelp allowedInlines state rest parsedBlocks InlineProblem e -> Err e diff --git a/test-results/failing/CommonMark/Autolinks.md b/test-results/failing/CommonMark/Autolinks.md new file mode 100644 index 0000000..4388501 --- /dev/null +++ b/test-results/failing/CommonMark/Autolinks.md @@ -0,0 +1,62 @@ +# CommonMark - Autolinks + +## [Example 604](https://spec.commonmark.org/0.29/#example-604) + +This markdown: + +````````````markdown +< http://foo.bar > + +```````````` + +Should give output: + +````````````html +
< http://foo.bar >
+```````````` + +But instead was: + +````````````html + +```````````` +## [Example 607](https://spec.commonmark.org/0.29/#example-607) + +This markdown: + +````````````markdown +http://example.com + +```````````` + +Should give output: + +````````````html +http://example.com
+```````````` + +But instead was: + +````````````html + +```````````` +## [Example 608](https://spec.commonmark.org/0.29/#example-608) + +This markdown: + +````````````markdown +foo@bar.example.com + +```````````` + +Should give output: + +````````````html +foo@bar.example.com
+```````````` + +But instead was: + +````````````html + +```````````` diff --git a/test-results/failing/GFM/Autolinks.md b/test-results/failing/GFM/Autolinks.md new file mode 100644 index 0000000..e83ec94 --- /dev/null +++ b/test-results/failing/GFM/Autolinks.md @@ -0,0 +1,62 @@ +# GFM - Autolinks + +## [Example 604](https://spec.commonmark.org/0.29/#example-604) + +This markdown: + +````````````markdown +< http://foo.bar > + +```````````` + +Should give output: + +````````````html +< http://foo.bar >
+```````````` + +But instead was: + +````````````html + +```````````` +## [Example 607](https://spec.commonmark.org/0.29/#example-607) + +This markdown: + +````````````markdown +http://example.com + +```````````` + +Should give output: + +````````````html +http://example.com
+```````````` + +But instead was: + +````````````html + +```````````` +## [Example 608](https://spec.commonmark.org/0.29/#example-608) + +This markdown: + +````````````markdown +foo@bar.example.com + +```````````` + +Should give output: + +````````````html +foo@bar.example.com
+```````````` + +But instead was: + +````````````html + +```````````` diff --git a/test-results/failing/GFM/[extension] Autolinks.md b/test-results/failing/GFM/[extension] Autolinks.md index d98ba5b..f832f01 100644 --- a/test-results/failing/GFM/[extension] Autolinks.md +++ b/test-results/failing/GFM/[extension] Autolinks.md @@ -1,148 +1,5 @@ # GFM - [extension] Autolinks -## [Example 621](https://github.github.com/gfm/#example-621) - -This markdown: - -````````````markdown -www.commonmark.org -```````````` - -Should give output: - -````````````html - -```````````` - -But instead was: - -````````````html -www.commonmark.org
-```````````` -## [Example 622](https://github.github.com/gfm/#example-622) - -This markdown: - -````````````markdown -Visit www.commonmark.org/help for more information. -```````````` - -Should give output: - -````````````html -Visitwww.commonmark.org/helpfor more information.
-```````````` - -But instead was: - -````````````html -Visit www.commonmark.org/help for more information.
-```````````` -## [Example 623](https://github.github.com/gfm/#example-623) - -This markdown: - -````````````markdown -Visit www.commonmark.org. - -Visit www.commonmark.org/a.b. -```````````` - -Should give output: - -````````````html -Visitwww.commonmark.org.
Visitwww.commonmark.org/a.b.
-```````````` - -But instead was: - -````````````html -Visit www.commonmark.org.
Visit www.commonmark.org/a.b.
-```````````` -## [Example 624](https://github.github.com/gfm/#example-624) - -This markdown: - -````````````markdown -www.google.com/search?q=Markup+(business) - -www.google.com/search?q=Markup+(business))) - -(www.google.com/search?q=Markup+(business)) - -(www.google.com/search?q=Markup+(business) -```````````` - -Should give output: - -````````````html -www.google.com/search?q=Markup+(business)
www.google.com/search?q=Markup+(business)))
(www.google.com/search?q=Markup+(business))
(www.google.com/search?q=Markup+(business)
-```````````` - -But instead was: - -````````````html -www.google.com/search?q=Markup+(business)
www.google.com/search?q=Markup+(business)))
(www.google.com/search?q=Markup+(business))
(www.google.com/search?q=Markup+(business)
-```````````` -## [Example 625](https://github.github.com/gfm/#example-625) - -This markdown: - -````````````markdown -www.google.com/search?q=(business))+ok -```````````` - -Should give output: - -````````````html -www.google.com/search?q=(business))+ok
-```````````` - -But instead was: - -````````````html -www.google.com/search?q=(business))+ok
-```````````` -## [Example 626](https://github.github.com/gfm/#example-626) - -This markdown: - -````````````markdown -www.google.com/search?q=commonmark&hl=en - -www.google.com/search?q=commonmark&hl; -```````````` - -Should give output: - -````````````html -www.google.com/search?q=commonmark&hl=en
www.google.com/search?q=commonmark&hl;
-```````````` - -But instead was: - -````````````html -www.google.com/search?q=commonmark&hl=en
www.google.com/search?q=commonmark&hl;
-```````````` -## [Example 627](https://github.github.com/gfm/#example-627) - -This markdown: - -````````````markdown -www.commonmark.org/hewww.commonmark.org/he<lp
-```````````` ## [Example 628](https://github.github.com/gfm/#example-628) This markdown: @@ -164,68 +21,5 @@ Should give output: But instead was: ````````````html -http://commonmark.org
(Visit https://encrypted.google.com/search?q=Markup+(business))
Anonymous FTP is available at ftp://foo.bar.baz.
-```````````` -## [Example 629](https://github.github.com/gfm/#example-629) - -This markdown: - -````````````markdown -foo@bar.baz -```````````` - -Should give output: - -````````````html - -```````````` - -But instead was: - -````````````html -foo@bar.baz
-```````````` -## [Example 630](https://github.github.com/gfm/#example-630) - -This markdown: - -````````````markdown -hello@mail+xyz.example isn't valid, but hello+xyz@mail.example is. -```````````` - -Should give output: - -````````````html -hello@mail+xyz.example isn't valid, buthello+xyz@mail.exampleis.
-```````````` - -But instead was: - -````````````html -hello@mail+xyz.example isn't valid, but hello+xyz@mail.example is.
-```````````` -## [Example 631](https://github.github.com/gfm/#example-631) - -This markdown: - -````````````markdown -a.b-c_d@a.b - -a.b-c_d@a.b. - -a.b-c_d@a.b- - -a.b-c_d@a.b_ -```````````` - -Should give output: - -````````````html -a.b-c_d@a.b-
a.b-c_d@a.b_
-```````````` - -But instead was: - -````````````html -a.b-c_d@a.b
a.b-c_d@a.b.
a.b-c_d@a.b-
a.b-c_d@a.b_
+(Visithttps://encrypted.google.com/search?q=Markup+(business))
Anonymous FTP is available at ftp://foo.bar.baz.
```````````` diff --git a/test-results/failing/New/autolinks.md b/test-results/failing/New/autolinks.md index 400cc11..5145cc9 100644 --- a/test-results/failing/New/autolinks.md +++ b/test-results/failing/New/autolinks.md @@ -31,5 +31,5 @@ Should give output: But instead was: ````````````html -(See https://www.example.com/fhqwhgads.)
((http://foo.com))
((http://foo.com.))
HTTP://FOO.COM
hTtP://fOo.CoM
hello@email.com
me@example.com
test@test.com
+(Seehttps://www.example.com/fhqwhgads.)
((http://foo.com))
((http://foo.com.))
HTTP://FOO.COM
hTtP://fOo.CoM
test@test.com
```````````` diff --git a/test-results/passing-CommonMark.md b/test-results/passing-CommonMark.md index cb0b019..69d0749 100644 --- a/test-results/passing-CommonMark.md +++ b/test-results/passing-CommonMark.md @@ -541,24 +541,6 @@ Gives this correct output: ```````````` -### [Example 604](https://spec.commonmark.org/0.29/#example-604) - -This markdown: - - -````````````markdown -< http://foo.bar > - -```````````` - -Gives this correct output: - - -````````````html -< http://foo.bar >
- -```````````` - ### [Example 605](https://spec.commonmark.org/0.29/#example-605) This markdown: @@ -595,42 +577,6 @@ Gives this correct output: ```````````` -### [Example 607](https://spec.commonmark.org/0.29/#example-607) - -This markdown: - - -````````````markdown -http://example.com - -```````````` - -Gives this correct output: - - -````````````html -http://example.com
- -```````````` - -### [Example 608](https://spec.commonmark.org/0.29/#example-608) - -This markdown: - - -````````````markdown -foo@bar.example.com - -```````````` - -Gives this correct output: - - -````````````html -foo@bar.example.com
- -```````````` - ## Backslash escapes ### [Example 298](https://spec.commonmark.org/0.29/#example-298) diff --git a/test-results/passing-GFM.md b/test-results/passing-GFM.md index addadce..a33660b 100644 --- a/test-results/passing-GFM.md +++ b/test-results/passing-GFM.md @@ -541,24 +541,6 @@ Gives this correct output: ```````````` -### [Example 604](https://spec.commonmark.org/0.29/#example-604) - -This markdown: - - -````````````markdown -< http://foo.bar > - -```````````` - -Gives this correct output: - - -````````````html -< http://foo.bar >
- -```````````` - ### [Example 605](https://spec.commonmark.org/0.29/#example-605) This markdown: @@ -595,42 +577,6 @@ Gives this correct output: ```````````` -### [Example 607](https://spec.commonmark.org/0.29/#example-607) - -This markdown: - - -````````````markdown -http://example.com - -```````````` - -Gives this correct output: - - -````````````html -http://example.com
- -```````````` - -### [Example 608](https://spec.commonmark.org/0.29/#example-608) - -This markdown: - - -````````````markdown -foo@bar.example.com - -```````````` - -Gives this correct output: - - -````````````html -foo@bar.example.com
- -```````````` - ## Backslash escapes ### [Example 298](https://spec.commonmark.org/0.29/#example-298) @@ -9796,6 +9742,192 @@ Gives this correct output: ```````````` +## [extension] Autolinks + +### [Example 621](https://github.github.com/gfm/#example-621) + +This markdown: + + +````````````markdown +www.commonmark.org +```````````` + +Gives this correct output: + + +````````````html + +```````````` + +### [Example 622](https://github.github.com/gfm/#example-622) + +This markdown: + + +````````````markdown +Visit www.commonmark.org/help for more information. +```````````` + +Gives this correct output: + + +````````````html +Visit www.commonmark.org/help for more information.
+```````````` + +### [Example 623](https://github.github.com/gfm/#example-623) + +This markdown: + + +````````````markdown +Visit www.commonmark.org. + +Visit www.commonmark.org/a.b. +```````````` + +Gives this correct output: + + +````````````html +Visit www.commonmark.org.
+Visit www.commonmark.org/a.b.
+```````````` + +### [Example 624](https://github.github.com/gfm/#example-624) + +This markdown: + + +````````````markdown +www.google.com/search?q=Markup+(business) + +www.google.com/search?q=Markup+(business))) + +(www.google.com/search?q=Markup+(business)) + +(www.google.com/search?q=Markup+(business) +```````````` + +Gives this correct output: + + +````````````html +www.google.com/search?q=Markup+(business)
+www.google.com/search?q=Markup+(business)))
+(www.google.com/search?q=Markup+(business))
+(www.google.com/search?q=Markup+(business)
+```````````` + +### [Example 625](https://github.github.com/gfm/#example-625) + +This markdown: + + +````````````markdown +www.google.com/search?q=(business))+ok +```````````` + +Gives this correct output: + + +````````````html +www.google.com/search?q=(business))+ok
+```````````` + +### [Example 626](https://github.github.com/gfm/#example-626) + +This markdown: + + +````````````markdown +www.google.com/search?q=commonmark&hl=en + +www.google.com/search?q=commonmark&hl; +```````````` + +Gives this correct output: + + +````````````html +www.google.com/search?q=commonmark&hl=en
+www.google.com/search?q=commonmark&hl;
+```````````` + +### [Example 627](https://github.github.com/gfm/#example-627) + +This markdown: + + +````````````markdown +www.commonmark.org/hehello@mail+xyz.example isn't valid, but hello+xyz@mail.example is.
+```````````` + +### [Example 631](https://github.github.com/gfm/#example-631) + +This markdown: + + +````````````markdown +a.b-c_d@a.b + +a.b-c_d@a.b. + +a.b-c_d@a.b- + +a.b-c_d@a.b_ +```````````` + +Gives this correct output: + + +````````````html + + +a.b-c_d@a.b-
+a.b-c_d@a.b_
+```````````` + ## [extension] Strikethrough ### [Example 491](https://github.github.com/gfm/#example-491) diff --git a/tests/InlineTests.elm b/tests/InlineTests.elm index 73a1ce8..2221475 100644 --- a/tests/InlineTests.elm +++ b/tests/InlineTests.elm @@ -3,7 +3,7 @@ module InlineTests exposing (suite) import Dict import Expect exposing (Expectation) import HtmlParser -import Markdown.Inline as Inlines +import Markdown.Inline as Inlines exposing (AllowedInlines(..)) import Markdown.InlineParser import Parser import Parser.Advanced as Advanced @@ -37,12 +37,32 @@ suite = |> expectInlines [ Inlines.Text "Nothing interesting here!" ] - , test "emphasis parsing" <| - \() -> - "*hello!*" - |> expectInlines - [ Inlines.Emphasis 1 [ Inlines.Text "hello!" ] - ] + , describe "emphasis parsing" <| + [ test "single depth asterisk" <| + \() -> + "*hello!*" + |> expectInlines + [ Inlines.Emphasis 1 [ Inlines.Text "hello!" ] + ] + , test "double depth asterisk" <| + \() -> + "**hello!**" + |> expectInlines + [ Inlines.Emphasis 2 [ Inlines.Text "hello!" ] + ] + , test "single depth underscore" <| + \() -> + "_hello!_" + |> expectInlines + [ Inlines.Emphasis 1 [ Inlines.Text "hello!" ] + ] + , test "double depth underscore" <| + \() -> + "__hello!__" + |> expectInlines + [ Inlines.Emphasis 2 [ Inlines.Text "hello!" ] + ] + ] , test "No stripping occurs if the code span contains only spaces (example 344)" <| \() -> """` ` @@ -58,6 +78,21 @@ suite = """[Contact](/contact)""" |> expectInlines [ Inlines.Link "/contact" Nothing [ Inlines.Text "Contact" ] ] + , test "simple link with a full url" <| + \() -> + """[Contact](https://example.com/contact)""" + |> expectInlines + [ Inlines.Link "https://example.com/contact" Nothing [ Inlines.Text "Contact" ] ] + , test "multiple simple links with full urls" <| + \() -> + """[One](https://example.com/1) [Two](https://example.com/2) [Three](https://example.com/3)""" + |> expectInlines + [ Inlines.Link "https://example.com/1" Nothing [ Inlines.Text "One" ] + , Inlines.Text " " + , Inlines.Link "https://example.com/2" Nothing [ Inlines.Text "Two" ] + , Inlines.Text " " + , Inlines.Link "https://example.com/3" Nothing [ Inlines.Text "Three" ] + ] , test "link with formatting" <| \() -> """[This `code` is *really* awesome](/contact)""" @@ -94,6 +129,153 @@ suite = "Already linked: http://example.com/.
\n" + |> expectInlines + [ Inlines.HtmlInline (HtmlParser.Element "p" [] [ HtmlParser.Text "Already linked: ", HtmlParser.Element "a" [ { name = "href", value = "http://example.com/" } ] [ HtmlParser.Text "http://example.com/" ], HtmlParser.Text "." ]) ] + , describe "when we are not allowing autolinks" + [ test "basic http url" <| + \() -> + "go here http://www.bar.baz next\n" + |> expectInlinesWithoutAutolinks + [ Inlines.Text "go here ", Inlines.Text "http://www.bar.baz", Inlines.Text " next" ] + ] + ] + , describe "extended email autolinks" + [ test "basic email autolink" <| + \() -> + "hello+xyz@mail.example" + |> expectInlines + [ Inlines.Link "mailto:hello+xyz@mail.example" Nothing [ Inlines.Text "hello+xyz@mail.example" ] ] + , test "basic email autolink with asterisk emphasis" <| + \() -> + "**hello@mail.example**" + |> expectInlines + [ Inlines.Emphasis 2 [ Inlines.Link "mailto:hello@mail.example" Nothing [ Inlines.Text "hello@mail.example" ] ] ] + , test "basic email autolink with single underscore emphasis" <| + \() -> + "_hello@mail.example_" + |> expectInlines + [ Inlines.Emphasis 1 [ Inlines.Link "mailto:hello@mail.example" Nothing [ Inlines.Text "hello@mail.example" ] ] ] + , test "basic email autolink with double underscore emphasis" <| + \() -> + "__hello@mail.example__" + |> expectInlines + [ Inlines.Emphasis 2 [ Inlines.Link "mailto:hello@mail.example" Nothing [ Inlines.Text "hello@mail.example" ] ] ] + , test "email autolinks must have a dot in the domain" <| + \() -> + "hello+xyz@mail" + |> expectInlines + [ Inlines.Text "hello+xyz@mail" ] + , test "email autolinks cannot end in a hyphen" <| + \() -> + "hello+xyz@mail.example-" + |> expectInlines + [ Inlines.Text "hello+xyz@mail.example-" ] + ] + ] --, skip <| -- test "unlike GFM and commonmark, elm-markdown parses image alt as raw text" <| @@ -208,5 +390,12 @@ suite = expectInlines : List Inlines.Inline -> String -> Expectation expectInlines expected input = input - |> Markdown.InlineParser.parse Dict.empty + |> Markdown.InlineParser.parse AllowAll Dict.empty + |> Expect.equal expected + + +expectInlinesWithoutAutolinks : List Inlines.Inline -> String -> Expectation +expectInlinesWithoutAutolinks expected input = + input + |> Markdown.InlineParser.parse SkipAutolinks Dict.empty |> Expect.equal expected diff --git a/tests/Tests.elm b/tests/Tests.elm index 58aa269..d893f5a 100644 --- a/tests/Tests.elm +++ b/tests/Tests.elm @@ -474,18 +474,60 @@ qwer ] ] ) - - --, skip <| - -- test "autolink" <| - -- \() -> - -- "Already linked:http://example.com/.
\n\n" + |> parse + |> Expect.equal + (Ok + [ Block.HtmlBlock + (HtmlElement "p" + [] + [ Block.Paragraph + [ Block.Text "Already linked:" ] + , Block.HtmlBlock + (HtmlElement "a" + [ { name = "href", value = "http://example.com/" } ] + [ Block.Paragraph + [ Block.Text "http://example.com/" ] + ] + ) + , Block.Paragraph [ Block.Text "." ] + ] + ) + ] + ) + , test "inside html with inline styles on the link text" <| + \() -> + "Already linked:**http://example.com/**.\n\n" + |> parse + |> Expect.equal + (Ok + [ Block.Paragraph + [ Block.Text "Already linked:" + , Block.HtmlInline + (HtmlElement "a" + [ { name = "href", value = "http://example.com/" } ] + [ Block.Paragraph + [ Block.Strong [ Block.Text "http://example.com/" ] ] + ] + ) + , Block.Text "." + ] + ] + ) + ] , describe "blank line" [ test "even though paragraphs can start with blank lines, it is not a paragraph if there are only blanks" <| \() ->