From f11a2d0759891684f2bcd18e61cf771328a2119f Mon Sep 17 00:00:00 2001 From: Victor Nicollet Date: Thu, 19 Jun 2014 16:26:17 +0200 Subject: [PATCH] Markdown naive parser --- syntax.mll | 168 +++++++++++++++++++++++++++++++++++++++++++++++++---- write.ml | 9 +-- 2 files changed, 159 insertions(+), 18 deletions(-) diff --git a/syntax.mll b/syntax.mll index cf8864b..1fe9dcf 100644 --- a/syntax.mll +++ b/syntax.mll @@ -24,24 +24,172 @@ and jsP sol buffer = parse | _ as c { Buffer.add_char buffer c ; jsP (c = '\n') buffer lexbuf } | eof { () } +and markdownP stack buffer = parse +| '`' { + let stack = match stack with + | `CODE :: t -> Buffer.add_string buffer "" ; t + | _ -> Buffer.add_string buffer "" ; `CODE :: stack in + markdownP stack buffer lexbuf +} +| "__" { + let stack = match stack with + | `I :: t -> Buffer.add_string buffer "" ; t + | _ -> Buffer.add_string buffer "" ; `I :: stack in + markdownP stack buffer lexbuf +} +| "**" { + let stack = match stack with + | `B :: t -> Buffer.add_string buffer "" ; t + | _ -> Buffer.add_string buffer "" ; `B :: stack in + markdownP stack buffer lexbuf +} +| "[" { + let buf2 = Buffer.create 100 in + let inner = markdownP [ `LINK ] buf2 lexbuf in + if inner = [] then begin + Buffer.add_string buffer " [] + | other -> Buffer.add_string buffer "](" ; markdownP stack buffer lexbuf +} +| ')' { + match stack with + | `A body :: t -> Buffer.add_string buffer "\">" ; + Buffer.add_string buffer body ; + Buffer.add_string buffer "" ; + markdownP t buffer lexbuf + | _ -> Buffer.add_char buffer ')' ; + markdownP stack buffer lexbuf +} +| [^ '&' '<' '"' ')' '>' ']' '[' '*' '_' '`' ] + as s { Buffer.add_string buffer s ; markdownP stack buffer lexbuf } +| '&' { Buffer.add_string buffer "&" ; markdownP stack buffer lexbuf } +| '<' { Buffer.add_string buffer "<" ; markdownP stack buffer lexbuf } +| '"' { Buffer.add_string buffer """ ; markdownP stack buffer lexbuf } +| '>' { Buffer.add_string buffer ">" ; markdownP stack buffer lexbuf } +| _ as c { Buffer.add_char buffer c ; markdownP stack buffer lexbuf } +| eof { stack } + { - let api block = + let process f block = let lexbuf = Lexing.from_string block in let buffer = Buffer.create (String.length block) in - apiP true buffer lexbuf ; + f buffer lexbuf ; Buffer.contents buffer + + let api block = + process (apiP true) block let json block = - let lexbuf = Lexing.from_string block in - let buffer = Buffer.create (String.length block) in - jsonP true buffer lexbuf ; - Buffer.contents buffer + process (jsonP true) block let js block = - let lexbuf = Lexing.from_string block in - let buffer = Buffer.create (String.length block) in - jsP true buffer lexbuf ; - Buffer.contents buffer + process (jsP true) block + +(** This is not really a markdown processor: it does not handle + the full range of markdown semantics. + + It does handle: + - newlines to split paragraphs + - hyphens (-) to define lists + - hashes (#) to define headings + - **stars** to highlight in bold + - [text](url) for links + - __underscores__ to highlight in italics + - `ticks` to enter code + + It does not handle: + - nested lists + - ordered lists + - blockquotes + - code blocks + - [text][ref] for footnote links +*) + + let markdown block = + let lines = List.map BatString.trim (BatString.nsplit block "\n") in + + (* Each block is a string containing text matching a specific semantic block, + such as '## Heading' or 'paragraph' or '- list item' *) + let rec to_blocks acc list = + let newp acc = match acc with [] :: _ -> acc | l -> [] :: l in + let add acc x = match acc with [] -> [[x]] | h :: t -> (x :: h) :: t in + match list with + | [] -> List.rev_map (fun lines -> String.concat " " (List.rev lines)) acc + | "" :: t -> to_blocks (newp acc) t + | line :: t when line.[0] = '#' || line.[0] = '-' -> to_blocks (add (newp acc) line) t + | line :: t -> to_blocks (add acc line) t + in + + let blocks = to_blocks [] lines in + + (* Traverse all blocks, outputting HTML markup to the buffer. *) + let buffer = Buffer.create (String.length block * 2) in + let parse_block block = + let lexbuf = Lexing.from_string block in + ignore (markdownP [] buffer lexbuf) + in + + let in_list = List.fold_left (fun in_list block -> + print_endline ("Block: >>" ^ block ^ "<<") ; + if block = "" then + in_list + else if block.[0] = '#' then begin + + let level = + let i = ref 1 in + while !i < String.length block && block.[!i] = '#' do incr i done ; + !i in + + let nlevel = string_of_int level in + + if in_list then Buffer.add_string buffer "" ; + Buffer.add_string buffer "' ; + + parse_block (String.sub block level (String.length block - level)) ; + + Buffer.add_string buffer "' ; + + false + + end else if block.[0] = '-' then begin + + Buffer.add_string buffer (if in_list then "
  • " else "
    • ") ; + + parse_block (BatString.lchop block) ; + + Buffer.add_string buffer "
    • " ; + + true + + end else begin + + if in_list then Buffer.add_string buffer "
    " ; + Buffer.add_string buffer "

    " ; + + parse_block block ; + + Buffer.add_string buffer "

    " ; + + false + + end) false blocks + in + + if in_list then Buffer.add_string buffer "" ; + + Buffer.contents buffer } diff --git a/write.ml b/write.ml index 1583899..ba16cba 100644 --- a/write.ml +++ b/write.ml @@ -50,13 +50,6 @@ let write file files tree site prefix = let replace_internal_links block = String.concat url_prefix_repl (String.nsplit block "](/") in - (* Removes the initial ' ' from the beginning of each line in the block. *) - let remove_starting_spaces block = - let lines = String.nsplit block "\n" in - let lines = List.map (fun s -> - if String.starts_with s " " then String.sub s 2 (String.length s - 2) else s) lines in - String.concat "\n" lines in - (* Show a list of files, rendered as a table. *) let show_file_list = function [] -> "" | list -> "" @@ -77,7 +70,7 @@ let write file files tree site prefix = (* Turns an individual block into a string. *) let to_string elt = match elt.Read.what with - | `MD block -> replace_internal_links (remove_starting_spaces block) + | `MD block -> Syntax.markdown (replace_internal_links block) | `API (c,block) -> caption c ^ "
    " ^ Syntax.api block ^ "
    " | `JSON (c,block) -> caption c ^ "
    " ^ Syntax.json block ^ "
    " | `JS (c,block) -> caption c ^ "
    " ^ Syntax.js block ^ "
    "