Skip to content

Commit

Permalink
Markdown naive parser
Browse files Browse the repository at this point in the history
  • Loading branch information
VictorNicollet committed Jun 19, 2014
1 parent a73cf12 commit f11a2d0
Show file tree
Hide file tree
Showing 2 changed files with 159 additions and 18 deletions.
168 changes: 158 additions & 10 deletions syntax.mll
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,172 @@ and jsP sol buffer = parse
| _ as c { Buffer.add_char buffer c ; jsP (c = '\n') buffer lexbuf }
| eof { () }

and markdownP stack buffer = parse
| '`' {
let stack = match stack with
| `CODE :: t -> Buffer.add_string buffer "</code>" ; t
| _ -> Buffer.add_string buffer "<code>" ; `CODE :: stack in
markdownP stack buffer lexbuf
}
| "__" {
let stack = match stack with
| `I :: t -> Buffer.add_string buffer "</i>" ; t
| _ -> Buffer.add_string buffer "<i>" ; `I :: stack in
markdownP stack buffer lexbuf
}
| "**" {
let stack = match stack with
| `B :: t -> Buffer.add_string buffer "</b>" ; t
| _ -> Buffer.add_string buffer "<b>" ; `B :: stack in
markdownP stack buffer lexbuf
}
| "[" {
let buf2 = Buffer.create 100 in
let inner = markdownP [ `LINK ] buf2 lexbuf in
if inner = [] then begin
Buffer.add_string buffer "<a href=\"" ;
markdownP (`A (Buffer.contents buf2) :: stack) buffer lexbuf
end else begin
Buffer.add_char buffer '[' ;
Buffer.add_string buffer (Buffer.contents buf2) ;
inner
end
}
| "](" {
match stack with
| [ `LINK ] -> []
| other -> Buffer.add_string buffer "](" ; markdownP stack buffer lexbuf
}
| ')' {
match stack with
| `A body :: t -> Buffer.add_string buffer "\">" ;
Buffer.add_string buffer body ;
Buffer.add_string buffer "</a>" ;
markdownP t buffer lexbuf
| _ -> Buffer.add_char buffer ')' ;
markdownP stack buffer lexbuf
}
| [^ '&' '<' '"' ')' '>' ']' '[' '*' '_' '`' ] + as s { Buffer.add_string buffer s ; markdownP stack buffer lexbuf }
| '&' { Buffer.add_string buffer "&amp;" ; markdownP stack buffer lexbuf }
| '<' { Buffer.add_string buffer "&lt;" ; markdownP stack buffer lexbuf }
| '"' { Buffer.add_string buffer "&quot;" ; markdownP stack buffer lexbuf }
| '>' { Buffer.add_string buffer "&gt;" ; markdownP stack buffer lexbuf }
| _ as c { Buffer.add_char buffer c ; markdownP stack buffer lexbuf }
| eof { stack }

{

let api block =
let process f block =
let lexbuf = Lexing.from_string block in
let buffer = Buffer.create (String.length block) in
apiP true buffer lexbuf ;
f buffer lexbuf ;
Buffer.contents buffer

let api block =
process (apiP true) block

let json block =
let lexbuf = Lexing.from_string block in
let buffer = Buffer.create (String.length block) in
jsonP true buffer lexbuf ;
Buffer.contents buffer
process (jsonP true) block

let js block =
let lexbuf = Lexing.from_string block in
let buffer = Buffer.create (String.length block) in
jsP true buffer lexbuf ;
Buffer.contents buffer
process (jsP true) block

(** This is not really a markdown processor: it does not handle
the full range of markdown semantics.
It does handle:
- newlines to split paragraphs
- hyphens (-) to define lists
- hashes (#) to define headings
- **stars** to highlight in bold
- [text](url) for links
- __underscores__ to highlight in italics
- `ticks` to enter code
It does not handle:
- nested lists
- ordered lists
- blockquotes
- code blocks
- [text][ref] for footnote links
*)

let markdown block =

let lines = List.map BatString.trim (BatString.nsplit block "\n") in

(* Each block is a string containing text matching a specific semantic block,
such as '## Heading' or 'paragraph' or '- list item' *)
let rec to_blocks acc list =
let newp acc = match acc with [] :: _ -> acc | l -> [] :: l in
let add acc x = match acc with [] -> [[x]] | h :: t -> (x :: h) :: t in
match list with
| [] -> List.rev_map (fun lines -> String.concat " " (List.rev lines)) acc
| "" :: t -> to_blocks (newp acc) t
| line :: t when line.[0] = '#' || line.[0] = '-' -> to_blocks (add (newp acc) line) t
| line :: t -> to_blocks (add acc line) t
in

let blocks = to_blocks [] lines in

(* Traverse all blocks, outputting HTML markup to the buffer. *)
let buffer = Buffer.create (String.length block * 2) in
let parse_block block =
let lexbuf = Lexing.from_string block in
ignore (markdownP [] buffer lexbuf)
in

let in_list = List.fold_left (fun in_list block ->
print_endline ("Block: >>" ^ block ^ "<<") ;
if block = "" then
in_list
else if block.[0] = '#' then begin

let level =
let i = ref 1 in
while !i < String.length block && block.[!i] = '#' do incr i done ;
!i in

let nlevel = string_of_int level in

if in_list then Buffer.add_string buffer "</ul>" ;
Buffer.add_string buffer "<h" ;
Buffer.add_string buffer nlevel ;
Buffer.add_char buffer '>' ;

parse_block (String.sub block level (String.length block - level)) ;

Buffer.add_string buffer "</h" ;
Buffer.add_string buffer nlevel ;
Buffer.add_char buffer '>' ;

false

end else if block.[0] = '-' then begin

Buffer.add_string buffer (if in_list then "<li>" else "<ul><li>") ;

parse_block (BatString.lchop block) ;

Buffer.add_string buffer "</li>" ;

true

end else begin

if in_list then Buffer.add_string buffer "</ul>" ;
Buffer.add_string buffer "<p>" ;

parse_block block ;

Buffer.add_string buffer "</p>" ;

false

end) false blocks
in

if in_list then Buffer.add_string buffer "</ul>" ;

Buffer.contents buffer
}
9 changes: 1 addition & 8 deletions write.ml
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,6 @@ let write file files tree site prefix =
let replace_internal_links block =
String.concat url_prefix_repl (String.nsplit block "](/") in

(* Removes the initial ' ' from the beginning of each line in the block. *)
let remove_starting_spaces block =
let lines = String.nsplit block "\n" in
let lines = List.map (fun s ->
if String.starts_with s " " then String.sub s 2 (String.length s - 2) else s) lines in
String.concat "\n" lines in

(* Show a list of files, rendered as a table. *)
let show_file_list = function [] -> "" | list ->
"<table class=\"files\"><tr>"
Expand All @@ -77,7 +70,7 @@ let write file files tree site prefix =

(* Turns an individual block into a string. *)
let to_string elt = match elt.Read.what with
| `MD block -> replace_internal_links (remove_starting_spaces block)
| `MD block -> Syntax.markdown (replace_internal_links block)
| `API (c,block) -> caption c ^ "<pre class=\"api\">" ^ Syntax.api block ^ "</pre>"
| `JSON (c,block) -> caption c ^ "<pre class=\"json\">" ^ Syntax.json block ^ "</pre>"
| `JS (c,block) -> caption c ^ "<pre class=\"js\">" ^ Syntax.js block ^ "</pre>"
Expand Down

0 comments on commit f11a2d0

Please sign in to comment.