Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add markdown support #2

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,6 @@
path = sitters/tree-sitter-vim
url = https://github.com/neovim/tree-sitter-vim.git
shallow = true
[submodule "sitters/tree-sitter-markdown"]
path = sitters/tree-sitter-markdown
url = https://github.com/MDeiml/tree-sitter-markdown
8 changes: 8 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ default = [
"javascript",
"json",
"lua",
"markdown",
"markdown-inline",
"python",
"rust",
"toml",
Expand All @@ -55,6 +57,8 @@ java = []
javascript = []
json = []
lua = []
markdown = []
markdown-inline = []
python = []
rust = []
toml = []
Expand All @@ -72,6 +76,10 @@ tree-sitter-highlight = { version = ">= 0.19, < 0.21", optional = true }
[build-dependencies]
cc = "1"

[dev-dependencies]
html-escape = "0.2.13"
lazy_static = "1.4.0"

# docs.rs-specific configuration
[package.metadata.docs.rs]
all-features = true
Expand Down
31 changes: 26 additions & 5 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,27 @@ use std::{
process::Command,
};

/// Picks the grammar source directory for `lang` underneath `base`.
///
/// Candidate directories are probed in this order; the first one that exists
/// is used:
///
/// 1. `base/tree-sitter-{prefix}-{lang}` (only when `prefix` is given); in
///    this case the returned language name becomes `{prefix}_{lang}`.
/// 2. `base/tree-sitter-{lang}`
/// 3. `base/{lang}`
///
/// When none of them exists, `base` itself is used.
///
/// Returns the language name together with the `src` directory of the chosen
/// candidate.
fn resolve_lang_src(base: &Path, prefix: Option<&str>, lang: String) -> (String, PathBuf) {
    // Prefixed layout: the only case in which the language name is rewritten.
    let prefixed = prefix
        .map(|p| (p, base.join(format!("tree-sitter-{p}-{lang}"))))
        .filter(|(_, dir)| dir.exists());
    if let Some((p, dir)) = prefixed {
        return (format!("{p}_{lang}"), dir.join("src"));
    }

    // Unprefixed layouts, in priority order; fall back to `base` itself.
    let candidates = [base.join(format!("tree-sitter-{lang}")), base.join(&lang)];
    let root = candidates
        .iter()
        .find(|dir| dir.exists())
        .map_or(base, PathBuf::as_path);

    (lang, root.join("src"))
}

#[derive(Debug)]
struct Sitter {
path: PathBuf,
Expand All @@ -20,21 +41,21 @@ impl Sitter {
let sitter = match feature.split_once('-') {
Some((dir, lang)) => {
let path = PathBuf::from(format!("sitters/tree-sitter-{dir}"));
let src = path.join(lang).join("src");
let (lang, src) = resolve_lang_src(&path, Some(dir), lang.to_owned());

Self {
path,
src,
path,
lang: lang.to_owned(),
version: OnceCell::new(),
}
}
None => {
let lang = feature;
let path = PathBuf::from(format!("sitters/tree-sitter-{lang}"));
let path = PathBuf::from(format!("sitters/tree-sitter-{feature}"));
let (lang, src) = resolve_lang_src(&path, None, feature);

Self {
src: path.join("src"),
src,
path,
lang,
version: OnceCell::new(),
Expand Down
250 changes: 250 additions & 0 deletions examples/highlighter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
use html_escape::encode_text;
use lazy_static::lazy_static;
use std::collections::HashMap;
use std::{env::args, fs::read_to_string};
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter};

/// Highlight names used by this example.
///
/// The list is passed to `configure` on every entry of `CONFIGS`, and
/// `highlight` indexes into it when a highlight starts in order to build the
/// `class` attribute of the emitted `<span>` (each `.` is replaced by a space,
/// so `"function.builtin"` becomes `class="function builtin"`).
const HIGHLIGHT_NAMES: &[&str; 19] = &[
"attribute",
"constant",
"function.builtin",
"function",
"keyword",
"operator",
"property",
"punctuation",
"punctuation.bracket",
"punctuation.delimiter",
"string",
"string.special",
"tag",
"type",
"type.builtin",
"comment",
"variable",
"variable.builtin",
"variable.parameter",
];

lazy_static! {
    /// Maps a file extension (without the leading dot) to a key of `CONFIGS`.
    ///
    /// Every value listed here must have a matching entry in `CONFIGS`.
    static ref FILETYPES: HashMap<&'static str, &'static str> = HashMap::from([
        ("md", "markdown"),
        ("markdown", "markdown"),
        ("rs", "rust"),
        ("toml", "toml"),
        ("js", "javascript"),
        // TypeScript has its own configuration below; use it rather than the
        // JavaScript one.
        ("ts", "typescript"),
        ("html", "html"),
        ("vue", "html"),
        ("tera", "html"),
        ("css", "css"),
        ("c", "c"),
        // `.cc` is a C++ source extension.
        ("cc", "cpp"),
        ("cpp", "cpp"),
        ("sh", "shells"),
        ("bash", "shells"),
        ("zsh", "shells"),
        ("lua", "lua"),
        ("py", "python"),
        ("yml", "yaml"),
        ("yaml", "yaml"),
        ("go", "go"),
        ("hs", "haskell"),
        ("haskell", "haskell"),
        ("d", "d"),
        ("java", "java"),
        ("vim", "vim"),
    ]);
    /// Highlight configurations keyed by filetype name, each configured with
    /// `HIGHLIGHT_NAMES`.
    ///
    /// The same keys are used to look up configurations for injected content.
    static ref CONFIGS: HashMap<&'static str, HighlightConfiguration> = HashMap::from([
        ("vim", pepegsitter::vim::highlight()),
        ("rust", pepegsitter::rust::highlight()),
        ("toml", pepegsitter::toml::highlight()),
        ("javascript", pepegsitter::javascript::highlight()),
        ("typescript", pepegsitter::typescript::highlight()),
        ("html", pepegsitter::html::highlight()),
        ("css", pepegsitter::css::highlight()),
        ("c", pepegsitter::c::highlight()),
        ("cpp", pepegsitter::cpp::highlight()),
        ("shells", pepegsitter::bash::highlight()),
        ("lua", pepegsitter::lua::highlight()),
        // NOTE(review): markdown files are highlighted with the inline grammar
        // only; the block grammar (`pepegsitter::markdown::highlight()`) was
        // left disabled here. TODO confirm why before registering it.
        ("markdown", pepegsitter::markdown_inline::highlight()),
        ("python", pepegsitter::python::highlight()),
        ("yaml", pepegsitter::yaml::highlight()),
        ("go", pepegsitter::go::highlight()),
        ("haskell", pepegsitter::haskell::highlight()),
        ("d", pepegsitter::d::highlight()),
        ("java", pepegsitter::java::highlight()),
    ].map(|(key, mut val)| { val.configure(HIGHLIGHT_NAMES); (key, val) }));
}

/// An example file highlighter supporting [CONFIGS] filetypes. Run eg on itself with :
/// `cargo r --example=highlighter -- examples/highlighter.rs > highlighter.html`
fn main() {
let arguments: Vec<_> = args().into_iter().collect();
if arguments.len() != 2 {
panic!("\nSyntax: highlighter text_file");
}
let file_name = arguments[1].clone();
let text_content = read_to_string(&file_name).expect("readable file in text_file");
let mut highlighted_text = highlight(&file_name, &text_content);
highlighted_text = format!(
r#"
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>{file_name}</title>
{STYLE}
</head>
<body>
<pre>
{highlighted_text}
</pre>
</body>
</html>
"#
);
println!("{highlighted_text}");
}

/// Highlight `text` using `filename`'s extension to guess content type.
///
/// The output is HTML-escaped text in which highlighted ranges are wrapped in
/// `<span>` elements whose classes come from [HIGHLIGHT_NAMES] (each `.` in a
/// name becomes a space). See [STYLE] for some basic styling.
///
/// When the extension is missing or not listed in [FILETYPES], the text is
/// returned escaped but without any highlighting.
///
/// # Panics
///
/// Panics if a filetype listed in [FILETYPES] has no entry in [CONFIGS], or if
/// the highlighter reports an error.
pub fn highlight(filename: &str, text: &str) -> String {
    // Use the path's real extension so that a dot-less file name (e.g. a file
    // literally called `c`) is not mistaken for an extension.
    let extension = std::path::Path::new(filename)
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or("");

    let filetype = match FILETYPES.get(extension) {
        Some(filetype) => filetype,
        None => {
            eprintln!(
                " > highlighting: unrecognized extension '{}' with file '{}'.",
                extension, filename
            );

            return encode_text(text).into_owned();
        }
    };

    eprintln!(" > highlighting file {filename:?} with type {filetype:?}");
    let config = CONFIGS
        .get(filetype)
        .expect("every FILETYPES value has a CONFIGS entry");

    let mut highlighter = Highlighter::new();
    let highlights = highlighter
        .highlight(config, text.as_bytes(), None, |injected| {
            eprintln!(" > highlighting injected content with type {injected:?}");

            CONFIGS.get(injected)
        })
        .expect("highlighter accepts the configuration");

    // Append to a single buffer; rebuilding the string with `format!` on every
    // event would copy the whole output each time.
    let mut highlighted_text = String::with_capacity(text.len());
    for event in highlights {
        match event.expect("highlight event") {
            HighlightEvent::Source { start, end } => {
                highlighted_text.push_str(&encode_text(&text[start..end]));
            }
            HighlightEvent::HighlightStart(s) => {
                highlighted_text.push_str("<span class=\"");
                highlighted_text.push_str(&HIGHLIGHT_NAMES[s.0].replace('.', " "));
                highlighted_text.push_str("\">");
            }
            HighlightEvent::HighlightEnd => {
                highlighted_text.push_str("</span>");
            }
        }
    }

    highlighted_text
}

/// Basic stylesheet that `main` embeds into the `<head>` of the generated page.
///
/// The rules target the span classes produced by `highlight`. Colors are CSS
/// custom properties declared on `body`, with a second set of values applied
/// under `prefers-color-scheme: dark`.
const STYLE: &str = r#"
<style>
body {
background-color: var(--bg);
color: var(--fg);
}
.comment {
color: var(--other);
}
.attribute {
color: var(--fg-less);
}
.constant {
color: var(--info);
}
.function.builtin {
color: var(--fg-less);
}
.function {
color: var(--fg-less);
}
.keyword {
color: var(--fg-less);
}
.operator {
color: var(--fg-less);
}
.property {
color: var(--fg-less);
}
.punctuation {
color: var(--fg-lesser);
}
.punctuation.bracket {
color: var(--fg-lesser);
}
.punctuation.delimiter {
color: var(--fg-lesser);
}
.string {
color: var(--info);
}
.string.special {
color: var(--special);
}
.tag {
color: var(--fg-less);
}
.type {
color: var(--fg-less);
}
.type.builtin {
color: var(--fg-less);
}
body {
--fg: black;
--fg-less: #777; /* dark grey */
--fg-lesser: #aaa; /* light grey */
--bg-less: #f3f3f3; /* lighter grey */
--bg: white; /* white */
--ok: #79d907; /* green */
--err: #e51426; /* red */
--warn: #ee5e12; /* orange */
--info: #0060df; /* blue */
--other: #03b5b5; /* cyan */
--special: #9b1ddf; /* magenta */
--caret: #EE4EB8; /* pink */
}
@media(prefers-color-scheme: dark) {
body {
--fg: white;
--fg-less: #ccc; /* lighter grey */
--fg-lesser: #777; /* light grey */
--bg-less: #222323; /* dark grey */
--bg: black; /* black */
--ok: #A4CC35; /* green */
--err: #FF4050; /* red */
--warn: #F28144; /* orange */
--info: #9cd6ff; /* cyan */
--special: #CC78FA; /* magenta */
--caret: #F553BF; /* pink */
}
}
</style>
"#;
1 change: 1 addition & 0 deletions sitters/tree-sitter-markdown
Submodule tree-sitter-markdown added at 23d9cb
6 changes: 6 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ pepegsit!(java, "java", "/lang_java.rs");
pepegsit!(javascript, "javascript", "/lang_javascript.rs");
pepegsit!(json, "json", "/lang_json.rs");
pepegsit!(lua, "lua", "/lang_lua.rs");
pepegsit!(markdown, "markdown", "/lang_markdown.rs");
pepegsit!(
markdown_inline,
"markdown-inline",
"/lang_markdown_inline.rs"
);
pepegsit!(python, "python", "/lang_python.rs");
pepegsit!(rust, "rust", "/lang_rust.rs");
pepegsit!(toml, "toml", "/lang_toml.rs");
Expand Down
Loading