Skip to content

Commit

Permalink
refactor(common): add extensions to languages.toml (#1907)
Browse files Browse the repository at this point in the history
  • Loading branch information
wsxiaoys authored Apr 22, 2024
1 parent 4b81b8b commit 9c96e5f
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 83 deletions.
76 changes: 73 additions & 3 deletions crates/tabby-common/assets/languages.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
[[config]]
languages = ["python"]
exts = ["py"]
line_comment = "#"
top_level_keywords = ["def", "from", "class", "import"]

[[config]]
languages = ["rust"]
exts = ["rs"]
line_comment = "//"
top_level_keywords = [
"fn",
Expand All @@ -21,6 +23,7 @@ top_level_keywords = [

[[config]]
languages = ["java"]
exts = ["java"]
line_comment = "//"
top_level_keywords = [
"abstract",
Expand All @@ -43,6 +46,7 @@ top_level_keywords = [

[[config]]
languages = ["kotlin"]
exts = ["kt", "kts"]
line_comment = "//"
top_level_keywords = [
"abstract",
Expand All @@ -66,7 +70,8 @@ top_level_keywords = [
]

[[config]]
languages = ["javascript", "typescript", "javascriptreact", "typescriptreact"]
languages = ["javascript-typescript", "javascript", "typescript", "javascriptreact", "typescriptreact"]
exts = ["js", "ts", "jsx", "tsx", "mjs", "mts"]
line_comment = "//"
top_level_keywords = [
"abstract",
Expand All @@ -86,6 +91,7 @@ top_level_keywords = [

[[config]]
languages = ["go"]
exts = ["go"]
line_comment = "//"
top_level_keywords = [
"func",
Expand All @@ -100,6 +106,7 @@ top_level_keywords = [

[[config]]
languages = ["ruby"]
exts = ["rb"]
line_comment = "#"
top_level_keywords = [
"begin",
Expand All @@ -116,6 +123,7 @@ top_level_keywords = [

[[config]]
languages = ["c"]
exts = ["c", "h"]
line_comment = "//"
top_level_keywords = [
"const",
Expand All @@ -132,6 +140,7 @@ top_level_keywords = [

[[config]]
languages = ["cpp"]
exts= ["cpp", "hpp", "c++", "h++", "cc", "hh", "C", "H", "tcc"]
line_comment = "//"
top_level_keywords = [
"auto",
Expand All @@ -153,6 +162,7 @@ top_level_keywords = [

[[config]]
languages = ["csharp"]
exts = ["cs"]
line_comment = "//"
top_level_keywords = [
"class",
Expand All @@ -169,9 +179,9 @@ top_level_keywords = [
"async",
]


[[config]]
languages = ["php"]
exts = ["php", "php3", "php4", "php5", "phps", "phpt"]
line_comment = "//"
top_level_keywords = [
"abstract",
Expand All @@ -198,9 +208,9 @@ top_level_keywords = [
"use",
]


[[config]]
languages = ["solidity"]
exts = ["sol"]
line_comment = "//"
top_level_keywords = [
"contract",
Expand All @@ -212,3 +222,63 @@ top_level_keywords = [
"function",
"type",
]

[[config]]
languages = ["css"]
exts = ["css"]

[[config]]
languages = ["dockerfile"]
# NOTE(review): "Dockerfile" is conventionally a complete file name, not a file
# extension. Entries in `exts` are matched exactly against the `ext` argument of
# `get_language_by_ext`; confirm that callers can actually pass this value.
exts = ["Dockerfile"]

[[config]]
languages = ["haskell"]
exts = ["hs"]

[[config]]
languages = ["html"]
exts = ["html"]

[[config]]
languages = ["julia"]
exts = ["jl"]

[[config]]
languages = ["lua"]
exts = ["lua"]

[[config]]
languages = ["makefile"]
# NOTE(review): "Makefile" is conventionally a complete file name, not a file
# extension. Entries in `exts` are matched exactly against the `ext` argument of
# `get_language_by_ext`; confirm that callers can actually pass this value.
exts = ["Makefile"]

[[config]]
languages = ["markdown"]
exts = ["md", "markdown"]

[[config]]
languages = ["perl"]
exts = ["pl", "pm", "pod", "perl"]

[[config]]
languages = ["powershell"]
exts = ["ps1", "psd1", "psm1"]

[[config]]
languages = ["sql"]
exts = ["sql"]

[[config]]
languages = ["scala"]
exts = ["scala"]

[[config]]
languages = ["shellscript"]
exts = ["sh", "bash", "command", "zsh"]

[[config]]
languages = ["tex"]
exts = ["tex"]

[[config]]
languages = ["vb"]
exts = ["vb"]
75 changes: 60 additions & 15 deletions crates/tabby-common/src/languages.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::{collections::HashMap, ffi::OsStr};

use lazy_static::lazy_static;
use serde::Deserialize;

Expand Down Expand Up @@ -29,45 +31,88 @@ struct ConfigList {
#[derive(Deserialize, Debug)]
pub struct Language {
languages: Vec<String>,
top_level_keywords: Vec<String>,
exts: Vec<String>,

pub line_comment: String,
top_level_keywords: Option<Vec<String>>,
pub line_comment: Option<String>,
}

impl Language {
pub fn get_stop_words(&self) -> Vec<String> {
let mut out = vec![];
out.push(format!("\n{}", self.line_comment));
for word in &self.top_level_keywords {
out.push(format!("\n{}", word));
}

for x in DEFAULT.iter() {
out.push((*x).to_owned());
}

if let Some(line_comment) = &self.line_comment {
out.push(format!("\n{}", line_comment));
};

if let Some(top_level_keywords) = &self.top_level_keywords {
for word in top_level_keywords {
out.push(format!("\n{}", word));
}
};

out
}

pub fn get_hashkey(&self) -> String {
self.languages[0].clone()
pub fn language(&'static self) -> &'static str {
self.languages[0].as_str()
}
}

lazy_static! {
static ref CONFIG: ConfigList =
serdeconv::from_toml_str(include_str!("../assets/languages.toml")).unwrap();
static ref LANGUAGE_CONFIG_MAPPING: HashMap<&'static str, &'static Language> = {
    // Index every language name listed in `CONFIG` so that each alias of a config
    // entry resolves to that same entry. A name that appears twice is a mistake in
    // the bundled configuration, so initialization panics instead of silently
    // letting one entry shadow the other.
    let mut by_name = HashMap::new();
    for config in CONFIG.config.iter() {
        for name in config.languages.iter() {
            // `insert` hands back the previous value for the key, if there was one.
            let previous = by_name.insert(name.as_str(), config);
            assert!(previous.is_none(), "Duplicate language found: {}", name);
        }
    }
    by_name
};
static ref EXTS_LANGUAGE_MAPPING: HashMap<&'static str, &'static str> = {
    // Maps each file extension to the primary (first-listed) language name of the
    // config entry that declares it; that name can then be passed to `get_language`.
    //
    // Each extension is registered exactly once per config entry. Registering it
    // once per language name would make any entry with several names trip the
    // duplicate check on its own extensions.
    let mut map = HashMap::new();
    for c in &CONFIG.config {
        // An entry without any language name has nothing to map its extensions to.
        if let Some(primary) = c.languages.first() {
            for e in &c.exts {
                // `insert` returns the previous value, so `Some` means another
                // entry (or this one) already claimed the extension.
                assert!(
                    map.insert(e.as_str(), primary.as_str()).is_none(),
                    "Duplicate extension found: {}",
                    e
                );
            }
        }
    }
    map
};
pub static ref UNKNOWN_LANGUAGE: Language = Language {
languages: vec!["unknown".to_owned()],
line_comment: "".to_owned(),
top_level_keywords: vec![],
line_comment: Some("".into()),
top_level_keywords: Some(vec![]),
exts: vec![],
};
}

pub fn get_language(language: &str) -> &'static Language {
CONFIG
.config
.iter()
.find(|c| c.languages.iter().any(|x| x == language))
.unwrap_or(&UNKNOWN_LANGUAGE)
if let Some(lang) = LANGUAGE_CONFIG_MAPPING.get(language) {
lang
} else {
&UNKNOWN_LANGUAGE
}
}

/// Looks up the language configuration registered for a file extension.
///
/// The extension is matched exactly (case-sensitively) against the keys of
/// `EXTS_LANGUAGE_MAPPING`.
///
/// Returns `None` when `ext` is not valid UTF-8 or when no configured language
/// declares that extension.
pub fn get_language_by_ext(ext: &OsStr) -> Option<&'static Language> {
    let key = ext.to_str()?;
    let name = EXTS_LANGUAGE_MAPPING.get(key)?;
    Some(get_language(name))
}
2 changes: 1 addition & 1 deletion crates/tabby-inference/src/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ impl StopConditionFactory {
if stop_words.is_empty() {
None
} else {
let hashkey = language.get_hashkey();
let hashkey = language.language().to_owned();
let mut trie = self.stop_trie_cache.get(&hashkey);
if trie.is_none() {
self.stop_trie_cache
Expand Down
Loading

0 comments on commit 9c96e5f

Please sign in to comment.