diff --git a/Cargo.lock b/Cargo.lock index 60439ca7..aeeb192d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,9 +47,9 @@ checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" [[package]] name = "anyhow" -version = "1.0.79" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" +checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" [[package]] name = "async-channel" @@ -402,6 +402,10 @@ dependencies = [ "libloading", ] +[[package]] +name = "cli" +version = "0.0.0" + [[package]] name = "cmake" version = "0.1.50" @@ -489,6 +493,34 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-queue" version = "0.3.11" @@ -751,6 +783,13 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs" +version = "0.0.0" +dependencies = [ + "serde", +] + [[package]] name = "fs_extra" version = "1.3.0" @@ -1119,6 +1158,12 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +[[package]] +name = "jod-thread" +version = "0.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b23360e99b8717f20aaa4598f5a6541efbe30630039fbc7706cf954a87947ae" + [[package]] name = "js-sys" version = "0.3.68" @@ -1222,6 +1267,33 @@ dependencies = [ "value-bag", ] +[[package]] +name = "lsp" +version = "0.0.0" +dependencies = [ + "anyhow", + "crossbeam-channel", + "lsp-server", + "lsp-types 0.95.0", + "num_cpus", + "schema_cache", + "serde", + "serde_json", + "stdx", +] + +[[package]] +name = "lsp-server" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248f65b78f6db5d8e1b1604b4098a28b43d21a8eb1deeca22b1c421b276c7095" +dependencies = [ + "crossbeam-channel", + "log", + "serde", + "serde_json", +] + [[package]] name = "lsp-types" version = "0.94.1" @@ -1235,6 +1307,19 @@ dependencies = [ "url", ] +[[package]] +name = "lsp-types" +version = "0.95.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "158c1911354ef73e8fe42da6b10c0484cb65c7f1007f28022e847706c1ab6984" +dependencies = [ + "bitflags 1.3.2", + "serde", + "serde_json", + "serde_repr", + "url", +] + [[package]] name = "md-5" version = "0.10.6" @@ -1425,6 +1510,7 @@ dependencies = [ "pg_query_proto_parser", "regex", "serde_json", + "text_size", ] [[package]] @@ -1559,6 +1645,16 @@ version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" +[[package]] +name = "playground" +version = "0.0.0" +dependencies = [ + "dashmap", + "rayon", + "schema_cache", + "tree-sitter", +] + [[package]] name = "polling" version = "2.8.0" @@ -1765,6 +1861,26 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rayon" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = 
"1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -1948,18 +2064,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.196" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.196" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", @@ -1968,9 +2084,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.113" +version = "1.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" +checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" dependencies = [ "itoa", "ryu", @@ -1988,6 +2104,20 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "service" +version = "0.0.0" +dependencies = [ + "dashmap", + "fs", + "parser", + "schema_cache", + "serde", + "text_size", + "tree-sitter", + "tree_sitter_sql", +] + [[package]] name = "sha1" version = "0.10.6" @@ -2324,6 +2454,15 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stdx" +version = "0.0.0" +dependencies = [ + "crossbeam-channel", + "jod-thread", + "libc", +] + [[package]] name = "str_indices" version = "0.4.3" @@ 
-2396,6 +2535,13 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" +[[package]] +name = "text_size" +version = "0.0.0" +dependencies = [ + "serde", +] + [[package]] name = "thiserror" version = "1.0.56" @@ -2526,7 +2672,7 @@ dependencies = [ "dashmap", "futures", "httparse", - "lsp-types", + "lsp-types 0.94.1", "memchr", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 6e6fb923..e6ed6680 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,13 @@ resolver = "2" rust-version = "1.71" [workspace.dependencies] +stdx = { path = "./crates/stdx", version = "0.0.0" } +lsp = { path = "./crates/lsp", version = "0.0.0" } +cli = { path = "./crates/cli", version = "0.0.0" } +playground = { path = "./crates/playground", version = "0.0.0" } +fs = { path = "./crates/fs", version = "0.0.0" } +text_size = { path = "./crates/text_size", version = "0.0.0" } +service = { path = "./crates/service", version = "0.0.0" } tree_sitter_sql = { path = "./crates/tree_sitter_sql", version = "0.0.0" } schema_cache = { path = "./crates/schema_cache", version = "0.0.0" } parser = { path = "./crates/parser", version = "0.0.0" } @@ -17,6 +24,12 @@ sourcegen = { path = "./crates/sourcegen", version = "0.0.0" } pg_query_proto_parser = { path = "./crates/pg_query_proto_parser", version = "0.0.0" } triomphe = { version = "0.1.8", default-features = false, features = ["std"] } +serde = "1.0.197" +dashmap = "5.5.3" +tree-sitter = "0.20.10" +libc = "0.2.150" +crossbeam-channel = "0.5.12" + [profile.dev.package] insta.opt-level = 3 similar.opt-level = 3 diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml new file mode 100644 index 00000000..fdadc45c --- /dev/null +++ b/crates/cli/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "cli" +version = "0.0.0" +edition = "2021" + +[dependencies] + +[lib] +doctest = false diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs new 
file mode 100644 index 00000000..e7a11a96 --- /dev/null +++ b/crates/cli/src/lib.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} diff --git a/crates/fs/Cargo.toml b/crates/fs/Cargo.toml new file mode 100644 index 00000000..ed0c6076 --- /dev/null +++ b/crates/fs/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "fs" +version = "0.0.0" +edition = "2021" + +[dependencies] +serde = { workspace = true } + +[lib] +doctest = false diff --git a/crates/fs/src/lib.rs b/crates/fs/src/lib.rs new file mode 100644 index 00000000..631d0001 --- /dev/null +++ b/crates/fs/src/lib.rs @@ -0,0 +1,3 @@ +mod path; + +pub use path::FilePath; diff --git a/crates/fs/src/path.rs b/crates/fs/src/path.rs new file mode 100644 index 00000000..d03af9ef --- /dev/null +++ b/crates/fs/src/path.rs @@ -0,0 +1,29 @@ +//! This module is responsible to manage paths inside Biome. +//! It is a small wrapper around [path::PathBuf] but it is also able to +//! give additional information around the file that holds: +//! - the [FileHandlers] for the specific file +//! 
- shortcuts to open/write to the file +use std::fs::read_to_string; +use std::io::Read; +use std::{fs::File, io, io::Write, ops::Deref, path::PathBuf}; + +#[derive(Debug, Clone, Eq, Hash, PartialEq, serde::Serialize, serde::Deserialize)] +pub struct FilePath { + path: PathBuf, +} + +impl Deref for FilePath { + type Target = PathBuf; + + fn deref(&self) -> &Self::Target { + &self.path + } +} + +impl FilePath { + pub fn new(path_to_file: impl Into) -> Self { + Self { + path: path_to_file.into(), + } + } +} diff --git a/crates/lsp/Cargo.toml b/crates/lsp/Cargo.toml new file mode 100644 index 00000000..80171d6f --- /dev/null +++ b/crates/lsp/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "lsp" +version = "0.0.0" +edition = "2021" + +[dependencies] +lsp-server = "0.7.6" +anyhow = "1.0.80" +lsp-types = "0.95.0" +serde = "1.0.197" +serde_json = "1.0.114" +crossbeam-channel.workspace = true +num_cpus = "1.15.0" + +schema_cache.workspace = true +stdx.workspace = true + +[lib] +doctest = false + +[[bin]] +name = "postgres_lsp" +path = "src/bin/main.rs" diff --git a/crates/lsp/src/bin/main.rs b/crates/lsp/src/bin/main.rs new file mode 100644 index 00000000..0e37f47c --- /dev/null +++ b/crates/lsp/src/bin/main.rs @@ -0,0 +1,59 @@ +use lsp::config::Config; +use lsp::from_json; +use lsp::server_capabilities; +use lsp_server::Connection; + +fn main() { + run_server().unwrap(); +} + +fn run_server() -> anyhow::Result<()> { + let (connection, io_threads) = Connection::stdio(); + + // wait for init request from client + let (initialize_id, initialize_params) = match connection.initialize_start() { + Ok(it) => it, + Err(e) => { + if e.channel_is_disconnected() { + io_threads.join()?; + } + return Err(e.into()); + } + }; + + let lsp_types::InitializeParams { + root_uri, + capabilities, + workspace_folders, + initialization_options, + client_info, + .. 
+ } = from_json::("InitializeParams", &initialize_params)?; + + // we can later pass the init params to the config + let config = Config::default(); + + let server_capabilities = server_capabilities(&config); + + let initialize_result = lsp_types::InitializeResult { + capabilities: server_capabilities, + server_info: Some(lsp_types::ServerInfo { + name: String::from("postgres_lsp"), + version: Some("0.0.0".to_string()), + }), + }; + + let initialize_result = serde_json::to_value(initialize_result).unwrap(); + + if let Err(e) = connection.initialize_finish(initialize_id, initialize_result) { + if e.channel_is_disconnected() { + io_threads.join()?; + } + return Err(e.into()); + } + + main_loop(config, connection)?; + + io_threads.join()?; + Ok(()) +} diff --git a/crates/lsp/src/capabilities.rs b/crates/lsp/src/capabilities.rs new file mode 100644 index 00000000..5a5a35e4 --- /dev/null +++ b/crates/lsp/src/capabilities.rs @@ -0,0 +1,52 @@ +use lsp_types::{ + SaveOptions, ServerCapabilities, TextDocumentSyncCapability, TextDocumentSyncKind, + TextDocumentSyncOptions, +}; + +use crate::config::Config; + +pub fn server_capabilities(config: &Config) -> ServerCapabilities { + ServerCapabilities { + text_document_sync: Some(TextDocumentSyncCapability::Options( + TextDocumentSyncOptions { + open_close: Some(true), + change: Some(TextDocumentSyncKind::INCREMENTAL), + will_save: None, + will_save_wait_until: None, + save: Some(SaveOptions::default().into()), + }, + )), + position_encoding: None, + selection_range_provider: None, + hover_provider: None, + completion_provider: None, + signature_help_provider: None, + definition_provider: None, + type_definition_provider: None, + implementation_provider: None, + references_provider: None, + document_highlight_provider: None, + document_symbol_provider: None, + workspace_symbol_provider: None, + code_action_provider: None, + code_lens_provider: None, + document_formatting_provider: None, + document_range_formatting_provider: None, 
+ document_on_type_formatting_provider: None, + rename_provider: None, + document_link_provider: None, + color_provider: None, + folding_range_provider: None, + declaration_provider: None, + execute_command_provider: None, + workspace: None, + call_hierarchy_provider: None, + semantic_tokens_provider: None, + moniker_provider: None, + linked_editing_range_provider: None, + inline_value_provider: None, + inlay_hint_provider: None, + diagnostic_provider: None, + experimental: None, + } +} diff --git a/crates/lsp/src/config.rs b/crates/lsp/src/config.rs new file mode 100644 index 00000000..41ac4eac --- /dev/null +++ b/crates/lsp/src/config.rs @@ -0,0 +1,16 @@ +#[derive(Default, Clone)] +pub struct Config { + data: ConfigData, +} + +#[derive(Default, Clone)] +pub struct ConfigData { + database_url: Option, + num_threads: Option, +} + +impl Config { + pub fn main_loop_num_threads(&self) -> usize { + self.data.num_threads.unwrap_or(num_cpus::get_physical()) + } +} diff --git a/crates/lsp/src/dispatch.rs b/crates/lsp/src/dispatch.rs new file mode 100644 index 00000000..cadf73b5 --- /dev/null +++ b/crates/lsp/src/dispatch.rs @@ -0,0 +1,357 @@ +//! See [RequestDispatcher]. +use std::{ + fmt::{self, Debug}, + panic, thread, +}; + +use ide::Cancelled; +use lsp_server::ExtractError; +use serde::{de::DeserializeOwned, Serialize}; +use stdx::thread::ThreadIntent; + +use crate::{ + global_state::{GlobalState, GlobalStateSnapshot}, + lsp::LspError, + main_loop::Task, + version::version, +}; + +/// A visitor for routing a raw JSON request to an appropriate handler function. +/// +/// Most requests are read-only and async and are handled on the threadpool +/// (`on` method). +/// +/// Some read-only requests are latency sensitive, and are immediately handled +/// on the main loop thread (`on_sync`). These are typically typing-related +/// requests. +/// +/// Some requests modify the state, and are run on the main thread to get +/// `&mut` (`on_sync_mut`). 
+/// +/// Read-only requests are wrapped into `catch_unwind` -- they don't modify the +/// state, so it's OK to recover from their failures. +pub(crate) struct RequestDispatcher<'a> { + pub(crate) req: Option, + pub(crate) global_state: &'a mut GlobalState, +} + +impl RequestDispatcher<'_> { + /// Dispatches the request onto the current thread, given full access to + /// mutable global state. Unlike all other methods here, this one isn't + /// guarded by `catch_unwind`, so, please, don't make bugs :-) + pub(crate) fn on_sync_mut( + &mut self, + f: fn(&mut GlobalState, R::Params) -> anyhow::Result, + ) -> &mut Self + where + R: lsp_types::request::Request, + R::Params: DeserializeOwned + panic::UnwindSafe + fmt::Debug, + R::Result: Serialize, + { + let (req, params, panic_context) = match self.parse::() { + Some(it) => it, + None => return self, + }; + let result = { + let _pctx = stdx::panic_context::enter(panic_context); + f(self.global_state, params) + }; + if let Ok(response) = result_to_response::(req.id, result) { + self.global_state.respond(response); + } + + self + } + + /// Dispatches the request onto the current thread. + pub(crate) fn on_sync( + &mut self, + f: fn(GlobalStateSnapshot, R::Params) -> anyhow::Result, + ) -> &mut Self + where + R: lsp_types::request::Request, + R::Params: DeserializeOwned + panic::UnwindSafe + fmt::Debug, + R::Result: Serialize, + { + let (req, params, panic_context) = match self.parse::() { + Some(it) => it, + None => return self, + }; + let global_state_snapshot = self.global_state.snapshot(); + + let result = panic::catch_unwind(move || { + let _pctx = stdx::panic_context::enter(panic_context); + f(global_state_snapshot, params) + }); + + if let Ok(response) = thread_result_to_response::(req.id, result) { + self.global_state.respond(response); + } + + self + } + + /// Dispatches a non-latency-sensitive request onto the thread pool + /// without retrying it if it panics. 
+ pub(crate) fn on_no_retry( + &mut self, + f: fn(GlobalStateSnapshot, R::Params) -> anyhow::Result, + ) -> &mut Self + where + R: lsp_types::request::Request + 'static, + R::Params: DeserializeOwned + panic::UnwindSafe + Send + fmt::Debug, + R::Result: Serialize, + { + let (req, params, panic_context) = match self.parse::() { + Some(it) => it, + None => return self, + }; + + self.global_state + .task_pool + .handle + .spawn(ThreadIntent::Worker, { + let world = self.global_state.snapshot(); + move || { + let result = panic::catch_unwind(move || { + let _pctx = stdx::panic_context::enter(panic_context); + f(world, params) + }); + match thread_result_to_response::(req.id.clone(), result) { + Ok(response) => Task::Response(response), + Err(_) => Task::Response(lsp_server::Response::new_err( + req.id, + lsp_server::ErrorCode::ContentModified as i32, + "content modified".to_owned(), + )), + } + } + }); + + self + } + + /// Dispatches a non-latency-sensitive request onto the thread pool. + pub(crate) fn on( + &mut self, + f: fn(GlobalStateSnapshot, R::Params) -> anyhow::Result, + ) -> &mut Self + where + R: lsp_types::request::Request + 'static, + R::Params: DeserializeOwned + panic::UnwindSafe + Send + fmt::Debug, + R::Result: Serialize, + { + self.on_with_thread_intent::(ThreadIntent::Worker, f) + } + + /// Dispatches a latency-sensitive request onto the thread pool. + pub(crate) fn on_latency_sensitive( + &mut self, + f: fn(GlobalStateSnapshot, R::Params) -> anyhow::Result, + ) -> &mut Self + where + R: lsp_types::request::Request + 'static, + R::Params: DeserializeOwned + panic::UnwindSafe + Send + fmt::Debug, + R::Result: Serialize, + { + self.on_with_thread_intent::(ThreadIntent::LatencySensitive, f) + } + + /// Formatting requests should never block on waiting a for task thread to open up, editors will wait + /// on the response and a late formatting update might mess with the document and user. 
+ /// We can't run this on the main thread though as we invoke rustfmt which may take arbitrary time to complete! + pub(crate) fn on_fmt_thread( + &mut self, + f: fn(GlobalStateSnapshot, R::Params) -> anyhow::Result, + ) -> &mut Self + where + R: lsp_types::request::Request + 'static, + R::Params: DeserializeOwned + panic::UnwindSafe + Send + fmt::Debug, + R::Result: Serialize, + { + self.on_with_thread_intent::(ThreadIntent::LatencySensitive, f) + } + + pub(crate) fn finish(&mut self) { + if let Some(req) = self.req.take() { + tracing::error!("unknown request: {:?}", req); + let response = lsp_server::Response::new_err( + req.id, + lsp_server::ErrorCode::MethodNotFound as i32, + "unknown request".to_owned(), + ); + self.global_state.respond(response); + } + } + + fn on_with_thread_intent( + &mut self, + intent: ThreadIntent, + f: fn(GlobalStateSnapshot, R::Params) -> anyhow::Result, + ) -> &mut Self + where + R: lsp_types::request::Request + 'static, + R::Params: DeserializeOwned + panic::UnwindSafe + Send + fmt::Debug, + R::Result: Serialize, + { + let (req, params, panic_context) = match self.parse::() { + Some(it) => it, + None => return self, + }; + + let world = self.global_state.snapshot(); + if MAIN_POOL { + &mut self.global_state.task_pool.handle + } else { + &mut self.global_state.fmt_pool.handle + } + .spawn(intent, move || { + let result = panic::catch_unwind(move || { + let _pctx = stdx::panic_context::enter(panic_context); + f(world, params) + }); + match thread_result_to_response::(req.id.clone(), result) { + Ok(response) => Task::Response(response), + Err(_) => Task::Retry(req), + } + }); + + self + } + + fn parse(&mut self) -> Option<(lsp_server::Request, R::Params, String)> + where + R: lsp_types::request::Request, + R::Params: DeserializeOwned + fmt::Debug, + { + let req = match &self.req { + Some(req) if req.method == R::METHOD => self.req.take()?, + _ => return None, + }; + + let res = crate::from_json(R::METHOD, &req.params); + match res { + 
Ok(params) => { + let panic_context = format!( + "\nversion: {}\nrequest: {} {params:#?}", + version(), + R::METHOD + ); + Some((req, params, panic_context)) + } + Err(err) => { + let response = lsp_server::Response::new_err( + req.id, + lsp_server::ErrorCode::InvalidParams as i32, + err.to_string(), + ); + self.global_state.respond(response); + None + } + } + } +} + +fn thread_result_to_response( + id: lsp_server::RequestId, + result: thread::Result>, +) -> Result +where + R: lsp_types::request::Request, + R::Params: DeserializeOwned, + R::Result: Serialize, +{ + match result { + Ok(result) => result_to_response::(id, result), + Err(panic) => { + let panic_message = panic + .downcast_ref::() + .map(String::as_str) + .or_else(|| panic.downcast_ref::<&str>().copied()); + + let mut message = "request handler panicked".to_owned(); + if let Some(panic_message) = panic_message { + message.push_str(": "); + message.push_str(panic_message) + }; + + Ok(lsp_server::Response::new_err( + id, + lsp_server::ErrorCode::InternalError as i32, + message, + )) + } + } +} + +fn result_to_response( + id: lsp_server::RequestId, + result: anyhow::Result, +) -> Result +where + R: lsp_types::request::Request, + R::Params: DeserializeOwned, + R::Result: Serialize, +{ + let res = match result { + Ok(resp) => lsp_server::Response::new_ok(id, &resp), + Err(e) => match e.downcast::() { + Ok(lsp_error) => lsp_server::Response::new_err(id, lsp_error.code, lsp_error.message), + Err(e) => match e.downcast::() { + Ok(cancelled) => return Err(cancelled), + Err(e) => lsp_server::Response::new_err( + id, + lsp_server::ErrorCode::InternalError as i32, + e.to_string(), + ), + }, + }, + }; + Ok(res) +} + +pub(crate) struct NotificationDispatcher<'a> { + pub(crate) not: Option, + pub(crate) global_state: &'a mut GlobalState, +} + +impl NotificationDispatcher<'_> { + pub(crate) fn on_sync_mut( + &mut self, + f: fn(&mut GlobalState, N::Params) -> anyhow::Result<()>, + ) -> anyhow::Result<&mut Self> + where 
+ N: lsp_types::notification::Notification, + N::Params: DeserializeOwned + Send + Debug, + { + let not = match self.not.take() { + Some(it) => it, + None => return Ok(self), + }; + + let params = match not.extract::(N::METHOD) { + Ok(it) => it, + Err(ExtractError::JsonError { method, error }) => { + panic!("Invalid request\nMethod: {method}\n error: {error}",) + } + Err(ExtractError::MethodMismatch(not)) => { + self.not = Some(not); + return Ok(self); + } + }; + + let _pctx = stdx::panic_context::enter(format!( + "\nversion: {}\nnotification: {}", + version(), + N::METHOD + )); + f(self.global_state, params)?; + Ok(self) + } + + pub(crate) fn finish(&mut self) { + if let Some(not) = &self.not { + if !not.method.starts_with("$/") {} + } + } +} diff --git a/crates/lsp/src/global_state.rs b/crates/lsp/src/global_state.rs new file mode 100644 index 00000000..91e7b819 --- /dev/null +++ b/crates/lsp/src/global_state.rs @@ -0,0 +1,113 @@ +use schema_cache::SchemaCache; +use std::{sync::Arc, time::Instant}; + +use crossbeam_channel::{unbounded, Receiver, Sender}; + +use crate::{config::Config, main_loop::Task, task_pool::TaskPool}; + +// Enforces drop order +pub(crate) struct Handle { + pub(crate) handle: H, + pub(crate) receiver: C, +} + +pub(crate) type ReqHandler = fn(&mut GlobalState, lsp_server::Response); +type ReqQueue = lsp_server::ReqQueue<(String, Instant), ReqHandler>; + +pub(crate) struct GlobalState { + sender: Sender, + config: Arc, + // add listener for schema cache + schema_cache: SchemaCache, + + req_queue: ReqQueue, + pub(crate) task_pool: Handle, Receiver>, + + // status + pub(crate) shutdown_requested: bool, +} + +impl GlobalState { + pub(crate) fn new(sender: Sender, config: Config) -> GlobalState { + let task_pool = { + let (sender, receiver) = unbounded(); + let handle = TaskPool::new_with_threads(sender, config.main_loop_num_threads()); + Handle { handle, receiver } + }; + GlobalState { + sender, + config: Arc::new(config.clone()), + schema_cache: 
SchemaCache::default(), + + req_queue: ReqQueue::default(), + task_pool, + + shutdown_requested: false, + } + } + + pub(crate) fn send_request( + &mut self, + params: R::Params, + handler: ReqHandler, + ) { + let request = self + .req_queue + .outgoing + .register(R::METHOD.to_owned(), params, handler); + self.send(request.into()); + } + + pub(crate) fn complete_request(&mut self, response: lsp_server::Response) { + let handler = self + .req_queue + .outgoing + .complete(response.id.clone()) + .expect("received response for unknown request"); + handler(self, response) + } + + pub(crate) fn send_notification( + &self, + params: N::Params, + ) { + let not = lsp_server::Notification::new(N::METHOD.to_owned(), params); + self.send(not.into()); + } + + pub(crate) fn register_request( + &mut self, + request: &lsp_server::Request, + request_received: Instant, + ) { + self.req_queue.incoming.register( + request.id.clone(), + (request.method.clone(), request_received), + ); + } + + pub(crate) fn respond(&mut self, response: lsp_server::Response) { + if self + .req_queue + .incoming + .complete(response.id.clone()) + .is_some() + { + self.send(response.into()); + } + } + + pub(crate) fn cancel(&mut self, request_id: lsp_server::RequestId) { + if let Some(response) = self.req_queue.incoming.cancel(request_id) { + self.send(response.into()); + } + } + + pub(crate) fn is_completed(&self, request: &lsp_server::Request) -> bool { + self.req_queue.incoming.is_completed(&request.id) + } + + fn send(&self, message: lsp_server::Message) { + self.sender.send(message).unwrap() + } +} diff --git a/crates/lsp/src/handlers/notification.rs b/crates/lsp/src/handlers/notification.rs new file mode 100644 index 00000000..b4d89667 --- /dev/null +++ b/crates/lsp/src/handlers/notification.rs @@ -0,0 +1,133 @@ +use lsp_types::{ + DidChangeTextDocumentParams, DidCloseTextDocumentParams, DidOpenTextDocumentParams, + DidSaveTextDocumentParams, +}; + +use crate::global_state::GlobalState; + +// we do 
not need to track workspace or file changes ourselves to notify is not required +// rust analyzer i think also does only track file changes with vfs to watch for changes directly +// from the file system +// same for latex: +// https://github.com/latex-lsp/texlab/blob/master/crates/texlab/src/server.rs#L1026 + +pub(crate) fn handle_did_open_text_document( + state: &mut GlobalState, + params: DidOpenTextDocumentParams, +) -> anyhow::Result<()> { + if let Ok(path) = from_proto::vfs_path(¶ms.text_document.uri) { + let already_exists = state + .mem_docs + .insert( + path.clone(), + DocumentData::new( + params.text_document.version, + params.text_document.text.clone().into_bytes(), + ), + ) + .is_err(); + if already_exists {} + + state + .vfs + .write() + .0 + .set_file_contents(path, Some(params.text_document.text.into_bytes())); + if state.config.notifications().unindexed_project { + tracing::debug!("queuing task"); + let _ = state.deferred_task_queue.sender.send( + crate::main_loop::QueuedTask::CheckIfIndexed(params.text_document.uri), + ); + } + } + Ok(()) +} + +pub(crate) fn handle_did_change_text_document( + state: &mut GlobalState, + params: DidChangeTextDocumentParams, +) -> anyhow::Result<()> { + if let Ok(path) = from_proto::vfs_path(¶ms.text_document.uri) { + let Some(DocumentData { version, data }) = state.mem_docs.get_mut(&path) else { + tracing::error!(?path, "unexpected DidChangeTextDocument"); + return Ok(()); + }; + // The version passed in DidChangeTextDocument is the version after all edits are applied + // so we should apply it before the vfs is notified. 
+ *version = params.text_document.version; + + let new_contents = apply_document_changes( + state.config.position_encoding(), + std::str::from_utf8(data).unwrap(), + params.content_changes, + ) + .into_bytes(); + if *data != new_contents { + *data = new_contents.clone(); + state + .vfs + .write() + .0 + .set_file_contents(path, Some(new_contents)); + } + } + Ok(()) +} + +pub(crate) fn handle_did_close_text_document( + state: &mut GlobalState, + params: DidCloseTextDocumentParams, +) -> anyhow::Result<()> { + if let Ok(path) = from_proto::vfs_path(¶ms.text_document.uri) { + if state.mem_docs.remove(&path).is_err() { + tracing::error!("orphan DidCloseTextDocument: {}", path); + } + + if let Some(file_id) = state.vfs.read().0.file_id(&path) { + state.diagnostics.clear_native_for(file_id); + } + + state + .semantic_tokens_cache + .lock() + .remove(¶ms.text_document.uri); + + if let Some(path) = path.as_path() { + state.loader.handle.invalidate(path.to_path_buf()); + } + } + Ok(()) +} + +pub(crate) fn handle_did_save_text_document( + state: &mut GlobalState, + params: DidSaveTextDocumentParams, +) -> anyhow::Result<()> { + if state.config.script_rebuild_on_save() && state.build_deps_changed { + state.build_deps_changed = false; + state + .fetch_build_data_queue + .request_op("build_deps_changed - save notification".to_owned(), ()); + } + + if let Ok(vfs_path) = from_proto::vfs_path(¶ms.text_document.uri) { + // Re-fetch workspaces if a workspace related file has changed + if let Some(abs_path) = vfs_path.as_path() { + if reload::should_refresh_for_change(abs_path, ChangeKind::Modify) { + state + .fetch_workspaces_queue + .request_op(format!("workspace vfs file change saved {abs_path}"), false); + } + } + + if !state.config.check_on_save() || run_flycheck(state, vfs_path) { + return Ok(()); + } + } else if state.config.check_on_save() { + // No specific flycheck was triggered, so let's trigger all of them. 
+ for flycheck in state.flycheck.iter() { + flycheck.restart_workspace(None); + } + } + Ok(()) +} diff --git a/crates/lsp/src/handlers/request.rs b/crates/lsp/src/handlers/request.rs new file mode 100644 index 00000000..e69de29b diff --git a/crates/lsp/src/lib.rs b/crates/lsp/src/lib.rs new file mode 100644 index 00000000..be6cd2dc --- /dev/null +++ b/crates/lsp/src/lib.rs @@ -0,0 +1,16 @@ +mod capabilities; +pub mod config; +mod dispatch; +mod global_state; +mod main_loop; +mod task_pool; +mod utils; + +mod handlers { + pub(crate) mod notification; + pub(crate) mod request; +} + +pub use crate::capabilities::server_capabilities; +pub use crate::main_loop::main_loop; +pub use crate::utils::from_json; diff --git a/crates/lsp/src/main_loop.rs b/crates/lsp/src/main_loop.rs new file mode 100644 index 00000000..160d1b88 --- /dev/null +++ b/crates/lsp/src/main_loop.rs @@ -0,0 +1,166 @@ +use std::{fmt, time::Instant}; + +use crossbeam_channel::{select, Receiver}; +use lsp_server::{Connection, Notification, Request}; +use lsp_types::notification::Notification as _; + +use crate::{ + config::Config, + dispatch::{NotificationDispatcher, RequestDispatcher}, + global_state::GlobalState, +}; + +pub fn main_loop(config: Config, connection: Connection) -> anyhow::Result<()> { + GlobalState::new(connection.sender, config).run(connection.receiver) +} + +// feature params in latex: document, project and workspace! +// based on that they fetch all +// they parse with a rw lock on open. they open the entire doc on change! +// +// --> we can do the same. but do not name it workspace since we do not care about the workspace, +// just the sql files. and then apply changes and parse with ts and with libg_query. if no syntax +// error, compute cst. +// --> base db stores syntax tree +// also compute cst on save. +// --> computing cst could later be directly in sync. 
+// +// --> performance analysis for libg_query for large statements + +enum Event { + Lsp(lsp_server::Message), + Task(Task), +} + +impl fmt::Display for Event { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Event::Lsp(_) => write!(f, "Event::Lsp"), + Event::Task(_) => write!(f, "Event::Task"), + } + } +} + +#[derive(Debug)] +pub(crate) enum Task { + Response(lsp_server::Response), + Retry(lsp_server::Request), + Diagnostics(Vec<(FileId, Vec)>), +} + +impl GlobalState { + fn run(mut self, inbox: Receiver) -> anyhow::Result<()> { + // todo: reload schema cache + + while let Some(event) = self.next_event(&inbox) { + if matches!( + &event, + Event::Lsp(lsp_server::Message::Notification(Notification { method, .. })) + if method == lsp_types::notification::Exit::METHOD + ) { + return Ok(()); + } + self.handle_event(event)?; + } + + anyhow::bail!("client exited without proper shutdown sequence") + } + + fn next_event(&self, inbox: &Receiver) -> Option { + select! { + recv(inbox) -> msg => + msg.ok().map(Event::Lsp), + + recv(self.task_pool.receiver) -> task => + Some(Event::Task(task.unwrap())), + } + } + + fn handle_event(&mut self, event: Event) -> anyhow::Result<()> { + let loop_start = Instant::now(); + + match event { + Event::Lsp(msg) => match msg { + lsp_server::Message::Request(req) => self.on_new_request(loop_start, req), + lsp_server::Message::Notification(not) => self.on_notification(not)?, + lsp_server::Message::Response(resp) => self.complete_request(resp), + }, + Event::Task(task) => { + self.handle_task(task); + // Coalesce multiple task events into one loop turn + while let Ok(task) = self.task_pool.receiver.try_recv() { + self.handle_task(task); + } + } + } + + Ok(()) + } + + fn handle_task(&mut self, task: Task) { + match task { + Task::Response(response) => self.respond(response), + // Only retry requests that haven't been cancelled. Otherwise we do unnecessary work. 
+ Task::Retry(req) if !self.is_completed(&req) => self.on_request(req), + Task::Retry(_) => (), + Task::Diagnostics(diagnostics_per_file) => { + for (file_id, diagnostics) in diagnostics_per_file { + self.diagnostics + .set_native_diagnostics(file_id, diagnostics) + } + } + } + } + + /// Registers and handles a request. This should only be called once per incoming request. + fn on_new_request(&mut self, request_received: Instant, req: Request) { + self.register_request(&req, request_received); + self.on_request(req); + } + + /// Handles a request. + fn on_request(&mut self, req: Request) { + let mut dispatcher = RequestDispatcher { + req: Some(req), + global_state: self, + }; + dispatcher.on_sync_mut::(|s, ()| { + s.shutdown_requested = true; + Ok(()) + }); + + match &mut dispatcher { + RequestDispatcher { + req: Some(req), + global_state: this, + } if this.shutdown_requested => { + this.respond(lsp_server::Response::new_err( + req.id.clone(), + lsp_server::ErrorCode::InvalidRequest as i32, + "Shutdown already requested.".to_owned(), + )); + return; + } + _ => (), + } + + dispatcher.finish(); + } + + /// Handles an incoming notification. + fn on_notification(&mut self, not: Notification) -> anyhow::Result<()> { + use crate::handlers::notification as handlers; + use lsp_types::notification as notifs; + + NotificationDispatcher { + not: Some(not), + global_state: self, + } + .on_sync_mut::(handlers::handle_did_open_text_document)? + .on_sync_mut::(handlers::handle_did_change_text_document)? + .on_sync_mut::(handlers::handle_did_close_text_document)? + .on_sync_mut::(handlers::handle_did_save_text_document)? + .finish(); + Ok(()) + } +} diff --git a/crates/lsp/src/task_pool.rs b/crates/lsp/src/task_pool.rs new file mode 100644 index 00000000..33734e0c --- /dev/null +++ b/crates/lsp/src/task_pool.rs @@ -0,0 +1,45 @@ +//! A thin wrapper around [`stdx::thread::Pool`] which threads a sender through spawned jobs. +//! 
It is used in [`crate::global_state::GlobalState`] throughout the main loop. + +use crossbeam_channel::Sender; +use stdx::thread::{Pool, ThreadIntent}; + +pub(crate) struct TaskPool { + sender: Sender, + pool: Pool, +} + +impl TaskPool { + pub(crate) fn new_with_threads(sender: Sender, threads: usize) -> TaskPool { + TaskPool { + sender, + pool: Pool::new(threads), + } + } + + pub(crate) fn spawn(&mut self, intent: ThreadIntent, task: F) + where + F: FnOnce() -> T + Send + 'static, + T: Send + 'static, + { + self.pool.spawn(intent, { + let sender = self.sender.clone(); + move || sender.send(task()).unwrap() + }) + } + + pub(crate) fn spawn_with_sender(&mut self, intent: ThreadIntent, task: F) + where + F: FnOnce(Sender) + Send + 'static, + T: Send + 'static, + { + self.pool.spawn(intent, { + let sender = self.sender.clone(); + move || task(sender) + }) + } + + pub(crate) fn len(&self) -> usize { + self.pool.len() + } +} diff --git a/crates/lsp/src/utils.rs b/crates/lsp/src/utils.rs new file mode 100644 index 00000000..14b31229 --- /dev/null +++ b/crates/lsp/src/utils.rs @@ -0,0 +1,9 @@ +use serde::de::DeserializeOwned; + +pub fn from_json( + what: &'static str, + json: &serde_json::Value, +) -> anyhow::Result { + serde_json::from_value(json.clone()) + .map_err(|e| anyhow::format_err!("Failed to deserialize {what}: {e}; {json}")) +} diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml index 92edf274..4fb06bbb 100644 --- a/crates/parser/Cargo.toml +++ b/crates/parser/Cargo.toml @@ -16,6 +16,7 @@ log = { version = "0.4.20" } codegen.workspace = true pg_query_proto_parser.workspace = true +text_size.workspace = true [dev-dependencies] insta = "1.31.0" diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 846c84fc..b0ee79f2 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -28,6 +28,9 @@ mod syntax_node; use lexer::lex; use parse::source::source; +use parse::statement::collect_statement_token_range; +use 
parse::statement_start::is_at_stmt_start; +use text_size::TextRange; pub use crate::codegen::SyntaxKind; pub use crate::parser::{Parse, Parser}; @@ -40,3 +43,79 @@ pub fn parse_source(text: &str) -> Parse { source(&mut p); p.finish() } + +pub fn get_statements(text: &str) -> Vec<(TextRange, String)> { + let mut parser = Parser::new(lex(text)); + parser.start_node(SyntaxKind::SourceFile); + + let mut ranges = vec![]; + + while !parser.eof() { + match is_at_stmt_start(&mut parser) { + Some(stmt) => { + let range = collect_statement_token_range(&mut parser, stmt); + + let from = parser.tokens.get(range.start); + let to = parser.tokens.get(range.end - 1); + // get text range from token range + let start = from.unwrap().span.start(); + let end = to.unwrap().span.end(); + ranges.push(( + TextRange::new( + text_size::TextSize::from(u32::from(start)), + text_size::TextSize::from(u32::from(end)), + ), + text.get(start.into()..end.into()).unwrap().to_string(), + )); + + while parser.pos < range.end { + parser.advance(); + } + } + None => { + parser.advance(); + } + } + } + + parser.finish_node(); + + ranges +} + +#[cfg(test)] +mod tests { + use super::*; + + fn init() { + let _ = env_logger::builder().is_test(true).try_init(); + } + + #[test] + fn test_get_statements() { + init(); + + let input = "select 1; \n select 2; \n select 3;"; + + let ranges = get_statements(input); + + println!("{:?}", ranges); + + assert_eq!(ranges.len(), 3); + + assert_eq!( + input.get(ranges[0].0.start().into()..ranges[0].0.end().into()), + Some("select 1;") + ); + + assert_eq!( + input.get(ranges[1].0.start().into()..ranges[1].0.end().into()), + Some("select 2;") + ); + + assert_eq!( + input.get(ranges[2].0.start().into()..ranges[2].0.end().into()), + Some("select 3;") + ); + } +} diff --git a/crates/parser/src/parse/statement.rs b/crates/parser/src/parse/statement.rs index e1a7c5ff..36959675 100644 --- a/crates/parser/src/parse/statement.rs +++ b/crates/parser/src/parse/statement.rs @@ -63,7 
+63,7 @@ pub fn statement(parser: &mut Parser, kind: SyntaxKind) { assert_eq!(parser.pos, token_range.end); } -fn collect_statement_token_range(parser: &mut Parser, kind: SyntaxKind) -> Range { +pub fn collect_statement_token_range(parser: &mut Parser, kind: SyntaxKind) -> Range { parser.open_buffer(); // advance with all start tokens of statement diff --git a/crates/playground/Cargo.toml b/crates/playground/Cargo.toml new file mode 100644 index 00000000..d222c6b6 --- /dev/null +++ b/crates/playground/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "playground" +version = "0.0.0" +edition = "2021" + +[dependencies] +rayon = "1.8.1" +dashmap = "5.5.3" +tree-sitter = "0.20.10" + +schema_cache.workspace = true + +[lib] +doctest = false diff --git a/crates/playground/src/analysis_host.rs b/crates/playground/src/analysis_host.rs new file mode 100644 index 00000000..54e3b12b --- /dev/null +++ b/crates/playground/src/analysis_host.rs @@ -0,0 +1,82 @@ +// use std::collections::HashMap; +// +// use crate::file_change::FileChange; +// +// struct Pending {} +// +// struct Analysis { +// pending_db: HashMap, +// } +// +// struct AnalysisHost {} +// +// impl AnalysisHost { +// fn new() -> AnalysisHost { +// AnalysisHost {} +// } +// +// /// Returns a snapshot of the current state, which you can query for +// /// semantic information. +// pub fn analysis(&self) -> Analysis { +// Analysis { +// db: self.db.snapshot(), +// } +// } +// +// /// Applies changes to the current state of the world. If there are +// /// outstanding snapshots, they will be canceled. 
+// pub fn apply_change(&mut self, change: FileChange) { +// self.db.apply_change(change); +// } +// } + +// problem: we do not snapshot but immediately apply changes and resolve requests +// no cancellation of requests like in rust-analyzer +// +// +// +// biome service: also has extension stuff figured out +// we just have to bring it down to the statement level +// so instead of documents, we store statements +// +// sinilar to biome: they store results on workspace server per file eg syntax tree +// we do the same: store results per statement on workspace level. on change, just clear it. +// we also store statement features and then just fetch them on demand and cache them. +// a feature can also be an extension and is based on +// features are stored on app level +// and then we get capabilities for a feature for a given statement +// features o +// +// we first get parse results and if yes call feature o +// parse results are cached +// if no we return +// +// only problem: they have a common representation for a syntax tree shared between all languages +// so that eg analyze inputs is language agnostic +// +// for us, tstree is the same for all languages +// +// BUT anything else is different for every language +// but maybe it still works. we will always have some tree and diagnostics for every language +// parser +// +// we do not have an unified data source for all +// its different for every language. 
+// +// we will most likely have to carry around the language with the data +// so we wont have a common crate for every operation +// +// eg plpgsql_analyze +// must be called with plpgsql data +// +// we store features outside of statement data +// +// instead of just syntax, we store multiple syntax, one for each language +// +// biome simply updates the entire file by taking the changes and applying them to the entire file +// we have to allow individual changes to a file to figure out what statements need to be updated +// +// gameplan: +// - create workspace and app structs +// - just document apis +// - then find a solution for parse results and extensions diff --git a/crates/playground/src/file_change.rs b/crates/playground/src/file_change.rs new file mode 100644 index 00000000..7b789e2f --- /dev/null +++ b/crates/playground/src/file_change.rs @@ -0,0 +1,5 @@ +// /// Encapsulate a bunch of raw `.set` calls on the database. +// #[derive(Default)] +// pub struct FileChange { +// pub files_changed: Vec<(FileId, Option>)>, +// } diff --git a/crates/playground/src/lib.rs b/crates/playground/src/lib.rs new file mode 100644 index 00000000..80b73e4c --- /dev/null +++ b/crates/playground/src/lib.rs @@ -0,0 +1,218 @@ +// mod analysis_host; +// mod file_change; +// +// use dashmap::DashMap; +// use schema_cache::SchemaCache; +// use tree_sitter::Tree; +// +// fn tmp() { +// println!("Hello, world!"); +// } +// +// struct WorldState { +// statements: DashMap, +// } +// +// // since the data model is differnet for every state we need to implement the api for every state +// // +// // for parsed and ready, we maybe even need to implement the api for every node type? +// // we should think about a solution that does not require this +// // maybe a deeper analysis on what and hwo to resolve? +// // is it worth it to produce a HIR for every node type? +// // +// // relevant features from lsp +// // NOT go to... only in plpgsql etc +// // NOT find refs... 
only in plpgsql etc +// // NOT call hierarchy stuff bc it returns where something is used +// // NOT type hierarchy stuff bc it returns where something is used +// // document highlight --> only in plpgsql +// // hover -> YES: just get data from schema cache -> ts, cst +// // code lenses -> YES: e.g. run() on top of statements. -> AST +// // folding range request -> YES, folding range is where the code can be folded -> CST +// // Selection Range Request -> YES, shows enclosing area of the selection, e.g. statement or sub +// // statement -> CST +// // document symbols -> yes, but only for plpgsql and maybe also just per statement -> stmt +// // semantic tokens -> yes -> ts, cst +// // inline value request -> only in plpgsql +// // inlay hint: yes, e.g. for functions -> ts, cst +// // completion: yes -> ts +// // diagnostics: yes -> AST +// // signature help: yes -> ts, cst +// // code action: yes eg execute statement -> stmt, maybe other stuff later eg from linter then AST +// // formatting: yes (full doc, range and maybe while typing) -> CST, maybe later AST +// // rename: no, just in plgpsql +// // +// // per node type only relevant for ast. and ast is only relevant for some features that require +// // per node logic anyway eg linter +// // +// // "analysis" not really required for most features. just for linter, type checking etc so +// // basically diagnostics and code actions / assists. +// // just cst should be build in background and then used for the features but only bc its built from +// // the ast +// // +// // change -> ts direct. then bg: cst, ast, then analysis +// // type checks, linter etc handlers get cst, ast and schema cache +// // cst and ast is kicked off directly with every change. bg analysis is kicked off if there is no +// // syntax error from the ast and when the user saves a file +// // +// // +// // only thing left are plpgsql / plrust / plpython / plv8 functions +// // --> we can use rust libs for each language!! 
eg rust analyzer, oxc for node, ruff for python! +// // --> for plpgsql we use the api from libpg_query and parse the json into our own data model +// // +// // all of this should be implemented in their own modules +// // +// // now only thing left is to design how we implement the api for different states +// // +// // just fns that define the required states via input +// // +// // IDE is a simple wrapper that holds the data and passes a ref to fns +// +// // some apis require per-file processing too +// // --> not really, e.g. for folding range we just use the range of the statement +// +// type FileId = u32; +// +// type StatementIdx = u32; +// +// #[derive(Debug, Hash)] +// struct StatementLocation { +// file_id: FileId, +// statement_idx: StatementIdx, +// } +// +// struct Pending { +// tree: Tree, +// } +// +// // this has to be extensble to support e.g. plgpsql and plpython etc +// struct Working {} +// +// // we might want the states itself also be extensible? +// struct Snapshot {} +// +// struct Parsed {} +// +// // the data in snapshot could be different for different nodes? +// +// struct AnalysisHost { +// // versioning will be important +// version: u32, +// +// // sync update on tree sitter +// pending: Pending, +// +// // report diagnostics back somehow -> via channel? 
+// // rust analyser stores the files for which there were changes, and then gets the diagnostics +// // for the files as a task +// // +// // we will produce diagnostics directly and store them in Ready +// parsed: Parsed, +// +// // +// ready: Snapshot, +// } +// +// impl AnalysisHost { +// fn new() -> AnalysisHost { +// AnalysisHost { +// version: 0, +// pending: Pending { tree: Tree::new() }, +// parsed: Parsed {}, +// ready: Snapshot {}, +// } +// } +// } +// +// enum ExtensionType { +// PlPgSQL, +// PlRust, +// PlPython, +// PlV8, +// } +// +// struct IDEExtension {} +// +// // we need more info on if soemting is stale / processing is beind done right now etc +// +// struct IDE { +// source: String, +// pub range: TextRange, +// schema_cache: SchemaCache, +// +// // there can be just one extension per statement (e.g. plpgsql, plrust, plpython, plv8) +// extension: IDEExtension, +// +// analysis: AnalysisHost, +// } +// +// impl IDE { +// pub fn new(source: String, schema_cache: SchemaCache) -> IDE { +// IDE { +// source, +// schema_cache, +// analysis: AnalysisHost::new(), +// } +// } +// +// pub fn update_schema_cache(&mut self) { +// // update schema cache +// // kick off analysis again +// } +// +// // if other statements are updated, we just need to update the range of this one +// pub fn update_range(&mut self, range: TextRange) { +// self.range = range; +// } +// } +// +// #[cfg(test)] +// mod tests { +// use std::sync::mpsc::channel; +// +// #[test] +// fn test_playground() { +// // every ide gets its own sender +// let (tx, rx) = channel(); +// +// // we push updates synchronously, and the IDE then kicks off parallel processing in the +// // background itself +// // upon completion, we send the result back to the IDE +// // but then we enforce threading from the outside +// +// // OR: +// // all of ide is does not use any async or threading at all +// // instead it exposed an api to the outside that can be used to kick off processing +// } +// } +// 
+// struct StatementAnalysis { +// // implements api +// // gets snapshot of db +// } +// +// struct StatementDatabase { +// // holds data +// } +// +// struct AnalysisHost1 { +// // global +// // exposes analysis snapshot +// // apply change +// // holds db for stmt +// } +// +// struct Pending1 { +// // holds ts tree +// // we do not create a new one, just keep this one and update it +// // we then implement some ide api methods on pending +// } +// +// struct Ready1 { +// // holds ast +// // also spawns extensions if required +// // implements api +// } + +// i dont want to expose file management to api user +// so we will have a wrapper that contls file and statement handling diff --git a/crates/postgres_lsp/src/main.rs b/crates/postgres_lsp/src/main.rs index f02f946b..4ce4f5a3 100644 --- a/crates/postgres_lsp/src/main.rs +++ b/crates/postgres_lsp/src/main.rs @@ -13,6 +13,15 @@ use tower_lsp::{Client, LanguageServer, LspService, Server}; use crate::semantic_token::semantic_token_from_syntax_kind; use crate::utils::offset_to_position; +// TODO: +// - add like in rust analyzer with threads etc +// - they have a snyc mut to schedule on current thread and we will use that for document changes +// - we will also need to schedule our own tasks eg after doc change check if syntax is valid --> +// kick off analysis +// +// rust analyzer uses vfs to track file changes and then takes them in the main loop. 
we wont neeed +// that + #[derive(Debug)] struct Backend { client: Client, diff --git a/crates/service/Cargo.toml b/crates/service/Cargo.toml new file mode 100644 index 00000000..b2f93821 --- /dev/null +++ b/crates/service/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "service" +version = "0.0.0" +edition = "2021" + +[dependencies] +schema_cache.workspace = true +fs.workspace = true +text_size.workspace = true +parser.workspace = true + +tree-sitter.workspace = true +tree_sitter_sql.workspace = true +serde.workspace = true +dashmap.workspace = true + +[lib] +doctest = false diff --git a/crates/service/src/diagnostics.rs b/crates/service/src/diagnostics.rs new file mode 100644 index 00000000..65902c40 --- /dev/null +++ b/crates/service/src/diagnostics.rs @@ -0,0 +1,11 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize)] +/// Generic errors thrown during biome operations +pub enum WorkspaceError { + /// The file does not exist in the [crate::Workspace] + NotFound(NotFound), +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct NotFound; diff --git a/crates/service/src/lib.rs b/crates/service/src/lib.rs new file mode 100644 index 00000000..0c4a1acd --- /dev/null +++ b/crates/service/src/lib.rs @@ -0,0 +1,4 @@ +#![feature(extract_if, lazy_cell)] + +pub mod diagnostics; +pub mod workspace; diff --git a/crates/service/src/workspace.rs b/crates/service/src/workspace.rs new file mode 100644 index 00000000..7146c6c2 --- /dev/null +++ b/crates/service/src/workspace.rs @@ -0,0 +1,68 @@ +use std::panic::RefUnwindSafe; + +use fs::FilePath; +use text_size::{TextRange, TextSize}; + +use crate::diagnostics::WorkspaceError; + +mod server; + +#[derive(Debug, serde::Serialize, serde::Deserialize)] +#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] +pub struct OpenFileParams { + pub path: FilePath, + pub content: String, + pub version: i32, +} + +#[derive(Debug, serde::Serialize, serde::Deserialize)] +#[cfg_attr(feature = "schema", 
derive(schemars::JsonSchema))] +pub struct CloseFileParams { + pub path: FilePath, +} + +#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] +#[derive(Debug, serde::Serialize, serde::Deserialize)] +pub struct FileChange { + /// The range of the file that changed. If `None`, the whole file changed. + pub range: Option, + pub text: String, +} + +impl FileChange { + pub fn diff_size(&self) -> TextSize { + match self.range { + Some(range) => { + let range_length: usize = range.len().into(); + let text_length = self.text.chars().count(); + let diff = (text_length as i64 - range_length as i64).abs(); + TextSize::from(u32::try_from(diff).unwrap()) + } + None => TextSize::from(u32::try_from(self.text.chars().count()).unwrap()), + } + } + + pub fn is_addition(&self) -> bool { + self.range.is_some() && self.text.len() > self.range.unwrap().len().into() + } + + pub fn is_deletion(&self) -> bool { + self.range.is_some() && self.text.len() < self.range.unwrap().len().into() + } +} + +#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] +#[derive(Debug, serde::Serialize, serde::Deserialize)] +pub struct FileChangesParams { + pub path: FilePath, + pub version: i32, + pub changes: Vec, +} + +pub trait Workspace: Send + Sync + RefUnwindSafe { + fn open_file(&self, params: OpenFileParams) -> Result<(), WorkspaceError>; + + fn close_file(&self, params: CloseFileParams) -> Result<(), WorkspaceError>; + + fn apply_file_changes(&self, params: FileChangesParams) -> Result<(), WorkspaceError>; +} diff --git a/crates/service/src/workspace/server.rs b/crates/service/src/workspace/server.rs new file mode 100644 index 00000000..e868a946 --- /dev/null +++ b/crates/service/src/workspace/server.rs @@ -0,0 +1,439 @@ +use std::{panic::RefUnwindSafe, sync::LazyLock}; + +use dashmap::DashMap; + +use fs::FilePath; +use parser::get_statements; +use schema_cache::SchemaCache; +use text_size::{TextLen, TextRange}; +use tree_sitter::{InputEdit, Tree}; + +use 
crate::diagnostics::{NotFound, WorkspaceError}; + +use super::{CloseFileParams, FileChange, FileChangesParams, OpenFileParams, Workspace}; + +pub(super) struct WorkspaceServer { + documents: DashMap, + schema_cache: SchemaCache, +} + +pub(crate) struct Statement { + pub(crate) version: i32, + parser: tree_sitter::Parser, + + pub(crate) content: String, + pub(crate) tree_sitter: Tree, +} + +impl Statement { + fn new(content: String) -> Self { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(tree_sitter_sql::language()) + .expect("Error loading sql language"); + + let tree = parser.parse(&content, None).unwrap(); + + Self { + version: 0, + parser, + content, + tree_sitter: tree, + } + } + + fn apply_change(&mut self, change: FileChange) { + assert!(change.range.is_some()); + + let range = change.range.unwrap(); + + let edit = edit_from_change( + &self.content.as_str(), + usize::from(range.start()), + usize::from(range.end()), + change.text.as_str(), + ); + + self.tree_sitter.edit(&edit); + + self.content = apply_change(&self.content, &change); + + self.tree_sitter = self + .parser + .parse(&self.content, Some(&self.tree_sitter)) + .unwrap(); + + self.version += 1; + } +} + +// we get the version from lsp +pub(crate) struct Document { + pub(crate) content: String, + pub(crate) version: i32, + + pub(crate) statements: Vec<(TextRange, Statement)>, +} + +impl Document { + fn new(content: String, version: i32) -> Self { + Self { + version, + statements: get_statements(&content) + .iter() + .map(|(range, content)| { + let statement = Statement::new(content.clone()); + (range.clone(), statement) + }) + .collect(), + content, + } + } + + fn apply_changes(&mut self, changes: FileChangesParams) { + changes.changes.iter().for_each(|c| { + self.apply_change(c); + }); + self.version = changes.version; + } + + fn apply_change(&mut self, change: &FileChange) { + if change.range.is_none() { + self.content = change.text.clone(); + self.statements = 
get_statements(&self.content) + .iter() + .map(|(range, content)| { + let statement = Statement::new(content.clone()); + (range.clone(), statement) + }) + .collect(); + return; + } + + self.content = apply_change(&self.content, change); + + if let Some(changed_stmt_pos) = self + .statements + .iter() + .position(|(range, _)| range.contains_range(change.range.unwrap())) + { + let stmt_range = self.statements[changed_stmt_pos].0; + self.statements + .get_mut(changed_stmt_pos) + .unwrap() + .1 + .apply_change(FileChange { + text: change.text.clone(), + // range must be relative to the start of the statement + range: change.range.unwrap().checked_sub(stmt_range.start()), + }); + self.statements + .iter_mut() + .skip_while(|(range, _)| range.start() <= change.range.unwrap().end()) + .for_each(|(range, _)| { + if range.start() > stmt_range.end() { + if change.is_addition() { + range.checked_add(change.diff_size()); + } else if change.is_deletion() { + range.checked_sub(change.diff_size()); + } + } + }); + } else { + // remove all statements that are affected by this change, + let mut min = change.range.unwrap().start(); + let mut max = change.range.unwrap().end(); + + let change_range = change.range.unwrap(); + + for (range, _) in self.statements.extract_if(|(range, _)| { + (change_range.start() < range.end()) && (change_range.end() > range.start()) + }) { + println!("range: {:?}", range); + if range.start() < min { + min = range.start(); + } + if range.end() > max { + max = range.end(); + } + } + if self.content.text_len() < max { + max = self.content.text_len(); + } + + // get text from min(first_stmt_start, change_start) to max(last_stmt_end, change_end) + let extracted_text = self + .content + .as_str() + .get(usize::from(min)..usize::from(max)) + .unwrap(); + // and reparse + self.statements + .extend( + get_statements(extracted_text) + .iter() + .map(|(range, content)| { + let statement = Statement::new(content.clone()); + (range.clone(), statement) + }), + ); + } 
+ } +} + +fn edit_from_change( + text: &str, + start_char: usize, + end_char: usize, + replacement_text: &str, +) -> InputEdit { + let mut start_byte = 0; + let mut end_byte = 0; + let mut chars_counted = 0; + + let mut line = 0; + let mut current_line_char_start = 0; // Track start of the current line in characters + let mut column_start = 0; + let mut column_end = 0; + + for (idx, c) in text.char_indices() { + if chars_counted == start_char { + start_byte = idx; + column_start = chars_counted - current_line_char_start; + } + if chars_counted == end_char { + end_byte = idx; + // Calculate column_end based on replacement_text + let replacement_lines: Vec<&str> = replacement_text.split('\n').collect(); + if replacement_lines.len() > 1 { + // If replacement text spans multiple lines, adjust line and column_end accordingly + line += replacement_lines.len() - 1; + column_end = replacement_lines.last().unwrap().chars().count(); + } else { + // Single line replacement, adjust column_end based on replacement text length + column_end = column_start + replacement_text.chars().count(); + } + break; // Found both start and end + } + if c == '\n' { + line += 1; + current_line_char_start = chars_counted + 1; // Next character starts a new line + } + chars_counted += 1; + } + + // Adjust end_byte based on the byte length of the replacement text + if start_byte != end_byte { + // Ensure there's a range to replace + end_byte = start_byte + replacement_text.len(); + } else if chars_counted < text.chars().count() && end_char == chars_counted { + // For insertions at the end of text + end_byte += replacement_text.len(); + } + + let start_point = tree_sitter::Point::new(line, column_start); + let end_point = tree_sitter::Point::new(line, column_end); + + // Calculate the new end byte after the insertion + let new_end_byte = start_byte + replacement_text.len(); + + // Calculate the new end position + let new_lines = replacement_text.matches('\n').count(); // Count how many new lines 
are in the inserted text + let last_line_length = replacement_text + .lines() + .last() + .unwrap_or("") + .chars() + .count(); // Length of the last line in the insertion + + let new_end_position = if new_lines > 0 { + // If there are new lines, the row is offset by the number of new lines, and the column is the length of the last line + tree_sitter::Point::new(start_point.row + new_lines, last_line_length) + } else { + // If there are no new lines, the row remains the same, and the column is offset by the length of the insertion + tree_sitter::Point::new(start_point.row, start_point.column + last_line_length) + }; + + InputEdit { + start_byte, + old_end_byte: end_byte, + new_end_byte, + start_position: start_point, + old_end_position: end_point, + new_end_position, + } +} + +fn apply_change(content: &String, change: &FileChange) -> String { + if change.range.is_none() { + return change.text.clone(); + } + + let range = change.range.unwrap(); + let start = usize::from(range.start()); + let end = usize::from(range.end()); + + let mut new_content = String::new(); + new_content.push_str(&content[..start]); + new_content.push_str(&change.text); + new_content.push_str(&content[end..]); + + new_content +} + +/// The `Workspace` object is long-lived, so we want it to be able to cross +/// unwind boundaries. 
+/// In return, we have to make sure operations on the workspace either do not +/// panic, of that panicking will not result in any broken invariant (it would +/// not result in any undefined behavior as catching an unwind is safe, but it +/// could lead too hard to debug issues) +impl RefUnwindSafe for WorkspaceServer {} + +impl Workspace for WorkspaceServer { + fn open_file(&self, params: OpenFileParams) -> Result<(), WorkspaceError> { + let OpenFileParams { + path, + content, + version, + } = params; + + self.documents + .insert(path.clone(), Document::new(content.clone(), version)); + + Ok(()) + } + + fn close_file(&self, params: CloseFileParams) -> Result<(), WorkspaceError> { + self.documents.remove(¶ms.path); + Ok(()) + } + + fn apply_file_changes(&self, params: FileChangesParams) -> Result<(), WorkspaceError> { + let mut doc = self + .documents + .get_mut(¶ms.path) + .ok_or_else(|| WorkspaceError::NotFound(NotFound))?; + + doc.apply_changes(params); + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use fs::FilePath; + use text_size::{TextRange, TextSize}; + + use crate::workspace::{ + server::{Document, Statement}, + FileChange, FileChangesParams, + }; + + #[test] + fn test_statement_apply_change() { + let input = "select id from users;"; + + let mut s = Statement::new(input.to_string()); + + s.apply_change(FileChange { + text: ",test".to_string(), + range: Some(TextRange::new(9.into(), 10.into())), + }); + + assert_eq!( + &s.tree_sitter + .root_node() + .utf8_text(s.content.as_bytes()) + .unwrap(), + &s.content.as_str() + ); + assert_eq!(s.content, "select id,test from users;"); + } + + #[test] + fn test_statement_apply_multiline_change() { + let input = "select id,\ntest from users;"; + + let mut s = Statement::new(input.to_string()); + + s.apply_change(FileChange { + text: "*".to_string(), + range: Some(TextRange::new(7.into(), 15.into())), + }); + + assert_eq!( + &s.tree_sitter + .root_node() + .utf8_text(s.content.as_bytes()) + .unwrap(), + 
&s.content.as_str() + ); + assert_eq!(s.content, "select * from users;"); + } + + #[test] + fn test_document_apply_changes() { + let input = "select id from users;\nselect * from contacts;"; + + let mut d = Document::new(input.to_string(), 0); + + assert_eq!(d.statements.len(), 2); + + d.apply_changes(FileChangesParams { + path: FilePath::new("test.sql"), + version: 1, + changes: vec![FileChange { + text: ",test from users\nselect 1;".to_string(), + range: Some(TextRange::new(9.into(), 45.into())), + }], + }); + + assert_eq!("select id,test from users\nselect 1;", d.content); + assert_eq!(d.statements.len(), 2); + } + + #[test] + fn test_document_apply_changes_within_statement() { + let input = "select id from users;\nselect * from contacts;"; + + let mut d = Document::new(input.to_string(), 0); + + assert_eq!(d.statements.len(), 2); + + d.apply_changes(FileChangesParams { + path: FilePath::new("test.sql"), + version: 1, + changes: vec![FileChange { + text: ",test".to_string(), + range: Some(TextRange::new(9.into(), 10.into())), + }], + }); + + assert_eq!( + "select id,test from users;\nselect * from contacts;", + d.content + ); + assert_eq!(d.statements.len(), 2); + assert_eq!( + d.statements + .iter() + .find(|s| s.0.start() == TextSize::from(0)) + .unwrap() + .1 + .version, + 1, + "should touch the first statement" + ); + assert_eq!( + d.statements + .iter() + .find(|s| s.0.start() != TextSize::from(0)) + .unwrap() + .1 + .version, + 0, + "should not touch the second statement" + ); + } +} diff --git a/crates/stdx/Cargo.toml b/crates/stdx/Cargo.toml new file mode 100644 index 00000000..f22d95c3 --- /dev/null +++ b/crates/stdx/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "stdx" +version = "0.0.0" +edition = "2021" + +[dependencies] +crossbeam-channel.workspace = true +jod-thread = "0.1.2" +libc.workspace = true + +[lib] +doctest = false diff --git a/crates/stdx/src/lib.rs b/crates/stdx/src/lib.rs new file mode 100644 index 00000000..7c2b43b3 --- /dev/null +++ 
b/crates/stdx/src/lib.rs @@ -0,0 +1 @@ +pub mod thread; diff --git a/crates/stdx/src/thread.rs b/crates/stdx/src/thread.rs new file mode 100644 index 00000000..5102d1df --- /dev/null +++ b/crates/stdx/src/thread.rs @@ -0,0 +1,120 @@ +//! A utility module for working with threads that automatically joins threads upon drop +//! and abstracts over operating system quality of service (QoS) APIs +//! through the concept of a “thread intent”. +//! +//! The intent of a thread is frozen at thread creation time, +//! i.e. there is no API to change the intent of a thread once it has been spawned. +//! +//! As a system, rust-analyzer should have the property that +//! old manual scheduling APIs are replaced entirely by QoS. +//! To maintain this invariant, we panic when it is clear that +//! old scheduling APIs have been used. +//! +//! Moreover, we also want to ensure that every thread has an intent set explicitly +//! to force a decision about its importance to the system. +//! Thus, [`ThreadIntent`] has no default value +//! and every entry point to creating a thread requires a [`ThreadIntent`] upfront. 
+
+use std::fmt;
+
+mod intent;
+mod pool;
+
+pub use intent::ThreadIntent;
+pub use pool::Pool;
+
+pub fn spawn<F, T>(intent: ThreadIntent, f: F) -> JoinHandle<T>
+where
+    F: FnOnce() -> T,
+    F: Send + 'static,
+    T: Send + 'static,
+{
+    Builder::new(intent)
+        .spawn(f)
+        .expect("failed to spawn thread")
+}
+
+pub struct Builder {
+    intent: ThreadIntent,
+    inner: jod_thread::Builder,
+    allow_leak: bool,
+}
+
+impl Builder {
+    pub fn new(intent: ThreadIntent) -> Builder {
+        Builder {
+            intent,
+            inner: jod_thread::Builder::new(),
+            allow_leak: false,
+        }
+    }
+
+    pub fn name(self, name: String) -> Builder {
+        Builder {
+            inner: self.inner.name(name),
+            ..self
+        }
+    }
+
+    pub fn stack_size(self, size: usize) -> Builder {
+        Builder {
+            inner: self.inner.stack_size(size),
+            ..self
+        }
+    }
+
+    pub fn allow_leak(self, b: bool) -> Builder {
+        Builder {
+            allow_leak: b,
+            ..self
+        }
+    }
+
+    pub fn spawn<F, T>(self, f: F) -> std::io::Result<JoinHandle<T>>
+    where
+        F: FnOnce() -> T,
+        F: Send + 'static,
+        T: Send + 'static,
+    {
+        let inner_handle = self.inner.spawn(move || {
+            self.intent.apply_to_current_thread();
+            f()
+        })?;
+
+        Ok(JoinHandle {
+            inner: Some(inner_handle),
+            allow_leak: self.allow_leak,
+        })
+    }
+}
+
+pub struct JoinHandle<T = ()> {
+    // `inner` is an `Option` so that we can
+    // take ownership of the contained `JoinHandle`.
+    inner: Option<jod_thread::JoinHandle<T>>,
+    allow_leak: bool,
+}
+
+impl<T> JoinHandle<T> {
+    pub fn join(mut self) -> T {
+        self.inner.take().unwrap().join()
+    }
+}
+
+impl<T> Drop for JoinHandle<T> {
+    fn drop(&mut self) {
+        if !self.allow_leak {
+            return;
+        }
+
+        if let Some(join_handle) = self.inner.take() {
+            join_handle.detach();
+        }
+    }
+}
+
+impl<T> fmt::Debug for JoinHandle<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.pad("JoinHandle { .. }")
+    }
+} diff --git a/crates/stdx/src/thread/intent.rs b/crates/stdx/src/thread/intent.rs new file mode 100644 index 00000000..7b65db30 --- /dev/null +++ b/crates/stdx/src/thread/intent.rs @@ -0,0 +1,287 @@ +//!
An opaque façade around platform-specific QoS APIs.
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+// Please maintain order from least to most priority for the derived `Ord` impl.
+pub enum ThreadIntent {
+    /// Any thread which does work that isn’t in the critical path of the user typing
+    /// (e.g. processing Go To Definition).
+    Worker,
+
+    /// Any thread which does work caused by the user typing
+    /// (e.g. processing syntax highlighting).
+    LatencySensitive,
+}
+
+impl ThreadIntent {
+    // These APIs must remain private;
+    // we only want consumers to set thread intent
+    // either during thread creation or using our pool impl.
+
+    pub(super) fn apply_to_current_thread(self) {
+        let class = thread_intent_to_qos_class(self);
+        set_current_thread_qos_class(class);
+    }
+
+    pub(super) fn assert_is_used_on_current_thread(self) {
+        if IS_QOS_AVAILABLE {
+            let class = thread_intent_to_qos_class(self);
+            assert_eq!(get_current_thread_qos_class(), Some(class));
+        }
+    }
+}
+
+use imp::QoSClass;
+
+const IS_QOS_AVAILABLE: bool = imp::IS_QOS_AVAILABLE;
+
+fn set_current_thread_qos_class(class: QoSClass) {
+    imp::set_current_thread_qos_class(class)
+}
+
+fn get_current_thread_qos_class() -> Option<QoSClass> {
+    imp::get_current_thread_qos_class()
+}
+
+fn thread_intent_to_qos_class(intent: ThreadIntent) -> QoSClass {
+    imp::thread_intent_to_qos_class(intent)
+}
+
+// All Apple platforms use XNU as their kernel
+// and thus have the concept of QoS.
+#[cfg(target_vendor = "apple")]
+mod imp {
+    use super::ThreadIntent;
+
+    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+    // Please maintain order from least to most priority for the derived `Ord` impl.
+ pub(super) enum QoSClass { + // Documentation adapted from https://github.com/apple-oss-distributions/libpthread/blob/67e155c94093be9a204b69637d198eceff2c7c46/include/sys/qos.h#L55 + // + /// TLDR: invisible maintenance tasks + /// + /// Contract: + /// + /// * **You do not care about how long it takes for work to finish.** + /// * **You do not care about work being deferred temporarily.** + /// (e.g. if the device’s battery is in a critical state) + /// + /// Examples: + /// + /// * in a video editor: + /// creating periodic backups of project files + /// * in a browser: + /// cleaning up cached sites which have not been accessed in a long time + /// * in a collaborative word processor: + /// creating a searchable index of all documents + /// + /// Use this QoS class for background tasks + /// which the user did not initiate themselves + /// and which are invisible to the user. + /// It is expected that this work will take significant time to complete: + /// minutes or even hours. + /// + /// This QoS class provides the most energy and thermally-efficient execution possible. + /// All other work is prioritized over background tasks. + Background, + + /// TLDR: tasks that don’t block using your app + /// + /// Contract: + /// + /// * **Your app remains useful even as the task is executing.** + /// + /// Examples: + /// + /// * in a video editor: + /// exporting a video to disk – + /// the user can still work on the timeline + /// * in a browser: + /// automatically extracting a downloaded zip file – + /// the user can still switch tabs + /// * in a collaborative word processor: + /// downloading images embedded in a document – + /// the user can still make edits + /// + /// Use this QoS class for tasks which + /// may or may not be initiated by the user, + /// but whose result is visible. + /// It is expected that this work will take a few seconds to a few minutes. + /// Typically your app will include a progress bar + /// for tasks using this class. 
+ /// + /// This QoS class provides a balance between + /// performance, responsiveness and efficiency. + Utility, + + /// TLDR: tasks that block using your app + /// + /// Contract: + /// + /// * **You need this work to complete + /// before the user can keep interacting with your app.** + /// * **Your work will not take more than a few seconds to complete.** + /// + /// Examples: + /// + /// * in a video editor: + /// opening a saved project + /// * in a browser: + /// loading a list of the user’s bookmarks and top sites + /// when a new tab is created + /// * in a collaborative word processor: + /// running a search on the document’s content + /// + /// Use this QoS class for tasks which were initiated by the user + /// and block the usage of your app while they are in progress. + /// It is expected that this work will take a few seconds or less to complete; + /// not long enough to cause the user to switch to something else. + /// Your app will likely indicate progress on these tasks + /// through the display of placeholder content or modals. + /// + /// This QoS class is not energy-efficient. + /// Rather, it provides responsiveness + /// by prioritizing work above other tasks on the system + /// except for critical user-interactive work. + UserInitiated, + + /// TLDR: render loops and nothing else + /// + /// Contract: + /// + /// * **You absolutely need this work to complete immediately + /// or your app will appear to freeze.** + /// * **Your work will always complete virtually instantaneously.** + /// + /// Examples: + /// + /// * the main thread in a GUI application + /// * the update & render loop in a game + /// * a secondary thread which progresses an animation + /// + /// Use this QoS class for any work which, if delayed, + /// will make your user interface unresponsive. + /// It is expected that this work will be virtually instantaneous. + /// + /// This QoS class is not energy-efficient. 
+ /// Specifying this class is a request to run with + /// nearly all available system CPU and I/O bandwidth even under contention. + UserInteractive, + } + + pub(super) const IS_QOS_AVAILABLE: bool = true; + + pub(super) fn set_current_thread_qos_class(class: QoSClass) { + let c = match class { + QoSClass::UserInteractive => libc::qos_class_t::QOS_CLASS_USER_INTERACTIVE, + QoSClass::UserInitiated => libc::qos_class_t::QOS_CLASS_USER_INITIATED, + QoSClass::Utility => libc::qos_class_t::QOS_CLASS_UTILITY, + QoSClass::Background => libc::qos_class_t::QOS_CLASS_BACKGROUND, + }; + + let code = unsafe { libc::pthread_set_qos_class_self_np(c, 0) }; + + if code == 0 { + return; + } + + let errno = unsafe { *libc::__error() }; + + match errno { + libc::EPERM => { + // This thread has been excluded from the QoS system + // due to a previous call to a function such as `pthread_setschedparam` + // which is incompatible with QoS. + // + // Panic instead of returning an error + // to maintain the invariant that we only use QoS APIs. + panic!("tried to set QoS of thread which has opted out of QoS (os error {errno})") + } + + libc::EINVAL => { + // This is returned if we pass something other than a qos_class_t + // to `pthread_set_qos_class_self_np`. + // + // This is impossible, so again panic. + unreachable!( + "invalid qos_class_t value was passed to pthread_set_qos_class_self_np" + ) + } + + _ => { + // `pthread_set_qos_class_self_np`’s documentation + // does not mention any other errors. 
+                unreachable!("`pthread_set_qos_class_self_np` returned unexpected error {errno}")
+            }
+        }
+    }
+
+    pub(super) fn get_current_thread_qos_class() -> Option<QoSClass> {
+        let current_thread = unsafe { libc::pthread_self() };
+        let mut qos_class_raw = libc::qos_class_t::QOS_CLASS_UNSPECIFIED;
+        let code = unsafe {
+            libc::pthread_get_qos_class_np(current_thread, &mut qos_class_raw, std::ptr::null_mut())
+        };
+
+        if code != 0 {
+            // `pthread_get_qos_class_np`’s documentation states that
+            // an error value is placed into errno if the return code is not zero.
+            // However, it never states what errors are possible.
+            // Inspecting the source[0] shows that, as of this writing, it always returns zero.
+            //
+            // Whatever errors the function could report in future are likely to be
+            // ones which we cannot handle anyway
+            //
+            // 0: https://github.com/apple-oss-distributions/libpthread/blob/67e155c94093be9a204b69637d198eceff2c7c46/src/qos.c#L171-L177
+            let errno = unsafe { *libc::__error() };
+            unreachable!("`pthread_get_qos_class_np` failed unexpectedly (os error {errno})");
+        }
+
+        match qos_class_raw {
+            libc::qos_class_t::QOS_CLASS_USER_INTERACTIVE => Some(QoSClass::UserInteractive),
+            libc::qos_class_t::QOS_CLASS_USER_INITIATED => Some(QoSClass::UserInitiated),
+            libc::qos_class_t::QOS_CLASS_DEFAULT => None, // QoS has never been set
+            libc::qos_class_t::QOS_CLASS_UTILITY => Some(QoSClass::Utility),
+            libc::qos_class_t::QOS_CLASS_BACKGROUND => Some(QoSClass::Background),
+
+            libc::qos_class_t::QOS_CLASS_UNSPECIFIED => {
+                // Using manual scheduling APIs causes threads to “opt out” of QoS.
+                // At this point they become incompatible with QoS,
+                // and as such have the “unspecified” QoS class.
+                //
+                // Panic instead of returning an error
+                // to maintain the invariant that we only use QoS APIs.
+                panic!("tried to get QoS of thread which has opted out of QoS")
+            }
+        }
+    }
+
+    pub(super) fn thread_intent_to_qos_class(intent: ThreadIntent) -> QoSClass {
+        match intent {
+            ThreadIntent::Worker => QoSClass::Utility,
+            ThreadIntent::LatencySensitive => QoSClass::UserInitiated,
+        }
+    }
+}
+
+// FIXME: Windows has QoS APIs, we should use them!
+#[cfg(not(target_vendor = "apple"))]
+mod imp {
+    use super::ThreadIntent;
+
+    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+    pub(super) enum QoSClass {
+        Default,
+    }
+
+    pub(super) const IS_QOS_AVAILABLE: bool = false;
+
+    pub(super) fn set_current_thread_qos_class(_: QoSClass) {}
+
+    pub(super) fn get_current_thread_qos_class() -> Option<QoSClass> {
+        None
+    }
+
+    pub(super) fn thread_intent_to_qos_class(_: ThreadIntent) -> QoSClass {
+        QoSClass::Default
+    }
+} diff --git a/crates/stdx/src/thread/pool.rs b/crates/stdx/src/thread/pool.rs new file mode 100644 index 00000000..985be57c --- /dev/null +++ b/crates/stdx/src/thread/pool.rs @@ -0,0 +1,99 @@ +//! [`Pool`] implements a basic custom thread pool
+//! inspired by the [`threadpool` crate](http://docs.rs/threadpool).
+//! When you spawn a task you specify a thread intent
+//! so the pool can schedule it to run on a thread with that intent.
+//! rust-analyzer uses this to prioritize work based on latency requirements.
+//!
+//! The thread pool is implemented entirely using
+//! the threading utilities in [`crate::thread`].
+
+use std::sync::{
+    atomic::{AtomicUsize, Ordering},
+    Arc,
+};
+
+use crossbeam_channel::{Receiver, Sender};
+
+use super::{Builder, JoinHandle, ThreadIntent};
+
+pub struct Pool {
+    // `_handles` is never read: the field is present
+    // only for its `Drop` impl.
+
+    // The worker threads exit once the channel closes;
+    // make sure to keep `job_sender` above `handles`
+    // so that the channel is actually closed
+    // before we join the worker threads!
+    job_sender: Sender<Job>,
+    _handles: Vec<JoinHandle>,
+    extant_tasks: Arc<AtomicUsize>,
+}
+
+struct Job {
+    requested_intent: ThreadIntent,
+    f: Box<dyn FnOnce() + Send + 'static>,
+}
+
+impl Pool {
+    pub fn new(threads: usize) -> Pool {
+        const STACK_SIZE: usize = 8 * 1024 * 1024;
+        const INITIAL_INTENT: ThreadIntent = ThreadIntent::Worker;
+
+        let (job_sender, job_receiver) = crossbeam_channel::unbounded();
+        let extant_tasks = Arc::new(AtomicUsize::new(0));
+
+        let mut handles = Vec::with_capacity(threads);
+        for _ in 0..threads {
+            let handle = Builder::new(INITIAL_INTENT)
+                .stack_size(STACK_SIZE)
+                .name("Worker".into())
+                .spawn({
+                    let extant_tasks = Arc::clone(&extant_tasks);
+                    let job_receiver: Receiver<Job> = job_receiver.clone();
+                    move || {
+                        let mut current_intent = INITIAL_INTENT;
+                        for job in job_receiver {
+                            if job.requested_intent != current_intent {
+                                job.requested_intent.apply_to_current_thread();
+                                current_intent = job.requested_intent;
+                            }
+                            extant_tasks.fetch_add(1, Ordering::SeqCst);
+                            (job.f)();
+                            extant_tasks.fetch_sub(1, Ordering::SeqCst);
+                        }
+                    }
+                })
+                .expect("failed to spawn thread");
+
+            handles.push(handle);
+        }
+
+        Pool {
+            _handles: handles,
+            extant_tasks,
+            job_sender,
+        }
+    }
+
+    pub fn spawn<F>(&self, intent: ThreadIntent, f: F)
+    where
+        F: FnOnce() + Send + 'static,
+    {
+        let f = Box::new(move || {
+            if cfg!(debug_assertions) {
+                intent.assert_is_used_on_current_thread();
+            }
+            f()
+        });
+
+        let job = Job {
+            requested_intent: intent,
+            f,
+        };
+        self.job_sender.send(job).unwrap();
+    }
+
+    pub fn len(&self) -> usize {
+        self.extant_tasks.load(Ordering::SeqCst)
+    }
+} diff --git a/crates/text_size/Cargo.toml b/crates/text_size/Cargo.toml new file mode 100644 index 00000000..e8aa3b67 --- /dev/null +++ b/crates/text_size/Cargo.toml @@ -0,0 +1,10 @@ +[package]
+name = "text_size"
+version = "0.0.0"
+edition = "2021"
+
+[dependencies]
+serde = { workspace = true }
+
+[lib]
+doctest = false diff --git a/crates/text_size/src/lib.rs b/crates/text_size/src/lib.rs new file mode 100644 index
00000000..20afa712 --- /dev/null +++ b/crates/text_size/src/lib.rs @@ -0,0 +1,28 @@ +//! Newtypes for working with text sizes/ranges in a more type-safe manner. +//! +//! This library can help with two things: +//! * Reducing storage requirements for offsets and ranges, under the +//! assumption that 32 bits is enough. +//! * Providing standard vocabulary types for applications where text ranges +//! are pervasive. +//! +//! However, you should not use this library simply because you work with +//! strings. In the overwhelming majority of cases, using `usize` and +//! `std::ops::Range` is better. In particular, if you are publishing a +//! library, using only std types in the interface would make it more +//! interoperable. Similarly, if you are writing something like a lexer, which +//! produces, but does not *store* text ranges, then sticking to `usize` would +//! be better. + +#![forbid(unsafe_code)] +#![warn(missing_debug_implementations, missing_docs)] + +mod range; +mod serde_impls; +mod size; +mod traits; + +pub use crate::{range::TextRange, size::TextSize, traits::TextLen}; + +#[cfg(target_pointer_width = "16")] +compile_error!("text-size assumes usize >= u32 and does not work on 16-bit targets"); diff --git a/crates/text_size/src/range.rs b/crates/text_size/src/range.rs new file mode 100644 index 00000000..48d95987 --- /dev/null +++ b/crates/text_size/src/range.rs @@ -0,0 +1,544 @@ +use cmp::Ordering; + +use { + crate::TextSize, + std::{ + cmp, + convert::TryFrom, + fmt, + ops::{Add, AddAssign, Bound, Index, IndexMut, Range, RangeBounds, Sub, SubAssign}, + }, +}; + +/// A range in text, represented as a pair of [`TextSize`][struct@TextSize]. +/// +/// It is a logic error for `start` to be greater than `end`. 
+#[derive(Default, Copy, Clone, Eq, PartialEq, Hash)] +pub struct TextRange { + // Invariant: start <= end + start: TextSize, + end: TextSize, +} + +impl fmt::Debug for TextRange { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}..{}", self.start().raw, self.end().raw) + } +} + +impl TextRange { + /// Creates a new `TextRange` with the given `start` and `end` (`start..end`). + /// + /// # Panics + /// + /// Panics if `end < start`. + /// + /// # Examples + /// + /// ```rust + /// # use biome_text_size::*; + /// let start = TextSize::from(5); + /// let end = TextSize::from(10); + /// let range = TextRange::new(start, end); + /// + /// assert_eq!(range.start(), start); + /// assert_eq!(range.end(), end); + /// assert_eq!(range.len(), end - start); + /// ``` + #[inline] + pub fn new(start: TextSize, end: TextSize) -> TextRange { + assert!(start <= end); + TextRange { start, end } + } + + /// Create a new `TextRange` with the given `offset` and `len` (`offset..offset + len`). + /// + /// # Examples + /// + /// ```rust + /// # use biome_text_size::*; + /// let text = "0123456789"; + /// + /// let offset = TextSize::from(2); + /// let length = TextSize::from(5); + /// let range = TextRange::at(offset, length); + /// + /// assert_eq!(range, TextRange::new(offset, offset + length)); + /// assert_eq!(&text[range], "23456") + /// ``` + #[inline] + pub fn at(offset: TextSize, len: TextSize) -> TextRange { + TextRange::new(offset, offset + len) + } + + /// Create a zero-length range at the specified offset (`offset..offset`). 
+ /// + /// # Examples + /// + /// ```rust + /// # use biome_text_size::*; + /// let point: TextSize; + /// # point = TextSize::from(3); + /// let range = TextRange::empty(point); + /// assert!(range.is_empty()); + /// assert_eq!(range, TextRange::new(point, point)); + /// ``` + #[inline] + pub fn empty(offset: TextSize) -> TextRange { + TextRange { + start: offset, + end: offset, + } + } + + /// Create a range up to the given end (`..end`). + /// + /// # Examples + /// + /// ```rust + /// # use biome_text_size::*; + /// let point: TextSize; + /// # point = TextSize::from(12); + /// let range = TextRange::up_to(point); + /// + /// assert_eq!(range.len(), point); + /// assert_eq!(range, TextRange::new(0.into(), point)); + /// assert_eq!(range, TextRange::at(0.into(), point)); + /// ``` + #[inline] + pub fn up_to(end: TextSize) -> TextRange { + TextRange { + start: 0.into(), + end, + } + } +} + +/// Identity methods. +impl TextRange { + /// The start point of this range. + #[inline] + pub const fn start(self) -> TextSize { + self.start + } + + /// The end point of this range. + #[inline] + pub const fn end(self) -> TextSize { + self.end + } + + /// The size of this range. + #[inline] + pub const fn len(self) -> TextSize { + // HACK for const fn: math on primitives only + TextSize { + raw: self.end().raw - self.start().raw, + } + } + + /// Check if this range is empty. + #[inline] + pub const fn is_empty(self) -> bool { + // HACK for const fn: math on primitives only + self.start().raw == self.end().raw + } +} + +/// Manipulation methods. +impl TextRange { + /// Check if this range contains an offset. + /// + /// The end index is considered excluded. 
+ /// + /// # Examples + /// + /// ```rust + /// # use biome_text_size::*; + /// let (start, end): (TextSize, TextSize); + /// # start = 10.into(); end = 20.into(); + /// let range = TextRange::new(start, end); + /// assert!(range.contains(start)); + /// assert!(!range.contains(end)); + /// ``` + #[inline] + pub fn contains(self, offset: TextSize) -> bool { + self.start() <= offset && offset < self.end() + } + + /// Check if this range contains an offset. + /// + /// The end index is considered included. + /// + /// # Examples + /// + /// ```rust + /// # use biome_text_size::*; + /// let (start, end): (TextSize, TextSize); + /// # start = 10.into(); end = 20.into(); + /// let range = TextRange::new(start, end); + /// assert!(range.contains_inclusive(start)); + /// assert!(range.contains_inclusive(end)); + /// ``` + #[inline] + pub fn contains_inclusive(self, offset: TextSize) -> bool { + self.start() <= offset && offset <= self.end() + } + + /// Check if this range completely contains another range. + /// + /// # Examples + /// + /// ```rust + /// # use biome_text_size::*; + /// let larger = TextRange::new(0.into(), 20.into()); + /// let smaller = TextRange::new(5.into(), 15.into()); + /// assert!(larger.contains_range(smaller)); + /// assert!(!smaller.contains_range(larger)); + /// + /// // a range always contains itself + /// assert!(larger.contains_range(larger)); + /// assert!(smaller.contains_range(smaller)); + /// ``` + #[inline] + pub fn contains_range(self, other: TextRange) -> bool { + self.start() <= other.start() && other.end() <= self.end() + } + + /// The range covered by both ranges, if it exists. + /// If the ranges touch but do not overlap, the output range is empty. 
+ /// + /// # Examples + /// + /// ```rust + /// # use biome_text_size::*; + /// assert_eq!( + /// TextRange::intersect( + /// TextRange::new(0.into(), 10.into()), + /// TextRange::new(5.into(), 15.into()), + /// ), + /// Some(TextRange::new(5.into(), 10.into())), + /// ); + /// ``` + #[inline] + pub fn intersect(self, other: TextRange) -> Option { + let start = cmp::max(self.start(), other.start()); + let end = cmp::min(self.end(), other.end()); + if end < start { + return None; + } + Some(TextRange::new(start, end)) + } + + /// Extends the range to cover `other` as well. + /// + /// # Examples + /// + /// ```rust + /// # use biome_text_size::*; + /// assert_eq!( + /// TextRange::cover( + /// TextRange::new(0.into(), 5.into()), + /// TextRange::new(15.into(), 20.into()), + /// ), + /// TextRange::new(0.into(), 20.into()), + /// ); + /// ``` + #[inline] + pub fn cover(self, other: TextRange) -> TextRange { + let start = cmp::min(self.start(), other.start()); + let end = cmp::max(self.end(), other.end()); + TextRange::new(start, end) + } + + /// Extends the range to cover `other` offsets as well. + /// + /// # Examples + /// + /// ```rust + /// # use biome_text_size::*; + /// assert_eq!( + /// TextRange::empty(0.into()).cover_offset(20.into()), + /// TextRange::new(0.into(), 20.into()), + /// ) + /// ``` + #[inline] + pub fn cover_offset(self, offset: TextSize) -> TextRange { + self.cover(TextRange::empty(offset)) + } + + /// Add an offset to this range. + /// + /// Note that this is not appropriate for changing where a `TextRange` is + /// within some string; rather, it is for changing the reference anchor + /// that the `TextRange` is measured against. + /// + /// The unchecked version (`Add::add`) will _always_ panic on overflow, + /// in contrast to primitive integers, which check in debug mode only. 
+ #[inline] + pub fn checked_add(self, offset: TextSize) -> Option { + Some(TextRange { + start: self.start.checked_add(offset)?, + end: self.end.checked_add(offset)?, + }) + } + + /// Subtract an offset from this range. + /// + /// Note that this is not appropriate for changing where a `TextRange` is + /// within some string; rather, it is for changing the reference anchor + /// that the `TextRange` is measured against. + /// + /// The unchecked version (`Sub::sub`) will _always_ panic on overflow, + /// in contrast to primitive integers, which check in debug mode only. + #[inline] + pub fn checked_sub(self, offset: TextSize) -> Option { + Some(TextRange { + start: self.start.checked_sub(offset)?, + end: self.end.checked_sub(offset)?, + }) + } + + /// Relative order of the two ranges (overlapping ranges are considered + /// equal). + /// + /// + /// This is useful when, for example, binary searching an array of disjoint + /// ranges. + /// + /// # Examples + /// + /// ``` + /// # use biome_text_size::*; + /// # use std::cmp::Ordering; + /// + /// let a = TextRange::new(0.into(), 3.into()); + /// let b = TextRange::new(4.into(), 5.into()); + /// assert_eq!(a.ordering(b), Ordering::Less); + /// + /// let a = TextRange::new(0.into(), 3.into()); + /// let b = TextRange::new(3.into(), 5.into()); + /// assert_eq!(a.ordering(b), Ordering::Less); + /// + /// let a = TextRange::new(0.into(), 3.into()); + /// let b = TextRange::new(2.into(), 5.into()); + /// assert_eq!(a.ordering(b), Ordering::Equal); + /// + /// let a = TextRange::new(0.into(), 3.into()); + /// let b = TextRange::new(2.into(), 2.into()); + /// assert_eq!(a.ordering(b), Ordering::Equal); + /// + /// let a = TextRange::new(2.into(), 3.into()); + /// let b = TextRange::new(2.into(), 2.into()); + /// assert_eq!(a.ordering(b), Ordering::Greater); + /// ``` + #[inline] + pub fn ordering(self, other: TextRange) -> Ordering { + if self.end() <= other.start() { + Ordering::Less + } else if other.end() <= 
self.start() { + Ordering::Greater + } else { + Ordering::Equal + } + } + + /// Subtracts an offset from the start position. + /// + /// + /// ## Panics + /// If `start - amount` is less than zero. + /// + /// ## Examples + /// + /// ``` + /// use biome_text_size::{TextRange, TextSize}; + /// + /// let range = TextRange::new(TextSize::from(5), TextSize::from(10)); + /// assert_eq!(range.sub_start(TextSize::from(2)), TextRange::new(TextSize::from(3), TextSize::from(10))); + /// ``` + #[inline] + pub fn sub_start(&self, amount: TextSize) -> TextRange { + TextRange::new(self.start() - amount, self.end()) + } + + /// Adds an offset to the start position. + /// + /// ## Panics + /// If `start + amount > end` + /// + /// ## Examples + /// + /// ``` + /// use biome_text_size::{TextRange, TextSize}; + /// + /// let range = TextRange::new(TextSize::from(5), TextSize::from(10)); + /// assert_eq!(range.add_start(TextSize::from(3)), TextRange::new(TextSize::from(8), TextSize::from(10))); + /// ``` + #[inline] + pub fn add_start(&self, amount: TextSize) -> TextRange { + TextRange::new(self.start() + amount, self.end()) + } + + /// Subtracts an offset from the end position. + /// + /// + /// ## Panics + /// If `end - amount < 0` or `end - amount < start` + /// + /// ## Examples + /// + /// ``` + /// use biome_text_size::{TextRange, TextSize}; + /// + /// let range = TextRange::new(TextSize::from(5), TextSize::from(10)); + /// assert_eq!(range.sub_end(TextSize::from(2)), TextRange::new(TextSize::from(5), TextSize::from(8))); + /// ``` + #[inline] + pub fn sub_end(&self, amount: TextSize) -> TextRange { + TextRange::new(self.start(), self.end() - amount) + } + + /// Adds an offset to the end position. 
+ /// + /// + /// ## Panics + /// If `end + amount > u32::MAX` + /// + /// ## Examples + /// + /// ``` + /// use biome_text_size::{TextRange, TextSize}; + /// + /// let range = TextRange::new(TextSize::from(5), TextSize::from(10)); + /// assert_eq!(range.add_end(TextSize::from(2)), TextRange::new(TextSize::from(5), TextSize::from(12))); + /// ``` + #[inline] + pub fn add_end(&self, amount: TextSize) -> TextRange { + TextRange::new(self.start(), self.end() + amount) + } +} + +impl Index for str { + type Output = str; + #[inline] + fn index(&self, index: TextRange) -> &str { + &self[Range::::from(index)] + } +} + +impl Index for String { + type Output = str; + #[inline] + fn index(&self, index: TextRange) -> &str { + &self[Range::::from(index)] + } +} + +impl IndexMut for str { + #[inline] + fn index_mut(&mut self, index: TextRange) -> &mut str { + &mut self[Range::::from(index)] + } +} + +impl IndexMut for String { + #[inline] + fn index_mut(&mut self, index: TextRange) -> &mut str { + &mut self[Range::::from(index)] + } +} + +impl RangeBounds for TextRange { + fn start_bound(&self) -> Bound<&TextSize> { + Bound::Included(&self.start) + } + + fn end_bound(&self) -> Bound<&TextSize> { + Bound::Excluded(&self.end) + } +} + +impl From for Range +where + T: From, +{ + #[inline] + fn from(r: TextRange) -> Self { + r.start().into()..r.end().into() + } +} + +macro_rules! 
ops { + (impl $Op:ident for TextRange by fn $f:ident = $op:tt) => { + impl $Op<&TextSize> for TextRange { + type Output = TextRange; + #[inline] + fn $f(self, other: &TextSize) -> TextRange { + self $op *other + } + } + impl $Op for &TextRange + where + TextRange: $Op, + { + type Output = TextRange; + #[inline] + fn $f(self, other: T) -> TextRange { + *self $op other + } + } + }; +} + +impl Add for TextRange { + type Output = TextRange; + #[inline] + fn add(self, offset: TextSize) -> TextRange { + self.checked_add(offset) + .expect("TextRange +offset overflowed") + } +} + +impl Sub for TextRange { + type Output = TextRange; + #[inline] + fn sub(self, offset: TextSize) -> TextRange { + self.checked_sub(offset) + .expect("TextRange -offset overflowed") + } +} + +ops!(impl Add for TextRange by fn add = +); +ops!(impl Sub for TextRange by fn sub = -); + +impl AddAssign for TextRange +where + TextRange: Add, +{ + #[inline] + fn add_assign(&mut self, rhs: A) { + *self = *self + rhs + } +} + +impl SubAssign for TextRange +where + TextRange: Sub, +{ + #[inline] + fn sub_assign(&mut self, rhs: S) { + *self = *self - rhs + } +} + +impl TryFrom<(usize, usize)> for TextRange { + type Error = std::num::TryFromIntError; + #[inline] + fn try_from((start, end): (usize, usize)) -> Result { + Ok(TextRange::new( + TextSize::try_from(start)?, + TextSize::try_from(end)?, + )) + } +} diff --git a/crates/text_size/src/serde_impls.rs b/crates/text_size/src/serde_impls.rs new file mode 100644 index 00000000..a94bee95 --- /dev/null +++ b/crates/text_size/src/serde_impls.rs @@ -0,0 +1,48 @@ +use { + crate::{TextRange, TextSize}, + serde::{de, Deserialize, Deserializer, Serialize, Serializer}, +}; + +impl Serialize for TextSize { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.raw.serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for TextSize { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + 
u32::deserialize(deserializer).map(TextSize::from) + } +} + +impl Serialize for TextRange { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + (self.start(), self.end()).serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for TextRange { + #[allow(clippy::nonminimal_bool)] + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let (start, end) = Deserialize::deserialize(deserializer)?; + if !(start <= end) { + return Err(de::Error::custom(format!( + "invalid range: {:?}..{:?}", + start, end + ))); + } + Ok(TextRange::new(start, end)) + } +} diff --git a/crates/text_size/src/size.rs b/crates/text_size/src/size.rs new file mode 100644 index 00000000..f7f27d0a --- /dev/null +++ b/crates/text_size/src/size.rs @@ -0,0 +1,161 @@ +use { + crate::TextLen, + std::{ + convert::TryFrom, + fmt, iter, + num::TryFromIntError, + ops::{Add, AddAssign, Sub, SubAssign}, + u32, + }, +}; + +/// A measure of text length. Also, equivalently, an index into text. +/// +/// This is a UTF-8 bytes offset stored as `u32`, but +/// most clients should treat it as an opaque measure. +/// +/// For cases that need to escape `TextSize` and return to working directly +/// with primitive integers, `TextSize` can be converted losslessly to/from +/// `u32` via [`From`] conversions as well as losslessly be converted [`Into`] +/// `usize`. The `usize -> TextSize` direction can be done via [`TryFrom`]. +/// +/// These escape hatches are primarily required for unit testing and when +/// converting from UTF-8 size to another coordinate space, such as UTF-16. +#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TextSize { + pub(crate) raw: u32, +} + +impl fmt::Debug for TextSize { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.raw) + } +} + +impl TextSize { + /// The text size of some primitive text-like object. + /// + /// Accepts `char`, `&str`, and `&String`. 
+ /// + /// # Examples + /// + /// ```rust + /// # use biome_text_size::*; + /// let char_size = TextSize::of('🦀'); + /// assert_eq!(char_size, TextSize::from(4)); + /// + /// let str_size = TextSize::of("rust-analyzer"); + /// assert_eq!(str_size, TextSize::from(13)); + /// ``` + #[inline] + pub fn of(text: T) -> TextSize { + text.text_len() + } +} + +/// Methods to act like a primitive integer type, where reasonably applicable. +// Last updated for parity with Rust 1.42.0. +impl TextSize { + /// Checked addition. Returns `None` if overflow occurred. + #[inline] + pub fn checked_add(self, rhs: TextSize) -> Option { + self.raw.checked_add(rhs.raw).map(|raw| TextSize { raw }) + } + + /// Checked subtraction. Returns `None` if overflow occurred. + #[inline] + pub fn checked_sub(self, rhs: TextSize) -> Option { + self.raw.checked_sub(rhs.raw).map(|raw| TextSize { raw }) + } +} + +impl From for TextSize { + #[inline] + fn from(raw: u32) -> Self { + TextSize { raw } + } +} + +impl From for u32 { + #[inline] + fn from(value: TextSize) -> Self { + value.raw + } +} + +impl TryFrom for TextSize { + type Error = TryFromIntError; + #[inline] + fn try_from(value: usize) -> Result { + Ok(u32::try_from(value)?.into()) + } +} + +impl From for usize { + #[inline] + fn from(value: TextSize) -> Self { + value.raw as usize + } +} + +macro_rules! 
ops { + (impl $Op:ident for TextSize by fn $f:ident = $op:tt) => { + impl $Op for TextSize { + type Output = TextSize; + #[inline] + fn $f(self, other: TextSize) -> TextSize { + TextSize { raw: self.raw $op other.raw } + } + } + impl $Op<&TextSize> for TextSize { + type Output = TextSize; + #[inline] + fn $f(self, other: &TextSize) -> TextSize { + self $op *other + } + } + impl $Op for &TextSize + where + TextSize: $Op, + { + type Output = TextSize; + #[inline] + fn $f(self, other: T) -> TextSize { + *self $op other + } + } + }; +} + +ops!(impl Add for TextSize by fn add = +); +ops!(impl Sub for TextSize by fn sub = -); + +impl AddAssign for TextSize +where + TextSize: Add, +{ + #[inline] + fn add_assign(&mut self, rhs: A) { + *self = *self + rhs + } +} + +impl SubAssign for TextSize +where + TextSize: Sub, +{ + #[inline] + fn sub_assign(&mut self, rhs: S) { + *self = *self - rhs + } +} + +impl iter::Sum for TextSize +where + TextSize: Add, +{ + #[inline] + fn sum>(iter: I) -> TextSize { + iter.fold(0.into(), Add::add) + } +} diff --git a/crates/text_size/src/traits.rs b/crates/text_size/src/traits.rs new file mode 100644 index 00000000..d0bb6c1f --- /dev/null +++ b/crates/text_size/src/traits.rs @@ -0,0 +1,36 @@ +use {crate::TextSize, std::convert::TryInto}; + +use priv_in_pub::Sealed; +mod priv_in_pub { + pub trait Sealed {} +} + +/// Primitives with a textual length that can be passed to [`TextSize::of`]. +pub trait TextLen: Copy + Sealed { + /// The textual length of this primitive. 
+ fn text_len(self) -> TextSize; +} + +impl Sealed for &'_ str {} +impl TextLen for &'_ str { + #[inline] + fn text_len(self) -> TextSize { + self.len().try_into().unwrap() + } +} + +impl Sealed for &'_ String {} +impl TextLen for &'_ String { + #[inline] + fn text_len(self) -> TextSize { + self.as_str().text_len() + } +} + +impl Sealed for char {} +impl TextLen for char { + #[inline] + fn text_len(self) -> TextSize { + (self.len_utf8() as u32).into() + } +} diff --git a/crates/tree_sitter_sql/Cargo.toml b/crates/tree_sitter_sql/Cargo.toml index a8ca0181..797af669 100644 --- a/crates/tree_sitter_sql/Cargo.toml +++ b/crates/tree_sitter_sql/Cargo.toml @@ -9,7 +9,7 @@ build = "build.rs" cc="*" [dependencies] -tree-sitter = "0.20.10" +tree-sitter.workspace = true [lib] doctest = false