From 7c4a3d11803c6e870e1b0b63d049eb4f4f754fad Mon Sep 17 00:00:00 2001 From: Zhenbo Li Date: Mon, 21 Oct 2024 20:29:04 -0400 Subject: [PATCH] Fix: spaces are highlighted by mistake --- fire_seq_search_server/debug_server.sh | 2 +- fire_seq_search_server/src/language_tools/tokenizer.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fire_seq_search_server/debug_server.sh b/fire_seq_search_server/debug_server.sh index fbc3c0d..55cae89 100644 --- a/fire_seq_search_server/debug_server.sh +++ b/fire_seq_search_server/debug_server.sh @@ -4,7 +4,7 @@ rm -f ./fire_seq_search_server cargo build --features llm cp target/debug/fire_seq_search_server ./fire_seq_search_server -export RUST_LOG="warn,fire_seq_search_server=debug" +export RUST_LOG="warn,fire_seq_search_server=info" #export RUST_LOG="debug" export RUST_BACKTRACE=1 #RAYON_NUM_THREADS=1 diff --git a/fire_seq_search_server/src/language_tools/tokenizer.rs b/fire_seq_search_server/src/language_tools/tokenizer.rs index 7cac6d9..abff1fe 100644 --- a/fire_seq_search_server/src/language_tools/tokenizer.rs +++ b/fire_seq_search_server/src/language_tools/tokenizer.rs @@ -13,6 +13,7 @@ use log::{debug, info}; pub fn filter_out_stopwords<'a,'b>(term_tokens: &'a [String], nltk: &'b HashSet) -> Vec<&'a str> { let term_ref: Vec<&str> = term_tokens.iter().map(|s| &**s).collect(); let terms_selected: Vec<&str> = term_ref.into_iter() + .filter(|&s| ! (s.trim().is_empty() ) ) .filter(|&s| !nltk.contains(&(&s).to_lowercase() ) ) .collect(); terms_selected