diff --git a/engine/custom_rank.py b/engine/custom_rank.py
index 128ec99..d00847d 100644
--- a/engine/custom_rank.py
+++ b/engine/custom_rank.py
@@ -3,12 +3,13 @@
 import pandas as pd
 
 from custom_db import get_doc_by_id
-from custom_tokenizer import tokenize_data
 from sklearn.feature_extraction.text import TfidfVectorizer
 
+from tokenizer import process_text
+
 
 def preprocess_query(Q):
-    tokenized_query = tokenize_data(Q)
+    tokenized_query = process_text(Q)
     return tokenized_query
 
 
diff --git a/engine/main.py b/engine/main.py
index 780c7cd..451e7b7 100755
--- a/engine/main.py
+++ b/engine/main.py
@@ -14,7 +14,7 @@
 # Pipeline
 from crawl import Crawler
 from custom_db import index_pages, access_index, save_pages
-from custom_tokenizer import Tokenizer
+from tokenizer import Tokenizer
 from index import Indexer
 
 # Threading
diff --git a/engine/requirements.txt b/engine/requirements.txt
index 9cf4449..4792ab8 100644
--- a/engine/requirements.txt
+++ b/engine/requirements.txt
@@ -12,3 +12,4 @@ pandas==2.2.2
 scikit-learn==1.5.1
 aiohttp==3.9.5
 spacy==3.7.5
+lxml==5.2.2
diff --git a/engine/test.py b/engine/test.py
deleted file mode 100644
index ad58a80..0000000
--- a/engine/test.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# file to test the written functions
-import logging
-
-from custom_tokenizer import tokenize_data, tf_idf_vectorize, top_30_words
-
-CUSTOM_TEXT = "Lorem Ipsum is simply dummy text" + " " + " \n "+ "of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum."
-
-top_30_words = top_30_words([CUSTOM_TEXT])
-print(top_30_words)
diff --git a/engine/tokenizer.py b/engine/tokenizer.py
index 925996d..7fc5f19 100644
--- a/engine/tokenizer.py
+++ b/engine/tokenizer.py
@@ -257,7 +257,9 @@ async def process(self, data, link):
     "I'm 6'2\" tall and I weigh 180 lbs. I'm 25 years old.",
 ]
 
-for sentence in test_sentences:
-    print(f"Original: {sentence}")
-    print(f"Tokenized: {process_text(sentence)}")
-    print()
+if __name__ == "__main__":
+
+    for sentence in test_sentences:
+        print(f"Original: {sentence}")
+        print(f"Tokenized: {process_text(sentence)}")
+        print()