diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 00000000..35568ec0
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,37 @@
+name: Python CI
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive # This will clone the repository with all its submodules
+          fetch-depth: 0 # This fetches all history so you can access any version of the submodules
+
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10' # Specify the Python version you want
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build pytest
+      - name: Build DLL
+        run: |
+          pip install -e .
+      - name: Run tests
+        run: |
+          python -m pytest tests
\ No newline at end of file
diff --git a/tests/test_text_generation.py b/tests/test_text_generation.py
index f37a4781..04782a21 100644
--- a/tests/test_text_generation.py
+++ b/tests/test_text_generation.py
@@ -1,7 +1,7 @@
 import os
 from nexa.gguf.llama import llama
 from tests.utils import download_model
-
+from nexa.gguf.lib_utils import is_gpu_available
 # Constants
 TINY_LLAMA_URL = "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf"
 OUTPUT_DIR = os.getcwd()
@@ -12,7 +12,7 @@ def init_llama_model(verbose=False, n_gpu_layers=-1, chat_format=None, embedding
     return llama.Llama(
         model_path=MODEL_PATH,
         verbose=verbose,
-        n_gpu_layers=n_gpu_layers,
+        n_gpu_layers=n_gpu_layers if is_gpu_available() else 0,
         chat_format=chat_format,
         embedding=embedding,
     )
diff --git a/tests/test_vlm.py b/tests/test_vlm.py
index b70389be..25d81d56 100644
--- a/tests/test_vlm.py
+++ b/tests/test_vlm.py
@@ -4,6 +4,7 @@
 from nexa.gguf.llama import llama
 from nexa.gguf.llama.llama_chat_format import NanoLlavaChatHandler
 from tests.utils import download_model
+from nexa.gguf.lib_utils import is_gpu_available
 
 def image_to_base64_data_uri(file_path):
     """
@@ -31,7 +32,7 @@ def test_image_generation():
         model_path=model_path,
         chat_handler=chat_handler,
         n_ctx=2048, # n_ctx should be increased to accommodate the image embedding
-        n_gpu_layers=-1, # Uncomment to use GPU acceleration
+        n_gpu_layers=-1 if is_gpu_available() else 0, # Use GPU acceleration when available, otherwise fall back to CPU
         verbose=False,
     )
     output = llm.create_chat_completion(