diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 00000000..35568ec0
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,37 @@
+name: Python CI
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive # This will clone the repository with all its submodules
+          fetch-depth: 0 # This fetches all history so you can access any version of the submodules
+
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10' # Specify the Python version you want
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build pytest
+      - name: Build DLL
+        run: |
+          pip install -e .
+      - name: Run tests
+        run: |
+          python -m pytest tests
\ No newline at end of file
diff --git a/tests/test_text_generation.py b/tests/test_text_generation.py
index f37a4781..04782a21 100644
--- a/tests/test_text_generation.py
+++ b/tests/test_text_generation.py
@@ -1,7 +1,7 @@
 import os
 from nexa.gguf.llama import llama
 from tests.utils import download_model
-
+from nexa.gguf.lib_utils import is_gpu_available
 # Constants
 TINY_LLAMA_URL = "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf"
 OUTPUT_DIR = os.getcwd()
@@ -12,7 +12,7 @@ def init_llama_model(verbose=False, n_gpu_layers=-1, chat_format=None, embedding
     return llama.Llama(
         model_path=MODEL_PATH,
         verbose=verbose,
-        n_gpu_layers=n_gpu_layers,
+        n_gpu_layers=n_gpu_layers if is_gpu_available() else 0,
         chat_format=chat_format,
         embedding=embedding,
     )
diff --git a/tests/test_vlm.py b/tests/test_vlm.py
index b70389be..25d81d56 100644
--- a/tests/test_vlm.py
+++ b/tests/test_vlm.py
@@ -4,6 +4,7 @@
 from nexa.gguf.llama import llama
 from nexa.gguf.llama.llama_chat_format import NanoLlavaChatHandler
 from tests.utils import download_model
+from nexa.gguf.lib_utils import is_gpu_available
 
 def image_to_base64_data_uri(file_path):
     """
@@ -31,7 +32,7 @@ def test_image_generation():
         model_path=model_path,
         chat_handler=chat_handler,
         n_ctx=2048, # n_ctx should be increased to accommodate the image embedding
-        n_gpu_layers=-1, # Uncomment to use GPU acceleration
+        n_gpu_layers=-1 if is_gpu_available() else 0, # Use GPU acceleration when available, otherwise fall back to CPU
         verbose=False,
     )
     output = llm.create_chat_completion(