diff --git a/bpm_ai_inference/llm/llama_cpp/_constants.py b/bpm_ai_inference/llm/llama_cpp/_constants.py index a039d27..12d1afc 100644 --- a/bpm_ai_inference/llm/llama_cpp/_constants.py +++ b/bpm_ai_inference/llm/llama_cpp/_constants.py @@ -1,7 +1,7 @@ -DEFAULT_MODEL = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF" +DEFAULT_MODEL = "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF" DEFAULT_QUANT_LARGE = "*Q8_0.gguf" -DEFAULT_QUANT_BALANCED = "*Q4_K_M.gguf" +DEFAULT_QUANT_BALANCED = "*Q5_K_M.gguf" DEFAULT_QUANT_SMALL = "*Q2_K.gguf" DEFAULT_TEMPERATURE = 0.0 DEFAULT_MAX_RETRIES = 8 diff --git a/poetry.lock b/poetry.lock index 1a27149..9a400c2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,13 +2,13 @@ [[package]] name = "accelerate" -version = "0.29.3" +version = "0.30.0" description = "Accelerate" optional = false python-versions = ">=3.8.0" files = [ - {file = "accelerate-0.29.3-py3-none-any.whl", hash = "sha256:99d633d4b6126817c5e554487406748be95c8d1d1e659dd2fd60657e35f532dd"}, - {file = "accelerate-0.29.3.tar.gz", hash = "sha256:1a5a845b06b24b41736b219b2b20fd021ca5dff4070a252445fd6de736e347ac"}, + {file = "accelerate-0.30.0-py3-none-any.whl", hash = "sha256:cd93902dc37a49952b4b8b324c5d53776c936fcd77b4b0162cf5ce55a4fc2721"}, + {file = "accelerate-0.30.0.tar.gz", hash = "sha256:0f8e103a6ced54cd40af6bb2331ec922e42269d2778038dc564d53047b1386d2"}, ] [package.dependencies] @@ -21,14 +21,15 @@ safetensors = ">=0.3.1" torch = ">=1.10.0" [package.extras] -dev = ["bitsandbytes", "black (>=23.1,<24.0)", "datasets", "deepspeed", "evaluate", "hf-doc-builder (>=0.3.0)", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist", "rich", "ruff (>=0.2.1,<0.3.0)", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] +deepspeed = ["deepspeed (<=0.14.0)"] +dev = ["bitsandbytes", "black (>=23.1,<24.0)", "datasets", "diffusers", "evaluate", "hf-doc-builder (>=0.3.0)", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist", "rich", "ruff (>=0.2.1,<0.3.0)", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] quality = ["black (>=23.1,<24.0)", "hf-doc-builder (>=0.3.0)", "ruff (>=0.2.1,<0.3.0)"] rich = ["rich"] sagemaker = ["sagemaker"] -test-dev = ["bitsandbytes", "datasets", "deepspeed", "evaluate", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] +test-dev = ["bitsandbytes", "datasets", "diffusers", "evaluate", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] test-prod = ["parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist"] test-trackers = ["comet-ml", "dvclive", "tensorboard", "wandb"] -testing = ["bitsandbytes", "datasets", "deepspeed", "evaluate", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] +testing = ["bitsandbytes", "datasets", "diffusers", "evaluate", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] [[package]] name = "aiohttp" @@ -276,17 +277,17 @@ lxml = ["lxml"] [[package]] name = "boto3" -version = "1.34.94" +version = "1.34.98" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.94-py3-none-any.whl", hash = "sha256:bbb87d641c73462e53b1777083b55c8f13921618ad08757478a8122985c56c13"}, - {file = "boto3-1.34.94.tar.gz", hash = "sha256:22f65b3c9b7a419f8f39c2dddc421e14fab8cbb3bd8a9d467e874237d39f59b1"}, + {file = "boto3-1.34.98-py3-none-any.whl", hash = "sha256:030e43b8efe22b4cf10b9f3ef9e30cd4cf9ef9784b26efe9a4583339f2b2bcec"}, + {file = "boto3-1.34.98.tar.gz", hash = "sha256:28c10956033fa79e64529f48c3b62db86d5e4b77024a7343764b6bde6b553543"}, ] [package.dependencies] -botocore = ">=1.34.94,<1.35.0" +botocore = ">=1.34.98,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -295,13 +296,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.94" +version = "1.34.98" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.94-py3-none-any.whl", hash = "sha256:f00a79002e0cb9d6895ecd0919c506402850177d7b6c4d2634fa2da362d95bcb"}, - {file = "botocore-1.34.94.tar.gz", hash = "sha256:99b11be9a28f9051af4c96fa121e9c3f22a86d499abd773c9e868b2a38961bae"}, + {file = "botocore-1.34.98-py3-none-any.whl", hash = "sha256:631c0031d8ce922b5752ab395ead896a0281b0dc74745a754d0351a27c5d83de"}, + {file = "botocore-1.34.98.tar.gz", hash = "sha256:4cee65df02f4b0be08ad1401965cc89efafebc50ef0727d2d17083c7f1ed2831"}, ] [package.dependencies] @@ -1189,13 +1190,13 @@ socks = ["socksio (==1.*)"] [[package]] name = "huggingface-hub" -version = "0.22.2" +version = "0.23.0" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.22.2-py3-none-any.whl", hash = "sha256:3429e25f38ccb834d310804a3b711e7e4953db5a9e420cc147a5e194ca90fd17"}, - {file = "huggingface_hub-0.22.2.tar.gz", hash = "sha256:32e9a9a6843c92f253ff9ca16b9985def4d80a93fb357af5353f770ef74a81be"}, + {file = "huggingface_hub-0.23.0-py3-none-any.whl", hash = "sha256:075c30d48ee7db2bba779190dc526d2c11d422aed6f9044c5e2fdc2c432fdb91"}, + {file = "huggingface_hub-0.23.0.tar.gz", hash = "sha256:7126dedd10a4c6fac796ced4d87a8cf004efc722a5125c2c09299017fa366fa9"}, ] [package.dependencies] @@ -1208,16 +1209,16 @@ tqdm = ">=4.42.1" typing-extensions = ">=3.7.4.3" [package.extras] -all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] hf-transfer = ["hf-transfer (>=0.1.4)"] inference = ["aiohttp", "minijinja (>=1.0)"] quality = ["mypy (==1.5.1)", "ruff (>=0.3.0)"] tensorflow = ["graphviz", "pydot", "tensorflow"] tensorflow-testing = ["keras (<3.0)", "tensorflow"] -testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "minijinja (>=1.0)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] torch = ["safetensors", "torch"] typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] @@ -1312,13 +1313,13 @@ files = [ [[package]] name = "joblib" -version = "1.4.0" +version = "1.4.2" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.8" files = [ - {file = "joblib-1.4.0-py3-none-any.whl", hash = "sha256:42942470d4062537be4d54c83511186da1fc14ba354961a2114da91efa9a4ed7"}, - {file = "joblib-1.4.0.tar.gz", hash = "sha256:1eb0dc091919cd384490de890cb5dfd538410a6d4b3b54eef09fb8c50b409b1c"}, + {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, + {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, ] [[package]] @@ -1450,17 +1451,17 @@ six = "*" [[package]] name = "langfuse" -version = "2.27.3" +version = "2.29.0" description = "A client library for accessing langfuse" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langfuse-2.27.3-py3-none-any.whl", hash = "sha256:e5522abac56cb64b08fdcfc8a81317cb322b361cbd693f3fd7a19200e1fe5efc"}, - {file = "langfuse-2.27.3.tar.gz", hash = "sha256:4d6b9434702c3d729a8b898f07e6065a8026ba568bdae603813b4f0678a17884"}, + {file = "langfuse-2.29.0-py3-none-any.whl", hash = "sha256:e0bbb49ca4da3f8a4f51aab0ad47c975def6167e318fbb8cfefe7f457bd7a020"}, + {file = "langfuse-2.29.0.tar.gz", hash = "sha256:e18053f5670c4c1165d233dc620f0cb5f9890af60519ef0de38cff6cc2e22a38"}, ] [package.dependencies] -backoff = ">=2.2.1,<3.0.0" +backoff = ">=1.10.0" httpx = ">=0.15.4,<1.0" idna = ">=3.7,<4.0" packaging = ">=23.2,<24.0" @@ -1536,12 +1537,12 @@ test = ["pytest (==7.4.3)"] [[package]] name = "llama-cpp-python" -version = "0.2.67" +version = "0.2.69" description = "Python bindings for the llama.cpp library" optional = false python-versions = ">=3.8" files = [ - {file = "llama_cpp_python-0.2.67.tar.gz", hash = "sha256:29e64afbe86ea19fb9ed681f79215c1029bc229017dd278cef93e96de904cfb9"}, + {file = "llama_cpp_python-0.2.69.tar.gz", hash = "sha256:b37e864b4d9f7ac286a3e926d87afab2f136ae9290e11088f7a205b80d3c04a9"}, ] [package.dependencies] @@ -3853,13 +3854,13 @@ optree = ["optree (>=0.9.1)"] [[package]] name = "tqdm" -version = "4.66.2" +version = "4.66.4" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"}, - {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"}, + {file = "tqdm-4.66.4-py3-none-any.whl", hash = "sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644"}, + {file = "tqdm-4.66.4.tar.gz", hash = "sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb"}, ] [package.dependencies] @@ -4358,4 +4359,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "10f0d3da73777133251574f824c2d17cdbcb7f3f7a89cae1c87337adf089ff4b" +content-hash = "42aafa0a5c4b55482bd4f7de437bbd18a78a21ecc28847d7e562c3996530843a" diff --git a/pyproject.toml b/pyproject.toml index fb058c2..c709397 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bpm-ai-inference" -version = "0.3.3" +version = "0.3.4" description = "Inference and server for local AI implementations of bpm-ai-core abstractions." authors = ["Bennet Krause "] repository = "https://github.com/holunda-io/bpm-ai-inference" @@ -23,9 +23,7 @@ optimum = {extras = ["onnxruntime"], version = "^1.18.0"} gliner = "^0.1.6" scipy = "1.10.1" py-cpuinfo = "^9.0.0" -llama-cpp-python = "^0.2.65" -#fast-fit = "^1.2.0" -#jupyter = "^1.0.0" +llama-cpp-python = "^0.2.69" [tool.poetry.group.test.dependencies]