bringing in ai-labs/vector_dbs/milvus to compose stack #373

Draft · wants to merge 4 commits into base: main
4 changes: 4 additions & 0 deletions .gitignore
@@ -12,6 +12,10 @@ models
generated
.idea
.DS_Store
milvus/seed/data/*
milvus/build/volumes/milvus/*data*
*.venv
*venv

# UI assets
**/node_modules
2 changes: 2 additions & 0 deletions milvus/build/Containerfile
@@ -0,0 +1,2 @@
FROM docker.io/milvusdb/milvus:master-20240426-bed6363f
ADD embedEtcd.yaml /milvus/configs/embedEtcd.yaml
55 changes: 55 additions & 0 deletions milvus/build/Makefile
@@ -0,0 +1,55 @@
REGISTRY ?= quay.io
REGISTRY_ORG ?= ai-lab
COMPONENT = vector_dbs

IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/milvus:latest

ARCH ?= $(shell uname -m)
PLATFORM ?= linux/$(ARCH)

gRPC_PORT := 19530
REST_PORT := 9091
CLIENT_PORT := 2379

LIB_MILVUS_DIR_MOUNTPATH := $(shell pwd)/volumes/milvus

.PHONY: build
build:
podman build --platform $(PLATFORM) -f Containerfile -t ${IMAGE} .

.PHONY: run
run:
podman run -d \
--name milvus-standalone \
--security-opt seccomp:unconfined \
-e ETCD_USE_EMBED=true \
-e ETCD_CONFIG_PATH=/milvus/configs/embedEtcd.yaml \
-e COMMON_STORAGETYPE=local \
-v $(LIB_MILVUS_DIR_MOUNTPATH):/var/lib/milvus \
-p $(gRPC_PORT):$(gRPC_PORT) \
-p $(REST_PORT):$(REST_PORT) \
-p $(CLIENT_PORT):$(CLIENT_PORT) \
--health-cmd="curl -f http://localhost:$(REST_PORT)/healthz" \
--health-interval=30s \
--health-start-period=90s \
--health-timeout=20s \
--health-retries=3 \
$(IMAGE) \
milvus run standalone 1> /dev/null

.PHONY: stop
stop:
-podman stop milvus-standalone

.PHONY: delete
delete:
-podman rm milvus-standalone -f

.PHONY: podman-clean
podman-clean:
@container_ids=$$(podman ps -a --format "{{.ID}} {{.Image}}" | awk '$$2 == "$(IMAGE)" {print $$1}'); \
echo "removing all containers with IMAGE=$(IMAGE)"; \
for id in $$container_ids; do \
echo "Removing container: $$id,"; \
podman rm -f $$id; \
done
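
Once `make run` reports healthy, a quick way to exercise the mapped gRPC port is a pymilvus smoke test along these lines (a sketch assuming pymilvus is installed on the host; the collection name and dimension are made up for illustration):

# smoke_test.py -- minimal sanity check against the standalone container
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")  # gRPC_PORT mapping from the Makefile
client.create_collection(collection_name="smoke_test", dimension=8)
client.insert(collection_name="smoke_test", data=[{"id": 0, "vector": [0.1] * 8}])
print(client.describe_collection(collection_name="smoke_test"))
client.drop_collection(collection_name="smoke_test")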
5 changes: 5 additions & 0 deletions milvus/build/embedEtcd.yaml
@@ -0,0 +1,5 @@
listen-client-urls: http://0.0.0.0:2379
advertise-client-urls: http://0.0.0.0:2379
quota-backend-bytes: 4294967296
auto-compaction-mode: revision
auto-compaction-retention: '1000'
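
These settings run etcd embedded inside the Milvus process: it listens on the client port (2379), caps the backend at 4 GiB, and auto-compacts by revision. A quick host-side reachability check (an illustrative snippet, not part of this PR) could be:

# etcd serves a /health endpoint on its client port
import requests

resp = requests.get("http://localhost:2379/health", timeout=5)
print(resp.status_code, resp.text)  # expect {"health": "true"} once the container is up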
2 changes: 2 additions & 0 deletions milvus/build/volumes/Containerfile
@@ -0,0 +1,2 @@
FROM docker.io/milvusdb/milvus:master-20240426-bed6363f
ADD embedEtcd.yaml /milvus/configs/embedEtcd.yaml
55 changes: 55 additions & 0 deletions milvus/build/volumes/Makefile
@@ -0,0 +1,55 @@
REGISTRY ?= quay.io
REGISTRY_ORG ?= ai-lab
COMPONENT = vector_dbs

IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/milvus:latest

ARCH ?= $(shell uname -m)
PLATFORM ?= linux/$(ARCH)

gRPC_PORT := 19530
REST_PORT := 9091
CLIENT_PORT := 2379

LIB_MILVUS_DIR_MOUNTPATH := $(shell pwd)/volumes/milvus

.PHONY: build
build:
podman build --platform $(PLATFORM) -f Containerfile -t ${IMAGE} .

.PHONY: run
run:
podman run -d \
--name milvus-standalone \
--security-opt seccomp:unconfined \
-e ETCD_USE_EMBED=true \
-e ETCD_CONFIG_PATH=/milvus/configs/embedEtcd.yaml \
-e COMMON_STORAGETYPE=local \
-v $(LIB_MILVUS_DIR_MOUNTPATH):/var/lib/milvus \
-p $(gRPC_PORT):$(gRPC_PORT) \
-p $(REST_PORT):$(REST_PORT) \
-p $(CLIENT_PORT):$(CLIENT_PORT) \
--health-cmd="curl -f http://localhost:$(REST_PORT)/healthz" \
--health-interval=30s \
--health-start-period=90s \
--health-timeout=20s \
--health-retries=3 \
$(IMAGE) \
milvus run standalone 1> /dev/null

.PHONY: stop
stop:
-podman stop milvus-standalone

.PHONY: delete
delete:
-podman rm milvus-standalone -f

.PHONY: podman-clean
podman-clean:
@container_ids=$$(podman ps -a --format "{{.ID}} {{.Image}}" | awk '$$2 == "$(IMAGE)" {print $$1}'); \
echo "removing all containers with IMAGE=$(IMAGE)"; \
for id in $$container_ids; do \
echo "Removing container: $$id,"; \
podman rm -f $$id; \
done
5 changes: 5 additions & 0 deletions milvus/build/volumes/embedEtcd.yaml
@@ -0,0 +1,5 @@
listen-client-urls: http://0.0.0.0:2379
advertise-client-urls: http://0.0.0.0:2379
quota-backend-bytes: 4294967296
auto-compaction-mode: revision
auto-compaction-retention: '1000'
Empty file.
3 changes: 3 additions & 0 deletions milvus/seed/.env.example
@@ -0,0 +1,3 @@
MODEL_NAME=
MODEL_ENDPOINT=
MODEL_TOKEN=
29 changes: 29 additions & 0 deletions milvus/seed/README.md
@@ -0,0 +1,29 @@
RAG application with ILAB

1. set up a vector DB (Milvus)

Development story:
0. Starting Goal:
- Naive RAG, not KG-aided
- Addition:

1. Identify what the model lacks knowledge in

2. Can I use the internally trained model or do I have to use the HF model?
-

- UI integration

-----------------------------------------------

Spec pieces defined so far:
- variable definition
- class Config
- _identify_params, _llm_type, _extract_token_usage

Inherent in defining this spec, which could eventually live as a contribution to langchain, are some assumptions / questions I made (a rough sketch of the wrapper shape follows this list):
- Is the model serializable? Assumed no.
- Max tokens for merlinite and granite: both assumed 4096.
- Does this model have attention / memory?
- Do these models have a verbosity option for output?
- Recommended default values:
-
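
For readers unfamiliar with the LangChain custom-LLM contract, the spec above roughly maps onto a subclass like this (a hedged sketch of the general shape, not the IlabLLM implementation in this PR; the class name and payload fields are illustrative):

from typing import Any, List, Optional

import requests
from langchain_core.language_models.llms import LLM


class SketchIlabLLM(LLM):
    """Illustrative wrapper only -- not the IlabLLM class shipped in this PR."""
    model_endpoint: str
    model_name: str
    max_tokens: int = 4096  # assumed ceiling for merlinite/granite, per the notes above

    @property
    def _llm_type(self) -> str:
        return "sketch-ilab"

    @property
    def _identifying_params(self) -> dict:
        return {"model_name": self.model_name, "max_tokens": self.max_tokens}

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        # Assumes an OpenAI-style chat endpoint; adjust the payload to the real API.
        payload = {
            "model": self.model_name,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": self.max_tokens,
            "stop": stop or ["<|endoftext|>"],
        }
        resp = requests.post(self.model_endpoint, json=payload, timeout=60)
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]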

Binary file not shown.
Binary file not shown.
66 changes: 66 additions & 0 deletions milvus/seed/client.py
@@ -0,0 +1,66 @@
import os

from dotenv import load_dotenv
from ilab_model import IlabLLM
from langchain_core.prompts import PromptTemplate  # used by the commented-out template example below

load_dotenv()

# manage ENV
model_endpoint=os.getenv('MODEL_ENDPOINT')
if model_endpoint == "":
model_endpoint = "http://localhost:8001"

model_name=os.getenv('MODEL_NAME')
if model_name == "":
model_name = "ibm/merlinite-7b"

model_token=os.getenv('ILAB_API_TOKEN')

# HTTPS client
# client_key_path = "/home/fedora/client-tls-key.pem2"
# client_crt_path = "/home/fedora/client-tls-crt.pem2"
# server_ca_crt = "/home/fedora/server-ca-crt.pem2"

# ssl_context = ssl.create_default_context(cafile=server_ca_crt)
# ssl_context.load_cert_chain(certfile=client_crt_path, keyfile=client_key_path)

# client = httpx.Client(verify=ssl_context)

# data = {
# "model": "instructlab/granite-7b-lab",
# "messages": [
# {"role": "system", "content": "your name is carl"},
# {"role": "user", "content": "what is your name?"}
# ],
# "temperature": 1,
# "max_tokens": 1792,
# "top_p": 1,
# "repetition_penalty": 1.05,
# "stop": ["<|endoftext|>"],
# "logprobs": False,
# "stream": False
# }

# response = requests.post(url, headers=headers, data=json.dumps(data), verify=False)
# print(response.json())
print(f'model_name={model_name}')
llm = IlabLLM(
model_endpoint=model_endpoint,
model_name=model_name,
apikey=model_token,
temperature=1,
max_tokens=500,
top_p=1,
repetition_penalty=1.05,
stop=["<|endoftext|>"],
streaming=False
)

prompt="I am training for a marathon in 12 weeks. Can you help me build an exercise plan to help prepare myself?"
prompts=[prompt]
# prompt_template = PromptTemplate.from_template(prompt)
llm.generate(prompts)
# llm.invoke("dog")
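
If IlabLLM (imported from ilab_model above) follows the standard LangChain contract — an assumption, since that class is not shown in this diff — generate returns an LLMResult whose text can be read back like:

result = llm.generate(prompts)
print(result.generations[0][0].text)  # first prompt, first generation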
40 changes: 40 additions & 0 deletions milvus/seed/dumb_client.py
@@ -0,0 +1,40 @@
import requests
import json
import os
from dotenv import load_dotenv

load_dotenv()

# manage ENV
model_endpoint=os.getenv('MODEL_ENDPOINT')
if model_endpoint == "":
model_endpoint = "http://localhost:8001"

model_name=os.getenv('MODEL_NAME')
if model_name == "":
model_name = "ibm/merlinite-7b"

model_token=os.getenv('MODEL_TOKEN')

headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {model_token}"
}

data = {
"model": model_name,
"messages": [
{"role": "system", "content": "your name is carl"},
{"role": "user", "content": "what is your name?"}
],
"temperature": 1,
"max_tokens": 1792,
"top_p": 1,
"repetition_penalty": 1.05,
"stop": ["<|endoftext|>"],
"logprobs": False,
"stream": False
}

# verify=False disables TLS verification -- acceptable for local testing only
response = requests.post(model_endpoint, headers=headers, data=json.dumps(data), verify=False)
print(response.json())
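
Assuming the endpoint implements the OpenAI-style chat schema that this payload implies, the reply text can be pulled from the printed body like so (illustrative only):

body = response.json()
print(body["choices"][0]["message"]["content"])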