next refactor
Signed-off-by: greg pereira <[email protected]>
Gregory-Pereira committed May 28, 2024
1 parent eeb65e8 commit bb044d9
Showing 11 changed files with 551 additions and 91 deletions.
7 changes: 0 additions & 7 deletions milvus/build/merlinite-qq.sh

This file was deleted.

29 changes: 29 additions & 0 deletions milvus/seed/README.md
@@ -0,0 +1,29 @@
RAG application with ILAB

1. Set up a vector DB (Milvus)
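
A minimal sketch of step 1, assuming pymilvus and a Milvus instance on its default port; the collection name and embedding dimension here are placeholders, not from this repo:

from pymilvus import MilvusClient

# Connect to a local Milvus instance (default port 19530).
milvus = MilvusClient(uri="http://localhost:19530")

# Create a collection to hold document embeddings (name and 768-dim
# vector size are illustrative only).
milvus.create_collection(collection_name="rag_docs", dimension=768)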

Development story:
0. Starting goal:
- Naive RAG, not aided by a knowledge graph (KG)
- Additions:

1. Identify what the model lacks knowledge in

2. Can I use the internally trained model, or do I have to use the HF model?


- UI integration

-----------------------------------------------

Variable definitions for the custom LLM spec (sketched below):
- class Config
- _identify_params

- _llm_type, _extract_token_usage


Inherent in defining this spec, which could eventually live as a contribution to LangChain, are some assumptions / questions I made (a rough skeleton follows after this list):
- Is the model serializable? Assumed no.
- Max tokens for merlinite and granite: both assumed to be 4096
- Do these models have attention / memory?
- Do these models have a verbosity option for output?
- Recommended default values:

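A rough sketch of how this spec might look as a LangChain custom LLM. The class name matches client.py below and the defaults come from the assumptions above; note that LangChain's actual hook is spelled _identifying_params, and only some of the fields listed earlier are sketched:

from typing import Any, Dict, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM

class IlabLLM(LLM):
    """Hypothetical LangChain wrapper for an ilab-served model."""

    model_endpoint: str
    model_name: str = "ibm/merlinite-7b"
    max_tokens: int = 4096  # assumed ceiling for merlinite and granite

    @property
    def _llm_type(self) -> str:
        return "ilab"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        # The model itself is assumed non-serializable, so only simple
        # parameters are exposed here.
        return {"model_name": self.model_name, "max_tokens": self.max_tokens}

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # Placeholder: a real implementation would POST the prompt to
        # model_endpoint and return the generated text.
        raise NotImplementedError("wire this up to the ilab model endpoint")
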
Binary file not shown.
Binary file not shown.
98 changes: 53 additions & 45 deletions milvus/seed/client.py
@@ -1,6 +1,12 @@
import httpx
import requests
import json
import os
import ssl
from ilab_model import IlabLLM
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain

load_dotenv()
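
# Assumes a .env file (or exported env vars) along the lines of:
#   MODEL_ENDPOINT=<serving endpoint URL>
#   MODEL_NAME=ibm/merlinite-7b
#   ILAB_API_TOKEN=<API token>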

# manage ENV
model_endpoint=os.getenv('MODEL_ENDPOINT')
@@ -11,48 +17,50 @@
if not model_name:  # os.getenv returns None when unset, not ""
    model_name = "ibm/merlinite-7b"

model_token=os.getenv('ILAB_API_TOKEN')  # renamed from MODEL_TOKEN in this refactor

# HTTPS client
client_key_path = "/home/fedora/client-tls-key.pem2"
client_crt_path = "/home/fedora/client-tls-crt.pem2"
server_ca_crt = "/home/fedora/server-ca-crt.pem2"

ssl_context = ssl.create_default_context(cafile=server_ca_crt)
ssl_context.load_cert_chain(certfile=client_crt_path, keyfile=client_key_path)

client = httpx.Client(verify=ssl_context)


def get_openai_response(prompt, **kwargs):
    url = model_endpoint
    headers = {
        "Authorization": f"Bearer {model_token}",
        "Content-Type": "application/json"
    }
    data = {
        "model": model_name,
        "max_tokens": 4096,
        "messages": [
            {
                "role": "system",
                "content": "You are an AI language model developed by IBM Research. You are a cautious assistant that carefully follows instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        "logprobs": False,
        "stream": False
    }

    response = client.post(url, json=data, headers=headers)
    response.raise_for_status()
    return response.json()

question = """ Question: I am training for an upcoming marathon but I am completely out of shape! Can you help me to implement a plan to prepare me for running a marathon in 12 weeks?
Answer: Let's think step by step. """

# get_openai_response(question)
# client_key_path = "/home/fedora/client-tls-key.pem2"
# client_crt_path = "/home/fedora/client-tls-crt.pem2"
# server_ca_crt = "/home/fedora/server-ca-crt.pem2"

# ssl_context = ssl.create_default_context(cafile=server_ca_crt)
# ssl_context.load_cert_chain(certfile=client_crt_path, keyfile=client_key_path)

# client = httpx.Client(verify=ssl_context)

# data = {
# "model": "instructlab/granite-7b-lab",
# "messages": [
# {"role": "system", "content": "your name is carl"},
# {"role": "user", "content": "what is your name?"}
# ],
# "temperature": 1,
# "max_tokens": 1792,
# "top_p": 1,
# "repetition_penalty": 1.05,
# "stop": ["<|endoftext|>"],
# "logprobs": False,
# "stream": False
# }

# response = requests.post(url, headers=headers, data=json.dumps(data), verify=False)
# print(response.json())
print(f'model_name={model_name}')
llm = IlabLLM(
    model_endpoint=model_endpoint,
    model_name=model_name,
    apikey=model_token,
    temperature=1,
    max_tokens=500,
    top_p=1,
    repetition_penalty=1.05,
    stop=["<|endoftext|>"],
    streaming=False
)

prompt="I am training for a marathon in 12 weeks. Can you help me build an exercise plan to help prepare myself?"
prompts=[prompt]
# prompt_template = PromptTemplate.from_template(prompt)
llm.generate(prompts)
# llm.invoke("dog")
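# Note: llm.generate(prompts) returns a LangChain LLMResult (completions live
# under .generations), while llm.invoke(...) returns the completion text directly.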
40 changes: 40 additions & 0 deletions milvus/seed/dumb_client.py
@@ -0,0 +1,40 @@
import requests
import json
import os
from dotenv import load_dotenv

load_dotenv()

# manage ENV
model_endpoint=os.getenv('MODEL_ENDPOINT')
if not model_endpoint:  # os.getenv returns None when unset, not ""
    model_endpoint = "http://localhost:8001"

model_name=os.getenv('MODEL_NAME')
if not model_name:
    model_name = "ibm/merlinite-7b"

model_token=os.getenv('MODEL_TOKEN')

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {model_token}"
}

data = {
    "model": model_name,
    "messages": [
        {"role": "system", "content": "your name is carl"},
        {"role": "user", "content": "what is your name?"}
    ],
    "temperature": 1,
    "max_tokens": 1792,
    "top_p": 1,
    "repetition_penalty": 1.05,
    "stop": ["<|endoftext|>"],
    "logprobs": False,
    "stream": False
}

response = requests.post(model_endpoint, headers=headers, data=json.dumps(data), verify=False)
print(response.json())
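# The endpoint is assumed to be OpenAI-compatible, so the reply text should
# appear under response.json()["choices"][0]["message"]["content"].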