Commit
Signed-off-by: greg pereira <[email protected]>
milvus/seed/README.md:

RAG application with ILAB

1. set up a vector DB (Milvus)
Development story:

0. Starting goal:
   - Naive RAG, no KG aid
   - Additions:
1. Identify what the model lacks knowledge in
2. Can I use the internally trained model, or do I have to use the HF model?

- UI integration

-----------------------------------------------
variable definition:
- class Config
- _identify_params
- _llm_type, _extract_token_usage

Inherent in defining this spec, which could eventually live as a contribution to langchain, are some assumptions / questions I made:

- Is the model serializable: Assumed no
- Max tokens for merlinite and granite: Both assumed 4096
- Does this model have attention / memory?
- Do these models have a verbosity option for output?
- Recommended default values:
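The method names in the notes above mirror LangChain's custom-LLM interface (where the property is actually spelled `_identifying_params`). A minimal stand-in sketch of that shape, folding in the assumptions from the notes — it deliberately does not inherit from langchain so it stays self-contained, and the class name and defaults are hypothetical:

```python
# Hypothetical sketch of the wrapper shape described in the notes above;
# not the actual contribution, and not a real langchain subclass.
from typing import Any, Dict, List


class MerliniteLLM:
    """Minimal stand-in for a LangChain-style LLM wrapper."""

    class Config:
        # assumption from the notes: the model is not serializable
        arbitrary_types_allowed = True

    def __init__(self, model_name: str = "ibm/merlinite-7b", max_tokens: int = 4096):
        # assumption from the notes: merlinite and granite both cap at 4096 tokens
        self.model_name = model_name
        self.max_tokens = max_tokens

    @property
    def _llm_type(self) -> str:
        # short tag identifying the backend type
        return "merlinite"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        # parameters that identify this model instance in logs and caches
        return {"model_name": self.model_name, "max_tokens": self.max_tokens}

    @staticmethod
    def _extract_token_usage(responses: List[Dict[str, Any]]) -> Dict[str, int]:
        # sum token counts from OpenAI-style "usage" blocks across responses
        usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
        for r in responses:
            for key in usage:
                usage[key] += r.get("usage", {}).get(key, 0)
        return usage
```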
-----------------------------------------------

import json
import os

import requests
from dotenv import load_dotenv

load_dotenv()

# manage ENV (os.getenv returns None when a variable is unset, not "",
# so pass the fallback as the second argument)
model_endpoint = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")
model_name = os.getenv("MODEL_NAME", "ibm/merlinite-7b")
model_token = os.getenv("MODEL_TOKEN")

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {model_token}",
}

data = {
    "model": model_name,
    "messages": [
        {"role": "system", "content": "your name is carl"},
        {"role": "user", "content": "what is your name?"},
    ],
    "temperature": 1,
    "max_tokens": 1792,
    "top_p": 1,
    "repetition_penalty": 1.05,
    "stop": ["<|endoftext|>"],
    "logprobs": False,
    "stream": False,
}

# verify=False disables TLS verification; acceptable against a local dev
# endpoint, not in production
response = requests.post(model_endpoint, headers=headers, data=json.dumps(data), verify=False)
print(response.json())
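The script prints the raw JSON body. Assuming the endpoint returns the OpenAI-style chat-completions shape (`choices[0].message.content`), a small helper — hypothetical, not part of the original script — shows how the assistant reply could be pulled out; the `sample` body below is fabricated for illustration:

```python
# Hypothetical helper to extract the assistant message from an
# OpenAI-style /chat/completions response body.
from typing import Any, Dict


def extract_reply(body: Dict[str, Any]) -> str:
    # choices[0].message.content holds the assistant text in the
    # OpenAI-compatible schema; fail loudly if the shape is unexpected
    choices = body.get("choices")
    if not choices:
        raise ValueError(f"no choices in response: {body}")
    return choices[0]["message"]["content"]


# fabricated example response body
sample = {
    "choices": [{"message": {"role": "assistant", "content": "My name is Carl."}}],
    "usage": {"prompt_tokens": 18, "completion_tokens": 6, "total_tokens": 24},
}
print(extract_reply(sample))  # -> My name is Carl.
```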