diff --git a/milvus/build/merlinite-qq.sh b/milvus/build/merlinite-qq.sh
deleted file mode 100755
index a70f526..0000000
--- a/milvus/build/merlinite-qq.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-## EXPECTED INPUT IS STRING ECAPSULATED
-input="$1"
-echo "input: $input"
-request_body='{"model":"ibm/merlinite-7b","logprobs":false,"messages":[{"role": "system","content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."},{"role":"user","content": "'$input'"}],"stream":false}'
-echo $request_body
-curl -X 'POST' 'https://merlinite-7b-vllm-openai.apps.fmaas-backend.fmaas.res.ibm.com/v1/chat/completions' -H 'accept: application/json' -H 'Content-Type: application/json' -k -d $request_body
diff --git a/milvus/seed/README.md b/milvus/seed/README.md
index e69de29..5df33a2 100644
--- a/milvus/seed/README.md
+++ b/milvus/seed/README.md
@@ -0,0 +1,29 @@
+RAG application with ILAB
+
+1. Set up a vector DB (Milvus)
+
+Development story:
+    0. Starting Goal:
+        - Naive RAG, no KG aided
+    - Addition:
+        1. Identify what the model lacks knowledge in
+        2. Can I use the internal trained model or do I have to use the HF model?
+    -
+
+- UI integration
+
+-----------------------------------------------
+
+variable definition
+class Config
+
+_identify_params,
+_llm_type, _extract_token_usage,
+
+Inherent in defining this spec, which could eventually live as a contribution to langchain, are some assumptions / questions I made:
+    - Is the model serializable: assumed no
+    - Max tokens for merlinite and granite: both assumed 4096
+    - Does this model have attention / memory?
+    - Do these models have a verbosity option for output?
+    - Recommended default values:
+    -
\ No newline at end of file
diff --git a/milvus/seed/__pycache__/ilab_model.cpython-311.pyc b/milvus/seed/__pycache__/ilab_model.cpython-311.pyc
new file mode 100644
index 0000000..2b8da03
Binary files /dev/null and b/milvus/seed/__pycache__/ilab_model.cpython-311.pyc differ
diff --git a/milvus/seed/__pycache__/merlinite_model.cpython-311.pyc b/milvus/seed/__pycache__/merlinite_model.cpython-311.pyc
new file mode 100644
index 0000000..19d0734
Binary files /dev/null and b/milvus/seed/__pycache__/merlinite_model.cpython-311.pyc differ
diff --git a/milvus/seed/client.py b/milvus/seed/client.py
index 0e23b13..53d1e4e 100644
--- a/milvus/seed/client.py
+++ b/milvus/seed/client.py
@@ -1,6 +1,12 @@
-import httpx
+import requests
+import json
 import os
-import ssl
+from ilab_model import IlabLLM
+from dotenv import load_dotenv
+from langchain_core.prompts import PromptTemplate
+from langchain.chains import LLMChain
+
+load_dotenv()
 
 # manage ENV
 model_endpoint=os.getenv('MODEL_ENDPOINT')
@@ -11,48 +17,50 @@
 if model_name == "":
     model_name = "ibm/merlinite-7b"
 
-model_token=os.getenv('MODEL_TOKEN')
+model_token=os.getenv('ILAB_API_TOKEN')
 
 # HTTPS client
-client_key_path = "/home/fedora/client-tls-key.pem2"
-client_crt_path = "/home/fedora/client-tls-crt.pem2"
-server_ca_crt = "/home/fedora/server-ca-crt.pem2"
-
-ssl_context = ssl.create_default_context(cafile=server_ca_crt)
-ssl_context.load_cert_chain(certfile=client_crt_path, keyfile=client_key_path)
-
-client = httpx.Client(verify=ssl_context)
-
-
-def get_openai_response(prompt, **kwargs):
-    url = model_endpoint
-    headers = {
-        "Authorization": f"Bearer {model_token}",
-        "Content-Type": "application/json"
-    }
-    data = {
-        "model": model_name,
-        "max_tokens": 4096,
-        "messages": [
-            {
-                "role": "system",
-                "content": "You are an AI language model developed by IBM Research. You are a cautious assistant that carefully follows instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
-            },
-            {
-                "role":"user",
-                "content": prompt
-            }
-        ],
-        "logprobs":False,
-        "stream":False
-    }
-
-    response = client.post(url, json=data, headers=headers)
-    response.raise_for_status()
-    return response.json()
-
-question = """ Question: I am training for an upcoming marathon but I am completely out of shape! Can you help me to implement a plan to prepare me for running a marathon in 12 weeks?
-
""" - -# get_openai_response(question) +# client_key_path = "/home/fedora/client-tls-key.pem2" +# client_crt_path = "/home/fedora/client-tls-crt.pem2" +# server_ca_crt = "/home/fedora/server-ca-crt.pem2" + +# ssl_context = ssl.create_default_context(cafile=server_ca_crt) +# ssl_context.load_cert_chain(certfile=client_crt_path, keyfile=client_key_path) + +# client = httpx.Client(verify=ssl_context) + +# data = { +# "model": "instructlab/granite-7b-lab", +# "messages": [ +# {"role": "system", "content": "your name is carl"}, +# {"role": "user", "content": "what is your name?"} +# ], +# "temperature": 1, +# "max_tokens": 1792, +# "top_p": 1, +# "repetition_penalty": 1.05, +# "stop": ["<|endoftext|>"], +# "logprobs": False, +# "stream": False +# } + +# response = requests.post(url, headers=headers, data=json.dumps(data), verify=False) +# print(response.json()) +print(f'model_name={model_name}') +llm = IlabLLM( + model_endpoint=model_endpoint, + model_name=model_name, + apikey=model_token, + temperature=1, + max_tokens=500, + top_p=1, + repetition_penalty=1.05, + stop=["<|endoftext|>"], + streaming=False +) + +prompt="I am training for a marathon in 12 weeks. Can you help me build an exercise plan to help prepare myself?" +prompts=[prompt] +# prompt_template = PromptTemplate.from_template(prompt) +llm.generate(prompts) +# llm.invoke("dog") diff --git a/milvus/seed/dumb_client.py b/milvus/seed/dumb_client.py new file mode 100644 index 0000000..e08c912 --- /dev/null +++ b/milvus/seed/dumb_client.py @@ -0,0 +1,40 @@ +import requests +import json +import os +from dotenv import load_dotenv + +load_dotenv() + +# manage ENV +model_endpoint=os.getenv('MODEL_ENDPOINT') +if model_endpoint == "": + model_endpoint = "http://localhost:8001" + +model_name=os.getenv('MODEL_NAME') +if model_name == "": + model_name = "ibm/merlinite-7b" + +model_token=os.getenv('MODEL_TOKEN') + +headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {model_token}" +} + +data = { + "model": model_name, + "messages": [ + {"role": "system", "content": "your name is carl"}, + {"role": "user", "content": "what is your name?"} + ], + "temperature": 1, + "max_tokens": 1792, + "top_p": 1, + "repetition_penalty": 1.05, + "stop": ["<|endoftext|>"], + "logprobs": False, + "stream": False +} + +response = requests.post(model_endpoint, headers=headers, data=json.dumps(data), verify=False) +print(response.json()) \ No newline at end of file diff --git a/milvus/seed/ilab_model.py b/milvus/seed/ilab_model.py new file mode 100644 index 0000000..bc0b009 --- /dev/null +++ b/milvus/seed/ilab_model.py @@ -0,0 +1,372 @@ +#!/bin/python3 + +## This is a langchain compatabible implementation for the Ilab models. It will remain in this repo until we publish APIKey +## functionality and route backendservice endpoints through a proxy that can be exposed, similary to openAI. At which point +## we can move this pr as a contribution to langchain and easily scale our usage! + +### Fixes in progress: + ### - override self params with calls invoke or generate for temperature, etc. 
+    ### - test that invoke works, generate starts
+    ### - Feat: streaming implementation
+    ### - Callbacks with streaming
+    ### - Authentication enablement via user and password rather than just API keys
+    ### - Authentication checking for API keys (whole backend API setup)
+    ### - Utilize tags and metadata with langserve
+    ### - Allow logprobs as an option
+
+import os
+import httpx
+import requests
+import json
+from langchain_core.language_models.llms import BaseLLM
+from dotenv import load_dotenv
+from langchain_core.outputs import Generation, LLMResult
+from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
+from langchain_core.utils import (
+    convert_to_secret_str,
+    get_from_dict_or_env,
+    get_pydantic_field_names,
+)
+from langchain_core.utils.utils import build_extra_kwargs
+
+load_dotenv()
+from typing import (
+    Any,
+    Dict,
+    List,
+    Set,
+    Optional,
+    Mapping
+)
+
+class IlabLLM(BaseLLM):
+    """
+    Instructlab large language model.
+
+    As this model is currently private, you must have pre-arranged access.
+    """
+
+    # REQUIRED PARAMS
+
+    model_endpoint: str = ""
+    """The model endpoint to use."""
+
+    model_name: str = Field(alias="model")
+    """Type of deployed model to use."""
+
+    # OPTIONAL BUT DEFAULTS
+
+    system_prompt: Optional[str] = "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
+    """Default system prompt to use."""
+
+    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Holds any model parameters valid for the `create` call not explicitly specified."""
+
+    max_tokens: int = 4096
+    """The maximum number of tokens to generate in the completion.
+    -1 returns as many tokens as possible given the prompt and
+    the model's maximal context size."""
+
+    # TOTALLY OPTIONAL
+
+    apikey: Optional[SecretStr] = None
+    """API key for the Ilab model APIs (merlinite or granite)."""
+
+    top_p: Optional[float] = 1
+    """Total probability mass of tokens to consider at each step."""
+
+    frequency_penalty: Optional[float] = 0
+    """Penalizes repeated tokens according to frequency."""
+
+    repetition_penalty: Optional[float] = 0
+    """Penalizes repeated tokens."""
+
+    temperature: Optional[float] = 0.7
+    """What sampling temperature to use."""
+
+    # verbose: Optional[str] = None
+    # """If the model should return verbose output or standard"""
+
+    streaming: bool = False
+    """Whether to stream the results or not."""
+
+    # FUTURE EXTENSIONS
+
+    tags: Optional[List[str]] = None
+    """Tags to add to the run trace."""
+
+    metadata: Optional[Dict[str, Any]] = None
+    """Metadata to add to the run trace."""
+
+    # This gets implemented with stream
+    # callbacks: Optional[SecretStr] = None
+    # """callbacks"""
+
+    # END PARAMS
+
+    class Config:
+        """Configuration for this pydantic object."""
+        allow_population_by_field_name = True
+
+    @property
+    def lc_secrets(self) -> Dict[str, str]:
+        """A map of constructor argument names to secret ids.
+
+        For example:
+            {
+                "apikey": "ILAB_API_KEY",
+            }
+        """
+        return {
+            "apikey": "ILAB_API_KEY",
+        }
+
+    @classmethod
+    def is_lc_serializable(cls) -> bool:
+        """Return whether this model can be serialized by Langchain."""
+        return False
+
+    @root_validator(pre=True)
+    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        """Build extra kwargs from additional params that were passed in."""
+        all_required_field_names = get_pydantic_field_names(cls)
+        extra = values.get("model_kwargs", {})
+        values["model_kwargs"] = build_extra_kwargs(
+            extra, values, all_required_field_names
+        )
+        return values
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        if values["streaming"] == True:
+            raise ValueError("streaming has not yet been implemented.")
+        if values["apikey"] or "ILAB_API_KEY" in os.environ:
+            values["apikey"] = convert_to_secret_str(
+                get_from_dict_or_env(values, "apikey", "ILAB_API_KEY")
+            )
+        values['model_name'] = get_from_dict_or_env(
+            values,
+            "model_name",
+            "MODEL_NAME",
+        )
+        ## extension for more options for required auth params
+        ## client_params = {
+        ##     "api_key": (
+        ##         values["apikey"].get_secret_value()
+        ##         if values["apikey"]
+        ##         else None
+        ##     )
+        ## }
+        # CURRENTLY WE DON'T CHECK KEYS
+        ## if not client_params['values']['apikey']:
+        ##     raise ValueError("Did not find token `apikey`.")
+        return values
+
+    @property
+    def _params(self) -> Mapping[str, Any]:
+        """Get the identifying parameters."""
+        params = {**{
+            "model_name": self.model_name,
+            "model_endpoint": self.model_endpoint,
+        }, **self._default_params}
+        if self.apikey:
+            params['apikey'] = self.apikey
+        if self.model_name:
+            params['model_name'] = self.model_name
+        return params
+
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling the Merlinite API."""
+        normal_params: Dict[str, Any] = {
+            "temperature": self.temperature,
+            "top_p": self.top_p,
+            "frequency_penalty": self.frequency_penalty,
+            "presence_penalty": self.repetition_penalty,
+        }
+
+        if self.max_tokens is not None:
+            normal_params["max_tokens"] = self.max_tokens
+
+        return {**normal_params, **self.model_kwargs}
+
+    def _invocation_params(self) -> Dict[str, Any]:
+        """Get the parameters used to invoke the model."""
+        return self._params
+
+    def make_request(self, params: Dict[str, Any], prompt: str, stop: Optional[List[str]]) -> Dict[str, Any]:
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.apikey}"
+        }
+
+        data = {
+            "model": params['model_name'],
+            "messages": [
+                {
+                    "role": "system",
+                    "content": self.system_prompt
+                },
+                {
+                    "role": "user",
+                    "content": prompt
+                }
+            ],
+            "temperature": params['temperature'],
+            "max_tokens": params['max_tokens'],
+            "top_p": params['top_p'],
+            "stop": stop,
+            "logprobs": False,
+        }
+
+        if 'repetition_penalty' in params:
+            data["repetition_penalty"] = params['repetition_penalty']
+
+        if 'streaming' in params:
+            # Shadowing basemodel re-route for streaming
+            data["stream"] = params["streaming"]
+
+        response = requests.post(self.model_endpoint, headers=headers, data=json.dumps(data), verify=False)
+        response_json = response.json()
+        return response_json
+
+    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
+        """Call the ilab inference endpoint. The result of invoke.
+
+        Args:
+            prompt: The prompt to pass into the model.
+            stop: Optional list of stop words to use when generating.
+            run_manager: Optional callback manager.
+
+        Returns:
+            The string generated by the model.
+
+        Example:
+            .. code-block:: python
+
+                response = merlinite.invoke("What is a molecule")
+        """
+
+        invocation_params = self._invocation_params()
+        params = {**invocation_params, **kwargs}
+
+        if stop is None:
+            stop = ["<|endoftext|>"]
+        response_json = self.make_request(
+            params=params, prompt=prompt, stop=stop
+        )
+        return response_json['choices'][0]['message']['content']
+
+    def _generate(
+        self,
+        prompts: List[str],
+        stop: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> LLMResult:
+        """Call out to Ilab's endpoint with prompt.
+
+        Args:
+            prompts: The prompts to pass into the model.
+            stop: Optional list of stop words to use when generating.
+
+        Returns:
+            The full LLM output.
+
+        Example:
+            .. code-block:: python
+
+                response = ilab.generate(["Tell me a joke."])
+        """
+
+        invocation_params = self._invocation_params()
+        params = {**invocation_params, **kwargs}
+        token_usage: Dict[str, int] = {}
+        system_fingerprint: Optional[str] = None
+
+        response_json = self.make_request(
+            params=params, prompt=prompts[0], stop=stop
+        )
+
+        if not ('choices' in response_json and len(response_json['choices']) > 0):
+            raise ValueError("No valid response from the model")
+
+        if response_json.get("error"):
+            raise ValueError(response_json.get("error"))
+
+        if not system_fingerprint:
+            system_fingerprint = response_json.get("system_fingerprint")
+        return self._create_llm_result(
+            response=response_json,
+        )
+
+    @property
+    def _llm_type(self) -> str:
+        """Get the type of language model used by this chat model. Used for logging purposes only."""
+        return "instructlab"
+
+    @property
+    def max_context_size(self) -> int:
+        """Get max context size for this model."""
+        return self.modelname_to_contextsize(self.model_name)
+
+    def _create_llm_result(self, response: List[dict]) -> LLMResult:
+        """Create the LLMResult from the choices and prompt."""
+        generations = []
+        for res in response:
+            results = res.get("results")
+            if results:
+                finish_reason = results[0].get("choices")[0].get('finished_reason')
+                gen = Generation(
+                    text=results[0].get("choices")[0].get('message').get('content'),
+                    generation_info={"finish_reason": finish_reason},
+                )
+                generations.append([gen])
+        final_token_usage = self._extract_token_usage(response)
+        llm_output = {
+            "token_usage": final_token_usage,
+            "model_name": self.model_name
+        }
+        return LLMResult(generations=generations, llm_output=llm_output)
+
+    @staticmethod
+    def _extract_token_usage(
+        response: Optional[List[Dict[str, Any]]] = None,
+    ) -> Dict[str, Any]:
+        if response is None:
+            return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+
+        prompt_tokens = 0
+        completion_tokens = 0
+        total_tokens = 0
+
+        def get_count_value(key: str, result: Dict[str, Any]) -> int:
+            return result.get(key, 0) or 0
+
+        for res in response:
+            results = res.get("results")
+            if results:
+                prompt_tokens += get_count_value("prompt_tokens", results[0])
+                completion_tokens += get_count_value(
+                    "completion_tokens", results[0]
+                )
+                total_tokens += get_count_value("total_tokens", results[0])
+
+        return {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens
+        }
+
+    @staticmethod
+    def modelname_to_contextsize(modelname: str) -> int:
+        """Calculate the maximum number of tokens possible to generate for a model."""
+        model_token_mapping = {
+            "ibm/merlinite-7b": 4096,
+            "instructlab/granite-7b-lab": 4096
+        }
+
+        context_size = model_token_mapping.get(modelname, None)
+
+        if context_size is None:
+            raise ValueError(
+                f"Unknown model: {modelname}. Please provide a valid Ilab model name. "
+                "Known models are: " + ", ".join(model_token_mapping.keys())
+            )
+
+        return context_size
diff --git a/milvus/seed/new-seed.py b/milvus/seed/new-seed.py
deleted file mode 100644
index c6ea909..0000000
--- a/milvus/seed/new-seed.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import os
-from pymilvus import MilvusClient, DataType
-from langchain_experimental.text_splitter import SemanticChunker
-from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInstructEmbeddings
-from tika import parser # pip install tika
-
-def log_step(step_num, step_name) -> None:
-    print("-----------------------------------------------")
-    print(f"{step_num}. {step_name}")
-    print("-----------------------------------------------")
-
-model_name = "ibm/merlinite-7b"
-model_kwargs = {"device": "cpu"}
-encode_kwargs = {"normalize_embeddings": True}
-
-log_step(0, "Generate embeddings")
-embeddings = HuggingFaceBgeEmbeddings(
-    model_name=model_name,
-    model_kwargs=model_kwargs,
-    encode_kwargs=encode_kwargs,
-    query_instruction = "search_query:",
-    embed_instruction = "search_document:"
-)
-
-
-# data_url = "https://orkerhulen.dk/onewebmedia/DnD%205e%20Players%20Handbook%20%28BnW%20OCR%29.pdf"
-# loader = WebBaseLoader(data_url)
-# data = loader.load()
-raw = parser.from_file("data/DnD-5e-Handbook.pdf")
-print(raw['content'])
diff --git a/milvus/seed/new_seed.py b/milvus/seed/new_seed.py
new file mode 100644
index 0000000..60311c7
--- /dev/null
+++ b/milvus/seed/new_seed.py
@@ -0,0 +1,41 @@
+import os
+from pymilvus import MilvusClient, DataType
+from langchain_community.vectorstores import Milvus
+from langchain_experimental.text_splitter import SemanticChunker
+from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
+from langchain import hub
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.output_parsers import StrOutputParser
+from tika import parser # pip install tika
+from langchain_openai import OpenAI
+from ilab_models import IlabOpenAILLM
+
+
+def log_step(step_num, step_name) -> None:
+    print("-----------------------------------------------")
+    print(f"{step_num}. {step_name}")
{step_name}") + print("-----------------------------------------------") + +embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2") + +text_splitter = SemanticChunker(embeddings=embeddings) # fails + +loader = PyPDFLoader('./data/DnD-5e-Handbook.pdf') +data = loader.load() +split_data = text_splitter.split_documents(data) +print(len(split_data)) +vector_store = Milvus.from_documents( + documents=split_data, + embedding=embeddings, + connection_args={"host": "localhost", "port": 19530}, + collection_name="dnd" +) + +llm = IlabOpenAILLM( + +) + +retreiver = vector_store.as_retreiver() +prompt = hub.pull("rlm/rag-prompt") \ No newline at end of file diff --git a/milvus/seed/requirements.txt b/milvus/seed/requirements.txt index 297139d..431c4f8 100644 --- a/milvus/seed/requirements.txt +++ b/milvus/seed/requirements.txt @@ -7,3 +7,4 @@ langchain-experimental==0.0.59 tika==2.6.0 sentence-transformers==2.7.0 beautifulsoup4==4.12.3 +python-dotenv==1.0.1 diff --git a/milvus/seed/seed.py b/milvus/seed/seed.py index 09a0f4d..044158e 100644 --- a/milvus/seed/seed.py +++ b/milvus/seed/seed.py @@ -4,7 +4,7 @@ from langchain_experimental.text_splitter import SemanticChunker from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInstructEmbeddings -from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter from langchain import hub from langchain_core.runnables import RunnablePassthrough from langchain_core.output_parsers import StrOutputParser @@ -24,18 +24,25 @@ def milvus_init() -> MilvusClient: def fill_dnd_collection(text_splitter: any, embeddings: any) -> None: # local - raw = parser.from_file("data/DnD-5e-Handbook.pdf") - print(len(raw['content'])) - docs = text_splitter.create_documents([raw['content']]) + # raw = parser.from_file("data/DnD-5e-Handbook.pdf") + # print(len(raw['content'])) + # docs = text_splitter.create_documents([raw['content']]) + # vector_store = Milvus.from_documents( + # docs, + # embedding=embeddings, + # connection_args={"host": "localhost", "port": 19530}, + # collection_name="dnd" + # ) + # remote + loader = PyPDFLoader('https://orkerhulen.dk/onewebmedia/DnD%205e%20Players%20Handbook%20%28BnW%20OCR%29.pdf') + data = loader.load() + split_data = text_splitter.split_documents(data) vector_store = Milvus.from_documents( - docs, + documents=split_data, embedding=embeddings, connection_args={"host": "localhost", "port": 19530}, collection_name="dnd" ) - # remote - # loader = PyPDFLoader('https://orkerhulen.dk/onewebmedia/DnD%205e%20Players%20Handbook%20%28BnW%20OCR%29.pdf') - # data = loader.load() def generate_embeddings() -> any: # model_name = "ibm/merlinite-7b"