Skip to content

Commit

Permalink
Merge pull request #13 from clevaway/develop
Browse files Browse the repository at this point in the history
Refactored the TTS and STT into a single file called Interlocus for better management
  • Loading branch information
FotieMConstant authored Jul 12, 2024
2 parents 0502d35 + da76aa1 commit fef9856
Show file tree
Hide file tree
Showing 9 changed files with 134 additions and 149 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTOR.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ pip install -r llm/llama.cpp/requirements.txt
> Note: some model architectures require using specific convert scripts. For example, Qwen models require running `convert-hf-to-gguf.py` instead of `convert.py`.
```bash
python llm/llama.cpp/convert.py ./model --outtype f16 --outfile converted.bin
python llm/llama.cpp/convert.py ../jarvis-hf --outtype f16 --outfile converted.bin
```

### Quantize the model
Expand Down
10 changes: 9 additions & 1 deletion Modelfile
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
FROM ./ollama/quantized.bin
TEMPLATE "<s>[INST] {{ .Prompt }} [/INST]"
## TEMPLATE "<s>[INST] {{ .Prompt }} [/INST]"
TEMPLATE """[INST] <<SYS>>Always address the user as "Sir" or by their name if they have provided it.<</SYS>>
{{ .Prompt }}
[/INST]
"""
PARAMETER stop "[INST]"
PARAMETER stop "[/INST]"
PARAMETER stop "<<SYS>>"
PARAMETER stop "<</SYS>>"
51 changes: 24 additions & 27 deletions dataset/prepare_dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 107,
"execution_count": 28,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -42,19 +42,19 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" text\n",
"0 ### Human: Who are you? ### Assistant: I am Ja...\n",
"1 ### Human: Introduce yourself. ### Assistant: ...\n",
"2 ### Human: What is your purpose ### Assistant:...\n",
"3 ### Human: What are you? ### Assistant: I am a...\n",
"4 ### Human: Introduce yourself. ### Assistant: ...\n"
" text\n",
"225 ### Human: Do you agree with this approach? ##...\n",
"226 ### Human: Will you notify me of any updates? ...\n",
"227 ### Human: Do you have any concerns? ### Assis...\n",
"228 ### Human: Jarvis, are you up? ### Assistant: ...\n",
"229 ### Human: Jarvis, you there? ### Assistant: A...\n"
]
}
],
Expand All @@ -68,12 +68,12 @@
"dataset = pd.read_csv('raw_dataset.csv')\n",
"\n",
"# Print the last few rows of the dataset\n",
"print(dataset.head())"
"print(dataset.tail())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -83,7 +83,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 35,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -144,7 +144,7 @@
"4 ### Human: Introduce yourself. ### Assistant: ..."
]
},
"execution_count": 4,
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -170,19 +170,17 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/fotiem.constant/anaconda3/envs/ml-env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"Downloading readme: 100%|██████████| 21.0/21.0 [00:00<00:00, 53.7kB/s]\n",
"Downloading data: 100%|██████████| 16.7k/16.7k [00:00<00:00, 28.1kB/s]\n",
"Generating train split: 229 examples [00:00, 98877.46 examples/s]\n",
"Map: 100%|██████████| 229/229 [00:00<00:00, 18746.50 examples/s]\n"
"Downloading readme: 100%|██████████| 21.0/21.0 [00:00<00:00, 245kB/s]\n",
"Downloading data: 100%|██████████| 16.4k/16.4k [00:00<00:00, 28.8kB/s]\n",
"Generating train split: 230 examples [00:00, 55633.79 examples/s]\n",
"Map: 100%|██████████| 230/230 [00:00<00:00, 13459.79 examples/s]\n"
]
}
],
Expand Down Expand Up @@ -226,7 +224,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 37,
"metadata": {},
"outputs": [
{
Expand All @@ -235,7 +233,7 @@
"text": [
"Dataset({\n",
" features: ['text'],\n",
" num_rows: 229\n",
" num_rows: 230\n",
"})\n"
]
}
Expand All @@ -253,25 +251,24 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 535.88ba/s]\n",
"Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00, 1.06s/it]\n",
"README.md: 100%|██████████| 280/280 [00:00<00:00, 846kB/s]\n"
"Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 605.59ba/s]\n",
"Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00, 1.06s/it]\n"
]
},
{
"data": {
"text/plain": [
"CommitInfo(commit_url='https://huggingface.co/datasets/fotiecodes/jarvis-llama2-dataset/commit/70ae877f33efa0eb31663ded334ebab51153309b', commit_message='Upload dataset', commit_description='', oid='70ae877f33efa0eb31663ded334ebab51153309b', pr_url=None, pr_revision=None, pr_num=None)"
"CommitInfo(commit_url='https://huggingface.co/datasets/fotiecodes/jarvis-llama2-dataset/commit/8e59d8ebe825853f91f119c45739da5f6bc3d12a', commit_message='Upload dataset', commit_description='', oid='8e59d8ebe825853f91f119c45739da5f6bc3d12a', pr_url=None, pr_revision=None, pr_num=None)"
]
},
"execution_count": 7,
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
12 changes: 6 additions & 6 deletions dataset/process_dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -20,7 +20,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -206,7 +206,7 @@
"29 ### Human: What can you tell me about your cre..."
]
},
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -219,7 +219,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -229,7 +229,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -290,7 +290,7 @@
"4 ### Human: Introduce yourself. ### Assistant: ..."
]
},
"execution_count": 4,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
Loading

0 comments on commit fef9856

Please sign in to comment.