embedding.py
import chromadb
import ollama
# Use an embedding model to vectorize the documents and store them in a vector database.
# Use the same embedding model to vectorize the prompt.
# Query the vector database for the record most similar to the prompt.
# Combine the retrieved record with the prompt and call the LLM to generate the answer.
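# Prerequisites (assumptions, not checked by this script): a local Ollama server
# listening on http://localhost:11434 with the mxbai-embed-large and llama3.1
# models already pulled (e.g. `ollama pull mxbai-embed-large` and `ollama pull llama3.1`),
# plus the `ollama` and `chromadb` Python packages installed.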
embedding_model = 'mxbai-embed-large'
documents = [
    "在一个寒冷的冬天,赶集完回家的农夫在路边发现了一条冻僵了的蛇。",
    "他很可怜蛇,就把它放在怀里。",
    "当他身上的热气把蛇温暖以后,蛇很快苏醒了,露出了残忍的本性,给了农夫致命的伤害——咬了农夫一口。",
    "农夫临死之前说:“我竟然救了一条可怜的毒蛇,就应该受到这种报应啊!”",
]
prompt = "农夫死前在想什么?"
dbclient = chromadb.Client()
collection = dbclient.create_collection(name="docs")
client = ollama.Client(host="http://localhost:11434")
# store each document in a vector embedding database
for i, d in enumerate(documents):
    response = client.embeddings(
        model=embedding_model,
        prompt=d,
    )
    embedding = response["embedding"]
    collection.add(ids=[str(i)], embeddings=[embedding], documents=[d])
# generate an embedding for the prompt and retrieve the most relevant doc
response = client.embeddings(
    prompt=prompt,
    model=embedding_model,
)
results = collection.query(
    query_embeddings=[response["embedding"]],
    n_results=1,
)
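# results['documents'] holds one list of matching documents per query embedding;
# [0][0] picks the single best match for our single query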
data = results['documents'][0][0]
# generate a response combining the prompt and the data we retrieved above
output = client.generate(
    model="llama3.1",
    prompt=f"使用数据: {data}. 回复问题: {prompt}"
)
print(output['response'])
print(data)
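
# Optional (a sketch, not part of the original script): the in-memory client above
# loses the embeddings when the process exits. chromadb also ships a persistent
# client; the path below is an arbitrary example.
# dbclient = chromadb.PersistentClient(path="./chroma_db")
# collection = dbclient.get_or_create_collection(name="docs")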