# retrieve.py
from functools import lru_cache

import openai
import pinecone
import yaml

# from dotenv import dotenv_values
# config = dotenv_values(".env")

def load_config(file_path: str) -> dict:
    """Load the YAML configuration (API keys, Pinecone settings) into a dict."""
    with open(file_path, "r") as config_file:
        return yaml.safe_load(config_file)


config = load_config("config.yaml")
openai.api_key = config["OPENAI_API_KEY"]

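# The keys this module reads from config.yaml (layout assumed from the
# lookups below; the values shown are placeholders, not real credentials):
#
#   OPENAI_API_KEY: sk-...
#   PINECONE_API_KEY: ...
#   PINECONE_ENVIRONMENT: us-west1-gcp
#   PINECONE_INDEX_NAME: my-index
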
@lru_cache(maxsize=128, typed=False)
def get_embedding(text: str, model: str = "text-embedding-ada-002"):
    """Embed text with the OpenAI embeddings API; results are memoized."""
    response = openai.Embedding.create(input=text, model=model)
    return response["data"][0]["embedding"]

# TODO: delete this (moved to doc utils)
@lru_cache(maxsize=128, typed=False)
def query_pinecone(index,
                   query_embedding_tuple,
                   top_k=5,
                   include_metadata=True):
    """Query the Pinecone index.

    The embedding is passed as a tuple because lists are unhashable and
    lru_cache needs hashable arguments.
    """
    # Convert the tuple back to a list
    query_embedding = list(query_embedding_tuple)
    response = index.query(query_embedding,
                           top_k=top_k,
                           include_metadata=include_metadata)
    return response

def get_response_texts(response):
    """Pull the stored source text out of each Pinecone match's metadata."""
    return [match["metadata"]["text"] for match in response["matches"]]

def generate_summary(prompt: str):
    """Ask gpt-3.5-turbo to summarize the retrieved context."""
    response_chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant."
            },
            {
                "role": "user",
                "content": prompt
            },
        ],
    )
    return response_chat.choices[0].message["content"]

# Format the summary so fenced code blocks render as HTML <pre><code> blocks
# with a copy-button header.
def format_summary(summary: str) -> str:
    backtick_occurrences = summary.count("```")
    formatted_summary = ""
    start_position = 0
    for i in range(backtick_occurrences):
        end_position = summary.find("```", start_position)
        # Even-numbered fences open a code block, odd-numbered fences close it.
        if i % 2 == 0:
            tag = '<div class="code-header"><button class="copy-btn">Copy</button></div><pre><code>'
        else:
            tag = "</code></pre>"
        formatted_summary += summary[start_position:end_position] + tag
        start_position = end_position + 3
    formatted_summary += summary[start_position:]
    return formatted_summary

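# For example, given a well-formed summary with paired fences, the input
#   "Use ```x = 1``` to assign"
# becomes
#   'Use <div class="code-header"><button class="copy-btn">Copy</button></div><pre><code>x = 1</code></pre> to assign'
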
def search_and_chat(search_query: str) -> list:
    """Embed the query, retrieve matching chunks from Pinecone, and summarize them."""
    openai.api_key = config["OPENAI_API_KEY"]
    pinecone.init(api_key=config["PINECONE_API_KEY"],
                  environment=config["PINECONE_ENVIRONMENT"])
    index = pinecone.Index(config["PINECONE_INDEX_NAME"])

    query_embeds = get_embedding(search_query)
    response = query_pinecone(index, tuple(query_embeds))
    print(response)

    response_texts = get_response_texts(response)
    combined_text = " ".join(response_texts)
    prompt = f"""
I have gathered some relevant information to help answer your question. Here is the information:

{combined_text}

Based on this information, provide a detailed summary of the topic: {search_query}
"""
    summary = generate_summary(prompt)
    formatted_summary = format_summary(summary)
    print(prompt)
    print(summary)
    print(formatted_summary)
    return [formatted_summary]
    # return [summary]  # Wrap summary in a list

# Define the search query, e.g.:
# search_and_chat("Tuples and Sequences with code examples")
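
# A minimal usage sketch (an assumption, not part of the original script):
# running the module directly performs one retrieval-augmented query.
# It presumes config.yaml exists with the keys shown near the top of the file.
if __name__ == "__main__":
    results = search_and_chat("Tuples and Sequences with code examples")
    for formatted in results:
        print(formatted)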