Skip to content

Commit

Permalink
fix: bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
madawei2699 committed Mar 25, 2023
1 parent b9a9bef commit bf71127
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 21 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Please join this [slack channel](https://slack-redirect.i365.tech/) to experienc
- Index fine-tune
- [x] Use the [GPTListIndex](https://github.com/jerryjliu/llama_index/issues/753#issuecomment-1472387421) to summarize multiple URLs
- [ ] Use the `GPTTreeIndex` with `summarize` mode to summarize a single web page
- Use [response_mode](https://gist.github.com/ninehills/ecf7107574c83016e8b68965bf9a51c4) to change the summary mode
- Bot regularly sends hot ~~summaries (expensive cost)~~ news in the slack channel (`#daily-news`)
- [x] Use chatGPT to summarize the hot news
- ~~Refer to [this](https://github.com/SkywalkerDarren/chatWeb/blob/c2ad05a97aecbe1bc0c846476ea003640f2a0f2e/main.py#L144-L175) approach~~
Expand Down
11 changes: 7 additions & 4 deletions app/daily_hot_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,22 @@ def cut_string(text):

def get_summary_from_gpt_thread(url):
    """Summarize the article at *url* via the llama-web answer pipeline.

    Runs in a worker thread (see get_summary_from_gpt). Returns the raw
    summary text; the 'AI: ' display prefix is added by the caller.
    """
    # Prompt asks (in Chinese) for a short summary of the article.
    news_summary_prompt = '请用中文简短概括这篇文章的内容。'
    # str() guards against non-string response objects from the index query.
    return str(get_answer_from_llama_web([news_summary_prompt], [url]))

def get_summary_from_gpt(url):
    """Fetch a GPT summary for *url*, bounded by a 300s timeout.

    The work is submitted to a thread pool so the blocking network call
    can be abandoned via the timeout; raises concurrent.futures.TimeoutError
    if the summary is not ready in time.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future = executor.submit(get_summary_from_gpt_thread, url)
        return future.result(timeout=300)

def get_description(entry):
    """Build a description for a feed entry, preferring a GPT summary.

    Tries the GPT summary first (marked with an 'AI: ' prefix); on any
    failure falls back to a truncated plain-text version of the entry's
    own summary, so one bad URL never breaks the news digest.
    """
    gpt_answer = None
    try:
        gpt_answer = get_summary_from_gpt(entry.link)
    except Exception as e:
        # Best-effort: log and fall through to the RSS-provided summary.
        logging.error(e)
    if gpt_answer is not None:
        summary = 'AI: ' + gpt_answer
    else:
        summary = cut_string(get_text_from_html(entry.summary))
    return summary

Expand Down
2 changes: 1 addition & 1 deletion app/fetch_web_post.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def get_urls(urls):
for url in urls:
if validators.url(url):
feed = feedparser.parse(url)
if feed.version:
if hasattr(feed, 'version') and feed.version:
rss_urls.append(url)
elif check_if_need_use_phantomjscloud(url):
phantomjscloud_urls.append(url)
Expand Down
8 changes: 0 additions & 8 deletions app/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,14 @@
if not os.path.exists(index_cache_file_dir):
os.makedirs(index_cache_file_dir)


def get_unique_md5(urls):
    """Return a deterministic MD5 hex digest for a collection of URLs.

    The URLs are sorted before hashing, so the digest is independent of
    the order in which they are supplied — equal sets of URLs map to the
    same cache key.
    """
    canonical = ''.join(sorted(urls))
    return hashlib.md5(canonical.encode('utf-8')).hexdigest()


def format_dialog_messages(messages):
    """Concatenate dialog messages into one string, one message per line."""
    return "\n".join(message for message in messages)


def get_documents_from_urls(urls):
documents = []
for url in urls['page_urls']:
Expand All @@ -61,7 +58,6 @@ def get_documents_from_urls(urls):
documents.append(document)
return documents


def get_answer_from_chatGPT(messages):
dialog_messages = format_dialog_messages(messages)
logging.info('=====> Use chatGPT to answer!')
Expand Down Expand Up @@ -101,7 +97,6 @@ def get_index_from_file_cache(name):
f"=====> Get index from file cache: {index_cache_file_dir + name}")
return index


def get_answer_from_llama_web(messages, urls):
dialog_messages = format_dialog_messages(messages)
logging.info('=====> Use llama web with chatGPT to answer!')
Expand All @@ -120,13 +115,11 @@ def get_answer_from_llama_web(messages, urls):
index.save_to_disk(index_cache_web_dir + index_file_name)
return index.query(dialog_messages, llm_predictor=llm_predictor, text_qa_template=QUESTION_ANSWER_PROMPT)


def get_index_name_from_file(file: str):
    """Map a cached upload path to its index filename (md5 stem + '.json').

    Strips the cache directory prefix from *file*, keeps everything before
    the first '.', and appends '.json'.
    """
    relative_name = file.replace(index_cache_file_dir, '')
    md5_stem, _, _ = relative_name.partition('.')
    return md5_stem + '.json'


def get_answer_from_llama_file(messages, file):
dialog_messages = format_dialog_messages(messages)
logging.info('=====> Use llama file with chatGPT to answer!')
Expand All @@ -142,7 +135,6 @@ def get_answer_from_llama_file(messages, file):
index.save_to_disk(index_cache_file_dir + index_name)
return index.query(dialog_messages, llm_predictor=llm_predictor, text_qa_template=QUESTION_ANSWER_PROMPT)


def get_text_from_whisper(voice_file_path):
with open(voice_file_path, "rb") as f:
transcript = openai.Audio.transcribe("whisper-1", f)
Expand Down
19 changes: 11 additions & 8 deletions app/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,17 @@ class Config:

def send_daily_news(client, news):
    """Post each news item to the scheduled Slack channel.

    Each item is a Slack blocks payload. A failure on one item is logged
    and does not stop delivery of the remaining items.
    """
    for news_item in news:
        try:
            r = client.chat_postMessage(
                channel=schedule_channel,
                text="",
                blocks=news_item,
                reply_broadcast=True,
                unfurl_links=False,
            )
            # Log the API response for troubleshooting delivery issues.
            logging.info(r)
        except Exception as e:
            logging.error(e)

@scheduler.task('cron', id='daily_news_task', hour=1, minute=30)
def schedule_news():
Expand Down

0 comments on commit bf71127

Please sign in to comment.