Skip to content

Commit

Permalink
fix: bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
madawei2699 committed Mar 25, 2023
1 parent b9a9bef commit bf71127
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 21 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Please join this [slack channel](https://slack-redirect.i365.tech/) to experienc
- Index fine-tune
- [x] Use the [GPTListIndex](https://github.com/jerryjliu/llama_index/issues/753#issuecomment-1472387421) to summarize multiple URLs
- [ ] Use the `GPTTreeIndex` with `summarize` mode to summarize a single web page
- Use [response_mode](https://gist.github.com/ninehills/ecf7107574c83016e8b68965bf9a51c4) to change the summary mode
- Bot regularly sends hot ~~summaries (expensive cost)~~ news in the slack channel (`#daily-news`)
- [x] Use chatGPT to summarize the hot news
- ~~Refer to [this](https://github.com/SkywalkerDarren/chatWeb/blob/c2ad05a97aecbe1bc0c846476ea003640f2a0f2e/main.py#L144-L175) approach~~
Expand Down
11 changes: 7 additions & 4 deletions app/daily_hot_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,22 @@ def cut_string(text):

def get_summary_from_gpt_thread(url):
    """Summarize the article at *url* via the llama-web answer pipeline.

    Runs in a worker thread (see get_summary_from_gpt). Returns the raw
    summary text; the 'AI: ' display prefix is added by the caller.
    """
    # Prompt asks (in Chinese) for a short summary of the article.
    news_summary_prompt = '请用中文简短概括这篇文章的内容。'
    # str() guards against non-string response objects from the index query.
    return str(get_answer_from_llama_web([news_summary_prompt], [url]))

def get_summary_from_gpt(url):
    """Fetch a GPT summary for *url*, bounded by a 300s timeout.

    The work is submitted to a thread pool so the blocking network call
    can be abandoned via the timeout; raises concurrent.futures.TimeoutError
    if the summary is not ready in time.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future = executor.submit(get_summary_from_gpt_thread, url)
        return future.result(timeout=300)

def get_description(entry):
    """Build a description for a feed entry, preferring a GPT summary.

    Tries the GPT summary first (marked with an 'AI: ' prefix); on any
    failure falls back to a truncated plain-text version of the entry's
    own summary, so one bad URL never breaks the news digest.
    """
    gpt_answer = None
    try:
        gpt_answer = get_summary_from_gpt(entry.link)
    except Exception as e:
        # Best-effort: log and fall through to the RSS-provided summary.
        logging.error(e)
    if gpt_answer is not None:
        summary = 'AI: ' + gpt_answer
    else:
        summary = cut_string(get_text_from_html(entry.summary))
    return summary

Expand Down
2 changes: 1 addition & 1 deletion app/fetch_web_post.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def get_urls(urls):
for url in urls:
if validators.url(url):
feed = feedparser.parse(url)
if feed.version:
if hasattr(feed, 'version') and feed.version:
rss_urls.append(url)
elif check_if_need_use_phantomjscloud(url):
phantomjscloud_urls.append(url)
Expand Down
8 changes: 0 additions & 8 deletions app/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,14 @@
if not os.path.exists(index_cache_file_dir):
os.makedirs(index_cache_file_dir)


def get_unique_md5(urls):
    """Return a deterministic MD5 hex digest for a collection of URLs.

    The URLs are sorted before hashing, so the digest is independent of
    the order in which they are supplied — equal sets of URLs map to the
    same cache key.
    """
    canonical = ''.join(sorted(urls))
    return hashlib.md5(canonical.encode('utf-8')).hexdigest()


def format_dialog_messages(messages):
    """Concatenate dialog messages into one string, one message per line."""
    return "\n".join(message for message in messages)


def get_documents_from_urls(urls):
documents = []
for url in urls['page_urls']:
Expand All @@ -61,7 +58,6 @@ def get_documents_from_urls(urls):
documents.append(document)
return documents


def get_answer_from_chatGPT(messages):
dialog_messages = format_dialog_messages(messages)
logging.info('=====> Use chatGPT to answer!')
Expand Down Expand Up @@ -101,7 +97,6 @@ def get_index_from_file_cache(name):
f"=====> Get index from file cache: {index_cache_file_dir + name}")
return index


def get_answer_from_llama_web(messages, urls):
dialog_messages = format_dialog_messages(messages)
logging.info('=====> Use llama web with chatGPT to answer!')
Expand All @@ -120,13 +115,11 @@ def get_answer_from_llama_web(messages, urls):
index.save_to_disk(index_cache_web_dir + index_file_name)
return index.query(dialog_messages, llm_predictor=llm_predictor, text_qa_template=QUESTION_ANSWER_PROMPT)


def get_index_name_from_file(file: str):
    """Map a cached upload path to its index filename (md5 stem + '.json').

    Strips the cache directory prefix from *file*, keeps everything before
    the first '.', and appends '.json'.
    """
    relative_name = file.replace(index_cache_file_dir, '')
    md5_stem, _, _ = relative_name.partition('.')
    return md5_stem + '.json'


def get_answer_from_llama_file(messages, file):
dialog_messages = format_dialog_messages(messages)
logging.info('=====> Use llama file with chatGPT to answer!')
Expand All @@ -142,7 +135,6 @@ def get_answer_from_llama_file(messages, file):
index.save_to_disk(index_cache_file_dir + index_name)
return index.query(dialog_messages, llm_predictor=llm_predictor, text_qa_template=QUESTION_ANSWER_PROMPT)


def get_text_from_whisper(voice_file_path):
with open(voice_file_path, "rb") as f:
transcript = openai.Audio.transcribe("whisper-1", f)
Expand Down
19 changes: 11 additions & 8 deletions app/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,17 @@ class Config:

def send_daily_news(client, news):
    """Post each news item to the scheduled Slack channel.

    Each item is a Slack blocks payload. A failure on one item is logged
    and does not stop delivery of the remaining items.
    """
    for news_item in news:
        try:
            r = client.chat_postMessage(
                channel=schedule_channel,
                text="",
                blocks=news_item,
                reply_broadcast=True,
                unfurl_links=False,
            )
            # Log the API response for troubleshooting delivery issues.
            logging.info(r)
        except Exception as e:
            logging.error(e)

@scheduler.task('cron', id='daily_news_task', hour=1, minute=30)
def schedule_news():
Expand Down

0 comments on commit bf71127

Please sign in to comment.