From 59c36226b7a5ad2ae2f62442c9ea2614f6f9981d Mon Sep 17 00:00:00 2001 From: Mark Botterill Date: Wed, 8 May 2024 11:07:54 +0000 Subject: [PATCH 1/4] Change message format --- backend/scripts/send_slack_report/send_slack_report.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/scripts/send_slack_report/send_slack_report.py b/backend/scripts/send_slack_report/send_slack_report.py index 6c6d68e8e51..4db31f8effc 100755 --- a/backend/scripts/send_slack_report/send_slack_report.py +++ b/backend/scripts/send_slack_report/send_slack_report.py @@ -97,8 +97,7 @@ def classify_initial_queries(): total_initial_queries = sum(tally_json.values()) for k, v in tally_json.items(): percentage = v / total_initial_queries * 100 - classifications += f"There were {v} queries (representing {percentage:.1f}% of\ - all initial queries) about {k}\n" + classifications += f"{k}: {v} queries ({percentage:.1f}%\n" return classifications From 26b154e1498e8fdd5cf6133d53dfc7168262c88b Mon Sep 17 00:00:00 2001 From: Mark Botterill Date: Wed, 8 May 2024 11:19:39 +0000 Subject: [PATCH 2/4] Mirror prod --- backend/scripts/send_slack_report/send_slack_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/scripts/send_slack_report/send_slack_report.py b/backend/scripts/send_slack_report/send_slack_report.py index 4db31f8effc..f08be3c1959 100755 --- a/backend/scripts/send_slack_report/send_slack_report.py +++ b/backend/scripts/send_slack_report/send_slack_report.py @@ -97,7 +97,7 @@ def classify_initial_queries(): total_initial_queries = sum(tally_json.values()) for k, v in tally_json.items(): percentage = v / total_initial_queries * 100 - classifications += f"{k}: {v} queries ({percentage:.1f}%\n" + classifications += f"{k}: {v} queries ({percentage:.1f}%)\n" return classifications From 967038ac9e651e1923e658fa8f93276a25a89d34 Mon Sep 17 00:00:00 2001 From: Mark Botterill <97025274+markbotterill@users.noreply.github.com> Date: Fri, 10 May 2024 
13:13:38 +0100 Subject: [PATCH 3/4] Only send weekly --- .github/workflows/send-slack-metrics.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/send-slack-metrics.yml b/.github/workflows/send-slack-metrics.yml index 0b571aa2f95..4cdd81e8654 100644 --- a/.github/workflows/send-slack-metrics.yml +++ b/.github/workflows/send-slack-metrics.yml @@ -2,7 +2,7 @@ name: Send Slack Metrics # NB This only works on the default (prod) branch on: schedule: - - cron: '0 12 * * *' + - cron: '0 9 * * 5' jobs: deploy: From 5f5f554e12ae1c83cb25209afe4c516db57ada0c Mon Sep 17 00:00:00 2001 From: Mark Botterill Date: Fri, 24 May 2024 13:38:26 +0000 Subject: [PATCH 4/4] Add automated response generation to HubGPT --- Makefile | 3 + backend/scripts/hubgpt_eval_automation.py | 99 +++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 backend/scripts/hubgpt_eval_automation.py diff --git a/Makefile b/Makefile index e6b7d456eb3..bb17d0f8a4d 100644 --- a/Makefile +++ b/Makefile @@ -34,3 +34,6 @@ re-deploy: send-slack-metrics: docker exec danswer-stack-background-1 python /app/scripts/send_slack_report/send_slack_report.py + +send-hubgpt-eval: + docker exec danswer-stack-background-1 python /app/scripts/hubgpt_eval_automation.py diff --git a/backend/scripts/hubgpt_eval_automation.py b/backend/scripts/hubgpt_eval_automation.py new file mode 100644 index 00000000000..6008c67edfc --- /dev/null +++ b/backend/scripts/hubgpt_eval_automation.py @@ -0,0 +1,99 @@ +# This file is used to demonstrate how to use the backend APIs directly +# In this case, the equivalent of asking a question in Danswer Chat in a new chat session +import datetime +import json +import os + +import pandas as pd +import requests +from slack_sdk import WebClient + + +def create_new_chat_session(danswer_url: str, api_key: str | None) -> int: + headers = {"Authorization": f"Bearer {api_key}"} if api_key else None + session_endpoint = danswer_url + 
"/api/chat/create-chat-session" + + response = requests.post(session_endpoint, headers=headers, json={"persona_id": 0}) + response.raise_for_status() + + new_session_id = response.json()["chat_session_id"] + return new_session_id + + +def process_question(danswer_url: str, question: str, api_key: str | None) -> str: + message_endpoint = danswer_url + "/api/chat/send-message" + + chat_session_id = create_new_chat_session(danswer_url, api_key) + + headers = {"Authorization": f"Bearer {api_key}"} if api_key else None + + data = { + "message": question, + "chat_session_id": chat_session_id, + "parent_message_id": None, + # Default Question Answer prompt + "prompt_id": 0, + # Not specifying any specific docs to chat to, we want to run a search + "search_doc_ids": None, + "retrieval_options": { + "run_search": "always", + "real_time": True, + "enable_auto_detect_filters": False, + # No filters applied, check all sources, document-sets, time ranges, etc. + "filters": {}, + }, + } + + with requests.post(message_endpoint, headers=headers, json=data) as response: + response.raise_for_status() + response_str = "" + for packet in response.iter_lines(): + response_text = json.loads(packet.decode()) + # Can also check "top_documents" to capture the streamed search results + # that include the highest matching documents to the query + # or check "message_id" to get the message_id used as parent_message_id + # to create follow-up messages + new_token = response_text.get("answer_piece") + if new_token: + response_str += new_token + return response_str + + +def upload_to_slack(filename, channel_id): + slack_client = WebClient(token=os.environ.get("SLACK_BOT_TOKEN")) + size = os.stat(filename).st_size + response = slack_client.files_getUploadURLExternal(filename=filename, length=size) + upload_url = response.data["upload_url"] + file_id = response.data["file_id"] + post_response = requests.post(url=upload_url, data=open(filename, "rb")) + if post_response.status_code == 200: + 
upload_response = slack_client.files_completeUploadExternal( + files=[{"id": file_id, "title": "Monthly Performance Evaluation"}], + channel_id=channel_id, + ) + return upload_response.status_code + + +if __name__ == "__main__": + data = pd.read_csv("hubgpt_eval_automated.csv") + + queries_list = data.Query.tolist() + + responses = [] + + for num, query in enumerate(queries_list): + print(f"Query {num+1}/{len(queries_list)}: {query}") + response = process_question( + danswer_url=os.getenv("WEB_DOMAIN"), question=query, api_key=None + ) + responses.append(response) + print("\n ------------------- \n") + + today_str = str(datetime.date.today()) + data[today_str] = responses + + # Record + send info + data.to_csv("hubgpt_eval_automated.csv", index=False) + print("Complete") + CHANNEL_ID = os.environ.get("METRICS_CHANNEL_ID") + upload_to_slack("hubgpt_eval_automated.csv", CHANNEL_ID)