Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Staging #19

Merged
merged 5 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/send-slack-metrics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Send Slack Metrics
# NB This only works on the default (prod) branch
on:
schedule:
- cron: '0 12 * * *'
- cron: '0 9 * * 5'

jobs:
deploy:
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,6 @@ re-deploy:

send-slack-metrics:
docker exec danswer-stack-background-1 python /app/scripts/send_slack_report/send_slack_report.py

send-hubgpt-eval:
docker exec danswer-stack-background-1 python /app/scripts/send_slack_report/hubgpt_eval_automation.py
99 changes: 99 additions & 0 deletions backend/scripts/hubgpt_eval_automation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# This file is used to demonstrate how to use the backend APIs directly
# In this case, the equivalent of asking a question in Danswer Chat in a new chat session
import datetime
import json
import os

import pandas as pd
import requests
from slack_sdk import WebClient


def create_new_chat_session(danswer_url: str, api_key: str | None) -> int:
    """Open a fresh Danswer chat session (persona 0) and return its id.

    An ``Authorization: Bearer`` header is sent only when ``api_key`` is given.
    Raises ``requests.HTTPError`` if the API responds with an error status.
    """
    auth_headers = {"Authorization": f"Bearer {api_key}"} if api_key else None
    endpoint = danswer_url + "/api/chat/create-chat-session"

    resp = requests.post(endpoint, headers=auth_headers, json={"persona_id": 0})
    resp.raise_for_status()

    return resp.json()["chat_session_id"]


def process_question(danswer_url: str, question: str, api_key: str | None) -> str:
    """Ask ``question`` in a brand-new chat session and return the answer text.

    Creates a chat session, posts the question to the streamed send-message
    endpoint, and concatenates the ``answer_piece`` tokens from each streamed
    JSON line into the final answer string.

    Raises ``requests.HTTPError`` on a non-2xx API response.
    """
    # BUG FIX: the original signature claimed `-> None` but the function
    # returns the accumulated answer string.
    message_endpoint = danswer_url + "/api/chat/send-message"

    chat_session_id = create_new_chat_session(danswer_url, api_key)

    headers = {"Authorization": f"Bearer {api_key}"} if api_key else None

    data = {
        "message": question,
        "chat_session_id": chat_session_id,
        "parent_message_id": None,
        # Default Question Answer prompt
        "prompt_id": 0,
        # Not specifying any specific docs to chat to, we want to run a search
        "search_doc_ids": None,
        "retrieval_options": {
            "run_search": "always",
            "real_time": True,
            "enable_auto_detect_filters": False,
            # No filters applied, check all sources, document-sets, time ranges, etc.
            "filters": {},
        },
    }

    # stream=True so iter_lines consumes the server-sent stream incrementally
    # instead of buffering the entire response body first.
    with requests.post(
        message_endpoint, headers=headers, json=data, stream=True
    ) as response:
        response.raise_for_status()
        response_str = ""
        for packet in response.iter_lines():
            response_text = json.loads(packet.decode())
            # Can also check "top_documents" to capture the streamed search results
            # that include the highest matching documents to the query
            # or check "message_id" to get the message_id used as parent_message_id
            # to create follow-up messages
            new_token = response_text.get("answer_piece")
            if new_token:
                response_str += new_token
    return response_str


def upload_to_slack(filename, channel_id):
    """Upload ``filename`` to the Slack channel via the external-upload flow.

    Requests an external upload URL from Slack, POSTs the file bytes to it,
    then completes the upload so the file is shared into ``channel_id``.
    Returns the status code of the completion call.

    Raises ``requests.HTTPError`` if the file POST fails — the original
    silently returned ``None`` in that case, hiding upload failures.
    """
    slack_client = WebClient(token=os.environ.get("SLACK_BOT_TOKEN"))
    size = os.stat(filename).st_size
    response = slack_client.files_getUploadURLExternal(filename=filename, length=size)
    upload_url = response.data["upload_url"]
    file_id = response.data["file_id"]
    # BUG FIX: close the file handle deterministically (it was leaked before).
    with open(filename, "rb") as file_obj:
        post_response = requests.post(url=upload_url, data=file_obj)
    # BUG FIX: fail loudly on a bad upload instead of returning None.
    post_response.raise_for_status()
    upload_response = slack_client.files_completeUploadExternal(
        files=[{"id": file_id, "title": "Monthly Performance Evaluation"}],
        channel_id=channel_id,
    )
    return upload_response.status_code


if __name__ == "__main__":
    # Each row of the CSV holds one evaluation query; every run appends the
    # day's answers as a new date-stamped column.
    data = pd.read_csv("hubgpt_eval_automated.csv")

    queries = data.Query.tolist()
    answers = []

    for idx, query in enumerate(queries, start=1):
        print(f"Query {idx}/{len(queries)}: {query}")
        answers.append(
            process_question(
                danswer_url=os.getenv("WEB_DOMAIN"), question=query, api_key=None
            )
        )
        print("\n ------------------- \n")

    # Store today's answers under a column named after today's date.
    data[str(datetime.date.today())] = answers

    # Record + send info
    data.to_csv("hubgpt_eval_automated.csv", index=False)
    print("Complete")
    upload_to_slack("hubgpt_eval_automated.csv", os.environ.get("METRICS_CHANNEL_ID"))
Loading