Skip to content

Commit

Permalink
Merge pull request #8 from pablomarin/pages_tabular
Browse files Browse the repository at this point in the history
Pages tabular
  • Loading branch information
giorgiosaez authored Apr 7, 2023
2 parents 315e9fa + dacdf2c commit 7c4d0c6
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 60 deletions.
67 changes: 67 additions & 0 deletions app/Home.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import streamlit as st
import urllib
import os
import time
import requests
import random
from collections import OrderedDict
from openai.error import OpenAIError
from langchain.docstore.document import Document

from components.sidebar import sidebar
from utils import (
embed_docs,
get_answer,
get_sources,
search_docs
)
from credentials import (
DATASOURCE_CONNECTION_STRING,
AZURE_SEARCH_API_VERSION,
AZURE_SEARCH_ENDPOINT,
AZURE_SEARCH_KEY,
COG_SERVICES_NAME,
COG_SERVICES_KEY,
AZURE_OPENAI_ENDPOINT,
AZURE_OPENAI_KEY,
AZURE_OPENAI_API_VERSION

)

# Publish the Azure OpenAI settings imported from credentials.py as
# environment variables, under both the OPENAI_* and AZURE_OPENAI_* names.
os.environ["OPENAI_API_BASE"] = os.environ["AZURE_OPENAI_ENDPOINT"] = AZURE_OPENAI_ENDPOINT
os.environ["OPENAI_API_KEY"] = os.environ["AZURE_OPENAI_API_KEY"] = AZURE_OPENAI_KEY
os.environ["OPENAI_API_VERSION"] = os.environ["AZURE_OPENAI_API_VERSION"] = AZURE_OPENAI_API_VERSION

# Mirror the endpoint and key into the Streamlit session state.
# The endpoint key previously carried a trailing space ("AZURE_OPENAI_ENDPOINT "),
# so a lookup of "AZURE_OPENAI_ENDPOINT" could never find the value stored here.
st.session_state["AZURE_OPENAI_ENDPOINT"] = AZURE_OPENAI_ENDPOINT
st.session_state["AZURE_OPENAI_API_KEY"] = AZURE_OPENAI_KEY

# Static content of the landing page, kept apart from the rendering calls below.
BANNER_URL = "https://user-images.githubusercontent.com/113465005/226238596-cc76039e-67c2-46b6-b0bb-35d037ae66e1.png"
DIVIDER = "---"
LANDING_MARKDOWN = """
GPT Smart Search allows you to ask questions about your
documents and get accurate answers with instant citations.
This engine finds information from the following:
- ~10k [Computer Science Publications in Arxiv from 2020-2022](https://www.kaggle.com/datasets/1b6883fb66c5e7f67c697c2547022cc04c9ee98c3742f9a4d6c671b4f4eda591)
- ~52k [COVID-19 literature in LitCovid from 2020-2023](https://www.ncbi.nlm.nih.gov/research/coronavirus/)
**👈 Select a demo from the sidebar** to see some examples
of what Azure Cognitive Search and Azure OpenAI Service can do!
### Want to learn more?
- Check out [Github Repo](https://github.com/pablomarin/GPT-Azure-Search-Engine/)
- Jump into [Azure OpenAI documentation](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/)
- Ask a question or submit a [GitHub Issue!](https://github.com/pablomarin/GPT-Azure-Search-Engine/issues/new)
"""

# Page settings first, then banner image and title.
st.set_page_config(page_title="GPT Smart Search", page_icon="📖", layout="wide")
st.image(BANNER_URL)
st.header("GPT Smart Search Engine")

# Introductory text framed by a horizontal rule above and below.
for section in (DIVIDER, LANDING_MARKDOWN, DIVIDER):
    st.markdown(section)

# Hint shown in the sidebar.
st.sidebar.success("Select a demo above.")
40 changes: 0 additions & 40 deletions app/pages/1_Chat_(Preview).py

This file was deleted.

39 changes: 19 additions & 20 deletions app/main.py → app/pages/1_GPT_Smart_Search.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,28 +71,27 @@ def get_search_results(query, indexes):
st.set_page_config(page_title="GPT Smart Search", page_icon="📖", layout="wide")
st.header("GPT Smart Search Engine")

sidebar()

with st.expander("Instructions"):
with st.sidebar:
st.markdown("""# Instructions""")
st.markdown("""
Ask a question that you think can be answered with the information in about 10k Arxiv Computer Science publications from 2020-2021 or in 52k Medical Covid-19 Publications from 2020.
For example:
- What are markov chains?
- List the authors that talk about Gradient Boosting Machines
- How does random forest work?
- What kind of problems can I solve with reinforcement learning? Give me some real life examples
- What kind of problems Turing Machines solve?
- What are the main risk factors for Covid-19?
- What medicine reduces inflamation in the lungs?
- Why Covid doesn't affect kids that much compared to adults?
\nYou will notice that the answers to these questions are diferent from the open ChatGPT, since these papers are the only possible context. This search engine does not look at the open internet to answer these questions. If the context doesn't contain information, the engine will respond: I don't know.
""")
Ask a question that you think can be answered with the information in about 10k Arxiv Computer Science publications from 2020-2021 or in 52k Medical Covid-19 Publications from 2020.
For example:
- What are markov chains?
- List the authors that talk about Gradient Boosting Machines
- How does random forest work?
- What kind of problems can I solve with reinforcement learning? Give me some real life examples
- What kind of problems Turing Machines solve?
- What are the main risk factors for Covid-19?
- What medicine reduces inflamation in the lungs?
- Why Covid doesn't affect kids that much compared to adults?
\nYou will notice that the answers to these questions are diferent from the open ChatGPT, since these papers are the only possible context. This search engine does not look at the open internet to answer these questions. If the context doesn't contain information, the engine will respond: I don't know.
""")
st.markdown("""
- ***Quick Answer***: GPT model only uses, as context, the captions of the results coming from Azure Search
- ***Best Answer***: GPT model uses, as context. all of the content of the documents coming from Azure Search
""")
- ***Quick Answer***: GPT model only uses, as context, the captions of the results coming from Azure Search
- ***Best Answer***: GPT model uses, as context. all of the content of the documents coming from Azure Search
""")

query = st.text_input("Ask a question to your enterprise data lake", value= "What is CLP?", on_change=clear_submit)

Expand Down
100 changes: 100 additions & 0 deletions app/pages/2_Tabular_Data_(Preview).py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import streamlit as st
import os
import pandas as pd
from langchain.llms import AzureOpenAI
from langchain.chat_models import AzureChatOpenAI
from langchain.agents import create_pandas_dataframe_agent
from langchain.agents import create_csv_agent


def sidebar():
    """Render the instructions and the Azure OpenAI inputs in the sidebar.

    Draws the instruction text, then three text boxes. On every call the
    current value of each box is stored in ``st.session_state`` under
    ``AZURE_OPENAI_GPT4_NAME``, ``AZURE_OPENAI_ENDPOINT`` and
    ``AZURE_OPENAI_API_KEY`` respectively.
    """
    intro_md = """
**GPT GPT Tabular data Q&A** allows you to ask questions to your Tabular CSV files.
"""
    preview_note = "**Note**: GPT-4 is in preview and with limited availability. There is a lot of limitation on the API, so it takes longer than needed and it fails some times. Retry if it fails."

    # (session-state key, widget label, extra text_input options), in display order.
    setting_fields = (
        ("AZURE_OPENAI_GPT4_NAME", "Enter your GPT-4 deployment name:", {}),
        ("AZURE_OPENAI_ENDPOINT", "Enter your Azure OpenAI Endpoint:", {}),
        ("AZURE_OPENAI_API_KEY", "Enter Azure OpenAI Key:", {"type": "password"}),
    )

    with st.sidebar:
        st.markdown("""# Instructions""")
        st.markdown("---")
        st.markdown(intro_md)
        st.markdown(preview_note)
        st.markdown("---")

        for state_key, label, options in setting_fields:
            st.session_state[state_key] = st.text_input(label, **options)

# Text placed before the user's question in the prompt sent to the agent.
preffix = (
    'First set the pandas display options to show all the columns, '
    'then get the column names, then answer the question: '
)

# Text placed after the user's question: self-check and output-format instructions.
suffix = (
    '. ALWAYS before giving the Final Answer, reflect on the answer and ask yourself if it answers correctly the original question. '
    'If you are not sure, try another method. \n '
    'If the two runs does not give the same result, reflect again two more times until you have two runs that have the same result. '
    'If you still cannot arrive to a consistent result, say that you are not sure of the answer. '
    'But, if you are sure of the correct answer, create a beautiful and thorough response. '
    'ALWAYS, as part of your final answer, explain how you got to the answer. '
    'Format the final answer in Markdown language'
)

# Upper bound on the number of attempts made per question.
max_retries = 5

# Page-level settings (title, icon, wide layout) followed by the visible header.
st.set_page_config(page_title="GPT Tabular data Q&A", page_icon="📖", layout="wide")
st.header("GPT Tabular data Q&A (preview)")

# Draw the sidebar: instructions plus the Azure OpenAI text inputs.
sidebar()

def clear_submit():
    """Set the ``submit`` flag in the Streamlit session state to ``False``."""
    st.session_state.submit = False


# Two equal-width columns; only the left one currently holds a widget.
col1, col2 = st.columns([1, 1])

with col1:
    uploaded_file = st.file_uploader(
        label="Upload your tabular CSV file",
        type="csv",
        accept_multiple_files=False,
        key=None,
        help="Upload your CSV file that contains tabular data, make sure that the first row corresponds to the columns",
        on_change=None,
        disabled=False,
    )

# Disabled: optional sample-dataset loader for the right-hand column.
# with col2:
#     st.markdown("Or pick from these sample datasets:")
#     st.markdown("[Covid Tracking Project](https://learn.microsoft.com/en-us/azure/open-datasets/dataset-covid-tracking?tabs=azure-storage) ")
#     ingest_button = st.button("Load Sample CSV") # Give button a variable name

#     if ingest_button: # Make button a condition.
#         uploaded_file = "https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/covid_tracking/latest/covid_tracking.csv"

# Once a file is present, load it into a DataFrame and show its first two rows.
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    preview = df.head(2)
    st.write("Here is the first two rows of your file:", preview)

# Question box (editing it calls clear_submit) and the button that requests an answer.
query_str = st.text_input("Ask a question:", on_change=clear_submit)
qbutton = st.button('Generate Answer')


# Answer the question when the button was pressed (or the "submit" flag is
# still set in the session state) and a CSV file has been uploaded.
if (qbutton or st.session_state.get("submit")) and uploaded_file:
    if not query_str:
        st.error("Please enter a question")
    else:
        st.session_state["submit"] = True
        placeholder = st.empty()

        # All three sidebar settings are required before the model can be called.
        if not st.session_state.get("AZURE_OPENAI_ENDPOINT"):
            st.error("Please set your Azure OpenAI API Endpoint on the side bar!")
        elif not st.session_state.get("AZURE_OPENAI_API_KEY"):
            st.error("Please configure your Azure OpenAI API key on the side bar!")
        elif not st.session_state.get("AZURE_OPENAI_GPT4_NAME"):
            st.error("Please configure your GPT-4 Deployment Name in the sidebar")
        else:
            # Publish the sidebar values as environment variables before
            # constructing the chat model.
            os.environ["OPENAI_API_BASE"] = os.environ["AZURE_OPENAI_ENDPOINT"] = st.session_state["AZURE_OPENAI_ENDPOINT"]
            os.environ["OPENAI_API_KEY"] = os.environ["AZURE_OPENAI_API_KEY"] = st.session_state["AZURE_OPENAI_API_KEY"]
            os.environ["OPENAI_API_VERSION"] = os.environ["AZURE_OPENAI_API_VERSION"] = "2023-03-15-preview"

            llm = AzureChatOpenAI(deployment_name=st.session_state["AZURE_OPENAI_GPT4_NAME"], temperature=0.5, max_tokens=999)
            agent = create_pandas_dataframe_agent(llm, df, verbose=True)

            try:
                with st.spinner("Coming up with an answer... ⏳"):
                    for _attempt in range(max_retries):
                        try:
                            response = agent.run(preffix + query_str + suffix)
                            break
                        except Exception:
                            # Any failed run is retried. Catching Exception rather
                            # than using a bare except lets KeyboardInterrupt and
                            # SystemExit propagate instead of being swallowed.
                            continue
                    else:
                        # No attempt succeeded (or max_retries is 0): show the
                        # fallback text so `response` is always bound below.
                        response = "Error too many failed retries - GPT-4 still in preview and just for testing"

                with placeholder.container():
                    st.markdown("#### Answer")
                    # Prefix every "$" with a backslash before rendering.
                    # "\\$" has the same value as the former "\$" literal but
                    # is a valid escape sequence.
                    st.markdown(response.replace("$", "\\$"))

            except Exception as e:
                # Anything unexpected while rendering is reported in the page.
                st.error(e)

0 comments on commit 7c4d0c6

Please sign in to comment.