diff --git a/.gitignore b/.gitignore index 83e67f7..d43e568 100644 --- a/.gitignore +++ b/.gitignore @@ -169,4 +169,6 @@ src/scraping/data/* src/evals/parenting_chatbot/* src/genai/parenting_chatbot/prodigy_eval/_scrap/* !src/genai/parenting_chatbot/prodigy_eval/data/ -!src/genai/sandbox/signals/data/ \ No newline at end of file +!src/genai/sandbox/signals/data/ +src/genai/sandbox/signals/data/signals_2024.json +src/genai/sandbox/signals/illustrations/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0235d10..f0f58d2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -75,4 +75,3 @@ repos: --format, custom, ] - stages: [commit] diff --git a/signals_app.py b/signals_app.py index a0c9e7b..b71e78f 100644 --- a/signals_app.py +++ b/signals_app.py @@ -1,40 +1,62 @@ -import streamlit as st - -from genai import MessageTemplate, FunctionTemplate -from genai.eyfs import TextGenerator -from genai.streamlit_pages.utils import reset_state +import copy import json import os +import uuid + +from datetime import datetime +from typing import Union + import openai +import s3fs +import streamlit as st + from dotenv import load_dotenv + +from genai import FunctionTemplate +from genai import MessageTemplate +from genai.eyfs import TextGenerator +from genai.message_history import InMemoryMessageHistory +from genai.message_history import TokenCounter + + load_dotenv() selected_model = "gpt-4-1106-preview" -temperature = 0.6 +# selected_model = "gpt-3.5-turbo-1106" +# selected_model = "gpt-4" +temperature = 0.000001 + +CHECK_COSTS = False # Paths to prompts PROMPT_PATH = "src/genai/sandbox/signals/data/" -PATH_SIGNALS_DATA = PROMPT_PATH + "signals_2023.json" +PATH_SIGNALS_DATA = PROMPT_PATH + "signals_2024.json" PATH_SYSTEM = PROMPT_PATH + "00_system.jsonl" PATH_INTRO = PROMPT_PATH + "01_intro.jsonl" PATH_ACTIONS = PROMPT_PATH + "intent_actions.json" +PATH_ILLUSTRATIONS = "src/genai/sandbox/signals/illustrations/" # Top signal function path_func_top_signal = PROMPT_PATH + "func_top_signal.json" path_prompt_top_signal = PROMPT_PATH + "prompt_top_signal.jsonl" -# Top three signals function +# Top three signals function path_func_top_three_signals = PROMPT_PATH + "func_top_three_signals.json" path_prompt_top_three_signals = PROMPT_PATH + "prompt_top_three_signals.jsonl" -# Intent detection function +# Intent detection function path_func_intent = PROMPT_PATH + "func_intent.json" path_prompt_intent = PROMPT_PATH + "prompt_intent.jsonl" -# Prompt: Impact on the user +# Prompt: Impact on the user path_prompt_impact = PROMPT_PATH + "02_signal_impact.jsonl" -# Prompt: Summary of different signals +# Prompt: Summary of different signals path_prompt_choice = PROMPT_PATH + "03_signal_choice.jsonl" -# Prompt: Following up on user's question +# Prompt: Following up on user's question path_prompt_following_up = PROMPT_PATH + "04_follow_up.jsonl" +aws_key = os.environ["AWS_ACCESS_KEY_ID"] +aws_secret = os.environ["AWS_SECRET_ACCESS_KEY"] +s3_path = os.environ["S3_BUCKET"] + + def auth_openai() -> None: """Authenticate with OpenAI.""" try: @@ -47,9 +69,19 @@ def read_jsonl(path: str) -> list: """Read a JSONL file.""" with open(path, "r") as f: return [json.loads(line) for line in f.readlines()] - -def generate_signals_texts(signals_data: dict, chosen_signals: list = None): + +def generate_signals_texts(signals_data: dict, chosen_signals: list = None) -> str: + """ + Generate a description of the signals. + + Args: + signals_data (dict): A dictionary of signals data. 
+ chosen_signals (list, optional): A list of signals to include in the description. Defaults to None. + + Returns: + str: A description of the signals. + """ signals = [signal["short_name"] for signal in signals_data] signals_titles = [signal["title"] for signal in signals_data] signals_summaries = [signal["summary"] for signal in signals_data] @@ -63,35 +95,51 @@ def generate_signals_texts(signals_data: dict, chosen_signals: list = None): if short_name in chosen_signals: signals_description += f"Signal '{short_name}': {title}\n{summary}\n\n" - return signals_description + return signals_description + + +def generate_action_texts(action_data: dict, active_signal: str = None) -> str: + """ + Generate a description of the actions. + + Args: + action_data (dict): A dictionary of actions data. + active_signal (str, optional): The active signal. Defaults to None. + Returns: + str: A description of the actions. -def generate_action_texts(action_data: dict): + """ actions = [a["name"] for a in action_data] action_descriptions = [a["description"] for a in action_data] action_text = "" for name, description in zip(actions, action_descriptions): - action_text += f"Action '{name}': {description}\n\n" - return action_text + if (name != "following_up") or (active_signal is None): + action_text += f"Action '{name}': {description}\n\n" + else: + action_text += f"Action '{name}': User is following up with another question about the {active_signal} signal that's being discussed just now.\n\n" # noqa: B950 + + return action_text + # Prepare the data signals_data = json.load(open(PATH_SIGNALS_DATA, "r")) -signals_dict = {s['short_name']: s for s in signals_data} -signals_descriptions = generate_signals_texts(signals_data) -signals = [s['short_name'] for s in signals_data] +signals_dict = {s["short_name"]: s for s in signals_data} +signals_descriptions = generate_signals_texts(signals_data) +signals = [s["short_name"] for s in signals_data] actions_data = json.load(open(PATH_ACTIONS, "r")) actions_descriptions = generate_action_texts(actions_data) -actions = [a['name'] for a in actions_data] +actions = [a["name"] for a in actions_data] -def predict_intent(user_message: str, messages: list) -> str: +def predict_intent(user_message: str, active_signal: str) -> str: """Detect the intent of the user's message. - + Args: user_message (str): The user's message. messages (list): The history of messages. - + Returns: str: The intent of the user's message. Possible outputs are: - "explain": The user wants to know more about a signal. 
@@ -106,273 +154,328 @@ def predict_intent(user_message: str, messages: list) -> str: all_messages = message_history + [message] function = FunctionTemplate.load(func_intent) response = TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=all_messages, - message_kwargs={"intents": actions_descriptions, "user_input": user_message}, - stream=False, - functions=[function.to_prompt()], - function_call={"name": "predict_intent"}, - ) - intent = json.loads(response['choices'][0]['message']['function_call']['arguments']) - return intent['prediction'] + model=selected_model, + temperature=temperature, + messages=all_messages, + message_kwargs={ + "intents": generate_action_texts(actions_data, active_signal=active_signal), + "user_input": user_message, + }, + stream=False, + functions=[function.to_prompt()], + function_call={"name": "predict_intent"}, + ) + intent = json.loads(response["choices"][0]["message"]["function_call"]["arguments"]) + + if CHECK_COSTS: + cost_input = ( + TokenCounter._count_tokens_from_messages( + messages=[m.to_prompt() for m in all_messages], model_name=selected_model + ) + * 0.01 + / 1000 + ) + print(f"Intent cost: {cost_input}") # noqa: T001 + + return intent["prediction"] def predict_top_signal(user_message: str, signals: list) -> str: """Predict the top signal from the user's message. - + Args: user_message (str): The user's message. - + Returns: str: The top signal from the user's message. """ - # Function call + # Function call func_top_signal = json.loads(open(path_func_top_signal).read()) - func_top_signal['parameters']['properties']['prediction']['enum'] = signals - + func_top_signal["parameters"]["properties"]["prediction"]["enum"] = signals + message = MessageTemplate.load(path_prompt_top_signal) function = FunctionTemplate.load(func_top_signal) response = TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=[message], - message_kwargs={"signals": signals_descriptions, "user_input": user_message}, - stream=False, - functions=[function.to_prompt()], - function_call={"name": "predict_top_signal"}, - ) - top_signal = json.loads(response['choices'][0]['message']['function_call']['arguments']) - return top_signal['prediction'] + model=selected_model, + temperature=temperature, + messages=[message], + message_kwargs={"signals": signals_descriptions, "user_input": user_message}, + stream=False, + functions=[function.to_prompt()], + function_call={"name": "predict_top_signal"}, + ) + top_signal = json.loads(response["choices"][0]["message"]["function_call"]["arguments"]) + return top_signal["prediction"] def predict_top_three_signals(user_message: str, allowed_signals: list) -> list: """Predict the top signal from the user's message. - + Args: user_message (str): The user's message. - + Returns: str: The top signal from the user's message. 
""" - # Function call + # Function call func_top_signals = json.loads(open(path_func_top_three_signals).read()) - func_top_signals['parameters']['properties']['prediction']['items']['enum'] = allowed_signals - print(func_top_signals) + func_top_signals["parameters"]["properties"]["prediction"]["items"]["enum"] = allowed_signals message = MessageTemplate.load(path_prompt_top_three_signals) function_top_three = FunctionTemplate.load(func_top_signals) signals_descriptions_ = generate_signals_texts(signals_data, allowed_signals) response = TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=[message], - message_kwargs={"signals": signals_descriptions_, "user_input": user_message}, - stream=False, - functions=[function_top_three.to_prompt()], - function_call={"name": "predict_top_signals"}, - ) - top_signals = json.loads(response['choices'][0]['message']['function_call']['arguments']) - print(message) - print(f"Prediction: {top_signals}") - print(response) - return top_signals['prediction'] - -def signals_bot(sidebar: bool = True) -> None: + model=selected_model, + temperature=temperature, + messages=[message], + message_kwargs={"signals": signals_descriptions_, "user_input": user_message}, + stream=False, + functions=[function_top_three.to_prompt()], + function_call={"name": "predict_top_signals"}, + ) + top_signals = json.loads(response["choices"][0]["message"]["function_call"]["arguments"]) + return top_signals["prediction"] + + +def signals_bot() -> None: """Explain me a concept like I'm 3.""" - # Define your custom CSS - # custom_css = """ - # - # """ - - # # Apply the custom CSS - # st.markdown(custom_css, unsafe_allow_html=True) - - st.title("Signals chatbot") - st.write("Let's discuss the future!") - - # First time running the app + # Define custom CSS + custom_css = """ + + """ + + # Apply the custom CSS + st.markdown(custom_css, unsafe_allow_html=True) + + st.title("Personalised futures: let our chatbot guide you through our Future Signals for 2024") + st.markdown( + "Undoubtedly, the rise of generative artificial intelligence (AI) has been one of the main trends of 2023, with ChatGPT chosen as [the word of the year](https://www.economist.com/culture/2023/12/07/our-word-of-the-year-for-2023) by *The Economist*. Reflecting on this trend, we have built an experimental generative AI chatbot of our own to help you engage more deeply with our [Signals for 2024](https://www.nesta.org.uk/feature/future-signals-2024/).\n\nThis is an experiment in creating a more interactive reading experience using 2023's big new technology. **Scroll down** to meet our chatbot, Scout, which will try to relate this year's Signals to you and your life. You can provide a little information about yourself and Scout will try to come up with ways that these Signals might be relevant to you.\n\nScout also provides a signal of the potential new ways we might interact with information in the future, with customised bots helping us explore and synthesise reams of written text, data, charts and videos to find what matters the most to us.\n\n**Guidance**\n\n*The chatbot uses OpenAI's GPT-4, a cutting-edge AI model. Nesta does not save the conversations, and OpenAI claims to delete all data in 30 days. 
**Nonetheless, please do not share any information that could identify you or that is sensitive or confidential.** Please remember, this is an experimental chatbot; it can make mistakes and 'hallucinate' - [another word of the year](https://www.cam.ac.uk/research/news/cambridge-dictionary-names-hallucinate-word-of-the-year-2023) - or show biases despite our efforts to instruct it to be inclusive and sensitive. After trying out the chatbot, we invite you to leave us feedback using [this form](https://forms.gle/UWcnpgKg9WG7JmPt5).*\n\n**Meet Scout...**", # noqa: B950 + unsafe_allow_html=True, # noqa: B950 + ) # noqa: B950 + + # First time running the app if "messages" not in st.session_state: - # Record of messages to display on the app + # Record of messages to display on the app st.session_state.messages = [] - # Record of messages to send to the LLM - st.session_state.history = [] - # Keep track of which state we're in - st.session_state.state = "start" + # Record of messages to send to the LLM + st.session_state["memory"] = InMemoryMessageHistory() + st.session_state["messages_intent"] = [] + st.session_state["messages_signal"] = [] + # Keep track of which state we're in + st.session_state.state = "start" # Fetch system and introduction messages st.session_state.signals = [] - + # Add system message to the history system_message = read_jsonl(PATH_SYSTEM)[0] system_message = MessageTemplate.load(system_message) system_message.format_message(**{"signals": signals_descriptions}) - st.session_state.history.append(system_message.to_prompt()) - print(system_message.to_prompt()) + st.session_state["memory"].add_message(system_message.to_prompt()) # Add the intro messages intro_messages = read_jsonl(PATH_INTRO) - print(intro_messages) for m in intro_messages: st.session_state.messages.append(m) - st.session_state.history.append(m) + st.session_state["memory"].add_message(m) + st.session_state["messages_intent"].append("intro") + st.session_state["messages_signal"].append("none") + # Keep count of the number of unique sessions + timestamp = current_time() + st.session_state["session_log"] = f"{timestamp}-{str(uuid.uuid4())}" + write_to_s3( + key=aws_key, + secret=aws_secret, + s3_path=f"{s3_path}/session-logs-signals", + filename="session_counter", + data={"session": st.session_state["session_log"], "time": timestamp}, + how="a", + ) # Display chat messages on app rerun - for message in st.session_state.messages: + for i, message in enumerate(st.session_state.messages): with st.chat_message(message["role"]): + if st.session_state["messages_intent"][i] == "new_signal": + signal_to_explain = st.session_state["messages_signal"][i] + st.image( + PATH_ILLUSTRATIONS + signals_dict[signal_to_explain]["img"], + caption="Illustration by Chen Wu", + use_column_width=True, + ) st.markdown(message["content"]) # Get user message user_message = st.chat_input("") if user_message: + updated_css = """ + + """ + st.markdown(updated_css, unsafe_allow_html=True) # Display user message with st.chat_message("user"): st.markdown(user_message) st.session_state.messages.append({"role": "user", "content": user_message}) - st.session_state.history.append({"role": "user", "content": user_message}) - + st.session_state["memory"].add_message({"role": "user", "content": user_message}) + st.session_state["messages_intent"].append("user") + st.session_state["messages_signal"].append("none") if st.session_state.state == "start": intent = "new_signal" st.session_state.user_info = user_message st.session_state.state = "chatting" 
else: - intent = predict_intent(user_message, st.session_state.history) - print(intent) - # intent = "following_up" + intent = predict_intent(user_message, active_signal=st.session_state.active_signal) if intent == "new_signal": - # Predict the signal to explain + # Filter out signals that have already been covered allowed_signals = [s for s in signals if s not in st.session_state.signals] + # Determine the most relevant signal to explain signal_to_explain = predict_top_signal(user_message, allowed_signals) + # Keep track of already discussed signals st.session_state.signals.append(signal_to_explain) st.session_state.active_signal = signal_to_explain - print(signal_to_explain) - print(f"I have these signals in memory: {st.session_state.signals}") - # Explain the signal + # Generate a message about the signal instruction = MessageTemplate.load(path_prompt_impact) - message_history = [MessageTemplate.load(m) for m in st.session_state.history] - message_history += [instruction] + message_history = st.session_state["memory"].get_messages(max_tokens=3000) + [instruction] with st.chat_message("assistant"): - message_placeholder = st.empty() - full_response = "" - for response in TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=message_history, - message_kwargs={ - "signal": signals_dict[signal_to_explain]['full_text'], - "user_input": st.session_state.user_info - }, - stream=True, - ): - full_response += response.choices[0].delta.get("content", "") - message_placeholder.markdown(full_response + "▌") - message_placeholder.markdown(full_response) - st.session_state.messages.append({"role": "assistant", "content": full_response}) - st.session_state.history.append({"role": "assistant", "content": full_response}) + # Show the signal image + st.image( + PATH_ILLUSTRATIONS + signals_dict[signal_to_explain]["img"], + caption="Illustration by Chen Wu", + use_column_width=True, + ) + # Type the response + full_response = llm_call( + selected_model, + temperature, + messages=message_history, + messages_placeholders={ + "signal": signals_dict[signal_to_explain]["full_text"], + "user_input": st.session_state.user_info, + }, + ) + st.session_state.messages.append({"role": "assistant", "content": full_response}) + st.session_state["memory"].add_message({"role": "assistant", "content": full_response}) + st.session_state["messages_intent"].append(copy.deepcopy(intent)) + st.session_state["messages_signal"].append(copy.deepcopy(signal_to_explain)) elif intent == "more_signals": - # Select the top 5 most relevant signals for the user - # (remove the seen signals) - # Provide an overview of the impacts of signal on the reader - # Ask which one the bot should elaborate on + # Filter out signals that have already been covered allowed_signals = [s for s in signals if s not in st.session_state.signals] + # Determine the top three signals to explain top_signals = predict_top_three_signals(st.session_state.user_info, allowed_signals) - print(allowed_signals) - print(top_signals) - print(top_signals[0:3]) - # Explain the signal - instruction = MessageTemplate.load(path_prompt_choice) top_signals_text = generate_signals_texts(signals_data, top_signals) - message_history = [MessageTemplate.load(m) for m in st.session_state.history] - message_history += [instruction] + # Generate a message about the three signals + instruction = MessageTemplate.load(path_prompt_choice) + message_history = st.session_state["memory"].get_messages(max_tokens=3000) + [instruction] with 
st.chat_message("assistant"): - message_placeholder = st.empty() - full_response = "" - for response in TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=message_history, - message_kwargs={ - "signals": top_signals_text, - "user_input": st.session_state.user_info - }, - stream=True, - ): - full_response += response.choices[0].delta.get("content", "") - message_placeholder.markdown(full_response + "▌") - message_placeholder.markdown(full_response) - st.session_state.messages.append({"role": "assistant", "content": full_response}) - st.session_state.history.append({"role": "assistant", "content": full_response}) + full_response = llm_call( + selected_model, + temperature, + messages=message_history, + messages_placeholders={"signals": top_signals_text, "user_input": st.session_state.user_info}, + ) + st.session_state.messages.append({"role": "assistant", "content": full_response}) + st.session_state["memory"].add_message({"role": "assistant", "content": full_response}) + st.session_state["messages_intent"].append(copy.deepcopy(intent)) + st.session_state["messages_signal"].append("none") elif intent == "following_up": - print(st.session_state.active_signal) - #Follow up the user's message + # Generate follow up message instruction = MessageTemplate.load(path_prompt_following_up) - message_history = [MessageTemplate.load(m) for m in st.session_state.history] - message_history += [instruction] + message_history = st.session_state["memory"].get_messages(max_tokens=3000) + [instruction] with st.chat_message("assistant"): - message_placeholder = st.empty() - full_response = "" - for response in TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=message_history, - message_kwargs={ - "signal": signals_dict[st.session_state.active_signal]['full_text'], - "user_input": user_message - }, - stream=True, - ): - full_response += response.choices[0].delta.get("content", "") - message_placeholder.markdown(full_response + "▌") - message_placeholder.markdown(full_response) - - st.session_state.messages.append({"role": "assistant", "content": full_response}) - st.session_state.history.append({"role": "assistant", "content": full_response}) - - # # Add user message to history - # prompt = prompt2() - # st.session_state.messages.append({"role": "user", "content": prompt.to_prompt()}) - # print(user_message) - # # Generate AI response - # with st.chat_message("assistant"): - # message_placeholder = st.empty() - # full_response = "" - # for response in TextGenerator.generate( - # model=selected_model, - # temperature=temperature, - # messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages], - # message_kwargs= None, - # stream=True, - # ): - # full_response += response.choices[0].delta.get("content", "") - # message_placeholder.markdown(full_response + "▌") - # message_placeholder.markdown(full_response) - # # Add AI response to history - # st.session_state.messages.append({"role": "assistant", "content": full_response}) - - -def llm_call( - selected_model: str, temperature: float, message: MessageTemplate, messages_placeholders: dict) -> str: + full_response = llm_call( + selected_model, + temperature, + messages=message_history, + messages_placeholders={ + "signal": signals_dict[st.session_state.active_signal]["full_text"], + "user_input": user_message, + }, + ) + st.session_state.messages.append({"role": "assistant", "content": full_response}) + st.session_state["memory"].add_message({"role": "assistant", 
"content": full_response}) + st.session_state["messages_intent"].append(copy.deepcopy(intent)) + st.session_state["messages_signal"].append("none") + + if CHECK_COSTS: + # Keep track of the number of messages + def transform_message(m: Union[MessageTemplate, dict]) -> dict: + """Transform all messages to dictionary format (quick hack)""" + try: + return m.to_prompt() + except AttributeError: + return m + + cost_input = ( + TokenCounter._count_tokens_from_messages( + messages=[transform_message(m) for m in message_history], model_name=selected_model + ) + * 0.01 + / 1000 + ) + cost_output = ( + TokenCounter._count_tokens_from_messages( + messages=[{"role": "assistant", "content": full_response}], model_name=selected_model + ) + * 0.03 + / 1000 + ) + cost_total = cost_input + cost_output + print(f"Total cost: {cost_total}") # noqa: T001 + + write_to_s3( + key=aws_key, + secret=aws_secret, + s3_path=f"{s3_path}/session-logs-signals", + filename="message_counter", + data={ + "session": st.session_state["session_log"], + "time": current_time(), + "intent": intent, + "signal": st.session_state.active_signal, + }, + how="a", + ) + + +def llm_call(selected_model: str, temperature: float, messages: MessageTemplate, messages_placeholders: dict) -> str: """Call the LLM""" message_placeholder = st.empty() full_response = "" for response in TextGenerator.generate( model=selected_model, temperature=temperature, - messages=[message], + messages=messages, message_kwargs=messages_placeholders, stream=True, ): @@ -384,18 +487,51 @@ def llm_call( return full_response -def prompt2(): - """ - Generate a prompt for an overview of the impact of signals on the user +def write_to_s3(key: str, secret: str, s3_path: str, filename: str, data: dict, how: str = "a") -> None: + """Write data to a jsonl file in S3. + + Parameters + ---------- + key + AWS access key ID. + + secret + AWS secret access key. + + s3_path + S3 bucket path. + + filename + Name of the file to write to. + + data + Data to write to the file. + + how + How to write to the file. Default is "a" for append. Use "w" to overwrite. + """ - prompt = MessageTemplate.load(data_path + "prompt2.json") - return prompt + fs = s3fs.S3FileSystem(key=key, secret=secret) + with fs.open(f"{s3_path}/{filename}.jsonl", how) as f: + f.write(f"{json.dumps(data)}\n") + + +def current_time() -> str: + """Return the current time as a string. Used as part of the session UUID.""" + # Get current date and time + current_datetime = datetime.now() + + # Convert to a long number format + datetime_string = current_datetime.strftime("%Y%m%d%H%M%S") + + return datetime_string + def main() -> None: """Run the app.""" auth_openai() - signals_bot(sidebar=False) + signals_bot() main() diff --git a/src/genai/sandbox/signals/data/00_system.jsonl b/src/genai/sandbox/signals/data/00_system.jsonl index 50bdd53..de1bf15 100644 --- a/src/genai/sandbox/signals/data/00_system.jsonl +++ b/src/genai/sandbox/signals/data/00_system.jsonl @@ -1 +1 @@ -{"role": "user", "content": "###Instructions###\nYou are a helpful, kind, intelligent and polite futurist. You work for the UK innovation agency Nesta, and your task is to engage the user about the future signals and trends that Nesta has researched, by helping the user imagine and appreciate how the signals will impact their life. You will personalise the user experience by taking the information provided by the user and tailoring your explanation to the user background. Here are the future signals that you can talk about: {signals}. 
Do not discuss other future signals as this is not part of this year's Nesta's Signals edition."} +{"role": "system", "content": "###Instructions###\nYou are a helpful, kind, intelligent and polite futurist. You work for the United Kingdom's innovation agency Nesta, and your task is to engage the user about the future signals and trends that Nesta has researched, by helping the user imagine and appreciate how the signals will impact their life. You will personalise the user experience by taking the information provided by the user and tailoring your explanation to the user background. You are also an expert in equity, diversity and inclusion and your answers will be inclusive, the answers will never be based on negative stereotypes, you will never offend and you will exercise sensitivity about topics such as ethnicity and gender. Here are the future signals that you can talk about: {signals}. Do not discuss other future signals as this is not part of this year's Nesta's Signals edition."} diff --git a/src/genai/sandbox/signals/data/01_intro.jsonl index 977da88..cb914d0 100644 --- a/src/genai/sandbox/signals/data/01_intro.jsonl +++ b/src/genai/sandbox/signals/data/01_intro.jsonl @@ -1,3 +1,3 @@ -{"role": "assistant", "content": "Hi, I’m Scout, Discovery Hub’s experimental AI assistant which helps people explore and interpret signals about the future. ✨"} -{"role": "assistant", "content": "This year we have collected signals about a variety of topics, from green energy to education, to health and even sleep."} -{"role": "assistant", "content": "Tell me one or two things about you and your interests, so that I can suggest which future signals might be the most relevant to you!"} +{"role": "assistant", "content": "Hi, I’m Scout, [Discovery Hub’s](https://www.nesta.org.uk/discovery/) experimental AI assistant which helps people explore and interpret signals about the future. ✨"} +{"role": "assistant", "content": "This year we have collected signals about a variety of topics, from 🏥 virtual hospital wards and 🧠 neuroprivacy to ⚡ space solar power and 🧪 data poisoning."} +{"role": "assistant", "content": "Tell me one or two things about you and your interests, so that I can suggest which future signals might be the most relevant to you! You can always refresh your browser to restart the conversation."} diff --git a/src/genai/sandbox/signals/data/02_signal_impact.jsonl index f293084..fbdddeb 100644 --- a/src/genai/sandbox/signals/data/02_signal_impact.jsonl +++ b/src/genai/sandbox/signals/data/02_signal_impact.jsonl @@ -1,2 +1,2 @@ -{"role": "user", "content": "Start your answer by explaining in one clear sentence how the selected future signal might be relevant to the user, given the user information and conversation history. Then describe three ways how the selected future signal might impact them. Keep these descriptions short, two-three sentences at most. Finish your answer by encouraging the user to ask questions about this signal (note that you will try your best to answer them) or suggest to ask about the other future signals. Remember that you must be patient and never offend or be aggressive. 
\n\n###Future signal###{signal}\n\n###User information### Here is what the user told you about themselves: {user_input}.\n\n###Answer###" -} \ No newline at end of file +{"role": "user", "content": "Start your answer by summarising the signal in one clear, concise sentence and then follow by explaining in another clear sentence how the selected future signal might be relevant to the user, given the user information and conversation history. Then describe three ways how the selected future signal might impact them. Keep these descriptions short, two-three sentences at most. Finish your answer by encouraging the user to ask questions about this signal (note that you will try your best to answer them) or suggest to ask about the other future signals. Remember that you must be patient and never offend or be aggressive. \n\n###Future signal###{signal}\n\n###User information### Here is what the user told you about themselves: {user_input}.\n\n###Answer###" +} diff --git a/src/genai/sandbox/signals/data/03_signal_choice.jsonl b/src/genai/sandbox/signals/data/03_signal_choice.jsonl index 94999bd..b7e979a 100644 --- a/src/genai/sandbox/signals/data/03_signal_choice.jsonl +++ b/src/genai/sandbox/signals/data/03_signal_choice.jsonl @@ -1,2 +1,2 @@ {"role": "user", "content": "Start your answer by explaining each of the signals in one clear sentence (use similar language to the signals descriptions). If possible, indicate how a signal might be relevant to the user, given the user information and conversation history. Finish your answer by asking the user to choose one of the signals to hear more about it. Remember that you must be patient and never offend or be aggressive. \n\n###Future signals###{signals}\n\n###User information### Here is what the user told you about themselves: {user_input}.\n\n###Answer###" -} \ No newline at end of file +} diff --git a/src/genai/sandbox/signals/data/04_follow_up.jsonl b/src/genai/sandbox/signals/data/04_follow_up.jsonl index 979a49e..2abf91e 100644 --- a/src/genai/sandbox/signals/data/04_follow_up.jsonl +++ b/src/genai/sandbox/signals/data/04_follow_up.jsonl @@ -1,2 +1,2 @@ {"role": "user", "content": "Answer to the user's most recent message. Be as concise or detailed as necessary. Use the information from the future signal description when relevant. Keep your answers conversational and three to four sentences long at most. 
\n\n###Future signal###{signal}\n\n###User information### Here is what the user told you: {user_input}.\n\n###Answer###" -} \ No newline at end of file +} diff --git a/src/genai/sandbox/signals/data/func_intent.json index 446d281..f9def2a 100644 --- a/src/genai/sandbox/signals/data/func_intent.json +++ b/src/genai/sandbox/signals/data/func_intent.json @@ -12,4 +12,4 @@ }, "required": ["prediction"] } - } \ No newline at end of file + } diff --git a/src/genai/sandbox/signals/data/func_top_signal.json index 940184e..e01dc3c 100644 --- a/src/genai/sandbox/signals/data/func_top_signal.json +++ b/src/genai/sandbox/signals/data/func_top_signal.json @@ -12,4 +12,4 @@ }, "required": ["prediction"] } - } \ No newline at end of file + } diff --git a/src/genai/sandbox/signals/data/func_top_three_signals.json index 2c6333c..43fc892 100644 --- a/src/genai/sandbox/signals/data/func_top_three_signals.json +++ b/src/genai/sandbox/signals/data/func_top_three_signals.json @@ -15,4 +15,4 @@ }, "required": ["prediction"] } - } \ No newline at end of file + } diff --git a/src/genai/sandbox/signals/data/intent_actions.json index 4f7e34e..9f6fc8b 100644 --- a/src/genai/sandbox/signals/data/intent_actions.json +++ b/src/genai/sandbox/signals/data/intent_actions.json @@ -1,7 +1,7 @@ [ { "name": "new_signal", - "description": "User wishes to change the topic and talk about an new future signal. Alternatively, the user has been just presented with a set of future signal options by the assistant, and the user has now chosen which signal to talk about more." + "description": "User wishes to change the topic and talk about a different future signal. Alternatively, the user has been just presented with a set of future signal options by the assistant, and the user has now chosen which signal to talk about more." }, { "name": "more_signals", @@ -11,4 +11,4 @@ "name": "following_up", "description": "User is following up with another question about the signal that's being discussed just now." } -] \ No newline at end of file +] diff --git a/src/genai/sandbox/signals/data/signals_2023.json index 80593e0..5d176bc 100644 --- a/src/genai/sandbox/signals/data/signals_2023.json +++ b/src/genai/sandbox/signals/data/signals_2023.json @@ -33,5 +33,5 @@ "summary": "The article discusses the growing trend of community-led initiatives in the UK to reduce energy bills and decarbonize homes. It highlights the case of Stithians, a village in Cornwall, where a project is replacing traditional heating systems with efficient ground source heat pumps, benefiting around 250 homes. The article also mentions other models of community energy efforts, such as peer-to-peer trading, group purchasing schemes, and community energy generation, and emphasizes the potential of these initiatives in addressing energy challenges, despite some difficulties in securing investment and government support.", "full_text": "Everybody needs green neighbours Communities are increasingly coming together to reduce energy bills and decarbonise homes. In September 2022, a small village in Cornwall called Stithians echoed to the sounds of loud drilling. But, unusually, the noise could well have been a welcome sound to those living nearby. 
It heralded the creation of boreholes designed to replace existing heating systems with efficient ground source heat pumps. Around 250 homes, many of which rely on expensive oil heating or electric storage heaters, are set to benefit from the scheme run by Kensa Utilities and funded by the European Regional Development Fund. The need for cheaper, cleaner energy is more urgent than ever. Of the 630,000 excess winter deaths in England and Wales in 2020-21, cold homes are thought to have contributed to around 13,500 and fuel poverty around 6,300. With living costs and energy bills at a record high, the number is likely to be much higher this winter. Most householders are likely to respond by turning down their thermostat and boiler flow temperature, improving insulation or switching off energy-guzzling appliances. But, as in Stithians, there are a growing number of local initiatives where neighbours are pooling resources, skills or knowledge with their wider community. In many cases, these initiatives are building on ideas that have been tested elsewhere. There are more than 300 community energy organisations in the UK that provide community-led renewable energy or run local energy projects, and collectively they saved consumers £3.3 million off their energy bills in 2021. It is an approach growing in popularity as households search for cheaper, more resilient and sustainable options for heat and power this winter. Last year saw a 38% increase in the delivery of community-led energy efficiency and energy-saving services. This wave of community energy and heating initiatives spans a diverse spectrum. It includes peer-to-peer trading models (where energy or heat is traded between individual households or communities), group purchasing schemes (that reduce the cost to the individual through leveraging economies of scale) and community energy generation (where a community owns or controls the means to produce renewable energy locally). These approaches vary in size and ambition, from providing heat networks for a whole community (as in Stithians) to disrupter companies building energy production on a national scale, but they are all based on the principle of communities sharing resources and knowledge. What do these models look like in action? Nonprofits such as Energy Local are enabling peer-to-peer trading where households with energy generation capabilities can club together and exchange local, clean energy for an agreed price. Solar Together is an example of group purchasing in London, responsible for installing solar panels in more than 2,100 homes in London and securing bulk-buying discounts for residents. Other communities have taken energy generation into their own hands. Bristol Energy Cooperative generates solar energy for its local community and, thanks to solar roofs, has facilitated more than £350,000 of community benefit payments since being established in 2011. Meanwhile, Ripple Energy runs a large-scale scheme where people anywhere in the country can buy shares in cooperatively-owned wind farms in order to reduce their energy bills. It’s been so successful that it’s now oversubscribed and seeking to increase the number of wind farms it owns. Forging closer community links has some other, less obvious, positive outcomes. Social feedback can have a powerful effect on people’s choices about energy consumption. There is evidence that giving households information about how much energy they spend compared to their neighbours can lead to small but enduring energy savings. 
Neighbours are also well-positioned to help those who are harder to reach by supporting each other to increase the energy efficiency of their homes and providing advice and referrals for vulnerable residents at risk of fuel poverty. Despite the benefits local energy and heating initiatives bring to residents, it has generally been difficult for some of these initiatives (such as community energy projects) to make a case for investment, particularly given the lack of tax relief. There is also minimal government support for community and home energy generation, illustrated by the removal of energy generation incentives such as the feed-in tariff. However, some of these emerging projects have the potential to overcome the challenges around scale and sustainability that have prevented community energy projects from scaling to date. For example, the district heating project in Stithians acts as a proof of concept when it comes to bringing together the private sector and community actors. The scale of the energy and climate challenges we face demands a multi-pronged approach. We anticipate that in coming years this will give rise to a variety of new business models facilitating collective action on heating and energy. These could serve as important tools in the effort to decarbonise homes at scale and reduce energy bills.", "url": "https://www.nesta.org.uk/feature/future-signals-2023/everybody-needs-green-neighbours/" - } -] \ No newline at end of file + } +] diff --git a/src/genai/sandbox/signals/signals_app.py b/src/genai/sandbox/signals/signals_app.py deleted file mode 100644 index 832bf64..0000000 --- a/src/genai/sandbox/signals/signals_app.py +++ /dev/null @@ -1,398 +0,0 @@ -import streamlit as st - -from genai import MessageTemplate, FunctionTemplate -from genai.eyfs import TextGenerator -from genai.streamlit_pages.utils import reset_state -import json -import os -import openai -from dotenv import load_dotenv -load_dotenv() - -selected_model = "gpt-4-1106-preview" -temperature = 0.6 - -# Paths to prompts -PROMPT_PATH = "src/genai/sandbox/signals/data/" -PATH_SIGNALS_DATA = PROMPT_PATH + "signals_2023.json" -PATH_SYSTEM = PROMPT_PATH + "00_system.jsonl" -PATH_INTRO = PROMPT_PATH + "01_intro.jsonl" -PATH_ACTIONS = PROMPT_PATH + "intent_actions.json" - -# Top signal function -path_func_top_signal = PROMPT_PATH + "func_top_signal.json" -path_prompt_top_signal = PROMPT_PATH + "prompt_top_signal.jsonl" -# Top three signals function -path_func_top_three_signals = PROMPT_PATH + "func_top_three_signals.json" -path_prompt_top_three_signals = PROMPT_PATH + "prompt_top_three_signals.jsonl" -# Intent detection function -path_func_intent = PROMPT_PATH + "func_intent.json" -path_prompt_intent = PROMPT_PATH + "prompt_intent.jsonl" -# Prompt: Impact on the user -path_prompt_impact = PROMPT_PATH + "02_signal_impact.jsonl" -# Prompt: Summary of different signals -path_prompt_choice = PROMPT_PATH + "03_signal_choice.jsonl" -# Prompt: Following up on user's question -path_prompt_following_up = PROMPT_PATH + "04_follow_up.jsonl" - -def auth_openai() -> None: - """Authenticate with OpenAI.""" - try: - openai.api_key = os.environ["OPENAI_API_KEY"] - except Exception: - openai.api_key = st.secrets["OPENAI_API_KEY"] - - -def read_jsonl(path: str) -> list: - """Read a JSONL file.""" - with open(path, "r") as f: - return [json.loads(line) for line in f.readlines()] - - -def generate_signals_texts(signals_data: dict, chosen_signals: list = None): - signals = [signal["short_name"] for signal in signals_data] - 
signals_titles = [signal["title"] for signal in signals_data] - signals_summaries = [signal["summary"] for signal in signals_data] - - if chosen_signals is None: - chosen_signals = signals - - # Combine titles and summaries into a single string - signals_description = "" - for short_name, title, summary in zip(signals, signals_titles, signals_summaries): - if short_name in chosen_signals: - signals_description += f"Signal '{short_name}': {title}\n{summary}\n\n" - - return signals_description - - -def generate_action_texts(action_data: dict): - actions = [a["name"] for a in action_data] - action_descriptions = [a["description"] for a in action_data] - action_text = "" - for name, description in zip(actions, action_descriptions): - action_text += f"Action '{name}': {description}\n\n" - return action_text - -# Prepare the data -signals_data = json.load(open(PATH_SIGNALS_DATA, "r")) -signals_dict = {s['short_name']: s for s in signals_data} -signals_descriptions = generate_signals_texts(signals_data) -signals = [s['short_name'] for s in signals_data] - -actions_data = json.load(open(PATH_ACTIONS, "r")) -actions_descriptions = generate_action_texts(actions_data) -actions = [a['name'] for a in actions_data] - - -def predict_intent(user_message: str, messages: list) -> str: - """Detect the intent of the user's message. - - Args: - user_message (str): The user's message. - messages (list): The history of messages. - - Returns: - str: The intent of the user's message. Possible outputs are: - - "explain": The user wants to know more about a signal. - - "more_signals": The user wants to know more about a signal. - - "follow_up": The user wants to know more about a signal. - - "next_steps": The user wants to know more about a signal. - - "none": The user's message does not match any intent. - """ - func_intent = json.loads(open(path_func_intent).read()) - message_history = [MessageTemplate.load(m) for m in st.session_state.messages] - message = MessageTemplate.load(path_prompt_intent) - all_messages = message_history + [message] - function = FunctionTemplate.load(func_intent) - response = TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=all_messages, - message_kwargs={"intents": actions_descriptions, "user_input": user_message}, - stream=False, - functions=[function.to_prompt()], - function_call={"name": "predict_intent"}, - ) - intent = json.loads(response['choices'][0]['message']['function_call']['arguments']) - return intent['prediction'] - - -def predict_top_signal(user_message: str, signals: list) -> str: - """Predict the top signal from the user's message. - - Args: - user_message (str): The user's message. - - Returns: - str: The top signal from the user's message. 
- """ - # Function call - func_top_signal = json.loads(open(path_func_top_signal).read()) - func_top_signal['parameters']['properties']['prediction']['enum'] = signals - - message = MessageTemplate.load(path_prompt_top_signal) - function = FunctionTemplate.load(func_top_signal) - - response = TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=[message], - message_kwargs={"signals": signals_descriptions, "user_input": user_message}, - stream=False, - functions=[function.to_prompt()], - function_call={"name": "predict_top_signal"}, - ) - top_signal = json.loads(response['choices'][0]['message']['function_call']['arguments']) - return top_signal['prediction'] - - -def predict_top_three_signals(user_message: str, signals: list) -> list: - """Predict the top signal from the user's message. - - Args: - user_message (str): The user's message. - - Returns: - str: The top signal from the user's message. - """ - # Function call - func_top_signals = json.loads(open(path_func_top_three_signals).read()) - func_top_signals['parameters']['properties']['prediction']['items']['enum'] = signals - print(func_top_signals) - message = MessageTemplate.load(path_prompt_top_three_signals) - function_top_three = FunctionTemplate.load(func_top_signals) - - response = TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=[message], - message_kwargs={"signals": signals_descriptions, "user_input": user_message}, - stream=False, - functions=[function_top_three.to_prompt()], - function_call={"name": "predict_top_signals"}, - ) - top_signals = json.loads(response['choices'][0]['message']['function_call']['arguments']) - print(message) - print(f"Prediction: {top_signals}") - print(response) - return top_signals['prediction'] - -def signals_bot(sidebar: bool = True) -> None: - """Explain me a concept like I'm 3.""" - - # Define your custom CSS - # custom_css = """ - # - # """ - - # # Apply the custom CSS - # st.markdown(custom_css, unsafe_allow_html=True) - - st.title("Signals chatbot") - st.write("Let's discuss the future!") - - # First time running the app - if "messages" not in st.session_state: - # Record of messages to display on the app - st.session_state.messages = [] - # Record of messages to send to the LLM - st.session_state.history = [] - # Keep track of which state we're in - st.session_state.state = "start" - # Fetch system and introduction messages - st.session_state.signals = [] - - # Add system message to the history - system_message = read_jsonl(PATH_SYSTEM)[0] - system_message = MessageTemplate.load(system_message) - system_message.format_message(**{"signals": signals_descriptions}) - st.session_state.history.append(system_message.to_prompt()) - print(system_message.to_prompt()) - # Add the intro messages - intro_messages = read_jsonl(PATH_INTRO) - print(intro_messages) - for m in intro_messages: - st.session_state.messages.append(m) - st.session_state.history.append(m) - - # Display chat messages on app rerun - for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - - # Get user message - user_message = st.chat_input("") - if user_message: - # Display user message - with st.chat_message("user"): - st.markdown(user_message) - st.session_state.messages.append({"role": "user", "content": user_message}) - st.session_state.history.append({"role": "user", "content": user_message}) - - if st.session_state.state == "start": - intent = "new_signal" - st.session_state.user_info = 
user_message - st.session_state.state = "chatting" - else: - intent = predict_intent(user_message, st.session_state.history) - print(intent) - # intent = "following_up" - - if intent == "new_signal": - # Predict the signal to explain - allowed_signals = [s for s in signals if s not in st.session_state.signals] - signal_to_explain = predict_top_signal(user_message, allowed_signals) - st.session_state.signals.append(signal_to_explain) - st.session_state.active_signal = signal_to_explain - print(signal_to_explain) - print(f"I have these signals in memory: {st.session_state.signals}") - # Explain the signal - instruction = MessageTemplate.load(path_prompt_impact) - message_history = [MessageTemplate.load(m) for m in st.session_state.history] - message_history += [instruction] - with st.chat_message("assistant"): - message_placeholder = st.empty() - full_response = "" - for response in TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=message_history, - message_kwargs={ - "signal": signals_dict[signal_to_explain]['full_text'], - "user_input": st.session_state.user_info - }, - stream=True, - ): - full_response += response.choices[0].delta.get("content", "") - message_placeholder.markdown(full_response + "▌") - message_placeholder.markdown(full_response) - st.session_state.messages.append({"role": "assistant", "content": full_response}) - st.session_state.history.append({"role": "assistant", "content": full_response}) - - elif intent == "more_signals": - # Select the top 5 most relevant signals for the user - # (remove the seen signals) - # Provide an overview of the impacts of signal on the reader - # Ask which one the bot should elaborate on - allowed_signals = [s for s in signals if s not in st.session_state.signals] - top_signals = predict_top_three_signals(st.session_state.user_info, allowed_signals) - print(allowed_signals) - print(top_signals) - print(top_signals[0:3]) - # Explain the signal - instruction = MessageTemplate.load(path_prompt_choice) - top_signals_text = generate_signals_texts(signals_data, top_signals) - message_history = [MessageTemplate.load(m) for m in st.session_state.history] - message_history += [instruction] - with st.chat_message("assistant"): - message_placeholder = st.empty() - full_response = "" - for response in TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=message_history, - message_kwargs={ - "signals": top_signals_text, - "user_input": st.session_state.user_info - }, - stream=True, - ): - full_response += response.choices[0].delta.get("content", "") - message_placeholder.markdown(full_response + "▌") - message_placeholder.markdown(full_response) - st.session_state.messages.append({"role": "assistant", "content": full_response}) - st.session_state.history.append({"role": "assistant", "content": full_response}) - - elif intent == "following_up": - #Follow up the user's message - instruction = MessageTemplate.load(path_prompt_following_up) - message_history = [MessageTemplate.load(m) for m in st.session_state.history] - message_history += [instruction] - with st.chat_message("assistant"): - message_placeholder = st.empty() - full_response = "" - for response in TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=message_history, - message_kwargs={ - "signal": signals_dict[st.session_state.active_signal]['full_text'], - "user_input": user_message - }, - stream=True, - ): - full_response += response.choices[0].delta.get("content", "") - 
message_placeholder.markdown(full_response + "▌") - message_placeholder.markdown(full_response) - - st.session_state.messages.append({"role": "assistant", "content": full_response}) - st.session_state.history.append({"role": "assistant", "content": full_response}) - - # # Add user message to history - # prompt = prompt2() - # st.session_state.messages.append({"role": "user", "content": prompt.to_prompt()}) - # print(user_message) - # # Generate AI response - # with st.chat_message("assistant"): - # message_placeholder = st.empty() - # full_response = "" - # for response in TextGenerator.generate( - # model=selected_model, - # temperature=temperature, - # messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages], - # message_kwargs= None, - # stream=True, - # ): - # full_response += response.choices[0].delta.get("content", "") - # message_placeholder.markdown(full_response + "▌") - # message_placeholder.markdown(full_response) - # # Add AI response to history - # st.session_state.messages.append({"role": "assistant", "content": full_response}) - - -def llm_call( - selected_model: str, temperature: float, message: MessageTemplate, messages_placeholders: dict) -> str: - """Call the LLM""" - message_placeholder = st.empty() - full_response = "" - for response in TextGenerator.generate( - model=selected_model, - temperature=temperature, - messages=[message], - message_kwargs=messages_placeholders, - stream=True, - ): - full_response += response.choices[0].delta.get("content", "") - message_placeholder.markdown(full_response + "▌") - - message_placeholder.markdown(full_response) - - return full_response - - -def prompt2(): - """ - Generate a prompt for an overview of the impact of signals on the user - """ - prompt = MessageTemplate.load(data_path + "prompt2.json") - return prompt - -def main() -> None: - """Run the app.""" - auth_openai() - - signals_bot(sidebar=False) - - -main() diff --git a/src/genai/sandbox/signals/signals_test.ipynb b/src/genai/sandbox/signals/signals_test.ipynb index 6c64180..404fac6 100644 --- a/src/genai/sandbox/signals/signals_test.ipynb +++ b/src/genai/sandbox/signals/signals_test.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 233, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -27,9 +27,18 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/karlis.kanders/Documents/code/discovery_generative_ai/.venv/lib/python3.9/site-packages/pinecone/index.py:4: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from tqdm.autonotebook import tqdm\n" + ] + } + ], "source": [ "from genai.eyfs import (\n", " TextGenerator,\n", @@ -39,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": 184, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -75,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 170, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -88,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": 135, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -258,17 +267,17 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ - "path_func_top_signals = \"data/func_top_signals.json\"\n", - "path_prompt_top_signals = \"data/prompt_top_signals.jsonl\"" + "path_func_top_signals = \"data/func_top_three_signals.json\"\n", + "path_prompt_top_signals = \"data/prompt_top_three_signals.jsonl\"" ] }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -278,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -288,7 +297,7 @@ " 'content': '###Instructions### Predict which three of the following future signals are the most relevant to user input. You have to choose three of these signals. \\n\\n###Future signal summaries###\\n{signals}\\n\\n###User input:\\n{user_input}'}]" ] }, - "execution_count": 105, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -300,7 +309,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -310,16 +319,16 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ - "user_input = \"I like burgers\"" + "user_input = \"I am a parent\"" ] }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -336,20 +345,18 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'prediction': ['robochefs',\n", - " 'abundant_energy',\n", - " 'baby_boom',\n", - " 'hidden_figures',\n", - " 'green_neighbours']}" + "{'prediction': ['Pronatalism vs pro-family',\n", + " 'Hidden Figures',\n", + " 'Green neighbours']}" ] }, - "execution_count": 118, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -359,6 +366,26 @@ "answer" ] }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Pronatalism vs pro-family', 'Hidden Figures', 'Green neighbours']" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "answer['prediction'][0:3]" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -577,7 +604,7 @@ }, { "cell_type": "code", - "execution_count": 211, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [