Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Image handling is currently broken #156

Merged
merged 8 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 48 additions & 31 deletions GPT/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def gpt_blend(source_text: str, destination_text: str) -> None:
"""

result = gpt_query(format_message(prompt), format_message(source_text))
actions.user.gpt_insert_response(extract_message(result), "paste")
actions.user.gpt_insert_response(result, "paste")

def gpt_blend_list(source_text: list[str], destination_text: str) -> None:
"""Blend all the source text as a list and send it to the destination"""
Expand Down Expand Up @@ -137,10 +137,10 @@ def gpt_apply_prompt(prompt: str, source: str = "", destination: str = ""):
"""Apply an arbitrary prompt to arbitrary text"""

text_to_process: GPTMessageItem = actions.user.gpt_get_source_text(source)

# If after creating the message there wasn't anything in the text_to_process, set it to None so
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This check was blocking images from being processed

# we don't send meaningless data to the model
if text_to_process.get("text", "") == "":
if (
text_to_process.get("text", "") == ""
and text_to_process.get("image_url", "") == ""
):
text_to_process = None # type: ignore

# Handle special cases in the prompt
Expand All @@ -150,15 +150,15 @@ def gpt_apply_prompt(prompt: str, source: str = "", destination: str = ""):
prompt = "Generate text that satisfies the question or request given in the input."

response = gpt_query(format_message(prompt), text_to_process, destination)
extracted_text = extract_message(response)

actions.user.gpt_insert_response(extracted_text, destination)
actions.user.gpt_insert_response(response, destination)
return response

def gpt_pass(source: str = "", destination: str = "") -> None:
"""Passes a response from source to destination"""
source_text = extract_message(actions.user.gpt_get_source_text(source))
actions.user.gpt_insert_response(source_text, destination)
actions.user.gpt_insert_response(
actions.user.gpt_get_source_text(source), destination
)

def gpt_help() -> None:
"""Open the GPT help file in the web browser"""
Expand Down Expand Up @@ -192,7 +192,7 @@ def gpt_reformat_last(how_to_reformat: str) -> str:
raise Exception("No text to reformat")

def gpt_insert_response(
result: str,
gpt_message: GPTMessageItem,
method: str = "",
cursorless_destination: Any = None,
) -> None:
Expand All @@ -208,76 +208,93 @@ def gpt_insert_response(
GPTState.thread_enabled
and confirmation_gui.showing
and not method == "window"
# If they ask for thread or newThread specifically,
# it should be pushed to the thread and not just refreshed
and not method == "thread"
and not method == "newThread"
):
# Skip inserting the response if the user is just viewing the thread in the window
actions.user.confirmation_gui_refresh_thread()
return

match method:
case "thread" | "newThread" as t:
if t == "newThread":
GPTState.new_thread()
GPTState.push_thread(format_messages("user", [gpt_message]))
actions.user.confirmation_gui_refresh_thread()
return

if gpt_message.get("type") != "text":
actions.app.notify(
f"Tried to insert an image to {method}, but that is not currently supported. To insert an image to this destination use a prompt to convert it to text."
)
return

message_text_no_images = extract_message(gpt_message)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extract message currently returns text so if there's an image here it will return an empty string

match method:
case "above":
actions.key("left")
actions.edit.line_insert_up()
GPTState.last_was_pasted = True
actions.user.paste(result)
actions.user.paste(message_text_no_images)
case "below":
actions.key("right")
actions.edit.line_insert_down()
GPTState.last_was_pasted = True
actions.user.paste(result)
actions.user.paste(message_text_no_images)
case "clipboard":
clip.set_text(result)
clip.set_text(message_text_no_images)
case "snip":
actions.user.insert_snippet(result)
actions.user.insert_snippet(message_text_no_images)
case "context":
GPTState.push_context(format_message(result))
GPTState.push_context(gpt_message)
case "newContext":
GPTState.clear_context()
GPTState.push_context(format_message(result))
case "thread":
GPTState.push_thread(format_messages("user", [format_message(result)]))
case "newThread":
GPTState.new_thread()
GPTState.push_thread(format_messages("user", [format_message(result)]))
GPTState.push_context(gpt_message)
case "appendClipboard":
if clip.text() is not None:
clip.set_text(clip.text() + "\n" + result) # type: ignore Unclear why this is throwing a type error in pylance
clip.set_text(clip.text() + "\n" + message_text_no_images) # type: ignore Unclear why this is throwing a type error in pylance
else:
clip.set_text(result)
clip.set_text(message_text_no_images)
case "browser":
builder = Builder()
builder.h1("Talon GPT Result")
for line in result.split("\n"):
for line in message_text_no_images.split("\n"):
builder.p(line)
builder.render()
case "textToSpeech":
try:
actions.user.tts(result)
actions.user.tts(message_text_no_images)
except KeyError:
notify("GPT Failure: text to speech is not installed")

# Although we can insert to a cursorless destination, the cursorless_target capture
# Greatly increases DFA compliation times and should be avoided if possible
case "cursorless":
actions.user.cursorless_insert(cursorless_destination, result)
actions.user.cursorless_insert(
cursorless_destination, message_text_no_images
)
# Don't add to the window twice if the thread is enabled
case "window":
# If there was prior text in the confirmation GUI and the user
# explicitly passed new text to the gui, clear the old result
GPTState.text_to_confirm = result
actions.user.confirmation_gui_append(result)
GPTState.text_to_confirm = message_text_no_images
actions.user.confirmation_gui_append(message_text_no_images)
case "chain":
GPTState.last_was_pasted = True
actions.user.paste(result)
actions.user.paste(message_text_no_images)
actions.user.gpt_select_last()

case "paste":
GPTState.last_was_pasted = True
actions.user.paste(result)
actions.user.paste(message_text_no_images)
# If the user doesn't specify a method assume they want to paste.
# However if they didn't specify a method when the confirmation gui
# is showing, assume they don't want anything to be inserted
case _ if not confirmation_gui.showing:
GPTState.last_was_pasted = True
actions.user.paste(result)
actions.user.paste(message_text_no_images)
# Don't do anything if none of the previous conditions were valid
case _:
pass
Expand Down
5 changes: 5 additions & 0 deletions lib/modelState.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ def disable_thread(cls):
@classmethod
def push_context(cls, context: GPTMessageItem):
"""Add the selected text to the stored context"""
if context.get("type") != "text":
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This ensures that images don't get inserted into the system messages since that would cause an error

actions.app.notify(
"Only text can be added to context. To add images, try using a prompt to summarize or otherwise describe the image to the context."
)
return
cls.context += [context]
actions.app.notify("Appended user context")

Expand Down