-
Notifications
You must be signed in to change notification settings - Fork 15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor llm iii #365
Refactor llm iii #365
Changes from all commits
9db90d4
33c5cf5
ba65923
4d05375
3cc37bf
b7f02a7
9d0903e
3bd1e86
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
from alphastats.gui.utils.gpt_helper import ( | ||
get_assistant_functions, | ||
get_general_assistant_functions, | ||
get_protein_id_for_gene_name, | ||
get_subgroups_for_each_group, | ||
perform_dimensionality_reduction, | ||
) | ||
|
@@ -108,7 +109,7 @@ def _get_tools(self) -> List[Dict[str, Any]]: | |
if self.metadata is not None and self._gene_to_prot_id_map is not None: | ||
tools += ( | ||
*get_assistant_functions( | ||
gene_to_prot_id_dict=self._gene_to_prot_id_map, | ||
gene_to_prot_id_map=self._gene_to_prot_id_map, | ||
metadata=self.metadata, | ||
subgroups_for_each_group=get_subgroups_for_each_group( | ||
self.metadata | ||
|
@@ -133,6 +134,7 @@ def truncate_conversation_history(self, max_tokens: int = 100000): | |
""" | ||
total_tokens = sum(len(m["content"].split()) for m in self.messages) | ||
while total_tokens > max_tokens and len(self.messages) > 1: | ||
# TODO messages should still be displayed! | ||
removed_message = self.messages.pop(0) | ||
total_tokens -= len(removed_message["content"].split()) | ||
|
||
|
@@ -179,24 +181,39 @@ def execute_function( | |
If the function is not implemented or the dataset is not available | ||
""" | ||
try: | ||
if function_name == "get_gene_function": | ||
# TODO log whats going on | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What happened to this TODO? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's now the option to show the whole conversation. |
||
return get_gene_function(**function_args) | ||
elif function_name == "get_enrichment_data": | ||
return get_enrichment_data(**function_args) | ||
elif function_name == "perform_dimensionality_reduction": | ||
return perform_dimensionality_reduction(**function_args) | ||
elif function_name.startswith("plot_") or function_name.startswith( | ||
"perform_" | ||
): | ||
# first try to find the function in the non-Dataset functions | ||
if ( | ||
function := { | ||
"get_gene_function": get_gene_function, | ||
"get_enrichment_data": get_enrichment_data, | ||
"perform_dimensionality_reduction": perform_dimensionality_reduction, | ||
}.get(function_name) | ||
) is not None: | ||
return function(**function_args) | ||
|
||
# special treatment for this one | ||
elif function_name == "plot_intensity": | ||
gene_name = function_args.pop("gene_name") | ||
protein_id = get_protein_id_for_gene_name( | ||
gene_name, self._gene_to_prot_id_map | ||
) | ||
function_args["protein_id"] = protein_id | ||
|
||
return self.dataset.plot_intensity(**function_args) | ||
|
||
# fallback: try to find the function in the Dataset functions | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This whole logic does not work as well anymore, now that we refactored most methods to no longer be directly available from dataset. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure what you mean? All method calls should still work. |
||
else: | ||
plot_function = getattr( | ||
self.dataset, function_name.split(".")[-1], None | ||
self.dataset, | ||
function_name.split(".")[-1], | ||
None, # TODO why split? | ||
) | ||
if plot_function: | ||
return plot_function(**function_args) | ||
raise ValueError( | ||
f"Function {function_name} not implemented or dataset not available" | ||
) | ||
|
||
except Exception as e: | ||
return f"Error executing {function_name}: {str(e)}" | ||
|
||
|
@@ -219,6 +236,7 @@ def handle_function_calls( | |
|
||
""" | ||
new_artifacts = {} | ||
|
||
funcs_and_args = "\n".join( | ||
[ | ||
f"Calling function: {tool_call.function.name} with arguments: {tool_call.function.arguments}" | ||
|
@@ -231,7 +249,6 @@ def handle_function_calls( | |
|
||
for tool_call in tool_calls: | ||
function_name = tool_call.function.name | ||
print(f"Calling function: {function_name}") | ||
function_args = json.loads(tool_call.function.arguments) | ||
|
||
function_result = self.execute_function(function_name, function_args) | ||
|
@@ -248,8 +265,10 @@ def handle_function_calls( | |
"tool_call_id": tool_call.id, | ||
} | ||
) | ||
|
||
post_artefact_message_idx = len(self.messages) | ||
self.artifacts[post_artefact_message_idx] = new_artifacts.values() | ||
|
||
logger.info( | ||
f"Calling 'chat.completions.create' {self.messages=} {self.tools=} .." | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Have you actually tested this? I'm not sure whether you also changed the corresponding function to use gene names instead of protein ids, or the function was wrong before. It should be clear in both instances if it's actually using protein ids or gene names. Generally we should always use protein ids internally, as 1. gene names can change over time, 2. not all uploaded datasets contain gene names.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://github.com/MannLabs/alphapeptstats/pull/355/files#r1834408990
Found this in the next PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, there's some back-and-forth with this mapping ;-)