Skip to content

Commit

Permalink
Move truncate code context func for reusability across modules
Browse files Browse the repository at this point in the history
The function needs to be used across routers and processors. Keeping it
in the run_code tool makes it hard to use in other chat provider contexts
due to circular dependency issues created by the
send_message_to_model_wrapper func
  • Loading branch information
debanjum committed Nov 21, 2024
1 parent f434c3f commit 5475a26
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 27 deletions.
26 changes: 1 addition & 25 deletions src/khoj/processor/tools/run_code.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import base64
import copy
import datetime
import json
import logging
Expand All @@ -20,7 +19,7 @@
construct_chat_history,
)
from khoj.routers.helpers import send_message_to_model_wrapper
from khoj.utils.helpers import is_none_or_empty, timer
from khoj.utils.helpers import is_none_or_empty, timer, truncate_code_context
from khoj.utils.rawconfig import LocationData

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -180,26 +179,3 @@ async def execute_sandboxed_python(code: str, input_data: list[dict], sandbox_ur
"std_err": f"Failed to execute code with {response.status}",
"output_files": [],
}


def truncate_code_context(original_code_results: dict[str, Any], max_chars=10000) -> dict[str, Any]:
    """
    Truncate large output files and drop image file data from code results.

    Args:
        original_code_results: Mapping whose values each contain a "results" dict
            with an "output_files" list of {"filename", "b64_data"} entries.
        max_chars: Maximum number of "b64_data" characters kept per non-image file.

    Returns:
        A deep copy of the input where image file data is replaced by a placeholder
        and oversized file data is cut to max_chars followed by "...".
        The input mapping itself is left unmodified.
    """
    image_suffixes = {".png", ".jpg", ".jpeg", ".webp"}
    # Create a deep copy of the code results to avoid modifying the original data
    code_results = copy.deepcopy(original_code_results)
    for code_result in code_results.values():
        # Results without any output files have nothing to truncate
        output_files = code_result["results"].get("output_files") or []
        for idx, output_file in enumerate(output_files):
            filename = output_file["filename"]
            # Drop image files from code results.
            # Match the suffix case-insensitively so files like "plot.PNG" are also dropped
            if Path(filename).suffix.lower() in image_suffixes:
                output_files[idx] = {
                    "filename": filename,
                    "b64_data": "[placeholder for generated image data for brevity]",
                }
            # Truncate large output files
            elif len(output_file["b64_data"]) > max_chars:
                output_files[idx] = {
                    "filename": filename,
                    "b64_data": output_file["b64_data"][:max_chars] + "...",
                }
    return code_results
3 changes: 2 additions & 1 deletion src/khoj/routers/research.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
construct_tool_chat_history,
)
from khoj.processor.tools.online_search import read_webpages, search_online
from khoj.processor.tools.run_code import run_code, truncate_code_context
from khoj.processor.tools.run_code import run_code
from khoj.routers.api import extract_references_and_questions
from khoj.routers.helpers import (
ChatEvent,
Expand All @@ -28,6 +28,7 @@
function_calling_description_for_llm,
is_none_or_empty,
timer,
truncate_code_context,
)
from khoj.utils.rawconfig import LocationData

Expand Down
26 changes: 25 additions & 1 deletion src/khoj/utils/helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations # to avoid quoting type hints

import copy
import datetime
import io
import ipaddress
Expand All @@ -18,7 +19,7 @@
from os import path
from pathlib import Path
from time import perf_counter
from typing import TYPE_CHECKING, Optional, Union
from typing import TYPE_CHECKING, Any, Optional, Union
from urllib.parse import urlparse

import psutil
Expand Down Expand Up @@ -527,6 +528,29 @@ def convert_image_to_webp(image_bytes):
return webp_image_bytes


def truncate_code_context(original_code_results: dict[str, Any], max_chars=10000) -> dict[str, Any]:
    """
    Truncate large output files and drop image file data from code results.

    Args:
        original_code_results: Mapping whose values each contain a "results" dict
            with an "output_files" list of {"filename", "b64_data"} entries.
        max_chars: Maximum number of "b64_data" characters kept per non-image file.

    Returns:
        A deep copy of the input where image file data is replaced by a placeholder
        and oversized file data is cut to max_chars followed by "...".
        The input mapping itself is left unmodified.
    """
    image_suffixes = {".png", ".jpg", ".jpeg", ".webp"}
    # Create a deep copy of the code results to avoid modifying the original data
    code_results = copy.deepcopy(original_code_results)
    for code_result in code_results.values():
        # Results without any output files have nothing to truncate
        output_files = code_result["results"].get("output_files") or []
        for idx, output_file in enumerate(output_files):
            filename = output_file["filename"]
            # Drop image files from code results.
            # Match the suffix case-insensitively so files like "plot.PNG" are also dropped
            if Path(filename).suffix.lower() in image_suffixes:
                output_files[idx] = {
                    "filename": filename,
                    "b64_data": "[placeholder for generated image data for brevity]",
                }
            # Truncate large output files
            elif len(output_file["b64_data"]) > max_chars:
                output_files[idx] = {
                    "filename": filename,
                    "b64_data": output_file["b64_data"][:max_chars] + "...",
                }
    return code_results


@lru_cache
def tz_to_cc_map() -> dict[str, str]:
"""Create a mapping of timezone to country code"""
Expand Down

0 comments on commit 5475a26

Please sign in to comment.