Skip to content

Commit

Permalink
Resolve issues with GPT4All and fix prompt for yesterday extract questions date filter (#483)
Browse files Browse the repository at this point in the history

- The GPT4All integration had ceased working with the 0.1.7 specification. Update to use 1.0.12. At a later date, we should also use first-party support for Llama v2 via gpt4all.
- Update the system prompt for the extract_questions flow to add start and end date to the yesterday date filter example.
- Update all setup data in conftest.py to use the new client-server indexing pattern.
  • Loading branch information
sabaimran authored Sep 18, 2023
1 parent 8141be9 commit 2dd15e9
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 34 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ dependencies = [
"bs4 >= 0.0.1",
"anyio == 3.7.1",
"pymupdf >= 1.23.3",
"gpt4all == 0.1.9; platform_system == 'Linux' and platform_machine == 'x86_64'",
"gpt4all == 0.1.9; platform_system == 'Windows' or platform_system == 'Darwin'",
"gpt4all == 1.0.12; platform_system == 'Linux' and platform_machine == 'x86_64'",
"gpt4all == 1.0.12; platform_system == 'Windows' or platform_system == 'Darwin'",
]
dynamic = ["version"]

Expand Down
7 changes: 4 additions & 3 deletions src/khoj/processor/conversation/gpt4all/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ def download_model(model_name: str):
raise e

url = model_metadata.model_name_to_url.get(model_name)
model_path = os.path.expanduser(f"~/.cache/gpt4all/")
if not url:
logger.debug(f"Model {model_name} not found in model metadata. Skipping download.")
return GPT4All(model_name)
return GPT4All(model_name=model_name, model_path=model_path)

filename = os.path.expanduser(f"~/.cache/gpt4all/{model_name}")
if os.path.exists(filename):
Expand All @@ -39,8 +40,8 @@ def download_model(model_name: str):
requests.get("https://www.google.com/", timeout=5)
except:
logger.debug("User is offline. Disabling allowed download flag")
return GPT4All(model_name, allow_download=False)
return GPT4All(model_name)
return GPT4All(model_name=model_name, model_path=model_path, allow_download=False)
return GPT4All(model_name=model_name, model_path=model_path)

# Download the model to a tmp file. Once the download is completed, move the tmp file to the actual file
tmp_filename = filename + ".tmp"
Expand Down
2 changes: 1 addition & 1 deletion src/khoj/processor/conversation/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@
Q: What does yesterday's note say?
["Note from {yesterday_date} dt='{yesterday_date}'"]
["Note from {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]
A: Yesterday's note contains the following information: ...
Expand Down
37 changes: 9 additions & 28 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
TextSearchConfig,
ImageSearchConfig,
)
from khoj.utils import state
from khoj.utils import state, fs_syncer
from khoj.routers.indexer import configure_content
from khoj.processor.jsonl.jsonl_to_jsonl import JsonlToJsonl
from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
from khoj.search_filter.date_filter import DateFilter
Expand Down Expand Up @@ -220,15 +221,10 @@ def chat_client(md_content_config: ContentConfig, search_config: SearchConfig, p
state.SearchType = configure_search_types(state.config)

# Index Markdown Content for Search
filters = [DateFilter(), WordFilter(), FileFilter()]
state.search_models.text_search = text_search.initialize_model(search_config.asymmetric)
state.content_index.markdown = text_search.setup(
MarkdownToJsonl,
get_sample_data("markdown"),
md_content_config.markdown,
state.search_models.text_search.bi_encoder,
regenerate=False,
filters=filters,
all_files = fs_syncer.collect_files(state.config.content_type)
state.content_index = configure_content(
state.content_index, state.config.content_type, all_files, state.search_models
)

# Initialize Processor from Config
Expand Down Expand Up @@ -273,35 +269,20 @@ def client(content_config: ContentConfig, search_config: SearchConfig, processor

@pytest.fixture(scope="function")
def client_offline_chat(
md_content_config: ContentConfig, search_config: SearchConfig, processor_config_offline_chat: ProcessorConfig
search_config: SearchConfig, processor_config_offline_chat: ProcessorConfig, content_config: ContentConfig
):
# Initialize app state
state.config.content_type = md_content_config
state.config.search_type = search_config
state.SearchType = configure_search_types(state.config)

# Index Markdown Content for Search
filters = [DateFilter(), WordFilter(), FileFilter()]
state.search_models.text_search = text_search.initialize_model(search_config.asymmetric)
state.search_models.image_search = image_search.initialize_model(search_config.image)
state.content_index.org = text_search.setup(
OrgToJsonl,
get_sample_data("org"),
content_config.org,
state.search_models.text_search.bi_encoder,
regenerate=False,
)
state.content_index.image = image_search.setup(
content_config.image, state.search_models.image_search, regenerate=False
)

state.content_index.markdown = text_search.setup(
MarkdownToJsonl,
get_sample_data("markdown"),
md_content_config.markdown,
state.search_models.text_search.bi_encoder,
regenerate=False,
filters=filters,
all_files = fs_syncer.collect_files(content_config.content_type)
state.content_index = configure_content(
state.content_index, state.config.content_type, all_files, state.search_models
)

# Initialize Processor from Config
Expand Down

0 comments on commit 2dd15e9

Please sign in to comment.