From 8c120a513986e43e4429c998831a8ca21a4fdfe0 Mon Sep 17 00:00:00 2001
From: Debanjum
Date: Tue, 26 Nov 2024 16:16:00 -0800
Subject: [PATCH] Fallback to json5 loader if json.loads cannot parse complex json str

JSON5 spec is more flexible, try to load using a fast json5 parser if
the stricter json.loads from the standard library can't load the
raw complex json string into a python dictionary/list
---
 pyproject.toml                           |  1 +
 src/khoj/processor/conversation/utils.py | 12 +++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f02b55599..59adf9527 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -88,6 +88,7 @@ dependencies = [
     "anthropic == 0.26.1",
     "docx2txt == 0.8",
     "google-generativeai == 0.8.3",
+    "pyjson5 == 1.6.7",
 ]
 dynamic = ["version"]
 
diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py
index efd3c51df..079f3fea3 100644
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -15,6 +15,7 @@
 from typing import Any, Callable, Dict, List, Optional
 
 import PIL.Image
+import pyjson5
 import requests
 import tiktoken
 import yaml
@@ -565,17 +566,18 @@ def replace_unescaped_quotes(match):
     processed = re.sub(pattern, replace_unescaped_quotes, cleaned)
 
     # See which json loader can load the processed JSON as valid
-    errors = ""
-    json_loaders_to_try = [json.loads]
+    errors = []
+    json_loaders_to_try = [json.loads, pyjson5.loads]
     for loads in json_loaders_to_try:
         try:
             return loads(processed)
-        except json.JSONDecodeError as e:
-            errors += f"\n\n{e}"
+        except (json.JSONDecodeError, pyjson5.Json5Exception) as e:
+            errors.append(f"{type(e).__name__}: {str(e)}")
 
     # If all loaders fail, raise the aggregated error
     raise ValueError(
-        f"Failed to load JSON with error: {errors}\n\nWhile attempting to load this cleaned JSON:\n{processed}"
+        f"Failed to load JSON with errors: {'; '.join(errors)}\n\n"
+        f"While attempting to load this cleaned JSON:\n{processed}"
     )
 
 