diff --git a/daras_ai_v2/base.py b/daras_ai_v2/base.py index 1e2948370..42815401d 100644 --- a/daras_ai_v2/base.py +++ b/daras_ai_v2/base.py @@ -1815,7 +1815,9 @@ def render_variables(self): if not self.functions_in_settings: functions_input(self.request.user) variables_input( - template_keys=self.template_keys, allow_add=is_functions_enabled() + template_keys=self.template_keys, + allow_add=is_functions_enabled(), + exclude=self.fields_to_save(), ) @classmethod diff --git a/daras_ai_v2/query_generator.py b/daras_ai_v2/query_generator.py index f69606160..bf6381386 100644 --- a/daras_ai_v2/query_generator.py +++ b/daras_ai_v2/query_generator.py @@ -18,11 +18,12 @@ def generate_final_search_query( context: dict = None, response_format_type: typing.Literal["text", "json_object"] = None, ): - if context is None: - context = request.dict() - if response: - context |= response.dict() - instructions = render_prompt_vars(instructions, context).strip() + state = request.dict() + if response: + state |= response.dict() + if context: + state |= context + instructions = render_prompt_vars(instructions, state).strip() if not instructions: return "" return run_language_model( diff --git a/daras_ai_v2/variables_widget.py b/daras_ai_v2/variables_widget.py index ea24131e0..937124eda 100644 --- a/daras_ai_v2/variables_widget.py +++ b/daras_ai_v2/variables_widget.py @@ -23,6 +23,7 @@ def variables_input( description: str = "Variables let you pass custom parameters to your workflow. Access a variable in your instruction prompt with Jinja, e.g. `{{ my_variable }}`\n ", key: str = "variables", allow_add: bool = False, + exclude: typing.Iterable[str] = (), ): from recipes.BulkRunner import list_view_editor @@ -45,7 +46,7 @@ def variables_input( var_names = ( (template_var_names | set(variables.keys())) - set(context_globals().keys()) # dont show global context variables - - set(gui.session_state.keys()) # dont show other session state variables + - set(exclude) # used for hiding request/response fields ) pressed_add = False if var_names or allow_add: diff --git a/daras_ai_v2/vector_search.py b/daras_ai_v2/vector_search.py index f78c39260..1d367a8f9 100644 --- a/daras_ai_v2/vector_search.py +++ b/daras_ai_v2/vector_search.py @@ -8,6 +8,7 @@ import re import tempfile import typing +import unicodedata from functools import partial from time import time @@ -56,6 +57,7 @@ url_to_gdrive_file_id, gdrive_metadata, ) +from daras_ai_v2.office_utils_pptx import pptx_to_text_pages from daras_ai_v2.redis_cache import redis_lock from daras_ai_v2.scraping_proxy import ( get_scraping_proxy_cert_path, @@ -67,7 +69,6 @@ remove_quotes, generate_text_fragment_url, ) -from daras_ai_v2.office_utils_pptx import pptx_to_text_pages from daras_ai_v2.text_splitter import text_splitter, Document from embeddings.models import EmbeddedFile, EmbeddingsReference from files.models import FileMetadata @@ -190,6 +191,7 @@ def get_top_k_references( s = time() search_result = query_vespa( request.search_query, + request.keyword_query, file_ids=vespa_file_ids, limit=request.max_references or 100, embedding_model=embedding_model, @@ -232,34 +234,63 @@ def vespa_search_results_to_refs( def query_vespa( search_query: str, + keyword_query: str | list[str] | None, file_ids: list[str], limit: int, embedding_model: EmbeddingModels, semantic_weight: float = 1.0, + threshold: float = 0.7, + rerank_count: float = 1000, ) -> dict: - query_embedding = create_embeddings_cached([search_query], model=embedding_model)[0] - if query_embedding is None or not file_ids: + if not file_ids: return {"root": {"children": []}} - file_ids_str = ", ".join(map(repr, file_ids)) - query = f"select * from {settings.VESPA_SCHEMA} where file_id in (@fileIds) and (userQuery() or ({{targetHits: {limit}}}nearestNeighbor(embedding, q))) limit {limit}" - logger.debug(f"Vespa query: {'-'*80}\n{query}\n{'-'*80}") - if semantic_weight == 1.0: - ranking = "semantic" - elif semantic_weight == 0.0: + + yql = "select * from %(schema)s where file_id in (@fileIds) and " % dict( + schema=settings.VESPA_SCHEMA + ) + bm25_yql = "( {targetHits: %(hits)i} userInput(@bm25Query) )" + semantic_yql = "( {targetHits: %(hits)i, distanceThreshold: %(threshold)f} nearestNeighbor(embedding, queryEmbedding) )" + + if semantic_weight == 0.0: + yql += bm25_yql % dict(hits=limit) ranking = "bm25" + elif semantic_weight == 1.0: + yql += semantic_yql % dict(hits=limit, threshold=threshold) + ranking = "semantic" else: + yql += ( + "( " + + bm25_yql % dict(hits=rerank_count) + + " or " + + semantic_yql % dict(hits=rerank_count, threshold=threshold) + + " )" + ) ranking = "fusion" - response = get_vespa_app().query( - yql=query, - query=search_query, - ranking=ranking, - body={ - "ranking.features.query(q)": padded_embedding(query_embedding), - "ranking.features.query(semanticWeight)": semantic_weight, - "fileIds": file_ids_str, - }, + + body = {"yql": yql, "ranking": ranking, "hits": limit} + + if ranking in ("bm25", "fusion"): + if isinstance(keyword_query, list): + keyword_query = " ".join(keyword_query) + body["bm25Query"] = remove_control_characters(keyword_query or search_query) + + logger.debug( + "vespa query " + " ".join(repr(f"{k}={v}") for k, v in body.items()) + " ..." ) + + if ranking in ("semantic", "fusion"): + query_embedding = create_embeddings_cached( + [search_query], model=embedding_model + )[0] + if query_embedding is None: + return {"root": {"children": []}} + body["input.query(queryEmbedding)"] = padded_embedding(query_embedding) + + body["fileIds"] = ", ".join(map(repr, file_ids)) + + response = get_vespa_app().query(body) assert response.is_successful() + return response.get_json() @@ -485,6 +516,23 @@ def create_embeddings_in_search_db( return refs +def format_embedding_row( + doc_id: str, + file_id: str, + ref: SearchReference, + embedding: np.ndarray, + created_at: datetime.datetime, +): + return dict( + id=doc_id, + file_id=file_id, + embedding=padded_embedding(embedding), + created_at=int(created_at.timestamp() * 1000), + title=remove_control_characters(ref["title"]), + snippet=remove_control_characters(ref["snippet"]), + ) + + def get_embeds_for_doc( *, f_url: str, @@ -940,22 +988,9 @@ def render_sources_widget(refs: list[SearchReference]): ) -def format_embedding_row( - doc_id: str, - file_id: str, - ref: SearchReference, - embedding: np.ndarray, - created_at: datetime.datetime, -): - return dict( - id=doc_id, - file_id=file_id, - embedding=padded_embedding(embedding), - created_at=int(created_at.timestamp() * 1000), - # url=ref["url"].encode("unicode-escape").decode(), - # title=ref["title"].encode("unicode-escape").decode(), - # snippet=ref["snippet"].encode("unicode-escape").decode(), - ) +def remove_control_characters(s): + # from https://docs.vespa.ai/en/troubleshooting-encoding.html + return "".join(ch for ch in s if unicodedata.category(ch)[0] != "C") EMBEDDING_SIZE = 3072 diff --git a/poetry.lock b/poetry.lock index 6d8d2ac76..434ffa625 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "absl-py" @@ -464,10 +464,6 @@ files = [ {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a37b8f0391212d29b3a91a799c8e4a2855e0576911cdfb2515487e30e322253d"}, {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e84799f09591700a4154154cab9787452925578841a94321d5ee8fb9a9a328f0"}, {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f66b5337fa213f1da0d9000bc8dc0cb5b896b726eefd9c6046f699b169c41b9e"}, - {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5dab0844f2cf82be357a0eb11a9087f70c5430b2c241493fc122bb6f2bb0917c"}, - {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e4fe605b917c70283db7dfe5ada75e04561479075761a0b3866c081d035b01c1"}, - {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1e9a65b5736232e7a7f91ff3d02277f11d339bf34099a56cdab6a8b3410a02b2"}, - {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:58d4b711689366d4a03ac7957ab8c28890415e267f9b6589969e74b6e42225ec"}, {file = "Brotli-1.1.0-cp310-cp310-win32.whl", hash = "sha256:be36e3d172dc816333f33520154d708a2657ea63762ec16b62ece02ab5e4daf2"}, {file = "Brotli-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c6244521dda65ea562d5a69b9a26120769b7a9fb3db2fe9545935ed6735b128"}, {file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc"}, @@ -480,14 +476,8 @@ files = [ {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9"}, {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265"}, {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8"}, - {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c247dd99d39e0338a604f8c2b3bc7061d5c2e9e2ac7ba9cc1be5a69cb6cd832f"}, - {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1b2c248cd517c222d89e74669a4adfa5577e06ab68771a529060cf5a156e9757"}, - {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2a24c50840d89ded6c9a8fdc7b6ed3692ed4e86f1c4a4a938e1e92def92933e0"}, - {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f31859074d57b4639318523d6ffdca586ace54271a73ad23ad021acd807eb14b"}, {file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50"}, {file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1"}, - {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:32d95b80260d79926f5fab3c41701dbb818fde1c9da590e77e571eefd14abe28"}, - {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b760c65308ff1e462f65d69c12e4ae085cff3b332d894637f6273a12a482d09f"}, {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409"}, {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2"}, {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451"}, @@ -498,24 +488,8 @@ files = [ {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180"}, {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248"}, {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966"}, - {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:87a3044c3a35055527ac75e419dfa9f4f3667a1e887ee80360589eb8c90aabb9"}, - {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c5529b34c1c9d937168297f2c1fde7ebe9ebdd5e121297ff9c043bdb2ae3d6fb"}, - {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ca63e1890ede90b2e4454f9a65135a4d387a4585ff8282bb72964fab893f2111"}, - {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e79e6520141d792237c70bcd7a3b122d00f2613769ae0cb61c52e89fd3443839"}, {file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0"}, {file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951"}, - {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8bf32b98b75c13ec7cf774164172683d6e7891088f6316e54425fde1efc276d5"}, - {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bc37c4d6b87fb1017ea28c9508b36bbcb0c3d18b4260fcdf08b200c74a6aee8"}, - {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c0ef38c7a7014ffac184db9e04debe495d317cc9c6fb10071f7fefd93100a4f"}, - {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91d7cc2a76b5567591d12c01f019dd7afce6ba8cba6571187e21e2fc418ae648"}, - {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93dde851926f4f2678e704fadeb39e16c35d8baebd5252c9fd94ce8ce68c4a0"}, - {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0db75f47be8b8abc8d9e31bc7aad0547ca26f24a54e6fd10231d623f183d089"}, - {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6967ced6730aed543b8673008b5a391c3b1076d834ca438bbd70635c73775368"}, - {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7eedaa5d036d9336c95915035fb57422054014ebdeb6f3b42eac809928e40d0c"}, - {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d487f5432bf35b60ed625d7e1b448e2dc855422e87469e3f450aa5552b0eb284"}, - {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832436e59afb93e1836081a20f324cb185836c617659b07b129141a8426973c7"}, - {file = "Brotli-1.1.0-cp313-cp313-win32.whl", hash = "sha256:43395e90523f9c23a3d5bdf004733246fba087f2948f87ab28015f12359ca6a0"}, - {file = "Brotli-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9011560a466d2eb3f5a6e4929cf4a09be405c64154e12df0dd72713f6500e32b"}, {file = "Brotli-1.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a090ca607cbb6a34b0391776f0cb48062081f5f60ddcce5d11838e67a01928d1"}, {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de9d02f5bda03d27ede52e8cfe7b865b066fa49258cbab568720aa5be80a47d"}, {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2333e30a5e00fe0fe55903c8832e08ee9c3b1382aacf4db26664a16528d51b4b"}, @@ -525,10 +499,6 @@ files = [ {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:fd5f17ff8f14003595ab414e45fce13d073e0762394f957182e69035c9f3d7c2"}, {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:069a121ac97412d1fe506da790b3e69f52254b9df4eb665cd42460c837193354"}, {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e93dfc1a1165e385cc8239fab7c036fb2cd8093728cbd85097b284d7b99249a2"}, - {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:aea440a510e14e818e67bfc4027880e2fb500c2ccb20ab21c7a7c8b5b4703d75"}, - {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_i686.whl", hash = "sha256:6974f52a02321b36847cd19d1b8e381bf39939c21efd6ee2fc13a28b0d99348c"}, - {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_ppc64le.whl", hash = "sha256:a7e53012d2853a07a4a79c00643832161a910674a893d296c9f1259859a289d2"}, - {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:d7702622a8b40c49bffb46e1e3ba2e81268d5c04a34f460978c6b5517a34dd52"}, {file = "Brotli-1.1.0-cp36-cp36m-win32.whl", hash = "sha256:a599669fd7c47233438a56936988a2478685e74854088ef5293802123b5b2460"}, {file = "Brotli-1.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d143fd47fad1db3d7c27a1b1d66162e855b5d50a89666af46e1679c496e8e579"}, {file = "Brotli-1.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:11d00ed0a83fa22d29bc6b64ef636c4552ebafcef57154b4ddd132f5638fbd1c"}, @@ -540,10 +510,6 @@ files = [ {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:919e32f147ae93a09fe064d77d5ebf4e35502a8df75c29fb05788528e330fe74"}, {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:23032ae55523cc7bccb4f6a0bf368cd25ad9bcdcc1990b64a647e7bbcce9cb5b"}, {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:224e57f6eac61cc449f498cc5f0e1725ba2071a3d4f48d5d9dffba42db196438"}, - {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cb1dac1770878ade83f2ccdf7d25e494f05c9165f5246b46a621cc849341dc01"}, - {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:3ee8a80d67a4334482d9712b8e83ca6b1d9bc7e351931252ebef5d8f7335a547"}, - {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:5e55da2c8724191e5b557f8e18943b1b4839b8efc3ef60d65985bcf6f587dd38"}, - {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:d342778ef319e1026af243ed0a07c97acf3bad33b9f29e7ae6a1f68fd083e90c"}, {file = "Brotli-1.1.0-cp37-cp37m-win32.whl", hash = "sha256:587ca6d3cef6e4e868102672d3bd9dc9698c309ba56d41c2b9c85bbb903cdb95"}, {file = "Brotli-1.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2954c1c23f81c2eaf0b0717d9380bd348578a94161a65b3a2afc62c86467dd68"}, {file = "Brotli-1.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:efa8b278894b14d6da122a72fefcebc28445f2d3f880ac59d46c90f4c13be9a3"}, @@ -556,10 +522,6 @@ files = [ {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ab4fbee0b2d9098c74f3057b2bc055a8bd92ccf02f65944a241b4349229185a"}, {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:141bd4d93984070e097521ed07e2575b46f817d08f9fa42b16b9b5f27b5ac088"}, {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fce1473f3ccc4187f75b4690cfc922628aed4d3dd013d047f95a9b3919a86596"}, - {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d2b35ca2c7f81d173d2fadc2f4f31e88cc5f7a39ae5b6db5513cf3383b0e0ec7"}, - {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:af6fa6817889314555aede9a919612b23739395ce767fe7fcbea9a80bf140fe5"}, - {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:2feb1d960f760a575dbc5ab3b1c00504b24caaf6986e2dc2b01c09c87866a943"}, - {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4410f84b33374409552ac9b6903507cdb31cd30d2501fc5ca13d18f73548444a"}, {file = "Brotli-1.1.0-cp38-cp38-win32.whl", hash = "sha256:db85ecf4e609a48f4b29055f1e144231b90edc90af7481aa731ba2d059226b1b"}, {file = "Brotli-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3d7954194c36e304e1523f55d7042c59dc53ec20dd4e9ea9d151f1b62b4415c0"}, {file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5fb2ce4b8045c78ebbc7b8f3c15062e435d47e7393cc57c25115cfd49883747a"}, @@ -572,10 +534,6 @@ files = [ {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:949f3b7c29912693cee0afcf09acd6ebc04c57af949d9bf77d6101ebb61e388c"}, {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:89f4988c7203739d48c6f806f1e87a1d96e0806d44f0fba61dba81392c9e474d"}, {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:de6551e370ef19f8de1807d0a9aa2cdfdce2e85ce88b122fe9f6b2b076837e59"}, - {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0737ddb3068957cf1b054899b0883830bb1fec522ec76b1098f9b6e0f02d9419"}, - {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4f3607b129417e111e30637af1b56f24f7a49e64763253bbc275c75fa887d4b2"}, - {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:6c6e0c425f22c1c719c42670d561ad682f7bfeeef918edea971a79ac5252437f"}, - {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:494994f807ba0b92092a163a0a283961369a65f6cbe01e8891132b7a320e61eb"}, {file = "Brotli-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f0d8a7a6b5983c2496e364b969f0e526647a06b075d034f3297dc66f3b360c64"}, {file = "Brotli-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdad5b9014d83ca68c25d2e9444e28e967ef16e80f6b436918c700c117a85467"}, {file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"}, @@ -1391,6 +1349,23 @@ dev = ["autoflake (>=1.4.0,<2.0.0)", "flake8 (>=3.8.3,<6.0.0)", "pre-commit (>=2 doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "typer[all] (>=0.6.1,<0.7.0)"] test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==22.8.0)", "databases[sqlite] (>=0.3.2,<0.7.0)", "email-validator (>=1.1.1,<2.0.0)", "flake8 (>=3.8.3,<6.0.0)", "flask (>=1.1.2,<3.0.0)", "httpx (>=0.23.0,<0.24.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.982)", "orjson (>=3.2.1,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "peewee (>=3.13.3,<4.0.0)", "pytest (>=7.1.3,<8.0.0)", "pytest-cov (>=2.12.0,<5.0.0)", "python-jose[cryptography] (>=3.3.0,<4.0.0)", "python-multipart (>=0.0.5,<0.0.6)", "pyyaml (>=5.3.1,<7.0.0)", "requests (>=2.24.0,<3.0.0)", "sqlalchemy (>=1.3.18,<=1.4.41)", "types-orjson (==3.6.2)", "types-ujson (==5.5.0)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)"] +[[package]] +name = "fastcore" +version = "1.7.27" +description = "Python supercharged for fastai development" +optional = false +python-versions = ">=3.9" +files = [ + {file = "fastcore-1.7.27-py3-none-any.whl", hash = "sha256:ac62143e1fd40fd501a6740b99ccdcfef58dd467d2cd33dca681dc4f500b1e63"}, + {file = "fastcore-1.7.27.tar.gz", hash = "sha256:531c1df430094667d385cbb5ec985b5db289647f6a004ff50134bb87b67d169e"}, +] + +[package.dependencies] +packaging = "*" + +[package.extras] +dev = ["llms-txt", "matplotlib", "nbclassic", "nbdev (>=0.2.39)", "numpy", "pandas", "pillow", "pysymbol-llm", "torch"] + [[package]] name = "filelock" version = "3.13.1" @@ -2231,6 +2206,21 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" + [[package]] name = "hashids" version = "1.3.1" @@ -2262,6 +2252,17 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} [package.extras] export = ["jinja2 (>=2.7,<3)"] +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + [[package]] name = "html-sanitizer" version = "2.4.4" @@ -2374,6 +2375,7 @@ files = [ [package.dependencies] certifi = "*" +h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""} httpcore = ">=0.15.0,<0.17.0" rfc3986 = {version = ">=1.3,<2", extras = ["idna2008"]} sniffio = "*" @@ -2417,6 +2419,17 @@ testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jed torch = ["torch"] typing = ["pydantic (<2.0)", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +[[package]] +name = "hyperframe" +version = "6.0.1" +description = "HTTP/2 framing layer for Python" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + [[package]] name = "identify" version = "2.5.31" @@ -3220,16 +3233,6 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -4795,24 +4798,37 @@ files = [ [[package]] name = "pyvespa" -version = "0.39.0" +version = "0.51.0" description = "Python API for vespa.ai" optional = false python-versions = ">=3.8" files = [ - {file = "pyvespa-0.39.0-py3-none-any.whl", hash = "sha256:b880fd738dfee88028e7ed0f6dbaa58b8f61a350819ef1e4d6a652dadd2064e0"}, - {file = "pyvespa-0.39.0.tar.gz", hash = "sha256:a45df9264cbc943561fa052b55c11e7c350a8ec0941ca96fa45e125cee16fc78"}, + {file = "pyvespa-0.51.0-py3-none-any.whl", hash = "sha256:71249999b2de0ed3e6cc9370cbabe162dab6a4e78c0936d7217cffb724269611"}, + {file = "pyvespa-0.51.0.tar.gz", hash = "sha256:217b0b722125a5035af54247f3a4fcdc3ead0c3dcf4e61ce70bf7f831f100ac0"}, ] [package.dependencies] aiohttp = "*" cryptography = "*" docker = "*" +fastcore = ">=1.7.8" +httpx = {version = "*", extras = ["http2"]} jinja2 = "*" -pandas = "*" +lxml = "*" +python-dateutil = "*" requests = "*" -tenacity = "*" -typing-extensions = "*" +requests_toolbelt = "*" +tenacity = ">=8.4.1" +typing_extensions = "*" + +[package.extras] +build = ["build (==1.0.3)", "ruff", "setuptools (==69.0.3)", "toml (==0.10.2)", "twine (==5.1.1)"] +dev = ["datasets (>=2.19.1)", "pre-commit", "pyvespa[build]", "pyvespa[unittest]", "vespacli"] +docs = ["ipykernel", "nbsphinx", "sphinx", "sphinx-rtd-theme (>=0.5.0)"] +feed = ["PyYAML", "beautifulsoup4", "html5lib", "markdownify", "mmh3", "requests (<=2.31.0)", "spacy", "tiktoken", "vespacli"] +notebooks = ["datasets (>=2.19.1)", "ipykernel", "ipywidgets", "ir_datasets", "jupytext", "nbconvert (<=7.12.0)", "notebook", "numpy", "openai", "pandas", "papermill", "plotly", "pytrec_eval", "torch"] +unittest = ["pytest", "pytest-asyncio", "requests-mock", "vespacli"] +vespacli = ["vespacli"] [[package]] name = "pywin32" @@ -4849,7 +4865,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -4857,16 +4872,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -4883,7 +4890,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -4891,7 +4897,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -5179,6 +5184,20 @@ requests = ">=2.0.0" [package.extras] rsa = ["oauthlib[signedtoken] (>=3.0.0)"] +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +description = "A utility belt for advanced users of python-requests" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, + {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, +] + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + [[package]] name = "rfc3986" version = "1.5.0" @@ -5916,17 +5935,18 @@ widechars = ["wcwidth"] [[package]] name = "tenacity" -version = "8.2.3" +version = "8.5.0" description = "Retry code until it succeeds" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, - {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, + {file = "tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687"}, + {file = "tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78"}, ] [package.extras] -doc = ["reno", "sphinx", "tornado (>=4.5)"] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] [[package]] name = "text-unidecode" @@ -6913,4 +6933,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "e708a419d0a3eaec75fa9d2eca4510d51dbf9a85b6d72ccf6c8c7f8254d6b4b6" +content-hash = "c105679a2087db6370058fff1a2d1a300ea55d11b0966f6b2e69d6fae7eb4073" diff --git a/pyproject.toml b/pyproject.toml index eb7ca3822..5b4791259 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,7 +78,7 @@ loguru = "^0.7.2" aifail = "^0.3.0" pytest-playwright = "^0.4.3" emoji = "^2.10.1" -pyvespa = "^0.39.0" +pyvespa = "^0.51.0" anthropic = "^0.34.1" azure-cognitiveservices-speech = "^1.37.0" twilio = "^9.2.3" diff --git a/recipes/DocSearch.py b/recipes/DocSearch.py index 8a3926e78..4de891db9 100644 --- a/recipes/DocSearch.py +++ b/recipes/DocSearch.py @@ -168,14 +168,16 @@ def run_v2( else: response.final_search_query = request.search_query - response.references = yield from get_top_k_references( - DocSearchRequest.parse_obj( - { - **request.dict(), - "search_query": response.final_search_query, - }, - ), - current_user=self.request.user, + response.references = yield from ( + get_top_k_references( + DocSearchRequest.parse_obj( + { + **request.dict(), + "search_query": response.final_search_query, + }, + ), + current_user=self.request.user, + ) ) # empty search result, abort! diff --git a/recipes/Functions.py b/recipes/Functions.py index 8472ddade..34b77aeb7 100644 --- a/recipes/Functions.py +++ b/recipes/Functions.py @@ -102,6 +102,7 @@ def render_variables(self): allow_add=True, description="Pass custom parameters to your function and access the parent workflow data. " "Variables will be passed down as the first argument to your anonymous JS function.", + exclude=self.fields_to_save(), ) def render_output(self): diff --git a/recipes/VideoBots.py b/recipes/VideoBots.py index 863d7e58a..79660fa3d 100644 --- a/recipes/VideoBots.py +++ b/recipes/VideoBots.py @@ -983,8 +983,9 @@ def search_step(self, request, response, user_input, model): yield "Creating search query..." response.final_search_query = generate_final_search_query( request=request, + response=response, instructions=query_instructions, - context={**gui.session_state, "messages": chat_history}, + context={"messages": chat_history}, ) else: query_msgs.reverse() @@ -1001,8 +1002,9 @@ def search_step(self, request, response, user_input, model): keyword_query = json.loads( generate_final_search_query( request=k_request, + response=response, instructions=keyword_instructions, - context={**gui.session_state, "messages": chat_history}, + context={"messages": chat_history}, response_format_type="json_object", ), ) @@ -1014,7 +1016,8 @@ def search_step(self, request, response, user_input, model): response.references = yield from get_top_k_references( DocSearchRequest.parse_obj( { - **gui.session_state, + **request.dict(), + **response.dict(), "search_query": response.final_search_query, "keyword_query": response.final_keyword_query, }, diff --git a/scripts/setup_vespa_db.py b/scripts/setup_vespa_db.py index f67da9bf7..f15bf687e 100644 --- a/scripts/setup_vespa_db.py +++ b/scripts/setup_vespa_db.py @@ -6,12 +6,11 @@ Schema, Document, Field, - FieldSet, HNSW, RankProfile, + FieldSet, Function, GlobalPhaseRanking, - QueryTypeField, ) from daras_ai_v2 import settings @@ -35,21 +34,11 @@ rank="filter", ), Field( - name="url", + name="file_id", type="string", indexing=["attribute", "summary"], - ), - Field( - name="title", - type="string", - indexing=["index", "summary"], - index="enable-bm25", - ), - Field( - name="snippet", - type="string", - indexing=["index", "summary"], - index="enable-bm25", + attribute=["fast-search"], + rank="filter", ), Field( name="embedding", @@ -57,75 +46,54 @@ indexing=["index", "attribute"], ann=HNSW(distance_metric="dotproduct"), ), - Field( - name="file_id", - type="string", - indexing=["attribute", "summary"], - attribute=["fast-search"], - rank="filter", - ), Field( name="created_at", type="long", indexing=["attribute"], attribute=["fast-access"], ), + Field( + name="title", + type="string", + indexing=["index", "summary"], + index="enable-bm25", + ), + Field( + name="snippet", + type="string", + indexing=["index", "summary"], + index="enable-bm25", + ), ] ), - fieldsets=[FieldSet(name="default", fields=["title", "snippet"])], + fieldsets=[ + FieldSet(name="default", fields=["title", "snippet"]), + ], rank_profiles=[ RankProfile( name="bm25", - inputs=[ - ("query(q)", EMBEDDING_TYPE), - ], - functions=[ - Function( - name="bm25sum", expression="bm25(title) + bm25(snippet)" - ) - ], - first_phase="bm25sum", + first_phase="bm25(title) + bm25(snippet)", ), RankProfile( name="semantic", - inputs=[ - ("query(q)", EMBEDDING_TYPE), - ], + inputs=[("query(queryEmbedding)", EMBEDDING_TYPE)], first_phase="closeness(field, embedding)", ), RankProfile( name="fusion", - inherits="bm25", inputs=[ - ("query(q)", EMBEDDING_TYPE), + ("query(queryEmbedding)", EMBEDDING_TYPE), ("query(semanticWeight)", "double"), ], - first_phase="closeness(field, embedding)", - global_phase=GlobalPhaseRanking( - expression=""" - if (closeness(field, embedding)>0.6, - reciprocal_rank(bm25sum) * (1 - query(semanticWeight)) + - reciprocal_rank(closeness(field, embedding)) * query(semanticWeight), - 0) - """, - rerank_count=1000, - ), - ), - RankProfile( - name="fusion2", # with bm25 first - inherits="bm25", - inputs=[ - ("query(q)", EMBEDDING_TYPE), - ("query(semanticWeight)", "double"), + functions=[ + Function( + name="bm25sum", + expression="bm25(title) + bm25(snippet)", + ), ], - first_phase="closeness(field, embedding)", + first_phase="bm25sum", global_phase=GlobalPhaseRanking( - expression=""" - if (bm25sum>0.6, - reciprocal_rank(bm25sum) * (1 - query(semanticWeight)) + - reciprocal_rank(closeness(field, embedding)) * query(semanticWeight), - 0) - """, + expression="reciprocal_rank(bm25sum) * (1 - query(semanticWeight)) + reciprocal_rank(closeness(field, embedding)) * query(semanticWeight)", rerank_count=1000, ), ), @@ -133,12 +101,6 @@ ) ], ) -package.query_profile_type.add_fields( - QueryTypeField( - name="ranking.features.query(q)", - type=EMBEDDING_TYPE, - ), -) def run():