turbopuffer · xthexder · Jan 25, 2024 · Jan 16, 2024 · Jan 17, 2024 · Jan 25, 2024
diff --git a/.github/workflows/ci_test.yml b/.github/workflows/ci_test.yml
@@ -40,6 +40,31 @@ jobs:
         poetry run pytest
 
   compatibility:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python 3.12
+      uses: actions/setup-python@v3
+      with:
+        python-version: "3.12"
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 pytest poetry
+        poetry install --with test --with compatibility
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        poetry run flake8 . --count --per-file-ignores=vectors.py:F821 --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. Skip package import warnings.
+        poetry run flake8 . --count --exit-zero --per-file-ignores=vectors.py:F821 --exclude=__init__.py --max-line-length=140 --statistics
+    - name: Test with pytest
+      env:
+        TURBOPUFFER_API_KEY: ${{ secrets.TURBOPUFFER_TEST_API_KEY }}
+      run: |
+        poetry run pytest
+
+  compatibility-fast:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v3

diff --git a/tests/test_vectors.py b/tests/test_vectors.py
@@ -7,7 +7,7 @@ def test_upsert_rows():
     ns = tpuf.Namespace(tests.test_prefix + 'client_test')
     assert str(ns) == f'tpuf-namespace:{tests.test_prefix}client_test'
 
-    # Test upsert mutliple dict rows
+    # Test upsert multiple dict rows
     ns.upsert([
         {'id': 2, 'vector': [2, 2]},
         {'id': 7, 'vector': [0.7, 0.7], 'attributes': {'hello': 'world', 'test': 'rows'}},
@@ -201,6 +201,24 @@ def check_result(row, expected):
     for i in range(len(vector_set)):  # Use VectorResult in index mode
         check_result(vector_set[i], expected[i])
 
+    # Test query with all attributes
+    vector_set = ns.query(
+        top_k=5,
+        vector=[0.8, 0.7],
+        distance_metric='euclidean_squared',
+        include_vectors=True,
+        include_attributes=True
+    )
+    expected = [
+        tpuf.VectorRow(id=7, vector=[0.7, 0.7], dist=0.01, attributes={'hello': 'world'}),
+        tpuf.VectorRow(id=10, vector=[1.0, 1.0], dist=0.13, attributes={'test': 'cols'}),
+        tpuf.VectorRow(id=11, vector=[1.1, 1.1], dist=0.25, attributes={'test': 'cols'}),
+        tpuf.VectorRow(id=3, vector=[0.3, 0.3], dist=0.41, attributes={'test': 'cols', 'key1': 'three', 'key2': 'c'}),
+        tpuf.VectorRow(id=6, vector=[0.3, 0.3], dist=0.41, attributes={'test': 'cols', 'key1': 'three', 'key2': 'c'}),
+    ]
+    for i in range(len(vector_set)):  # Use VectorResult in index mode
+        check_result(vector_set[i], expected[i])
+
     # Test query with typed query
     vector_set = ns.query(tpuf.VectorQuery(
         top_k=5,

diff --git a/turbopuffer/__init__.py b/turbopuffer/__init__.py
@@ -1,4 +1,5 @@
 import os
+import sys
 api_key = os.environ.get('TURBOPUFFER_API_KEY')
 api_base_url = os.environ.get('TURBOPUFFER_API_BASE_URL', 'https://api.turbopuffer.com/v1')
 upsert_batch_size = 5_000
@@ -8,7 +9,33 @@
     def dump_json_bytes(obj): return orjson.dumps(obj, option=orjson.OPT_SERIALIZE_NUMPY)
 except ImportError:
     import json
-    def dump_json_bytes(obj): return json.dumps(obj).encode()
+
+    class NumpyEncoder(json.JSONEncoder):
+        """JSON encoder that also accepts numpy scalars and arrays.
+
+        Used only when orjson is unavailable. numpy is looked up in
+        sys.modules instead of being imported, so it stays an optional
+        dependency: numpy objects can only reach the encoder if the caller
+        has already imported numpy.
+        """
+
+        def default(self, obj):
+            # .get() also covers sys.modules['numpy'] = None, which is how
+            # an import is deliberately blocked.
+            np = sys.modules.get('numpy')
+            if np is not None:
+                if isinstance(obj, np.bool_):
+                    return bool(obj)
+                elif isinstance(obj, np.integer):
+                    return int(obj)
+                elif isinstance(obj, np.floating):
+                    return float(obj)
+                elif isinstance(obj, np.ndarray):
+                    return obj.tolist()
+            # Not a numpy value: let the base class raise TypeError.
+            return super().default(obj)
+
+    def dump_json_bytes(obj): return json.dumps(obj, cls=NumpyEncoder).encode()
 
 from turbopuffer.version import VERSION
 from turbopuffer.namespace import Namespace

diff --git a/turbopuffer/backend.py b/turbopuffer/backend.py
@@ -52,6 +52,8 @@ def make_api_request(self,
                 # before = time.monotonic()
                 json_payload = tpuf.dump_json_bytes(payload)
                 # print('Json time:', time.monotonic() - before)
+            elif isinstance(payload, bytes):
+                json_payload = payload
             else:
                 raise ValueError(f'Unsupported POST payload type: {type(payload)}')
 

diff --git a/turbopuffer/error.py b/turbopuffer/error.py
@@ -10,4 +10,4 @@ class APIError(TurbopufferError):
     def __init__(self, status_code: int, status_name: str, message: str):
         self.status_code = status_code
         self.status_name = status_name
-        super().__init__(f'{status_name}: {message}')
+        super().__init__(f'{status_name} (HTTP {status_code}): {message}')
diff --git a/turbopuffer/query.py b/turbopuffer/query.py
@@ -21,7 +21,7 @@ class VectorQuery:
     distance_metric: Optional[str] = None
     top_k: int = 10
     include_vectors: bool = False
-    include_attributes: Optional[List[str]] = None
+    include_attributes: Optional[Union[List[str], bool]] = None
     filters: Optional[Dict[str, List[FilterTuple]]] = None
 
     def from_dict(source: dict) -> 'VectorQuery':
@@ -41,8 +41,9 @@ def __post_init__(self):
                     raise ValueError(f'VectorQuery.vector must a 1d-array, got {self.vector.ndim} dimensions')
             elif not isinstance(self.vector, list):
                 raise ValueError('VectorQuery.vector must be a list, got:', type(self.vector))
-        if self.include_attributes is not None and not isinstance(self.include_attributes, list) and not isinstance(self.include_attributes, bool):
-            raise ValueError('VectorQuery.include_attributes must be a list or bool, got:', type(self.include_attributes))
+        if self.include_attributes is not None:
+            if not isinstance(self.include_attributes, (list, bool)):  # List[str] or bool, per the field annotation
+                raise ValueError(f'VectorQuery.include_attributes must be a list or bool, got: {type(self.include_attributes)}')
         if self.filters is not None:
             if not isinstance(self.filters, dict):
                 raise ValueError('VectorQuery.filters must be a dict, got:', type(self.filters))