Skip to content

Commit

Permalink
Invalidate metadata after 1 read.
Browse files Browse the repository at this point in the history
  • Loading branch information
xthexder committed Feb 8, 2024
1 parent daa61ad commit ab8ee65
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 13 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ $ pip install turbopuffer[fast]
2. Start using the API
```py
import turbopuffer as tpuf
tpuf.api_key = 'your-token' # Alternatively: export=TURBOPUFFER_API_KEY=your-token
tpuf.api_key = 'your-token'  # Alternatively: export TURBOPUFFER_API_KEY=your-token

# Open a namespace
ns = tpuf.Namespace('hello_world')
Expand Down
2 changes: 1 addition & 1 deletion turbopuffer/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def make_api_request(self,
while retry_attempts < 6:
if retry_attempts > 0:
print("retrying...")
time.sleep(2 ** retry_attempts) # exponential falloff up to 32 seconds.
time.sleep(2 ** retry_attempts) # exponential falloff up to 32 seconds.
request_start = time.monotonic()
try:
# print(f'Sending request:', prepared.path_url, prepared.headers)
Expand Down
27 changes: 16 additions & 11 deletions turbopuffer/namespace.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
import iso8601
from turbopuffer.error import APIError
from turbopuffer.vectors import Cursor, VectorResult, VectorColumns, VectorRow, batch_iter
from turbopuffer.backend import Backend
from turbopuffer.query import VectorQuery, FilterTuple
Expand Down Expand Up @@ -41,7 +42,8 @@ def __eq__(self, other):

def refresh_metadata(self):
    """Fetch this namespace's metadata via a HEAD request and cache it.

    On a 200 response, reads the vector dimension count and approximate
    vector count from the `x-turbopuffer-*` response headers and stores
    them in ``self.metadata``.  On a 404, caches an "empty namespace"
    marker instead of raising.

    Raises:
        APIError: if the server returns any status other than 200 or 404.
    """
    response = self.backend.make_api_request('vectors', self.name, method='HEAD')
    status_code = response.get('status_code')
    if status_code == 200:
        headers = response.get('headers', dict())
        # Missing headers default to '0' so int() never fails.
        dimensions = int(headers.get('x-turbopuffer-dimensions', '0'))
        approx_count = int(headers.get('x-turbopuffer-approx-num-vectors', '0'))
        self.metadata = {
            # NOTE(review): these two lines were collapsed in the diff view;
            # 'exists': True mirrors the visible 404 branch -- confirm.
            'exists': True,
            'dimensions': dimensions,
            'approx_count': approx_count,
        }
    elif status_code == 404:
        # Namespace does not exist yet: cache zeroed metadata rather than raising.
        self.metadata = {
            'exists': False,
            'dimensions': 0,
            'approx_count': 0,
        }
    else:
        # BUG FIX: `response` is a plain dict (accessed via .get() above), so
        # `response.status_code` would raise AttributeError; use the local.
        raise APIError(status_code, 'Unexpected status code', response.get('content'))

def exists(self) -> bool:
"""
Expand All @@ -69,17 +73,17 @@ def dimensions(self) -> int:
"""
Returns the number of vector dimensions stored in this namespace.
"""
if self.metadata is None:
if self.metadata is None or 'dimensions' not in self.metadata:
self.refresh_metadata()
return self.metadata['dimensions']
return self.metadata.pop('dimensions', 0)

def approx_count(self) -> int:
    """
    Returns the approximate number of vectors stored in this namespace.

    Reads the value from cached metadata, refreshing the cache first when it
    is absent or the key was already consumed.  The key is popped so each
    cached value is served at most once (metadata invalidated after 1 read).
    """
    # Diff artifact fixed: the scrape contained both the pre- and post-commit
    # condition/return lines; this is the post-commit version only.
    if self.metadata is None or 'approx_count' not in self.metadata:
        self.refresh_metadata()
    return self.metadata.pop('approx_count', 0)

@overload
def upsert(self, ids: Union[List[int], List[str]], vectors: List[List[float]], attributes: Optional[Dict[str, List[Optional[str]]]] = None) -> None:
Expand Down Expand Up @@ -137,6 +141,9 @@ def upsert(self, data=None, ids=None, vectors=None, attributes=None) -> None:
if vec is None:
raise ValueError('upsert() call would result in a vector deletion, use Namespace.delete([ids...]) instead.')
response = self.backend.make_api_request('vectors', self.name, payload=data.__dict__)

assert response.get('content', dict()).get('status', '') == 'OK', f'Invalid upsert() response: {response}'
self.metadata = None # Invalidate cached metadata
elif isinstance(data, VectorRow):
raise ValueError('upsert() should be called on a list of vectors, got single vector.')
elif isinstance(data, list):
Expand Down Expand Up @@ -182,6 +189,7 @@ def upsert(self, data=None, ids=None, vectors=None, attributes=None) -> None:
# time_diff = time.monotonic() - before
# print(f"Batch {columns.ids[0]}..{columns.ids[-1]} time:", time_diff, '/', len(batch), '=', len(batch)/time_diff)
# start = time.monotonic()
return
elif isinstance(data, Iterable):
# start = time.monotonic()
for batch in batch_iter(data, tpuf.upsert_batch_size):
Expand All @@ -196,9 +204,6 @@ def upsert(self, data=None, ids=None, vectors=None, attributes=None) -> None:
else:
raise ValueError(f'Unsupported data type: {type(data)}')

assert response.get('content', dict()).get('status', '') == 'OK', f'Invalid upsert() response: {response}'
self.metadata = None # Invalidate cached metadata

def delete(self, ids: Union[int, str, List[int], List[str]]) -> None:
"""
Deletes vectors by id.
Expand All @@ -218,7 +223,7 @@ def delete(self, ids: Union[int, str, List[int], List[str]]) -> None:
raise ValueError(f'Unsupported ids type: {type(ids)}')

assert response.get('content', dict()).get('status', '') == 'OK', f'Invalid delete() response: {response}'
self.metadata = None # Invalidate cached metadata
self.metadata = None # Invalidate cached metadata

@overload
def query(self,
Expand Down Expand Up @@ -302,7 +307,7 @@ def delete_all(self) -> None:

response = self.backend.make_api_request('vectors', self.name, method='DELETE')
assert response.get('content', dict()).get('status', '') == 'ok', f'Invalid delete_all() response: {response}'
self.metadata = None # Invalidate cached metadata
self.metadata = None # Invalidate cached metadata

def recall(self, num=20, top_k=10) -> float:
"""
Expand Down

0 comments on commit ab8ee65

Please sign in to comment.