test: add bitmap index cases
Signed-off-by: wangting0128 <[email protected]>
wangting0128 committed Sep 2, 2024
1 parent 57422cb commit 9092524
Showing 7 changed files with 973 additions and 35 deletions.
22 changes: 22 additions & 0 deletions tests/python_client/base/client_base.py
@@ -1,5 +1,6 @@
import pytest
import sys
from typing import Dict, List
from pymilvus import DefaultConfig

from base.database_wrapper import ApiDatabaseWrapper
@@ -15,6 +16,7 @@
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_params import IndexPrams

from pymilvus import ResourceGroupInfo

@@ -395,3 +397,23 @@ def init_user_with_privilege(self, privilege_object, object_name, privilege, db_

        return tmp_user, tmp_pwd, tmp_role

    def build_multi_index(self, index_params: Dict[str, IndexPrams], collection_obj: ApiCollectionWrapper = None):
        collection_obj = collection_obj or self.collection_wrap
        for k, v in index_params.items():
            collection_obj.create_index(field_name=k, index_params=v.to_dict, index_name=k)
        log.info(f"[TestcaseBase] Build all indexes done: {list(index_params.keys())}")
        return collection_obj

    def drop_multi_index(self, index_names: List[str], collection_obj: ApiCollectionWrapper = None,
                         check_task=None, check_items=None):
        collection_obj = collection_obj or self.collection_wrap
        for n in index_names:
            collection_obj.drop_index(index_name=n, check_task=check_task, check_items=check_items)
        log.info(f"[TestcaseBase] Drop all indexes done: {index_names}")
        return collection_obj

    def show_indexes(self, collection_obj: ApiCollectionWrapper = None):
        collection_obj = collection_obj or self.collection_wrap
        indexes = {n.field_name: n.params for n in collection_obj.indexes}
        log.info("[TestcaseBase] Collection: `{0}` index: {1}".format(collection_obj.name, indexes))
        return indexes
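
A minimal usage sketch for the three helpers above (field names are hypothetical; the IndexPrams fields index_type/params/metric_type are assumed from common.common_params):

# Hypothetical usage inside a TestcaseBase subclass.
index_params = {
    "float_vector": IndexPrams(index_type="HNSW", params={"M": 8, "efConstruction": 200}, metric_type="L2"),
    "int64_1": IndexPrams(index_type="BITMAP"),
}
self.build_multi_index(index_params)  # one index per field, index name == field name
assert set(self.show_indexes().keys()) == set(index_params.keys())
self.drop_multi_index(list(index_params.keys()))  # collection must be released first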
19 changes: 19 additions & 0 deletions tests/python_client/check/func_check.py
@@ -104,6 +104,10 @@ def run(self):
            # describe collection interface (high level api) response check
            result = self.check_describe_collection_property(self.response, self.func_name, self.check_items)

        elif self.check_task == CheckTasks.check_insert_result:
            # check the `insert` interface response
            result = self.check_insert_response(check_items=self.check_items)

        # Add check_items here if something new needs verifying

        return result
@@ -602,3 +606,18 @@ def check_auth_failure(res, actual=True):
log.error("[CheckFunc] Response of API is not an error: %s" % str(res))
assert False
return True

    def check_insert_response(self, check_items):
        # check that the request succeeded
        self.assert_succ(self.succ, True)

        # expected insert count: taken from check_items if provided, else inferred from the request data
        real = check_items.get("insert_count", None) if isinstance(check_items, dict) else None
        if real is None:
            real = len(self.kwargs_dict.get("data", [[]])[0])

        # check insert count
        error_message = "[CheckFunc] Insert count does not meet expectations, response:{0} != expected:{1}"
        assert self.response.insert_count == real, error_message.format(self.response.insert_count, real)

        return True
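
A hedged sketch of a call site for the new check (CheckTasks.check_insert_result is the task added in run() above; the cf/ct helpers shown are illustrative):

# Hypothetical: verify the server-reported insert count in one step.
data = cf.gen_values(self.collection_wrap.schema, nb=ct.default_nb)
self.collection_wrap.insert(
    data=data,
    check_task=CheckTasks.check_insert_result,
    check_items={"insert_count": ct.default_nb},
)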
6 changes: 6 additions & 0 deletions tests/python_client/common/code_mapping.py
@@ -32,3 +32,9 @@ class PartitionErrorMessage(ExceptionsMessage):

class IndexErrorMessage(ExceptionsMessage):
    WrongFieldName = "cannot create index on non-vector field: %s"
    DropLoadedIndex = "index cannot be dropped, collection is loaded, please release it first"
    CheckVectorIndex = "data type {0} can't build with this index {1}"
    SparseFloatVectorMetricType = "only IP is the supported metric type for sparse index"
    VectorMetricTypeExist = "metric type not set for vector index"
    CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field"
    CheckBitmapOnPK = "create bitmap index on primary key not supported"
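
These constants are intended for negative-path assertions; a sketch under assumptions (the err_res task name and the err_code value are illustrative, not taken from this commit):

# Hypothetical: building BITMAP on a vector field should fail with CheckBitmapIndex.
self.collection_wrap.create_index(
    field_name="float_vector",
    index_params={"index_type": "BITMAP"},
    check_task=CheckTasks.err_res,
    check_items={"err_code": 1100, "err_msg": IndexErrorMessage.CheckBitmapIndex},
)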
133 changes: 120 additions & 13 deletions tests/python_client/common/common_func.py
Expand Up @@ -14,7 +14,7 @@
from faker import Faker
from pathlib import Path
from minio import Minio
from pymilvus import DataType
from pymilvus import DataType, CollectionSchema
from base.schema_wrapper import ApiCollectionSchemaWrapper, ApiFieldSchemaWrapper
from common import common_type as ct
from utils.util_log import test_log as log
@@ -24,6 +24,12 @@
"""" Methods of processing data """


try:
    RNG = np.random.default_rng(seed=0)  # shared generator for reproducible random vectors
except ValueError:
    RNG = None


@singledispatch
def to_serializable(val):
"""Used by default."""
@@ -1230,30 +1236,45 @@ def gen_data_by_collection_field(field, nb=None, start=None):
    if data_type == DataType.BFLOAT16_VECTOR:
        dim = field.params['dim']
        if nb is None:
            return RNG.uniform(size=dim).astype(bfloat16)
        return [RNG.uniform(size=dim).astype(bfloat16) for _ in range(int(nb))]
        # previous byte-packed implementation, kept for reference:
        # if nb is None:
        #     raw_vector = [random.random() for _ in range(dim)]
        #     bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist()
        #     return bytes(bf16_vector)
        # bf16_vectors = []
        # for i in range(nb):
        #     raw_vector = [random.random() for _ in range(dim)]
        #     bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist()
        #     bf16_vectors.append(bytes(bf16_vector))
        # return bf16_vectors
    if data_type == DataType.FLOAT16_VECTOR:
        dim = field.params['dim']
        if nb is None:
            return np.array([random.random() for _ in range(int(dim))], dtype=np.float16)
        return [np.array([random.random() for _ in range(int(dim))], dtype=np.float16) for _ in range(int(nb))]
    if data_type == DataType.BINARY_VECTOR:
        dim = field.params['dim']
        if nb is None:
            raw_vector = [random.randint(0, 1) for _ in range(dim)]
            binary_byte = bytes(np.packbits(raw_vector, axis=-1).tolist())
            return binary_byte
        return [bytes(np.packbits([random.randint(0, 1) for _ in range(dim)], axis=-1).tolist()) for _ in range(nb)]
    if data_type == DataType.SPARSE_FLOAT_VECTOR:
        if nb is None:
            return gen_sparse_vectors(nb=1)[0]
        return gen_sparse_vectors(nb=nb)
    if data_type == DataType.ARRAY:
        max_capacity = field.params['max_capacity']
        element_type = field.element_type
        if element_type == DataType.INT8:
            if nb is None:
                return [random.randint(-128, 127) for _ in range(max_capacity)]
            return [[random.randint(-128, 127) for _ in range(max_capacity)] for _ in range(nb)]
        if element_type == DataType.INT16:
            if nb is None:
                return [random.randint(-32768, 32767) for _ in range(max_capacity)]
            return [[random.randint(-32768, 32767) for _ in range(max_capacity)] for _ in range(nb)]
        if element_type == DataType.INT32:
            if nb is None:
                return [random.randint(-2147483648, 2147483647) for _ in range(max_capacity)]
@@ -1279,7 +1300,6 @@ def gen_data_by_collection_field(field, nb=None, start=None):
            if nb is None:
                return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)]
            return [["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)] for _ in range(nb)]
    return None


@@ -1296,6 +1316,25 @@ def gen_data_by_collection_schema(schema, nb, r=0):
    return data


def gen_varchar_values(nb: int, length: int = 0):
    return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(nb)]


def gen_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}):
    """
    Generate column-based data for every non-auto-id field in the collection schema;
    a field's generated column can be replaced by supplying it in `default_values`.
    """
    data = []
    for field in schema.fields:
        default_value = default_values.get(field.name, None)
        if default_value is not None:
            data.append(default_value)
        elif field.auto_id is False:
            data.append(gen_data_by_collection_field(field, nb, start_id * nb))
    return data
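
A short sketch of the override behaviour (column-based data; the schema and field name are illustrative):

# Hypothetical: generate 10 rows, but pin the int64_1 column to known values.
columns = gen_values(schema, nb=10, default_values={"int64_1": list(range(10))})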


def gen_json_files_for_bulk_insert(data, schema, data_dir):
    for d in data:
        if len(d) > 0:
@@ -2288,3 +2327,71 @@ def gen_vectors_based_on_vector_type(num, dim, vector_data_type):
        vectors = gen_sparse_vectors(num, dim)

    return vectors


def field_types() -> dict:
    return dict(sorted(dict(DataType.__members__).items(), key=lambda item: item[0], reverse=True))


def get_array_element_type(data_type: str):
    if hasattr(DataType, "ARRAY") and data_type.upper().startswith(DataType.ARRAY.name):
        element_type = data_type.upper().lstrip(DataType.ARRAY.name).lstrip("_")
        for _field in field_types().keys():
            if str(element_type).upper().startswith(_field):
                return _field, getattr(DataType, _field)
        raise ValueError(f"[get_array_element_type] Can't find element type:{element_type} for array:{data_type}")
    raise ValueError(f"[get_array_element_type] Data type does not start with {DataType.ARRAY.name}: {data_type}")


def set_field_schema(field: str, params: dict):
    for k, v in field_types().items():
        if str(field).upper().startswith(k):
            _kwargs = {}

            _field_element, _data_type = k, DataType.NONE
            if hasattr(DataType, "ARRAY") and _field_element == DataType.ARRAY.name:
                _field_element, _data_type = get_array_element_type(field)
                _kwargs.update({"max_capacity": ct.default_max_capacity, "element_type": _data_type})

            if _field_element in [DataType.STRING.name, DataType.VARCHAR.name]:
                _kwargs.update({"max_length": ct.default_length})

            elif _field_element in [DataType.BINARY_VECTOR.name, DataType.FLOAT_VECTOR.name,
                                    DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name]:
                _kwargs.update({"dim": ct.default_dim})

            if isinstance(params, dict):
                _kwargs.update(params)
            else:
                raise ValueError(
                    f"[set_field_schema] Field `{field}` params is not a dict, type: {type(params)}, params: {params}")
            return ApiFieldSchemaWrapper().init_field_schema(name=field, dtype=v, **_kwargs)[0]
    raise ValueError(f"[set_field_schema] Can't set field:`{field}` schema: {params}")


def set_collection_schema(fields: list, field_params: dict = {}, **kwargs):
    """
    :param fields: List[str]
    :param field_params: {<field name>: dict<field params>}
        int64_1:
            is_primary: bool
            description: str
        varchar_1:
            is_primary: bool
            description: str
            max_length: int = 65535
        array_int8_1:
            max_capacity: int = 100
        array_varchar_1:
            max_capacity: int = 100
            max_length: int = 65535
        float_vector:
            dim: int = 128
    :param kwargs: <params for collection schema>
        description: str
        primary_field: str
        auto_id: bool
        enable_dynamic_field: bool
    """
    field_schemas = [set_field_schema(field=field, params=field_params.get(field, {})) for field in fields]
    return ApiCollectionSchemaWrapper().init_collection_schema(fields=field_schemas, **kwargs)[0]
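
A hedged end-to-end sketch of the schema helper (field names rely on the prefix matching in set_field_schema; defaults such as dim and max_length come from common_type):

# Hypothetical: name prefixes select the DataType; field_params override per-field defaults.
schema = set_collection_schema(
    fields=["int64_pk", "float_vector", "varchar_1", "array_int8_1"],
    field_params={"int64_pk": {"is_primary": True}},
    auto_id=False,
    enable_dynamic_field=True,
)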