test: add bitmap index cases
Signed-off-by: wangting0128 <[email protected]>
wangting0128 committed Sep 2, 2024
1 parent 57422cb commit 9092524
Showing 7 changed files with 973 additions and 35 deletions.
22 changes: 22 additions & 0 deletions tests/python_client/base/client_base.py
@@ -1,5 +1,6 @@
import pytest
import sys
from typing import Dict, List
from pymilvus import DefaultConfig

from base.database_wrapper import ApiDatabaseWrapper
@@ -15,6 +16,7 @@
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_params import IndexPrams

from pymilvus import ResourceGroupInfo

@@ -395,3 +397,23 @@ def init_user_with_privilege(self, privilege_object, object_name, privilege, db_

        return tmp_user, tmp_pwd, tmp_role

    def build_multi_index(self, index_params: Dict[str, IndexPrams], collection_obj: ApiCollectionWrapper = None):
        collection_obj = collection_obj or self.collection_wrap
        for k, v in index_params.items():
            collection_obj.create_index(field_name=k, index_params=v.to_dict, index_name=k)
        log.info(f"[TestcaseBase] Build all indexes done: {list(index_params.keys())}")
        return collection_obj

    def drop_multi_index(self, index_names: List[str], collection_obj: ApiCollectionWrapper = None,
                         check_task=None, check_items=None):
        collection_obj = collection_obj or self.collection_wrap
        for n in index_names:
            collection_obj.drop_index(index_name=n, check_task=check_task, check_items=check_items)
        log.info(f"[TestcaseBase] Drop all indexes done: {index_names}")
        return collection_obj

    def show_indexes(self, collection_obj: ApiCollectionWrapper = None):
        collection_obj = collection_obj or self.collection_wrap
        indexes = {n.field_name: n.params for n in collection_obj.indexes}
        log.info("[TestcaseBase] Collection: `{0}` index: {1}".format(collection_obj.name, indexes))
        return indexes
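
A minimal usage sketch for the three helpers above (field names are hypothetical; the IndexPrams fields index_type/params/metric_type are assumed from common.common_params):

# Hypothetical usage inside a TestcaseBase subclass.
index_params = {
    "float_vector": IndexPrams(index_type="HNSW", params={"M": 8, "efConstruction": 200}, metric_type="L2"),
    "int64_1": IndexPrams(index_type="BITMAP"),
}
self.build_multi_index(index_params)  # one index per field, index name == field name
assert set(self.show_indexes().keys()) == set(index_params.keys())
self.drop_multi_index(list(index_params.keys()))  # collection must be released first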
19 changes: 19 additions & 0 deletions tests/python_client/check/func_check.py
@@ -104,6 +104,10 @@ def run(self):
            # describe collection interface (high level api) response check
            result = self.check_describe_collection_property(self.response, self.func_name, self.check_items)

        elif self.check_task == CheckTasks.check_insert_result:
            # check the `insert` interface response
            result = self.check_insert_response(check_items=self.check_items)

        # Add check_items here if something new needs verifying

        return result
@@ -602,3 +606,18 @@ def check_auth_failure(res, actual=True):
log.error("[CheckFunc] Response of API is not an error: %s" % str(res))
assert False
return True

    def check_insert_response(self, check_items):
        # check that the request succeeded
        self.assert_succ(self.succ, True)

        # expected insert count: taken from check_items if provided, else inferred from the request data
        real = check_items.get("insert_count", None) if isinstance(check_items, dict) else None
        if real is None:
            real = len(self.kwargs_dict.get("data", [[]])[0])

        # check insert count
        error_message = "[CheckFunc] Insert count does not meet expectations, response:{0} != expected:{1}"
        assert self.response.insert_count == real, error_message.format(self.response.insert_count, real)

        return True
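
A hedged sketch of a call site for the new check (CheckTasks.check_insert_result is the task added in run() above; the cf/ct helpers shown are illustrative):

# Hypothetical: verify the server-reported insert count in one step.
data = cf.gen_values(self.collection_wrap.schema, nb=ct.default_nb)
self.collection_wrap.insert(
    data=data,
    check_task=CheckTasks.check_insert_result,
    check_items={"insert_count": ct.default_nb},
)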
6 changes: 6 additions & 0 deletions tests/python_client/common/code_mapping.py
@@ -32,3 +32,9 @@ class PartitionErrorMessage(ExceptionsMessage):

class IndexErrorMessage(ExceptionsMessage):
    WrongFieldName = "cannot create index on non-vector field: %s"
    DropLoadedIndex = "index cannot be dropped, collection is loaded, please release it first"
    CheckVectorIndex = "data type {0} can't build with this index {1}"
    SparseFloatVectorMetricType = "only IP is the supported metric type for sparse index"
    VectorMetricTypeExist = "metric type not set for vector index"
    CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field"
    CheckBitmapOnPK = "create bitmap index on primary key not supported"
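
These constants are intended for negative-path assertions; a sketch under assumptions (the err_res task name and the err_code value are illustrative, not taken from this commit):

# Hypothetical: building BITMAP on a vector field should fail with CheckBitmapIndex.
self.collection_wrap.create_index(
    field_name="float_vector",
    index_params={"index_type": "BITMAP"},
    check_task=CheckTasks.err_res,
    check_items={"err_code": 1100, "err_msg": IndexErrorMessage.CheckBitmapIndex},
)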
133 changes: 120 additions & 13 deletions tests/python_client/common/common_func.py
Expand Up @@ -14,7 +14,7 @@
from faker import Faker
from pathlib import Path
from minio import Minio
from pymilvus import DataType
from pymilvus import DataType, CollectionSchema
from base.schema_wrapper import ApiCollectionSchemaWrapper, ApiFieldSchemaWrapper
from common import common_type as ct
from utils.util_log import test_log as log
@@ -24,6 +24,12 @@
"""" Methods of processing data """


try:
    RNG = np.random.default_rng(seed=0)  # shared generator for reproducible random vectors
except ValueError:
    RNG = None


@singledispatch
def to_serializable(val):
"""Used by default."""
@@ -1230,30 +1236,45 @@ def gen_data_by_collection_field(field, nb=None, start=None):
    if data_type == DataType.BFLOAT16_VECTOR:
        dim = field.params['dim']
        if nb is None:
            return RNG.uniform(size=dim).astype(bfloat16)
        return [RNG.uniform(size=dim).astype(bfloat16) for _ in range(int(nb))]
        # previous byte-packed implementation, kept for reference:
        # if nb is None:
        #     raw_vector = [random.random() for _ in range(dim)]
        #     bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist()
        #     return bytes(bf16_vector)
        # bf16_vectors = []
        # for i in range(nb):
        #     raw_vector = [random.random() for _ in range(dim)]
        #     bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist()
        #     bf16_vectors.append(bytes(bf16_vector))
        # return bf16_vectors
    if data_type == DataType.FLOAT16_VECTOR:
        dim = field.params['dim']
        if nb is None:
            return np.array([random.random() for _ in range(int(dim))], dtype=np.float16)
        return [np.array([random.random() for _ in range(int(dim))], dtype=np.float16) for _ in range(int(nb))]
    if data_type == DataType.BINARY_VECTOR:
        dim = field.params['dim']
        if nb is None:
            raw_vector = [random.randint(0, 1) for _ in range(dim)]
            binary_byte = bytes(np.packbits(raw_vector, axis=-1).tolist())
            return binary_byte
        return [bytes(np.packbits([random.randint(0, 1) for _ in range(dim)], axis=-1).tolist()) for _ in range(nb)]
    if data_type == DataType.SPARSE_FLOAT_VECTOR:
        if nb is None:
            return gen_sparse_vectors(nb=1)[0]
        return gen_sparse_vectors(nb=nb)
    if data_type == DataType.ARRAY:
        max_capacity = field.params['max_capacity']
        element_type = field.element_type
        if element_type == DataType.INT8:
            if nb is None:
                return [random.randint(-128, 127) for _ in range(max_capacity)]
            return [[random.randint(-128, 127) for _ in range(max_capacity)] for _ in range(nb)]
        if element_type == DataType.INT16:
            if nb is None:
                return [random.randint(-32768, 32767) for _ in range(max_capacity)]
            return [[random.randint(-32768, 32767) for _ in range(max_capacity)] for _ in range(nb)]
        if element_type == DataType.INT32:
            if nb is None:
                return [random.randint(-2147483648, 2147483647) for _ in range(max_capacity)]
@@ -1279,7 +1300,6 @@ def gen_data_by_collection_field(field, nb=None, start=None):
            if nb is None:
                return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)]
            return [["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)] for _ in range(nb)]
    return None


@@ -1296,6 +1316,25 @@ def gen_data_by_collection_schema(schema, nb, r=0):
    return data


def gen_varchar_values(nb: int, length: int = 0):
    return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(nb)]


def gen_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}):
    """
    Generate column-based data for every non-auto-id field in the collection schema;
    a field's generated column can be replaced by supplying it in `default_values`.
    """
    data = []
    for field in schema.fields:
        default_value = default_values.get(field.name, None)
        if default_value is not None:
            data.append(default_value)
        elif field.auto_id is False:
            data.append(gen_data_by_collection_field(field, nb, start_id * nb))
    return data
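
A short sketch of the override behaviour (column-based data; the schema and field name are illustrative):

# Hypothetical: generate 10 rows, but pin the int64_1 column to known values.
columns = gen_values(schema, nb=10, default_values={"int64_1": list(range(10))})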


def gen_json_files_for_bulk_insert(data, schema, data_dir):
    for d in data:
        if len(d) > 0:
@@ -2288,3 +2327,71 @@ def gen_vectors_based_on_vector_type(num, dim, vector_data_type):
        vectors = gen_sparse_vectors(num, dim)

    return vectors


def field_types() -> dict:
    return dict(sorted(dict(DataType.__members__).items(), key=lambda item: item[0], reverse=True))


def get_array_element_type(data_type: str):
    if hasattr(DataType, "ARRAY") and data_type.upper().startswith(DataType.ARRAY.name):
        element_type = data_type.upper().lstrip(DataType.ARRAY.name).lstrip("_")
        for _field in field_types().keys():
            if str(element_type).upper().startswith(_field):
                return _field, getattr(DataType, _field)
        raise ValueError(f"[get_array_element_type] Can't find element type:{element_type} for array:{data_type}")
    raise ValueError(f"[get_array_element_type] Data type does not start with {DataType.ARRAY.name}: {data_type}")


def set_field_schema(field: str, params: dict):
    for k, v in field_types().items():
        if str(field).upper().startswith(k):
            _kwargs = {}

            _field_element, _data_type = k, DataType.NONE
            if hasattr(DataType, "ARRAY") and _field_element == DataType.ARRAY.name:
                _field_element, _data_type = get_array_element_type(field)
                _kwargs.update({"max_capacity": ct.default_max_capacity, "element_type": _data_type})

            if _field_element in [DataType.STRING.name, DataType.VARCHAR.name]:
                _kwargs.update({"max_length": ct.default_length})

            elif _field_element in [DataType.BINARY_VECTOR.name, DataType.FLOAT_VECTOR.name,
                                    DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name]:
                _kwargs.update({"dim": ct.default_dim})

            if isinstance(params, dict):
                _kwargs.update(params)
            else:
                raise ValueError(
                    f"[set_field_schema] Field `{field}` params is not a dict, type: {type(params)}, params: {params}")
            return ApiFieldSchemaWrapper().init_field_schema(name=field, dtype=v, **_kwargs)[0]
    raise ValueError(f"[set_field_schema] Can't set field:`{field}` schema: {params}")


def set_collection_schema(fields: list, field_params: dict = {}, **kwargs):
    """
    :param fields: List[str]
    :param field_params: {<field name>: dict<field params>}
        int64_1:
            is_primary: bool
            description: str
        varchar_1:
            is_primary: bool
            description: str
            max_length: int = 65535
        array_int8_1:
            max_capacity: int = 100
        array_varchar_1:
            max_capacity: int = 100
            max_length: int = 65535
        float_vector:
            dim: int = 128
    :param kwargs: <params for collection schema>
        description: str
        primary_field: str
        auto_id: bool
        enable_dynamic_field: bool
    """
    field_schemas = [set_field_schema(field=field, params=field_params.get(field, {})) for field in fields]
    return ApiCollectionSchemaWrapper().init_collection_schema(fields=field_schemas, **kwargs)[0]
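
A hedged end-to-end sketch of the schema helper (field names rely on the prefix matching in set_field_schema; defaults such as dim and max_length come from common_type):

# Hypothetical: name prefixes select the DataType; field_params override per-field defaults.
schema = set_collection_schema(
    fields=["int64_pk", "float_vector", "varchar_1", "array_int8_1"],
    field_params={"int64_pk": {"is_primary": True}},
    auto_id=False,
    enable_dynamic_field=True,
)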