Skip to content

Commit

Permalink
enhance: Reorganize the examples (#2340)
Browse files Browse the repository at this point in the history
Signed-off-by: yangxuan <[email protected]>
  • Loading branch information
XuanYang-cn authored Nov 11, 2024
1 parent e4505ef commit 3110139
Show file tree
Hide file tree
Showing 69 changed files with 349 additions and 880 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ The following collection shows Milvus versions and recommended PyMilvus versions
| 2.1.\* | 2.1.3 |
| 2.2.\* | 2.2.15 |
| 2.3.\* | 2.3.7 |
| 2.4.\* | 2.4.4 |
| 2.4.\* | 2.4.9 |


## Installation
Expand All @@ -43,7 +43,7 @@ $ pip3 install pymilvus[bulk_writer] # for bulk_writer
You can install a specific version of PyMilvus by:

```shell
$ pip3 install pymilvus==2.4.4
$ pip3 install pymilvus==2.4.9
```

You can upgrade PyMilvus to the latest version by:
Expand Down
1 change: 1 addition & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Examples
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
62 changes: 22 additions & 40 deletions examples/hybrid_search.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,34 @@
import numpy as np
from pymilvus import (
connections,
utility,
FieldSchema, CollectionSchema, DataType,
Collection,
MilvusClient,
DataType,
AnnSearchRequest, RRFRanker, WeightedRanker,
)

fmt = "\n=== {:30} ===\n"
search_latency_fmt = "search latency = {:.4f}s"
num_entities, dim = 3000, 8

print(fmt.format("start connecting to Milvus"))
connections.connect("default", host="localhost", port="19530")
collection_name = "hello_milvus"
milvus_client = MilvusClient("http://localhost:19530")

has = utility.has_collection("hello_milvus")
print(f"Does collection hello_milvus exist in Milvus: {has}")
if has:
utility.drop_collection("hello_milvus")
has_collection = milvus_client.has_collection(collection_name, timeout=5)
if has_collection:
milvus_client.drop_collection(collection_name)

fields = [
FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=False, max_length=100),
FieldSchema(name="random", dtype=DataType.DOUBLE),
FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
FieldSchema(name="embeddings2", dtype=DataType.FLOAT_VECTOR, dim=dim)
]
schema = milvus_client.create_schema(auto_id=False, description="hello_milvus is the simplest demo to introduce the APIs")
schema.add_field("pk", DataType.VARCHAR, is_primary=True, max_length=100)
schema.add_field("random", DataType.DOUBLE)
schema.add_field("embeddings", DataType.FLOAT_VECTOR, dim=dim)
schema.add_field("embeddings2", DataType.FLOAT_VECTOR, dim=dim)

schema = CollectionSchema(fields, "hello_milvus is the simplest demo to introduce the APIs")
index_params = milvus_client.prepare_index_params()
index_params.add_index(field_name = "embeddings", index_type = "IVF_FLAT", metric_type="L2", nlist=128)
index_params.add_index(field_name = "embeddings2",index_type = "IVF_FLAT", metric_type="L2", nlist=128)

print(fmt.format("Create collection `hello_milvus`"))
hello_milvus = Collection("hello_milvus", schema, consistency_level="Strong", num_shards = 4)

milvus_client.create_collection(collection_name, schema=schema, index_params=index_params, consistency_level="Strong")

print(fmt.format("Start inserting entities"))
rng = np.random.default_rng(seed=19530)
Expand All @@ -41,29 +40,19 @@
rng.random((num_entities, dim)), # field embeddings2, supports numpy.ndarray and list
]

insert_result = hello_milvus.insert(entities)
rows = [ {"pk": entities[0][i], "random": entities[1][i], "embeddings": entities[2][i], "embeddings2": entities[3][i]} for i in range (num_entities)]

hello_milvus.flush()
print(f"Number of entities in Milvus: {hello_milvus.num_entities}") # check the num_entities
insert_result = milvus_client.insert(collection_name, rows)

print(fmt.format("Start Creating index IVF_FLAT"))
index = {
"index_type": "IVF_FLAT",
"metric_type": "L2",
"params": {"nlist": 128},
}

hello_milvus.create_index("embeddings", index)
hello_milvus.create_index("embeddings2", index)

print(fmt.format("Start loading"))
hello_milvus.load()
milvus_client.load_collection(collection_name)

field_names = ["embeddings", "embeddings2"]
field_names = ["embeddings"]

req_list = []
nq = 1
weights = [0.2, 0.3]
default_limit = 5
vectors_to_search = []

Expand All @@ -79,15 +68,8 @@
req = AnnSearchRequest(**search_param)
req_list.append(req)

hybrid_res = hello_milvus.hybrid_search(req_list, WeightedRanker(*weights), default_limit, output_fields=["random"])

print("rank by WightedRanker")
for hits in hybrid_res:
for hit in hits:
print(f" hybrid search hit: {hit}")

print("rank by RRFRanker")
hybrid_res = hello_milvus.hybrid_search(req_list, RRFRanker(), default_limit, output_fields=["random"])
hybrid_res = milvus_client.hybrid_search(collection_name, req_list, RRFRanker(), default_limit, output_fields=["random"])
for hits in hybrid_res:
for hit in hits:
print(f" hybrid search hit: {hit}")
File renamed without changes.
File renamed without changes.
93 changes: 93 additions & 0 deletions examples/hybrid_search/hybrid_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import numpy as np
from pymilvus import (
connections,
utility,
FieldSchema, CollectionSchema, DataType,
Collection,
AnnSearchRequest, RRFRanker, WeightedRanker,
)

fmt = "\n=== {:30} ===\n"
search_latency_fmt = "search latency = {:.4f}s"
num_entities, dim = 3000, 8

print(fmt.format("start connecting to Milvus"))
connections.connect("default", host="localhost", port="19530")

has = utility.has_collection("hello_milvus")
print(f"Does collection hello_milvus exist in Milvus: {has}")
if has:
utility.drop_collection("hello_milvus")

fields = [
FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=False, max_length=100),
FieldSchema(name="random", dtype=DataType.DOUBLE),
FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
FieldSchema(name="embeddings2", dtype=DataType.FLOAT_VECTOR, dim=dim)
]

schema = CollectionSchema(fields, "hello_milvus is the simplest demo to introduce the APIs")

print(fmt.format("Create collection `hello_milvus`"))
hello_milvus = Collection("hello_milvus", schema, consistency_level="Strong", num_shards = 4)

print(fmt.format("Start inserting entities"))
rng = np.random.default_rng(seed=19530)
entities = [
# provide the pk field because `auto_id` is set to False
[str(i) for i in range(num_entities)],
rng.random(num_entities).tolist(), # field random, only supports list
rng.random((num_entities, dim)), # field embeddings, supports numpy.ndarray and list
rng.random((num_entities, dim)), # field embeddings2, supports numpy.ndarray and list
]

insert_result = hello_milvus.insert(entities)

hello_milvus.flush()
print(f"Number of entities in Milvus: {hello_milvus.num_entities}") # check the num_entities

print(fmt.format("Start Creating index IVF_FLAT"))
index = {
"index_type": "IVF_FLAT",
"metric_type": "L2",
"params": {"nlist": 128},
}

hello_milvus.create_index("embeddings", index)
hello_milvus.create_index("embeddings2", index)

print(fmt.format("Start loading"))
hello_milvus.load()

field_names = ["embeddings", "embeddings2"]

req_list = []
nq = 1
weights = [0.2, 0.3]
default_limit = 5
vectors_to_search = []

for i in range(len(field_names)):
# 4. generate search data
vectors_to_search = rng.random((nq, dim))
search_param = {
"data": vectors_to_search,
"anns_field": field_names[i],
"param": {"metric_type": "L2"},
"limit": default_limit,
"expr": "random > 0.5"}
req = AnnSearchRequest(**search_param)
req_list.append(req)

hybrid_res = hello_milvus.hybrid_search(req_list, WeightedRanker(*weights), default_limit, output_fields=["random"])

print("rank by WightedRanker")
for hits in hybrid_res:
for hit in hits:
print(f" hybrid search hit: {hit}")

print("rank by RRFRanker")
hybrid_res = hello_milvus.hybrid_search(req_list, RRFRanker(), default_limit, output_fields=["random"])
for hits in hybrid_res:
for hit in hits:
print(f" hybrid search hit: {hit}")
File renamed without changes.
File renamed without changes.
75 changes: 0 additions & 75 deletions examples/milvus_client/hybrid_search.py

This file was deleted.

85 changes: 0 additions & 85 deletions examples/milvus_client/partition.py

This file was deleted.

File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 3110139

Please sign in to comment.