-
Notifications
You must be signed in to change notification settings - Fork 1
/
LoadMessySampleData.py
executable file
·51 lines (45 loc) · 1.41 KB
/
LoadMessySampleData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from redisearch import Client, IndexDefinition, TextField
from redis import ResponseError
import time
# Search schema: one text field per indexed attribute, each with its own
# relevance weight. Field order is account, owner, address, class.
_FIELD_WEIGHTS = (
    ("account", 4.0),
    ("owner", 2.0),
    ("address", 3.0),
    ("class", 1.0),
)
schema = tuple(
    TextField(field_name, weight=field_weight)
    for field_name, field_weight in _FIELD_WEIGHTS
)
# Zero-based positions of the columns read from each "|"-separated record
# of the input file.
fld_account, fld_owner, fld_address, fld_class = 2, 6, 12, 13
create_index = False  # not read anywhere in this script

# Search client bound to "data" (presumably the index name - confirm against
# the redisearch client documentation).
r_client = Client("data")

# Reference point for the elapsed-time report at the end of the script.
start = time.time()

# Probe the index: a ResponseError from info() is treated as "the index does
# not exist yet", in which case it is created over keys prefixed with "doc:".
try:
    index_info = r_client.info()
except ResponseError:
    print("Index needs to be created")
    doc_definition = IndexDefinition(prefix=['doc:'])
    r_client.create_index(schema, definition=doc_definition)
else:
    print(index_info)
# Load every record of the "|"-separated sample file into Redis as a hash
# stored under the key "doc:<account>".
#
# Records that cannot be parsed or stored are reported and skipped, so a
# single bad line does not abort the whole load.
with open('MessySampleData.txt', encoding='ISO-8859-1') as infile:
    for line_no, line in enumerate(infile, start=1):
        if line_no == 1:
            continue  # pass the first line because it's the header

        # Split outside the try so `fields` is always bound when the error
        # report below prints it.
        fields = line.strip().split("|")
        try:
            # NOTE: the key uses the raw account value, while the stored
            # "account" field is left-padded with zeros to 8 characters.
            r_client.redis.hset("doc:" + fields[fld_account], mapping={
                "account": str(fields[fld_account]).rjust(8, '0'),
                "owner": fields[fld_owner].strip(),
                "address": fields[fld_address].strip(),
                "class": fields[fld_class].strip(),
            })
        except Exception as e:
            # Best-effort load: report the offending line and keep going.
            # line_no is an int, so it is formatted instead of concatenated
            # (str + int raises TypeError and would crash the handler).
            print(f"Error line {line_no}")
            print(e)
            print("----------------------------")
            print(fields)
        else:
            # Progress marker every 1000 input lines (the header line is
            # included in the count).
            if line_no % 1000 == 0:
                print(f'{line_no:,}')
# Report the wall-clock time elapsed since `start` was captured.
elapsed = time.time() - start
print("Completed in %s sec" % elapsed)