forked from aws-samples/aws-dynamodb-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscan_parallel.py
73 lines (53 loc) · 1.8 KB
/
scan_parallel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from __future__ import print_function # Python 2/3 compatibility
import time, sys, boto3, threading
from botocore.exceptions import ClientError
# Create a boto3 session pinned to the eu-west-1 region, then derive a
# low-level DynamoDB client from it. The client is a module-level name that
# scan_table() reads directly, so every worker thread issues its requests
# through this one client object.
session = boto3.session.Session(region_name="eu-west-1")
dynamoDbClient = session.client('dynamodb')
def scan_table(segment, total_segments):
    """Scan one segment of the table to exhaustion, discarding the items.

    Reads the module-level ``table_name`` and ``dynamoDbClient``. Every page
    of the segment is requested in turn; the responses are not stored or
    returned, so the function is only useful for timing the scan.

    Args:
        segment: Value passed as the ``Segment`` parameter of each scan call.
        total_segments: Value passed as ``TotalSegments`` of each scan call.

    A ``ClientError`` raised by any request is reported on stdout and ends
    the scan of this segment; it is not re-raised.
    """
    # Announce which segment this worker is handling
    print('Starting Segment ' + str(segment))

    try:
        # Parameters shared by every page request for this segment
        request = {
            'TableName': table_name,
            'Segment': segment,
            'TotalSegments': total_segments,
        }
        while True:
            page = dynamoDbClient.scan(**request)
            # No LastEvaluatedKey in the response means the segment is done
            if 'LastEvaluatedKey' not in page:
                break
            # Resume the next request where this page stopped
            request['ExclusiveStartKey'] = page['LastEvaluatedKey']
    except ClientError as error:
        print("Something went wrong: ")
        print(error.response['ResponseMetadata'])
def create_threads():
    """Run scan_table() once per segment, each in its own thread.

    Reads the module-level ``threads`` count, which is used both as the
    number of worker threads and as the total segment count handed to
    scan_table(). Returns only after every worker has finished.
    """
    # Build one (not yet started) worker per segment index
    workers = [
        threading.Thread(target=scan_table, args=(segment, threads))
        for segment in range(threads)
    ]

    # Launch every worker before waiting on any of them
    for worker in workers:
        worker.start()

    # Hold the calling thread until all workers are done
    for worker in workers:
        worker.join()
# Script entry point: time a full parallel scan of the table
if __name__ == "__main__":
    # Module-level settings read by scan_table() and create_threads()
    table_name = "AmazonBins"
    threads = 10  # number of worker threads, also the number of scan segments

    # Wall-clock timestamp taken just before the workers are created
    start = time.time()

    # Returns once every worker thread has been joined
    create_threads()

    # Elapsed seconds for the whole scan
    elapsed = time.time() - start
    print(elapsed)