-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis.py
119 lines (97 loc) · 3.24 KB
/
analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#Importing the python libraries
import re
import numpy as np
import matplotlib.pyplot as plt
# Regular expressions for the three kinds of log line that are analysed.
# They are raw strings so that \d and \S reach the regex engine unchanged
# instead of being treated as (invalid) string escape sequences.

# Job start/end line: captures (timestamp, job id)
patJobStart = r'^INFO:root:([0-9.]*):.*:(\d*)$'
# Task start line: captures (timestamp, port, task id)
patTaskStart = r'^INFO:root:([0-9.]*):.*:(\d*):.*:(.*)$'
# Task end line: captures (timestamp, task id, single-digit worker id)
patTaskEnd = r'^INFO:root:([0-9.]*):.*:(\S*) (\d)$'
# Parse logs.log line by line and collect:
#   jobs    -- job id  -> first timestamp seen; replaced by (timestamp - stored
#              value) when the same job id is seen again
#   tasks   -- task id -> start timestamp; replaced by (end - start) when the
#              matching task-end line is seen
#   workerN -- list of (running task count, timestamp) samples for worker N
#   countN  -- current number of running tasks on worker N
# NOTE(review): the indentation of this script was lost before review; the
# counter updates below are assumed to run for every matching line, not only
# when the task id check succeeds -- confirm against the original file.
jobs = dict()
tasks = dict()
worker1 = []
count1 = 0
worker2 = []
count2 = 0
worker3 = []
count3 = 0

# The context manager guarantees the file is closed, even if a line fails
# to parse; the original left the handle open for the rest of the run.
with open('logs.log', 'r') as logs:
    for log_data in logs:
        # Job start / end line
        job_match = re.search(patJobStart, log_data)
        if job_match:
            t = float(job_match.group(1))
            jID = int(job_match.group(2))
            if jID not in jobs:
                jobs[jID] = t
            else:
                jobs[jID] = t - jobs[jID]

        # Task start line
        task_match_start = re.search(patTaskStart, log_data)
        if task_match_start:
            t = float(task_match_start.group(1))
            port = int(task_match_start.group(2))
            tID = task_match_start.group(3)
            if tID not in tasks:
                tasks[tID] = t
            # port % 3999 maps 4000 -> 1, 4001 -> 2, 4002 -> 3
            # (presumably the ports of workers 1-3 -- verify against the config)
            if port % 3999 == 1:
                count1 += 1
                worker1.append((count1, t))
            if port % 3999 == 2:
                count2 += 1
                worker2.append((count2, t))
            if port % 3999 == 3:
                count3 += 1
                worker3.append((count3, t))

        # Task end line
        task_match_end = re.search(patTaskEnd, log_data)
        if task_match_end:
            t = float(task_match_end.group(1))
            tID = task_match_end.group(2)
            wID = int(task_match_end.group(3))
            if tID in tasks:
                tasks[tID] = t - tasks[tID]
            # A finished task lowers the running-task count of its worker
            if wID == 1:
                count1 -= 1
                worker1.append((count1, t))
            if wID == 2:
                count2 -= 1
                worker2.append((count2, t))
            if wID == 3:
                count3 -= 1
                worker3.append((count3, t))
# Report the median and mean of the collected job and task completion times.
# From here on, jobs and tasks are NumPy arrays of the values only.
jobs = np.array([*jobs.values()])
job_median = np.median(jobs)
print("Median of job completion time = ", job_median)
job_mean = np.mean(jobs)
print("Mean of job completion time = ", job_mean)

tasks = np.array([*tasks.values()])
task_median = np.median(tasks)
print("Median of task completion time = ", task_median)
task_mean = np.mean(tasks)
print("Mean of task completion time = ", task_mean)
#print(worker1)
#print(worker2)
#print(worker3)
# Plot the number of tasks running on one worker against time
def plotFig(worker,s):
    """Draw and show the running-task count of a single worker over time.

    worker -- sequence of (running task count, timestamp) pairs
    s      -- text used as the figure title
    """
    task_counts = [sample[0] for sample in worker]
    timestamps = [sample[1] for sample in worker]
    # Time goes on the horizontal axis, the task count on the vertical one
    plt.plot(timestamps, task_counts, '--bo')
    plt.xlabel("Time")
    plt.ylabel("Number of running tasks")
    plt.title(s)
    plt.show()
# One figure per worker, shown in order: worker 1, worker 2, worker 3
for samples, title in (
    (worker1, "Worker 1"),
    (worker2, "Worker 2"),
    (worker3, "Worker 3"),
):
    plotFig(samples, title)