-
Notifications
You must be signed in to change notification settings - Fork 0
/
Memoryfailures_Parser.py
69 lines (55 loc) · 1.67 KB
/
Memoryfailures_Parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python
# This parser helps users identify which mapper, on which node, caused a memory failure.
# Note: add your own memory failure types to the check in parser_history_log(mfile).
import sys
import os
import re
import time
import threading
from optparse import OptionParser
from util import *
# Module-level result table filled in by Parser(): maps each '.jhist' file name
# to the dict returned by parser_history_log() for that file.
mapper_memory_failuers = {}
def parser_history_log(mfile):
    """Scan one history log for map attempts that failed with a Java heap error.

    Every line containing both 'MAP_ATTEMPT_FAILED' and the
    '"error":"Error: Java heap space"' marker is decoded with the get_*
    helpers, and the resulting record is printed.

    Args:
        mfile: Path of the history log file to scan.

    Returns:
        A dict with the keys 'TaskID', 'ErrorMsg', 'AttemptID', 'HostName'
        and 'ContainerId' describing the last matching line in the file, or
        an empty dict when no line matched.
    """
    context = {}
    # 'with' guarantees the handle is closed even if a helper raises on a
    # malformed line.
    with open(mfile) as m_file:
        for line in m_file:
            if 'MAP_ATTEMPT_FAILED' not in line:
                continue
            if '"error":"Error: Java heap space"' not in line:
                continue
            context['TaskID'] = get_TaskID(line)
            context['ErrorMsg'] = get_ErrorMsg(line)
            context['AttemptID'] = get_AttemptID(line)
            context['HostName'] = get_HostName(line)
            # Search for the start record through a separate handle. Sharing
            # the outer iterator would restrict the search to lines after the
            # failure and would also swallow those lines, hiding any later
            # failures from this loop.
            # NOTE(review): the lookup matches on the task ID, so it returns
            # the first started attempt of that task - confirm this is the
            # container that is wanted when a task was retried.
            with open(mfile) as lookup:
                context['ContainerId'] = get_ContainerId(lookup, context['TaskID'])
            # Parenthesised single-argument form prints the same on Python 2
            # and is also valid on Python 3.
            print(context)
    return context
def get_TaskID(line):
    """Extract the task ID token from a history-log line.

    The line is split on commas; the second field is then split on colons
    and its fourth piece is returned exactly as found (no quote stripping).

    Raises:
        IndexError: If the line has fewer comma fields or colon pieces
            than required.
    """
    second_field = line.split(",")[1]
    pieces = second_field.split(":")
    return pieces[3]
def get_AttemptID(line):
    """Extract the attempt ID token from a history-log line.

    The line is split on commas; the fourth field is then split on colons
    and its second piece is returned exactly as found (no quote stripping).

    Raises:
        IndexError: If the line has fewer comma fields or colon pieces
            than required.
    """
    fourth_field = line.split(",")[3]
    return fourth_field.split(":")[1]
def get_ContainerId(mfile, TaskID):
    """Find the container ID token of the first started attempt for a task.

    Args:
        mfile: Iterable of log lines (for example an open file); it is
            consumed up to and including the first matching line.
        TaskID: Substring that must appear in the line together with
            'MAP_ATTEMPT_STARTED'.

    Returns:
        The second colon-separated piece of the ninth comma-separated field
        of the first matching line, or '' when no line matches.

    Raises:
        IndexError: If a matching line has too few fields.
    """
    for record in mfile:
        if 'MAP_ATTEMPT_STARTED' not in record:
            continue
        if TaskID not in record:
            continue
        ninth_field = record.split(",")[8]
        return ninth_field.split(":")[1]
    return ''
def get_HostName(line):
    """Extract the host name token from a history-log line.

    The line is split on commas; the sixth field is then split on colons
    and its second piece is returned exactly as found (no quote stripping).

    Raises:
        IndexError: If the line has fewer comma fields or colon pieces
            than required.
    """
    sixth_field = line.split(",")[5]
    return sixth_field.split(":")[1]
def get_ErrorMsg(line):
    """Extract the error message text from a history-log line.

    The line is split on commas; the tenth field is then split on colons
    and its third piece is returned exactly as found, including any leading
    space or trailing punctuation.

    Raises:
        IndexError: If the line has fewer comma fields or colon pieces
            than required.
    """
    tenth_field = line.split(",")[9]
    pieces = tenth_field.split(":")
    return pieces[2]
def Parser(path):
    """Parse every history log in a directory and record the results.

    Each entry of ``path`` whose name ends with 'jhist' is passed to
    parser_history_log(), and the returned dict is stored in the
    module-level mapper_memory_failuers table under the entry's file name.

    Args:
        path: Directory containing the history log files.
    """
    for name in os.listdir(path):
        if not name.endswith('jhist'):
            continue
        mapper_memory_failuers[name] = parser_history_log(path + '/' + name)
# Note: when calling Parser(path), pass the directory that contains the logs as the parameter.
# Parser("/mnt/var/log/hadoop/history/2015/05/05/000000")
#