forked from facebookresearch/code-prediction-transformer
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcode_parser.py
129 lines (118 loc) · 5.21 KB
/
code_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/python
import sys
import json as json
import ast
def read_file_to_string(filename):
    """Return the entire contents of a text file as a single string.

    Args:
        filename: Path of the file to read. It is opened in text mode
            ('rt') using the platform's default encoding.

    Returns:
        The file's full text.
    """
    # The context manager closes the handle even if read() raises,
    # which the manual open()/close() pairing did not guarantee.
    with open(filename, 'rt') as f:
        return f.read()
def parse_code(source_code):
    """Parse Python source and serialize its AST as a flat JSON node list.

    The AST is walked in pre-order. Every visited node becomes one dict in
    a list, and a node refers to its children by their indices in that
    list. Each dict has a 'type' key, an optional 'value' key (identifier
    or literal text) and an optional 'children' key (list of indices).
    Expression contexts and operators are not emitted as separate nodes;
    their class names are appended to the parent's 'type' (for example
    'NameLoad', 'NameStore').

    Args:
        source_code: Python source text accepted by ``ast.parse``.

    Returns:
        A compact JSON string (no spaces after separators, non-ASCII
        characters kept as-is) encoding the list of node dicts.

    Raises:
        SyntaxError: Propagated from ``ast.parse`` for invalid source.
    """
    tree = ast.parse(source_code)
    json_tree = []

    # Child node classes that are folded into the parent's type name
    # instead of getting a node of their own.
    inlined_types = (ast.expr_context, ast.operator, ast.boolop,
                     ast.unaryop, ast.cmpop)

    def gen_identifier(identifier, node_type='identifier'):
        """Append a leaf node holding a bare name; return its index."""
        pos = len(json_tree)
        json_tree.append({'type': node_type, 'value': identifier})
        return pos

    def traverse_list(items, node_type='list'):
        """Append a grouping node for a list of AST nodes; return its index."""
        pos = len(json_tree)
        json_node = {'type': node_type}
        # The grouping node must be registered before its members are
        # visited so that it receives the lower (pre-order) index.
        json_tree.append(json_node)
        children = [traverse(item) for item in items]
        if children:
            json_node['children'] = children
        return pos

    def traverse(node):
        """Append the node and, recursively, its subtree; return its index."""
        pos = len(json_tree)
        type_name = type(node).__name__
        json_node = {'type': type_name}
        json_tree.append(json_node)
        children = []

        # Step 1: attach a value to nodes that carry a name or a literal.
        if isinstance(node, ast.Name):
            json_node['value'] = node.id
        elif type_name == 'Num':
            # Literal node class produced by interpreters older than 3.8.
            # Matched by name so the deprecated ast.Num alias is never
            # looked up on the ast module.
            json_node['value'] = str(node.n)
        elif type_name == 'Str':
            # Same as above, for pre-3.8 string literals.
            json_node['value'] = str(node.s)
        elif isinstance(node, ast.Constant):
            literal = node.value
            if isinstance(literal, str):
                json_node['value'] = str(literal)
            elif (isinstance(literal, (int, float, complex))
                    and not isinstance(literal, bool)):
                # bool is a subclass of int; True/False get no value,
                # matching what the former ast.Num check accepted.
                json_node['value'] = str(literal)
        elif isinstance(node, ast.alias):
            json_node['value'] = str(node.name)
            if node.asname:
                children.append(gen_identifier(node.asname))
        elif isinstance(node, ast.FunctionDef):
            json_node['value'] = str(node.name)
        elif isinstance(node, ast.ClassDef):
            json_node['value'] = str(node.name)
        elif isinstance(node, ast.ImportFrom):
            if node.module:
                json_node['value'] = str(node.module)
        elif isinstance(node, ast.Global):
            for n in node.names:
                children.append(gen_identifier(n))
        elif isinstance(node, ast.keyword):
            # NOTE: for a `**mapping` call argument node.arg is None, so
            # the recorded value is the string 'None'.
            json_node['value'] = str(node.arg)
        elif isinstance(node, ast.arg):
            # Python 3 parameters are `arg` nodes; record the parameter
            # name so it is not lost from the output.
            json_node['value'] = str(node.arg)

        # Step 2: process children.
        if isinstance(node, ast.For):
            children.append(traverse(node.target))
            children.append(traverse(node.iter))
            children.append(traverse_list(node.body, 'body'))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse'))
        elif isinstance(node, (ast.If, ast.While)):
            children.append(traverse(node.test))
            children.append(traverse_list(node.body, 'body'))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse'))
        elif isinstance(node, ast.With):
            # Python 3 stores each `expr as target` pair in node.items;
            # the With node itself has no context_expr/optional_vars.
            for item in node.items:
                children.append(traverse(item.context_expr))
                if item.optional_vars:
                    children.append(traverse(item.optional_vars))
            children.append(traverse_list(node.body, 'body'))
        elif isinstance(node, ast.Try):
            children.append(traverse_list(node.body, 'body'))
            children.append(traverse_list(node.handlers, 'handlers'))
            children.append(traverse_list(node.finalbody, 'finalbody'))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse'))
        elif isinstance(node, ast.arguments):
            children.append(traverse_list(node.args, 'args'))
            children.append(traverse_list(node.defaults, 'defaults'))
            # vararg/kwarg are `arg` nodes (or None) in Python 3; only
            # their name string is JSON-serializable.
            if node.vararg:
                children.append(gen_identifier(node.vararg.arg, 'vararg'))
            if node.kwarg:
                children.append(gen_identifier(node.kwarg.arg, 'kwarg'))
        elif isinstance(node, ast.ExceptHandler):
            if node.type:
                children.append(traverse_list([node.type], 'type'))
            if node.name:
                # In Python 3 the bound name is a plain string, not an
                # AST node, so it is emitted as a leaf.
                children.append(gen_identifier(node.name, 'name'))
            children.append(traverse_list(node.body, 'body'))
        elif isinstance(node, ast.ClassDef):
            children.append(traverse_list(node.bases, 'bases'))
            children.append(traverse_list(node.body, 'body'))
            children.append(traverse_list(node.decorator_list, 'decorator_list'))
        elif isinstance(node, ast.FunctionDef):
            children.append(traverse(node.args))
            children.append(traverse_list(node.body, 'body'))
            children.append(traverse_list(node.decorator_list, 'decorator_list'))
        else:
            # Default handling: iterate over children.
            for child in ast.iter_child_nodes(node):
                if isinstance(child, inlined_types):
                    # Directly include expr_context and operators into
                    # the type instead of creating a child.
                    json_node['type'] = json_node['type'] + type(child).__name__
                else:
                    children.append(traverse(child))

        # The attribute name is a plain string field, so it is added as
        # an extra leaf after the value expression.
        if isinstance(node, ast.Attribute):
            children.append(gen_identifier(node.attr, 'attr'))

        if children:
            json_node['children'] = children
        return pos

    traverse(tree)
    return json.dumps(json_tree, separators=(',', ':'), ensure_ascii=False)