-
Notifications
You must be signed in to change notification settings - Fork 1
/
util.py
149 lines (132 loc) · 4.16 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import re
import execjs
import javalang
# Compile a small JavaScript helper once, at import time.
# `getAst(code)` wraps the given snippet in a dummy `contract c { ... }` and
# parses it with @solidity-parser/parser. When the parser throws a ParserError
# the errors are logged and a bare {"type": "SourceUnit"} node is returned.
# NOTE(review): for any other exception the catch block returns nothing, so
# the Python caller presumably receives None -- confirm.
# NOTE(review): `require(...)` presumably needs a Node.js runtime with the
# parser package installed -- confirm against the deployment setup.
ctx = execjs.compile("""
const parser = require('@solidity-parser/parser');
function getAst(code){
const input = `
contract c {` +
code
+ `
}`
try {
var ast = parser.parse(input)
return ast
} catch (e) {
if (e instanceof parser.ParserError) {
console.error(e.errors)
var ast = {"type": "SourceUnit"}
return ast
}
}
}
""") # Compiled JS context object used by the helpers below
def VLR(a):
    """Collect the ``'type'`` values of an AST in pre-order.

    Keys are visited in the dict's own iteration order. The value stored
    under ``'type'`` is emitted as-is; dict values are traversed
    recursively; list values have each element traversed. Everything else
    is ignored.

    :param a: AST node (a dict); any non-dict input yields an empty list.
    :return: list of the collected ``'type'`` values.
    """
    if not isinstance(a, dict):
        return []

    collected = []
    for name, child in a.items():
        if name == 'type':
            collected.append(child)
        elif isinstance(child, dict):
            collected += VLR(child)
        elif isinstance(child, list):
            for element in child:
                collected += VLR(element)
    return collected
def _sbt_children(value):
    """Return the SBT tokens contributed by one child value of a node."""
    if isinstance(value, list):
        tokens = []
        for item in value:
            tokens.extend(_sbt_children(item))
        return tokens
    if isinstance(value, dict):
        if 'type' in value:
            # A real AST node: emit it as its own bracketed subtree.
            return SBT(value)
        # Untyped container dict: look through it for nested nodes.
        tokens = []
        for nested in value.values():
            tokens.extend(_sbt_children(nested))
        return tokens
    # Scalars (names, literals, None, ...) carry no structure.
    return []


def SBT(a):
    """Serialize an AST with structure-based traversal (SBT).

    Every node is rendered as ``( type <children...> ) type``, so the
    bracket pairs unambiguously encode the tree shape, e.g.
    ``( A ( B ) B ) A`` for a node ``A`` with one child ``B``.

    The previous implementation recursed with ``VLR`` instead of ``SBT``,
    so only the root node was bracketed; children are now serialized
    recursively in SBT form as well. The node's type is always emitted
    directly after the opening bracket.

    :param a: AST node, a dict that has a ``'type'`` entry.
    :return: list of tokens (brackets and node type values).
    :raises TypeError: if ``a`` is not subscriptable (e.g. ``None``).
    :raises KeyError: if ``a`` is a dict without a ``'type'`` entry.
    """
    # Looked up first so that an invalid root fails exactly as before.
    node_type = a['type']
    tokens = ["(", node_type]
    for key, value in a.items():
        if key != 'type':
            tokens.extend(_sbt_children(value))
    tokens.append(")")
    tokens.append(node_type)
    return tokens
# Sample call executed at import time: parse a small Solidity function through
# the compiled JS helper and keep the returned AST in the module-level `result`.
result = ctx.call("getAst", """
function balanceOf(address addr) constant public returns (uint256) {
return data.balanceOf(addr);
}
""") # Call the JS function getAst, passing the snippet as its argument
# Debug output of the flattened AST, left disabled:
# print(result)
# result = VLR(result)
# print(" ".join(result))
def get_ast(code):
    """Return the pre-order node-type sequence of a Solidity snippet.

    The snippet is parsed by the JS ``getAst`` helper and the resulting
    tree is flattened with ``VLR``; the collected node types are joined
    with single spaces.

    :param code: Solidity source fragment to parse.
    :return: space-separated node types, or ``'SourceUnit'`` when parsing
        or flattening fails.
    """
    try:
        ast = ctx.call("getAst", code)  # call the JS function getAst
        result = ' '.join(VLR(ast))
    except Exception:
        # Deliberate best-effort fallback. Unlike a bare ``except:`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        result = 'SourceUnit'
    return result
def get_sbt(code):
    """Return the SBT token sequence of a Solidity snippet.

    The snippet is parsed by the JS ``getAst`` helper and the resulting
    tree is serialized with ``SBT``; the tokens are joined with single
    spaces.

    :param code: Solidity source fragment to parse.
    :return: space-separated SBT tokens, or ``'SourceUnit'`` when parsing
        or serialization fails.
    """
    try:
        ast = ctx.call("getAst", code)  # call the JS function getAst
        result = ' '.join(SBT(ast))
    except Exception:
        # Deliberate best-effort fallback. Unlike a bare ``except:`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        result = 'SourceUnit'
    return result
def hump2underline(hunp_str):
    """Split a camelCase string at its case boundaries and lowercase it.

    A single space is inserted wherever a lowercase letter or a digit is
    directly followed by an uppercase letter. Despite the function's name,
    the separator is a space, not an underscore.

    :param hunp_str: camelCase string.
    :return: the lowercased string with a space at every boundary.
    """
    # Group 1 is the last character of a word, group 2 the capital that
    # starts the next one.
    boundary = re.compile(r'([a-z]|\d)([A-Z])')
    # The replacement re-emits both groups with a space between them.
    spaced = boundary.sub(r'\1 \2', hunp_str)
    return spaced.lower()
def process_source(code):
    """Normalize a source snippet into a space-separated token string.

    Newlines are replaced by spaces, then the snippet is tokenized with
    ``javalang.tokenizer``. String and character literals become ``STR_``,
    integer and floating-point literals become ``NUM_``, boolean literals
    become ``BOOL_``; every other token is split at camelCase boundaries
    and lowercased via ``hump2underline``.

    If tokenization fails, the snippet is instead split on a fixed set of
    ASCII and full-width punctuation characters and each piece is passed
    through ``hump2underline``.

    :param code: source code snippet.
    :return: the normalized tokens joined by single spaces.
    """
    code = code.replace('\n', ' ').strip()
    try:
        tks = []
        for tk in javalang.tokenizer.tokenize(code):
            kind = tk.__class__.__name__
            if kind in ('String', 'Character'):
                tks.append('STR_')
            elif 'Integer' in kind or 'FloatingPoint' in kind:
                # Substring test: covers every *Integer token class.
                tks.append('NUM_')
            elif kind == 'Boolean':
                tks.append('BOOL_')
            else:
                tks.append(hump2underline(tk.value))
    except Exception:
        # Best-effort fallback for input the tokenizer rejects.
        code = code.replace("\r", "")
        # One character class instead of an alternation. The old pattern
        # ended in an unescaped `(|)`, an empty capturing group that made
        # re.split() split on empty matches and inject None/'' captures
        # into the result. No group here, so only real pieces come back.
        delimiters = r"""[,./;'`\[\]<>?:"{}~!@#$%^&()\-=_+,。、;‘’【】·!\u3000 …()]"""
        tks = [hump2underline(piece) for piece in re.split(delimiters, code)]
    return " ".join(tks)
from nltk import word_tokenize, pos_tag
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
# Map a part-of-speech tag to the matching WordNet POS constant.
def get_wordnet_pos(tag):
    """Translate a POS tag into a WordNet part-of-speech constant.

    Only the first letter of the tag is inspected: ``J`` -> adjective,
    ``V`` -> verb, ``N`` -> noun, ``R`` -> adverb.

    :param tag: POS tag string, e.g. the second item of a ``pos_tag`` pair.
    :return: the matching ``wordnet`` constant, or ``None`` for any other
        tag.
    """
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        # Noun tags previously returned wordnet.VERB, so nouns were
        # lemmatized as verbs.
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return None
def getOriginSentence(sentence):
    """Lemmatize every word of a sentence and return the lowercased result.

    The sentence is tokenized and POS-tagged, then each word is lemmatized
    with the WordNet POS derived from its tag. Words whose tag has no
    mapping are lemmatized as nouns.

    :param sentence: input sentence.
    :return: the lowercased lemmas joined by single spaces.
    """
    tagged_words = pos_tag(word_tokenize(sentence))  # (word, tag) pairs
    lemmatizer = WordNetLemmatizer()
    lemmas = [
        lemmatizer.lemmatize(word, pos=get_wordnet_pos(tag) or wordnet.NOUN).lower()
        for word, tag in tagged_words
    ]
    return " ".join(lemmas)