-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathlifter.py
287 lines (240 loc) · 9.45 KB
/
lifter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
from opcodes import *
from bytecodes import *
from instructions import *
from graphbuilder import GraphBuilder
from interpreter import BasicInterpreter
from interpreter import DuplicateInterpreter
from structures import InternalFunction
from ceptions import StackSizeError
from instructionblock import InstructionBlock
from collections import defaultdict
import sys
class Lifter(GraphBuilder):
def __init__(self, binary):
GraphBuilder.__init__(self, binary)
# for func in self.external_functions.values():
# func.visualize_function()
self.__label_function_boundaries()
self.__create_internal_functions()
self.__reduce_functions()
for func in self.get_all_functions():
self.__lift_function(func)
# self.__split_hub_blocks(func)
def __label_function_boundaries(self):
# TODO: extend to include undetected callers ?
# maps callee_pair (callee_entry, callee_exit) to caller_pairs
self.__callee_pairs = dict()
for callee_exit, caller_begins in self.indirect_jumps.items():
for caller_begin, caller_end in caller_begins.items():
# caller_begin should not be jumpi
if self.graph[caller_begin].is_jumpi_block():
# print(caller_begin)
continue
suc_ids = self.graph.get_successor_ids(caller_begin)
if len(suc_ids) == 1:
callee_pair = (suc_ids.pop(), callee_exit)
if callee_pair not in self.__callee_pairs:
self.__callee_pairs[callee_pair] = set()
self.__callee_pairs[callee_pair].add((caller_begin, caller_end))
else:
print("[WARNING] caller successor not unique %s" % caller_begin)
def __create_internal_functions(self):
self.internal_functions = dict()
for callee_pair, caller_pairs in self.__callee_pairs.items():
# print(caller_pairs)
func, caller_pairs = self.__create_internal_function(callee_pair, caller_pairs)
if len(caller_pairs) <= 1:
del (self.__callee_pairs[callee_pair])
else:
self.__callee_pairs[callee_pair] = caller_pairs
# print(caller_pairs)
self.internal_functions[callee_pair] = func
opcode = func.get_intcall_opcode()
func.insert_intreturn()
actions[opcode] = func.action
def __create_internal_function(self, callee_pair, caller_pairs):
possible_funcs = dict()
callee_begin, callee_end = callee_pair
for caller_pair in caller_pairs:
caller_begin, caller_end = caller_pair
caller_begin_image = self.tracker.get_observed_image(callee_begin, caller_begin)
interpreter = BasicInterpreter(self.graph.get_blocks(), self.resolver)
interpreter.add_to_poison(caller_end)
sub_graph, sub_tracker = \
interpreter.explore_control_flow_graph(callee_begin, caller_begin_image)
sub_graph.remove_block(caller_end) # this might not be safe
end_path = interpreter.get_end_path()
operations = interpreter.compute_stack_actions(end_path)
# print(delta, alpha)
signature = len(self.internal_functions)
in_func = \
InternalFunction(signature, sub_graph, sub_tracker, callee_pair, operations)
block_ids = frozenset(sub_graph.get_block_ids())
if block_ids not in possible_funcs:
possible_funcs[block_ids] = [set(), in_func]
possible_funcs[block_ids][0].add(caller_pair)
caller_pairs, func = max(possible_funcs.values(), key=lambda x: len(x[0]))
return func, caller_pairs
def __reduce_functions(self):
for func in self.get_all_functions():
self.__extract_internal_calls(func)
entry_id = func.entry_id
entry_image = func.tracker.get_observed_image(entry_id)
interpreter = DuplicateInterpreter(
func.graph.get_blocks(), self.resolver.get_natural_successors())
func.resolver = interpreter.resolver
interpreter.add_to_poison(func.exit_id)
new_graph, new_tracker = \
interpreter.explore_control_flow_graph(entry_id, entry_image)
func.graph, func.tracker = new_graph, new_tracker
func.ins_outs = interpreter.ins_outs
for callee_pair in self.internal_functions.keys():
func = self.internal_functions.pop(callee_pair)
self.internal_functions[func.signature] = func
def __extract_internal_calls(self, func):
callee_pairs = self.__get_present_pairs(func)
# print(callee_pairs)
for callee_pair, caller_pairs in callee_pairs.items():
in_func = self.internal_functions[callee_pair]
opcode = in_func.get_intcall_opcode()
for caller_pair in caller_pairs:
func.extract_intcall(callee_pair, caller_pair, opcode)
def __get_present_pairs(self, func):
graph = func.graph
callee_pairs = dict()
for callee_pair, caller_pairs in self.__callee_pairs.items():
if not graph.has_blocks(callee_pair):
continue
present_caller_pairs = set()
for caller_pair in caller_pairs:
if not graph.has_blocks(caller_pair):
continue
present_caller_pairs.add(caller_pair)
if len(present_caller_pairs) != 0:
callee_pairs[callee_pair] = present_caller_pairs
caller_begins = defaultdict(int)
# remove the case in which one caller has multiple callees
for callee_pair, caller_pairs in callee_pairs.items():
for caller_pair in caller_pairs:
caller_begin, _ = caller_pair
caller_begins[caller_begin] += 1
if caller_begins[caller_begin] > 1:
del (callee_pairs[callee_pair])
break
return callee_pairs
def __lift_function(self, func):
for block in func.graph:
str_id = block.get_id()
stack_size = set()
for image in func.tracker.get_observed_images(str_id):
stack_size.add(image.top)
if len(stack_size) != 1:
raise StackSizeError("lifter stack size error")
stack_size = stack_size.pop()
block = self.__lift_bytecode_block(block, stack_size)
func.graph.replace_block(block)
def __lift_bytecode_block(self, block, stack_size):
entry_addr = block.get_entry_address()
new_block = InstructionBlock(block.get_id(), entry_addr)
for bytecode in block:
instructions, stack_size = \
self.__lift_bytecode(bytecode, stack_size)
for instruction in instructions:
new_block.append(instruction)
new_block.exit_stack_size = stack_size
# print(new_block.exit_stack_size)
return new_block
@staticmethod
def __lift_bytecode(bytecode, stack_size):
opcode = bytecode.opcode
address = bytecode.get_address()
delta, alpha = actions[opcode][:2]
reads = to_stack_registers([stack_size - i - 1 for i in range(delta)])
writes = to_stack_registers([stack_size - delta + i for i in range(alpha)])
instructions = list()
if opcode in swap_ops:
read1 = [STACK_REGISTER + str(stack_size - delta)]
read2 = [STACK_REGISTER + str(stack_size - 1)]
instructions = [MoveInstruction("MOVE", read1, [SWAP_REGISTER], address),
MoveInstruction("MOVE", read2, read1, address),
MoveInstruction("MOVE", [SWAP_REGISTER], read2, address)]
elif opcode in dup_ops:
reads = [STACK_REGISTER + str(stack_size - delta)]
writes = [STACK_REGISTER + str(stack_size)]
instructions = [MoveInstruction("MOVE", reads, writes, address)]
elif opcode in push_ops:
constant = bytecode.dependencies[0]
instructions = [MoveInstruction("MOVE", [constant], writes, address)]
elif opcode in bin_ops:
instructions = [BinOpInstruction(opcode, reads, writes, address)]
elif opcode in mono_ops:
instructions = [MonoOpInstruction(opcode, reads, writes, address)]
elif opcode == "MSTORE":
instructions = [MstoreInstruction(opcode, reads, writes, address)]
elif opcode == "MLOAD":
instructions = [MloadInstruction(opcode, reads, writes, address)]
elif opcode == "CALLDATALOAD":
instructions = [CallLoadInstruction(opcode, reads, writes, address)]
elif opcode.startswith("INTCALL"):
instructions = [IntCallInstruction(opcode, reads, writes, address)]
elif opcode == "SSTORE":
instructions = [SstoreInstruction(opcode, reads, writes, address)]
elif opcode == "SLOAD":
instructions = [SloadInstruction(opcode, reads, writes, address)]
elif opcode not in {"POP", "JUMPDEST"}:
instructions = [Instruction(opcode, reads, writes, address)]
stack_size = stack_size - delta + alpha # update stack size
return instructions, stack_size
def get_all_functions(self):
return list(self.external_functions.values()) + \
list(self.internal_functions.values())
def debug_callee_pairs(self):
for callee_pair, caller_pairs in self.__callee_pairs.items():
print(callee_pair)
print(list(caller_pairs))
print("")
def debug_functions(self):
for func in self.get_all_functions():
func.debug_function()
# func.visualize_function()
@staticmethod
def __split_hub_blocks(func):
graph = func.graph
resolver = func.resolver
for block_id in graph.get_block_ids():
block = graph[block_id]
if not block.check_exit_instruction("JUMP"):
continue
successor_ids = graph.get_successor_ids(block_id)
if len(successor_ids) <= 1:
continue
if block_id not in func.ins_outs:
continue
for pre_id, suc_id in func.ins_outs[block_id].items():
if len(suc_id) == 1:
suc_id = suc_id.pop()
new_id = resolver.allocate_id()
new_block = block.make_copy(new_id)
graph.add_block(new_block)
# new_block.debug_block()
# block.debug_block()
graph.remove_edge(pre_id, block_id)
graph.remove_edge(block_id, suc_id)
graph.add_edge(pre_id, new_id)
graph.add_edge(new_id, suc_id)
merged = graph.simplify({})
exit_id = func.exit_id
while exit_id in merged:
exit_id = merged[exit_id]
func.exit_id = exit_id
if __name__ == "__main__":
input_file = open(sys.argv[1])
line = input_file.readline().strip()
if " " in line:
line = line.split(" ")[1]
input_file.close()
a = Lifter(line)
if "-v" in sys.argv:
a.visualize_contract()
if "-d" in sys.argv:
a.debug_functions()