diff --git a/docs/CALL_CFG.md b/docs/CALL_CFG.md new file mode 100644 index 0000000..aa26383 --- /dev/null +++ b/docs/CALL_CFG.md @@ -0,0 +1,138 @@ +# Call CFG + +Call CFG is a representation of a program based on its *Control Flow Graph* _(CFG)_ and its *function calls*. Two features are available : *Generation* and *Comparison*. This documentation explains both features. + + +## Mode 1 : Generation + +### Algorithm specification + +1. Determine the start address of the analysis. If none is given, the entry point is used. +2. Get all `call` instructions of the analyzed function (determined by the address from the previous step) from the disassembled code (with `radare2`) +3. For each `call` instruction : + * Get Machoc hash of '*caller function*' + * Get Machoc hash of '*called function*' + * Create t-uple `Machoc(caller function) -> Machoc(called function)` + * Back to *step 3* recursively for each '*called function*' +4. Store all t-uples in database +5. Generate .dot and .png files + +### Example + +Here is an example to illustrate the algorithm, from the binary to its call CFG representation.
+*(The source code is displayed for better understanding)* + +* Source code + +``` +int main(){ + for (int i=0; i < 2000; i++){ + if (i > 100) + printf("This is a program test : %d\n",i); + else + function2(); +} + +void function2(){ + return; +} +``` + + +* Call CFG representation + +![Example Call CFG](screenshots/callCFG.png) + +With this example, extracted t-uples are : + +* Machoc('sym._main') -> Machoc('sym.__main') +* Machoc('sym._main') -> Machoc('sym._printf') +* Machoc('sym._main') -> Machoc('sym._function2') + +That corresponds to : + +``` +(ccdaf4fc, e1aa82b3) +(ccdaf4fc, 1a02300e) +(ccdaf4fc, 7c91cb3a) +``` + + +## Mode 2 : Comparison + +This second feature compares two call CFGs and lets us quickly identify the precise differences. + +### Algorithm specification + +1. Get t-uples from database for compared files n°1 and n°2 : `tuples_ccfg_1` and `tuples_ccfg_2`
*(Feature 'Generation' must be used first)* +2. Jaccard index (Intersection over Union) is calculated, using t-uples +3. Set differences are calculated : `tuples_ccfg_1 \ tuples_ccfg_2` and `tuples_ccfg_2 \ tuples_ccfg_1` +4. Create new call CFG with differences in specific color (red) + +### Example + + * T-uples of analyzed file n°1 : `tuples_ccfg_1` + +We can use example in *Mode 1 : Generation* : + +``` +(ccdaf4fc, e1aa82b3) +(ccdaf4fc, 1a02300e) +(ccdaf4fc, 7c91cb3a) +``` + + * T-uples of analyzed file n°2 : `tuples_ccfg_2` + +We can generate t-uples from this example binary :
+*(The source code is displayed for better understanding)* + +``` +int main(){ + for (int i=0; i < 10; i++){ + if (i % 2 == 0) + printf("Even number : %d\n",i); + else + printf("Odd number : %d\n",i); +} +``` + +Call CFG representation : + +![Example Call CFG File 2](screenshots/callCFG_2.png) + + +``` +(ccdaf4fc, e1aa82b3) +(ccdaf4fc, 1a02300e) +(ccdaf4fc, 1a02300e) +``` + + * Jaccard index + +With this representation, we can easily calculate Jaccard index : + +![Example Jaccard Index](screenshots/jaccard_index.png) + +`S(cCFG1, cCFG2) = 2 / 4 = 50.0 % ` + +* Set differences + +`cCFG2 \ cCFG1 = (ccdaf4fc, 1a02300e) ` + +`cCFG1 \ cCFG2 = (ccdaf4fc, 7c91cb3a) ` + + +* New call CFG of analyzed file n°1, with differences from analyzed file n°2 + +![Example new Call CFG](screenshots/new_callCFG.png) + + +## Usage : + +You can find comparison results in the 'Classification' tab of Polichombr. Except for this example, only binaries with Jaccard index >= 80 % are displayed. + +![Call CFG Polichombr Example](screenshots/example_callCFG_view.png) + +## Screenshot : + +![Call CFG Polichombr](screenshots/callCFG_view.png) \ No newline at end of file diff --git a/docs/screenshots/callCFG.png b/docs/screenshots/callCFG.png new file mode 100644 index 0000000..652fdbb Binary files /dev/null and b/docs/screenshots/callCFG.png differ diff --git a/docs/screenshots/callCFG_2.png b/docs/screenshots/callCFG_2.png new file mode 100644 index 0000000..df4cd59 Binary files /dev/null and b/docs/screenshots/callCFG_2.png differ diff --git a/docs/screenshots/callCFG_view.png b/docs/screenshots/callCFG_view.png new file mode 100644 index 0000000..fef5724 Binary files /dev/null and b/docs/screenshots/callCFG_view.png differ diff --git a/docs/screenshots/example_callCFG_view.png b/docs/screenshots/example_callCFG_view.png new file mode 100644 index 0000000..c4bc0ac Binary files /dev/null and b/docs/screenshots/example_callCFG_view.png differ diff --git a/docs/screenshots/jaccard_index.png 
class call_CFG:
    """
        Call-CFG generator / comparator built on top of radare2 (r2pipe).

        One instance wraps one radare2 session on `filename`. It walks the
        `call` instructions starting at `offset`, pairs the Machoc hash of
        each caller with the Machoc hash of each callee, and renders the
        result as .dot / .png files.
    """

    # Opening of every generated .dot file (generation and comparison modes).
    _DOT_HEADER = """digraph code {
    graph [bgcolor=azure fontsize=8 fontname="Courier" splines="ortho"];
    node [fillcolor=gray style=filled shape=box];
    edge [arrowhead="normal"];"""

    def __init__(self, filename, offset, app=None, tmessage=""):
        """
            Open `filename` in radare2, analyse it and load its Machoc hashes.

            filename : path of the binary; `<filename>.sign` holds the hashes
            offset   : start address/symbol; "" or None means "entry point"
            app      : optional application object whose logger is used
            tmessage : prefix added to every logged message
        """

        # Logger
        self.tmessage = tmessage
        self.app = app

        self.function_analyzed = []
        self.tuple_ccfg_db = []
        self.tuple_offset = []
        self.array_all_machoc_func = []

        # State of the comparison mode (reset by process_compare_call_CFG)
        self.all_machoc_function_analyzed = []
        self.first_loop = False

        self.hashfile = filename
        self.filename = filename
        self.fname = filename + '.sign'

        # Defaults, so that an instance built on an invalid file stays usable
        self.offset = None
        self.machoc_functions = {}

        # Open binary
        self._print("Analyze radare2 : in progress ... ")
        self.r2_binary = r2pipe.open(filename, ['-2'])
        if not self.is_valid_file():
            self.kill("Not a valid binary file")
            return
        self.run_cmd_radare2('aaa')
        self._print("Analyze radare2 : OK ")

        if offset is None or offset == "":
            self.offset = str(self.get_entry_point())
        else:
            self.offset = str(offset)

        # Get machoc functions
        self.machoc_functions = parse_machoc_signatures(self.fname)

    def process_cCFG(self, compare_tuples=None):
        """
            Process C-CFG generation or comparison.

            compare_tuples : falsy for generation; otherwise the triple
                             (second filename, diff tuples, second offset)
            Returns (start offset, serialized tuples, detailed tuples).
        """
        if self.offset is None:
            # The constructor rejected the file: there is nothing to walk.
            self._print("No analysable binary : call-CFG processing skipped")
        elif compare_tuples:
            self._print("Mode comparaison C-CFG")
            # Generate new CFG with diff
            self.process_compare_call_CFG(compare_tuples)
        else:
            self._print("Mode génération C-CFG")
            self.process_call_CFG()

        tuples = "('" + "'),('".join(self.tuple_ccfg_db) + "')"

        return self.offset, tuples, self.tuple_offset

    def _print(self, message):
        """
            Log `message` through the application logger, or print it.
        """
        if self.app is not None:
            self.app.logger.debug(self.tmessage + message)
        else:
            print(message)

    def is_valid_file(self, instance_r2=None):
        """
            Check if file contains instructions ('havecode' field of `iIj`).
            An unparsable or incomplete radare2 answer counts as invalid.
        """

        result = self.run_cmd_radare2("iIj", instance_r2)
        try:
            info = json.loads(result.replace("\\", ""))
            return bool(info['havecode'])
        except (ValueError, KeyError, TypeError, AttributeError):
            return False

    def kill(self, message, instance_r2=None):
        """
            Log `message` and quit a radare2 process: `instance_r2` when
            given, this object's own session otherwise.
        """

        self._print("[+] '" + self.hashfile + "' : " + message)
        if instance_r2:
            instance_r2.quit()
        else:
            self.r2_binary.quit()

    def get_entry_point(self):
        """
            Return the virtual address of the first entrypoint, or None when
            radare2 reports none.
        """

        try:
            entries = json.loads(self.run_cmd_radare2("iej"))
            return entries[0]['vaddr']
        except (ValueError, IndexError, KeyError, TypeError):
            return None

    def get_function_code(self, offset, instance_r2=None):
        """
            Return instructions (`pdfj`) from function at specific offset,
            or {} when radare2 returns nothing.
        """

        # Go to specific offset
        self.run_cmd_radare2("s {}".format(offset), instance_r2)
        result = self.run_cmd_radare2("pdfj", instance_r2)

        return json.loads(result) if result else {}

    def get_function_block(self, offset, instance_r2=None):
        """
            Return blocks (`agj`) from function at specific offset,
            or {} when radare2 returns nothing.
        """

        self.run_cmd_radare2("s {}".format(offset), instance_r2)
        result = self.run_cmd_radare2("agj", instance_r2)
        return json.loads(result) if result else {}

    def format_function_name(self, function_name):
        """
            Format the operand of an indirect call.
            Example : 'call qword [sym.imp.foo]' become 'sym.imp.foo'
        """

        # Only the first 'call' is the mnemonic; later ones belong to the
        # symbol name (e.g. 'sym.callback') and must be kept.
        name = function_name.replace('call', '', 1)
        for token in ('qword', '[', ']'):
            name = name.replace(token, '')
        return name.strip()

    @staticmethod
    def _format_dll_call(assembly_code):
        """
            'call sym.imp.KERNEL32.dll_Foo' become '_Foo (KERNEL32.dll)'
        """
        parts = assembly_code.split('.dll')
        dll_name = parts[0].split('.')[-1]
        return parts[1] + ' (' + dll_name + '.dll)'

    @staticmethod
    def _ucall_tuple_name(call_func):
        """
            '_Foo (KERNEL32.dll)' become 'KERNEL32.dll_Foo'; a name without
            space is returned unchanged.
        """
        parts = call_func.split(' ')
        if len(parts) == 1:
            return parts[0]
        return parts[1].replace('(', '').replace(')', '') + parts[0]

    def get_call_in_function(self, offset, instance_r2=None):
        """
            Return the calls made by the function at `offset` as
            {"call": [direct callees], "ucall": [indirect / imported callees]}
        """

        if instance_r2 is None:
            self._print("Function analyzed : " + str(offset))
        else:
            self._print("[FILE2] Function analyzed : " + str(offset))

        function_call = []
        function_ucall = []

        fcode = self.get_function_code(offset, instance_r2)
        for instruction in fcode.get("ops", []):
            kind = instruction.get("type")
            if kind not in ("call", "ucall"):
                continue
            assembly_code = instruction['disasm']
            if '.dll' in assembly_code:
                # Imported functions are always listed with the indirect calls
                function_ucall.append(self._format_dll_call(assembly_code))
            elif kind == "call":
                function_call.append(assembly_code.replace('call', '', 1).strip())
            else:
                function_ucall.append(self.format_function_name(assembly_code))

        return {"call": function_call, "ucall": function_ucall}

    def get_machoke_from_function(self, function_offset, db_machoc_func, instance_r2=None):
        """
            Return the Machoc hash (hex string without '0x') of a function,
            looked up by address in the content of the (SHA256).bin.sign file.
            Return "" when the address or the hash cannot be resolved.
        """

        offset = self.get_offset(function_offset, instance_r2)
        if not offset:
            return ""

        try:
            key_to_search = int(offset[2:], 16)
            # '%x' never appends the 'L' suffix that hex() adds to Python 2 longs
            machoc = '%x' % db_machoc_func[key_to_search]["machoc"]
        except (KeyError, TypeError, ValueError):
            self._print("Unable to find machoc hash of offset function : " + str(offset))
            return ""

        self._print("Machoc of '{0}' : {1}".format(function_offset, machoc))
        return machoc

    def format_block_dot(self, function_offset, function_machoc, adresse_offset, diff=False):
        """
            Return the dot node of a function. In diff mode the node is red
            when neither its name nor its hash is known in the second file.
        """

        color = "palegreen"
        if diff and function_machoc != "":
            known = self.array_all_machoc_func
            if function_offset not in known and function_machoc not in known:
                color = "#ff4d4d"

        # "\\l" is the dot escape for a left-justified line break
        label = "|- sub_" + adresse_offset[2:] + " (" + function_offset + ") " + "\\l " + function_machoc
        content = ("\"" + function_offset + "\"[fillcolor=\"" + color
                   + "\",color=\"black\", fontname=\"Courier\",label=\"" + label + "\"]")
        return content + "\n"

    def format_edge_dot(self, function_offset1, function_called_offset2, diff=False):
        """
            Return the dot edge caller -> callee (red in diff mode).
        """

        color = "#ff0000" if diff else "#00007f"

        content = ("\"" + str(function_offset1) + "\" -> \"" + str(function_called_offset2)
                   + "\" [color=\"" + color + "\"];")
        return content + "\n"

    def add_tuple(self, function_machoke1, function_machoke2, adresse_offset, adresse_offset2, offset_called_func):
        """
            Save t-uple analyzed
        """

        if str(function_machoke2).startswith('0x'):
            function_machoke2 = 'sub_' + function_machoke2[2:]

        machoc_pair = function_machoke1 + ',' + function_machoke2

        # T-uples saved in DB offset_callCFG (with more details): one row per
        # distinct pair
        if machoc_pair not in self.tuple_ccfg_db:
            self.tuple_offset.append([
                adresse_offset + ',' + adresse_offset2,
                machoc_pair,
                "sub_" + adresse_offset[2:] + ",sub_" + offset_called_func[2:],
            ])

        # T-uples saved in DB callCFG: duplicates are kept on purpose, the
        # comparison counts occurrences
        self.tuple_ccfg_db.append(machoc_pair)

    def run_cmd_radare2(self, cmd, instance_r2=None):
        """
            Return radare2 result of command, run on `instance_r2` when given
            and on this object's own session otherwise.
        """

        if instance_r2:
            return instance_r2.cmd(cmd)
        return self.r2_binary.cmd(cmd)

    def generate_file(self, content, filename=None):
        """
            Generate dot and PNG files
            Save file in directory 'polichombr/storage/'
        """

        if filename is None:
            filename = self.hashfile

        # Keep the file name only, without its directories and last extension
        base_filename = os.path.splitext(os.path.basename(filename))[0]

        dot_filename = "polichombr/storage/" + base_filename + '.dot'
        png_filename = "polichombr/storage/" + base_filename + '.png'

        with open(dot_filename, 'w') as dot_file:
            dot_file.write(content)
        self._print("Dot file generated : " + dot_filename)

        (graph,) = pydot.graph_from_dot_file(dot_filename)
        graph.write_png(png_filename)
        self._print("PNG file generated : " + png_filename)

    def remove_duplicate(self, dict):
        """
            Return a shallow copy of the mapping (keys are unique by nature,
            so no entry is ever dropped).
        """

        ret = {}
        ret.update(dict)
        return ret

    def generate_dot_content_info(self, function_offset):
        """
            Return the call CFG of `function_offset` in dot format and record
            its t-uples. Direct callees are followed recursively.
        """

        adresse_offset = self.get_offset(function_offset)
        function_call = self.get_call_in_function(function_offset)
        function_machoc = self.get_machoke_from_function(function_offset, self.machoc_functions)
        dot_content = self.format_block_dot(function_offset, function_machoc, adresse_offset)

        # UCALL: indirect / imported callees have no Machoc hash
        for call_func in function_call["ucall"]:
            machoke_signature = ""
            offset_called_func = self.get_offset(call_func)
            dot_content += self.format_block_dot(call_func, machoke_signature, offset_called_func)
            dot_content += self.format_edge_dot(function_offset, call_func)
            self.add_tuple(function_machoc, machoke_signature, adresse_offset,
                           offset_called_func, offset_called_func)

        # CALL
        for call_func in function_call["call"]:
            machoke_signature = self.get_machoke_from_function(call_func, self.machoc_functions)
            offset_called_func = self.get_offset(call_func)
            self.add_tuple(function_machoc, machoke_signature, adresse_offset,
                           offset_called_func, offset_called_func)

            dot_content += self.format_block_dot(call_func, machoke_signature, offset_called_func)
            dot_content += self.format_edge_dot(function_offset, call_func)

            if call_func not in self.function_analyzed:
                self.function_analyzed.append(call_func)
                try:
                    dot_content += self.generate_dot_content_info(call_func)
                except Exception as error:
                    # Best effort: one unreadable callee must not abort the graph
                    self._print("Unable to analyze '{0}' : {1}".format(call_func, error))

        return dot_content

    def generate_dot_info(self, function_offset):
        """
            Build the whole dot document and write the .dot / .png files
        """

        dot_content = self._DOT_HEADER
        dot_content += self.generate_dot_content_info(function_offset)
        dot_content += "}"

        self.generate_file(dot_content)

#############################################################################
##############################     COMPARE     ##############################
#############################################################################

    def generate_dot_compare_info(self, function_offset, diff_tuples, second_filename):
        """
            Build the comparison dot document and write it as
            '<file1>_<file2>.dot' / '.png'
        """

        dot_content = self._DOT_HEADER
        dot_content += self.generate_dot_content_compare_info(function_offset, diff_tuples)
        dot_content += "}"

        tmp_filename1 = self.hashfile.split('/')[-1].split('.')[0]
        tmp_filename2 = second_filename.split('/')[-1].split('.')[0]

        self.generate_file(dot_content, tmp_filename1 + "_" + tmp_filename2)

    def generate_dot_content_compare_info(self, function_offset, diff_tuples):
        """
            Return the compare call CFG of `function_offset` in dot format.
            Every t-uple found in `diff_tuples` gets a red edge and is removed
            from the list, which is therefore modified in place.
        """

        adresse_offset = self.get_offset(function_offset)
        function_call = self.get_call_in_function(function_offset)
        function_machoc = self.get_machoke_from_function(function_offset, self.machoc_functions)

        dot_content = ""  # Important !

        if function_offset not in self.function_analyzed:
            dot_content = self.format_block_dot(function_offset, function_machoc, adresse_offset, True)

        # NOTE(review): generation stores an empty second member for ucalls and
        # for calls without Machoc hash, while the t-uples below carry a name;
        # such differences may never match `diff_tuples` - confirm intent.

        # UCALL
        for call_func in function_call["ucall"]:
            tuple_tmp = function_machoc + "," + self._ucall_tuple_name(call_func)
            is_diff = tuple_tmp in diff_tuples
            dot_content += self.format_block_dot(call_func, "", self.get_offset(call_func), True)
            dot_content += self.format_edge_dot(function_offset, call_func, is_diff)
            if is_diff:
                diff_tuples.remove(tuple_tmp)

        # CALL
        for call_func in function_call["call"]:
            machoke_signature = self.get_machoke_from_function(call_func, self.machoc_functions)
            if machoke_signature == "":
                tuple_tmp = function_machoc + "," + call_func
            else:
                tuple_tmp = function_machoc + "," + machoke_signature
            is_diff = tuple_tmp in diff_tuples
            dot_content += self.format_block_dot(call_func, machoke_signature, self.get_offset(call_func), True)
            dot_content += self.format_edge_dot(function_offset, call_func, is_diff)
            if is_diff:
                diff_tuples.remove(tuple_tmp)

            if call_func not in self.function_analyzed:
                self.function_analyzed.append(call_func)
                try:
                    dot_content += self.generate_dot_content_compare_info(call_func, diff_tuples)
                except Exception as error:
                    # Best effort: one unreadable callee must not abort the graph
                    self._print("Unable to analyze '{0}' : {1}".format(call_func, error))

        return dot_content

    def process_get_all_machoc_functions(self, filename, offset, instance_r2):
        """
            Load the Machoc hashes of `filename` and collect every function
            reachable from `offset` in the radare2 session `instance_r2`
        """

        machoc_functions = parse_machoc_signatures(filename + '.sign')
        return self.get_all_machoc_functions(offset, instance_r2, machoc_functions)

    def get_all_machoc_functions(self, offset, instance_r2, machoc_functions):
        """
            Return the Machoc hashes (or names when no hash is known) of the
            functions called from `offset`, following direct calls recursively
        """

        all_machoc_func = []

        function_call = self.get_call_in_function(offset, instance_r2)
        function_machoc = self.get_machoke_from_function(offset, machoc_functions, instance_r2)

        # Only the root function contributes its own hash
        if not self.first_loop:
            all_machoc_func.append(function_machoc)
            self.first_loop = True

        for call_func in function_call["ucall"]:
            all_machoc_func.append(self._ucall_tuple_name(call_func))

        for call_func in function_call["call"]:
            machoke_signature = self.get_machoke_from_function(call_func, machoc_functions, instance_r2)
            if machoke_signature == "":
                all_machoc_func.append(str(call_func))
            else:
                all_machoc_func.append(machoke_signature)
            if call_func not in self.all_machoc_function_analyzed:
                self.all_machoc_function_analyzed.append(call_func)
                try:
                    all_machoc_func.extend(
                        self.get_all_machoc_functions(call_func, instance_r2, machoc_functions))
                except Exception as error:
                    # Best effort: one unreadable callee must not abort the walk
                    self._print("Unable to analyze '{0}' : {1}".format(call_func, error))
        return all_machoc_func

    def init_r2_instance(self, filename):
        """
            Initialize radare2 instance with filename parameter.
            Return None (after closing the process) when the file is invalid.
        """
        instance_r2 = r2pipe.open(filename, ['-2'])
        # Open binary
        if not self.is_valid_file(instance_r2):
            self.kill("Not a valid binary file", instance_r2)
            return None
        self.run_cmd_radare2('aaa', instance_r2)
        return instance_r2

    def process_compare_call_CFG(self, compare_tuples):
        """
            Process the comparison callCFG.
            compare_tuples : (second filename, diff tuples, second offset)
        """

        filename2, diff_tuples, file2_offset = compare_tuples
        instance_r2 = self.init_r2_instance(filename2)
        if instance_r2 is None:
            # Without this guard every command would silently fall back to
            # the radare2 session of the first file.
            self._print("Comparison aborted : '" + str(filename2) + "' cannot be analyzed")
            return

        # Step 1 : Get all machoc functions of file 2
        try:
            self.all_machoc_function_analyzed = []
            self.first_loop = False
            self.array_all_machoc_func = self.process_get_all_machoc_functions(
                filename2, file2_offset, instance_r2)
        finally:
            # The second session is only needed for step 1
            instance_r2.quit()
        self.function_analyzed = []

        # Step 2 : Compare file 1 and file 2
        self.generate_dot_compare_info(self.offset, diff_tuples, filename2)

#############################################################################
#############################################################################
#############################################################################

    def get_offset(self, func_name, instance_r2=None):
        """
            Return the radare2 answer to 's @ <func_name>'
            (presumably the address as an '0x...' string - verify against
            the radare2 version in use)
        """

        return self.run_cmd_radare2('s @ ' + str(func_name), instance_r2)

    def process_call_CFG(self):
        """
            Process callCFG from the start offset
        """

        self._print("Start offset : " + str(self.get_offset(self.offset)))
        self.generate_dot_info(self.offset)

    @staticmethod
    def is_good_offset_value(offset, filename=None):
        """
            Check if radare2 accepts a seek to `offset` in `filename`
            NOT USED YET
            NOTE(review): assumes r2pipe raises on an invalid seek - confirm.
        """
        if filename is None:
            raise ValueError("filename is required to check an offset")
        r2_tmp = r2pipe.open(filename, ['-2'])
        try:
            r2_tmp.cmd("s {}".format(offset))
            return True
        except Exception:
            return False
        finally:
            r2_tmp.quit()
def remove_blacklist_machoc(functions, blacklisted=None):
    """
        Remove, in place, the functions whose Machoc hash is blacklisted.

        functions   : dict {address: {"machoc": hash, ...}}
        blacklisted : iterable of hashes to drop; defaults to the two
                      historical values 0x1a02300e and 0xd3fa94a
        Returns the same `functions` dict.
    """
    if blacklisted is None:
        blacklisted = (0x1a02300e, 0xd3fa94a)
    # Iterate over a snapshot of the keys: the dict shrinks while we loop
    for func in list(functions):
        if functions[func]["machoc"] in blacklisted:
            functions.pop(func)
    return functions


def parse_machoc_signatures(fname):
    """
        Returns a dict containing the functions and the hashes.

        The file holds ';'-separated 'machoc_hash:address' items, both in
        hexadecimal. The result maps each address (int) to
        {"machoc": hash (int), "name": ""}. A missing file gives {}.
        Items whose fields are not hexadecimal are skipped, so that one
        corrupted entry does not discard the whole report.
    """
    # MACHOC report: we load the functions, hashes, etc.
    functions = {}
    if not os.path.exists(fname):
        return functions
    with open(fname) as infile:
        fdata = infile.read()
    for item in fdata.split(";"):
        if ":" not in item:
            continue
        subitems = item.split(":")
        try:
            machoc_h = int(subitems[0].strip(), 16)
            address = int(subitems[1].strip(), 16)
        except ValueError:
            continue
        functions[address] = dict(machoc=machoc_h, name="")
    return functions
class compare_ccfg():
    """
        Compare the call-CFG of one sample with the call-CFG of other samples,
        using the t-uples stored by the 'Generation' feature.
    """

    def __init__(self, sample_id):
        """
            Constructor

            sample_id : id of the reference sample (file n°1)
        """

        self.sample_id = sample_id

        self.ctrl_callCFG = callCFGController()
        self.ctrl_offset_callCFG = offset_callCFGController()

    def _print(self, message):
        """
            Print message
        """
        print(message)

    @staticmethod
    def _parse_tuples(serialized):
        """
            Turn the stored string "('a,b'),('c,d')" into ['a,b', 'c,d'].

            With a single t-uple the string is "('a,b')", which evaluates to
            the plain string 'a,b' (and "('')" to ''): it must be wrapped, not
            split into characters.
        """
        parsed = literal_eval(serialized)
        if isinstance(parsed, (tuple, list)):
            return list(parsed)
        return [parsed] if parsed else []

    def compare_tuples(self, tuples_file1, tuples_file2):
        """
            Return (A_inter_B, A_union_B, elements of file1 missing in file2).

            Both arguments are serialized t-uple strings. T-uples are counted
            with their multiplicity: a t-uple present twice in file1 and once
            in file2 matches once and differs once.
        """
        first = self._parse_tuples(tuples_file1)
        second = self._parse_tuples(tuples_file2)

        # Size of the concatenation; the union is obtained by removing the
        # common elements once
        total_element = len(first) + len(second)

        # Occurrences of file2 that are still unmatched
        remaining = collections.Counter(second)

        count = 0
        diff_tuples = []
        for tuple_element in first:
            if remaining[tuple_element] > 0:
                remaining[tuple_element] -= 1
                count += 1
            else:
                diff_tuples.append(tuple_element)

        a_inter_b = float(count)                  # A_inter_B
        a_union_b = float(total_element - count)  # A_union_B
        return a_inter_b, a_union_b, diff_tuples

    def process_all_comparaison(self):
        """
            Process comparison with all objects in database.
            Return {sample_id: result of process_comparaison}, or None when
            the reference sample has no call-CFG.
        """

        ccfg_obj1 = self.ctrl_callCFG.get_by_id(self.sample_id)
        if ccfg_obj1 is None:
            return None

        dict_result = {}
        for ccfg_obj2 in self.ctrl_callCFG.get_all(self.sample_id):
            dict_result[ccfg_obj2.sample_id] = self.process_comparaison(ccfg_obj1, ccfg_obj2)

        return dict_result

    def get_png_comparaison(self, sample_id_2):
        """
            Generate compare call-CFG PNG between the reference sample and
            `sample_id_2`; does nothing when one of them has no call-CFG.
        """

        ccfg_obj1 = self.ctrl_callCFG.get_by_id(self.sample_id)
        ccfg_obj2 = self.ctrl_callCFG.get_by_id(sample_id_2)
        if ccfg_obj1 is None or ccfg_obj2 is None:
            return

        self.process_comparaison(ccfg_obj1, ccfg_obj2, True)

    def process_comparaison(self, ccfg_obj1, ccfg_obj2, png=False):
        """
            Process comparison between ccfg_obj1 and ccfg_obj2 parameters.

            png=False : return (percentage, A_inter_B, A_union_B, differences);
                        differences are only detailed when percentage >= 80
            png=True  : generate the comparison graph and return None
        """

        # Elements IN file1 and NOT IN file2, then IN file2 and NOT IN file1
        a_inter_b, a_union_b, diff_tuples_1 = self.compare_tuples(
            ccfg_obj1.func_tuples, ccfg_obj2.func_tuples)
        diff_tuples_2 = self.compare_tuples(
            ccfg_obj2.func_tuples, ccfg_obj1.func_tuples)[2]

        # Jaccard index in percent; two samples without any t-uple give no
        # evidence of similarity and are rated 0 instead of dividing by zero
        pourcent = (a_inter_b / a_union_b) * 100 if a_union_b else 0.0

        # Generate new call_CFG with differences
        if png:
            sample_1 = api.get_elem_by_type("sample", ccfg_obj1.sample_id)
            sample_2 = api.get_elem_by_type("sample", ccfg_obj2.sample_id)

            c_cfg_inst = call_CFG(sample_1.storage_file, ccfg_obj1.offset_entrypoint)

            tuples_file_2 = sample_2.storage_file, diff_tuples_1, ccfg_obj2.offset_entrypoint
            c_cfg_inst.process_cCFG(tuples_file_2)
            return None

        # Detail the differences when Jaccard index >= 80%
        result_diff = {}
        if pourcent >= 80:
            result_diff = self.get_differences(
                diff_tuples_1, diff_tuples_2, ccfg_obj1.sample_id, ccfg_obj2.sample_id)

        return pourcent, a_inter_b, a_union_b, result_diff

    def _describe_differences(self, sample_id, diff_tuples):
        """
            Return one dict (functions, offsets, hashes) per t-uple of
            `diff_tuples` that has a row in offset_callCFG for `sample_id`
        """
        described = []
        for value in diff_tuples:
            id_row = "{0},{1}".format(sample_id, value)
            try:
                row_inst = self.ctrl_offset_callCFG.get_by_id(id_row)
            except IndexError:
                # The controller may index an empty result list
                row_inst = None
            if row_inst is None:
                continue

            _, machoc1, machoc2 = row_inst.id.split(',')
            parent_func, child_func = row_inst.func_name.split(',')
            offset_parent, offset_child = row_inst.offset_func.split(',')

            described.append({
                "parent_func": parent_func,
                "offset_parent": offset_parent,
                "child_func": child_func,
                "offset_child": offset_child,
                "machoc1": machoc1,
                "machoc2": machoc2,
            })
        return described

    def get_differences(self, diff_tuples_1, diff_tuples_2, sample_id_1, sample_id_2):
        """
            Return (diff_plus, diff_minus): the detailed calls that are IN
            file1 and NOT IN file2, then IN file2 and NOT IN file1
        """

        diff_plus = self._describe_differences(sample_id_1, diff_tuples_1)
        diff_minus = self._describe_differences(sample_id_2, diff_tuples_2)
        return diff_plus, diff_minus
+from polichombr.controllers.callCFG import callCFGController, offset_callCFGController from polichombr.models.sample import FunctionInfo @@ -50,6 +51,8 @@ class APIControl(object): analysiscontrol = AnalysisController( app.config['ANALYSIS_PROCESS_POOL_SIZE']) idacontrol = IDAActionsController() + callcfgcontrol = callCFGController() + offset_callcfgcontrol = offset_callCFGController() def __init__(self): """ @@ -63,7 +66,8 @@ def dispatch_sample_creation(self, user=None, tlp=TLPLevel.TLPWHITE, family=None, - zipflag=True): + zipflag=True, + offset_callCFG=None): """ If the sample is a ZipFile, we unpack it and return the last sample,otherwise we return a single sample. @@ -71,17 +75,18 @@ def dispatch_sample_creation(self, file_data = file_stream.read(4) file_stream.seek(0) if file_data.startswith(b"PK") and zipflag: - samples = self.create_from_zip(file_stream, user, tlp, family) + samples = self.create_from_zip(file_stream, user, tlp, family, offset_callCFG) else: sample = self.create_sample_and_run_analysis(file_stream, filename, user, tlp, - family) + family, + offset_callCFG) samples = [sample] return samples - def create_from_zip(self, file_stream, user, tlp, family): + def create_from_zip(self, file_stream, user, tlp, family, offset_callCFG): """ Iterates over the samples in the zip """ @@ -94,7 +99,8 @@ def create_from_zip(self, file_stream, user, tlp, family): name, user, tlp, - family) + family, + offset_callCFG) output_samples.append(sample) zcl.close() return output_samples @@ -105,7 +111,8 @@ def create_sample_and_run_analysis( originate_filename="", user=None, tlp_level=TLPLevel.TLPWHITE, - family=None): + family=None, + offset_callCFG=None): """ Creates a new sample and a schedule an analysis. 
class callCFGController(object):

    """
        Wrapper to the callCFG model.
    """

    def __init__(self):
        pass

    @staticmethod
    def add_callCFG(sample_id, func_tuples, entrypoint_offset):
        """
            Adds a callCFG row for `sample_id` and commits it.
            Always returns True.
        """
        model_callCFG = callCFG()
        model_callCFG.sample_id = sample_id
        model_callCFG.func_tuples = func_tuples
        model_callCFG.offset_entrypoint = entrypoint_offset
        db.session.add(model_callCFG)
        db.session.commit()
        return True

    @staticmethod
    def get_all(sid=None):
        """
            Get all callCFG, except the one of sample `sid` when it is given.
        """
        if sid is not None:
            return callCFG.query.filter(callCFG.sample_id != sid).all()
        return callCFG.query.all()

    @staticmethod
    def get_by_id(sample_id):
        """
            Get callCFG by its sample id, or None when there is none.
        """
        return callCFG.query.filter_by(sample_id=sample_id).first()

    @staticmethod
    def delete(callCFG):
        """
            Removes callCFG from database.
        """
        db.session.delete(callCFG)
        db.session.commit()
        return


class offset_callCFGController(object):

    """
        Wrapper to the offset_callCFG model.
    """

    def __init__(self):
        pass

    @staticmethod
    def add_offset_callCFG(sample_id, element, do_commit=True):
        """
            Adds an offset_callCFG.

            element   : triple (offset_func, func_tuples, func_name); the row
                        id is '<sample_id>,<func_tuples>'
            do_commit : commit immediately (False when batching)
        """

        offset_func, func_tuples, func_name = element

        model_offset_callCFG = offset_callCFG()
        model_offset_callCFG.id = '{0},{1}'.format(sample_id, func_tuples)
        model_offset_callCFG.sample_id = sample_id
        model_offset_callCFG.offset_func = offset_func
        model_offset_callCFG.func_name = func_name

        db.session.add(model_offset_callCFG)

        if do_commit:
            db.session.commit()

    def add_multiple_offset_callCFG(self, sample_id, tuples):
        """
            Adds multiple offset_callCFG with a single commit.
            Always returns True.
        """
        for element in tuples:
            self.add_offset_callCFG(sample_id, element, do_commit=False)
        db.session.commit()
        return True

    @staticmethod
    def get_by_sample_id(sample_id):
        """
            Get all offset_callCFG rows of a sample.
        """
        return offset_callCFG.query.filter_by(sample_id=sample_id).all()

    @staticmethod
    def get_by_id(id):
        """
            Get offset_callCFG by its row id ('<sample_id>,<func_tuples>').
            Returns None when the row does not exist, as callers test for None.
        """
        return offset_callCFG.query.filter_by(id=id).first()

    @staticmethod
    def delete(offset_callCFG, do_commit=True):
        """
            Removes offset_callCFG from database.
        """
        db.session.delete(offset_callCFG)
        if do_commit:
            db.session.commit()
        return

    def delete_multiple_offset_callCFG(self, all_offset_callCFG):
        """
            Delete multiple offset_callCFG with a single commit.
            Always returns True.
        """
        for element in all_offset_callCFG:
            self.delete(element, do_commit=False)
        db.session.commit()
        return True
+ Date : 08/2018 +""" + +import os +import time + +from polichombr import app +from polichombr.controllers.task import Task +from polichombr.controllers.callCFG import callCFGController, offset_callCFGController + +from polichombr.analysis_tools.lib_callCFG.C_CFG import call_CFG + + +class task_callCFG(Task): + + """ + Generate callCFG form file. + """ + + def __init__(self, sample): + super(task_callCFG, self).__init__() + self.tmessage = "CALLCFG TASK %d :: " % (sample.id) + self.sid = sample.id + self.tstart = None + self.storage_file = sample.storage_file + self.offset_callCFG = sample.offset_callCFG + + def execute(self): + self.tstart = int(time.time()) + app.logger.info(self.tmessage + "EXECUTE") + self.fname = self.storage_file + '.sign' + while not os.path.exists(self.fname): # Waiting Creation of Machoc file SHA256(file).bin.sign + time.sleep(1) + if int(time.time() - self.tstart) > 120: + app.logger.info(self.tmessage + " Machoc file not found") + return False + + self.process_call_CFG() #Generate callCFG + return True + + + def process_call_CFG(self): + app.logger.info(self.tmessage + 'Process call CFG') + + inst_ccfg = call_CFG(self.storage_file, self.offset_callCFG, app, self.tmessage) + self.offset, self.tuples, self.offset_tuples = inst_ccfg.process_cCFG() + + + def apply_result(self): + s_controller = callCFGController() + s_controller_offset = offset_callCFGController() + with app.app_context(): + app.logger.debug(self.tmessage + "APPLY_RESULT") + s_controller.add_callCFG(self.sid, self.tuples, self.offset) + s_controller_offset.add_multiple_offset_callCFG(self.sid, self.offset_tuples) + + + app.logger.debug(self.tmessage + "END - TIME %i" % + (int(time.time()) - self.tstart)) + return True \ No newline at end of file diff --git a/polichombr/controllers/tasks/task_peinfo.py b/polichombr/controllers/tasks/task_peinfo.py index 19814a4..eacb31c 100644 --- a/polichombr/controllers/tasks/task_peinfo.py +++ b/polichombr/controllers/tasks/task_peinfo.py 
@@ -18,6 +18,7 @@ from polichombr.controllers.sample import SampleController + class task_peinfo(Task): """ @@ -50,16 +51,174 @@ def execute(self): self.compile_timestamp = datetime.datetime.fromtimestamp( pe.FILE_HEADER.TimeDateStamp) self.import_hash = pe.get_imphash() - - metadata = self.generate_metadata(pe) - self.metadata_extracted.append(( SampleMetadataType.PE_import_hash, self.import_hash)) - - for item in metadata.items: - self.metadata_extracted.append(item) - + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_magic, + pe.DOS_HEADER.e_magic)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_cblp, + pe.DOS_HEADER.e_cblp)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_cp, + pe.DOS_HEADER.e_cp)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_crlc, + pe.DOS_HEADER.e_crlc)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_cparhdr, + pe.DOS_HEADER.e_cparhdr)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_minalloc, + pe.DOS_HEADER.e_minalloc)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_maxalloc, + pe.DOS_HEADER.e_maxalloc)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_ss, + pe.DOS_HEADER.e_ss)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_sp, + pe.DOS_HEADER.e_sp)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_csum, + pe.DOS_HEADER.e_csum)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_ip, + pe.DOS_HEADER.e_ip)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_cs, + pe.DOS_HEADER.e_cs)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_lfarlc, + pe.DOS_HEADER.e_lfarlc)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_ovno, + pe.DOS_HEADER.e_ovno)) + self.metadata_extracted.append(( + 
SampleMetadataType.PE_DOS_HEADER_e_res, + pe.DOS_HEADER.e_res)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_oemid, + pe.DOS_HEADER.e_oemid)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_oeminfo, + pe.DOS_HEADER.e_oeminfo)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_res2, + pe.DOS_HEADER.e_res2)) + self.metadata_extracted.append(( + SampleMetadataType.PE_DOS_HEADER_e_lfanew, + pe.DOS_HEADER.e_lfanew)) + self.metadata_extracted.append(( + SampleMetadataType.PE_FILE_HEADER_Machine, + pe.FILE_HEADER.Machine)) + self.metadata_extracted.append(( + SampleMetadataType.PE_FILE_HEADER_NumberOfSections, + pe.FILE_HEADER.NumberOfSections)) + self.metadata_extracted.append(( + SampleMetadataType.PE_FILE_HEADER_TimeDateStamp, + pe.FILE_HEADER.TimeDateStamp)) + self.metadata_extracted.append(( + SampleMetadataType.PE_FILE_HEADER_PointerToSymbolTable, + pe.FILE_HEADER.PointerToSymbolTable)) + self.metadata_extracted.append(( + SampleMetadataType.PE_FILE_HEADER_NumberOfSymbols, + pe.FILE_HEADER.NumberOfSymbols)) + self.metadata_extracted.append(( + SampleMetadataType.PE_FILE_HEADER_SizeOfOptionalHeader, + pe.FILE_HEADER.SizeOfOptionalHeader)) + self.metadata_extracted.append(( + SampleMetadataType.PE_FILE_HEADER_Characteristics, + pe.FILE_HEADER.Characteristics)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_Magic, + pe.OPTIONAL_HEADER.Magic)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_MajorLinkerVersion, + pe.OPTIONAL_HEADER.MajorLinkerVersion)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_MinorLinkerVersion, + pe.OPTIONAL_HEADER.MinorLinkerVersion)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfCode, + pe.OPTIONAL_HEADER.SizeOfCode)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfInitializedData, + pe.OPTIONAL_HEADER.SizeOfInitializedData)) 
+ self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfUninitializedData, + pe.OPTIONAL_HEADER.SizeOfUninitializedData)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_AddressOfEntryPoint, + pe.OPTIONAL_HEADER.AddressOfEntryPoint)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_BaseOfCode, + pe.OPTIONAL_HEADER.BaseOfCode)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_ImageBase, + pe.OPTIONAL_HEADER.ImageBase)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_SectionAlignment, + pe.OPTIONAL_HEADER.SectionAlignment)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_FileAlignment, + pe.OPTIONAL_HEADER.FileAlignment)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_MajorOperatingSystemVersion, + pe.OPTIONAL_HEADER.MajorOperatingSystemVersion)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_MinorOperatingSystemVersion, + pe.OPTIONAL_HEADER.MinorOperatingSystemVersion)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_MajorImageVersion, + pe.OPTIONAL_HEADER.MajorImageVersion)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_MinorImageVersion, + pe.OPTIONAL_HEADER.MinorImageVersion)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_MajorSubsystemVersion, + pe.OPTIONAL_HEADER.MajorSubsystemVersion)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_MinorSubsystemVersion, + pe.OPTIONAL_HEADER.MinorSubsystemVersion)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_Reserved1, + pe.OPTIONAL_HEADER.Reserved1)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfImage, + pe.OPTIONAL_HEADER.SizeOfImage)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfHeaders, + 
pe.OPTIONAL_HEADER.SizeOfHeaders)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_CheckSum, + pe.OPTIONAL_HEADER.CheckSum)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_Subsystem, + pe.OPTIONAL_HEADER.Subsystem)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_DllCharacteristics, + pe.OPTIONAL_HEADER.DllCharacteristics)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfStackReserve, + pe.OPTIONAL_HEADER.SizeOfStackReserve)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfStackCommit, + pe.OPTIONAL_HEADER.SizeOfStackCommit)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfHeapReserve, + pe.OPTIONAL_HEADER.SizeOfHeapReserve)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfHeapCommit, + pe.OPTIONAL_HEADER.SizeOfHeapCommit)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_LoaderFlags, + pe.OPTIONAL_HEADER.LoaderFlags)) + self.metadata_extracted.append(( + SampleMetadataType.PE_OPTIONAL_HEADER_NumberOfRvaAndSizes, + pe.OPTIONAL_HEADER.NumberOfRvaAndSizes)) return True def apply_result(self): @@ -67,72 +226,10 @@ def apply_result(self): with app.app_context(): sample = s_controller.get_by_id(self.sid) app.logger.debug(self.tmessage + "APPLY_RESULT") + # Compilation timestamp (even when faked) IS a file date, so update it. s_controller.add_multiple_metadata(sample, self.metadata_extracted) - - # Compilation timestamp IS a file date, so update it. 
s_controller.set_file_date(sample, self.compile_timestamp) s_controller.set_import_hash(sample, self.import_hash) app.logger.debug(self.tmessage + "END - TIME %i" % (int(time.time()) - self.tstart)) - return True - - @staticmethod - def generate_metadata(pe): - pe_metadata = { - SampleMetadataType.PE_DOS_HEADER_e_magic: pe.DOS_HEADER.e_magic, - SampleMetadataType.PE_DOS_HEADER_e_cblp: pe.DOS_HEADER.e_cblp, - SampleMetadataType.PE_DOS_HEADER_e_cp: pe.DOS_HEADER.e_cp, - SampleMetadataType.PE_DOS_HEADER_e_crlc: pe.DOS_HEADER.e_crlc, - SampleMetadataType.PE_DOS_HEADER_e_cparhdr: pe.DOS_HEADER.e_cparhdr, - SampleMetadataType.PE_DOS_HEADER_e_minalloc: pe.DOS_HEADER.e_minalloc, - SampleMetadataType.PE_DOS_HEADER_e_maxalloc: pe.DOS_HEADER.e_maxalloc, - SampleMetadataType.PE_DOS_HEADER_e_ss: pe.DOS_HEADER.e_ss, - SampleMetadataType.PE_DOS_HEADER_e_sp: pe.DOS_HEADER.e_sp, - SampleMetadataType.PE_DOS_HEADER_e_csum: pe.DOS_HEADER.e_csum, - SampleMetadataType.PE_DOS_HEADER_e_ip: pe.DOS_HEADER.e_ip, - SampleMetadataType.PE_DOS_HEADER_e_cs: pe.DOS_HEADER.e_cs, - SampleMetadataType.PE_DOS_HEADER_e_lfarlc: pe.DOS_HEADER.e_lfarlc, - SampleMetadataType.PE_DOS_HEADER_e_ovno: pe.DOS_HEADER.e_ovno, - SampleMetadataType.PE_DOS_HEADER_e_res: pe.DOS_HEADER.e_res, - SampleMetadataType.PE_DOS_HEADER_e_oemid: pe.DOS_HEADER.e_oemid, - SampleMetadataType.PE_DOS_HEADER_e_oeminfo: pe.DOS_HEADER.e_oeminfo, - SampleMetadataType.PE_DOS_HEADER_e_res2: pe.DOS_HEADER.e_res2, - SampleMetadataType.PE_DOS_HEADER_e_lfanew: pe.DOS_HEADER.e_lfanew, - SampleMetadataType.PE_FILE_HEADER_Machine: pe.FILE_HEADER.Machine, - SampleMetadataType.PE_FILE_HEADER_NumberOfSections: pe.FILE_HEADER.NumberOfSections, - SampleMetadataType.PE_FILE_HEADER_TimeDateStamp: pe.FILE_HEADER.TimeDateStamp, - SampleMetadataType.PE_FILE_HEADER_PointerToSymbolTable: pe.FILE_HEADER.PointerToSymbolTable, - SampleMetadataType.PE_FILE_HEADER_NumberOfSymbols: pe.FILE_HEADER.NumberOfSymbols, - 
SampleMetadataType.PE_FILE_HEADER_SizeOfOptionalHeader: pe.FILE_HEADER.SizeOfOptionalHeader, - SampleMetadataType.PE_FILE_HEADER_Characteristics: pe.FILE_HEADER.Characteristics, - SampleMetadataType.PE_OPTIONAL_HEADER_Magic: pe.OPTIONAL_HEADER.Magic, - SampleMetadataType.PE_OPTIONAL_HEADER_MajorLinkerVersion: pe.OPTIONAL_HEADER.MajorLinkerVersion, - SampleMetadataType.PE_OPTIONAL_HEADER_MinorLinkerVersion: pe.OPTIONAL_HEADER.MinorLinkerVersion, - SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfCode: pe.OPTIONAL_HEADER.SizeOfCode, - SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfInitializedData: pe.OPTIONAL_HEADER.SizeOfInitializedData, - SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfUninitializedData: pe.OPTIONAL_HEADER.SizeOfUninitializedData, - SampleMetadataType.PE_OPTIONAL_HEADER_AddressOfEntryPoint: pe.OPTIONAL_HEADER.AddressOfEntryPoint, - SampleMetadataType.PE_OPTIONAL_HEADER_BaseOfCode: pe.OPTIONAL_HEADER.BaseOfCode, - SampleMetadataType.PE_OPTIONAL_HEADER_ImageBase: pe.OPTIONAL_HEADER.ImageBase, - SampleMetadataType.PE_OPTIONAL_HEADER_SectionAlignment: pe.OPTIONAL_HEADER.SectionAlignment, - SampleMetadataType.PE_OPTIONAL_HEADER_FileAlignment: pe.OPTIONAL_HEADER.FileAlignment, - SampleMetadataType.PE_OPTIONAL_HEADER_MajorOperatingSystemVersion: pe.OPTIONAL_HEADER.MajorOperatingSystemVersion, - SampleMetadataType.PE_OPTIONAL_HEADER_MinorOperatingSystemVersion: pe.OPTIONAL_HEADER.MinorOperatingSystemVersion, - SampleMetadataType.PE_OPTIONAL_HEADER_MajorImageVersion: pe.OPTIONAL_HEADER.MajorImageVersion, - SampleMetadataType.PE_OPTIONAL_HEADER_MinorImageVersion: pe.OPTIONAL_HEADER.MinorImageVersion, - SampleMetadataType.PE_OPTIONAL_HEADER_MajorSubsystemVersion: pe.OPTIONAL_HEADER.MajorSubsystemVersion, - SampleMetadataType.PE_OPTIONAL_HEADER_MinorSubsystemVersion: pe.OPTIONAL_HEADER.MinorSubsystemVersion, - SampleMetadataType.PE_OPTIONAL_HEADER_Reserved1: pe.OPTIONAL_HEADER.Reserved1, - SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfImage: pe.OPTIONAL_HEADER.SizeOfImage, 
- SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfHeaders: pe.OPTIONAL_HEADER.SizeOfHeaders, - SampleMetadataType.PE_OPTIONAL_HEADER_CheckSum: pe.OPTIONAL_HEADER.CheckSum, - SampleMetadataType.PE_OPTIONAL_HEADER_Subsystem: pe.OPTIONAL_HEADER.Subsystem, - SampleMetadataType.PE_OPTIONAL_HEADER_DllCharacteristics: pe.OPTIONAL_HEADER.DllCharacteristics, - SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfStackReserve: pe.OPTIONAL_HEADER.SizeOfStackReserve, - SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfStackCommit: pe.OPTIONAL_HEADER.SizeOfStackCommit, - SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfHeapReserve: pe.OPTIONAL_HEADER.SizeOfHeapReserve, - SampleMetadataType.PE_OPTIONAL_HEADER_SizeOfHeapCommit: pe.OPTIONAL_HEADER.SizeOfHeapCommit, - SampleMetadataType.PE_OPTIONAL_HEADER_LoaderFlags: pe.OPTIONAL_HEADER.LoaderFlags, - SampleMetadataType.PE_OPTIONAL_HEADER_NumberOfRvaAndSizes: pe.OPTIONAL_HEADER.NumberOfRvaAndSizes - } - return pe_metadata + return True \ No newline at end of file diff --git a/polichombr/models/callCFG.py b/polichombr/models/callCFG.py new file mode 100644 index 0000000..c19f953 --- /dev/null +++ b/polichombr/models/callCFG.py @@ -0,0 +1,68 @@ +""" + This file is part of Polichombr. + + Organization : EDF-R&D-PERICLES-IRC + Author : JCO + Description: Models to implement callCFG and offset_callCFG objects + Date : 08/2018 +""" + +from datetime import datetime + +from polichombr import db, ma + + +class callCFG(db.Model): + + """ + callCFG DB Model + """ + __tablename__ = 'callCFG' + + id = db.Column(db.Integer(), primary_key=True) + sample_id = db.Column(db.Integer(), db.ForeignKey('sample.id')) + func_tuples = db.Column(db.String()) + offset_entrypoint = db.Column(db.String()) + creation_date = db.Column(db.DateTime()) + + def __init__(self): + self.creation_date = datetime.now() + +class callCFGSchema(ma.ModelSchema): + """ + Schema representation. 
+ """ + class Meta(object): + fields = ('id', + 'sample_id', + 'func_tuples', + 'offset_entrypoint', + 'creation_date') + + +class offset_callCFG(db.Model): + """ + offset calLCFF DB Model + """ + + __tablename__ = 'offset_callCFG' + + id = db.Column(db.String(), primary_key=True) + sample_id = db.Column(db.Integer(), db.ForeignKey('sample.id')) + offset_func = db.Column(db.String()) + func_name = db.Column(db.String()) + creation_date = db.Column(db.DateTime()) + + def __init__(self): + self.creation_date = datetime.now() + +class offset_callCFGSchema(ma.ModelSchema): + """ + Schema representation. + """ + class Meta(object): + fields = ('id', + 'sample_id', + 'offset_func', + 'func_name', + 'creation_date') diff --git a/polichombr/templates/sample.html b/polichombr/templates/sample.html index aa66039..b9aadc2 100644 --- a/polichombr/templates/sample.html +++ b/polichombr/templates/sample.html @@ -340,6 +340,78 @@


+ + + {% if callcfg.offset_entrypoint %} + +
+
+

Call-CFG compare

+
+
+
+ Start offset :   + {{callcfg.offset_entrypoint}} + + Download Call-CFG +

+ + + + + + + + + + + + {% for reslt in compare_callcfg_results %} + + + + + + + + {% if reslt[3] or reslt[4] %} + + + {% endif %} + {% endfor %} + +
FilenameDiff
{{reslt[0]}}{{reslt[1]}} + {% if reslt[3] or reslt[4] %} + Collapse + {% endif %} + + + {% if reslt[3] or reslt[4] %} + + + + {% endif %} +
+
+ {% if reslt[3] %} + -› Elements IN '{{sample.filenames[0].name}}' and NOT IN '{{reslt[0]}}' +

+ {{reslt[3]|safe}} +

+ {% endif %} + {% if reslt[4] %} + -› Elements NOT IN '{{sample.filenames[0].name}}' and IN '{{reslt[0]}}' +

+ {{reslt[4]|safe}} + {% endif %} +
+
+
+ + {% endif %} + + +

Machoc compare

diff --git a/polichombr/views/api_sample.py b/polichombr/views/api_sample.py index d1ce506..f72dce0 100644 --- a/polichombr/views/api_sample.py +++ b/polichombr/views/api_sample.py @@ -17,6 +17,7 @@ from flask import jsonify, request, send_file, abort, current_app, g from flask_security import login_required +from polichombr.analysis_tools.lib_callCFG.compare_ccfg import compare_ccfg @apiview.route('/samples//') @@ -38,6 +39,43 @@ def api_get_sample_id_from_hash(shash): return jsonify({'sample_id': sample.id}) return jsonify({'sample_id': None}) +@apiview.route('/callcfg//download/') +@login_required +def api_get_callcfg_file(sid): + """ + Organization : EDF-R&D-PERICLES-IRC + Author : JCO + Description : Return the callCFG png file + Date : 08/2018 + """ + sample = api.get_elem_by_type("sample", sid) + png_filename = sample.storage_file.replace('.bin','.png') + return send_file('../'+png_filename, + as_attachment=True, + attachment_filename=os.path.basename(png_filename)) + +@apiview.route('/callcfg///download_compare_callcfg/') +@login_required +def api_get_compare_callcfg_file(sid_1, sid_2): + """ + Organization : EDF-R&D-PERICLES-IRC + Author : JCO + Description : Return the compare callCFG png file + Date : 08/2018 + """ + + sample_1 = api.get_elem_by_type("sample", sid_1) + sample_2 = api.get_elem_by_type("sample", sid_2) + + png_filename = "polichombr/storage/{0}_{1}.png".format(sample_1.sha256, sample_2.sha256) + + if not os.path.exists(png_filename): + compare_ccfg_inst = compare_ccfg(sid_1) + compare_ccfg_inst.get_png_comparaison(sid_2) + + return send_file('../'+png_filename, + as_attachment=True, + attachment_filename=os.path.basename(png_filename)) @apiview.route('/samples//download/') @login_required diff --git a/polichombr/views/forms.py b/polichombr/views/forms.py index 9fca698..869c47e 100644 --- a/polichombr/views/forms.py +++ b/polichombr/views/forms.py @@ -11,7 +11,7 @@ from flask_wtf import FlaskForm from flask_wtf.file import FileField from 
wtforms import StringField, SelectField -from wtforms import SubmitField, TextAreaField, BooleanField +from wtforms import SubmitField, TextAreaField, BooleanField, TextField from wtforms import PasswordField, HiddenField from wtforms import IntegerField from wtforms.validators import DataRequired, Length, EqualTo @@ -267,7 +267,8 @@ class UploadSampleForm(FlaskForm): render_kw={'multiple': True}) level = SelectField('Sensibility', choices=TLPLevelChoices, coerce=int, validators=[DataRequired()]) - family = SelectField('Associated Family', coerce=int) + family = SelectField('Associated Family', coerce=int) + offset_callCFG = TextField('Offset call CFG') zipflag = BooleanField('Sample Zip archive') uploadsample = SubmitField('Submit') diff --git a/polichombr/views/webui_sample.py b/polichombr/views/webui_sample.py index 0997b39..34335d0 100644 --- a/polichombr/views/webui_sample.py +++ b/polichombr/views/webui_sample.py @@ -25,7 +25,7 @@ from polichombr.views.forms import CompareMachocForm from polichombr.controllers.sample import disassemble_sample_get_svg - +from polichombr.analysis_tools.lib_callCFG.compare_ccfg import compare_ccfg @webuiview.route('/samples/', methods=['GET', 'POST']) @login_required @@ -41,6 +41,8 @@ def ui_sample_upload(): if upload_form.validate_on_submit(): family_id = upload_form.family.data zipflag = upload_form.zipflag.data + offset_callcfg = upload_form.offset_callCFG.data + family = None if family_id != 0: family = api.get_elem_by_type("family", family_id) @@ -55,7 +57,8 @@ def ui_sample_upload(): g.user, upload_form.level.data, family, - zipflag) + zipflag, + offset_callcfg) if not samples: flash("Error during sample creation", "error") else: @@ -78,6 +81,51 @@ def parse_machoc_form(sample, form): sample, comparison_level) return results +def format_machoc_print(machoc1, machoc2): + """ + Organization : EDF-R&D-PERICLES-IRC + Author : JCO + Description : Format machoc print + """ + if machoc1: + return " | {0} -> {1}".format(machoc1, 
machoc2) + elif machoc2: + return " | ________ -> {0}".format(machoc2) + else: + return "" + + +def compare_call_cfg(sample_id): + """ + Organization : EDF-R&D-PERICLES-IRC + Author : JCO + Description : Generate comparaison results with call-CFG + """ + ret = [] + inst_compare_ccfg = compare_ccfg(sample_id) + dict_results = inst_compare_ccfg.process_all_comparaison() + + if dict_results == None: + return ret + for sample_id_tmp, result in dict_results.iteritems(): + pourcent, a_inter_b, a_union_b, diff_details = result + if int(pourcent) >= 80: + + ret_plus = [] + ret_minus = [] + diff_plus, diff_minus = diff_details + for element in diff_plus: + ret_plus.append(' [+] {0} ({3}) -> call {4} ({6}){7}'.format(element["parent_func"], sample_id, element["offset_parent"].replace('0x',''), element["offset_parent"], element["child_func"], element["offset_child"].replace('0x',''), element["offset_child"], format_machoc_print(element["machoc1"],element["machoc2"]))) + for element in diff_minus: + ret_minus.append(' [-] {0} ({3}) -> call {4} ({6}){7}'.format(element["parent_func"], sample_id_tmp, element["offset_parent"].replace('0x',''), element["offset_parent"], element["child_func"], element["offset_child"].replace('0x',''), element["offset_child"], format_machoc_print(element["machoc1"],element["machoc2"]))) + + + sample = api.get_elem_by_type("sample", sample_id_tmp) + + ret.append([sample.filenames[0].name, "{0}% ({1}/{2})".format(str(round(pourcent,2)), str(int(a_inter_b)), str(int(a_union_b))), sample_id_tmp, "
".join(ret_plus), "
".join(ret_minus)]) + + + return ret def gen_sample_view(sample_id, graph=None, fctaddr=None): """ @@ -93,6 +141,14 @@ def gen_sample_view(sample_id, graph=None, fctaddr=None): change_tlp_level_form = ChangeTLPForm() machoc_form = CompareMachocForm() + callcfg = api.callcfgcontrol.get_by_id(sample_id) + try: + callcfg.offset_entrypoint = hex(int(callcfg.offset_entrypoint)) + except: + pass + + compare_callcfg_results = compare_call_cfg(sample_id) + if add_family_form.validate_on_submit(): family_id = add_family_form.parentfamily.data family = api.get_elem_by_type("family", family_id) @@ -113,6 +169,7 @@ def gen_sample_view(sample_id, graph=None, fctaddr=None): return render_template("sample.html", sample=sample, + callcfg=callcfg, abstractform=set_sample_abstract_form, checklists=api.samplecontrol.get_all_checklists(), changetlpform=change_tlp_level_form, @@ -120,7 +177,8 @@ def gen_sample_view(sample_id, graph=None, fctaddr=None): hresults=machoc_comparison_results, addfamilyform=add_family_form, graph=graph, - fctaddr=fctaddr) + fctaddr=fctaddr, + compare_callcfg_results=compare_callcfg_results) @webuiview.route('/sample//', methods=['GET', 'POST']) @@ -238,6 +296,15 @@ def delete_sample(sample_id): """ sample = api.get_elem_by_type("sample", sample_id) api.samplecontrol.delete(sample) + + callcfg = api.callcfgcontrol.get_by_id(sample_id) + if callcfg != None: + api.callcfgcontrol.delete(callcfg) + + offset_callcfg = api.offset_callcfgcontrol.get_by_sample_id(sample_id) + if offset_callcfg != None: + api.offset_callcfgcontrol.delete_multiple_offset_callCFG(offset_callcfg) + return redirect(url_for('webuiview.index')) @@ -248,3 +315,29 @@ def download_sample(sample_id): Download a sample's file. """ return redirect(url_for('apiview.api_get_sample_file', sid=sample_id)) + +@webuiview.route('/callcfg//download/') +@login_required +def download_sample_ccfg(sample_id): + """ + Organization : EDF-R&D-PERICLES-IRC + Author : JCO + Description : Download a sample's file. 
+ Date : 08/2018 + """ + + return redirect(url_for('apiview.api_get_callcfg_file', sid=sample_id)) + +@webuiview.route('/callcfg///download_compare_callcfg/') +@login_required +def download_compare_sample_ccfg(sample_id_1, sample_id_2): + """ + Organization : EDF-R&D-PERICLES-IRC + Author : JCO + Description : Download a sample's file. + Date : 08/2018 + """ + + print sample_id_1, sample_id_2 + return redirect(url_for('apiview.api_get_compare_callcfg_file', sid_1=sample_id_1, sid_2=sample_id_2)) + diff --git a/requirements.txt b/requirements.txt index eafa582..2f3a49b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,4 @@ python-magic requests sqlalchemy-migrate yara-python +r2pipe