Skip to content

Commit

Permalink
[WIP][Scripts] Complete Python3, static linking, PyPanda API working
Browse files Browse the repository at this point in the history
  • Loading branch information
AndrewQuijano committed Aug 27, 2024
1 parent fb70227 commit 881a05c
Show file tree
Hide file tree
Showing 23 changed files with 253 additions and 190 deletions.
3 changes: 3 additions & 0 deletions dependencies/ubuntu_22.04_build.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,6 @@ libzmq3-dev

# I need this for making LavaTool
g++-10

# Install dwarfdump; you need this for 64-bit bugs
dwarfdump
11 changes: 1 addition & 10 deletions init-host.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/python3

# import argparse
import os
Expand All @@ -18,9 +18,6 @@
from colorama import Fore
from colorama import Style

# if moyix server is down, this image will also work
QCOW_URL = "https://panda.re/qcows/linux/debian/7.3/x86/debian_7.3_x86.qcow"
QCOW_FILE_NAME = "wheezy_panda2.qcow2"
TAR_URL = "ftp://ftp.astron.com/pub/file/file-5.22.tar.gz"
LAVA_DIR = dirname(abspath(sys.argv[0]))

Expand Down Expand Up @@ -78,12 +75,6 @@ def main():
else:
progress("Found existing target_bins/{}".format(basename(TAR_URL)))

if not isfile(join(LAVA_DIR, basename(QCOW_FILE_NAME))):
progress("Downloading {}".format(basename(QCOW_URL)))
run(["wget", "--no-check-certificate", QCOW_URL, "-O", QCOW_FILE_NAME])
else:
progress("Found existing {}".format(basename(QCOW_FILE_NAME)))

if not isfile(join(LAVA_DIR, "host.json")):
progress("Building host.json")
# Build host.json
Expand Down
6 changes: 4 additions & 2 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,16 @@ else
fi

curl -LJO https://github.com/panda-re/panda/releases/download/v1.8.23/pandare_22.04.deb
$SUDO apt install ./pandare_22.04.deb
mv *.deb /tmp
$SUDO apt-get -y install /tmp/pandare_22.04.deb
rm /tmp/*.deb

progress "Installed build dependencies"

pip3 install --upgrade pip
pip3 install -r requirements.txt
progress "Installed Python requirements"

$SUDO bash ./setup_container.sh
bash ./setup_container.sh

progress "Installed LAVA"
22 changes: 11 additions & 11 deletions scripts/add_queries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ USAGE() {
}

set -e # Exit on error
#set -x # Debug mode
set -x # Debug mode

if [ $# -lt 1 ]; then
USAGE $0
Expand Down Expand Up @@ -86,8 +86,8 @@ progress "queries" 0 "Configuring..."
mkdir -p lava-install
configure_file=${configure_cmd%% *}
if [ -e "$configure_file" ]; then
CC=/usr/lib/llvm-11/bin/clang \
CXX=/usr/lib/llvm-11/bin/clang++ \
CC=$llvm/bin/clang \
CXX=$llvm/bin/clang++ \
CFLAGS="-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. -I../include -I./src/" \
$configure_cmd --prefix=$(pwd)/lava-install
fi
Expand All @@ -103,8 +103,8 @@ for i in ${MAKES[@]}; do
IFS=' '
read -ra ARGS <<< $i
echo "$lava/tools/btrace/sw-btrace ${ARGS[@]}"
CC=/usr/lib/llvm-11/bin/clang \
CXX=/usr/lib/llvm-11/bin/clang++ \
CC=$llvm/bin/clang \
CXX=$llvm/bin/clang++ \
CFLAGS="-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. -I../include -I./src/" \
$lava/tools/btrace/sw-btrace ${ARGS[@]}
IFS='&&'
Expand All @@ -119,7 +119,7 @@ bash -c $install
progress "queries" 0 "Creating compile_commands.json..."
# Delete any pre-existing compile commands.json (could be in archive by mistake)
rm -f compile_commands.json
$lava/tools/btrace/sw-btrace-to-compiledb /usr/lib/llvm-11/lib/clang/11/include
$lava/tools/btrace/sw-btrace-to-compiledb $llvm/lib/clang/11/include
if [ -e "$directory/$name/extra_compile_commands.json" ]; then
sed -i '$d' compile_commands.json
echo "," >> compile_commands.json
Expand All @@ -130,7 +130,7 @@ git commit -m 'Add compile_commands.json.'

cd ..

c_files=$(python $lava/tools/lavaTool/get_c_files.py $source)
c_files=$($python $lava/tools/lavaTool/get_c_files.py $source)
c_dirs=$(for i in $c_files; do dirname $i; done | sort | uniq)

progress "queries" 0 "Copying include files..."
Expand All @@ -151,7 +151,7 @@ done

#progress "queries" 0 "Initialize variables..."
#for i in $c_files; do
# $lava/src_clang/build/lavaTool -action=init \
# /src_clang/build/lavaTool -action=init \
# -p="$source/compile_commands.json" \
# -src-prefix=$(readlink -f "$source") \
# $i
Expand All @@ -167,7 +167,7 @@ fninstr=$directory/$name/fninstr

echo "Creating fninstr [$fninstr]"
echo -e "\twith command: \"python $lava/scripts/fninstr.py -d -o $fninstr $fnfiles\""
python $lava/scripts/fninstr.py -d -o $fninstr $fnfiles
$python $lava/scripts/fninstr.py -d -o $fninstr $fnfiles

if [[ ! -z "$df_fn_blacklist" ]]; then
cmd=$(echo "sed -i /${df_fn_blacklist}/d $fninstr")
Expand Down Expand Up @@ -209,13 +209,13 @@ fi
# Do we need to explicitly apply replacements in the root source directory
# This causes clang-apply-replacements to segfault when run a 2nd time
#pushd "$directory/$name/$source"
#$llvm_src/Release/bin/clang-apply-replacements .
#/usr/lib/llvm-11/bin/clang-apply-replacements .
#popd

for i in $c_dirs; do
echo "Applying replacements to $i"
pushd $i
$llvm_src/Release/bin/clang-apply-replacements .
$llvm/bin/clang-apply-replacements .
popd
done

Expand Down
143 changes: 77 additions & 66 deletions scripts/bug_mining.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
from vars import parse_vars
from os.path import abspath, join
from pandare import Panda
from pandare.extras import dwarfdump


host_json = abspath(sys.argv[1])
project_name = sys.argv[2]
Expand All @@ -53,24 +55,25 @@
curtail = 0

installdir = None
isoname = None
command_args = None

# Replace create_recording in first link
# https://github.com/panda-re/panda/blob/dev/panda/scripts/run_guest.py#L151-L189
# https://docs.panda.re/#recordings
# https://github.com/panda-re/panda/blob/dev/panda/python/core/pandare/panda.py#L2595-L2645
@panda.queue_blocking
def create_recording():
# I assume qemu_path is just 'panda-system-i386', 'panda-system-x86_64', etc.
global command_args
global installdir
global isoname
print("args", command_args)
print("install dir", installdir)
print("isoname", isoname)
guest_command = subprocess.list2cmdline(command_args)
panda.record_cmd(guest_command=guest_command, copy_directory=installdir, iso_name=isoname)
# These are technically the first two steps of record_cmd, done by hand here
# because running the executable ONLY works with absolute paths
panda.revert_sync('root')
panda.copy_to_guest(installdir, absolute_paths=True)

# Pass in None for snap_name since I already did the revert_sync
panda.record_cmd(guest_command=guest_command, snap_name=None)
panda.stop_run()


Expand Down Expand Up @@ -106,8 +109,6 @@ def progress(msg):

tick()



input_file = abspath(project["config_dir"] + "/" + sys.argv[3])
input_file_base = os.path.basename(input_file)
print("bug_mining.py %s %s" % (project_name, input_file))
Expand All @@ -116,11 +117,6 @@ def progress(msg):
# global curtail
curtail = int(sys.argv[4])

chaff = project.get('chaff', False)

panda_os_string = project.get('panda_os_string',
'linux-32-debian:3.2.0-4-686-pae')

lavadir = dirname(dirname(abspath(sys.argv[0])))

progress("Entering {}".format(project['output_dir']))
Expand All @@ -135,8 +131,8 @@ def progress(msg):
# e.g. file-5.22-true.iso
installdir = join(sourcedir, 'lava-install')
input_file_guest = join(installdir, input_file_base)
isoname = '{}-{}.iso'.format(sourcedir, input_file_base)
command_args = shlex.split(project['command'].format(
command_args = shlex.split(
project['command'].format(
install_dir=pipes.quote(installdir),
input_file=input_file_guest))
shutil.copy(input_file, installdir)
Expand All @@ -159,77 +155,90 @@ def progress(msg):
progress("Starting first and only replay, tainting on file open...")

# process name

if command_args[0].startswith('LD_PRELOAD'):
cmdpath = command_args[1]
proc_name = basename(command_args[1])
else:
cmdpath = command_args[0]
proc_name = basename(command_args[0])

pandalog = "{}/queries-{}.plog".format(project['output_dir'], os.path.basename(isoname))
pandalog_json = "{}/queries-{}.json".format(project['output_dir'], os.path.basename(isoname))
binpath = os.path.join(installdir, "bin", proc_name)
if not os.path.exists(binpath):
binpath = os.path.join(installdir, "lib", proc_name)
if not os.path.exists(binpath):
binpath = os.path.join(installdir, proc_name)

pandalog = "{}/queries-{}.plog".format(project['output_dir'], input_file_base)
pandalog_json = "{}/queries-{}.json".format(project['output_dir'], input_file_base)

print("pandalog = [%s] " % pandalog)

panda_args = {
'pri': {},
'pri_dwarf': {
'proc': proc_name,
'g_debugpath': installdir,
'h_debugpath': installdir
},
'pri_taint': {
'hypercall': True,
'chaff': chaff
},
'taint2': {'no_tp': True},
'tainted_branch': {},
'file_taint': {
'pos': True,
'cache_process_details_on_basic_block': True,
}
}
dwarf_cmd = ["dwarfdump", "-dil", cmdpath]
dwarfout = subprocess.check_output(dwarf_cmd)
dwarfdump.parse_dwarfdump(dwarfout, binpath)

# Based on this example:
# https://github.com/panda-re/panda/blob/dev/panda/python/examples/file_taint/file_taint.py
panda.set_pandalog(pandalog)

panda.load_plugin("pri")
panda.load_plugin("dwarf2",
args={
'proc': proc_name,
'g_debugpath': installdir,
'h_debugpath': installdir
})
# pri_taint is almost same as Zhenghao's hypercall
# Chaffx64 branch says these are needed?
# if panda.arch != 'i386':
# panda.load_plugin('hypercall')
# panda.load_plugin('stackprob')

panda.load_plugin("taint2",
args={
'no_tp': True
})
panda.load_plugin("tainted_branch")

if 'use_stdin' in project and project['use_stdin']:
panda_args['file_taint']['first_instr'] = 1
panda_args['file_taint']['use_stdin'] = proc_name
panda.load_plugin("file_taint",
args={
'filename' : input_file_guest,
'pos': True,
'cache_process_details_on_basic_block': True,
'first_instr' : 1,
'use_stdin' : proc_name,
'verbose' : True
})
else:
panda_args['file_taint']['enable_taint_on_open'] = True

qemu_args = [
project['qemu'], '-replay', isoname,
'-pandalog', pandalog, '-os', panda_os_string
]

for plugin, plugin_args in panda_args.items():
qemu_args.append('-panda')
arg_string = ",".join(["{}={}".format(arg, val)
for arg, val in plugin_args.items()])
qemu_args.append('{}{}{}'.format(plugin, ':'
if arg_string else '', arg_string))

# Use -panda-plugin-arg to account for commas and colons in filename.
qemu_args.extend(['-panda-arg', 'file_taint:filename=' + input_file_guest])

dprint("qemu args: [{}]".format(subprocess.list2cmdline(qemu_args)))
sys.stdout.flush()
try:
subprocess.check_call(qemu_args, stderr=subprocess.STDOUT)
except subprocess.CalledProcessError:
if qemu_use_rr:
qemu_args = ['rr', 'record', project['qemu'], '-replay', isoname]
subprocess.check_call(qemu_args)
else:
raise
panda.load_plugin("file_taint",
args={
'filename' : input_file_guest,
'pos': True,
'cache_process_details_on_basic_block': True,
'enable_taint_on_open': True,
'verbose' : True
})
panda.load_plugin("pri_taint")

# Default name is 'recording'
# https://github.com/panda-re/panda/blob/dev/panda/python/core/pandare/panda.py#L2595
panda.run_replay("recording")

replay_time = tock()
print("taint analysis complete %.2f seconds" % replay_time)
sys.stdout.flush()

tick()

# I attempted to upgrade the version, but panda had trouble including <protobuf-c/protobuf.h>.
# For now, we can use the python implementation, although it is slower
# https://github.com/protocolbuffers/protobuf/releases/tag/v21.0
# https://stackoverflow.com/questions/52040428/how-to-update-protobuf-runtime-library
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
progress("Calling the FBI on queries.plog...")
convert_json_args = ['python3', '-m', 'pandare.plog_reader', pandalog]
print("panda log JSON invocation: [%s]" % (subprocess.list2cmdline(convert_json_args)))
print("panda log JSON invocation: [%s] > %s" % (subprocess.list2cmdline(convert_json_args), pandalog_json))
try:
with open(pandalog_json, 'wb') as fd:
subprocess.check_call(convert_json_args, stdout=fd, stderr=sys.stderr)
Expand All @@ -251,6 +260,8 @@ def progress(msg):

dprint("fbi invocation: [%s]" % (subprocess.list2cmdline(fbi_args)))
sys.stdout.flush()
import sys
sys.exit(0)
try:
subprocess.check_call(fbi_args, stdout=sys.stdout, stderr=sys.stderr)
except subprocess.CalledProcessError as e:
Expand Down
2 changes: 1 addition & 1 deletion scripts/competition.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/python3

import argparse
import datetime
Expand Down
1 change: 0 additions & 1 deletion scripts/competition.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
# Json file required params
#
# lava: directory of lava repository
# pandahost: what remote host to run panda on

trap '' PIPE
set -e # Exit on error
Expand Down
Loading

0 comments on commit 881a05c

Please sign in to comment.