From 57bb9609cbf26e55d43b6cef7acbaed1760db7a6 Mon Sep 17 00:00:00 2001 From: AndrewQuijano Date: Tue, 15 Oct 2024 11:01:54 -0400 Subject: [PATCH] Update scripts to support paths with spaces, WIP --- scripts/add_queries.sh | 46 +++++++++++--------- scripts/funcs.sh | 4 +- scripts/lava.sh | 26 +++++++----- scripts/reset_db.sh | 7 ++- scripts/vars.sh | 66 ++++++++++++++--------------- tools/btrace/sw-btrace | 16 +++---- tools/btrace/sw-btrace-to-compiledb | 35 ++++++--------- 7 files changed, 98 insertions(+), 102 deletions(-) diff --git a/scripts/add_queries.sh b/scripts/add_queries.sh index f062ef67..6baf7970 100755 --- a/scripts/add_queries.sh +++ b/scripts/add_queries.sh @@ -26,9 +26,6 @@ # and run the bug_mining.py script (which uses PANDA to trace taint). # -# Load lava-functions and vars -. `dirname $0`/funcs.sh - tick version="2.0.0" @@ -50,13 +47,18 @@ elif [ $# -eq 2 ]; then ATP_TYPE="-$1" json="$(readlink -f $2)" else - USAGE $0 + USAGE $0 exit 1 fi -lava="$(dirname $(dirname $(readlink -f $0)))" +absolute_path=$(readlink -f "$0") +scripts_path=$(dirname "$absolute_path") +lava=$(dirname "$scripts_path") project_name="$1" -. `dirname $0`/vars.sh + +# Load lava-functions and vars +source "$scripts_path/funcs.sh" +source "$scripts_path/vars.sh" progress "queries" 0 "Entering $directory/$name." mkdir -p "$directory/$name" @@ -98,15 +100,15 @@ progress "queries" 0 "Making with btrace..." rm -f btrace.log ORIGIN_IFS=$IFS IFS='&&' -read -ra MAKES <<< $makecmd -for i in ${MAKES[@]}; do +read -ra MAKES <<< "$makecmd" +for i in "${MAKES[@]}"; do IFS=' ' - read -ra ARGS <<< $i + read -ra ARGS <<< "$i" echo "$lava/tools/btrace/sw-btrace ${ARGS[@]}" CC=$llvm/bin/clang \ CXX=$llvm/bin/clang++ \ CFLAGS="-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. 
-I../include -I./src/" \ - $lava/tools/btrace/sw-btrace ${ARGS[@]} + "$lava/tools/btrace/sw-btrace" "${ARGS[@]}" IFS='&&' done IFS=$ORIGIN_IFS @@ -119,7 +121,7 @@ bash -c $install progress "queries" 0 "Creating compile_commands.json..." # Delete any pre-existing compile commands.json (could be in archive by mistake) rm -f compile_commands.json -$lava/tools/btrace/sw-btrace-to-compiledb $llvm/lib/clang/11/include +"$lava/tools/btrace/sw-btrace-to-compiledb" $llvm/lib/clang/11/include if [ -e "$directory/$name/extra_compile_commands.json" ]; then sed -i '$d' compile_commands.json echo "," >> compile_commands.json @@ -130,23 +132,24 @@ git commit -m 'Add compile_commands.json.' cd .. -c_files=$($python $lava/tools/lavaTool/get_c_files.py $source) -c_dirs=$(for i in $c_files; do dirname $i; done | sort | uniq) +# Switching IFS to '\n' to support paths with spaces in them. +c_files=$($python "$lava/tools/lavaTool/get_c_files.py" "$source") +IFS=$'\n' +c_dirs=$(for i in $c_files; do dirname "$i"; done | sort | uniq) progress "queries" 0 "Copying include files..." for i in $c_dirs; do echo " $i" - if [ -d $i ]; then - cp $lava/tools/include/*.h $i/ + if [ -d "$i" ]; then + cp "$lava"/tools/include/*.h "$i"/ fi done - # Run another clang tool that provides information about functions, # i.e., which have only prototypes, which have bodies. progress "queries" 0 "Figure out functions" for this_c_file in $c_files; do - $lava/tools/install/bin/lavaFnTool $this_c_file + "$lava/tools/install/bin/lavaFnTool" "$this_c_file" done #progress "queries" 0 "Initialize variables..." @@ -167,7 +170,7 @@ fninstr=$directory/$name/fninstr echo "Creating fninstr [$fninstr]" echo -e "\twith command: \"python $lava/scripts/fninstr.py -d -o $fninstr $fnfiles\"" -$python $lava/scripts/fninstr.py -d -o $fninstr $fnfiles +$python "$lava/scripts/fninstr.py" -d -o $fninstr $fnfiles if [[ ! 
-z "$df_fn_blacklist" ]]; then cmd=$(echo "sed -i /${df_fn_blacklist}/d $fninstr") @@ -181,7 +184,7 @@ if [ "$dataflow" = "true" ]; then # Since it's okay to pass the whitelist either way progress "queries" 0 "Inserting queries for dataflow" for i in $c_files; do - $lava/tools/install/bin/lavaTool -action=query \ + "$lava/tools/install/bin/lavaTool" -action=query \ -lava-db="$directory/$name/lavadb" \ -p="$directory/$name/$source/compile_commands.json" \ -arg_dataflow \ @@ -195,7 +198,7 @@ else progress "queries" 0 "Inserting queries..." # TODO: remove lava-wl here, unless we're using it to limit where we inject for i in $c_files; do - $lava/tools/install/bin/lavaTool -action=query \ + "$lava/tools/install/bin/lavaTool" -action=query \ -lava-db="$directory/$name/lavadb" \ -lava-wl="$fninstr" \ -p="$source/compile_commands.json" \ @@ -215,7 +218,7 @@ fi for i in $c_dirs; do echo "Applying replacements to $i" pushd $i - $llvm/bin/clang-apply-replacements . + "$llvm/bin/clang-apply-replacements" . popd done @@ -227,6 +230,7 @@ for this_c_file in $c_files; do exit 1 fi done +unset IFS progress "queries" 0 "Done inserting queries. Time to make and run actuate.py on a 64-BIT machine!" 
diff --git a/scripts/funcs.sh b/scripts/funcs.sh index 24693590..ca307534 100755 --- a/scripts/funcs.sh +++ b/scripts/funcs.sh @@ -53,7 +53,7 @@ if [ -z "$LAVA_FUNCS_INCLUDED" ]; then if [ -z "$logfile" ]; then logfile=/dev/stdout fi - echo $command >> $logfile; + echo "$command" >> "$logfile"; set +e docker_map_args="-v $lava:$lava -v $tarfiledir:$tarfiledir" @@ -61,7 +61,7 @@ if [ -z "$LAVA_FUNCS_INCLUDED" ]; then extradockerargs=""; fi - if [[ "$directory" = "$tarfiledir"* ]]; then true; else + if [[ "$directory" = $tarfiledir* ]]; then true; else docker_map_args="$docker_map_args -v $directory:$directory" fi if [ "$remote_machine" == "localhost" ]; then diff --git a/scripts/lava.sh b/scripts/lava.sh index 35cdfc71..688ab77e 100755 --- a/scripts/lava.sh +++ b/scripts/lava.sh @@ -32,7 +32,7 @@ version="2.0.0" trap '' PIPE -set -e # Exit on error +set -ex # Exit on error USAGE() { echo "$0 version $version" @@ -72,7 +72,10 @@ fi # Load lava-functions . `dirname $0`/funcs.sh -lava=$(dirname $(dirname $(readlink -f "$0"))) +absolute_path=$(readlink -f "$0") +scripts_path=$(dirname "$absolute_path") +lava=$(dirname "$scripts_path") +sql="$lava/tools/lavaODB/generated/lava.sql" # defaults ok=0 @@ -99,6 +102,7 @@ parse_args $@ if [ -z "$project_name" ]; then USAGE fi + . 
`dirname $0`/vars.sh if [[ $demo -eq 1 ]] @@ -132,7 +136,7 @@ RESET_DB() { progress "everything" 1 "Resetting lava db -- logging to $lf" run_remote "$buildhost" "dropdb -U $pguser -h $dbhost $db || true" "$lf" run_remote "$buildhost" "createdb -U $pguser -h $dbhost $db || true" "$lf" - run_remote "$buildhost" "psql -d $db -h $dbhost -f $lava/tools/lavaODB/generated/lava.sql -U $pguser" "$lf" + run_remote "$buildhost" "psql -d $db -h $dbhost -f \"$sql\" -U $pguser" "$lf" run_remote "$buildhost" "echo dbwipe complete" "$lf" } @@ -160,7 +164,7 @@ if [ $add_queries -eq 1 ]; then lf="$logs/add_queries.log" truncate "$lf" progress "everything" 1 "Adding queries to source with type $ATP and $project_name -- logging to $lf" - run_remote "$buildhost" "$scripts/add_queries.sh $ATP_TYPE $project_name" "$lf" + run_remote "$buildhost" "\"$scripts/add_queries.sh\" $ATP_TYPE $project_name" "$lf" if [ "$fixupscript" != "null" ]; then lf="$logs/fixups.log" truncate "$lf" @@ -180,16 +184,16 @@ if [ $make -eq 1 ]; then lf="$logs/make.log" truncate "$lf" # Note, adding the static flag is important. We are running the binaries on a PANDA VM, so we have no idea if it will have any libraries we need. - run_remote "$buildhost" "cd $sourcedir && CC=$llvm/bin/clang CXX=$llvm/bin/clang++ CFLAGS='-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. -I../include -I./src/ -static' $makecmd" "$lf" + run_remote "$buildhost" "cd \"$sourcedir\" && CC=$llvm/bin/clang CXX=$llvm/bin/clang++ CFLAGS='-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. 
-I../include -I./src/ -static' $makecmd" "$lf" + run_remote "$buildhost" "cd \"$sourcedir\" && rm -rf lava-install" "$lf" - run_remote "$buildhost" "cd $sourcedir && rm -rf lava-install" "$lf" if [ "$install_simple" == "null" ]; then - run_remote "$buildhost" "cd $sourcedir && $install" "$lf" + run_remote "$buildhost" "cd \"$sourcedir\" && $install" "$lf" else - run_remote "$buildhost" "cd $sourcedir && $install_simple" "$lf" + run_remote "$buildhost" "cd \"$sourcedir\" && $install_simple" "$lf" fi if [ "$post_install" != "null" ]; then - run_remote "$buildhost" "cd $sourcedir && $post_install" "$lf" + run_remote "$buildhost" "cd \"$sourcedir\" && $post_install" "$lf" fi tock echo "make complete $time_diff seconds" @@ -217,7 +221,7 @@ if [ $taint -eq 1 ]; then lf="$logs/bug_mining-$i.log" truncate "$lf" progress "everything" 1 "PANDA taint analysis prospective bug mining -- input $input -- logging to $lf" - run_remote "$buildhost" "$python $scripts/bug_mining.py $hostjson $project_name $input $curtail" "$lf" + run_remote "$buildhost" "$python $scripts/bug_mining.py $hostjson $project_name $input $curtail" "$lf" echo -n "Num Bugs in db: " bug_count=$(run_remote "$buildhost" "psql -At $db -U $pguser -h $dbhost -c 'select count(*) from bug'") if [ "$bug_count" = "0" ]; then @@ -246,7 +250,7 @@ if [ $inject -eq 1 ]; then if [ "$injfixupsscript" != "null" ]; then fix="--fixupsscript='$injfixupsscript'" fi - run_remote "$buildhost" "$python $scripts/inject.py -t $bugtypes -m $many -e $exitCode $kt $fix $hostjson $project_name" "$lf" + run_remote "$buildhost" "$python \"$scripts/inject.py\" -t $bugtypes -m $many -e $exitCode $kt $fix $hostjson $project_name" "$lf" grep yield "$lf" | grep " real bugs " done fi diff --git a/scripts/reset_db.sh b/scripts/reset_db.sh index 1048dda1..7354eca9 100644 --- a/scripts/reset_db.sh +++ b/scripts/reset_db.sh @@ -1,7 +1,10 @@ # Load lava-functions . 
`dirname $0`/funcs.sh -lava=$(dirname $(dirname $(readlink -f "$0"))) +absolute_path=$(readlink -f "$0") +scripts_path=$(dirname "$absolute_path") +lava=$(dirname "$scripts_path") +sql="$lava/tools/lavaODB/generated/lava.sql" # defaults ok=0 @@ -36,7 +39,7 @@ RESET_DB() { progress "everything" 1 "Resetting lava db -- logging to $lf" run_remote "$buildhost" "dropdb -U $pguser -h $dbhost $db || true" "$lf" run_remote "$buildhost" "createdb -U $pguser -h $dbhost $db || true" "$lf" - run_remote "$buildhost" "psql -d $db -h $dbhost -f $lava/tools/lavaODB/generated/lava.sql -U $pguser" "$lf" + run_remote "$buildhost" "psql -d $db -h $dbhost -f \"$sql\" -U $pguser" "$lf" run_remote "$buildhost" "echo dbwipe complete" "$lf" } diff --git a/scripts/vars.sh b/scripts/vars.sh index a04dfd0c..4c2e5ebc 100644 --- a/scripts/vars.sh +++ b/scripts/vars.sh @@ -1,6 +1,6 @@ #!/bin/sh # Set all our environment variables -# $lava, $json, and must be set prior to calling this +# $lava and $project_name must be set prior to calling this if [ -z ${project_name+x} ]; then echo "Fatal error: project_name variable unset when calling var.sh" @@ -13,76 +13,76 @@ if [ -z ${lava+x} ]; then fi hostjson="$lava/host.json" -if [ ! -f $hostjson ]; then +if [ ! -f "$hostjson" ]; then echo "Fatal error: host.json not found. 
Copy host.json.example to host.json" exit 1; fi # Host Vars -qemu="$(jq -r '.qemu' $hostjson)" -qcow_dir="$(jq -r '.qcow_dir // ""' $hostjson)" -output_dir="$(jq -r '.output_dir // ""' $hostjson)" -config_dir="$(jq -r '.config_dir // ""' $hostjson)/$project_name" -tar_dir="$(jq -r '.tar_dir // ""' $hostjson)" -db_suffix="$(jq -r '.db_suffix // ""' $hostjson)" -buildhost="$(jq -r '.buildhost // "localhost"' $hostjson)" -dockername="$(jq -r '.docker // "lava32"' $hostjson)" -pguser="$(jq -r '.pguser // "postgres"' $hostjson)" -pgpass="$(jq -r '.pgpass // "postgrespostgres"' $hostjson)" -dbhost="$(jq -r '.host // "database"' $hostjson)" +qemu="$(jq -r '.qemu' "$hostjson")" +qcow_dir="$(jq -r '.qcow_dir // ""' "$hostjson")" +output_dir="$(jq -r '.output_dir // ""' "$hostjson")" +config_dir="$(jq -r '.config_dir // ""' "$hostjson")/$project_name" +tar_dir="$(jq -r '.tar_dir // ""' "$hostjson")" +db_suffix="$(jq -r '.db_suffix // ""' "$hostjson")" +buildhost="$(jq -r '.buildhost // "localhost"' "$hostjson")" +dockername="$(jq -r '.docker // "lava32"' "$hostjson")" +pguser="$(jq -r '.pguser // "postgres"' "$hostjson")" +pgpass="$(jq -r '.pgpass // "postgrespostgres"' "$hostjson")" +dbhost="$(jq -r '.host // "database"' "$hostjson")" export PGUSER=$pguser export PGPASS=$pgpass json="${config_dir}/$project_name.json" -if [ ! -f $json ]; then - echo "Fatal error: $json not found. Did you provide the right project name?" +if [ ! -f "$json" ]; then + echo "Fatal error: "$json" not found. Did you provide the right project name?" 
exit 1; fi # Project specific -name="$(jq -r .name $json)" -db="$(jq -r .db $json)$db_suffix" -extradockerargs="$(jq -r .extra_docker_args $json)" -exitCode="$(jq -r .expected_exit_code $json)" -dataflow="$(jq -r '.dataflow // "false"' $json)" # TODO use everywhere, stop passing as argument +name="$(jq -r .name "$json")" +db="$(jq -r .db "$json")$db_suffix" +extradockerargs="$(jq -r .extra_docker_args "$json")" +exitCode="$(jq -r .expected_exit_code "$json")" +dataflow="$(jq -r '.dataflow // "false"' "$json")" # TODO use everywhere, stop passing as argument llvm="/usr/lib/llvm-11" # List of function names to blacklist for data_flow injection, merged as fn1\|fn2\|fn3 so we can use sed # Or an empty string if not present -df_fn_blacklist=`jq -r '.df_fn_blacklist // ""' $json` +df_fn_blacklist=`jq -r '.df_fn_blacklist // ""' "$json"` if [[ ! -z $df_fn_blacklist ]]; then - df_fn_blacklist=`jq -r '.df_fn_blacklist // "" | join ("\\\\|")' $json` + df_fn_blacklist=`jq -r '.df_fn_blacklist // "" | join ("\\\\|")' "$json"` fi tarfiledir="$tar_dir" -tarfile="$tarfiledir/$(jq -r '.tarfile' $json)" +tarfile="$tarfiledir/$(jq -r '.tarfile' "$json")" directory=$output_dir -inputs=`jq -r '.inputs' $json | jq 'join (" ")' | sed 's/\"//g' ` +inputs=`jq -r '.inputs' "$json" | jq 'join (" ")' | sed 's/\"//g' ` fixupscript="null" -if [ "$(jq -r .fixupscript $json)" != "null" ]; then - fixupscript="$config_dir/$(jq -r .fixupscript $json)" +if [ "$(jq -r .fixupscript "$json")" != "null" ]; then + fixupscript="$config_dir/$(jq -r .fixupscript "$json")" fi bug_build="$output_dir/$name/$name/bugs/" # TODO why does this have name twice? 
injfixupsscript="null" -if [ "$(jq -r .injfixupsscript $json)" != "null" ]; then - injfixupsscript="$config_dir/$(jq -r .injfixupsscript $json)" +if [ "$(jq -r .injfixupsscript "$json")" != "null" ]; then + injfixupsscript="$config_dir/$(jq -r .injfixupsscript "$json")" # replace {bug_build} with string injfixupsscript="${injfixupsscript/\{bug_build\}/$bug_build}" fi logs="$output_dir/$name/logs" -makecmd="$(jq -r .make $json)" -install=$(jq -r .install $json) +makecmd="$(jq -r .make "$json")" +install=$(jq -r .install "$json") install="${install/\{config_dir\}/$config_dir}" # Format string replacement for config_dir -post_install="$(jq -r .post_install $json)" -install_simple=$(jq -r .install_simple $json) -configure_cmd=$(jq -r '.configure // "/bin/true"' $json) +post_install="$(jq -r .post_install "$json")" +install_simple=$(jq -r .install_simple "$json") +configure_cmd=$(jq -r '.configure // "/bin/true"' "$json") # Constants scripts="$lava/scripts" diff --git a/tools/btrace/sw-btrace b/tools/btrace/sw-btrace index 5fd5aeb4..f66c2af6 100755 --- a/tools/btrace/sw-btrace +++ b/tools/btrace/sw-btrace @@ -13,11 +13,8 @@ scriptDir() { cd "$(dirname "$path")" && pwd } -bin_dir=$(scriptDir) -libexec_dir=$bin_dir/../libexec - -BTRACE_LOG=$PWD/btrace.log -export BTRACE_LOG +bin_dir="$(scriptDir)" +export BTRACE_LOG="$PWD/btrace.log" # This code copies fakeroot's behavior by adding the library to the front of the # LD_PRELOAD variable. @@ -33,14 +30,11 @@ if test "$system" = "FreeBSD" -o "$system" = "Linux"; then # FreeBSD's ld.so man page states that LD_PRELOAD is either colon or # whitespace delimited. Linux's ld.so man page states that LD_PRELOAD is # whitespace delimited, but Linux appears to accept either delimiter. 
- preload_lib=$libexec_dir/libsw-btrace.so - LD_PRELOAD=$preload_lib${LD_PRELOAD:+ $LD_PRELOAD} - export LD_PRELOAD + ln -sf "$bin_dir/../libexec/libsw-btrace.so" /tmp/libsw-btrace.so + export LD_PRELOAD="$LD_PRELOAD:/tmp/libsw-btrace.so" elif test "$system" = "Darwin"; then # On OS X, the DYLD_INSERT_LIBRARIES variable functions like LD_PRELOAD. - preload_lib=$libexec_dir/libsw-btrace.dylib - DYLD_INSERT_LIBRARIES=$preload_lib${DYLD_INSERT_LIBRARIES:+:$DYLD_INSERT_LIBRARIES} - export DYLD_INSERT_LIBRARIES + export DYLD_INSERT_LIBRARIES="$bin_dir/../libexec/libsw-btrace.dylib${DYLD_INSERT_LIBRARIES:+:$DYLD_INSERT_LIBRARIES}" else echo "Unsupported operating system: $system" exit 1 diff --git a/tools/btrace/sw-btrace-to-compiledb b/tools/btrace/sw-btrace-to-compiledb index f7b2015b..126041ef 100755 --- a/tools/btrace/sw-btrace-to-compiledb +++ b/tools/btrace/sw-btrace-to-compiledb @@ -4,11 +4,6 @@ import json import os import sys -if sys.version_info.major >= 3: - # Provide aliases for things that Python 3 renamed. 
- unicode = str - xrange = range - kDrivers = [ "cc", "c++", "cc1", "gcc", "g++", @@ -21,7 +16,7 @@ kAssemblyExtensions = [".s", ".S"] class Command(object): def __init__(self, cwd, parent, argv, line): - assert isinstance(cwd, unicode) + assert isinstance(cwd, str) assert parent is None or isinstance(parent, Command) self.cwd = cwd self.parent = parent @@ -38,7 +33,7 @@ def readPidList(pidlist): """ assert len(pidlist) % 2 == 0 and len(pidlist) >= 2 ret = [(pidlist[0], pidlist[1])] - for i in xrange(2, len(pidlist), 2): + for i in range(2, len(pidlist), 2): ret.append((ret[-1][0] + pidlist[i], ret[-1][1] + pidlist[i + 1])) return ret @@ -46,23 +41,19 @@ def readPidList(pidlist): def readFile(path): commandList = [] commandIDToCommand = {} - fp = open(path) line = 0 - while True: - recordString = unicode(fp.readline()) - if recordString == "": - break - line += 1 - record = json.loads(recordString) - parent = None - procList = readPidList(record["pidlist"]) - for procID in procList[:-1]: - parent = commandIDToCommand.get(procID, parent) - command = Command(record["cwd"], parent, record["argv"], line) - commandList.append(command) - commandIDToCommand[procList[-1]] = command - + with open(path) as fp: + for recordString in fp: + line += 1 + record = json.loads(recordString) + parent = None + procList = readPidList(record["pidlist"]) + for procID in procList[:-1]: + parent = commandIDToCommand.get(procID, parent) + command = Command(record["cwd"], parent, record["argv"], line) + commandList.append(command) + commandIDToCommand[procList[-1]] = command return commandList