diff --git a/CMakeLists.txt b/CMakeLists.txt index c535a9ab..5a8ca4ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,8 @@ if(NOT EXISTS "${PROJECT_SOURCE_DIR}/third_party/indicators/CMakeLists.txt") message(FATAL_ERROR "git submodule update --init --recursive must be run first to checkout submodules") endif() +set(CMAKE_CXX_STANDARD 20) + add_subdirectory(third_party/Catch2) list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/third_party/Catch2/contrib") add_subdirectory(third_party/indicators) diff --git a/Dockerfile b/Dockerfile index 13315a4a..266d272a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,6 +20,10 @@ RUN update-ca-certificates RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 10 RUN python3 -m pip install pip && python3 -m pip install pytest +WORKDIR /blight +RUN git clone https://github.com/trailofbits/blight.git . +RUN pip3 install . + COPY . /polytracker RUN mkdir /polytracker/build @@ -27,9 +31,6 @@ WORKDIR /polytracker/build RUN cmake -GNinja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_VERBOSE_MAKEFILE=TRUE -DCXX_LIB_PATH=/cxx_libs .. RUN ninja install -WORKDIR /blight -RUN git clone https://github.com/trailofbits/blight.git . -RUN pip3 install . WORKDIR /polytracker RUN pip3 install . diff --git a/README.md b/README.md index 83ea36e7..71b53d0c 100644 --- a/README.md +++ b/README.md @@ -206,6 +206,8 @@ variables PolyTracker supports is: POLYDB: A path to which to save the output database (default is polytracker.tdag) WLLVM_ARTIFACT_STORE: Provides a path to an existing directory to store artifact/manifest for all build targets + +POLYTRACKER_TAINT_ARGV: Set to '1' to use argv as a taint source. 
``` Polytracker will set its configuration parameters in the following order: diff --git a/polytracker/custom_abi/dfsan_abilist.txt b/polytracker/custom_abi/dfsan_abilist.txt index 8da32a00..85a9b887 100644 --- a/polytracker/custom_abi/dfsan_abilist.txt +++ b/polytracker/custom_abi/dfsan_abilist.txt @@ -62,6 +62,8 @@ fun:__polytracker_store_blob=uninstrumented fun:__polytracker_store_blob=discard fun:__polytracker_preserve_map=uninstrumented fun:__polytracker_preserve_map=discard +fun:__polytracker_taint_argv=uninstrumented +fun:__polytracker_taint_argv=discard fun:__dfsan_update_label_count=uninstrumented fun:__dfsan_update_label_count=discard diff --git a/polytracker/custom_abi/polytracker_abilist.txt b/polytracker/custom_abi/polytracker_abilist.txt index e9690f6a..46693305 100644 --- a/polytracker/custom_abi/polytracker_abilist.txt +++ b/polytracker/custom_abi/polytracker_abilist.txt @@ -62,6 +62,8 @@ fun:__polytracker_store_blob=uninstrumented fun:__polytracker_store_blob=discard fun:__polytracker_preserve_map=uninstrumented fun:__polytracker_preserve_map=discard +fun:__polytracker_taint_argv=uninstrumented +fun:__polytracker_taint_argv=discard fun:__remill_jump=uninstrumented fun:__remill_jump=discard diff --git a/polytracker/include/polytracker/polytracker.h b/polytracker/include/polytracker/polytracker.h index d2871a9a..409e3bfc 100644 --- a/polytracker/include/polytracker/polytracker.h +++ b/polytracker/include/polytracker/polytracker.h @@ -46,3 +46,6 @@ extern uint64_t func_mapping_count; extern const block_mapping *block_mappings; extern uint64_t block_mapping_count; + +// Controls argv being a taint source +extern bool polytracker_taint_argv; \ No newline at end of file diff --git a/polytracker/include/polytracker/taint_sources.h b/polytracker/include/polytracker/taint_sources.h index a5917d5b..dd09f3c8 100644 --- a/polytracker/include/polytracker/taint_sources.h +++ b/polytracker/include/polytracker/taint_sources.h @@ -8,4 +8,8 @@ #define EXT_C_FUNC 
extern "C" __attribute__((visibility("default"))) #define EXT_CXX_FUNC extern __attribute__((visibility("default"))) +namespace polytracker { +void taint_argv(int argc, char *argv[]); +} + #endif \ No newline at end of file diff --git a/polytracker/include/taintdag/polytracker.h b/polytracker/include/taintdag/polytracker.h index dd95c5fc..e6480684 100644 --- a/polytracker/include/taintdag/polytracker.h +++ b/polytracker/include/taintdag/polytracker.h @@ -2,6 +2,7 @@ #define POLYTRACKER_TAINTDAG_POLYTRACKER_H #include +#include #include "taintdag/fdmapping.hpp" #include "taintdag/output.hpp" @@ -32,6 +33,13 @@ class PolyTracker { std::optional source_taint(int fd, source_offset_t offset, size_t length); + // Create a new taint source (not a file) and assigns taint labels + // A new taint source named 'name' is created + // Memory in 'dst' is assigned source taint labels referring to source 'name' + // and in increasing offset. + std::optional create_taint_source(std::string_view name, + std::span dst); + // Update the label, it affects control flow void affects_control_flow(label_t taint_label); diff --git a/polytracker/src/CMakeLists.txt b/polytracker/src/CMakeLists.txt index 316a052d..ef77a9c7 100644 --- a/polytracker/src/CMakeLists.txt +++ b/polytracker/src/CMakeLists.txt @@ -1,5 +1,3 @@ -set(CMAKE_CXX_STANDARD 17) - find_package(LLVM 13 CONFIG) if(LLVM_FOUND) @@ -50,7 +48,8 @@ set(CMAKE_EXE_LINKER_FLAGS set(POLY_SOURCES ${POLY_DIR}/main.cpp ${POLY_DIR}/polytracker.cpp) set(TAINT_SOURCES ${TAINT_DIR}/taint_sources.cpp ${TAINT_DIR}/string_taints.cpp - ${TAINT_DIR}/memory_taints.cpp ${TAINT_DIR}/write_taints.cpp) + ${TAINT_DIR}/memory_taints.cpp ${TAINT_DIR}/write_taints.cpp + ${TAINT_DIR}/argv.cpp) set(TAINTDAG_SOURCES ${TAINTDAG_DIR}/encoding.cpp ${TAINTDAG_DIR}/fdmapping.cpp ${TAINTDAG_DIR}/output.cpp ${TAINTDAG_DIR}/polytracker.cpp) diff --git a/polytracker/src/passes/polytracker_pass.cpp b/polytracker/src/passes/polytracker_pass.cpp index 74bbfa86..92d75d1a 
100644
--- a/polytracker/src/passes/polytracker_pass.cpp
+++ b/polytracker/src/passes/polytracker_pass.cpp
@@ -5,6 +5,7 @@
 // #include "polytracker/thread_pool.h"
 #include "spdlog/cfg/env.h"
 #include "spdlog/spdlog.h"
+#include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
@@ -289,12 +290,54 @@ bool PolytrackerPass::analyzeBlock(llvm::Function *func,
   return true;
 }
 
+// Inserts a call to __polytracker_taint_argv(argc, argv) at the top of main.
+// Assumes main is actually the main function of the program and
+// interprets first arg as argc and second as argv.
+static void emitTaintArgvCall(llvm::Function &main) {
+  // llvm::Function::getArg() never returns null; an out-of-range index
+  // trips an assertion (or yields an invalid pointer when assertions are
+  // compiled out). `int main(void)` is a valid entry point without
+  // argc/argv, so check the arity before touching the arguments.
+  if (main.arg_size() < 2) {
+    spdlog::error("Failed to instrument argv. main has no argc/argv.");
+    return;
+  }
+  auto argc = main.getArg(0);
+  auto argc_ty = argc->getType();
+  auto argv = main.getArg(1);
+  auto argv_ty = argv->getType();
+
+  // IRBuilder for emitting a call to __polytracker_taint_argv. Need to
+  // specify insertion point first, to ensure that no instruction can
+  // use argv before it is tainted.
+  llvm::IRBuilder<> irb(&*(main.getEntryBlock().getFirstInsertionPt()));
+
+  // Define the target function type and make it available in the module.
+  // The parameter types are taken from main itself so the emitted call
+  // always matches the arguments being forwarded.
+  auto taint_argv_ty =
+      llvm::FunctionType::get(irb.getVoidTy(), {argc_ty, argv_ty}, false);
+  llvm::FunctionCallee taint_argv = main.getParent()->getOrInsertFunction(
+      "__polytracker_taint_argv", taint_argv_ty);
+  if (!taint_argv) {
+    spdlog::error("Failed to declare __polytracker_taint_argv.");
+    return;
+  }
+
+  // Emit the call using parameters from main.
+  auto ci = irb.CreateCall(taint_argv, {argc, argv});
+  if (!ci) {
+    spdlog::error("Failed to insert call to taint_argv.");
+  }
+}
+
 /*
 We should instrument everything we have bitcode for, right?
If instructions have __polytracker, or they have __dfsan, ignore! */ bool PolytrackerPass::analyzeFunction(llvm::Function *f, const func_index_t &func_index) { + // Add Function entry polytracker::BBSplittingPass bbSplitter; // llvm::removeUnreachableBlocks(*f); @@ -340,6 +383,11 @@ bool PolytrackerPass::analyzeFunction(llvm::Function *f, visit(inst); } + // If this is the main function, insert a taint-argv call + if (f && f->getName() == "main") { + emitTaintArgvCall(*f); + } + return true; } diff --git a/polytracker/src/polytracker/main.cpp b/polytracker/src/polytracker/main.cpp index 1b715dd9..6cdb7d2b 100644 --- a/polytracker/src/polytracker/main.cpp +++ b/polytracker/src/polytracker/main.cpp @@ -28,10 +28,14 @@ DECLARE_EARLY_CONSTRUCT(std::string, polytracker_db_name); DECLARE_EARLY_CONSTRUCT(std::string, polytracker_stderr_sink); DECLARE_EARLY_CONSTRUCT(std::string, polytracker_stdout_sink); +// Controls argv being a taint source +bool polytracker_taint_argv = false; + uint64_t byte_start = 0; uint64_t byte_end = 0; bool polytracker_trace = false; bool polytracker_trace_func = false; + /** * Whether or not to save the input files to the output database */ @@ -65,6 +69,10 @@ void polytracker_parse_env() { if (auto err = getenv("POLYTRACKER_STDERR_SINK")) { get_polytracker_stderr_sink() = err; } + + if (auto argv = getenv("POLYTRACKER_TAINT_ARGV")) { + polytracker_taint_argv = argv[0] == '1'; + } } /* diff --git a/polytracker/src/polytracker/polytracker.cpp b/polytracker/src/polytracker/polytracker.cpp index 8c973dc2..0c262feb 100644 --- a/polytracker/src/polytracker/polytracker.cpp +++ b/polytracker/src/polytracker/polytracker.cpp @@ -1,5 +1,6 @@ #include "polytracker/polytracker.h" #include "polytracker/early_construct.h" +#include "polytracker/taint_sources.h" #include "taintdag/polytracker.h" #include #include @@ -87,6 +88,10 @@ extern "C" void __polytracker_start(func_mapping const *globals, no_control_flow_tracing); } +extern "C" void 
__polytracker_taint_argv(int argc, char *argv[]) { + polytracker::taint_argv(argc, argv); +} + extern "C" void __polytracker_store_function_mapping(const func_mapping *func_map, uint64_t *count) { diff --git a/polytracker/src/taint_sources/argv.cpp b/polytracker/src/taint_sources/argv.cpp new file mode 100644 index 00000000..a2339c38 --- /dev/null +++ b/polytracker/src/taint_sources/argv.cpp @@ -0,0 +1,32 @@ +#include + +#include "polytracker/early_construct.h" +#include "polytracker/polytracker.h" +#include "taintdag/polytracker.h" + +EARLY_CONSTRUCT_EXTERN_GETTER(taintdag::PolyTracker, polytracker_tdag); + +namespace polytracker { + +void taint_argv(int argc, char *argv[]) { + + // The check could be done in the calling code, for performance reasons. + // However this function should only ever be invoked once (from main). + if (!polytracker_taint_argv) + return; + + if (argc <= 0) { + // Weird. Not much to do though. + return; + } + + auto &polyt = get_polytracker_tdag(); + + for (int i = 0; i < argc; ++i) { + auto name = std::string{"argv["} + std::to_string(i) + "]"; + // NOTE(hbrodin): Currently not tainting terminating null char. 
+ polyt.create_taint_source( + name, {reinterpret_cast(argv[i]), strlen(argv[i])}); + } +} +} // namespace polytracker \ No newline at end of file diff --git a/polytracker/src/taintdag/CMakeLists.txt b/polytracker/src/taintdag/CMakeLists.txt index 2bf242e5..a7c2c6ac 100644 --- a/polytracker/src/taintdag/CMakeLists.txt +++ b/polytracker/src/taintdag/CMakeLists.txt @@ -1,5 +1,4 @@ -set(CMAKE_CXX_STANDARD 17) add_subdirectory(test) add_library(taintdag STATIC encoding.cpp fdmapping.cpp output.cpp print.cpp) \ No newline at end of file diff --git a/polytracker/src/taintdag/polytracker.cpp b/polytracker/src/taintdag/polytracker.cpp index d8fe9686..5bcca294 100644 --- a/polytracker/src/taintdag/polytracker.cpp +++ b/polytracker/src/taintdag/polytracker.cpp @@ -120,6 +120,28 @@ PolyTracker::source_taint(int fd, source_offset_t offset, size_t length) { return create_source_taint(fd, offset, length); } +std::optional +PolyTracker::create_taint_source(std::string_view name, + std::span dst) { + // Reserve a contiguous range of labels for this source + auto rng = tdag_.reserve_source_labels(dst.size()); + + // Register the source by name (and its preallocated range). 
+ auto idx = fdm_.add_mapping(-1, name, rng); + if (!idx) + return {}; + + // Construct the allocated labels as source labels belonging to source 'idx' + tdag_.assign_source_labels(rng, *idx, 0); + + // Mark memory with corresponding labels + auto lbl = rng.first; + for (auto &c : dst) { + dfsan_set_label(lbl++, &c, sizeof(char)); + } + return rng; +} + void PolyTracker::taint_sink(int fd, sink_offset_t offset, void const *mem, size_t length) { auto idx = fdm_.mapping_idx(fd); diff --git a/polytracker/src/taintdag/test/CMakeLists.txt b/polytracker/src/taintdag/test/CMakeLists.txt index 9d1ab88a..6c748f93 100644 --- a/polytracker/src/taintdag/test/CMakeLists.txt +++ b/polytracker/src/taintdag/test/CMakeLists.txt @@ -1,5 +1,3 @@ -set(CMAKE_CXX_STANDARD 17) - include(CTest) include(Catch) diff --git a/tests/test_argv.cpp b/tests/test_argv.cpp new file mode 100644 index 00000000..a0987492 --- /dev/null +++ b/tests/test_argv.cpp @@ -0,0 +1,10 @@ +#include +#include + +int main(int argc, char *argv[]) { + auto f = fopen("outputfile.txt", "w"); + for (int i=0;i