Skip to content

Commit

Permalink
Always call CentipedeFinalizeProcessing to report FuzzTest input pr…
Browse files Browse the repository at this point in the history
…operly.

This is needed because otherwise Centipede would crash on exceeded memory/time limits after FuzzTest has cleaned up the current input, leading to undesired "setup failure" reports.
However, we don't want Centipede to prepare the coverage twice (which would result in garbage coverage). Thus input_start_time is used as a guard - it should be reset to 0 after each iteration anyway.

PiperOrigin-RevId: 703558946
  • Loading branch information
xinhaoyuan authored and copybara-github committed Dec 12, 2024
1 parent 4c740a0 commit 7144810
Show file tree
Hide file tree
Showing 9 changed files with 173 additions and 16 deletions.
5 changes: 5 additions & 0 deletions centipede/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,7 @@ cc_library(
":environment",
":mutation_input",
":runner_result",
":stop",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_fuzztest//common:defs",
Expand Down Expand Up @@ -1121,6 +1122,7 @@ cc_library(
":thread_pool",
":util",
":workdir",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/random",
Expand Down Expand Up @@ -1688,8 +1690,11 @@ cc_test(
":feature",
":seed_corpus_maker_lib",
":workdir",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
"@com_google_fuzztest//common:defs",
"@com_google_fuzztest//common:logging",
"@com_google_fuzztest//common:remote_file",
"@com_google_fuzztest//common:test_util",
"@com_google_googletest//:gtest_main",
],
Expand Down
6 changes: 6 additions & 0 deletions centipede/centipede_default_callbacks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "./centipede/environment.h"
#include "./centipede/mutation_input.h"
#include "./centipede/runner_result.h"
#include "./centipede/stop.h"
#include "./common/defs.h"
#include "./common/logging.h" // IWYU pragma: keep

Expand Down Expand Up @@ -83,6 +84,11 @@ void CentipedeDefaultCallbacks::Mutate(
LOG_FIRST_N(WARNING, 5)
<< "Custom mutator returned no mutants: falling back to internal "
"default mutator";
} else if (ShouldStop()) {
LOG(WARNING) << "Custom mutator failed, but ignored since the stop "
"condition it met. Possibly what triggered the stop "
"condition also interrupted the mutator.";
return;
} else {
LOG(WARNING) << "Custom mutator undetected or misbehaving:";
CHECK(!custom_mutator_is_usable_.has_value())
Expand Down
8 changes: 6 additions & 2 deletions centipede/runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,9 @@ static void RunOneInput(const uint8_t *data, size_t size,
int target_return_value = callbacks.Execute({data, size}) ? 0 : -1;
state.stats.exec_time_usec = UsecSinceLast();
CheckWatchdogLimits();
PostProcessCoverage(target_return_value);
if (centipede::state.input_start_time.exchange(0) != 0) {
PostProcessCoverage(target_return_value);
}
state.stats.post_time_usec = UsecSinceLast();
state.stats.peak_rss_mb = GetPeakRSSMb();
}
Expand Down Expand Up @@ -1235,7 +1237,9 @@ extern "C" void CentipedePrepareProcessing() {

// Finalizes the processing of the current input: checks the watchdog limits
// and post-processes the coverage.
//
// `state.input_start_time` doubles as a "not finalized yet" guard: it is
// atomically swapped to 0 here, and the coverage is post-processed only when
// the previous value was non-zero. This keeps the coverage from being
// post-processed twice for the same input (which would produce garbage
// coverage) when this function is called early and the regular per-input
// post-processing runs afterwards.
extern "C" void CentipedeFinalizeProcessing() {
  centipede::CheckWatchdogLimits();
  if (centipede::state.input_start_time.exchange(0) != 0) {
    centipede::PostProcessCoverage(/*target_return_value=*/0);
  }
}

extern "C" size_t CentipedeGetExecutionResult(uint8_t *data, size_t capacity) {
Expand Down
3 changes: 3 additions & 0 deletions centipede/runner_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ extern "C" void CentipedeEndExecutionBatch();
extern "C" void CentipedePrepareProcessing();

// Finalizes the processing of an input and stores the state internally.
//
// For tool integration, it can be called inside `RunnerCallbacks::Execute()` to
// finalize the execution early, before any extra cleanup is performed.
extern "C" void CentipedeFinalizeProcessing();

// Retrieves the execution results (including coverage information) after
Expand Down
85 changes: 73 additions & 12 deletions centipede/seed_corpus_maker_lib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <variant>
#include <vector>

#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/random/random.h"
Expand Down Expand Up @@ -115,21 +116,54 @@ absl::Status SampleSeedCorpusElementsFromSource( //
LOG(INFO) << "Selected " << src_dirs.size() << " corpus dir(s)";
}

// Find all the corpus shard files in the found dirs.
// Find all the corpus shard and individual input files in the found dirs.

std::vector<std::string> corpus_shard_fnames;
std::vector<std::string> individual_input_fnames;
for (const auto& dir : src_dirs) {
const std::string shards_glob = fs::path{dir} / source.shard_rel_glob;
// NOTE: `RemoteGlobMatch` appends to the output list.
const auto prev_num_shards = corpus_shard_fnames.size();
RETURN_IF_NOT_OK(RemoteGlobMatch(shards_glob, corpus_shard_fnames));
LOG(INFO) << "Found " << (corpus_shard_fnames.size() - prev_num_shards)
<< " shard(s) matching " << shards_glob;
absl::flat_hash_set<std::string> current_corpus_shard_fnames;
if (!source.shard_rel_glob.empty()) {
std::vector<std::string> matched_fnames;
const std::string glob = fs::path{dir} / source.shard_rel_glob;
const auto match_status = RemoteGlobMatch(glob, matched_fnames);
if (!match_status.ok() && !absl::IsNotFound(match_status)) {
LOG(ERROR) << "Got error when glob-matching in " << dir << ": "
<< match_status;
} else {
current_corpus_shard_fnames.insert(matched_fnames.begin(),
matched_fnames.end());
corpus_shard_fnames.insert(corpus_shard_fnames.end(),
matched_fnames.begin(),
matched_fnames.end());
LOG(INFO) << "Found " << matched_fnames.size() << " shard(s) matching "
<< glob;
}
}
if (!source.individual_input_rel_glob.empty()) {
std::vector<std::string> matched_fnames;
const std::string glob = fs::path{dir} / source.individual_input_rel_glob;
const auto match_status = RemoteGlobMatch(glob, matched_fnames);
if (!match_status.ok() && !absl::IsNotFound(match_status)) {
LOG(ERROR) << "Got error when glob-matching in " << dir << ": "
<< match_status;
} else {
size_t num_added_individual_inputs = 0;
for (auto& fname : matched_fnames) {
if (current_corpus_shard_fnames.contains(fname)) continue;
if (RemotePathIsDirectory(fname)) continue;
++num_added_individual_inputs;
individual_input_fnames.push_back(std::move(fname));
}
LOG(INFO) << "Found " << num_added_individual_inputs
<< " individual input(s) with glob " << glob;
}
}
}
LOG(INFO) << "Found " << corpus_shard_fnames.size()
<< " shard(s) total in source " << source.dir_glob;
LOG(INFO) << "Found " << corpus_shard_fnames.size() << " shard(s) and "
<< individual_input_fnames.size()
<< " individual input(s) total in source " << source.dir_glob;

if (corpus_shard_fnames.empty()) {
if (corpus_shard_fnames.empty() && individual_input_fnames.empty()) {
LOG(WARNING) << "Skipping empty source " << source.dir_glob;
return absl::OkStatus();
}
Expand All @@ -140,10 +174,12 @@ absl::Status SampleSeedCorpusElementsFromSource( //
const auto num_shards = corpus_shard_fnames.size();
std::vector<InputAndFeaturesVec> src_elts_per_shard(num_shards);
std::vector<size_t> src_elts_with_features_per_shard(num_shards, 0);
InputAndFeaturesVec src_elts;

{
constexpr int kMaxReadThreads = 32;
ThreadPool threads{std::min<int>(kMaxReadThreads, num_shards)};
ThreadPool threads{std::min<int>(
kMaxReadThreads, std::max(num_shards, individual_input_fnames.size()))};

for (int shard = 0; shard < num_shards; ++shard) {
const auto& corpus_fname = corpus_shard_fnames[shard];
Expand Down Expand Up @@ -193,11 +229,27 @@ absl::Status SampleSeedCorpusElementsFromSource( //

threads.Schedule(read_shard);
}

RPROF_SNAPSHOT_AND_LOG("Done reading shards");

src_elts.resize(individual_input_fnames.size());
for (size_t index = 0; index < individual_input_fnames.size(); ++index) {
threads.Schedule([index, &individual_input_fnames, &src_elts] {
ByteArray input;
const auto& path = individual_input_fnames[index];
const auto read_status = RemoteFileGetContents(path, input);
if (!read_status.ok()) {
LOG(WARNING) << "Skipping individual input path " << path
<< " due to read error: " << read_status;
return;
}
src_elts[index] = {std::move(input), {}};
});
}
}

RPROF_SNAPSHOT_AND_LOG("Done reading");

InputAndFeaturesVec src_elts;
size_t src_num_features = 0;

for (int s = 0; s < num_shards; ++s) {
Expand All @@ -217,6 +269,15 @@ absl::Status SampleSeedCorpusElementsFromSource( //

RPROF_SNAPSHOT_AND_LOG("Done merging");

// Remove empty inputs possibly due to read errors.
auto remove_it =
std::remove_if(src_elts.begin(), src_elts.end(),
[](const auto& elt) { return std::get<0>(elt).empty(); });
if (remove_it != src_elts.end()) {
LOG(WARNING) << "Removed " << src_elts.end() - remove_it << " empty inputs";
src_elts.erase(remove_it, src_elts.end());
}

LOG(INFO) << "Read total of " << src_elts.size() << " elements ("
<< src_num_features << " with features) from source "
<< source.dir_glob;
Expand Down
7 changes: 6 additions & 1 deletion centipede/seed_corpus_maker_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,16 @@ namespace centipede {
// Native struct used by the seed corpus library for seed corpus source.
//
// TODO(b/362576261): Currently this is mirroring the `proto::SeedCorpusSource`
// proto. But in the future it may change with the core seeding API - any
// difference is commented below.
struct SeedCorpusSource {
  // Glob used to find the source corpus dirs.
  std::string dir_glob;
  // Number of the found dirs to use. Zero-initialized so that a
  // default-constructed source never carries an indeterminate value.
  uint32_t num_recent_dirs = 0;
  // Glob, relative to each source dir, used to find the corpus shard files.
  std::string shard_rel_glob;
  // If non-empty, will be used to glob the individual input files (with one
  // input in each file) in the source dirs. Any files matching `shard_rel_glob`
  // will be skipped.
  std::string individual_input_rel_glob;
  // Either the fraction (float) or the absolute count (uint32_t) of the
  // source's elements to sample.
  std::variant<float, uint32_t> sampled_fraction_or_count;
};

Expand Down
69 changes: 69 additions & 0 deletions centipede/seed_corpus_maker_lib_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@
#include <filesystem> // NOLINT
#include <string>
#include <string_view>
#include <vector>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/log/check.h"
#include "absl/strings/str_cat.h"
#include "./centipede/feature.h"
#include "./centipede/workdir.h"
#include "./common/defs.h"
#include "./common/logging.h" // IWYU pragma: keep
#include "./common/remote_file.h"
#include "./common/test_util.h"

namespace centipede {
Expand All @@ -36,6 +40,7 @@ namespace {
namespace fs = std::filesystem;

using ::testing::IsSubsetOf;
using ::testing::IsSupersetOf;

inline constexpr auto kIdxDigits = WorkDir::kDigitsInShardIndex;

Expand Down Expand Up @@ -178,5 +183,69 @@ TEST(SeedCorpusMakerLibTest, RoundTripWriteReadWrite) {
}
}

// Verifies that `SampleSeedCorpusElementsFromSource()` loads both the sharded
// corpus elements and the individual input files found in one source dir,
// that files matching the shard glob are not re-read as individual inputs, and
// that empty individual inputs are dropped from the result.
TEST(SeedCorpusMakerLibTest, LoadsBothIndividualInputsAndShardsFromSource) {
  const fs::path test_dir = GetTestTempDir(test_info_->name());
  // The destination and source below use paths relative to the CWD, so a
  // failed `chdir()` must abort the test rather than silently operate on some
  // other directory.
  ASSERT_EQ(chdir(test_dir.c_str()), 0);

  const InputAndFeaturesVec kShardedInputs = {
      {{0}, {}},
      {{1}, {feature_domains::kNoFeature}},
      {{0, 1}, {0x11, 0x23}},
  };
  constexpr std::string_view kCovBin = "bin";
  constexpr std::string_view kCovHash = "hash";
  constexpr std::string_view kRelDir = "dir/foo";

  const std::vector<ByteArray> kIndividualInputs = {
      {0, 1, 2},
      {0, 1, 2, 3},
      // Empty input expected to be not in the sample result.
      {}};
  // Write sharded inputs.
  {
    constexpr size_t kNumShards = 2;
    const SeedCorpusDestination destination = {
        .dir_path = std::string(kRelDir),
        .shard_rel_glob = absl::StrCat("distilled-", kCovBin, ".*"),
        .shard_index_digits = kIdxDigits,
        .num_shards = kNumShards,
    };
    CHECK_OK(WriteSeedCorpusElementsToDestination(  //
        kShardedInputs, kCovBin, kCovHash, destination));
    const std::string workdir = (test_dir / kRelDir).c_str();
    ASSERT_NO_FATAL_FAILURE(VerifyShardsExist(  //
        workdir, kCovBin, kCovHash, kNumShards, ShardType::kDistilled));
  }

  // Write individual inputs, one file per input, next to the shard files.
  // `size_t` index avoids a signed/unsigned comparison against `size()`.
  for (size_t i = 0; i < kIndividualInputs.size(); ++i) {
    const auto path = std::filesystem::path(test_dir) / kRelDir /
                      absl::StrCat("individual_input_", i);
    CHECK_OK(RemoteFileSetContents(path.string(), kIndividualInputs[i]));
  }

  // Test that sharded and individual inputs match what we wrote.
  {
    InputAndFeaturesVec elements;
    ASSERT_OK(SampleSeedCorpusElementsFromSource(  //
        SeedCorpusSource{
            .dir_glob = std::string(kRelDir),
            .num_recent_dirs = 1,
            .shard_rel_glob = absl::StrCat("distilled-", kCovBin, ".*"),
            // Intentionally try to match the shard files and test if they will
            // be read as individual inputs.
            .individual_input_rel_glob = "*",
            .sampled_fraction_or_count = 1.0f,
        },
        kCovBin, kCovHash, elements));
    // 3 sharded inputs + 2 non-empty individual inputs.
    EXPECT_EQ(elements.size(), 5);  // Non-empty inputs
    EXPECT_THAT(elements, IsSupersetOf(kShardedInputs));
    EXPECT_THAT(elements, IsSupersetOf(InputAndFeaturesVec{
                              {{0, 1, 2}, {}},
                              {{0, 1, 2, 3}, {}},
                          }));
  }
}

} // namespace
} // namespace centipede
4 changes: 4 additions & 0 deletions common/remote_file_oss.cc
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,10 @@ absl::Status RemoteGlobMatch(std::string_view glob,
if (int ret = ::glob(std::string{glob}.c_str(), GLOB_TILDE, HandleGlobError,
&glob_ret);
ret != 0) {
if (ret == GLOB_NOMATCH) {
return absl::NotFoundError(absl::StrCat(
"glob() returned NOMATCH for pattern: ", std::string(glob)));
}
return absl::UnknownError(absl::StrCat(
"glob() failed, pattern: ", std::string(glob), ", returned: ", ret));
}
Expand Down
2 changes: 1 addition & 1 deletion fuzztest/internal/centipede_adaptor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ class CentipedeFixtureDriver : public UntypedFixtureDriver {
if (runtime_.skipping_requested()) {
CentipedeSetExecutionResult(nullptr, 0);
}
if (!runner_mode) CentipedeFinalizeProcessing();
CentipedeFinalizeProcessing();
}

void TearDownFuzzTest() override { orig_fixture_driver_->TearDownFuzzTest(); }
Expand Down

0 comments on commit 7144810

Please sign in to comment.