Skip to content

Commit

Permalink
No public description
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 707574993
  • Loading branch information
xinhaoyuan authored and copybara-github committed Dec 19, 2024
1 parent 3b4a590 commit ba554ec
Show file tree
Hide file tree
Showing 17 changed files with 632 additions and 431 deletions.
5 changes: 5 additions & 0 deletions centipede/centipede_callbacks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,11 @@ bool CentipedeCallbacks::GetSeedsViaExternalBinary(
.temp_file_path = temp_input_file_path_}};
const int retval = cmd.Execute();

if (env_.print_runner_log) {
LOG(INFO) << "Getting seeds via external binary returns " << retval;
PrintExecutionLog();
}

std::vector<std::string> seed_input_filenames;
for (const auto &dir_ent : std::filesystem::directory_iterator(output_dir)) {
seed_input_filenames.push_back(dir_ent.path().filename());
Expand Down
138 changes: 86 additions & 52 deletions centipede/centipede_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -431,22 +431,34 @@ void DeduplicateAndStoreNewCrashes(
}
}

// Seeds the corpus files in `env.workdir` with the previously distilled corpus
// files from `src_dir`.
// Seeds the corpus files in `env.workdir` with the inputs in `regression_dir`
// (always used) and the previously distilled corpus files from `coverage_dir`
// (used if non-empty).
SeedCorpusConfig GetSeedCorpusConfig(const Environment &env,
std::string_view src_dir) {
std::string_view regression_dir,
std::string_view coverage_dir) {
const WorkDir workdir{env};
std::vector<SeedCorpusSource> sources = {{
.dir_glob = std::string(regression_dir),
.num_recent_dirs = 1,
.individual_input_rel_glob = "*",
.sampled_fraction_or_count = 1.0f,
}};
if (!coverage_dir.empty()) {
sources.push_back(SeedCorpusSource{
.dir_glob = std::string(coverage_dir),
.num_recent_dirs = 1,
// We're using the previously distilled corpus files as seeds.
.shard_rel_glob =
std::filesystem::path{
workdir.DistilledCorpusFilePaths().AllShardsGlob()}
.filename(),
.individual_input_rel_glob = "*",
.sampled_fraction_or_count = 1.0f,
});
}
return {
.sources = {SeedCorpusSource{
.dir_glob = std::string(src_dir),
.num_recent_dirs = 1,
// We're using the previously distilled corpus files as seeds.
.shard_rel_glob =
std::filesystem::path{
workdir.DistilledCorpusFilePaths().AllShardsGlob()}
.filename(),
.sampled_fraction_or_count = 1.0f,
}},
.sources = std::move(sources),
.destination =
{
.dir_path = env.workdir,
Expand Down Expand Up @@ -495,9 +507,7 @@ int UpdateCorpusDatabaseForFuzzTests(
absl::Time start_time = absl::Now();
LOG(INFO) << "Starting the update of the corpus database for fuzz tests:"
<< "\nBinary: " << env.binary
<< "\nCorpus database: " << fuzztest_config.corpus_database
<< "\nFuzz tests: "
<< absl::StrJoin(fuzztest_config.fuzz_tests, ", ");
<< "\nCorpus database: " << fuzztest_config.corpus_database;

// Step 1: Preliminary set up of test sharding, binary info, etc.
const auto [test_shard_index, total_test_shards] = SetUpTestSharding();
Expand All @@ -514,14 +524,32 @@ int UpdateCorpusDatabaseForFuzzTests(
absl::FormatTime("%Y-%m-%d-%H-%M-%S", absl::Now(), absl::UTCTimeZone());
return stamp;
}();
std::vector<std::string> fuzz_tests_to_run;
if (env.fuzztest_single_test_mode) {
CHECK(fuzztest_config.fuzz_tests_in_current_shard.size() == 1)
<< "Must select exactly one fuzz test when running in the unified "
"exeuction model.";
fuzz_tests_to_run = fuzztest_config.fuzz_tests_in_current_shard;
} else {
for (int i = 0; i < fuzztest_config.fuzz_tests.size(); ++i) {
if (i % total_test_shards == test_shard_index) {
fuzz_tests_to_run.push_back(fuzztest_config.fuzz_tests[i]);
}
}
}
LOG(INFO) << "Fuzz tests to run:" << absl::StrJoin(fuzz_tests_to_run, ", ");

const bool is_workdir_specified = !env.workdir.empty();
// The full workdir paths will be formed by appending the fuzz test names to
// the base workdir path. We use a different path when only replaying to avoid
// replaying an unfinished fuzzing session.
const auto base_workdir_path =
corpus_database_path /
absl::StrFormat("workdir%s.%03d",
fuzztest_config.only_replay_corpus ? "-replay" : "",
test_shard_index);
is_workdir_specified
? std::filesystem::path(env.workdir)
: corpus_database_path /
absl::StrFormat("workdir%s.%03d",
fuzztest_config.only_replay ? "-replay" : "",
test_shard_index);
// There's no point in saving the binary info to the workdir, since the
// workdir is deleted at the end.
env.save_binary_info = false;
Expand All @@ -536,9 +564,8 @@ int UpdateCorpusDatabaseForFuzzTests(
// Find the last index of a fuzz test for which we already have a workdir.
bool is_resuming = false;
int resuming_fuzztest_idx = 0;
for (int i = 0; i < fuzztest_config.fuzz_tests.size(); ++i) {
if (i % total_test_shards != test_shard_index) continue;
env.workdir = base_workdir_path / fuzztest_config.fuzz_tests[i];
for (int i = 0; i < fuzz_tests_to_run.size(); ++i) {
env.workdir = base_workdir_path / fuzz_tests_to_run[i];
// Check the existence of the coverage path to not only make sure the
// workdir exists, but also that it was created for the same binary as in
// this run.
Expand All @@ -549,22 +576,20 @@ int UpdateCorpusDatabaseForFuzzTests(
}

LOG_IF(INFO, is_resuming) << "Resuming from the fuzz test "
<< fuzztest_config.fuzz_tests[resuming_fuzztest_idx]
<< fuzz_tests_to_run[resuming_fuzztest_idx]
<< " (index: " << resuming_fuzztest_idx << ")";

// Step 3: Iterate over the fuzz tests and run them.
const std::string binary = env.binary;
for (int i = resuming_fuzztest_idx; i < fuzztest_config.fuzz_tests.size();
++i) {
if (i % total_test_shards != test_shard_index) continue;
if (fuzztest_config.GetTimeLimitPerTest() < absl::InfiniteDuration()) {
// TODO(fniksic): Test this behavior in end-to-end tests.
for (int i = resuming_fuzztest_idx; i < fuzz_tests_to_run.size(); ++i) {
if (!env.fuzztest_single_test_mode &&
fuzztest_config.GetTimeLimitPerTest() < absl::InfiniteDuration()) {
ReportErrorWhenNotEnoughTimeToRunEverything(
start_time, fuzztest_config.GetTimeLimitPerTest(),
/*executed_tests_in_shard=*/i / total_test_shards,
fuzztest_config.fuzz_tests.size(), total_test_shards);
}
env.workdir = base_workdir_path / fuzztest_config.fuzz_tests[i];
env.workdir = base_workdir_path / fuzz_tests_to_run[i];
if (RemotePathExists(env.workdir) && !is_resuming) {
// This could be a workdir from a failed run that used a different version
// of the binary. We delete it so that we don't have to deal with the
Expand All @@ -575,26 +600,36 @@ int UpdateCorpusDatabaseForFuzzTests(
CHECK_OK(RemoteMkdir(
workdir.CoverageDirPath())); // Implicitly creates the workdir

// Seed the fuzzing session with the latest coverage corpus from the
// previous fuzzing session.
const std::filesystem::path fuzztest_db_path =
corpus_database_path / fuzztest_config.fuzz_tests[i];
corpus_database_path / fuzz_tests_to_run[i];
const std::filesystem::path regression_dir =
fuzztest_db_path / "regression";
const std::filesystem::path coverage_dir = fuzztest_db_path / "coverage";
if (RemotePathExists(coverage_dir.c_str()) && !is_resuming) {
CHECK_OK(GenerateSeedCorpusFromConfig(
GetSeedCorpusConfig(env, coverage_dir.c_str()), env.binary_name,
env.binary_hash));

// Seed the fuzzing session with the latest coverage corpus and regression
// inputs from the previous fuzzing session.
if (!is_resuming) {
if (const auto status = GenerateSeedCorpusFromConfig(
GetSeedCorpusConfig(
env, regression_dir.c_str(),
fuzztest_config.replay_corpus ? coverage_dir.c_str() : ""),
env.binary_name, env.binary_hash);
!status.ok()) {
LOG(ERROR) << "Got error while generating the seed corpus: " << status;
}
}

// TODO: b/338217594 - Call the FuzzTest binary in a flag-agnostic way.
constexpr std::string_view kFuzzTestFuzzFlag = "--fuzz=";
constexpr std::string_view kFuzzTestReplayCorpusFlag =
"--replay_corpus=";
std::string_view test_selection_flag = fuzztest_config.only_replay_corpus
? kFuzzTestReplayCorpusFlag
: kFuzzTestFuzzFlag;
env.binary = absl::StrCat(binary, " ", test_selection_flag,
fuzztest_config.fuzz_tests[i]);
if (!env.fuzztest_single_test_mode) {
// TODO: b/338217594 - Call the FuzzTest binary in a flag-agnostic way.
constexpr std::string_view kFuzzTestFuzzFlag = "--fuzz=";
constexpr std::string_view kFuzzTestReplayCorpusFlag =
"--replay_corpus=";
std::string_view test_selection_flag = fuzztest_config.only_replay
? kFuzzTestReplayCorpusFlag
: kFuzzTestFuzzFlag;
env.binary =
absl::StrCat(binary, " ", test_selection_flag, fuzz_tests_to_run[i]);
}

absl::Duration time_limit = fuzztest_config.GetTimeLimitPerTest();
absl::Duration time_spent = absl::ZeroDuration();
Expand All @@ -606,9 +641,8 @@ int UpdateCorpusDatabaseForFuzzTests(
}
is_resuming = false;

LOG(INFO) << (fuzztest_config.only_replay_corpus ? "Replaying "
: "Fuzzing ")
<< fuzztest_config.fuzz_tests[i] << " for " << time_limit
LOG(INFO) << (fuzztest_config.only_replay ? "Replaying " : "Fuzzing ")
<< fuzz_tests_to_run[i] << " for " << time_limit
<< "\n\tTest binary: " << env.binary;

const absl::Time start_time = absl::Now();
Expand All @@ -620,15 +654,15 @@ int UpdateCorpusDatabaseForFuzzTests(
record_fuzzing_time.Stop();

if (!stats_root_path.empty()) {
const auto stats_dir = stats_root_path / fuzztest_config.fuzz_tests[i];
const auto stats_dir = stats_root_path / fuzz_tests_to_run[i];
CHECK_OK(RemoteMkdir(stats_dir.c_str()));
CHECK_OK(RemotePathRename(
workdir.FuzzingStatsPath(),
(stats_dir / absl::StrCat("fuzzing_stats_", execution_stamp))
.c_str()));
}

if (fuzztest_config.only_replay_corpus) continue;
if (fuzztest_config.only_replay || is_workdir_specified) continue;

// Distill and store the coverage corpus.
Distill(env);
Expand Down Expand Up @@ -720,7 +754,7 @@ int CentipedeMain(const Environment &env,
<< "Failed to deserialize target configuration";
if (!target_config->corpus_database.empty()) {
const auto time_limit_per_test = target_config->GetTimeLimitPerTest();
CHECK(target_config->only_replay_corpus ||
CHECK(target_config->only_replay ||
time_limit_per_test < absl::InfiniteDuration())
<< "Updating corpus database requires specifying time limit per "
"fuzz test.";
Expand Down
2 changes: 1 addition & 1 deletion centipede/environment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ void Environment::UpdateWithTargetConfig(
<< VV(stack_limit_kb) << VV(config.stack_limit);
stack_limit_kb = bytes_to_kb(config.stack_limit);

if (config.only_replay_corpus) {
if (config.only_replay) {
load_shards_only = true;
populate_binary_info = false;
}
Expand Down
3 changes: 3 additions & 0 deletions centipede/environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ struct Environment {
bool first_corpus_dir_output_only = false;
// If set, load/merge shards without fuzzing new inputs.
bool load_shards_only = false;
// If set, operate on the corpus database for a single test specified by
// FuzzTest instead of all the tests.
bool fuzztest_single_test_mode = false;

// Command line-related fields -----------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion centipede/environment_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ TEST(Environment, DiesOnInconsistentStackLimitKbAndTargetConfigStackLimit) {

TEST(Environment, UpdatesReplayOnlyConfiguration) {
Environment env;
fuzztest::internal::Configuration config{.only_replay_corpus = true};
fuzztest::internal::Configuration config{.only_replay = true};
env.UpdateWithTargetConfig(config);
EXPECT_TRUE(env.load_shards_only);
EXPECT_FALSE(env.populate_binary_info);
Expand Down
Loading

0 comments on commit ba554ec

Please sign in to comment.