Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use fuzz_tests_in_current_shard instead of computing the sharding again. #1477

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions centipede/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,7 @@ cc_library(
":environment",
":mutation_input",
":runner_result",
":stop",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_fuzztest//common:defs",
Expand Down Expand Up @@ -1121,6 +1122,7 @@ cc_library(
":thread_pool",
":util",
":workdir",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/random",
Expand Down Expand Up @@ -1688,8 +1690,11 @@ cc_test(
":feature",
":seed_corpus_maker_lib",
":workdir",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
"@com_google_fuzztest//common:defs",
"@com_google_fuzztest//common:logging",
"@com_google_fuzztest//common:remote_file",
"@com_google_fuzztest//common:test_util",
"@com_google_googletest//:gtest_main",
],
Expand Down
4 changes: 4 additions & 0 deletions centipede/centipede_default_callbacks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "./centipede/environment.h"
#include "./centipede/mutation_input.h"
#include "./centipede/runner_result.h"
#include "./centipede/stop.h"
#include "./common/defs.h"
#include "./common/logging.h" // IWYU pragma: keep

Expand Down Expand Up @@ -83,6 +84,9 @@ void CentipedeDefaultCallbacks::Mutate(
LOG_FIRST_N(WARNING, 5)
<< "Custom mutator returned no mutants: falling back to internal "
"default mutator";
} else if (ShouldStop()) {
LOG(INFO) << "Stop condition detected. Ignoring mutation errors.";
return;
} else {
LOG(WARNING) << "Custom mutator undetected or misbehaving:";
CHECK(!custom_mutator_is_usable_.has_value())
Expand Down
32 changes: 16 additions & 16 deletions centipede/centipede_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -483,12 +483,15 @@ int UpdateCorpusDatabaseForFuzzTests(
CentipedeCallbacksFactory &callbacks_factory) {
env.UpdateWithTargetConfig(fuzztest_config);

std::vector<std::string> fuzz_tests_to_run =
fuzztest_config.fuzz_tests_in_current_shard;
std::sort(fuzz_tests_to_run.begin(), fuzz_tests_to_run.end());

absl::Time start_time = absl::Now();
LOG(INFO) << "Starting the update of the corpus database for fuzz tests:"
<< "\nBinary: " << env.binary
<< "\nCorpus database: " << fuzztest_config.corpus_database
<< "\nFuzz tests: "
<< absl::StrJoin(fuzztest_config.fuzz_tests, ", ");
<< "\nFuzz tests: " << absl::StrJoin(fuzz_tests_to_run, ", ");

// Step 1: Preliminary set up of test sharding, binary info, etc.
const auto [test_shard_index, total_test_shards] = SetUpTestSharding();
Expand Down Expand Up @@ -523,9 +526,8 @@ int UpdateCorpusDatabaseForFuzzTests(
// Find the last index of a fuzz test for which we already have a workdir.
bool is_resuming = false;
int resuming_fuzztest_idx = 0;
for (int i = 0; i < fuzztest_config.fuzz_tests.size(); ++i) {
if (i % total_test_shards != test_shard_index) continue;
env.workdir = base_workdir_path / fuzztest_config.fuzz_tests[i];
for (int i = 0; i < fuzz_tests_to_run.size(); ++i) {
env.workdir = base_workdir_path / fuzz_tests_to_run[i];
// Check the existence of the coverage path to not only make sure the
// workdir exists, but also that it was created for the same binary as in
// this run.
Expand All @@ -536,19 +538,17 @@ int UpdateCorpusDatabaseForFuzzTests(
}

LOG_IF(INFO, is_resuming) << "Resuming from the fuzz test "
<< fuzztest_config.fuzz_tests[resuming_fuzztest_idx]
<< fuzz_tests_to_run[resuming_fuzztest_idx]
<< " (index: " << resuming_fuzztest_idx << ")";

// Step 3: Iterate over the fuzz tests and run them.
const std::string binary = env.binary;
for (int i = resuming_fuzztest_idx; i < fuzztest_config.fuzz_tests.size();
++i) {
if (i % total_test_shards != test_shard_index) continue;
for (int i = resuming_fuzztest_idx; i < fuzz_tests_to_run.size(); ++i) {
ReportErrorWhenNotEnoughTimeToRunEverything(
start_time, fuzztest_config.time_limit,
/*executed_tests_in_shard=*/i / total_test_shards,
fuzztest_config.fuzz_tests.size(), total_test_shards);
env.workdir = base_workdir_path / fuzztest_config.fuzz_tests[i];
env.workdir = base_workdir_path / fuzz_tests_to_run[i];
if (RemotePathExists(env.workdir) && !is_resuming) {
// This could be a workdir from a failed run that used a different version
// of the binary. We delete it so that we don't have to deal with the
Expand All @@ -562,7 +562,7 @@ int UpdateCorpusDatabaseForFuzzTests(
// Seed the fuzzing session with the latest coverage corpus from the
// previous fuzzing session.
const std::filesystem::path fuzztest_db_path =
corpus_database_path / fuzztest_config.fuzz_tests[i];
corpus_database_path / fuzz_tests_to_run[i];
const std::filesystem::path coverage_dir = fuzztest_db_path / "coverage";
if (RemotePathExists(coverage_dir.c_str()) && !is_resuming) {
CHECK_OK(GenerateSeedCorpusFromConfig(
Expand All @@ -572,8 +572,8 @@ int UpdateCorpusDatabaseForFuzzTests(

// TODO: b/338217594 - Call the FuzzTest binary in a flag-agnostic way.
constexpr std::string_view kFuzzTestFuzzFlag = "--fuzz=";
env.binary = absl::StrCat(binary, " ", kFuzzTestFuzzFlag,
fuzztest_config.fuzz_tests[i]);
env.binary =
absl::StrCat(binary, " ", kFuzzTestFuzzFlag, fuzz_tests_to_run[i]);

absl::Duration time_limit = fuzztest_config.GetTimeLimitPerTest();
absl::Duration time_spent = absl::ZeroDuration();
Expand All @@ -585,8 +585,8 @@ int UpdateCorpusDatabaseForFuzzTests(
}
is_resuming = false;

LOG(INFO) << "Fuzzing " << fuzztest_config.fuzz_tests[i] << " for "
<< time_limit << "\n\tTest binary: " << env.binary;
LOG(INFO) << "Fuzzing " << fuzz_tests_to_run[i] << " for " << time_limit
<< "\n\tTest binary: " << env.binary;

const absl::Time start_time = absl::Now();
ClearEarlyStopRequestAndSetStopTime(/*stop_time=*/start_time + time_limit);
Expand All @@ -597,7 +597,7 @@ int UpdateCorpusDatabaseForFuzzTests(
record_fuzzing_time.Stop();

if (!stats_root_path.empty()) {
const auto stats_dir = stats_root_path / fuzztest_config.fuzz_tests[i];
const auto stats_dir = stats_root_path / fuzz_tests_to_run[i];
CHECK_OK(RemoteMkdir(stats_dir.c_str()));
CHECK_OK(RemotePathRename(
workdir.FuzzingStatsPath(),
Expand Down
88 changes: 77 additions & 11 deletions centipede/seed_corpus_maker_lib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <variant>
#include <vector>

#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/random/random.h"
Expand Down Expand Up @@ -115,21 +116,54 @@ absl::Status SampleSeedCorpusElementsFromSource( //
LOG(INFO) << "Selected " << src_dirs.size() << " corpus dir(s)";
}

// Find all the corpus shard files in the found dirs.
// Find all the corpus shard and legecy input files in the found dirs.

std::vector<std::string> corpus_shard_fnames;
std::vector<std::string> legacy_input_fnames;
for (const auto& dir : src_dirs) {
const std::string shards_glob = fs::path{dir} / source.shard_rel_glob;
// NOTE: `RemoteGlobMatch` appends to the output list.
const auto prev_num_shards = corpus_shard_fnames.size();
RETURN_IF_NOT_OK(RemoteGlobMatch(shards_glob, corpus_shard_fnames));
LOG(INFO) << "Found " << (corpus_shard_fnames.size() - prev_num_shards)
<< " shard(s) matching " << shards_glob;
absl::flat_hash_set<std::string> current_corpus_shard_fnames;
if (!source.shard_rel_glob.empty()) {
std::vector<std::string> matched_fnames;
const std::string glob = fs::path{dir} / source.shard_rel_glob;
const auto match_status = RemoteGlobMatch(glob, matched_fnames);
if (!match_status.ok() && !absl::IsNotFound(match_status)) {
LOG(ERROR) << "Got error when glob-matching in " << dir << ": "
<< match_status;
} else {
current_corpus_shard_fnames.insert(matched_fnames.begin(),
matched_fnames.end());
corpus_shard_fnames.insert(corpus_shard_fnames.end(),
matched_fnames.begin(),
matched_fnames.end());
LOG(INFO) << "Found " << matched_fnames.size() << " shard(s) matching "
<< glob;
}
}
if (!source.legacy_input_rel_glob.empty()) {
std::vector<std::string> matched_fnames;
const std::string glob = fs::path{dir} / source.legacy_input_rel_glob;
const auto match_status = RemoteGlobMatch(glob, matched_fnames);
if (!match_status.ok() && !absl::IsNotFound(match_status)) {
LOG(ERROR) << "Got error when glob-matching in " << dir << ": "
<< match_status;
} else {
size_t num_added_legacy_inputs = 0;
for (auto& fname : matched_fnames) {
if (current_corpus_shard_fnames.contains(fname)) continue;
if (RemotePathIsDirectory(fname)) continue;
++num_added_legacy_inputs;
legacy_input_fnames.push_back(std::move(fname));
}
LOG(INFO) << "Found " << num_added_legacy_inputs
<< " legacy input(s) with glob " << glob;
}
}
}
LOG(INFO) << "Found " << corpus_shard_fnames.size()
<< " shard(s) total in source " << source.dir_glob;
LOG(INFO) << "Found " << corpus_shard_fnames.size() << " shard(s) and "
<< legacy_input_fnames.size() << " legacy input(s) total in source "
<< source.dir_glob;

if (corpus_shard_fnames.empty()) {
if (corpus_shard_fnames.empty() && legacy_input_fnames.empty()) {
LOG(WARNING) << "Skipping empty source " << source.dir_glob;
return absl::OkStatus();
}
Expand Down Expand Up @@ -195,9 +229,41 @@ absl::Status SampleSeedCorpusElementsFromSource( //
}
}

RPROF_SNAPSHOT_AND_LOG("Done reading");
RPROF_SNAPSHOT_AND_LOG("Done reading shards");

InputAndFeaturesVec src_elts;

if (!legacy_input_fnames.empty()) {
constexpr size_t kLeastNumLegacyInputsPerBatch = 256;
const size_t legacy_input_batches = std::max(
size_t{1}, legacy_input_fnames.size() / kLeastNumLegacyInputsPerBatch);
const size_t batch_size =
(legacy_input_fnames.size() - 1) / legacy_input_batches + 1;
constexpr int kMaxReadThreads = 32;
ThreadPool threads{std::min<int>(kMaxReadThreads, legacy_input_batches)};
src_elts.resize(legacy_input_fnames.size());

for (int batch = 0; batch < legacy_input_batches; ++batch) {
threads.Schedule([batch, batch_size, &legacy_input_fnames, &src_elts] {
for (int index = batch * batch_size; index < (batch + 1) * batch_size &&
index < legacy_input_fnames.size();
++index) {
ByteArray input;
const auto& path = legacy_input_fnames[index];
const auto read_status = RemoteFileGetContents(path, input);
if (!read_status.ok()) {
LOG(WARNING) << "Error while reading legacy input path " << path
<< ": " << read_status;
continue;
}
src_elts[index] = {input, {}};
}
});
}
}

RPROF_SNAPSHOT_AND_LOG("Done reading");

size_t src_num_features = 0;

for (int s = 0; s < num_shards; ++s) {
Expand Down
7 changes: 6 additions & 1 deletion centipede/seed_corpus_maker_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,16 @@ namespace centipede {
// Native struct used by the seed corpus library for seed corpus source.
//
// TODO(b/362576261): Currently this is mirroring the `proto::SeedCorpusSource`
// proto. But in the future it may change with the core seeding API.
// proto. But in the future it may change with the core seeding API - any
// difference is commented below.
struct SeedCorpusSource {
std::string dir_glob;
uint32_t num_recent_dirs;
std::string shard_rel_glob;
// If non-empty, will be used to glob the legacy input files (with one input
// in each file) in the source dirs. Any files matching `shard_rel_glob` will
// be skipped.
std::string legacy_input_rel_glob;
std::variant<float, uint32_t> sampled_fraction_or_count;
};

Expand Down
65 changes: 65 additions & 0 deletions centipede/seed_corpus_maker_lib_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@
#include <filesystem> // NOLINT
#include <string>
#include <string_view>
#include <vector>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/log/check.h"
#include "absl/strings/str_cat.h"
#include "./centipede/feature.h"
#include "./centipede/workdir.h"
#include "./common/defs.h"
#include "./common/logging.h" // IWYU pragma: keep
#include "./common/remote_file.h"
#include "./common/test_util.h"

namespace centipede {
Expand All @@ -36,6 +40,7 @@ namespace {
namespace fs = std::filesystem;

using ::testing::IsSubsetOf;
using ::testing::IsSupersetOf;

inline constexpr auto kIdxDigits = WorkDir::kDigitsInShardIndex;

Expand Down Expand Up @@ -178,5 +183,65 @@ TEST(SeedCorpusMakerLibTest, RoundTripWriteReadWrite) {
}
}

TEST(SeedCorpusMakerLibTest, LoadsBothLegacyInputsAndShardsFromSource) {
const fs::path test_dir = GetTestTempDir(test_info_->name());
chdir(test_dir.c_str());

const InputAndFeaturesVec kShardedInputs = {
{{0}, {}},
{{1}, {feature_domains::kNoFeature}},
{{0, 1}, {0x11, 0x23}},
};
constexpr std::string_view kCovBin = "bin";
constexpr std::string_view kCovHash = "hash";
constexpr std::string_view kRelDir1 = "dir/foo";

const std::vector<ByteArray> kLegacyInputs = {{0, 1, 2}, {0, 1, 2, 3}};
// Write sharded inputs.
{
constexpr size_t kNumShards = 2;
const SeedCorpusDestination destination = {
.dir_path = std::string(kRelDir1),
.shard_rel_glob = absl::StrCat("distilled-", kCovBin, ".*"),
.shard_index_digits = kIdxDigits,
.num_shards = kNumShards,
};
CHECK_OK(WriteSeedCorpusElementsToDestination( //
kShardedInputs, kCovBin, kCovHash, destination));
const std::string workdir = (test_dir / kRelDir1).c_str();
ASSERT_NO_FATAL_FAILURE(VerifyShardsExist( //
workdir, kCovBin, kCovHash, kNumShards, ShardType::kDistilled));
}

// Write legacy inputs
for (int i = 0; i < kLegacyInputs.size(); ++i) {
const auto path = std::filesystem::path(test_dir) / kRelDir1 /
absl::StrCat("legacy_input_", i);
CHECK_OK(RemoteFileSetContents(path.string(), kLegacyInputs[i]));
}

// Test that sharded and legacy inputs matches what we wrote.
{
InputAndFeaturesVec elements;
ASSERT_OK(SampleSeedCorpusElementsFromSource( //
SeedCorpusSource{
.dir_glob = std::string(kRelDir1),
.num_recent_dirs = 1,
.shard_rel_glob = absl::StrCat("distilled-", kCovBin, ".*"),
// Intentionally try to match the shard files and test if they will
// be read as legacy inputs.
.legacy_input_rel_glob = "*",
.sampled_fraction_or_count = 1.0f,
},
kCovBin, kCovHash, elements));
EXPECT_EQ(elements.size(), kShardedInputs.size() + kLegacyInputs.size());
EXPECT_THAT(elements, IsSupersetOf(kShardedInputs));
EXPECT_THAT(elements, IsSupersetOf(InputAndFeaturesVec{
{{0, 1, 2}, {}},
{{0, 1, 2, 3}, {}},
}));
}
}

} // namespace
} // namespace centipede
4 changes: 4 additions & 0 deletions common/remote_file_oss.cc
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,10 @@ absl::Status RemoteGlobMatch(std::string_view glob,
if (int ret = ::glob(std::string{glob}.c_str(), GLOB_TILDE, HandleGlobError,
&glob_ret);
ret != 0) {
if (ret == GLOB_NOMATCH) {
return absl::NotFoundError(absl::StrCat(
"glob() returned NOMATCH for pattern: ", std::string(glob)));
}
return absl::UnknownError(absl::StrCat(
"glob() failed, pattern: ", std::string(glob), ", returned: ", ret));
}
Expand Down
Loading