Skip to content

Commit

Permalink
No public description
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 703558952
  • Loading branch information
xinhaoyuan authored and copybara-github committed Dec 12, 2024
1 parent 4c740a0 commit f1c0b66
Show file tree
Hide file tree
Showing 11 changed files with 228 additions and 70 deletions.
5 changes: 5 additions & 0 deletions centipede/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,7 @@ cc_library(
":environment",
":mutation_input",
":runner_result",
":stop",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_fuzztest//common:defs",
Expand Down Expand Up @@ -1121,6 +1122,7 @@ cc_library(
":thread_pool",
":util",
":workdir",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/random",
Expand Down Expand Up @@ -1688,8 +1690,11 @@ cc_test(
":feature",
":seed_corpus_maker_lib",
":workdir",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
"@com_google_fuzztest//common:defs",
"@com_google_fuzztest//common:logging",
"@com_google_fuzztest//common:remote_file",
"@com_google_fuzztest//common:test_util",
"@com_google_googletest//:gtest_main",
],
Expand Down
6 changes: 6 additions & 0 deletions centipede/centipede_default_callbacks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "./centipede/environment.h"
#include "./centipede/mutation_input.h"
#include "./centipede/runner_result.h"
#include "./centipede/stop.h"
#include "./common/defs.h"
#include "./common/logging.h" // IWYU pragma: keep

Expand Down Expand Up @@ -83,6 +84,11 @@ void CentipedeDefaultCallbacks::Mutate(
LOG_FIRST_N(WARNING, 5)
<< "Custom mutator returned no mutants: falling back to internal "
"default mutator";
} else if (ShouldStop()) {
LOG(WARNING) << "Custom mutator failed, but ignored since the stop "
"condition it met. Possibly what triggered the stop "
"condition also interrupted the mutator.";
return;
} else {
LOG(WARNING) << "Custom mutator undetected or misbehaving:";
CHECK(!custom_mutator_is_usable_.has_value())
Expand Down
7 changes: 5 additions & 2 deletions centipede/runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -613,8 +613,10 @@ static void RunOneInput(const uint8_t *data, size_t size,
state.ResetTimers();
int target_return_value = callbacks.Execute({data, size}) ? 0 : -1;
state.stats.exec_time_usec = UsecSinceLast();
CheckWatchdogLimits();
PostProcessCoverage(target_return_value);
if (centipede::state.input_start_time.exchange(0) != 0) {
CheckWatchdogLimits();
PostProcessCoverage(target_return_value);
}
state.stats.post_time_usec = UsecSinceLast();
state.stats.peak_rss_mb = GetPeakRSSMb();
}
Expand Down Expand Up @@ -1234,6 +1236,7 @@ extern "C" void CentipedePrepareProcessing() {
}

extern "C" void CentipedeFinalizeProcessing() {
if (centipede::state.input_start_time.exchange(0) == 0) return;
centipede::CheckWatchdogLimits();
centipede::PostProcessCoverage(/*target_return_value=*/0);
}
Expand Down
85 changes: 73 additions & 12 deletions centipede/seed_corpus_maker_lib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <variant>
#include <vector>

#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/random/random.h"
Expand Down Expand Up @@ -115,21 +116,54 @@ absl::Status SampleSeedCorpusElementsFromSource( //
LOG(INFO) << "Selected " << src_dirs.size() << " corpus dir(s)";
}

// Find all the corpus shard files in the found dirs.
// Find all the corpus shard and individual input files in the found dirs.

std::vector<std::string> corpus_shard_fnames;
std::vector<std::string> individual_input_fnames;
for (const auto& dir : src_dirs) {
const std::string shards_glob = fs::path{dir} / source.shard_rel_glob;
// NOTE: `RemoteGlobMatch` appends to the output list.
const auto prev_num_shards = corpus_shard_fnames.size();
RETURN_IF_NOT_OK(RemoteGlobMatch(shards_glob, corpus_shard_fnames));
LOG(INFO) << "Found " << (corpus_shard_fnames.size() - prev_num_shards)
<< " shard(s) matching " << shards_glob;
absl::flat_hash_set<std::string> current_corpus_shard_fnames;
if (!source.shard_rel_glob.empty()) {
std::vector<std::string> matched_fnames;
const std::string glob = fs::path{dir} / source.shard_rel_glob;
const auto match_status = RemoteGlobMatch(glob, matched_fnames);
if (!match_status.ok() && !absl::IsNotFound(match_status)) {
LOG(ERROR) << "Got error when glob-matching in " << dir << ": "
<< match_status;
} else {
current_corpus_shard_fnames.insert(matched_fnames.begin(),
matched_fnames.end());
corpus_shard_fnames.insert(corpus_shard_fnames.end(),
matched_fnames.begin(),
matched_fnames.end());
LOG(INFO) << "Found " << matched_fnames.size() << " shard(s) matching "
<< glob;
}
}
if (!source.individual_input_rel_glob.empty()) {
std::vector<std::string> matched_fnames;
const std::string glob = fs::path{dir} / source.individual_input_rel_glob;
const auto match_status = RemoteGlobMatch(glob, matched_fnames);
if (!match_status.ok() && !absl::IsNotFound(match_status)) {
LOG(ERROR) << "Got error when glob-matching in " << dir << ": "
<< match_status;
} else {
size_t num_added_individual_inputs = 0;
for (auto& fname : matched_fnames) {
if (current_corpus_shard_fnames.contains(fname)) continue;
if (RemotePathIsDirectory(fname)) continue;
++num_added_individual_inputs;
individual_input_fnames.push_back(std::move(fname));
}
LOG(INFO) << "Found " << num_added_individual_inputs
<< " individual input(s) with glob " << glob;
}
}
}
LOG(INFO) << "Found " << corpus_shard_fnames.size()
<< " shard(s) total in source " << source.dir_glob;
LOG(INFO) << "Found " << corpus_shard_fnames.size() << " shard(s) and "
<< individual_input_fnames.size()
<< " individual input(s) total in source " << source.dir_glob;

if (corpus_shard_fnames.empty()) {
if (corpus_shard_fnames.empty() && individual_input_fnames.empty()) {
LOG(WARNING) << "Skipping empty source " << source.dir_glob;
return absl::OkStatus();
}
Expand All @@ -140,10 +174,12 @@ absl::Status SampleSeedCorpusElementsFromSource( //
const auto num_shards = corpus_shard_fnames.size();
std::vector<InputAndFeaturesVec> src_elts_per_shard(num_shards);
std::vector<size_t> src_elts_with_features_per_shard(num_shards, 0);
InputAndFeaturesVec src_elts;

{
constexpr int kMaxReadThreads = 32;
ThreadPool threads{std::min<int>(kMaxReadThreads, num_shards)};
ThreadPool threads{std::min<int>(
kMaxReadThreads, std::max(num_shards, individual_input_fnames.size()))};

for (int shard = 0; shard < num_shards; ++shard) {
const auto& corpus_fname = corpus_shard_fnames[shard];
Expand Down Expand Up @@ -193,11 +229,27 @@ absl::Status SampleSeedCorpusElementsFromSource( //

threads.Schedule(read_shard);
}

RPROF_SNAPSHOT_AND_LOG("Done reading shards");

src_elts.resize(individual_input_fnames.size());
for (size_t index = 0; index < individual_input_fnames.size(); ++index) {
threads.Schedule([index, &individual_input_fnames, &src_elts] {
ByteArray input;
const auto& path = individual_input_fnames[index];
const auto read_status = RemoteFileGetContents(path, input);
if (!read_status.ok()) {
LOG(WARNING) << "Skipping individual input path " << path
<< " due to read error: " << read_status;
return;
}
src_elts[index] = {std::move(input), {}};
});
}
}

RPROF_SNAPSHOT_AND_LOG("Done reading");

InputAndFeaturesVec src_elts;
size_t src_num_features = 0;

for (int s = 0; s < num_shards; ++s) {
Expand All @@ -217,6 +269,15 @@ absl::Status SampleSeedCorpusElementsFromSource( //

RPROF_SNAPSHOT_AND_LOG("Done merging");

// Remove empty inputs possibly due to read errors.
auto remove_it =
std::remove_if(src_elts.begin(), src_elts.end(),
[](const auto& elt) { return std::get<0>(elt).empty(); });
if (remove_it != src_elts.end()) {
LOG(WARNING) << "Removed " << src_elts.end() - remove_it << " empty inputs";
src_elts.erase(remove_it, src_elts.end());
}

LOG(INFO) << "Read total of " << src_elts.size() << " elements ("
<< src_num_features << " with features) from source "
<< source.dir_glob;
Expand Down
7 changes: 6 additions & 1 deletion centipede/seed_corpus_maker_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,16 @@ namespace centipede {
// Native struct used by the seed corpus library for seed corpus source.
//
// TODO(b/362576261): Currently this is mirroring the `proto::SeedCorpusSource`
// proto. But in the future it may change with the core seeding API.
// proto. But in the future it may change with the core seeding API - any
// difference is commented below.
struct SeedCorpusSource {
std::string dir_glob;
uint32_t num_recent_dirs;
std::string shard_rel_glob;
// If non-empty, will be used to glob the individual input files (with one
// input in each file) in the source dirs. Any files matching `shard_rel_glob`
// will be skipped.
std::string individual_input_rel_glob;
std::variant<float, uint32_t> sampled_fraction_or_count;
};

Expand Down
69 changes: 69 additions & 0 deletions centipede/seed_corpus_maker_lib_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@
#include <filesystem> // NOLINT
#include <string>
#include <string_view>
#include <vector>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/log/check.h"
#include "absl/strings/str_cat.h"
#include "./centipede/feature.h"
#include "./centipede/workdir.h"
#include "./common/defs.h"
#include "./common/logging.h" // IWYU pragma: keep
#include "./common/remote_file.h"
#include "./common/test_util.h"

namespace centipede {
Expand All @@ -36,6 +40,7 @@ namespace {
namespace fs = std::filesystem;

using ::testing::IsSubsetOf;
using ::testing::IsSupersetOf;

inline constexpr auto kIdxDigits = WorkDir::kDigitsInShardIndex;

Expand Down Expand Up @@ -178,5 +183,69 @@ TEST(SeedCorpusMakerLibTest, RoundTripWriteReadWrite) {
}
}

TEST(SeedCorpusMakerLibTest, LoadsBothIndividualInputsAndShardsFromSource) {
const fs::path test_dir = GetTestTempDir(test_info_->name());
chdir(test_dir.c_str());

const InputAndFeaturesVec kShardedInputs = {
{{0}, {}},
{{1}, {feature_domains::kNoFeature}},
{{0, 1}, {0x11, 0x23}},
};
constexpr std::string_view kCovBin = "bin";
constexpr std::string_view kCovHash = "hash";
constexpr std::string_view kRelDir = "dir/foo";

const std::vector<ByteArray> kIndividualInputs = {
{0, 1, 2},
{0, 1, 2, 3},
// Empty input expected to be not in the sample result.
{}};
// Write sharded inputs.
{
constexpr size_t kNumShards = 2;
const SeedCorpusDestination destination = {
.dir_path = std::string(kRelDir),
.shard_rel_glob = absl::StrCat("distilled-", kCovBin, ".*"),
.shard_index_digits = kIdxDigits,
.num_shards = kNumShards,
};
CHECK_OK(WriteSeedCorpusElementsToDestination( //
kShardedInputs, kCovBin, kCovHash, destination));
const std::string workdir = (test_dir / kRelDir).c_str();
ASSERT_NO_FATAL_FAILURE(VerifyShardsExist( //
workdir, kCovBin, kCovHash, kNumShards, ShardType::kDistilled));
}

// Write individual inputs
for (int i = 0; i < kIndividualInputs.size(); ++i) {
const auto path = std::filesystem::path(test_dir) / kRelDir /
absl::StrCat("individual_input_", i);
CHECK_OK(RemoteFileSetContents(path.string(), kIndividualInputs[i]));
}

// Test that sharded and individual inputs matches what we wrote.
{
InputAndFeaturesVec elements;
ASSERT_OK(SampleSeedCorpusElementsFromSource( //
SeedCorpusSource{
.dir_glob = std::string(kRelDir),
.num_recent_dirs = 1,
.shard_rel_glob = absl::StrCat("distilled-", kCovBin, ".*"),
// Intentionally try to match the shard files and test if they will
// be read as individual inputs.
.individual_input_rel_glob = "*",
.sampled_fraction_or_count = 1.0f,
},
kCovBin, kCovHash, elements));
EXPECT_EQ(elements.size(), 5); // Non-empty inputs
EXPECT_THAT(elements, IsSupersetOf(kShardedInputs));
EXPECT_THAT(elements, IsSupersetOf(InputAndFeaturesVec{
{{0, 1, 2}, {}},
{{0, 1, 2, 3}, {}},
}));
}
}

} // namespace
} // namespace centipede
4 changes: 4 additions & 0 deletions common/remote_file_oss.cc
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,10 @@ absl::Status RemoteGlobMatch(std::string_view glob,
if (int ret = ::glob(std::string{glob}.c_str(), GLOB_TILDE, HandleGlobError,
&glob_ret);
ret != 0) {
if (ret == GLOB_NOMATCH) {
return absl::NotFoundError(absl::StrCat(
"glob() returned NOMATCH for pattern: ", std::string(glob)));
}
return absl::UnknownError(absl::StrCat(
"glob() failed, pattern: ", std::string(glob), ", returned: ", ret));
}
Expand Down
Loading

0 comments on commit f1c0b66

Please sign in to comment.