From e6230c2d35f62bb9b4af91b5cf6768c0bf124c29 Mon Sep 17 00:00:00 2001 From: Xinhao Yuan Date: Tue, 16 Dec 2025 15:11:15 -0800 Subject: [PATCH] Use resevior sampling in auto-dictionary tracing and collection. This is to avoid biases that favors later entries that always overwrite the earilers. As side-effects, auto-dictionary tracing tables are filled randomly only after they become full of entries, and it saves the memory copying when it randomly decides to skip an entry. Also the table cleanup is now lazy, and it clean only the previously used entries. It should be much faster than the previous method of zero-filling all the entries. PiperOrigin-RevId: 845451974 --- centipede/BUILD | 1 + centipede/byte_array_mutator_test.cc | 2 +- centipede/fuzztest_mutator.cc | 33 +++++++++++--- centipede/runner_cmp_trace.h | 47 +++++++++++++++++--- centipede/runner_cmp_trace_test.cc | 8 ++-- fuzztest/internal/table_of_recent_compares.h | 1 - 6 files changed, 73 insertions(+), 19 deletions(-) diff --git a/centipede/BUILD b/centipede/BUILD index 371abdacb..0344cab92 100644 --- a/centipede/BUILD +++ b/centipede/BUILD @@ -938,6 +938,7 @@ cc_library( name = "runner_cmp_trace", hdrs = ["runner_cmp_trace.h"], copts = DISABLE_SANCOV_COPTS, + deps = ["@abseil-cpp//absl/base:core_headers"], ) # Library for manipulating centipede runner flags. This is not used by the diff --git a/centipede/byte_array_mutator_test.cc b/centipede/byte_array_mutator_test.cc index ae35641b5..a6f6e1b5e 100644 --- a/centipede/byte_array_mutator_test.cc +++ b/centipede/byte_array_mutator_test.cc @@ -158,7 +158,7 @@ TEST(CmpDictionary, CmpDictionary) { } TEST(CmpDictionary, CmpDictionaryIsCompatibleWithCmpTrace) { - CmpTrace<0, 13> traceN; + CmpTrace<0, 13> traceN = {}; traceN.Clear(); constexpr uint8_t long_array[20] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; diff --git a/centipede/fuzztest_mutator.cc b/centipede/fuzztest_mutator.cc index 0690f2801..101be6756 100644 --- a/centipede/fuzztest_mutator.cc +++ b/centipede/fuzztest_mutator.cc @@ -42,6 +42,16 @@ namespace { using MutatorDomainBase = decltype(fuzztest::VectorOf(fuzztest::Arbitrary())); +template +bool SampleInsert(const T& cmp_table, size_t& counter) { + static thread_local absl::BitGen bitgen; + counter++; + if (counter <= cmp_table.kTableSize) { + return true; + } + return absl::Uniform(bitgen, 0, counter) < cmp_table.kTableSize; +} + template void InsertCmpEntryIntoIntegerDictionary(const uint8_t* a, const uint8_t* b, TablesOfRecentCompares& cmp_tables) { @@ -59,25 +69,36 @@ void PopulateCmpEntries(const ExecutionMetadata& metadata, // Size limits on the cmp entries to be populated. static constexpr uint8_t kMaxCmpEntrySize = 15; static constexpr uint8_t kMinCmpEntrySize = 2; + size_t uint16_sample_counter = 0; + size_t uint32_sample_counter = 0; + size_t uint64_sample_counter = 0; + size_t mem_sample_counter = 0; - metadata.ForEachCmpEntry([&cmp_tables](fuzztest::internal::ByteSpan a, - fuzztest::internal::ByteSpan b) { + metadata.ForEachCmpEntry([&](fuzztest::internal::ByteSpan a, + fuzztest::internal::ByteSpan b) { FUZZTEST_CHECK(a.size() == b.size()) << "cmp operands must have the same size"; const size_t size = a.size(); if (size < kMinCmpEntrySize) return; if (size > kMaxCmpEntrySize) return; - if (size == 2) { + if (size == 2 && SampleInsert(cmp_tables.GetMutable(), + uint16_sample_counter)) { InsertCmpEntryIntoIntegerDictionary(a.data(), b.data(), cmp_tables); - } else if (size == 4) { + } else if (size == 4 && + SampleInsert(cmp_tables.GetMutable(), + uint32_sample_counter)) { InsertCmpEntryIntoIntegerDictionary(a.data(), b.data(), cmp_tables); - } else if (size == 8) { + } else if (size == 8 && + SampleInsert(cmp_tables.GetMutable(), + uint64_sample_counter)) { InsertCmpEntryIntoIntegerDictionary(a.data(), b.data(), cmp_tables); } - cmp_tables.GetMutable<0>().Insert(a.data(), b.data(), size); + if (SampleInsert(cmp_tables.GetMutable<0>(), mem_sample_counter)) { + cmp_tables.GetMutable<0>().Insert(a.data(), b.data(), size); + } }); } diff --git a/centipede/runner_cmp_trace.h b/centipede/runner_cmp_trace.h index 66878758b..c98001b1a 100644 --- a/centipede/runner_cmp_trace.h +++ b/centipede/runner_cmp_trace.h @@ -18,10 +18,14 @@ // Capturing arguments of CMP instructions, memcmp, and similar. // WARNING: this code needs to have minimal dependencies. +#include + #include #include #include +#include "absl/base/optimization.h" + namespace fuzztest::internal { // Captures up to `kNumItems` different CMP argument pairs. @@ -45,16 +49,38 @@ class CmpTrace { // No CTOR - objects will be created in TLS. // Clears `this`. - void Clear() { memset(this, 0, sizeof(*this)); } + void Clear() { to_clear = true; } // Captures one CMP argument pair, as two byte arrays, `size` bytes each. void Capture(uint8_t size, const uint8_t *value0, const uint8_t *value1) { + if (ABSL_PREDICT_FALSE(to_clear)) { + for (size_t i = 0; i < kNumItems; ++i) { + if (sizes_[i] == 0) break; + sizes_[i] = 0; + } + capture_count_ = 0; + to_clear = false; + } if (size > kNumBytesPerValue) size = kNumBytesPerValue; // We choose a pseudo-random slot each time. // This way after capturing many pairs we end up with up to `kNumItems` // pairs which are typically, but not always, the most recent. - rand_seed_ = rand_seed_ * 1103515245 + 12345; - const size_t index = rand_seed_ % kNumItems; + size_t index = 0; + if (capture_count_ < kNumItems) { + index = capture_count_++; + } else { + if (rand_seed_ == 0) { + // Initialize the random seed (likely) once. + struct timeval tv = {}; + constexpr size_t kUsecInSec = 1000000; + gettimeofday(&tv, nullptr); + rand_seed_ = tv.tv_sec * kUsecInSec + tv.tv_usec; + } + capture_count_++; + rand_seed_ = rand_seed_ * 1103515245 + 12345; + index = rand_seed_ % capture_count_; + if (index >= kNumItems) return; + } Item& item = items_[index]; sizes_[index] = size; __builtin_memcpy(item.value0, value0, size); @@ -74,12 +100,14 @@ class CmpTrace { // Iterates non-zero CMP pairs. template void ForEachNonZero(Callback callback) { + if (ABSL_PREDICT_FALSE(to_clear)) return; for (size_t i = 0; i < kNumItems; ++i) { const auto size = sizes_[i]; - if (size == 0 || size > kNumBytesPerValue) continue; - sizes_[i] = 0; + if (size == 0) break; + if (size > kNumBytesPerValue) continue; callback(size, items_[i].value0, items_[i].value1); } + to_clear = true; } private: @@ -89,17 +117,22 @@ class CmpTrace { uint8_t value1[kNumBytesPerValue]; }; + volatile bool to_clear; + // Value sizes of argument pairs. zero-size indicates that the corresponding // entry is empty. // // Marked volatile because of the potential racing between the owning thread // and the main thread, which is tolerated gracefully. volatile uint8_t sizes_[kNumItems]; + + size_t capture_count_; // Values of argument pairs. Item items_[kNumItems]; - // Pseudo-random seed. - size_t rand_seed_; + // Pseudo-random seed from glibc + // (https://en.wikipedia.org/wiki/Linear_congruential_generator). + uint32_t rand_seed_; }; } // namespace fuzztest::internal diff --git a/centipede/runner_cmp_trace_test.cc b/centipede/runner_cmp_trace_test.cc index 492c18f4d..04d20ca27 100644 --- a/centipede/runner_cmp_trace_test.cc +++ b/centipede/runner_cmp_trace_test.cc @@ -56,10 +56,10 @@ TEST(CmpTrace, T1) { observed_pairs.push_back(cmp_pair); }; - CmpTrace<2, 10> trace2; - CmpTrace<4, 11> trace4; - CmpTrace<8, 12> trace8; - CmpTrace<0, 13> traceN; + CmpTrace<2, 10> trace2 = {}; + CmpTrace<4, 11> trace4 = {}; + CmpTrace<8, 12> trace8 = {}; + CmpTrace<0, 13> traceN = {}; trace2.Clear(); trace4.Clear(); trace8.Clear(); diff --git a/fuzztest/internal/table_of_recent_compares.h b/fuzztest/internal/table_of_recent_compares.h index ed59b71ef..742bf55e5 100644 --- a/fuzztest/internal/table_of_recent_compares.h +++ b/fuzztest/internal/table_of_recent_compares.h @@ -1,4 +1,3 @@ - // Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License");