forked from google/fuzztest
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunner_sancov.cc
309 lines (280 loc) · 12.7 KB
/
runner_sancov.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Instrumentation callbacks for SanitizerCoverage (sancov).
// https://clang.llvm.org/docs/SanitizerCoverage.html
#include <pthread.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include "./centipede/feature.h"
#include "./centipede/int_utils.h"
#include "./centipede/pc_info.h"
#include "./centipede/reverse_pc_table.h"
#include "./centipede/runner.h"
#include "./centipede/runner_dl_info.h"
namespace centipede {

// Intentionally empty. The function exists only so that it can be referenced
// in runner.cc.
void RunnerSancov() {
}

}  // namespace centipede
using centipede::PCGuard;
using centipede::PCInfo;
using centipede::state;
using centipede::tls;
// Tracing data flow.
// The instrumentation is provided by
// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow.
// For every load we get the address of the load. We can also get the caller PC.
// If the load address is in
// [main_object.start_address, main_object.start_address + main_object.size),
// it is likely a global.
// We form a feature from a pair of {caller_pc, address_of_load}.
// The rationale here is that loading from a global address unique for the
// given PC is an interesting enough behavior that it warrants its own feature.
//
// Downsides:
// * The instrumentation is expensive, it can easily add 2x slowdown.
// * This creates plenty of features, easily 10x compared to control flow,
// and bloats the corpus. But this is also what we want to achieve here.
// Function specifier that forces inlining of the function it is applied to.
// NOTE: In addition to `always_inline`, also use `inline`, because some
// compilers require both to actually enforce inlining, e.g. GCC:
// https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html.
#define ENFORCE_INLINE __attribute__((always_inline)) inline
// Use this attribute for functions that must not be instrumented even if
// the runner is built with sanitizers (asan, etc).
// Expands to the `no_sanitize("all")` function attribute.
#define NO_SANITIZE __attribute__((no_sanitize("all")))
// Records a data flow feature for a load from `addr`.
// The feature is built from the pair {return address offset, load address
// offset}, both taken relative to `state.main_object.start_address`.
// Nothing is recorded when `use_dataflow_features` is off, when the return
// address lies outside the main object, or when `addr` lies outside the main
// object (i.e. it is not likely to be a global).
// NOTE: Enforce inlining so that `__builtin_return_address` works.
ENFORCE_INLINE static void TraceLoad(void *addr) {
  if (!state.run_time_flags.use_dataflow_features) return;

  const auto &main_object = state.main_object;
  const auto return_pc =
      reinterpret_cast<uintptr_t>(__builtin_return_address(0));
  const auto pc_offset = return_pc - main_object.start_address;
  const auto addr_offset =
      reinterpret_cast<uintptr_t>(addr) - main_object.start_address;

  // Both the PC and the loaded address must fall inside the main object.
  const bool pc_in_main_object = pc_offset < main_object.size;
  const bool addr_in_main_object = addr_offset < main_object.size;
  if (!pc_in_main_object || !addr_in_main_object) return;

  state.data_flow_feature_set.set(centipede::ConvertPcPairToNumber(
      pc_offset, addr_offset, main_object.size));
}
// Records comparison features for the operand pair {`Arg1`, `Arg2`}.
// The feature context is a hash of the return address offset (relative to
// `state.main_object.start_address`) XOR-ed with the current path hash.
// Equal operands set one bit in `cmp_eq_set`; unequal operands set one bit in
// each of the moddiff / hamming / difflog sets.
// Does nothing when `use_cmp_features` is off.
// NOTE: Enforce inlining so that `__builtin_return_address` works.
ENFORCE_INLINE static void TraceCmp(uint64_t Arg1, uint64_t Arg2) {
  if (!state.run_time_flags.use_cmp_features) return;

  const auto return_pc =
      reinterpret_cast<uintptr_t>(__builtin_return_address(0));
  const auto pc_offset = return_pc - state.main_object.start_address;
  const uintptr_t context_hash =
      centipede::Hash64Bits(pc_offset) ^ tls.path_ring_buffer.hash();

  if (Arg1 == Arg2) {
    state.cmp_eq_set.set(context_hash);
    return;
  }

  // ABTo* generate 6-bit numbers, so free up the low 6 bits for them.
  const uintptr_t shifted_hash = context_hash << 6;
  state.cmp_moddiff_set.set(shifted_hash |
                            centipede::ABToCmpModDiff(Arg1, Arg2));
  state.cmp_hamming_set.set(shifted_hash |
                            centipede::ABToCmpHamming(Arg1, Arg2));
  state.cmp_difflog_set.set(shifted_hash |
                            centipede::ABToCmpDiffLog(Arg1, Arg2));
}
//------------------------------------------------------------------------------
// Implementations of the external sanitizer coverage hooks.
//------------------------------------------------------------------------------
extern "C" {
// Load hooks, one per pointee type (uint8_t through __uint128_t).
// Each one only forwards the load address to TraceLoad(); the pointee type is
// not otherwise used. TraceLoad() is force-inlined into each hook, so the
// return address it reads is that of the hook itself.
NO_SANITIZE void __sanitizer_cov_load1(uint8_t *addr) { TraceLoad(addr); }
NO_SANITIZE void __sanitizer_cov_load2(uint16_t *addr) { TraceLoad(addr); }
NO_SANITIZE void __sanitizer_cov_load4(uint32_t *addr) { TraceLoad(addr); }
NO_SANITIZE void __sanitizer_cov_load8(uint64_t *addr) { TraceLoad(addr); }
NO_SANITIZE void __sanitizer_cov_load16(__uint128_t *addr) { TraceLoad(addr); }
// Comparison hook for uint8_t operands: records comparison features via
// TraceCmp(). Unlike the 2/4/8-byte variants, it does not capture the operands
// in a `tls.cmp_trace*` buffer.
NO_SANITIZE
void __sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) {
TraceCmp(Arg1, Arg2);
}
// Comparison hook for uint16_t operands.
// Always records comparison features via TraceCmp(). Additionally captures the
// operand pair in `tls.cmp_trace2` when the operands differ and
// `use_auto_dictionary` is on.
NO_SANITIZE
void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) {
  TraceCmp(Arg1, Arg2);
  // Equal operands are never captured.
  if (Arg1 == Arg2) return;
  if (!state.run_time_flags.use_auto_dictionary) return;
  tls.cmp_trace2.Capture(Arg1, Arg2);
}
// Comparison hook for uint32_t operands.
// Always records comparison features via TraceCmp(). Additionally captures the
// operand pair in `tls.cmp_trace4` when the operands differ and
// `use_auto_dictionary` is on.
NO_SANITIZE
void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) {
  TraceCmp(Arg1, Arg2);
  // Equal operands are never captured.
  if (Arg1 == Arg2) return;
  if (!state.run_time_flags.use_auto_dictionary) return;
  tls.cmp_trace4.Capture(Arg1, Arg2);
}
// Comparison hook for uint64_t operands.
// Always records comparison features via TraceCmp(). Additionally captures the
// operand pair in `tls.cmp_trace8` when the operands differ and
// `use_auto_dictionary` is on.
NO_SANITIZE
void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) {
  TraceCmp(Arg1, Arg2);
  // Equal operands are never captured.
  if (Arg1 == Arg2) return;
  if (!state.run_time_flags.use_auto_dictionary) return;
  tls.cmp_trace8.Capture(Arg1, Arg2);
}
// Comparison hook for uint8_t operands: records comparison features via
// TraceCmp(). Unlike the 2/4/8-byte variants, it does not capture the operands
// in a `tls.cmp_trace*` buffer.
NO_SANITIZE
void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) {
TraceCmp(Arg1, Arg2);
}
// Comparison hook for uint16_t operands.
// Always records comparison features via TraceCmp(). Additionally captures the
// operand pair in `tls.cmp_trace2` when the operands differ and
// `use_auto_dictionary` is on.
NO_SANITIZE
void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) {
  TraceCmp(Arg1, Arg2);
  // Equal operands are never captured.
  if (Arg1 == Arg2) return;
  if (!state.run_time_flags.use_auto_dictionary) return;
  tls.cmp_trace2.Capture(Arg1, Arg2);
}
// Comparison hook for uint32_t operands.
// Always records comparison features via TraceCmp(). Additionally captures the
// operand pair in `tls.cmp_trace4` when the operands differ and
// `use_auto_dictionary` is on.
NO_SANITIZE
void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) {
  TraceCmp(Arg1, Arg2);
  // Equal operands are never captured.
  if (Arg1 == Arg2) return;
  if (!state.run_time_flags.use_auto_dictionary) return;
  tls.cmp_trace4.Capture(Arg1, Arg2);
}
// Comparison hook for uint64_t operands.
// Always records comparison features via TraceCmp(). Additionally captures the
// operand pair in `tls.cmp_trace8` when the operands differ and
// `use_auto_dictionary` is on.
NO_SANITIZE
void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) {
  TraceCmp(Arg1, Arg2);
  // Equal operands are never captured.
  if (Arg1 == Arg2) return;
  if (!state.run_time_flags.use_auto_dictionary) return;
  tls.cmp_trace8.Capture(Arg1, Arg2);
}
// TODO(kcc): [impl] handle switch.
// Currently a no-op: `Val` and `Cases` are ignored, so this hook records no
// features.
NO_SANITIZE
void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) {}
// This function is called at startup when
// -fsanitize-coverage=inline-8bit-counters is used.
// See https://clang.llvm.org/docs/SanitizerCoverage.html#inline-8bit-counters
// Forwards the `beg`/`end` pair unchanged to
// state.sancov_objects.Inline8BitCountersInit().
void __sanitizer_cov_8bit_counters_init(uint8_t *beg, uint8_t *end) {
state.sancov_objects.Inline8BitCountersInit(beg, end);
}
// https://clang.llvm.org/docs/SanitizerCoverage.html#pc-table
// This function is called at the DSO init time, potentially several times.
// When called from the same DSO, the arguments will always be the same.
// If a different DSO calls this function, it will have different arguments.
// We currently do not support more than one sancov-instrumented DSO.
// Forwards the `beg`/`end` pair unchanged to
// state.sancov_objects.PCInfoInit().
void __sanitizer_cov_pcs_init(const PCInfo *beg, const PCInfo *end) {
state.sancov_objects.PCInfoInit(beg, end);
}
// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-control-flow
// This function is called at the DSO init time.
// Forwards the `beg`/`end` pair unchanged to state.sancov_objects.CFSInit().
void __sanitizer_cov_cfs_init(const uintptr_t *beg, const uintptr_t *end) {
state.sancov_objects.CFSInit(beg, end);
}
// Updates the state of the paths; used when `path_level > 0`.
// Pushes `normalized_pc` into the path ring buffer and records the resulting
// hash as a path feature.
// Marked noinline so as not to create spills/fills on the fast path
// of __sanitizer_cov_trace_pc_guard.
__attribute__((noinline)) static void HandlePath(uintptr_t normalized_pc) {
  const uintptr_t path_hash = tls.path_ring_buffer.push(normalized_pc);
  state.path_feature_set.set(path_hash);
}
// Handles one observed PC, described by `pc_guard`.
// `pc_guard.pc_index` identifies the PC and `pc_guard.is_function_entry` says
// whether the PC is known to be a function entry.
// Does nothing when `use_pc_features` is off. Otherwise it:
//  * increments the (saturating) counter of the PC;
//  * on a function entry, updates the lowest observed stack pointer and, when
//    `callstack_level` is non-zero, records a call stack feature;
//  * when `path_level` is non-zero, records a path feature via HandlePath().
static inline void HandleOnePc(PCGuard pc_guard) {
  if (!state.run_time_flags.use_pc_features) return;

  state.pc_counter_set.SaturatedIncrement(pc_guard.pc_index);

  if (pc_guard.is_function_entry) {
    uintptr_t frame_address =
        reinterpret_cast<uintptr_t>(__builtin_frame_address(0));

    // A new record is a stack pointer below the previous lowest one. The
    // stack pointer is ignored when it is not in the known region (e.g. for
    // signal handling with an alternative stack).
    const bool is_new_stack_record =
        frame_address < tls.lowest_sp &&
        (tls.stack_region_low == 0 || frame_address >= tls.stack_region_low);

    // It should be rare for the stack depth to exceed the previous record.
    if (__builtin_expect(is_new_stack_record, 0)) {
      tls.lowest_sp = frame_address;
      centipede::CheckStackLimit(frame_address);
    }

    if (state.run_time_flags.callstack_level != 0) {
      tls.call_stack.OnFunctionEntry(pc_guard.pc_index, frame_address);
      state.callstack_set.set(tls.call_stack.Hash());
    }
  }

  // Path features.
  if (state.run_time_flags.path_level != 0) {
    HandlePath(pc_guard.pc_index);
  }
}
// Converts a return address into the corresponding caller PC.
// Caller PC is the PC of the call instruction.
// Return address is the PC where the callee will return upon completion.
// The distance between the two is fixed per architecture:
//   x86_64:  CallerPC == ReturnAddress - 5
//   AArch64: CallerPC == ReturnAddress - 4
// Any other architecture is rejected at compile time.
static uintptr_t ReturnAddressToCallerPc(uintptr_t return_address) {
#ifdef __x86_64__
  constexpr uintptr_t kReturnToCallDelta = 5;
#elif defined(__aarch64__)
  constexpr uintptr_t kReturnToCallDelta = 4;
#else
#error "unsupported architecture"
#endif
  return return_address - kReturnToCallDelta;
}
// Sets `state.actual_pc_counter_set_size_aligned` to `size`, rounded up to a
// multiple of `state.pc_counter_set.kSizeMultiple`.
// NOTE(review): the mask arithmetic presumes kSizeMultiple is a power of two;
// confirm against the counter set definition.
static void UpdatePcCounterSetSizeAligned(size_t size) {
  constexpr size_t kGranularity = state.pc_counter_set.kSizeMultiple;
  constexpr size_t kLowBits = kGranularity - 1;
  const size_t rounded_up = (size + kLowBits) & ~kLowBits;
  state.actual_pc_counter_set_size_aligned = rounded_up;
}
// MainObjectLazyInit() and helpers allow us to initialize state.main_object
// lazily and thread-safely on the first call to __sanitizer_cov_trace_pc().
//
// TODO(kcc): consider removing :dl_path_suffix= since with lazy init
// we can auto-detect the instrumented DSO.
//
// TODO(kcc): this lazy init is brittle.
// It assumes that __sanitizer_cov_trace_pc is the only code that touches
// state.main_object concurrently. I.e. we can not blindly reuse this lazy init
// for other instrumentation callbacks that use state.main_object.
// This code is also considered *temporary* because
// a) __sanitizer_cov_trace_pc is obsolete and we hope to not need it in future.
// b) a better option might be to do a non-lazy init by intercepting dlopen.
//
// We do not call MainObjectLazyInit() in
// __sanitizer_cov_trace_pc_guard() because
// a) there is no use case for that currently and
// b) it would slow down the hot function.
// Once-control passed to pthread_once() in MainObjectLazyInit().
static pthread_once_t main_object_lazy_init_once = PTHREAD_ONCE_INIT;
// One-time initializer for `state.main_object`; run through pthread_once().
// Looks up the DSO info using the ":dl_path_suffix=" string flag, logs the
// resulting start address to stderr, and then recomputes the aligned PC
// counter set size from the number of PCs in the reverse PC table.
static void MainObjectLazyInitOnceCallback() {
  auto &main_object = state.main_object;
  main_object = centipede::GetDlInfo(state.GetStringFlag(":dl_path_suffix="));
  fprintf(stderr, "MainObjectLazyInitOnceCallback %zx\n",
          main_object.start_address);

  const size_t num_pcs = state.reverse_pc_table.NumPcs();
  UpdatePcCounterSetSizeAligned(num_pcs);
}
// Runs MainObjectLazyInitOnceCallback() through pthread_once(), using
// `main_object_lazy_init_once` as the once-control.
// The return value of pthread_once() is ignored.
__attribute__((noinline)) static void MainObjectLazyInit() {
pthread_once(&main_object_lazy_init_once, MainObjectLazyInitOnceCallback);
}
// Hook for the trace-pc instrumentation: called with no arguments, it derives
// the PC from its own return address.
// See https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs.
// This instrumentation is redundant if other instrumentation
// (e.g. trace-pc-guard) is available, but GCC as of 2022-04 only supports
// this variant.
//
// Steps: lazily initialize `state.main_object` if needed, normalize the
// return address to a caller PC relative to the main object's start address,
// look it up in the reverse PC table, and hand valid hits to HandleOnePc().
//
// TODO(kcc): [impl] add proper testing for this callback.
// TODO(kcc): make sure the pc_table in the engine understands the raw PCs.
// TODO(kcc): this implementation is temporary. In order for symbolization to
// work we will need to translate the PC into a PCIndex or make pc_table sparse.
void __sanitizer_cov_trace_pc() {
  const uintptr_t return_address =
      reinterpret_cast<uintptr_t>(__builtin_return_address(0));

  const bool is_initialized = state.main_object.start_address &&
                              state.actual_pc_counter_set_size_aligned;
  if (!is_initialized) {
    // Don't track coverage at all before the PC table is initialized.
    if (state.reverse_pc_table.NumPcs() == 0) return;
    MainObjectLazyInit();
  }

  // Make the PC relative to the main object, then step back from the return
  // address to the call instruction.
  const uintptr_t normalized_pc = ReturnAddressToCallerPc(
      return_address - state.main_object.start_address);

  const auto pc_guard = state.reverse_pc_table.GetPCGuard(normalized_pc);
  // TODO(kcc): compute is_function_entry for this case.
  if (!pc_guard.IsValid()) return;
  HandleOnePc(pc_guard);
}
// This function is called at the DSO init time.
// Passes the `start`/`stop` guard pair to state.sancov_objects.PCGuardInit()
// and then recomputes the aligned PC counter set size from the number of
// instrumented PCs reported by state.sancov_objects.
void __sanitizer_cov_trace_pc_guard_init(PCGuard *start, PCGuard *stop) {
state.sancov_objects.PCGuardInit(start, stop);
UpdatePcCounterSetSizeAligned(state.sancov_objects.NumInstrumentedPCs());
}
// This function is called on every instrumented edge.
// It passes a copy of `*guard` to HandleOnePc().
NO_SANITIZE
void __sanitizer_cov_trace_pc_guard(PCGuard *guard) {
// This function may be called very early during the DSO initialization,
// before the values of `*guard` are initialized to non-zero.
// But it will immediately return because state.run_time_flags.use_pc_features
// is false. Once state.run_time_flags.use_pc_features becomes true, it is
// already ok to call this function.
HandleOnePc(*guard);
}
} // extern "C"